In [1]:
import sys
import pandas as pd
from dotenv import load_dotenv
from langchain.llms import HuggingFaceHub
from langchain import PromptTemplate, LLMChain
from langchain.llms.huggingface_pipeline import HuggingFacePipeline

# require "HUGGINGFACEHUB_API_TOKEN" in .env file!
load_dotenv()

sys.path.append("../")
from utils.data_utils import preprocess_score_df, preprocess_pattern_map_df
from utils.data_structurer import DataStructurer
from utils.constants import ANSWER_COL

In [2]:
# Read each CSV and immediately run it through its project preprocessing
# helper, so only the preprocessed frames are bound to names.
score_df = preprocess_score_df(
    pd.read_csv("../data/cem2_score_1.csv")
)
pattern_map_df = preprocess_pattern_map_df(
    pd.read_csv("../data/pattern_map.csv")
)

In [3]:
# Bundle the preprocessed score and pattern-map frames into a DataStructurer;
# later cells read .score_df, .pattern_list and .pattern_max_score_list from it.
# NOTE(review): the positional arguments 1122128 and 10 are not explained in
# this notebook — confirm their meaning against DataStructurer's signature and
# consider passing them as named constants.
structurer = DataStructurer(score_df, pattern_map_df, 1122128, 10)

In [4]:
def preprocess(input_string):
    """Return ``input_string`` as text with surrounding whitespace removed.

    The argument is first coerced with ``str()``, so non-string values
    (numbers, ``None``, ...) are accepted and rendered by their string form.
    Leading and trailing whitespace, including newlines, is trimmed; interior
    whitespace is left untouched.
    """
    # str.strip() with no argument already removes newlines at both ends.
    return str(input_string).strip()

In [5]:
# Scoring prompt with three placeholders: {max_score}, {criteria} and {answer}.
# The text ends on the "###Score###" header so the model's completion is
# expected to be the score itself.
template = """Compute the overall score of the response based on the provided criteria. Return only one score number (Max score is {max_score}).
###Criteria### 
{criteria}

###Answer### 
{answer}

###Score###
"""

# preprocess() trims surrounding whitespace, which drops the trailing newline
# after "###Score###".
template = preprocess(template)
# input_variables must list every placeholder used in the template above.
prompt = PromptTemplate(template=template, input_variables=["criteria", "answer", "max_score"])

In [6]:
# llm = HuggingFaceHub(
#         # repo_id="google/flan-t5-base",
#         repo_id="google/flan-t5-large",
#         # repo_id="SeaLLMs/SeaLLM-7B-v2",
#         # repo_id="scb10x/typhoon-7b",
#         # repo_id="openthaigpt/openthaigpt-1.0.0-alpha-7b-chat-ckpt-hf"
#         # repo_id="migtissera/Tess-M-Creative-v1.0",
#         # repo_id="google/flan-t5-xl",
#         # model_kwargs={
#         #     "temperature": 0,
#         #     "max_length": 128,
#         # }
#     )

# llm_chain = LLMChain(
#     prompt=prompt,
#     llm=llm,
# )

In [7]:
# criteria = """
# + 2 point for having risk mitigation.
# + 3 point for having data-driven decision.
# """
# answer = """Data-Driven Decisions and Risk Mitigation
# """
# max_score = 5
# # criteria = preprocess(criteria)
# # answer = preprocess(answer)

# # result = llm_chain.run({"criteria": criteria, "answer": answer})
# # print(result)


# def predict_score(answer, pattern, max_score):
#     criteria = preprocess(pattern) 
#     answer = preprocess(answer)

#     result = llm_chain.run({"criteria": criteria, "answer": answer, "max_score": max_score})
    
#     try:
#         result = float(result)
#     except ValueError:
#         print(result)
#         result = -1
#     return result

# predict_score(answer, criteria, max_score)

In [8]:
# from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline

# hf = HuggingFacePipeline.from_model_id(
#     model_id="LoneStriker/SeaLLM-7B-v2-GGUF",
#     task="text-generation",
#     # pipeline_kwargs={"max_new_tokens": 10},
#     device=1,
# )

# from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# model_id = "LoneStriker/SeaLLM-7B-v2-GGUF"
# tokenizer = AutoTokenizer.from_pretrained(model_id)
# model = AutoModelForCausalLM.from_pretrained(model_id)
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=10)
# hf = HuggingFacePipeline(pipeline=pipe)

# chain = prompt | hf

# def predict_score(answer, pattern, max_score):
#     criteria = preprocess(pattern) 
#     answer = preprocess(answer)

#     result = chain.invoke({"criteria": criteria, "answer": answer, "max_score": max_score})
    
#     try:
#         result = float(result)
#     except ValueError:
#         print(result)
#         result = -1
#     return result

In [9]:
from langchain_openai import ChatOpenAI, OpenAI

# Completion-style client aimed at an OpenAI-compatible endpoint on
# localhost:1234; the api_key value is only a placeholder string.
# Chat-style alternative with the same settings:
# llm = ChatOpenAI(temperature=0.0, base_url="http://localhost:1234/v1", api_key="not-needed")
llm = OpenAI(
    temperature=0.0,
    base_url="http://localhost:1234/v1",
    api_key="not-needed",
)

# Chain that fills the scoring prompt and sends it to the model above.
llm_chain = LLMChain(llm=llm, prompt=prompt)


def predict_score(answer, pattern, max_score):
    """Ask the LLM chain to score ``answer`` against ``pattern``.

    Args:
        answer: The response to grade; coerced to text and trimmed by
            ``preprocess``.
        pattern: The scoring criteria; coerced to text and trimmed by
            ``preprocess``.
        max_score: Maximum achievable score, inserted into the prompt.

    Returns:
        The score parsed from the model output as a ``float``, or ``-1`` when
        no number can be read from the start of the output. The value is not
        clamped to ``max_score``.
    """
    import re  # local import keeps this cell runnable on its own

    criteria = preprocess(pattern)
    answer = preprocess(answer)

    response = llm_chain.invoke(
        {"criteria": criteria, "answer": answer, "max_score": max_score}
    )
    # Coerce to str so an unexpected non-string payload cannot raise TypeError.
    raw_text = str(response["text"])

    try:
        return float(raw_text)
    except ValueError:
        pass

    # Completion models often keep generating after the number
    # (e.g. "1.0\n\nExplanation: ..."), so accept a leading numeric token
    # instead of discarding the whole prediction.
    leading_number = re.match(r"\s*([-+]?(?:\d+(?:\.\d*)?|\.\d+))", raw_text)
    if leading_number is not None:
        return float(leading_number.group(1))

    # Nothing parseable: show the raw output for debugging and return the
    # sentinel value.
    print(raw_text)
    return -1

In [10]:
# Score every answer against every pattern. predict_list gets one inner list
# per row of structurer.score_df, holding one prediction per pattern.
# NOTE(review): assumes the ground-truth columns are named
# pattern1_1..pattern1_N in the same order as structurer.pattern_list — confirm.
predict_list = []
for index, row in structurer.score_df.iterrows():
    answer = row[ANSWER_COL]
    pred_score_list = []

    patterns_with_max = zip(
        structurer.pattern_list, structurer.pattern_max_score_list
    )
    for pattern_idx, (pattern, pattern_max_score) in enumerate(patterns_with_max):
        pred_score = predict_score(answer, pattern, pattern_max_score)
        pred_score_list.append(pred_score)
        actual_score = row[f"pattern1_{pattern_idx+1}"]
        print(f"{index=}, {pattern_idx=}\t, Actual: {actual_score}, Pred: {pred_score}, Max: {pattern_max_score}")

    predict_list.append(pred_score_list)

# Display the collected predictions as the cell output.
predict_list

index=0, pattern_idx=0	, Actual: 2.0, Pred: 1.0, Max: 2.0
index=0, pattern_idx=1	, Actual: 0.0, Pred: 0.8, Max: 1.0
index=0, pattern_idx=2	, Actual: 0.0, Pred: 0.9, Max: 1.0
index=0, pattern_idx=3	, Actual: 2.0, Pred: 0.9, Max: 1.0
index=1, pattern_idx=0	, Actual: 0.0, Pred: 1.0, Max: 2.0
index=1, pattern_idx=1	, Actual: 0.0, Pred: 0.0, Max: 1.0
index=1, pattern_idx=2	, Actual: 0.0, Pred: 0.0, Max: 1.0
index=1, pattern_idx=3	, Actual: 0.0, Pred: 0.0, Max: 1.0
index=2, pattern_idx=0	, Actual: 0.0, Pred: 1.0, Max: 2.0
index=2, pattern_idx=1	, Actual: 0.0, Pred: 0.0, Max: 1.0
index=2, pattern_idx=2	, Actual: 0.0, Pred: 0.0, Max: 1.0
index=2, pattern_idx=3	, Actual: 0.0, Pred: 0.0, Max: 1.0
index=3, pattern_idx=0	, Actual: 0.0, Pred: 1.0, Max: 2.0
index=3, pattern_idx=1	, Actual: 0.0, Pred: 0.0, Max: 1.0
index=3, pattern_idx=2	, Actual: 0.0, Pred: 0.0, Max: 1.0
index=3, pattern_idx=3	, Actual: 0.0, Pred: 0.0, Max: 1.0
index=4, pattern_idx=0	, Actual: 0.0, Pred: 1.0, Max: 2.0
index=4, patte

[[1.0, 0.8, 0.9, 0.9],
 [1.0, 0.0, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0],
 [1.0, 0.0, 0.0, 0.0],
 [1.0, 0.8, 0.8, 0.5],
 [1.0, 0.0, 0.0, 0.0],
 [1.0, 0.5, 0.75, 0.5],
 [1.0, 0.8, 0.8, 0.5],
 [1.0, 0.5, 0.5, 0.5],
 [1.0, 0.85, 0.8, 0.8],
 [1.0, 0.0, 0.0, 0.0],
 [1.0, 0.85, 0.8, 0.8],
 [1.0, 0.8, 0.8, 0.5],
 [1.0, 0.85, 0.9, 0.9],
 [1.0, 0.8, 0.8, 0.8],
 [1.0, 0.5, 0.8, 0.8],
 [1.0, 0.8, 0.8, 0.8],
 [1.0, 0.0, 0.0, 0.0],
 [1.0, 0.5, 0.7, 0.5],
 [1.0, 0.5, 0.5, 0.5],
 [1.0, 0.75, 0.8, 0.75],
 [1.0, 0.0, 0.0, 0.0],
 [1.0, 0.8, 0.9, 0.8],
 [1.0, 0.5, 0.5, 0.5],
 [1.0, 0.5, 0.8, 0.5],
 [1.0, 0.0, 0.0, 0.0],
 [1.0, 0.8, 0.8, 0.8],
 [1.0, 0.8, 0.8, 0.5],
 [2.0, 0.95, 1.0, 1.0],
 [1.0, 0.8, 0.8, 0.8],
 [1.0, 0.75, 0.8, 0.5],
 [1.0, 0.5, 0.8, 0.7],
 [1.0, 0.8, 0.8, 0.5],
 [1.0, 0.8, 0.8, 0.75],
 [0.5, 0.5, 0.75, 0.5],
 [1.0, 0.8, 0.8, 0.8],
 [1.0, 0.8, 0.8, 0.8],
 [1.0, 0.8, 0.9, 0.9],
 [1.0, 0.85, 0.8, 0.8],
 [1.0, 0.0, 0.0, 0.0],
 [1.0, 0.8, 0.8, 0.8],
 [1.0, 0.8, 0.8, 0.8],
 [1.0, 0.75, 0.8, 0.75]

In [11]:
# Hard-coded score matrix: 146 rows of 4 values each, the same shape as the
# nested lists collected in predict_list. The -1 entries match the value
# predict_score returns when the model output cannot be parsed.
# NOTE(review): presumably pasted from an earlier run against a SeaLLM GGUF
# model (going by the variable name) — confirm provenance, and consider loading
# it from a file instead of inlining it.
seallm_gguf_csa_1_result = [[0.5, 0.5, 0.75, 0.75],
 [1.0, 0.75, 1.0, 1.0],
 [1.5, 1.0, 1.0, 1.0],
 [1.0, 0.5, 0.75, 0.75],
 [1.0, 0.5, 0.75, 0.75],
 [1.0, 1.0, 1.0, 1.0],
 [0.5, 0.5, 0.75, 0.75],
 [1.0, 0.5, 0.75, 0.75],
 [-1, -1, -1, -1],
 [1.0, 1.0, 1.0, 1.0],
 [1.0, 0.5, 0.75, 1.0],
 [1.0, 0.75, 0.9, 1.0],
 [1.0, 0.9, 0.9, 0.9],
 [1.0, 0.75, 1.0, 0.75],
 [0.5, 0.5, 0.5, 0.75],
 [0.5, 0.5, 0.75, 0.75],
 [0.5, 0.5, 0.75, 0.75],
 [1.0, 0.25, 0.5, 0.5],
 [1.0, 0.5, 0.5, 0.5],
 [1.0, 0.75, 0.9, 0.9],
 [1.0, 0.5, 0.7, 0.75],
 [1.0, 1.0, 1.0, 1.0],
 [0.5, 0.5, 0.75, 0.75],
 [0.5, 0.5, 0.5, 0.75],
 [0.5, 0.5, 0.75, 0.75],
 [0.5, 0.5, 0.75, 0.75],
 [0.5, 0.5, 0.75, 0.75],
 [0.5, 0.5, 0.75, 0.75],
 [1.0, 0.3, 0.3, 0.3],
 [0.5, 0.75, 0.9, 0.9],
 [0.5, 0.5, 0.75, 0.5],
 [1.0, 0.5, 1.0, 1.0],
 [2.0, 0.75, 0.75, 0.75],
 [1.0, 0.5, 0.75, 0.75],
 [1.0, 0.25, 0.5, 0.5],
 [1.5, 0.75, 1.0, 1.0],
 [1.0, 0.75, 1.0, 1.0],
 [1.0, 0.75, 1.0, 0.75],
 [0.5, 0.5, 0.75, 0.75],
 [1.0, 0.5, 0.75, 1.0],
 [0.7, 0.7, 0.7, 0.7],
 [0.5, 0.5, 0.5, 0.75],
 [2.0, 0.75, 0.75, 0.9],
 [0.5, 0.5, 0.5, 0.5],
 [2.0, 0.75, 0.75, 0.75],
 [1.0, 0.75, 0.9, 0.9],
 [2.0, 0.75, 0.75, 0.75],
 [1.0, 0.75, 0.75, 0.75],
 [1.0, 0.5, 0.75, 0.75],
 [0.5, 0.5, 0.5, 0.5],
 [1.0, 1.0, 1.0, 1.0],
 [1.0, 0.75, 1.0, 1.0],
 [0.5, 0.5, 0.5, 0.5],
 [1.0, 0.3, 0.3, 0.3],
 [0.5, 0.5, 0.7, 0.75],
 [0.5, 0.5, 0.75, 0.75],
 [1.5, 1.0, 1.0, 1.0],
 [1.0, 0.75, 1.0, 0.75],
 [0.5, 0.5, 0.5, 0.75],
 [1.0, 0.2, 0.5, 0.5],
 [0.5, 0.5, 0.9, 0.75],
 [0.5, 0.5, 0.5, 0.75],
 [1.0, 0.0, 0.0, 0.0],
 [0.5, 0.5, 0.75, 0.75],
 [1.0, 0.75, 1.0, 0.9],
 [2.0, 0.75, 0.9, 0.5],
 [0.5, 0.75, 0.75, 0.75],
 [0.5, 0.5, 0.5, 0.75],
 [0.5, 0.5, 0.75, 0.7],
 [0.5, 0.5, 0.75, 0.75],
 [1.0, 0.5, 0.75, 0.75],
 [0.5, 0.5, 0.5, 0.5],
 [1.0, 0.7, 0.7, 0.7],
 [1.5, 0.75, 0.9, 1.0],
 [1.0, 0.5, 0.75, 0.75],
 [0.5, 0.5, 0.9, 0.9],
 [1.5, 1.0, 1.0, 1.0],
 [1.0, 0.5, 0.75, 0.75],
 [1.0, 0.5, 0.7, 0.7],
 [1.0, 0.75, 1.0, 1.0],
 [1.0, 0.75, 0.9, 0.9],
 [1.0, 0.5, 0.9, 0.9],
 [0.5, 0.5, 0.5, 0.5],
 [0.5, 0.5, 0.5, 0.5],
 [1.0, 0.75, 1.0, 0.75],
 [1.0, 0.75, 0.75, -1],
 [0.5, 0.5, 0.75, 0.75],
 [1.0, 0.5, 0.75, 0.75],
 [1.0, 1.0, 1.0, 1.0],
 [2.0, 0.75, 0.75, 0.75],
 [1.0, 0.75, 0.9, 0.9],
 [1.0, 0.75, 0.75, 0.9],
 [0.5, 0.5, 0.5, 0.75],
 [1.0, 1.0, 1.0, 1.0],
 [1.0, 0.5, 1.0, 1.0],
 [2.0, 0.75, 0.75, 0.75],
 [1.0, 0.75, 1.0, 0.9],
 [0.5, 0.5, 0.75, 0.75],
 [1.0, 0.3, 0.3, 0.3],
 [1.0, 0.75, 1.0, 1.0],
 [1.0, 0.7, 1.0, 0.9],
 [1.5, 0.75, 0.9, 0.9],
 [1.0, 0.5, 0.5, 0.5],
 [1.0, 0.5, 0.75, 0.75],
 [0.5, 0.5, 0.75, 0.9],
 [1.0, 0.7, 0.75, 0.75],
 [1.0, 0.5, 1.0, 1.0],
 [0.5, 0.5, 0.5, 0.5],
 [1.0, 0.7, 1.0, 1.0],
 [0.5, 0.5, 0.75, 0.5],
 [0.5, 0.5, 0.5, 0.5],
 [1.0, 0.25, 0.5, 0.5],
 [1.0, 0.5, 1.0, 1.0],
 [1.0, 0.5, 0.75, 0.75],
 [1.0, 0.75, 0.75, 0.75],
 [1.0, 0.0, 0.0, 0.0],
 [1.0, 0.5, 0.75, 0.75],
 [0.5, 0.5, 0.9, 0.75],
 [1.0, 0.5, 1.0, 1.0],
 [1.0, 0.7, 0.7, 0.7],
 [0.5, 0.5, 0.75, 0.75],
 [1.0, 0.5, 0.75, 1.0],
 [1.0, 0.75, 0.75, 0.75],
 [1.0, 0.5, 0.75, 0.75],
 [1.0, 0.5, 0.5, 1.0],
 [1.0, 0.75, 0.9, 0.75],
 [0.5, 0.5, 0.75, 0.75],
 [1.0, 0.7, 0.7, 0.9],
 [1.0, 0.5, 0.5, 0.5],
 [1.0, 0.5, 0.75, 0.75],
 [1.0, 0.75, 0.75, 0.75],
 [1.0, 0.75, 0.9, 0.9],
 [0.5, 0.5, 0.9, 0.9],
 [1.0, 0.2, 0.5, 0.5],
 [1.5, 0.5, 0.5, 0.5],
 [0.5, 0.5, 0.5, 0.7],
 [1.0, 0.0, 0.0, 0.0],
 [0.5, 0.5, 0.5, 0.5],
 [1.5, 1.0, 1.0, 1.0],
 [1.0, 0.0, 0.0, 0.0],
 [1.0, 0.7, 0.75, 0.7],
 [1.0, 0.75, 0.75, 0.75],
 [1.0, 0.5, 0.75, 0.75],
 [1.0, 0.75, 0.75, 0.75],
 [1.0, 0.75, 0.75, 0.75],
 [0.5, 0.5, 0.75, 0.75]]

In [12]:
# NOTE(review): presumably the elapsed wall time of a full run (49 min 51 s,
# in seconds) divided by 146 rows x 4 patterns, i.e. average seconds per LLM
# call — confirm, and consider timing the loop in code instead.
(49*60+51)/(146*4)

5.1215753424657535