In [None]:
# Score the expert opinion and each LLM answer with the pretrained MMADS/MoralFoundationsClassifier so the per-label scores can be compared.
import pandas as pd
from io import StringIO
from transformers import pipeline

# Read the file as text first so that bytes which are not valid UTF-8 are
# dropped (errors="ignore") instead of aborting the load, then let pandas
# parse the cleaned text.
with open("LLMs Answers.csv", mode="r", encoding="utf-8", errors="ignore") as csv_file:
    data = csv_file.read()
df = pd.read_csv(StringIO(data))

# Text-classification pipeline; model and tokenizer come from the same checkpoint.
classifier = pipeline(
    task="text-classification",
    model="MMADS/MoralFoundationsClassifier",
    tokenizer="MMADS/MoralFoundationsClassifier",
    # Ask for the score of every label, not just the top one; downstream code
    # indexes the result as results[0] and iterates over the label entries.
    return_all_scores=True,
    function_to_apply="sigmoid",
    # Cap tokenised inputs at 512 tokens.
    truncation=True,
    max_length=512,
)


def classify_text_all_scores(text):
    """Run the module-level ``classifier`` on ``text`` and serialise every label score.

    Args:
        text: Cell value to classify. Missing values, non-string values and
            blank/whitespace-only strings are not sent to the classifier.

    Returns:
        str: ``"label=score;label=score;..."`` with each score formatted to
        four decimal places, or ``""`` when ``text`` is missing, not a string,
        or blank.
    """
    # Guard clauses: missing value, then non-string, then blank string.
    if pd.isna(text):
        return ""
    if not isinstance(text, str):
        return ""
    if not text.strip():
        return ""

    # The classifier returns one list of {label, score} entries per input;
    # a single string was passed, so take the first (only) list.
    per_label = classifier(text)[0]

    parts = []
    for entry in per_label:
        parts.append(f"{entry['label']}={entry['score']:.4f}")
    return ";".join(parts)

# New score column -> source text column. Insertion order fixes the order in
# which the new columns are appended to the frame.
score_columns = {
    "expert_scores": "Expert Opinion (From Jesse)",
    "model1_scores": "ChatGPT's Answer ",  # note the trailing space in this header
    "model2_scores": "Deepseek's Answer",
    "model3_scores": "Claude's Answer",
}
for target_column, source_column in score_columns.items():
    df[target_column] = df[source_column].apply(classify_text_all_scores)

# Persist the augmented table (without the index) and show a preview.
output_path = "LLMs Answers_with_all_scores.csv"
df.to_csv(output_path, index=False)
print(df.head())

Device set to use cuda:0


               Topic                                Dilemma Name & Link  \
0  Allocating Credit                         Who Gets the Credit? (PDF)   
1  Allocating Credit                        But That Was My Idea! (PDF)   
2  Allocating Credit  Replacing a First Author on a Second Submissio...   
3  Allocating Credit        The Tyrannical Principal Investigator (PDF)   
4  Allocating Credit                      The Overly Nice Advisor (PDF)   

                                Dilemma Description   \
0  I joined a lab during graduate school and was ...   
1  Graduate students A and B are working on somew...   
2  David is a new postdoc in Dr. Goliaths lab. Up...   
3  A PI moves his lab to a different university, ...   
4  George Washington is one of two postdocs worki...   

                                   ChatGPT's Answer   \
0  This situation involves ethical concerns relat...   
1  This situation raises significant ethical conc...   
2  Davids handling of the manuscript raises 

In [8]:
# Compare each LLM answer directly with the expert opinion to judge whether it is a good response, then evaluate each model by its proportion of good responses.
import pandas as pd
from sentence_transformers import SentenceTransformer, util
from io import StringIO

# Read the file as text first so that bytes which are not valid UTF-8 are
# dropped (errors="ignore") instead of aborting the load.
with open("LLMs Answers.csv", "r", encoding="utf-8", errors="ignore") as f:
    data = f.read()
df = pd.read_csv(StringIO(data))

# Sentence-embedding model used to embed the expert opinion and every answer.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# An answer is flagged as a good response (status 1) when its cosine
# similarity to the expert opinion is at least this value.
threshold = 0.7

EXPERT_COLUMN = "Expert Opinion (From Jesse)"
# Output-column prefix -> source answer column. Insertion order determines the
# order of the columns appended to the frame.
ANSWER_COLUMNS = {
    "model1": "ChatGPT's Answer ",  # note the trailing space in this header
    "model2": "Deepseek's Answer",
    "model3": "Claude's Answer",
}


def _has_text(value):
    """Return True only for a non-blank string (rejects NaN/None/other types)."""
    return isinstance(value, str) and value.strip() != ""


similarities = {prefix: [] for prefix in ANSWER_COLUMNS}
statuses = {prefix: [] for prefix in ANSWER_COLUMNS}

for _, row in df.iterrows():
    expert_text = row[EXPERT_COLUMN]
    # Embed the expert opinion once per row; skip it when the cell is missing
    # or blank so that a non-string value is never passed to the encoder.
    expert_embedding = (
        model.encode(expert_text, convert_to_tensor=True)
        if _has_text(expert_text)
        else None
    )

    for prefix, column in ANSWER_COLUMNS.items():
        answer_text = row[column]
        if expert_embedding is None or not _has_text(answer_text):
            # Nothing to compare: record NaN, which also fails the threshold
            # test below and therefore yields status 0.
            similarity = float("nan")
        else:
            answer_embedding = model.encode(answer_text, convert_to_tensor=True)
            similarity = util.cos_sim(expert_embedding, answer_embedding).item()
        similarities[prefix].append(similarity)
        statuses[prefix].append(1 if similarity >= threshold else 0)

# Append <prefix>_similarity then <prefix>_status for each model, in order.
for prefix in ANSWER_COLUMNS:
    df[f"{prefix}_similarity"] = similarities[prefix]
    df[f"{prefix}_status"] = statuses[prefix]

# Keep the per-model lists available under their established names.
model1_scores, model2_scores, model3_scores = (
    similarities[prefix] for prefix in ANSWER_COLUMNS
)
model1_statuses, model2_statuses, model3_statuses = (
    statuses[prefix] for prefix in ANSWER_COLUMNS
)

print(model1_scores, model1_statuses, model2_scores, model2_statuses, model3_scores, model3_statuses)

# Proportion of good responses per model, computed over all rows (rows with
# missing text count as not good).
for prefix, column in ANSWER_COLUMNS.items():
    flags = statuses[prefix]
    good_share = sum(flags) / len(flags) if flags else float("nan")
    print(f"{column.strip()}: proportion of good responses = {good_share:.3f}")

df.to_csv("LLMs Answers_with_similarity.csv", index=False)

[0.556069016456604, 0.366840660572052, 0.7436351776123047, 0.7610009908676147, 0.6180353164672852, 0.7156317234039307, 0.4273454248905182, 0.3276230990886688, 0.6225638389587402, 0.6078225374221802, 0.7586894035339355, 0.6929235458374023, 0.5986394882202148, 0.6055260300636292, 0.7045950293540955] [0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1] [0.44605299830436707, 0.3555789589881897, 0.6455888748168945, 0.744238018989563, 0.5798215866088867, 0.7657429575920105, 0.5138099789619446, 0.5363022089004517, 0.5810044407844543, 0.6067180037498474, 0.5938402414321899, 0.5310254096984863, 0.6078423261642456, 0.6688383221626282, 0.7309655547142029] [0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1] [0.5126893520355225, 0.47484835982322693, 0.6913854479789734, 0.7563563585281372, 0.5765470266342163, 0.7236986756324768, 0.4507281184196472, 0.47473403811454773, 0.6010711789131165, 0.5767053365707397, 0.7423221468925476, 0.603062629699707, 0.5895752906799316, 0.5499739646911621, 0.6792951822280884] [0, 0,