In [1]:
from metrics import Metrics
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Single shared Metrics instance; the scoring cells below reach it through the
# notebook-level name `metrics`. (A `global` statement is not needed here: a name
# assigned at the top level of a cell is already global.)
metrics = Metrics()

In [3]:
def all_scores_(reference_text, hypothesis_text):
    """Score one reference/hypothesis pair with every metric exposed by ``metrics``.

    Args:
        reference_text: The reference text; passed as the first argument to
            every metric call.
        hypothesis_text: The text compared against the reference; passed as
            the second argument to every metric call.

    Returns:
        list: 20 entries, each being whatever the corresponding ``metrics``
        method returns, in this fixed order: BLEU, NIST, METEOR, ROUGE,
        exact match, Levenshtein distance, Damerau-Levenshtein, word error
        rate, gestalt pattern matching, Jaccard, overlap coefficient,
        Dice-Sorensen, cosine similarity ("tf-idf"), cosine similarity
        ("word_frequency"), spaCy similarity, WMD, gensim cosine, SBERT
        cosine, USE similarity, soft cosine.
    """
    texts = (reference_text, hypothesis_text)

    # The list display evaluates its items left to right, so the metrics run
    # in exactly the order in which they are reported.
    return [
        # n-gram overlap
        metrics.compute_bleu(*texts),
        metrics.compute_nist(*texts),
        metrics.compute_meteor(*texts),
        metrics.compute_rouge(*texts),
        # exact / edit-based
        metrics.exact_match(*texts),
        metrics.levenshtein_distance(*texts),
        metrics.damerau_levenshtein(*texts),
        metrics.word_error_rate(*texts),
        metrics.gestalt_pattern_matching(*texts),
        # set-based
        metrics.jaccard(*texts),
        metrics.overlap_coeff(*texts),
        metrics.dice_sorensen_coefficient(*texts),
        # vector cosine
        metrics.compute_cosine_similarity(*texts, "tf-idf"),
        metrics.compute_cosine_similarity(*texts, "word_frequency"),
        # embedding / semantic
        metrics.semantic_similarity_spacy(*texts),
        metrics.wmd(*texts),
        metrics.gensim_cosine(*texts),
        metrics.sbert_cosine(*texts),
        metrics.USE_similarity(*texts),
        metrics.soft_cosine(*texts),
    ]


In [4]:
# Smoke test of all_scores_ on a five-sentence paragraph and a close paraphrase of it.
# Adjacent string literals are concatenated by Python, one sentence per line.
reference_text = (
    "The transition from winter to spring is one of the most anticipated times of the year. "
    "The cold weather begins to fade, and the warmth of the sun gradually returns. "
    "As the days grow longer, nature awakens, with flowers blooming and trees regrowing their leaves. "
    "People feel renewed, eager to spend more time outdoors and enjoy the fresh air. "
    "It is a season of hope, growth, and change."
)
hypothesis_text = (
    "The transition from winter to spring is one of the most anticipated times of the year. "
    "The cold begins to disappear and the heat of the sun gradually returns. "
    "As the days get longer, nature awakens, flowers bloom and trees grow again. "
    "People feel refreshed and look forward to spending more time outdoors enjoying the fresh air. "
    "It is a time of hope, growth and change."
)
all_scores_(reference_text, hypothesis_text)

100%|██████████| 63/63 [00:05<00:00, 10.60it/s]


[0.8260465164578865,
 4.853602676071032,
 0.8494417728578494,
 {'r': 0.7358490566037735, 'p': 0.78, 'f': 0.7572815484023001},
 0,
 83,
 0.7844155844155845,
 0.7844155844155845,
 0.32625994694960214,
 0.9666666666666667,
 1.0,
 0.9830508474576272,
 0.8603579934292733,
 0.9225806451612905,
 0.9948443554656535,
 0.2749621074490797,
 0.99941224,
 0.9730767011642456,
 0.95047617,
 0.12815313]

In [5]:
# Sanity check of METEOR on a short pair: the second string is the first with one
# extra word ("not") inserted.
metrics.compute_meteor("this is a cat", "this is not a cat")

0.9146341463414633

In [6]:
# Load the structured LLM answers; the frame holds per-section "Expert ..." and
# "Dilemma ..." text columns (see the displayed output).
# NOTE(review): the displayed frame carries "Unnamed: 0.1" and "Unnamed: 0" columns,
# which look like row indexes written out by earlier to_csv calls — confirm, and
# consider dropping them where the CSV is produced.
llm_answers = pd.read_csv("../data/llm_answers_structured_output.csv")
llm_answers

Unnamed: 0.1,Unnamed: 0,Dilemma Description,Expert Opinion,expert_output,dilemma_output,Expert Introduction,Expert Key Factors in Consideration,Expert Historical & Theoretical Perspectives,Expert Proposed Resolution Strategies,Expert Key Takeaways,Dilemma Introduction,Dilemma Key Factors in Consideration,Dilemma Historical & Theoretical Perspectives,Dilemma Proposed Resolution Strategies,Dilemma Key Takeaways
0,0,I joined a lab during graduate school and was ...,We were surprised to discover that the literat...,%Introduction: \nThis dilemma reflects a conf...,%Introduction: \nThis dilemma highlights a si...,This dilemma reflects a conflict over credit a...,"Key factors include originality, effort, and t...",Relevant concepts such as Rescher’s credit all...,One suggested approach is to engage an indepen...,The allocation of credit in collaborative rese...,This dilemma highlights a significant ethical ...,Key factors include the originality of the dis...,Theories such as Merton’s Matthew Effect illus...,A potential resolution could involve an indepe...,The allocation of credit in collaborative rese...
1,1,Graduate students A and B are working on somew...,Disputes over assigning credit in science are ...,%Introduction: \nThis dilemma highlights a co...,%Introduction: \nThis ethical dilemma centers...,This dilemma highlights a conflict over intell...,Key factors include the originality of the ide...,"Historical examples, such as the disputes betw...","To resolve the dispute, it is essential to ass...",The allocation of credit in collaborative rese...,This ethical dilemma centers on the appropriat...,Key factors include the nature of idea generat...,Theories of intellectual property and academic...,A potential resolution could involve mediation...,This case underscores the importance of clear ...
2,2,David is a new postdoc in Dr. Goliath’s lab. U...,A key source of ethical guidance in resolving ...,%Introduction: \nThis dilemma centers on auth...,%Introduction: \nThis dilemma raises critical...,This dilemma centers on authorship and ethical...,Key factors include the substantial contributi...,The International Committee of Medical Journal...,An ad hoc committee could evaluate the contrib...,The ethical allocation of authorship in collab...,This dilemma raises critical questions about a...,Key factors include the original contributions...,"The principles of authorship, as outlined by t...",A potential resolution could involve establish...,The ethical landscape of authorship in researc...
3,3,"A PI moves his lab to a different university, ...","At first blush, this PI certainly seems to be ...",%Introduction: \nThis dilemma highlights the ...,%Introduction: \nThis situation raises signif...,This dilemma highlights the ethical implicatio...,Key factors include the integrity of authorshi...,The situation reflects longstanding issues in ...,A potential resolution could involve consultin...,The ethical landscape of authorship and public...,This situation raises significant ethical conc...,Key factors include the principles of authorsh...,The ethical principles surrounding authorship ...,A potential resolution could involve establish...,The ethical allocation of authorship and the r...
4,4,George Washington is one of two postdocs worki...,"Dr. Big might be a fine scientist, but he’s no...",%Introduction: \nThis dilemma highlights the ...,%Introduction: \nThis ethical dilemma centers...,This dilemma highlights the ethical challenges...,Key factors include the ethical implications o...,The situation reflects broader issues in acade...,"To address these issues, institutions should i...",Effective mentorship is critical for the devel...,This ethical dilemma centers on the fairness a...,Key factors include the ethical obligations of...,The situation can be analyzed through the lens...,A potential resolution could involve establish...,The ethical management of authorship and recom...
5,5,"Some years ago, I worked as a bronchoscopy tec...",One of the problems posed by this dilemma is w...,%Introduction: \nThis ethical dilemma highlig...,%Introduction: \nThis ethical dilemma highlig...,This ethical dilemma highlights the tension be...,Key factors include the necessity of aggressiv...,The dilemma reflects longstanding debates in m...,"To address these ethical concerns, it is essen...",The ethical complexities of lung transplantati...,This ethical dilemma highlights the tension be...,Key factors include the necessity of early det...,The debate surrounding aggressive post-transpl...,One potential resolution strategy is to establ...,The ethical landscape of lung transplantation ...
6,6,Jane is a very motivated and bright graduate s...,We purposefully omitted describing the origins...,%Introduction: \nThis ethical dilemma revolve...,%Introduction: \nThis scenario raises signifi...,This ethical dilemma revolves around the shari...,Key factors include the ownership of intellect...,The scenario highlights the complexities of in...,"To resolve the ethical issues, Dr. Smith shoul...",The ethical landscape in research is fraught w...,This scenario raises significant ethical conce...,Key factors include the ethical implications o...,The situation can be analyzed through the lens...,A potential resolution could involve establish...,This scenario illustrates the complexities of ...
7,7,I once worked in a lab that conducted anti-agi...,"""I once worked in a lab that conducted anti-ag...",%Introduction: \nThis dilemma raises signific...,%Introduction: \nThis ethical dilemma highlig...,This dilemma raises significant ethical concer...,Key factors include the ethical treatment of r...,"The principles of biomedical ethics, particula...","To address these issues, it is crucial to impl...","The ethical conduct of research is paramount, ...",This ethical dilemma highlights serious concer...,Key factors include the validity of informed c...,"The principles of biomedical ethics, particula...",A potential resolution could involve reporting...,This case illustrates the critical importance ...
8,8,I had decided to submit my first abstract ever...,"In reflecting on this scenario, we were remind...",%Introduction: \nThis dilemma raises ethical ...,%Introduction: \nThis dilemma raises importan...,This dilemma raises ethical questions about se...,Key factors include the risk of bias in result...,Hans Jonas's reflections on experimenting with...,"A cautious approach is recommended, where self...",Self-recruitment in research poses significant...,This dilemma raises important questions about ...,"Key factors include the potential for bias, th...","The ethical principle of ""do no harm"" and guid...",A prudent approach would involve consulting in...,Self-recruitment in research poses ethical cha...
9,9,One of the oddest but most memorable experienc...,This scenario speaks to lab management issues ...,%Introduction: \nThis scenario raises ethical...,%Introduction: \nThis scenario raises questio...,This scenario raises ethical questions about t...,Key factors include the potential impact of pr...,The discussion touches on Western scientific r...,One approach is to document any prayer practic...,The integration of personal beliefs in scienti...,This scenario raises questions about the inter...,Key factors include the potential impact of pe...,Historical perspectives on the role of spiritu...,A potential resolution could involve establish...,The integration of personal spirituality in sc...


In [7]:
# (expert column, dilemma column) pairs to compare, one pair per answer section.
# Both column names share the section title and differ only in their prefix.
pairs = [
    (f"Expert {section}", f"Dilemma {section}")
    for section in (
        "Introduction",
        "Key Factors in Consideration",
        "Historical & Theoretical Perspectives",
        "Proposed Resolution Strategies",
        "Key Takeaways",
    )
]


In [8]:
# For every (expert, dilemma) column pair, compute the spaCy similarity row by row
# and store it in a new "Score <expert column>" column of llm_answers.
for expert_col, dilemma_col in pairs:
    new_col_name = "Score " + expert_col
    llm_answers[new_col_name] = llm_answers.apply(
        # The column names are bound as defaults so the lambda is self-contained.
        lambda row, ref_col=expert_col, hyp_col=dilemma_col: metrics.semantic_similarity_spacy(
            row[ref_col], row[hyp_col]
        ),
        axis=1,
    )
llm_answers

Unnamed: 0.1,Unnamed: 0,Dilemma Description,Expert Opinion,expert_output,dilemma_output,Expert Introduction,Expert Key Factors in Consideration,Expert Historical & Theoretical Perspectives,Expert Proposed Resolution Strategies,Expert Key Takeaways,Dilemma Introduction,Dilemma Key Factors in Consideration,Dilemma Historical & Theoretical Perspectives,Dilemma Proposed Resolution Strategies,Dilemma Key Takeaways,Score Expert Introduction,Score Expert Key Factors in Consideration,Score Expert Historical & Theoretical Perspectives,Score Expert Proposed Resolution Strategies,Score Expert Key Takeaways
0,0,I joined a lab during graduate school and was ...,We were surprised to discover that the literat...,%Introduction: \nThis dilemma reflects a conf...,%Introduction: \nThis dilemma highlights a si...,This dilemma reflects a conflict over credit a...,"Key factors include originality, effort, and t...",Relevant concepts such as Rescher’s credit all...,One suggested approach is to engage an indepen...,The allocation of credit in collaborative rese...,This dilemma highlights a significant ethical ...,Key factors include the originality of the dis...,Theories such as Merton’s Matthew Effect illus...,A potential resolution could involve an indepe...,The allocation of credit in collaborative rese...,0.965661,0.975808,0.97499,0.96436,0.975663
1,1,Graduate students A and B are working on somew...,Disputes over assigning credit in science are ...,%Introduction: \nThis dilemma highlights a co...,%Introduction: \nThis ethical dilemma centers...,This dilemma highlights a conflict over intell...,Key factors include the originality of the ide...,"Historical examples, such as the disputes betw...","To resolve the dispute, it is essential to ass...",The allocation of credit in collaborative rese...,This ethical dilemma centers on the appropriat...,Key factors include the nature of idea generat...,Theories of intellectual property and academic...,A potential resolution could involve mediation...,This case underscores the importance of clear ...,0.976058,0.987376,0.974396,0.945067,0.924286
2,2,David is a new postdoc in Dr. Goliath’s lab. U...,A key source of ethical guidance in resolving ...,%Introduction: \nThis dilemma centers on auth...,%Introduction: \nThis dilemma raises critical...,This dilemma centers on authorship and ethical...,Key factors include the substantial contributi...,The International Committee of Medical Journal...,An ad hoc committee could evaluate the contrib...,The ethical allocation of authorship in collab...,This dilemma raises critical questions about a...,Key factors include the original contributions...,"The principles of authorship, as outlined by t...",A potential resolution could involve establish...,The ethical landscape of authorship in researc...,0.928072,0.985938,0.960161,0.954262,0.957027
3,3,"A PI moves his lab to a different university, ...","At first blush, this PI certainly seems to be ...",%Introduction: \nThis dilemma highlights the ...,%Introduction: \nThis situation raises signif...,This dilemma highlights the ethical implicatio...,Key factors include the integrity of authorshi...,The situation reflects longstanding issues in ...,A potential resolution could involve consultin...,The ethical landscape of authorship and public...,This situation raises significant ethical conc...,Key factors include the principles of authorsh...,The ethical principles surrounding authorship ...,A potential resolution could involve establish...,The ethical allocation of authorship and the r...,0.937334,0.965188,0.951606,0.965474,0.966513
4,4,George Washington is one of two postdocs worki...,"Dr. Big might be a fine scientist, but he’s no...",%Introduction: \nThis dilemma highlights the ...,%Introduction: \nThis ethical dilemma centers...,This dilemma highlights the ethical challenges...,Key factors include the ethical implications o...,The situation reflects broader issues in acade...,"To address these issues, institutions should i...",Effective mentorship is critical for the devel...,This ethical dilemma centers on the fairness a...,Key factors include the ethical obligations of...,The situation can be analyzed through the lens...,A potential resolution could involve establish...,The ethical management of authorship and recom...,0.966209,0.982799,0.971,0.943943,0.977703
5,5,"Some years ago, I worked as a bronchoscopy tec...",One of the problems posed by this dilemma is w...,%Introduction: \nThis ethical dilemma highlig...,%Introduction: \nThis ethical dilemma highlig...,This ethical dilemma highlights the tension be...,Key factors include the necessity of aggressiv...,The dilemma reflects longstanding debates in m...,"To address these ethical concerns, it is essen...",The ethical complexities of lung transplantati...,This ethical dilemma highlights the tension be...,Key factors include the necessity of early det...,The debate surrounding aggressive post-transpl...,One potential resolution strategy is to establ...,The ethical landscape of lung transplantation ...,0.986174,0.977164,0.960912,0.950563,0.974323
6,6,Jane is a very motivated and bright graduate s...,We purposefully omitted describing the origins...,%Introduction: \nThis ethical dilemma revolve...,%Introduction: \nThis scenario raises signifi...,This ethical dilemma revolves around the shari...,Key factors include the ownership of intellect...,The scenario highlights the complexities of in...,"To resolve the ethical issues, Dr. Smith shoul...",The ethical landscape in research is fraught w...,This scenario raises significant ethical conce...,Key factors include the ethical implications o...,The situation can be analyzed through the lens...,A potential resolution could involve establish...,This scenario illustrates the complexities of ...,0.95462,0.969261,0.972654,0.956618,0.894209
7,7,I once worked in a lab that conducted anti-agi...,"""I once worked in a lab that conducted anti-ag...",%Introduction: \nThis dilemma raises signific...,%Introduction: \nThis ethical dilemma highlig...,This dilemma raises significant ethical concer...,Key factors include the ethical treatment of r...,"The principles of biomedical ethics, particula...","To address these issues, it is crucial to impl...","The ethical conduct of research is paramount, ...",This ethical dilemma highlights serious concer...,Key factors include the validity of informed c...,"The principles of biomedical ethics, particula...",A potential resolution could involve reporting...,This case illustrates the critical importance ...,0.989101,0.973543,0.987791,0.957122,0.956719
8,8,I had decided to submit my first abstract ever...,"In reflecting on this scenario, we were remind...",%Introduction: \nThis dilemma raises ethical ...,%Introduction: \nThis dilemma raises importan...,This dilemma raises ethical questions about se...,Key factors include the risk of bias in result...,Hans Jonas's reflections on experimenting with...,"A cautious approach is recommended, where self...",Self-recruitment in research poses significant...,This dilemma raises important questions about ...,"Key factors include the potential for bias, th...","The ethical principle of ""do no harm"" and guid...",A prudent approach would involve consulting in...,Self-recruitment in research poses ethical cha...,0.965839,0.982817,0.949643,0.899634,0.989439
9,9,One of the oddest but most memorable experienc...,This scenario speaks to lab management issues ...,%Introduction: \nThis scenario raises ethical...,%Introduction: \nThis scenario raises questio...,This scenario raises ethical questions about t...,Key factors include the potential impact of pr...,The discussion touches on Western scientific r...,One approach is to document any prayer practic...,The integration of personal beliefs in scienti...,This scenario raises questions about the inter...,Key factors include the potential impact of pe...,Historical perspectives on the role of spiritu...,A potential resolution could involve establish...,The integration of personal spirituality in sc...,0.976403,0.986664,0.946097,0.977098,0.960801


In [9]:
# Weight of each per-section similarity score in the overall score.
# The weights must sum to 1 so that "Total Score" is a weighted average and stays
# on the same scale as the individual section scores.
SECTION_WEIGHTS = {
    "Score Expert Introduction": 0.1,
    "Score Expert Key Factors in Consideration": 0.2,
    "Score Expert Historical & Theoretical Perspectives": 0.15,
    "Score Expert Proposed Resolution Strategies": 0.33,
    "Score Expert Key Takeaways": 0.22,
}
assert abs(sum(SECTION_WEIGHTS.values()) - 1.0) < 1e-9, "section weights must sum to 1"

# Weighted sum of the score columns, accumulated in the order listed above.
llm_answers["Total Score"] = sum(
    llm_answers[score_col] * weight for score_col, weight in SECTION_WEIGHTS.items()
)
llm_answers

Unnamed: 0.1,Unnamed: 0,Dilemma Description,Expert Opinion,expert_output,dilemma_output,Expert Introduction,Expert Key Factors in Consideration,Expert Historical & Theoretical Perspectives,Expert Proposed Resolution Strategies,Expert Key Takeaways,...,Dilemma Key Factors in Consideration,Dilemma Historical & Theoretical Perspectives,Dilemma Proposed Resolution Strategies,Dilemma Key Takeaways,Score Expert Introduction,Score Expert Key Factors in Consideration,Score Expert Historical & Theoretical Perspectives,Score Expert Proposed Resolution Strategies,Score Expert Key Takeaways,Total Score
0,0,I joined a lab during graduate school and was ...,We were surprised to discover that the literat...,%Introduction: \nThis dilemma reflects a conf...,%Introduction: \nThis dilemma highlights a si...,This dilemma reflects a conflict over credit a...,"Key factors include originality, effort, and t...",Relevant concepts such as Rescher’s credit all...,One suggested approach is to engage an indepen...,The allocation of credit in collaborative rese...,...,Key factors include the originality of the dis...,Theories such as Merton’s Matthew Effect illus...,A potential resolution could involve an indepe...,The allocation of credit in collaborative rese...,0.965661,0.975808,0.97499,0.96436,0.975663,0.970861
1,1,Graduate students A and B are working on somew...,Disputes over assigning credit in science are ...,%Introduction: \nThis dilemma highlights a co...,%Introduction: \nThis ethical dilemma centers...,This dilemma highlights a conflict over intell...,Key factors include the originality of the ide...,"Historical examples, such as the disputes betw...","To resolve the dispute, it is essential to ass...",The allocation of credit in collaborative rese...,...,Key factors include the nature of idea generat...,Theories of intellectual property and academic...,A potential resolution could involve mediation...,This case underscores the importance of clear ...,0.976058,0.987376,0.974396,0.945067,0.924286,0.956456
2,2,David is a new postdoc in Dr. Goliath’s lab. U...,A key source of ethical guidance in resolving ...,%Introduction: \nThis dilemma centers on auth...,%Introduction: \nThis dilemma raises critical...,This dilemma centers on authorship and ethical...,Key factors include the substantial contributi...,The International Committee of Medical Journal...,An ad hoc committee could evaluate the contrib...,The ethical allocation of authorship in collab...,...,Key factors include the original contributions...,"The principles of authorship, as outlined by t...",A potential resolution could involve establish...,The ethical landscape of authorship in researc...,0.928072,0.985938,0.960161,0.954262,0.957027,0.959471
3,3,"A PI moves his lab to a different university, ...","At first blush, this PI certainly seems to be ...",%Introduction: \nThis dilemma highlights the ...,%Introduction: \nThis situation raises signif...,This dilemma highlights the ethical implicatio...,Key factors include the integrity of authorshi...,The situation reflects longstanding issues in ...,A potential resolution could involve consultin...,The ethical landscape of authorship and public...,...,Key factors include the principles of authorsh...,The ethical principles surrounding authorship ...,A potential resolution could involve establish...,The ethical allocation of authorship and the r...,0.937334,0.965188,0.951606,0.965474,0.966513,0.960751
4,4,George Washington is one of two postdocs worki...,"Dr. Big might be a fine scientist, but he’s no...",%Introduction: \nThis dilemma highlights the ...,%Introduction: \nThis ethical dilemma centers...,This dilemma highlights the ethical challenges...,Key factors include the ethical implications o...,The situation reflects broader issues in acade...,"To address these issues, institutions should i...",Effective mentorship is critical for the devel...,...,Key factors include the ethical obligations of...,The situation can be analyzed through the lens...,A potential resolution could involve establish...,The ethical management of authorship and recom...,0.966209,0.982799,0.971,0.943943,0.977703,0.965427
5,5,"Some years ago, I worked as a bronchoscopy tec...",One of the problems posed by this dilemma is w...,%Introduction: \nThis ethical dilemma highlig...,%Introduction: \nThis ethical dilemma highlig...,This ethical dilemma highlights the tension be...,Key factors include the necessity of aggressiv...,The dilemma reflects longstanding debates in m...,"To address these ethical concerns, it is essen...",The ethical complexities of lung transplantati...,...,Key factors include the necessity of early det...,The debate surrounding aggressive post-transpl...,One potential resolution strategy is to establ...,The ethical landscape of lung transplantation ...,0.986174,0.977164,0.960912,0.950563,0.974323,0.966224
6,6,Jane is a very motivated and bright graduate s...,We purposefully omitted describing the origins...,%Introduction: \nThis ethical dilemma revolve...,%Introduction: \nThis scenario raises signifi...,This ethical dilemma revolves around the shari...,Key factors include the ownership of intellect...,The scenario highlights the complexities of in...,"To resolve the ethical issues, Dr. Smith shoul...",The ethical landscape in research is fraught w...,...,Key factors include the ethical implications o...,The situation can be analyzed through the lens...,A potential resolution could involve establish...,This scenario illustrates the complexities of ...,0.95462,0.969261,0.972654,0.956618,0.894209,0.947622
7,7,I once worked in a lab that conducted anti-agi...,"""I once worked in a lab that conducted anti-ag...",%Introduction: \nThis dilemma raises signific...,%Introduction: \nThis ethical dilemma highlig...,This dilemma raises significant ethical concer...,Key factors include the ethical treatment of r...,"The principles of biomedical ethics, particula...","To address these issues, it is crucial to impl...","The ethical conduct of research is paramount, ...",...,Key factors include the validity of informed c...,"The principles of biomedical ethics, particula...",A potential resolution could involve reporting...,This case illustrates the critical importance ...,0.989101,0.973543,0.987791,0.957122,0.956719,0.968116
8,8,I had decided to submit my first abstract ever...,"In reflecting on this scenario, we were remind...",%Introduction: \nThis dilemma raises ethical ...,%Introduction: \nThis dilemma raises importan...,This dilemma raises ethical questions about se...,Key factors include the risk of bias in result...,Hans Jonas's reflections on experimenting with...,"A cautious approach is recommended, where self...",Self-recruitment in research poses significant...,...,"Key factors include the potential for bias, th...","The ethical principle of ""do no harm"" and guid...",A prudent approach would involve consulting in...,Self-recruitment in research poses ethical cha...,0.965839,0.982817,0.949643,0.899634,0.989439,0.95015
9,9,One of the oddest but most memorable experienc...,This scenario speaks to lab management issues ...,%Introduction: \nThis scenario raises ethical...,%Introduction: \nThis scenario raises questio...,This scenario raises ethical questions about t...,Key factors include the potential impact of pr...,The discussion touches on Western scientific r...,One approach is to document any prayer practic...,The integration of personal beliefs in scienti...,...,Key factors include the potential impact of pe...,Historical perspectives on the role of spiritu...,A potential resolution could involve establish...,The integration of personal spirituality in sc...,0.976403,0.986664,0.946097,0.977098,0.960801,0.970706


In [10]:
# Row labels of llm_answers ordered from the highest to the lowest "Total Score".
# NOTE(review): sort_values uses a non-stable sort by default, so rows with exactly
# equal scores may come out in arbitrary relative order — confirm if ties matter.
sequence = llm_answers["Total Score"].sort_values(ascending=False).index.to_list()
sequence

[0, 9, 7, 5, 4, 3, 2, 1, 8, 6]

In [None]:
# Ground-truth ordering expressed as groups of row ids: the first group should
# come first, the last group last; ids within one group are interchangeable.
ground_truth_ranks = [[8], [5, 0], [1, 3, 6, 7], [2, 9], [4]]

# Row id -> zero-based position of its group in ground_truth_ranks.
rank_map = {
    row_id: group_pos
    for group_pos, members in enumerate(ground_truth_ranks)
    for row_id in members
}

[1, 2, 2, 3, 3, 3, 3, 4, 4, 5]

[1, 2, 2, 3, 4, 4, 3, 3, 5, 3]

[1, 2, 2, 3, 4, 3, 4, 3, 5, 3]
def count_inversions(arr):
    """Count the inversions in ``arr``.

    An inversion is a pair of positions ``i < j`` with ``arr[i] > arr[j]``;
    pairs of equal values are never counted. The count is accumulated during
    a recursive merge sort, which keeps the work at O(n log n) comparisons.

    Args:
        arr: Sequence of mutually comparable values. It is only sliced and
            read, never modified.

    Returns:
        int: Number of inversions (0 for an empty or single-element input).
    """
    def sort_and_count(items):
        # A sequence of length 0 or 1 is already sorted and has no inversions.
        if len(items) < 2:
            return items, 0

        half = len(items) // 2
        lower, lower_inv = sort_and_count(items[:half])
        upper, upper_inv = sort_and_count(items[half:])

        ordered = []
        crossing = 0
        lo_pos = up_pos = 0
        while lo_pos < len(lower) and up_pos < len(upper):
            if lower[lo_pos] <= upper[up_pos]:
                ordered.append(lower[lo_pos])
                lo_pos += 1
                continue
            # upper[up_pos] is smaller than every element still waiting in
            # `lower` (which is sorted), so it forms one inversion with each.
            ordered.append(upper[up_pos])
            crossing += len(lower) - lo_pos
            up_pos += 1

        # At most one of the two halves still has elements left.
        ordered.extend(lower[lo_pos:])
        ordered.extend(upper[up_pos:])
        return ordered, lower_inv + upper_inv + crossing

    return sort_and_count(arr)[1]

def compute_loss(sequence):
    """
    Ranking loss of ``sequence`` against the ground-truth groups.

    Every item is replaced by its group rank from ``rank_map`` and the
    inversions of the resulting rank list are counted, i.e. the number of
    adjacent swaps needed to bring the items into group order. Items that
    share a group never contribute to the loss.

    Args:
        sequence (List[int]): Item ids in predicted order; each id must be a
            key of ``rank_map`` (a missing id raises ``KeyError``).

    Returns:
        int: Number of adjacent swaps needed to match group order.
    """
    ranks_in_predicted_order = list(map(rank_map.__getitem__, sequence))
    return count_inversions(ranks_in_predicted_order)
# NOTE(review): for the spaCy-based `sequence` [0, 9, 7, 5, 4, 3, 2, 1, 8, 6] this
# evaluates to 20; a saved output of 7 belongs to the BLEU ordering from an earlier
# run, so re-run this cell before relying on the displayed value.
compute_loss(sequence)

7

In [137]:
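# Baseline: a uniformly random ordering has an expected loss of 18.5 — 37 of the 45 item
# pairs span different ground-truth groups and each of those is inverted with probability
# 1/2 — which confirms the rough 18-22 estimate. The worst possible loss is 37.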

In [None]:
# n-gram (TODO: should probably fix the others)
# compute_bleu: 7 -- [8, 0, 5, 7, 9, 2, 6, 3, 4, 1]

# Lexical (intuitively not great)
# levenshtein_distance: 14 -- [8, 0, 2, 9, 6, 3, 7, 5, 4, 1]
# damerau_levenshtein: 16 -- [5, 2, 9, 7, 8, 0, 3, 1, 4, 6]
# word_error_rate: 16 -- [5, 2, 0, 1, 9, 8, 4, 6, 7, 3]
# gestalt_pattern_matching: 20 -- [7, 2, 9, 0, 5, 3, 8, 4, 1, 6]

# Set (intuitively not great)
# jaccard: 16 -- [8, 2, 1, 9, 7, 5, 0, 6, 4, 3]
# overlap_coeff: 18 -- [2, 7, 1, 3, 0, 8, 5, 6, 4, 9]
# dice_sorensen_coefficient: 16 -- [8, 2, 1, 9, 7, 5, 0, 6, 4, 3]



# Cosine
# compute_cosine_similarity-tf-idf: 15 -- [5, 9, 7, 2, 8, 0, 3, 1, 4, 6]
# cosine-word_frequency: 17 -- [5, 9, 2, 7, 3, 8, 0, 1, 6, 4]

# Semantic
# semantic_similarity_spacy: 20 -- [0, 9, 7, 5, 4, 3, 2, 1, 8, 6]
# wmd: 23 -- [1, 3, 6, 4, 7, 2, 0, 8, 9, 5]
# gensim_cosine: 18 -- [7, 8, 9, 5, 2, 4, 1, 6, 0, 3]
# sbert_cosine: 13 -- [7, 2, 8, 3, 5, 0, 9, 6, 1, 4]
# USE: 20 -- [7, 5, 9, 8, 2, 4, 6, 3, 1, 0]
# soft_cosine: 14 -- [5, 7, 6, 8, 9, 1, 2, 4, 3, 0]
