### Evaluation experiments

- Jaccard Coefficient: For vocabulary similarities (done)
- Positional Spearman's ρ (rank correlation coefficient): Does the order of the shared words indicate semantic similarity?
- Semantic Embedding Similarity?

In [1]:
import pandas as pd
from dotenv.parser import Position

from slt_positional_bias.dataset import generate_merged_data_frame, sort_data_frame, store_df_as_parquet, load_parquet_as_df, normalize_and_tokenize, jaccard, spearman_word_order_correlation

from slt_positional_bias.plots import savetable, export_apa_table_png

# Names of the stored result sets, one per sample count (10, 20, 30, 40).
df_10_name, df_20_name, df_30_name, df_40_name = (
    "LLM-1 - Llama3 405 the best general model and big context size-sample-count-10-1-2025-08-10 00h-18m-01s",
    "LLM-1 - Llama3 405 the best general model and big context size-sample-count-20-1-2025-08-10 14h-40m-19s",
    "LLM-1 - Llama3 405 the best general model and big context size-sample-count-30-1-2025-08-10 16h-54m-56s",
    "LLM-1 - Llama3 405 the best general model and big context size-sample-count-40-1-2025-08-10 18h-46m-08s",
)

# Load each result set through the project helper, in the same order as the names above.
df_10, df_20, df_30, df_40 = [
    load_parquet_as_df(parquet_name)
    for parquet_name in (df_10_name, df_20_name, df_30_name, df_40_name)
]

[32m2025-08-11 17:18:51.803[0m | [1mINFO    [0m | [36mslt_positional_bias.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: C:\Users\Albert\Documents\SLT\slt_group_2_positional_bias[0m
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Albert\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
def _jaccard_of_row(row):
    """Apply the project's ``jaccard`` helper to a row's oracle text and generated answer."""
    return jaccard(row['oracle'], row['answer'])


def _spearman_of_row(row):
    """Wrap the word-order Spearman result in a Series so ``apply`` expands it into columns."""
    return pd.Series(spearman_word_order_correlation(row['oracle'], row['answer']))


# Add the similarity metrics to every result frame (the frames are modified in place).
for frame in (df_10, df_20, df_30, df_40):
    frame['jaccard'] = frame.apply(_jaccard_of_row, axis=1)
    # The helper's result is expected to unpack into two values: coefficient, then p-value.
    frame[['Spearman correlation coefficient', 'Spearman p-value']] = frame.apply(
        _spearman_of_row, axis=1
    )

df_30

Unnamed: 0,topic_id,topic,rel_3_doc_position,nr_rel_3_doc,nr_rel_0_doc,oracle,answer,jaccard,Spearman correlation coefficient,Spearman p-value
0,2024-105741,"is it dangerous to have wbc over 15,000 withou...",0,1,29,Leukocytosis (Inpatient Care) – What You Need ...,"Having a WBC count over 15,000 without treatme...",0.158333,0.277193,0.250589
1,2024-105741,"is it dangerous to have wbc over 15,000 withou...",7,1,29,Leukocytosis (Inpatient Care) – What You Need ...,"A white blood cell (WBC) count over 15,000 can...",0.142857,0.281863,0.273060
2,2024-105741,"is it dangerous to have wbc over 15,000 withou...",14,1,29,Leukocytosis (Inpatient Care) – What You Need ...,"A white blood cell (WBC) count over 15,000 can...",0.146552,0.468137,0.058072
3,2024-105741,"is it dangerous to have wbc over 15,000 withou...",22,1,29,Leukocytosis (Inpatient Care) – What You Need ...,"Having a white blood cell (WBC) count over 15,...",0.160839,0.458498,0.027778
4,2024-105741,"is it dangerous to have wbc over 15,000 withou...",29,1,29,Leukocytosis (Inpatient Care) – What You Need ...,"Having a white blood cell (WBC) count over 15,...",0.197080,0.509768,0.006603
...,...,...,...,...,...,...,...,...,...,...
275,2024-96063,how using maps can impact your pedagogy,0,1,29,Maps and map learning in social studies involv...,Using maps can significantly impact pedagogy b...,0.277027,0.654181,0.000004
276,2024-96063,how using maps can impact your pedagogy,7,1,29,Maps and map learning in social studies involv...,Using maps can significantly impact pedagogy b...,0.230366,0.263707,0.083695
277,2024-96063,how using maps can impact your pedagogy,14,1,29,Maps and map learning in social studies involv...,Using maps can significantly impact pedagogy b...,0.212766,0.571079,0.000981
278,2024-96063,how using maps can impact your pedagogy,22,1,29,Maps and map learning in social studies involv...,Using maps in pedagogy can have a significant ...,0.157895,0.617216,0.000605


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from slt_positional_bias.plots import savefig

dfs = {'df_10': df_10, 'df_20': df_20, 'df_30': df_30, 'df_40': df_40}

# For every result set: average the three metrics per oracle-document position,
# give the columns presentation names, and shift the position by one
# (the displayed rows start at 0, so this presumably converts to 1-based positions).
new_dfs = {
    f'{label}_new': (
        frame
        .groupby('rel_3_doc_position')[['jaccard', 'Spearman correlation coefficient', 'Spearman p-value']]
        .mean()
        .reset_index()
        .rename(columns={
            'rel_3_doc_position': 'Position of oracle document',
            'jaccard': 'Jaccard coefficient',
        })
        .assign(**{
            'Position of oracle document': lambda table: table['Position of oracle document'] + 1,
        })
    )
    for label, frame in dfs.items()
}

new_dfs

In [None]:
# Assemble one long table from the per-dataset summaries, using two-level column
# labels so the two Spearman statistics share a common header.
#
# The frame is built column-wise rather than via ``iterrows()``: ``iterrows`` returns
# each row as a single-dtype Series, so the integer positions would be upcast to float
# next to the float metrics and show up as 1.0, 8.0, ... in the exported table.
parts = []
for name, nd in new_dfs.items():
    parts.append(pd.DataFrame({
        ('Dataset', ''): name,  # scalar, broadcast to every row of this dataset
        ('Position', ''): nd['Position of oracle document'].astype(int).to_numpy(),
        ('Avg. Jaccard-Coeff.', ''): nd['Jaccard coefficient'].to_numpy(),
        ("Avg. Spearman's Rank", 'r'): nd['Spearman correlation coefficient'].to_numpy(),
        ("Avg. Spearman's Rank", 'p-value'): nd['Spearman p-value'].to_numpy(),
    }))

df_apa = pd.concat(parts, ignore_index=True)
# Make sure the tuple labels form a real two-level column index.
df_apa.columns = pd.MultiIndex.from_tuples([c if isinstance(c, tuple) else (c, '') for c in df_apa.columns])

value_cols = [
    ('Avg. Jaccard-Coeff.', ''),
    ("Avg. Spearman's Rank", 'r'),
    ("Avg. Spearman's Rank", 'p-value'),
]

# Order by dataset and position, move both into the index, and round the metrics
# to two decimals for presentation.
table_apa = (
    df_apa
    .sort_values([('Dataset', ''), ('Position', '')])
    .set_index([('Dataset', ''), ('Position', '')])[value_cols]
    .round(2)
)

# Trailing expression so the notebook renders the table with its rich display.
table_apa

In [None]:
# Hand the rounded summary table to the project's export helper.
# NOTE(review): the second argument looks like the table caption and the third like the
# output name — confirm against savetable in slt_positional_bias.plots.
savetable(
    table_apa,
    "Avg. Jaccard and Spearman's Rank",
    'table_apa',
)