In [1]:
import pandas as pd
from dotenv.parser import Position

from slt_positional_bias.dataset import generate_merged_data_frame, sort_data_frame, store_df_as_parquet, load_parquet_as_df, normalize_and_tokenize, jaccard, spearman_word_order_correlation

from slt_positional_bias.plots import savetable
from slt_positional_bias.features import sacrebleu_corpus, rouge_corpus, meteor_corpus, bertscore_verbose

# Identifiers of the stored generation runs. They differ in the
# "sample-count-N" part of the name (10, 20, 30, 40) and in the timestamp.
df_10_name = "LLM-1 - Llama3 405 the best general model and big context size-sample-count-10-1-2025-08-10 00h-18m-01s"
df_20_name = "LLM-1 - Llama3 405 the best general model and big context size-sample-count-20-1-2025-08-10 14h-40m-19s"
df_30_name = "LLM-1 - Llama3 405 the best general model and big context size-sample-count-30-1-2025-08-10 16h-54m-56s"
df_40_name = "LLM-1 - Llama3 405 the best general model and big context size-sample-count-40-1-2025-08-10 18h-46m-08s"

# Load every run through the project's parquet loader, in the same order as
# the names above, and bind each result to its own variable.
df_10, df_20, df_30, df_40 = map(
    load_parquet_as_df,
    (df_10_name, df_20_name, df_30_name, df_40_name),
)

# Show the sample-count-10 frame as the cell output.
df_10

[32m2025-08-13 12:04:39.008[0m | [1mINFO    [0m | [36mslt_positional_bias.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: C:\Users\Albert\Documents\SLT\slt_group_2_positional_bias[0m
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Albert\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,topic_id,topic,rel_3_doc_position,nr_rel_3_doc,nr_rel_0_doc,oracle,answer
0,2024-105741,"is it dangerous to have wbc over 15,000 withou...",0,1,9,Leukocytosis (Inpatient Care) – What You Need ...,"Having a WBC count over 15,000 without treatme..."
1,2024-105741,"is it dangerous to have wbc over 15,000 withou...",2,1,9,Leukocytosis (Inpatient Care) – What You Need ...,"Having a white blood cell (WBC) count over 15,..."
2,2024-105741,"is it dangerous to have wbc over 15,000 withou...",4,1,9,Leukocytosis (Inpatient Care) – What You Need ...,"Having a white blood cell (WBC) count over 15,..."
3,2024-105741,"is it dangerous to have wbc over 15,000 withou...",7,1,9,Leukocytosis (Inpatient Care) – What You Need ...,"Having a WBC count over 15,000 without treatme..."
4,2024-105741,"is it dangerous to have wbc over 15,000 withou...",9,1,9,Leukocytosis (Inpatient Care) – What You Need ...,"Having a white blood cell (WBC) count over 15,..."
...,...,...,...,...,...,...,...
275,2024-96063,how using maps can impact your pedagogy,0,1,9,Maps and map learning in social studies involv...,Using maps can have a significant impact on pe...
276,2024-96063,how using maps can impact your pedagogy,2,1,9,Maps and map learning in social studies involv...,Using maps can significantly impact pedagogy b...
277,2024-96063,how using maps can impact your pedagogy,4,1,9,Maps and map learning in social studies involv...,Using maps can significantly impact pedagogy b...
278,2024-96063,how using maps can impact your pedagogy,7,1,9,Maps and map learning in social studies involv...,Using maps can significantly impact pedagogy b...


In [2]:
# Raw column name -> name used from this cell on.
# 'references' / 'predictions' are the column names read by the scoring cell.
column_labels = {
    'rel_3_doc_position': 'Position of Oracle',
    'nr_rel_0_doc': 'Number of Documents',
    'oracle': 'references',
    'answer': 'predictions',
}

dfs = {'df_10': df_10, 'df_20': df_20, 'df_30': df_30, 'df_40': df_40}

# Build a relabelled frame per run without touching the loaded frames:
#   * rename the columns as mapped above,
#   * drop 'nr_rel_3_doc',
#   * add 1 to the oracle position (the raw values start at 0),
#   * add 1 to 'Number of Documents'
#     (NOTE(review): presumably to count the oracle document as well - confirm).
# Keys get an 'n' prefix, e.g. 'df_10' -> 'ndf_10'.
new_dfs = {
    f'n{label}': (
        frame
        .rename(columns=column_labels)
        .drop(columns=['nr_rel_3_doc'])
        .assign(**{
            'Position of Oracle': lambda f: f['Position of Oracle'] + 1,
            'Number of Documents': lambda f: f['Number of Documents'] + 1,
        })
    )
    for label, frame in dfs.items()
}

new_dfs

{'ndf_10':         topic_id                                              topic  \
 0    2024-105741  is it dangerous to have wbc over 15,000 withou...   
 1    2024-105741  is it dangerous to have wbc over 15,000 withou...   
 2    2024-105741  is it dangerous to have wbc over 15,000 withou...   
 3    2024-105741  is it dangerous to have wbc over 15,000 withou...   
 4    2024-105741  is it dangerous to have wbc over 15,000 withou...   
 ..           ...                                                ...   
 275   2024-96063            how using maps can impact your pedagogy   
 276   2024-96063            how using maps can impact your pedagogy   
 277   2024-96063            how using maps can impact your pedagogy   
 278   2024-96063            how using maps can impact your pedagogy   
 279   2024-96063            how using maps can impact your pedagogy   
 
      Position of Oracle  Number of Documents  \
 0                     1                   10   
 1                     3  

In [3]:
import evaluate
import pandas as pd

# BERTScore metric from the `evaluate` package.
bertscore = evaluate.load("bertscore")

# Output column -> key in the dict returned by `bertscore.compute`.
score_columns = (
    ('BERTScore-Precision', 'precision'),
    ('BERTScore-Recall', 'recall'),
    ('BERTScore-F1', 'f1'),
)

# One scored copy per frame in `new_dfs`. The key is the incoming key with
# another 'n' in front, so 'ndf_10' is stored as 'nndf_10'.
new_dfs_2 = {}

for frame_key, frame in new_dfs.items():
    scored = frame.copy()

    # Score each answer against the oracle text of the same row.
    scores = bertscore.compute(
        predictions=scored['predictions'].tolist(),
        references=scored['references'].tolist(),
        lang="en",
        verbose=True,
    )

    # Store each per-row score list as a column, rounded to 4 decimals.
    for column, score_key in score_columns:
        scored[column] = [round(value, 4) for value in scores[score_key]]

    new_dfs_2[f'n{frame_key}'] = scored

new_dfs_2

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 6/6 [02:59<00:00, 29.85s/it]


computing greedy matching.


100%|██████████| 5/5 [00:00<00:00,  7.56it/s]


done in 1866895.84 seconds, 0.00 sentences/sec
calculating scores...
computing bert embedding.


100%|██████████| 6/6 [03:00<00:00, 30.04s/it]


computing greedy matching.


100%|██████████| 5/5 [00:00<00:00,  7.49it/s]


done in 1867076.84 seconds, 0.00 sentences/sec
calculating scores...
computing bert embedding.


100%|██████████| 6/6 [03:15<00:00, 32.59s/it]


computing greedy matching.


100%|██████████| 5/5 [00:00<00:00,  6.60it/s]


done in 1867273.17 seconds, 0.00 sentences/sec
calculating scores...
computing bert embedding.


100%|██████████| 6/6 [03:17<00:00, 32.92s/it]


computing greedy matching.


100%|██████████| 5/5 [00:00<00:00,  6.87it/s]

done in 1867471.49 seconds, 0.00 sentences/sec





{'nndf_10':         topic_id                                              topic  \
 0    2024-105741  is it dangerous to have wbc over 15,000 withou...   
 1    2024-105741  is it dangerous to have wbc over 15,000 withou...   
 2    2024-105741  is it dangerous to have wbc over 15,000 withou...   
 3    2024-105741  is it dangerous to have wbc over 15,000 withou...   
 4    2024-105741  is it dangerous to have wbc over 15,000 withou...   
 ..           ...                                                ...   
 275   2024-96063            how using maps can impact your pedagogy   
 276   2024-96063            how using maps can impact your pedagogy   
 277   2024-96063            how using maps can impact your pedagogy   
 278   2024-96063            how using maps can impact your pedagogy   
 279   2024-96063            how using maps can impact your pedagogy   
 
      Position of Oracle  Number of Documents  \
 0                     1                   10   
 1                     3 