In [1]:
import pandas as pd

from evaluate import evaluate_models
from model import initialize_model, call_llm


In [2]:

def preprocess(file_path):
    data = pd.read_json(path_or_buf=file_path, lines=True)
    ids = set()
    rows_to_be_dropped = []
    for i in range(len(data)):
        row = data.iloc[i]
        if row.unique_id in ids:
            rows_to_be_dropped.append(i)
        else:
            ids.add(row.unique_id)
    data = data.drop(rows_to_be_dropped)
    return data

In [7]:
data = preprocess("./data/train.jsonl")

In [11]:
len(data)

8194

In [8]:
data.to_json("./data/train_cleaned.jsonl", orient="records", lines=True)

In [None]:
print(set([x for x in data["label"]]))

In [None]:
print(len(data))

In [5]:
print(data.head())

     source  citeEnd                                        sectionName  \
0  explicit    175.0                                       Introduction   
1  explicit     36.0  Novel Quantitative Trait Loci for Seminal Root...   
2  explicit    228.0                                       Introduction   
3  explicit    110.0                                         Discussion   
4  explicit    239.0                                         Discussion   

   citeStart                                             string       label  \
0      168.0  However, how frataxin interacts with the Fe-S ...  background   
1       16.0  In the study by Hickey et al. (2012), spikes w...  background   
2      225.0  The drug also reduces catecholamine secretion,...  background   
3       46.0  By clustering with lowly aggressive close kin ...  background   
4      234.0  Ophthalmic symptoms are rare manifestations of...  background   

   label_confidence                             citingPaperId  \
0        

In [11]:
print(data.iloc[0].string)

However, how frataxin interacts with the Fe-S cluster biosynthesis components remains unclear as direct one-to-one interactions with each component were reported (IscS [12,22], IscU/Isu1 [6,11,16] or ISD11/Isd11 [14,15]).


In [12]:
print(len(data))

8243


In [7]:
labels = []
reasonings = []
raw_output = []
client, _ = initialize_model("")
for i in range(50):
    current_data = data.iloc[i]
    response = call_llm(client, "", current_data.string, current_data.sectionName)
    raw_output.append(response)
    labels.append(response[0])
    reasonings.append(response[1])

NameError: name 'data' is not defined

In [19]:
reasonings = [" ".join([x.strip() for x in y[1:]]) for y in raw_output]

In [20]:
print(reasonings)

['This is the introduction section, providing an overview of the current knowledge gap in the field, and setting the stage for the rest of the text.', ' This study appears to be providing background information on the context and methods used to collect the data, including the specific procedures used to sample, dry, and store the grains, as well as the purpose of the study.', 'This is the introduction section that describes the benefits and effects of the drug, specifically its impact on heart rate and blood pressure, making it a result.', 'This text provides the conclusion or outcome of the research, specifically discussing the potential benefits of breeding females clustering with lowly aggressive close kin.', 'This is a transitional sentence that serves to introduce the discussion, summarizing the key points discussed in the previous section, which is a common approach in academic writing.', 'This is the reasoning  The classification is "result" because the text describes the outco

In [14]:
print(reasonings)

['This is the introduction section, providing an overview of the current knowledge gap in the field, and setting the stage for the rest of the text.', '', 'This is the introduction section that describes the benefits and effects of the drug, specifically its impact on heart rate and blood pressure, making it a result.', 'This text provides the conclusion or outcome of the research, specifically discussing the potential benefits of breeding females clustering with lowly aggressive close kin.', 'This is a transitional sentence that serves to introduce the discussion, summarizing the key points discussed in the previous section, which is a common approach in academic writing.', 'This is the reasoning', 'This section appears to be the discussion section of a research paper, specifically focusing on the importance of early diagnosis and immediate treatment, citing references [17, 18]. The language used is formal and academic, indicating a methodological or interpretive section, rather than 

In [8]:
print(labels)

['result', 'background', 'result', 'result', 'background', 'result', 'method', 'result', 'method', 'result', 'result', 'result', 'background', 'background', 'result', 'background', 'result', 'method', 'method', 'result', 'result', 'background', 'result', 'result', 'result', 'result', 'method', 'result', 'result', 'result', 'result', 'result', 'result', 'result', 'result', 'result', 'result', 'method', 'result', 'method', 'result', 'result', 'result', 'result', 'result', 'background', 'method', 'method', 'result', 'result', 'result', 'result', 'background', 'result', 'result', 'result', 'result', 'result', 'result', 'result', 'result', 'result', 'result', 'result', 'result', 'result', 'result', 'method', 'method', 'result', 'result', 'result', 'result', 'background', 'result', 'result', 'result', 'result', 'result', 'background', 'result', 'background', 'background', 'result', 'result', 'result', 'method', 'result', 'result', 'result', 'method', 'result', 'method', 'result', 'result', '

In [21]:
df = pd.DataFrame(zip(labels, reasonings), columns=["id", "reasoning"])
df.to_csv("first_50.csv", )

In [3]:
teacher = pd.read_csv("results/Teachers/Ours/deepseek-openai/deepseek_openai_combined.csv")

In [4]:
teacher.head()

Unnamed: 0.1,Unnamed: 0,id,model_classification,reasoning
0,0,1872080baa7d30ec8fb87be9a65358cd3a7fb649>894be...,background,The citation text is discussing the current st...
1,1,ce1d09a4a3a8d7fd3405b9328f65f00c952cf64b>b6642...,method,The citation text describes specific procedure...
2,2,9cdf605beb1aa1078f235c4332b3024daa8b31dc>4e6a1...,background,The citation text is found in the Introduction...
3,3,d9f3207db0c79a3b154f3875c9760cc6b056904b>2cc6f...,background,The citation is used to provide context and su...
4,4,88b86556857f4374842d2af2e359576806239175>a5bb0...,background,The citation text describes general informatio...


In [5]:
llama = pd.read_csv("results/Teachers/Llama/meta-llama_Llama-3.3-70B-Instruct-Turbo-Free/output.csv")

In [17]:
len(llama)

8194

In [8]:
llama_corr =llama['model_classification'] == data["label"].reset_index(drop=True)

In [9]:
teacher_wrong = teacher["model_classification"] != data["label"].reset_index(drop=True)

In [10]:
data.reset_index(drop=True)[teacher_wrong & llama_corr]

Unnamed: 0,source,citeEnd,sectionName,citeStart,string,label,label_confidence,citingPaperId,citedPaperId,isKeyCitation,id,unique_id,excerpt_index,label2,label2_confidence
1,explicit,36.0,Novel Quantitative Trait Loci for Seminal Root...,16.0,"In the study by Hickey et al. (2012), spikes w...",background,1.0000,ce1d09a4a3a8d7fd3405b9328f65f00c952cf64b,b6642e19efb8db5623b3cc4eef1c5822a6151107,True,ce1d09a4a3a8d7fd3405b9328f65f00c952cf64b>b6642...,ce1d09a4a3a8d7fd3405b9328f65f00c952cf64b>b6642...,2,,
14,explicit,190.0,Providing self-determination opportunities,180.0,For women with competing care-giving responsib...,background,0.7338,0323f404c7211e8e7f5034f070e3e1a54ecff495,7b889faead3d086f66e10d2b2024f1a85419c7d0,True,0323f404c7211e8e7f5034f070e3e1a54ecff495>7b889...,0323f404c7211e8e7f5034f070e3e1a54ecff495>7b889...,1,,
21,explicit,161.0,Results,143.0,The third group of proteins might be necessary...,background,0.7749,5d3996c173eeedf80bd41f36aff3c0c9da54937a,5142ce7b4f63cb4a956fbb52a07349210dd64cf6,False,5d3996c173eeedf80bd41f36aff3c0c9da54937a>5142c...,5d3996c173eeedf80bd41f36aff3c0c9da54937a>5142c...,0,,
40,explicit,206.0,1. Introduction,184.0,In this chapter we will present the formalism ...,background,0.7406,e364d59a1dc537f2d51cebd7647030833629bb07,d18293e4a7b9d11f93cd5384005e289e00122f07,False,e364d59a1dc537f2d51cebd7647030833629bb07>d1829...,e364d59a1dc537f2d51cebd7647030833629bb07>d1829...,0,,
42,explicit,123.0,Discussion,119.0,A serotype-shift occurred over the years with ...,background,1.0000,ac5fb99eda6b95e9703bb3d93417df5f7373e662,e9891d090394b105cc1a6a85ccf55253ca96728c,False,ac5fb99eda6b95e9703bb3d93417df5f7373e662>e9891...,ac5fb99eda6b95e9703bb3d93417df5f7373e662>e9891...,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8132,explicit,144.0,,140.0,"For more details on EDC calculation, forwarder...",background,0.7297,dd8c456a60b0ae3dc19f2d1eee6fbd3c3921c400,1cc4208afda03257c461e8b767c007b5570104bd,False,dd8c456a60b0ae3dc19f2d1eee6fbd3c3921c400>1cc42...,dd8c456a60b0ae3dc19f2d1eee6fbd3c3921c400>1cc42...,13,,
8162,explicit,282.0,3.2. Yield and characterisation of ethanol org...,265.0,The efficient removal of lignin due to the pri...,background,1.0000,fe677d2f86d623b3dfc92f060bd8d0d2a0ea413d,548992f59f682a83a671e7f288f7aa574580d4a5,True,fe677d2f86d623b3dfc92f060bd8d0d2a0ea413d>54899...,fe677d2f86d623b3dfc92f060bd8d0d2a0ea413d>54899...,2,,
8165,explicit,79.0,Discussion,75.0,Gadolinium enhancement of the inflamed tissues...,background,0.7635,d5e9bb418f5183ebcaf371efce583b2e710315ef,6aa6183f41837ccaed16951fe32e9e3e861a0356,True,d5e9bb418f5183ebcaf371efce583b2e710315ef>6aa61...,d5e9bb418f5183ebcaf371efce583b2e710315ef>6aa61...,1,,
8182,explicit,60.0,1 Introduction,56.0,Intuitively this is done by using packed secre...,background,0.6334,d47a55e61b83373dcb896c0f0f0c6dcb89aa28fd,4493db6859a8550d2d41b0eb29c75d6045df884e,True,d47a55e61b83373dcb896c0f0f0c6dcb89aa28fd>4493d...,d47a55e61b83373dcb896c0f0f0c6dcb89aa28fd>4493d...,5,,


In [11]:
better_teacher = pd.read_csv("results/Teachers/Ours/deepseek-openai/deepseek_openai_combined.csv")[["model_classification", "reasoning"]].reset_index(drop=True)

In [39]:
llama

Unnamed: 0,model_classification,reasoning
0,background,The text discusses the current state of knowle...
1,background,The text mentions a specific study by Hickey e...
2,background,The text provides information about the effect...
3,background,The text discusses existing research and theor...
4,background,The text provides an introduction to the topic...
...,...,...
8189,result,The text mentions specific results from a stud...
8190,background,The text discusses the need for education and ...
8191,background,The text mentions previous studies and their f...
8192,background,The text provides an overview of existing know...


In [12]:
mask = (teacher_wrong & llama_corr).reset_index(drop=True)
llama = llama[["model_classification","reasoning"]].reset_index(drop=True)
# for i in range(len(mask)):
#     if mask[i]:
#         better_teacher.iloc[i] = llama.iloc[i]
better_teacher[mask] = llama[mask]

In [13]:
better_teacher.to_csv("results/Teachers/Ours/LongLiveLLama.csv", index=False)

In [17]:
from evaluate import evaluate_models
from model_card import ModelCard

In [20]:
evaluate_models([ModelCard("The King","v1","is back","NLP Team 19", 0,1000,"results/Teachers/Ours/LongLiveLLama.csv"),
                ModelCard("Our Teacher","v0","Our best teacher model so far","NLP Team 19", 0, 900, "results/Teachers/Ours/deepseek-openai/deepseek_openai_combined.csv")])

Nikhil - The King v1
Size: 1000 Billion Parameters
Price: $0/M tokens
Model Metrics Report
Accuracy: 0.85
Macro F1: 0.82
Micro F1: 0.88
Macro Recall: 0.86
Micro Recall: 0.83
Macro Precision: 0.80
Micro Precision: 0.93
Report: None
NLP Team 19 - Our Teacher v0
Size: 900 Billion Parameters
Price: $0/M tokens
Model Metrics Report
Accuracy: 0.78
Macro F1: 0.76
Micro F1: 0.81
Macro Recall: 0.82
Micro Recall: 0.73
Macro Precision: 0.74
Micro Precision: 0.91
Report: None
