In [61]:
import pandas as pd
# Use a pipeline as a high-level helper
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM
import json
from tqdm import tqdm


In [None]:
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")

In [51]:
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B-Instruct", max_new_tokens=2048)
pipe(messages)

Device set to use mps:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[{'generated_text': [{'role': 'user', 'content': 'Who are you?'},
   {'role': 'assistant',
    'content': 'I\'m an artificial intelligence model known as Llama. Llama stands for "Large Language Model Meta AI."'}]}]

In [8]:
def call_pipe(pipe, section_name, content):
    messages = [{"role": "system", "content": 
                 "You are an AI assistant who will assess whether a given text is used as a background, result or method section in a scientific paper."
                 "You will be given a section name and the text, and you will need to classify the text as one of [background, result, method]."
                 "You will also need to provide a reasoning for your classification."
                 "The output should strictly be a json with the following two keys: classification, reasoning."
                 "Example output would look like: {\"classification\": \"result\", \"reasoning\": \"This is the reasoning\"}"
    }, 
    {"role": "user", "content": f"section name: {section_name}, text: {content}"}]
    response = pipe(messages)[0]
    try:
        if response["generated_text"][2]["content"]:
            #print(response["generated_text"][2]["content"])
            content = response["generated_text"][2]["content"]
            start = content.find("{")
            end = content.rfind("}") + 1
            json_str = content[start:end]
            answers = json.loads(json_str)
            classification = answers["classification"]
            reasoning = answers["reasoning"]
            return classification, reasoning
        else:
            return "Invalid", "No reasoning provided"
    except Exception as e:
        print(e)
        return "Invalid", "No reasoning provided"

In [55]:

result = call_pipe(pipe, "Introduction", "However, how frataxin interacts with the Fe-S cluster biosynthesis components remains unclear as direct one-to-one interactions with each component were reported (IscS [12,22], IscU/Isu1 [6,11,16] or ISD11/Isd11 [14,15]).")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


{"classification": "result", "reasoning": "This is the reasoning. The given text mentions specific components (IscS, IscU/Isu1, ISD11/Isd11) that are known to interact with the Fe-S cluster biosynthesis, but does not provide a direct description of how frataxin interacts with these components. The text suggests that the interaction is complex and may involve multiple components, but does not provide any specific details about the mechanism or outcome of this interaction."}


In [5]:
print(result)

('background', 'The text establishes the initial gap in knowledge regarding the interaction between frataxin and the Fe-S cluster biosynthesis components, setting the stage for subsequent research.')


In [67]:

def preprocess(file_path):
    data = pd.read_json(path_or_buf=file_path, lines=True)
    ids = set()
    rows_to_be_dropped = []
    for i in range(len(data)):
        row = data.iloc[i]
        if row.unique_id in ids:
            rows_to_be_dropped.append(i)
        else:
            ids.add(row.unique_id)
    data = data.drop(rows_to_be_dropped)
    return data

In [68]:
data = preprocess("./data/train.jsonl")

In [64]:
first_partition = data[:1365]
second_partition = data[1365:2730]
third_partition = data[2730:4095]
fourth_partition = data[4095:5460]
fifth_partition = data[5460:6825]
sixth_partition = data[6825:]

In [None]:
labels = []
reasonings = []
raw_output = []
ids = []

for i in tqdm(range(1365)):
    current_data = data.iloc[i]
    ids.append(current_data.unique_id)
    classification, reasoning = call_pipe(pipe, current_data.sectionName, current_data.string)
    raw_output.append(classification)
    labels.append(classification)
    reasonings.append(reasoning)

  0%|          | 3/1365 [00:13<1:41:58,  4.49s/it]

In [None]:
df = pd.DataFrame(zip(ids, labels, reasonings), columns=["id", "model_classification", "reasoning"])
df.to_csv("results/Gemma/Gemma-1b/first-partition.csv")

In [118]:
filename = "r1_second_part.txt"
c = []
r = []
i = [second_partition.iloc[x].unique_id for x in range(len(second_partition))]

with open(filename) as file:
    lines = [line.rstrip() for line in file]
    for line in lines:
        if "ChatCompletion" in line:
            continue
        start = line.find("{")
        end = line.rfind("}") + 1
        if start == -1:
            continue
        print(line)
        json_str = json.dumps(eval(line[start:end]))
        answers = json.loads(json_str)
        
        classification = answers["classification"]
        reasoning = answers["reasoning"]
        print(classification)
        print(reasoning)
        c.append(classification)
        r.append(reasoning)

df = pd.DataFrame(zip(i, c, r), columns=["id", "model_classification", "reasoning"])

df.to_csv("deepseek_second_part.csv")

{"classification": "background", "reasoning": "The text cites prior studies on maize and oats to provide context and establish existing knowledge in the field, which is typical of a background section."}
background
The text cites prior studies on maize and oats to provide context and establish existing knowledge in the field, which is typical of a background section.
{"classification": "method", "reasoning": "The text describes experimental procedures (immunoblotting, antibody use, inhibitor treatments), which are methodological details."}
method
The text describes experimental procedures (immunoblotting, antibody use, inhibitor treatments), which are methodological details.
{"classification": "result", "reasoning": "The text compares the authors' results with previous findings on S. aureus mastitis cases, characteristic of a discussion of results."}
result
The text compares the authors' results with previous findings on S. aureus mastitis cases, characteristic of a discussion of resul

In [None]:
openai = pd.read_csv("./results/gpt-4o/4o_full.csv")
deepseek = pd.read_csv("./results/deepseek-r1/deepseek_first_part.csv")
train = data

In [71]:
deepseek_w_openai_c = 0
deepseek_c_openai_w = 0;
deepseek_w_openai_w = 0
deepseek_c_openai_c = 0
for i in range(len(deepseek)):
    index = i + 1365
    correct_label = train.iloc[index].label
    deepseek_correct = deepseek.iloc[i].model_classification == correct_label
    openai_correct = openai.iloc[index].model_classification == correct_label
    if deepseek_correct and openai_correct:
        deepseek_c_openai_c += 1
    elif deepseek_correct:
        deepseek_c_openai_w += 1
    elif openai_correct:
        deepseek_w_openai_c += 1
    else:
        deepseek_w_openai_w += 1

print(f"Deepseek is correct and openai is correct: {deepseek_c_openai_c}")
print(f"Deepseek is correct and openai is wrong: {deepseek_c_openai_w}")
print(f"Deepseek is wrong and openai is correct: {deepseek_w_openai_c}")
print(f"Deepseek is wrong and openai is wrong: {deepseek_w_openai_w}")

Deepseek is correct and openai is correct: 809
Deepseek is correct and openai is wrong: 78
Deepseek is wrong and openai is correct: 204
Deepseek is wrong and openai is wrong: 268


In [121]:
openai = pd.read_csv("./results/gpt-4o/4o_full.csv")
deepseek = pd.read_csv("./results/deepseek-r1/deepseek_first_part.csv")
train = data
deepseek_w_openai_c = 0
deepseek_c_openai_w = 0;
deepseek_w_openai_w = 0
deepseek_c_openai_c = 0
for i in range(len(deepseek)):
    index = i
    correct_label = train.iloc[index].label
    deepseek_correct = deepseek.iloc[i].model_classification == correct_label
    openai_correct = openai.iloc[index].model_classification == correct_label
    if deepseek_correct and openai_correct:
        deepseek_c_openai_c += 1
    elif deepseek_correct:
        deepseek_c_openai_w += 1
    elif openai_correct:
        deepseek_w_openai_c += 1
    else:
        deepseek_w_openai_w += 1

print(f"Deepseek is correct and openai is correct: {deepseek_c_openai_c}")
print(f"Deepseek is correct and openai is wrong: {deepseek_c_openai_w}")
print(f"Deepseek is wrong and openai is correct: {deepseek_w_openai_c}")
print(f"Deepseek is wrong and openai is wrong: {deepseek_w_openai_w}")

Deepseek is correct and openai is correct: 910
Deepseek is correct and openai is wrong: 43
Deepseek is wrong and openai is correct: 130
Deepseek is wrong and openai is wrong: 282


In [122]:
openai = pd.read_csv("./results/gpt-4o/4o_full.csv")
deepseek = pd.read_csv("./results/deepseek-r1/deepseek_second_part.csv")
train = data
deepseek_w_openai_c = 0
deepseek_c_openai_w = 0;
deepseek_w_openai_w = 0
deepseek_c_openai_c = 0
for i in range(len(deepseek)):
    index = i + 1365
    correct_label = train.iloc[index].label
    deepseek_correct = deepseek.iloc[i].model_classification == correct_label
    openai_correct = openai.iloc[index].model_classification == correct_label
    if deepseek_correct and openai_correct:
        deepseek_c_openai_c += 1
    elif deepseek_correct:
        deepseek_c_openai_w += 1
    elif openai_correct:
        deepseek_w_openai_c += 1
    else:
        deepseek_w_openai_w += 1

print(f"Deepseek is correct and openai is correct: {deepseek_c_openai_c}")
print(f"Deepseek is correct and openai is wrong: {deepseek_c_openai_w}")
print(f"Deepseek is wrong and openai is correct: {deepseek_w_openai_c}")
print(f"Deepseek is wrong and openai is wrong: {deepseek_w_openai_w}")

Deepseek is correct and openai is correct: 866
Deepseek is correct and openai is wrong: 62
Deepseek is wrong and openai is correct: 153
Deepseek is wrong and openai is wrong: 284


In [123]:
openai = pd.read_csv("./results/gpt-4o/4o_full.csv")
deepseek = pd.read_csv("./results/deepseek-r1/deepseek_third_part.csv")
train = data
deepseek_w_openai_c = 0
deepseek_c_openai_w = 0;
deepseek_w_openai_w = 0
deepseek_c_openai_c = 0
for i in range(len(deepseek)):
    index = i + 2730
    correct_label = train.iloc[index].label
    deepseek_correct = deepseek.iloc[i].model_classification == correct_label
    openai_correct = openai.iloc[index].model_classification == correct_label
    if deepseek_correct and openai_correct:
        deepseek_c_openai_c += 1
    elif deepseek_correct:
        deepseek_c_openai_w += 1
    elif openai_correct:
        deepseek_w_openai_c += 1
    else:
        deepseek_w_openai_w += 1

print(f"Deepseek is correct and openai is correct: {deepseek_c_openai_c}")
print(f"Deepseek is correct and openai is wrong: {deepseek_c_openai_w}")
print(f"Deepseek is wrong and openai is correct: {deepseek_w_openai_c}")
print(f"Deepseek is wrong and openai is wrong: {deepseek_w_openai_w}")

Deepseek is correct and openai is correct: 840
Deepseek is correct and openai is wrong: 58
Deepseek is wrong and openai is correct: 149
Deepseek is wrong and openai is wrong: 318


In [124]:
openai = pd.read_csv("./results/gpt-4o/4o_full.csv")
deepseek = pd.read_csv("./results/deepseek-r1/deepseek_fourth_part.csv")
train = data
deepseek_w_openai_c = 0
deepseek_c_openai_w = 0;
deepseek_w_openai_w = 0
deepseek_c_openai_c = 0
for i in range(len(deepseek)):
    index = i + 4095
    correct_label = train.iloc[index].label
    deepseek_correct = deepseek.iloc[i].model_classification == correct_label
    openai_correct = openai.iloc[index].model_classification == correct_label
    if deepseek_correct and openai_correct:
        deepseek_c_openai_c += 1
    elif deepseek_correct:
        deepseek_c_openai_w += 1
    elif openai_correct:
        deepseek_w_openai_c += 1
    else:
        deepseek_w_openai_w += 1

print(f"Deepseek is correct and openai is correct: {deepseek_c_openai_c}")
print(f"Deepseek is correct and openai is wrong: {deepseek_c_openai_w}")
print(f"Deepseek is wrong and openai is correct: {deepseek_w_openai_c}")
print(f"Deepseek is wrong and openai is wrong: {deepseek_w_openai_w}")

Deepseek is correct and openai is correct: 852
Deepseek is correct and openai is wrong: 62
Deepseek is wrong and openai is correct: 158
Deepseek is wrong and openai is wrong: 293


In [125]:
openai = pd.read_csv("./results/gpt-4o/4o_full.csv")
deepseek = pd.read_csv("./results/deepseek-r1/deepseek_fifth_part.csv")
train = data
deepseek_w_openai_c = 0
deepseek_c_openai_w = 0;
deepseek_w_openai_w = 0
deepseek_c_openai_c = 0
for i in range(len(deepseek)):
    index = i + 5460
    correct_label = train.iloc[index].label
    deepseek_correct = deepseek.iloc[i].model_classification == correct_label
    openai_correct = openai.iloc[index].model_classification == correct_label
    if deepseek_correct and openai_correct:
        deepseek_c_openai_c += 1
    elif deepseek_correct:
        deepseek_c_openai_w += 1
    elif openai_correct:
        deepseek_w_openai_c += 1
    else:
        deepseek_w_openai_w += 1

print(f"Deepseek is correct and openai is correct: {deepseek_c_openai_c}")
print(f"Deepseek is correct and openai is wrong: {deepseek_c_openai_w}")
print(f"Deepseek is wrong and openai is correct: {deepseek_w_openai_c}")
print(f"Deepseek is wrong and openai is wrong: {deepseek_w_openai_w}")

Deepseek is correct and openai is correct: 860
Deepseek is correct and openai is wrong: 47
Deepseek is wrong and openai is correct: 146
Deepseek is wrong and openai is wrong: 312


In [126]:
openai = pd.read_csv("./results/gpt-4o/4o_full.csv")
deepseek = pd.read_csv("./results/deepseek-r1/deepseek_sixth_part.csv")
train = data
deepseek_w_openai_c = 0
deepseek_c_openai_w = 0;
deepseek_w_openai_w = 0
deepseek_c_openai_c = 0
for i in range(len(deepseek)):
    index = i + 6825
    correct_label = train.iloc[index].label
    deepseek_correct = deepseek.iloc[i].model_classification == correct_label
    openai_correct = openai.iloc[index].model_classification == correct_label
    if deepseek_correct and openai_correct:
        deepseek_c_openai_c += 1
    elif deepseek_correct:
        deepseek_c_openai_w += 1
    elif openai_correct:
        deepseek_w_openai_c += 1
    else:
        deepseek_w_openai_w += 1

print(f"Deepseek is correct and openai is correct: {deepseek_c_openai_c}")
print(f"Deepseek is correct and openai is wrong: {deepseek_c_openai_w}")
print(f"Deepseek is wrong and openai is correct: {deepseek_w_openai_c}")
print(f"Deepseek is wrong and openai is wrong: {deepseek_w_openai_w}")

Deepseek is correct and openai is correct: 875
Deepseek is correct and openai is wrong: 70
Deepseek is wrong and openai is correct: 124
Deepseek is wrong and openai is wrong: 300


In [128]:
deepseek_first = pd.read_csv("./results/deepseek-r1/deepseek_first_part.csv")
deepseek_second = pd.read_csv("./results/deepseek-r1/deepseek_second_part.csv")
deepseek_third = pd.read_csv("./results/deepseek-r1/deepseek_third_part.csv")
deepseek_fourth = pd.read_csv("./results/deepseek-r1/deepseek_fourth_part.csv")
deepseek_fifth = pd.read_csv("./results/deepseek-r1/deepseek_fifth_part.csv")
deepseek_sixth = pd.read_csv("./results/deepseek-r1/deepseek_sixth_part.csv")



In [129]:
print(len(deepseek_first) + len(deepseek_second) + len(deepseek_third) + len(deepseek_fourth) + len(deepseek_fifth) + len(deepseek_sixth))

8194


In [130]:
print(len(deepseek_first))
print(len(deepseek_second))
print(len(deepseek_third))
print(len(deepseek_fourth))
print(len(deepseek_fifth))
print(len(deepseek_sixth))

1365
1365
1365
1365
1365
1369


In [131]:
print(len(openai))

8194
