In [131]:
from dotenv import load_dotenv
import os
import json
import pandas as pd
from openai import OpenAI
from pydantic import BaseModel, Field
from src.dataimport import list_files_with_extension_directory, load_text
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer
from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate

# load files

In [11]:
TXT_FILES_PATH = 'data/original/brat-project-final/'
JSON_FILES_PATH = 'data/transformed/'

In [132]:
txt_files_directory_list = list_files_with_extension_directory(TXT_FILES_PATH, '.txt')
# txt_files_directory_list

json_files_directory_list = list_files_with_extension_directory(JSON_FILES_PATH, '.json')
# json_files_directory_list

print(f"Anzahl Text-Dateien: {len(txt_files_directory_list)}")
print(f"Anzahl Brat-Dateien: {len(json_files_directory_list)}")

Anzahl Text-Dateien: 402
Anzahl Brat-Dateien: 402


In [133]:
# create dataframe with file names
df = pd.DataFrame()
df['txt_path'] = txt_files_directory_list
df['json_path'] = json_files_directory_list
df['txt_file'] = df['txt_path'].apply(lambda x: os.path.basename(x))
df['json_file'] = df['json_path'].apply(lambda x: os.path.basename(x))
df['txt'] = df['txt_path'].apply(load_text)
df['json'] = df['json_path'].apply(load_text)

print(df.shape)
df.head()

# save to csv
#df.to_csv('dataframe.csv', index=False)
# load dataframe
# df = pd.read_csv('dataframe.csv')
# df.head()

(402, 6)


Unnamed: 0,txt_path,json_path,txt_file,json_file,txt,json
0,data/original/brat-project-final/essay001.txt,data/transformed/essay001.json,essay001.txt,essay001.json,Should students be taught to compete or to coo...,"{\n ""MajorClaims"": {\n ""MC1"": ""we should a..."
1,data/original/brat-project-final/essay002.txt,data/transformed/essay002.json,essay002.txt,essay002.json,More people are migrating to other countries t...,"{\n ""MajorClaims"": {\n ""MC1"": ""they are ab..."
2,data/original/brat-project-final/essay003.txt,data/transformed/essay003.json,essay003.txt,essay003.json,International tourism is now more common than ...,"{\n ""MajorClaims"": {\n ""MC1"": ""it has cont..."
3,data/original/brat-project-final/essay004.txt,data/transformed/essay004.json,essay004.txt,essay004.json,International tourism is now more common than ...,"{\n ""MajorClaims"": {\n ""MC1"": ""this indust..."
4,data/original/brat-project-final/essay005.txt,data/transformed/essay005.json,essay005.txt,essay005.json,Living and studying overseas\n\nIt is every st...,"{\n ""MajorClaims"": {\n ""MC1"": ""one who stu..."


# train test split

In [134]:
# Split the dataframe into training and test sets
train_df, test_df = train_test_split(df, train_size=40, random_state=42)

# Display the first few rows of the training and test sets
print(f"Training DataFrame: {train_df.shape}")
print(f"\nTest DataFrame: {test_df.shape}")

Training DataFrame: (40, 6)

Test DataFrame: (362, 6)


# load prompt templates

In [19]:
PROMPTS_PATH = 'prompts/final-prompts/'

In [20]:
prompt_files_directory_list = list_files_with_extension_directory(PROMPTS_PATH, '.txt')
prompt_files_directory_list
prompt_files_list = [os.path.basename(x) for x in prompt_files_directory_list]
# remove the .txt extension
prompt_names = [x.split('.')[0] for x in prompt_files_list]

prompt_df = pd.DataFrame()
# get the file name without the extension from prompt_files, 'str' object has no attribute 'path'
prompt_df['prompt_name'] = prompt_names
prompt_df['prompt_txt'] = prompt_files_directory_list
prompt_df['prompt_txt'] = prompt_df['prompt_txt'].apply(load_text)
print(F"Es gibt {prompt_df.shape[0]} Prompts")
# prompt_df

Es gibt 20 Prompts


In [21]:
# get the API key from the .env file
load_dotenv()
llama_api = os.getenv("HUGGINGFACE_TOKEN")

model_id = "meta-llama/Llama-3.2-3B-Instruct"
# model_id = "meta-llama/Llama-3.3-70B-Instruct" # requires HugginFace Pro subscription

# Function to calculate token count
def calculate_token_count(prompt):
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenized_prompt = tokenizer(prompt, return_tensors='pt') # pt for PyTorch tensors
    return tokenized_prompt.input_ids.size(1)

# Apply the function to the 'prompt' column and create a new column 'token_count'
prompt_df['token_count'] = prompt_df['prompt_txt'].apply(calculate_token_count)

prompt_df.sort_values(by='token_count')

Unnamed: 0,prompt_name,prompt_txt,token_count
19,zero-shot,You will be given a text. Extract the argument...,102
18,zero-shot-persona,You are a expert in Argument Mining and theref...,126
16,zero-shot-cot,You will be given a text. Extract the argument...,470
17,zero-shot-persona-cot,You are a expert in Argument Mining and theref...,494
15,one-shot,You will be given a text. Extract the argument...,1550
14,one-shot-persona,You are a expert in Argument Mining and theref...,1562
12,one-shot-cot,You will be given a text. Extract the argument...,1907
13,one-shot-persona-cot,You are a expert in Argument Mining and theref...,1931
3,few-shot-10,You will be given a text. Extract the argument...,12088
2,few-shot-10-persona,You are a expert in Argument Mining and theref...,12112


In [22]:
# structured output 
class ArgumentativeRelation(BaseModel):
    """Argumentative relation between the origin and target"""
    origin_id: str = Field(description="ID of the claim or premise")
    relation_type: str = Field(description="Type of relation (e.g., 'For', 'Against', 'Support', 'Attack')")
    target_id: str = Field(description="ID of the target (e.g., Major Claim ID)")

class ArgumentMiningExtraction(BaseModel):
    """Extraction of argument components and relations from a text"""
    major_claims: dict[str, str] = Field(description="Dictionary of major claims with their IDs as keys and text as values")
    claims: dict[str, str] = Field(description="Dictionary of claims with their IDs as keys and text as values")
    premises: dict[str, str] = Field(description="Dictionary of premises with their IDs as keys and text as values")
    argumentative_relations: list[ArgumentativeRelation] = Field(description="List of argumentative relations between origin and target")

    
# Quellen Structured Outputs:
# - https://platform.openai.com/docs/guides/structured-outputs
# - https://python.langchain.com/docs/concepts/structured_outputs/
# - https://python.langchain.com/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html -->Structured output

# allgemeiner Test

In [24]:
load_dotenv()
openai_api = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=openai_api)

llm = ChatOpenAI(
    model="gpt-4o-mini",
    #max_tokens=1024,
    #max_tokens_input=1024,
    temperature=0,
    timeout=None,
    # max_retries=2,
    api_key=openai_api,
    #response_format=ArgumentMiningExtraction
    )

In [25]:
messages = [
    (
        "system",
        "You are a helpful assistant that translates English to French. Translate the user sentence.",
    ),
    ("human", "I love programming."),
]
ai_msg = llm.invoke(messages)
ai_msg

AIMessage(content="J'aime la programmation.", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 31, 'total_tokens': 37, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_d02d531b47', 'finish_reason': 'stop', 'logprobs': None}, id='run-10370652-6415-4656-9498-3521cebcd09c-0', usage_metadata={'input_tokens': 31, 'output_tokens': 6, 'total_tokens': 37, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [26]:
print(ai_msg.content)

J'aime la programmation.


In [27]:
# Token usage tracking
ai_msg.usage_metadata

# Quelle: 
# - https://python.langchain.com/docs/how_to/chat_token_usage_tracking/
# - https://python.langchain.com/api_reference/openai/chat_models/langchain_openai.chat_models.base.ChatOpenAI.html

{'input_tokens': 31,
 'output_tokens': 6,
 'total_tokens': 37,
 'input_token_details': {'audio': 0, 'cache_read': 0},
 'output_token_details': {'audio': 0, 'reasoning': 0}}

# Argument mining Test

In [43]:
# Beispiel aus Testdatensatz
test_data_example = test_df.sample(1, random_state=42)
test_data_example_txt = test_data_example['txt'].values[0]
print(test_data_example_txt) 

The precondition of doing research by professors

Nowadays, many professors conduct research while teaching in colleges or universities. Although research could bring funding and latest achievements in the field, the research takes up too much teaching time. As far as I am concerned, professors should spend more time on preparing courses than research.
To begin with, it is vital that professors should assist students to acquire knowledge. The professors’ duty is to nurture students, and prepare excellent courses. If professors spend a lot of time on research, he will cut the time on preparing courses is reduced, which may decrease the quality of classes. The reason why people enter a university is that they want to learn cutting-edged technologies. In this way, professors should provide well-preparation courses to convey the state-of-art knowledge for college and university students. Therefore, professors should pay more attention on how to teach students rather than conduct research.


In [32]:
# zero-shot als test-prompt
zero_shot_prompt = prompt_df.loc[prompt_df['prompt_name'] == 'zero-shot', 'prompt_txt'].values[0]
zero_shot_prompt

'You will be given a text. Extract the argumentative units “major claim”, “claim”, and “premise” as parts from the text. Also extract the argumentative relationships between the units. Claims can be “for” or “against” the major claims. Premises, on the other hand, can “support” or “attack” a claim or another premise. It is possible that there are several major claims. Return only the argumentative units and relationships between them as a JSON object.'

In [40]:
load_dotenv()
openai_api = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=openai_api)

In [48]:
llm_struc_output = ChatOpenAI(
    model="gpt-4o-mini",
    #max_tokens=1024,
    #max_tokens_input=1024,
    temperature=0,
    timeout=None,
    # max_retries=2,
    api_key=openai_api,
    #response_format=ArgumentMiningExtraction,
    model_kwargs={"structured_output": True,
                  "structured_output_type": ArgumentMiningExtraction}
    )

# llm_struc_output

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000002104F1916A0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000002104F1934A0>, root_client=<openai.OpenAI object at 0x000002104F15B050>, root_async_client=<openai.AsyncOpenAI object at 0x000002104F1915E0>, model_name='gpt-4o-mini', temperature=0.0, model_kwargs={'structured_output': True, 'structured_output_type': <class '__main__.ArgumentMiningExtraction'>}, openai_api_key=SecretStr('**********'))

In [42]:
# template
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "{system_message}"),
        ("user", "Text: {argument_text}"),
    ]
)

# print example for one prompt template
print(
    prompt_template.invoke(
        {
            "system_message": zero_shot_prompt,
            "argument_text": test_data_example_txt,
        }
    )
)

messages=[SystemMessage(content='You will be given a text. Extract the argumentative units “major claim”, “claim”, and “premise” as parts from the text. Also extract the argumentative relationships between the units. Claims can be “for” or “against” the major claims. Premises, on the other hand, can “support” or “attack” a claim or another premise. It is possible that there are several major claims. Return only the argumentative units and relationships between them as a JSON object.', additional_kwargs={}, response_metadata={}), HumanMessage(content='Text: The precondition of doing research by professors\n\nNowadays, many professors conduct research while teaching in colleges or universities. Although research could bring funding and latest achievements in the field, the research takes up too much teaching time. As far as I am concerned, professors should spend more time on preparing courses than research.\nTo begin with, it is vital that professors should assist students to acquire kn

In [39]:
llm_chain = prompt_template | llm
llm_chain

ChatPromptTemplate(input_variables=['argument_text', 'system_message'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['system_message'], input_types={}, partial_variables={}, template='{system_message}'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['argument_text'], input_types={}, partial_variables={}, template='Text: {argument_text}'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000002104EFF90A0>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000002104EFFADB0>, root_client=<openai.OpenAI object at 0x000002104C85C980>, root_async_client=<openai.AsyncOpenAI object at 0x000002104EFF9100>, model_name='gpt-4o-mini', temperature=0.0, model_kwargs={}, openai_api_key=SecretStr('**********'))

## zero-shot

In [43]:
zero_shot_answ = llm_chain.invoke(
    {
        "system_message": zero_shot_prompt,
        "argument_text": test_data_example_txt,
    }
)
zero_shot_answ

AIMessage(content='```json\n{\n  "units": {\n    "major_claims": [\n      "Professors should spend more time on preparing courses than research.",\n      "Conducting research is helpful for professors and students to some extent."\n    ],\n    "claims": [\n      {\n        "claim": "Professors should assist students to acquire knowledge.",\n        "relationship": "support",\n        "to": "Professors should spend more time on preparing courses than research."\n      },\n      {\n        "claim": "If professors spend a lot of time on research, the quality of classes may decrease.",\n        "relationship": "support",\n        "to": "Professors should spend more time on preparing courses than research."\n      },\n      {\n        "claim": "If professors are busy in doing research, it would ruin the reputation of universities.",\n        "relationship": "support",\n        "to": "Professors should spend more time on preparing courses than research."\n      },\n      {\n        "claim": 

In [45]:
print(zero_shot_answ.content)

```json
{
  "units": {
    "major_claims": [
      "Professors should spend more time on preparing courses than research.",
      "Conducting research is helpful for professors and students to some extent."
    ],
    "claims": [
      {
        "claim": "Professors should assist students to acquire knowledge.",
        "relationship": "support",
        "to": "Professors should spend more time on preparing courses than research."
      },
      {
        "claim": "If professors spend a lot of time on research, the quality of classes may decrease.",
        "relationship": "support",
        "to": "Professors should spend more time on preparing courses than research."
      },
      {
        "claim": "If professors are busy in doing research, it would ruin the reputation of universities.",
        "relationship": "support",
        "to": "Professors should spend more time on preparing courses than research."
      },
      {
        "claim": "Spending more time to nurture excellent st

In [46]:
zero_shot_answ.usage_metadata

{'input_tokens': 573,
 'output_tokens': 630,
 'total_tokens': 1203,
 'input_token_details': {'audio': 0, 'cache_read': 0},
 'output_token_details': {'audio': 0, 'reasoning': 0}}

## one shot

In [51]:
# zero-shot als test-prompt
one_shot_prompt = prompt_df.loc[prompt_df['prompt_name'] == 'one-shot', 'prompt_txt'].values[0]
print(one_shot_prompt)

You will be given a text. Extract the argumentative units “major claim”, “claim”, and “premise” as parts from the text. Also extract the argumentative relationships between the units. Claims can be “for” or “against” the major claims. Premises, on the other hand, can “support” or “attack” a claim or another premise. It is possible that there are several major claims. Return only the argumentative units and relationships between them as a JSON object.Here is one example of a text and its corresponding json data:
## Input:
Do you think it is good for teenagers to work while schooling?

In my opinion, it is not the good idea for teenagers to have job while they are still students. Although, many argue that it provide good working experience, but I think it can interfere with their life in various ways. Having jobs would affect the health of the student. It divert their mind from studies and would take away their childhood phase from their life.
A student has to do lots of studies in today

In [70]:
# one-shot
os_template = prompt_template.invoke(
        {
            "system_message": one_shot_prompt,
            "argument_text": test_data_example_txt,
        }
    )
print(os_template)

messages=[SystemMessage(content='You will be given a text. Extract the argumentative units “major claim”, “claim”, and “premise” as parts from the text. Also extract the argumentative relationships between the units. Claims can be “for” or “against” the major claims. Premises, on the other hand, can “support” or “attack” a claim or another premise. It is possible that there are several major claims. Return only the argumentative units and relationships between them as a JSON object.Here is one example of a text and its corresponding json data:\n## Input:\nDo you think it is good for teenagers to work while schooling?\n\nIn my opinion, it is not the good idea for teenagers to have job while they are still students. Although, many argue that it provide good working experience, but I think it can interfere with their life in various ways. Having jobs would affect the health of the student. It divert their mind from studies and would take away their childhood phase from their life.\nA stud

In [71]:
system_msg = os_template.messages[0].content 
# print(system_msg)
user_msg = os_template.messages[1].content
# print(user_msg)

In [72]:
os_answ = llm_chain.invoke(
    {
        "system_message": one_shot_prompt,
        "argument_text": test_data_example_txt,
    }
)

'{\n  "MajorClaims": {\n    "MC1": "professors should spend more time on preparing courses than research",\n    "MC2": "spending more time to nurture excellent students is more consequential to professors, rather than research",\n    "MC3": "the precondition of doing research is growing students’ solid theory fundamental"\n  },\n  "Claims": {\n    "C1": "research could bring funding and latest achievements in the field",\n    "C2": "the research takes up too much teaching time",\n    "C3": "professors should assist students to acquire knowledge",\n    "C4": "the professors’ duty is to nurture students, and prepare excellent courses",\n    "C5": "if professors spend a lot of time on research, the time on preparing courses is reduced",\n    "C6": "this may decrease the quality of classes",\n    "C7": "students want to learn cutting-edged technologies",\n    "C8": "professors should provide well-preparation courses to convey the state-of-art knowledge",\n    "C9": "if professors are busy 

In [74]:
print(os_answ.content)

{
  "MajorClaims": {
    "MC1": "professors should spend more time on preparing courses than research",
    "MC2": "spending more time to nurture excellent students is more consequential to professors, rather than research",
    "MC3": "the precondition of doing research is growing students’ solid theory fundamental"
  },
  "Claims": {
    "C1": "research could bring funding and latest achievements in the field",
    "C2": "the research takes up too much teaching time",
    "C3": "professors should assist students to acquire knowledge",
    "C4": "the professors’ duty is to nurture students, and prepare excellent courses",
    "C5": "if professors spend a lot of time on research, the time on preparing courses is reduced",
    "C6": "this may decrease the quality of classes",
    "C7": "students want to learn cutting-edged technologies",
    "C8": "professors should provide well-preparation courses to convey the state-of-art knowledge",
    "C9": "if professors are busy in doing researc

In [73]:
os_answ.usage_metadata

{'input_tokens': 2018,
 'output_tokens': 1138,
 'total_tokens': 3156,
 'input_token_details': {'audio': 0, 'cache_read': 0},
 'output_token_details': {'audio': 0, 'reasoning': 0}}

# Evaluation
Start here so you dont need to invoke the llm every time. Just use the output for further processing. 

In [239]:
output_str = """{
  "MajorClaims": {
    "MC1": "professors should spend more time on preparing courses than research",
    "MC2": "spending more time to nurture excellent students is more consequential to professors, rather than research",
    "MC3": "the precondition of doing research is growing students’ solid theory fundamental"
  },
  "Claims": {
    "C1": "research could bring funding and latest achievements in the field",
    "C2": "the research takes up too much teaching time",
    "C3": "professors should assist students to acquire knowledge",
    "C4": "the professors’ duty is to nurture students, and prepare excellent courses",
    "C5": "if professors spend a lot of time on research, the time on preparing courses is reduced",
    "C6": "this may decrease the quality of classes",
    "C7": "students want to learn cutting-edged technologies",
    "C8": "professors should provide well-preparation courses to convey the state-of-art knowledge",
    "C9": "if professors are busy in doing research, it would ruin the reputation of universities",
    "C10": "students are unsatisfied with the unprepared course",
    "C11": "the enrollment of universities may decline",
    "C12": "hardly can professors find assistants to help with projects",
    "C13": "spending more time to nurture excellent students is more consequential to professors, rather than research",
    "C14": "professors could teach students through research and projects",
    "C15": "some students enjoy taking part in some research",
    "C16": "they may get more experience in the process of research",
    "C17": "only under specific circumstance, when students master all of the systematic knowledge, can they accomplish research",
    "C18": "this requires professors provide high quality courses",
    "C19": "conducting research is helpful for professors and students to some extent",
    "C20": "professors could combine doing research and preparing good courses in a dynamic way"
  },
  "Premises": {
    "P1": "the reason why people enter a university is that they want to learn cutting-edged technologies",
    "P2": "students would not choose these kinds of universities to study",
    "P3": "it is adverse to the whole academic field",
    "P4": "through to use the equipment and invent their own work, students may get more experience",
    "P5": "electronic students always invent new machines such as robots, MP3 and remote control car",
    "P6": "they can manage the electric theory in practical skills",
    "P7": "some students will be rewarded by investing new inventions"
  },
  "ArgumentativeRelations": [
    {
      "Claim": "C1",
      "Relation": "For",
      "Target": "MC1"
    },
    {
      "Claim": "C2",
      "Relation": "For",
      "Target": "MC1"
    },
    {
      "Claim": "C3",
      "Relation": "For",
      "Target": "MC1"
    },
    {
      "Claim": "C4",
      "Relation": "For",
      "Target": "MC1"
    },
    {
      "Claim": "C5",
      "Relation": "supports",
      "Target": "C4"
    },
    {
      "Claim": "C6",
      "Relation": "supports",
      "Target": "C5"
    },
    {
      "Claim": "C7",
      "Relation": "supports",
      "Target": "C4"
    },
    {
      "Claim": "C8",
      "Relation": "For",
      "Target": "MC1"
    },
    {
      "Claim": "C9",
      "Relation": "For",
      "Target": "MC2"
    },
    {
      "Claim": "C10",
      "Relation": "supports",
      "Target": "C9"
    },
    {
      "Claim": "C11",
      "Relation": "supports",
      "Target": "C9"
    },
    {
      "Claim": "C12",
      "Relation": "supports",
      "Target": "C9"
    },
    {
      "Claim": "C13",
      "Relation": "For",
      "Target": "MC2"
    },
    {
      "Claim": "C14",
      "Relation": "For",
      "Target": "MC3"
    },
    {
      "Claim": "C15",
      "Relation": "supports",
      "Target": "C14"
    },
    {
      "Claim": "C16",
      "Relation": "supports",
      "Target": "C14"
    },
    {
      "Claim": "C17",
      "Relation": "supports",
      "Target": "C14"
    },
    {
      "Claim": "C18",
      "Relation": "supports",
      "Target": "C17"
    },
    {
      "Claim": "C19",
      "Relation": "For",
      "Target": "MC3"
    },
    {
      "Claim": "C20",
      "Relation": "For",
      "Target": "MC3"
    }
  ]
}
"""

output_df = pd.DataFrame({'json_file': 'essay169.json', 'os_json': output_str}, index=[0])
output_df


Unnamed: 0,json_file,os_json
0,essay169.json,"{\n ""MajorClaims"": {\n ""MC1"": ""professors ..."


In [249]:
print(output_df['os_json'].values[0])

{
  "MajorClaims": {
    "MC1": "professors should spend more time on preparing courses than research",
    "MC2": "spending more time to nurture excellent students is more consequential to professors, rather than research",
    "MC3": "the precondition of doing research is growing students’ solid theory fundamental"
  },
  "Claims": {
    "C1": "research could bring funding and latest achievements in the field",
    "C2": "the research takes up too much teaching time",
    "C3": "professors should assist students to acquire knowledge",
    "C4": "the professors’ duty is to nurture students, and prepare excellent courses",
    "C5": "if professors spend a lot of time on research, the time on preparing courses is reduced",
    "C6": "this may decrease the quality of classes",
    "C7": "students want to learn cutting-edged technologies",
    "C8": "professors should provide well-preparation courses to convey the state-of-art knowledge",
    "C9": "if professors are busy in doing researc

In [240]:
# #os_data = os_answ.content
# os_data = json.loads(output_str)
# os_data
# # put data into a dataframe column
# test_data_example["os_json"] = json.dumps(os_data)
# os_df
test_data_example = test_df.sample(1, random_state=42)
test_data_example

Unnamed: 0,txt_path,json_path,txt_file,json_file,txt,json
168,data/original/brat-project-final/essay169.txt,data/transformed/essay169.json,essay169.txt,essay169.json,The precondition of doing research by professo...,"{\n ""MajorClaims"": {\n ""MC1"": ""Professors ..."


## ground truth

In [241]:
def replace_ids_with_sentences(data):
    """Replace IDs with sentences in the argumentative relations ,to check if the model is able to extract the relations correctly"""
    transformed_relations = []
    try:
        for relation in data['ArgumentativeRelations']:
            origin_id = relation['Origin']
            target_id = relation['Target']
            
            if origin_id.startswith('MC'):
                origin_sentence = 'MC'
            elif origin_id.startswith('C'):
                origin_sentence = data['Claims'][origin_id]
            elif origin_id.startswith('P'):
                origin_sentence = data['Premises'][origin_id]
            
            if target_id.startswith('MC'):
                target_sentence = 'MC'
            elif target_id.startswith('C'):
                target_sentence = data['Claims'][target_id]
            elif target_id.startswith('P'):
                target_sentence = data['Premises'][target_id]
            
            transformed_relations.append({
                'Origin': origin_sentence,
                'Relation': relation['Relation'],
                'Target': target_sentence
            })
    
        return transformed_relations
    
    except KeyError: # Für den Fall, dass die Struktur abweicht, da Struktur später noch konkretisiert wurde // Nur für Testzwecke
        for relation in data['ArgumentativeRelations']:
            origin_id = relation['Claim']
            target_id = relation['Target']
            
            if origin_id.startswith('MC'):
                origin_sentence = 'MC'
            elif origin_id.startswith('C'):
                origin_sentence = data['Claims'][origin_id]
            elif origin_id.startswith('P'):
                origin_sentence = data['Premises'][origin_id]
            
            if target_id.startswith('MC'):
                target_sentence = 'MC'
            elif target_id.startswith('C'):
                target_sentence = data['Claims'][target_id]
            elif target_id.startswith('P'):
                target_sentence = data['Premises'][target_id]
            
            transformed_relations.append({
                'Origin': origin_sentence,
                'Relation': relation['Relation'],
                'Target': target_sentence
            })

        return transformed_relations



def extract_relations(json_obj):
    try:
        return [(rel['Origin'], rel['Relation'], rel['Target']) for rel in json_obj]
    except KeyError:
        return [(rel['Claim'], rel['Relation'], rel['Target']) for rel in json_obj]

In [242]:
# Ground Truth    
eval_df = test_df.loc[:, ['json_file', 'json']] # selecting only the columns with the json data as the ground truth
eval_df["json"] = eval_df["json"].apply(json.loads) # convert string to json
eval_df["relations"] = eval_df["json"].apply(replace_ids_with_sentences) # replace IDs with sentences
eval_df["relations"] = eval_df["relations"].apply(extract_relations) # return the relations as tuples instead of dictionaries
# extract the sentences from the json and convert dictionary values to lists
eval_df["major_claims"] = eval_df["json"].apply(lambda x: list(x['MajorClaims'].values())) 
eval_df["claims"] = eval_df["json"].apply(lambda x: list(x['Claims'].values()))
eval_df["premises"] = eval_df["json"].apply(lambda x: list(x['Premises'].values()))
eval_df

Unnamed: 0,json_file,json,relations,major_claims,claims,premises
285,essay286.json,{'MajorClaims': {'MC1': 'Many circumstances th...,[(These experiences taught me to change my lif...,[Many circumstances that I experienced change ...,[These experiences taught me to change my life...,"[I could find a better way to solve problems, ..."
281,essay282.json,{'MajorClaims': {'MC1': 'reducing stress by li...,[(I could enjoy listening music without thinki...,"[reducing stress by listening to music, playin...","[listening to music is a great way to relax, a...","[I always listen music from my mp3 player, I c..."
33,essay034.json,{'MajorClaims': {'MC1': 'student have to be fo...,[(there are some advantages of leaving school ...,"[student have to be forced to study at school,...",[there are some advantages of leaving school t...,[studying at school is always the best choice ...
211,essay212.json,{'MajorClaims': {'MC1': 'I would follow the ne...,[(it will be easier for people to assimilate i...,"[I would follow the new customs, if I migrated...",[it will be easier for people to assimilate in...,"[When people migrate to a new country, they ar..."
93,essay094.json,{'MajorClaims': {'MC1': 'It is a completely ri...,[(reasonable attentions should be paid to trea...,[It is a completely right and extremely wise p...,[reasonable attentions should be paid to treat...,[Preventative measures will help improve gener...
...,...,...,...,...,...,...
134,essay135.json,{'MajorClaims': {'MC1': 'international aid sho...,"[(International aid is critical, For, MC), (Ma...","[international aid should be prioritised, peop...","[International aid is critical, it is reasonab...","[Many people, for example in some African coun..."
306,essay307.json,{'MajorClaims': {'MC1': 'mass media deal too m...,"[(Thanks to the Internet, we can go online and...",[mass media deal too much with superficial thi...,[television and newspapers focus on famous peo...,[Gutter press and Tabloid TV are more and more...
385,essay386.json,{'MajorClaims': {'MC1': 'the classmates should...,[(they provide tremendous incentive and horizo...,[the classmates should be regarded as the most...,[they provide tremendous incentive and horizon...,"[In high school, for instance, an individual c..."
319,essay320.json,{'MajorClaims': {'MC1': 'type of temper and ad...,[(my productivity boosts when style of work is...,[type of temper and advantages of individual w...,[my productivity boosts when style of work is ...,"[Being by nature introvert, I have to spend mu..."


In [243]:
#TODO: Dateiname in Spalte einfügen, damit Output und Ground Truth für gleiche Datei verglichen werden können.

# comparing the output with the ground truth
# create dataframe with output_str
os_df = output_df.copy()
# os_df = pd.DataFrame({"os_json": output_str}, index=[0])
#os_df["os_json"] = output_str # use json.dumps() to convert json to string or use the string directly, or json.loads() to convert string to json
os_df["os_json"] = os_df["os_json"].apply(json.loads) # convert string to json
os_df["relations"] = os_df["os_json"].apply(replace_ids_with_sentences) # replace IDs with sentences
os_df["relations"] = os_df["relations"].apply(extract_relations) # return the relations as tuples instead of dictionaries
os_df["relations"].values[0]
# extract the sentences from the json and convert dictionary values to lists
os_df["major_claims"] = os_df["os_json"].apply(lambda x: list(x['MajorClaims'].values()))
os_df["claims"] = os_df["os_json"].apply(lambda x: list(x['Claims'].values()))
os_df["premises"] = os_df["os_json"].apply(lambda x: list(x['Premises'].values()))
os_df

Unnamed: 0,json_file,os_json,relations,major_claims,claims,premises
0,essay169.json,{'MajorClaims': {'MC1': 'professors should spe...,[(research could bring funding and latest achi...,[professors should spend more time on preparin...,[research could bring funding and latest achie...,[the reason why people enter a university is t...


In [245]:
# merge the dataframes os_df and eval_df
merged_df = pd.merge(eval_df, os_df, on='json_file', suffixes=('_eval', '_os'))
merged_df

Unnamed: 0,json_file,json,relations_eval,major_claims_eval,claims_eval,premises_eval,os_json,relations_os,major_claims_os,claims_os,premises_os
0,essay169.json,{'MajorClaims': {'MC1': 'Professors could comb...,"[(Only by developing students, can we have a b...",[Professors could combine doing research and p...,"[Only by developing students, can we have a be...",[conducting research is helpful for professors...,{'MajorClaims': {'MC1': 'professors should spe...,[(research could bring funding and latest achi...,[professors should spend more time on preparin...,[research could bring funding and latest achie...,[the reason why people enter a university is t...


## Evaluatio metrics

In [115]:
from sklearn.metrics import jaccard_score
import numpy as np

def jaccard_similarity(list1, list2):
    """Calculate the Jaccard similarity between two lists of sentences"""
    set1 = set(list1)
    set2 = set(list2)
    intersection = len(set1.intersection(set2))
    union = len(set1.union(set2))
    return intersection / union

# Calculate Jaccard similarity for major claims, claims, and premises
major_claims_similarity = jaccard_similarity(eval_df['major_claims'].values[0], os_df['os_major_claims'].values[0])
claims_similarity = jaccard_similarity(eval_df['claims'].values[0], os_df['os_claims'].values[0])
premises_similarity = jaccard_similarity(eval_df['premises'].values[0], os_df['os_premises'].values[0])

print(f"Major Claims Jaccard Similarity: {major_claims_similarity}")
print(f"Claims Jaccard Similarity: {claims_similarity}")
print(f"Premises Jaccard Similarity: {premises_similarity}")


Major Claims Jaccard Similarity: 0.25
Claims Jaccard Similarity: 0.13043478260869565
Premises Jaccard Similarity: 0.038461538461538464


In [117]:
import nltk
from nltk.translate.bleu_score import sentence_bleu

# compare llm answer with the original text
def bleu_score(reference, candidate):
    """Calculate the BLEU score between the reference and candidate texts"""
    reference = reference.split()
    candidate = candidate.split()
    return sentence_bleu([reference], candidate)

# Calculate BLEU score for major claims, claims, and premises
major_claims_bleu = bleu_score(' '.join(eval_df['major_claims'].values[0]), ' '.join(os_df['os_major_claims'].values[0]))
claims_bleu = bleu_score(' '.join(eval_df['claims'].values[0]), ' '.join(os_df['os_claims'].values[0]))
premises_bleu = bleu_score(' '.join(eval_df['premises'].values[0]), ' '.join(os_df['os_premises'].values[0]))

print(f"Major Claims BLEU Score: {major_claims_bleu}")
print(f"Claims BLEU Score: {claims_bleu}")
print(f"Premises BLEU Score: {premises_bleu}")

Major Claims BLEU Score: 0.262573847389014
Claims BLEU Score: 0.15525199978977314
Premises BLEU Score: 0.0925856333477574


In [126]:
from sklearn.metrics import precision_recall_fscore_support

def extract_relations(relations):
    return [(rel['origin_id'], rel['relation_type'], rel['target_id']) for rel in relations]

# Extract relations from ground truth and LLM output
ground_truth_relations = extract_relations(eval_df['argumentative_relations'].values[0])
llm_relations = extract_relations(os_df['os_argumentative_relations'].values[0])

# Convert to sets for comparison
ground_truth_set = set(ground_truth_relations)
llm_set = set(llm_relations)

# Calculate precision, recall, and F1 score
precision, recall, f1, _ = precision_recall_fscore_support(
    list(ground_truth_set), list(llm_set), average='micro'
)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

KeyError: 'origin_id'