In [2]:
import pandas as pd
import json

def load_annotations_df(path):
    """
    Load a tab-separated annotation file into a DataFrame.

    The file has no header row and exactly three columns: document id,
    narratives and subnarratives. The two label columns hold ';'-separated
    labels and are converted to Python lists.

    Args:
        path: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        pd.DataFrame: Columns ``id`` (str), ``narratives`` (list of str) and
        ``subnarratives`` (list of str). An empty label field yields ``[]``.
    """
    # Read everything as text and keep empty fields as '' instead of NaN:
    # a NaN cell is a float and would crash the ``split`` below.
    annotations_df = pd.read_csv(path, sep='\t', header=None, dtype=str, keep_default_na=False)

    annotations_df.columns = ['id', 'narratives', 'subnarratives']

    def _split_labels(cell):
        # Drop empty fragments so that '' becomes [] rather than [''].
        return [label for label in cell.split(';') if label]

    for column in ('narratives', 'subnarratives'):
        annotations_df[column] = annotations_df[column].apply(_split_labels)

    return annotations_df

# Load the English subtask-2 annotations (id, narratives, subnarratives) from disk.
annotations_df = load_annotations_df('data/EN/subtask-2-annotations.txt')

In [3]:
# method to extract the list of all unique subnarratives
def get_subnarratives_list(file):
    """
    Build the flat list of all subnarrative labels from the taxonomy file.

    Args:
        file (str): Path to a JSON file shaped as
            ``{main_category: {narrative: [subnarrative, ...]}}``.

    Returns:
        list: ``"Other"`` followed by one ``"main: narrative: subnarrative"``
        label per leaf. Every narrative is guaranteed a ``"...: Other"`` leaf.
    """
    # UTF-8 is given explicitly: the labels contain non-ASCII characters
    # (e.g. a typographic apostrophe) and the platform default may differ.
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    subnarratives = ["Other"]
    for main_category, subcategories in data.items():
        for subcategory, leaves in subcategories.items():
            # Add the catch-all leaf on a new list so the parsed JSON is not mutated.
            if "Other" not in leaves:
                leaves = leaves + ["Other"]

            for leaf in leaves:
                subnarratives.append(f"{main_category}: {subcategory}: {leaf}")

    return subnarratives

def get_narratives_list(file):
    """
    Build the flat list of all narrative labels from the taxonomy file.

    Args:
        file (str): Path to a JSON file shaped as
            ``{main_category: {narrative: [subnarrative, ...]}}``.

    Returns:
        list: ``"Other"`` followed by one ``"main_category: narrative"``
        label per narrative, in file order.
    """
    # UTF-8 is given explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    narratives = ["Other"]
    for main_category, subcategories in data.items():
        # Only the narrative names (the keys) are needed at this level.
        for subcategory in subcategories:
            narratives.append(f"{main_category}: {subcategory}")

    return narratives

# Label vocabularies derived from the taxonomy file.
narratives_list = get_narratives_list('data/taxonomy.json')
subnarratives_list = get_subnarratives_list('data/taxonomy.json')
# Raw nested taxonomy. A context manager closes the file handle
# deterministically, and UTF-8 is explicit because labels are non-ASCII.
with open('data/taxonomy.json', 'r', encoding='utf-8') as taxonomy_file:
    taxonomy = json.load(taxonomy_file)

In [4]:
import os
def read_text(file_id, base_path='data/EN/raw-documents'):
    """Return the whole UTF-8 content of the document ``file_id`` located in ``base_path``."""
    document_path = os.path.join(base_path, f'{file_id}')
    with open(document_path, 'r', encoding='utf-8') as handle:
        content = handle.read()
    return content
    
def get_sibling_subnarratives(subnarrative):
    """
    Look up the taxonomy entry that the given subnarrative belongs to.

    The label is split on ``': '``; its first two parts (main category and
    narrative) are used as keys into the module-level ``taxonomy``.

    Args:
        subnarrative (str): Label of the form "main: narrative: subnarrative".

    Returns:
        list: The taxonomy entry stored under that main category and narrative
        (the object itself, not a copy).
    """
    parts = subnarrative.split(': ')
    return taxonomy[parts[0]][parts[1]]

def get_narrative_definition(narrative):
    """
    Return the textual definition of a narrative.

    For the catch-all 'Other', the definition is generated from the short
    names of every other entry of ``narratives_list``. For any other
    narrative, the definition is read from 'data/narratives definition.csv',
    matching the ``narrative`` column against the text after the last ':'.
    An IndexError is raised when no row matches.
    """
    if narrative != 'Other':
        definitions_table = pd.read_csv('data/narratives definition.csv')
        wanted = narrative.split(':')[-1].strip()
        matching_rows = definitions_table[definitions_table['narrative'] == wanted]
        return matching_rows['definition'].values[0]

    known_topics = [get_narrative_short_name(name) for name in narratives_list if name != 'Other']
    return 'Statements that are NOT related to anyone of these topics : {}'.format(', '.join(known_topics))

def get_narrative_examples(narrative):
    """
    Return the ``example`` cell stored for a narrative, or None for 'Other'.

    The value is read from 'data/narratives definition.csv', matching the
    ``narrative`` column against the text after the last ':' of the label.
    An IndexError is raised when no row matches.
    """
    if narrative != 'Other':
        definitions_table = pd.read_csv('data/narratives definition.csv')
        wanted = narrative.split(':')[-1].strip()
        matching_rows = definitions_table[definitions_table['narrative'] == wanted]
        return matching_rows['example'].values[0]
    return None

def get_narrative_short_name(narrative):
    """Return the text after the last ':' of a narrative label, whitespace-trimmed."""
    _, _, tail = narrative.rpartition(':')
    return tail.strip()

def get_subnarrative_definition(subnarrative):
    """
    Return the textual definition of a subnarrative.

    Three cases are handled:
    - the global 'Other': a sentence listing every narrative of
      ``narratives_list`` except 'Other';
    - a "...: Other" leaf: a sentence built from the parent narrative's short
      name, its definition and the sibling subnarratives;
    - any other leaf: the ``definition`` cell of the matching row of
      'data/subnarrative definitions.csv' (IndexError when no row matches).

    The parent narrative's definition is looked up for every non-global label,
    including the ones that end up using the CSV definition.
    """
    if subnarrative == 'Other':
        all_narratives = [name for name in narratives_list if name != 'Other']
        return 'Statements that are NOT related to anyone of these narratives : {}'.format(', '.join(all_narratives))

    pieces = subnarrative.split(':')
    leaf_name = pieces[-1].strip()
    parent_name = pieces[-2].strip()
    parent_definition = get_narrative_definition(parent_name)

    if leaf_name != 'Other':
        definitions_table = pd.read_csv('data/subnarrative definitions.csv')
        matching_rows = definitions_table[definitions_table['subnarrative'] == leaf_name]
        return matching_rows['definition'].values[0]

    return 'Statement that are related to the narrative "{}", defined as {} but are not related to anyone of these subnarratives : {}'.format(
        get_narrative_short_name(parent_name),
        parent_definition,
        get_sibling_subnarratives(subnarrative),
    )


def get_subnarrative_examples(subnarrative):
    """
    Return the ``examples`` cell stored for a subnarrative.

    Labels whose last ':'-separated part is 'Other' (including the bare
    'Other') have no examples and yield None. Otherwise the value comes from
    'data/subnarrative definitions.csv'; an IndexError is raised when no row
    matches.
    """
    leaf_name = subnarrative.split(':')[-1].strip()
    if leaf_name == 'Other':
        return None
    definitions_table = pd.read_csv('data/subnarrative definitions.csv')
    matching_rows = definitions_table[definitions_table['subnarrative'] == leaf_name]
    return matching_rows['examples'].values[0]

def get_subnarrative_short_name(subnarrative):
    """Return the text after the last ':' of a subnarrative label, whitespace-trimmed."""
    _, _, tail = subnarrative.rpartition(':')
    return tail.strip()
        

In [5]:
# Read every document once up front: the text does not depend on the label,
# so re-reading all files for each subnarrative was wasted I/O.
# The column is stored on annotations_df itself because later cells
# (e.g. the classify call) read annotations_df['text'].
annotations_df['text'] = annotations_df['id'].apply(read_text)

subnarratives_dfs = {}
for subnarrative in subnarratives_list:
    # Binary target: 1 when the document is annotated with this subnarrative.
    labels = annotations_df['subnarratives'].apply(lambda assigned: 1 if subnarrative in assigned else 0)
    # Build a fresh frame per label instead of aliasing and mutating
    # annotations_df; keep only the id, text and label columns.
    temp_df = annotations_df[['id', 'text']].assign(label=labels)
    print('Attempting to create dataframe for subnarrative:', subnarrative)
    subnarratives_dfs[subnarrative] = {
        'df': temp_df,
        'definition': get_subnarrative_definition(subnarrative),
        'examples': get_subnarrative_examples(subnarrative)
    }

Attempting to create dataframe for subnarrative: Other
Attempting to create dataframe for subnarrative: URW: Blaming the war on others rather than the invader: Ukraine is the aggressor
Attempting to create dataframe for subnarrative: URW: Blaming the war on others rather than the invader: The West are the aggressors
Attempting to create dataframe for subnarrative: URW: Blaming the war on others rather than the invader: Other
Attempting to create dataframe for subnarrative: URW: Discrediting Ukraine: Rewriting Ukraine’s history
Attempting to create dataframe for subnarrative: URW: Discrediting Ukraine: Discrediting Ukrainian nation and society
Attempting to create dataframe for subnarrative: URW: Discrediting Ukraine: Discrediting Ukrainian military
Attempting to create dataframe for subnarrative: URW: Discrediting Ukraine: Discrediting Ukrainian government and officials and policies
Attempting to create dataframe for subnarrative: URW: Discrediting Ukraine: Ukraine is a puppet of the W

In [6]:
# Sort each per-subnarrative frame by label, descending, so that the positive
# examples (label 1) come first and the negative ones (label 0) follow.
# Each dictionary entry is replaced by the sorted copy.
for subnarrative, data in subnarratives_dfs.items():
    subnarratives_dfs[subnarrative]['df'] = data['df'].sort_values('label', ascending=False)

In [7]:
import autogen

# System prompt for one subnarrative classifier agent.
# Placeholders, in order: subnarrative label, its definition, its examples.
# Adjacent string literals are concatenated verbatim, so every sentence ends
# with an explicit space to avoid fusing it with the next one.
system_prompt = (
    "You are a classification model trained to do binary classification by detecting whether a given text is related to a specific subnarrative or not. "
    "You have been trained to recognize the subnarrative: '{}'. "
    "This subnarrative is defined as: {}. "
    "Here are some examples of statements related to this subnarrative: {}. "
    "If the text is related to the subnarrative, please respond with '1'. Otherwise, respond with '0'. Do not try to make sentences, just respond with '1' or '0'. "
    "You are ONLY allowed to answer with '1' or '0' and NOTHING else. "
    "Only answer with 1 if there are explicit and clear mentions of the subnarrative in the text. If you are slightly unsure, classify as 0."
)

# User message template. Placeholders, in order: subnarrative label, text to classify.
user_prompt = (
    "Please classify the following text as related to the subnarrative '{}' or not. "
    "Text : \n ```{}```"
)

Patching name='__init__', member=<function LLMLingua.__init__ at 0x7f583ed842c0>, patched=<function function.__call__ at 0x7f583ed84220>
Patching name='compress_text', member=<function LLMLingua.compress_text at 0x7f583ed84360>, patched=<function function.__call__ at 0x7f583ed84540>


In [8]:
# Read every document once up front: the text does not depend on the label,
# so re-reading all files for each narrative was wasted I/O.
# The column is stored on annotations_df itself because later cells
# (e.g. the classify call) read annotations_df['text'].
annotations_df['text'] = annotations_df['id'].apply(read_text)

narratives_dfs = {}

for narrative in narratives_list:
    # Binary target: 1 when the document is annotated with this narrative.
    labels = annotations_df['narratives'].apply(lambda assigned: 1 if narrative in assigned else 0)
    # Build a fresh frame per label instead of aliasing and mutating
    # annotations_df; keep only the text and label columns.
    temp_df = annotations_df[['text']].assign(label=labels)
    print('Attempting to create dataframe for narrative:', narrative)
    narratives_dfs[narrative] = {
        'df': temp_df,
        'definition': get_narrative_definition(narrative),
        'examples': get_narrative_examples(narrative)
    }

Attempting to create dataframe for narrative: Other
Attempting to create dataframe for narrative: URW: Blaming the war on others rather than the invader
Attempting to create dataframe for narrative: URW: Discrediting Ukraine
Attempting to create dataframe for narrative: URW: Russia is the Victim
Attempting to create dataframe for narrative: URW: Praise of Russia
Attempting to create dataframe for narrative: URW: Overpraising the West
Attempting to create dataframe for narrative: URW: Speculating war outcomes
Attempting to create dataframe for narrative: URW: Discrediting the West, Diplomacy
Attempting to create dataframe for narrative: URW: Negative Consequences for the West
Attempting to create dataframe for narrative: URW: Distrust towards Media
Attempting to create dataframe for narrative: URW: Amplifying war-related fears
Attempting to create dataframe for narrative: URW: Hidden plots by secret schemes of powerful groups
Attempting to create dataframe for narrative: CC: Criticism o

In [32]:
# System prompt for one narrative classifier agent.
# Placeholders, in order: narrative label, its definition, its examples.
# Adjacent string literals are concatenated verbatim, so every sentence ends
# with explicit punctuation and a space to avoid fusing it with the next one.
narrative_system_prompt = (
    "You are a classification model trained to do binary classification by detecting whether a given text is related to a specific narrative or not. The only output you are allowed to give is '0' or '1'. "
    "You have been trained to recognize the narrative: '{}' "
    "defined as: {}. "
    "Here are some examples of statements related to this narrative: {}. "
    "If the text is related to the narrative, you MUST respond with '1' only. Otherwise, you MUST respond with '0' only. "
    "You are ONLY allowed to answer with '1' or '0' and NOTHING else. "
    "Only answer with 1 if there are EXPLICIT and CLEAR mentions of the narrative in the text. Some text will be ambiguous so if you are slightly unsure, answer 0."
)

# User message template. Placeholders, in order: narrative label, text to classify.
narrative_user_prompt = (
    "Please classify the following text as related to the narrative '{}' or not. "
    "Text : "
    "{}"
)

In [None]:
def create_narrative_agents(narratives_list):
    """
    Create one binary-classifier assistant agent per narrative.

    Each agent is named "Agent_<position>", where <position> is the index of
    its narrative in ``narratives_list``; extract_recognized_narratives relies
    on that suffix to map a reply back to its narrative.

    Args:
        narratives_list (list): Narrative labels; each one must be a key of
            the module-level ``narratives_dfs``.

    Returns:
        list: The created agents, in the same order as ``narratives_list``.
    """
    narrative_agents = []

    # enumerate gives the position directly; list.index would rescan the list
    # for every label and return the wrong position for a duplicated label.
    for position, narrative in enumerate(narratives_list):
        # The definition was already computed when narratives_dfs was built,
        # so reuse it instead of re-reading the definitions CSV.
        definition = narratives_dfs[narrative]['definition']
        agent = autogen.AssistantAgent(
            name="Agent_" + str(position),
            system_message=narrative_system_prompt.format(narrative, definition, narratives_dfs[narrative]['examples']),
            llm_config={
                "config_list": [
                    {
                        "model": "gpt-4o",
                        "api_key": os.environ.get("OPENAI_API_KEY")
                    }
                ],
                'temperature': 0
            }
        )

        # Short self-description attached to the agent for the group chat.
        agent.description = (
            "I am a classification model trained to do binary classification by detecting whether a given text is related to the following narrative: {}. "
            "I will be looking for {}"
        ).format(get_narrative_short_name(narrative), definition)
        narrative_agents.append(agent)
    return narrative_agents

# One classifier agent per entry of narratives_list (same order).
narrative_agents = create_narrative_agents(narratives_list)

In [None]:
def reset_agents(agent_list):
    """Call ``reset()`` on every agent of ``agent_list``, in order."""
    for member in agent_list:
        member.reset()

# reset_agents(narrative_agents)

In [None]:
# Proxy agent named "user" that starts each classification chat.
# It never asks a human for input and has code execution disabled.
narratives_user_proxy_agent = autogen.UserProxyAgent(
    name="user",
    code_execution_config=False,
    llm_config={
        "config_list": [
            {
                "model": "gpt-4o-mini",
                "api_key": os.environ.get("OPENAI_API_KEY")
            }
        ]
    },
    human_input_mode='NEVER'
)

# NOTE: despite its name, this mapping is passed to GroupChat below with
# speaker_transitions_type="disallowed", so it lists the forbidden
# transitions: each narrative agent -> the user proxy.
allowed_transitions = {}

for agent in narrative_agents:
    allowed_transitions[agent] = [narratives_user_proxy_agent]

# NOTE(review): max_round is fixed at 6 here, whereas create_group_chat uses
# len(agent_list) + 1 -- presumably this caps how many agents reply per text;
# confirm that the cap is intended.
narratives_group_chat = autogen.GroupChat(
    agents= [narratives_user_proxy_agent] + narrative_agents,
    messages=[],
    max_round=6,
    send_introductions=True,
    allowed_or_disallowed_speaker_transitions=allowed_transitions,
    speaker_transitions_type="disallowed",
)

# Manager for the narratives group chat; it is the recipient passed to
# initiate_chat in classify.
narratives_manager = autogen.GroupChatManager(
    groupchat=narratives_group_chat,
    llm_config = {
        "config_list": [
            {
                "model": "gpt-4o",
                "api_key": os.environ.get("OPENAI_API_KEY")
            }
        ]
    },
)



In [None]:
def classify(user_proxy_agent, manager, df, n = 5):
    """
    Run the group-chat classification on the first ``n`` rows of ``df``.

    Args:
        user_proxy_agent: Agent that starts each chat with ``manager``.
        manager: Group chat manager; its ``groupchat.agents`` are reset after
            every text so that no history leaks into the next one.
        df (pd.DataFrame): Frame with ``id`` and ``text`` columns.
        n (int): Maximum number of rows to process. Values larger than
            ``len(df)`` are clamped instead of raising IndexError.

    Returns:
        list: One dict per processed row with keys ``chat_result`` (the chat
        history) and ``file`` (the row's ``id``).
    """
    histories = []
    total = min(n, len(df))
    for i in range(total):
        chat_result = user_proxy_agent.initiate_chat(
            manager,
            message="Here is the text that needs to be classified: \n```{}```\n ### \n You are ONLY allowed to reply with '0' or '1'".format(df['text'].iloc[i]),
            summary_method='reflection_with_llm'
        )
        histories.append({
            'chat_result': chat_result.chat_history,
            'file': df['id'].iloc[i]
        })
        # Reset the agents that actually took part in this chat (the ones
        # passed in), not module-level globals that may have been re-created
        # since the manager was built.
        user_proxy_agent.reset()
        for agent in manager.groupchat.agents:
            if agent is not user_proxy_agent:
                agent.reset()
        print('Completed ', i+1, ' out of ', total, ' texts')
    return histories

In [14]:
def extract_recognized_narratives(chat_history):
    """
    Turn a group-chat history into the list of recognized narratives.

    Messages from 'user' and 'chat_manager' are ignored. Every other speaker
    is expected to be named "Agent_<index>"; a reply of '1' (surrounding
    whitespace ignored) selects ``narratives_list[index]``.

    Args:
        chat_history (list): Message dicts with ``name`` and ``content`` keys.

    Returns:
        list: Recognized narrative labels, or ``['Other']`` when none was
        recognized. 'Other' is dropped when other labels are present.
    """
    recognized_narratives = []
    for message in chat_history:
        speaker = message.get('name') or ''
        if speaker in ('user', 'chat_manager'):
            continue
        try:
            index = int(speaker.split('_')[-1])
        except ValueError:
            # Not one of the "Agent_<index>" classifiers: nothing to map.
            continue
        # Models often pad the digit with whitespace or a newline; an exact
        # comparison would silently discard those positive answers.
        verdict = str(message.get('content') or '').strip()
        if verdict == '1':
            recognized_narratives.append(narratives_list[index])
    if len(recognized_narratives) == 0:
        recognized_narratives.append('Other')

    if len(recognized_narratives) > 1 and 'Other' in recognized_narratives:
        recognized_narratives.remove('Other')
    return recognized_narratives

In [15]:
def get_accuracy(extracted_narratives, true_narratives):
    """
    Return the share of extracted narratives that are also true narratives.

    The denominator is the number of extracted narratives; 0 is returned when
    nothing was extracted.
    """
    total = len(extracted_narratives)
    if total == 0:
        return 0
    hits = 0
    for candidate in extracted_narratives:
        if candidate in true_narratives:
            hits += 1
    return hits / total

In [None]:
# Classify the first 50 annotated documents with the narratives group chat.
histories = classify(narratives_user_proxy_agent, narratives_manager, annotations_df, 50)

In [None]:
# Score each classified document: the share of predicted narratives that
# appear in the annotated narratives of the same file (see get_accuracy).
import pprint
accuracies = []
for history in histories:
    extracted_narratives = extract_recognized_narratives(history['chat_result'])
    # Annotated narratives of the row whose id equals this history's file.
    true_narratives = annotations_df[annotations_df['id'] == history['file']]['narratives'].values[0]
    accuracies.append(get_accuracy(extracted_narratives, true_narratives))

pprint.pprint(accuracies)

In [16]:
def create_annotations_df(histories, base_path='data/EN/raw-documents'):
    """
    Build a predictions DataFrame from the chat histories of classify.

    Args:
        histories (list): Dicts with ``chat_result`` (chat history) and
            ``file`` (document id) keys.
        base_path (str): Folder the document texts are read from.

    Returns:
        pd.DataFrame: Columns ``id``, ``text`` and ``narratives``, the latter
        being the recognized narratives joined with ';'. The columns are
        present even when ``histories`` is empty.
    """
    annotations = []
    for history in histories:
        extracted_narratives = extract_recognized_narratives(history['chat_result'])
        # Remove 'Other' if there are additional narratives
        if 'Other' in extracted_narratives and len(extracted_narratives) > 1:
            extracted_narratives = [narrative for narrative in extracted_narratives if narrative != 'Other']

        # Deduplicate while keeping first-seen order; a set would make the
        # order of the joined labels vary between runs.
        extracted_narratives = list(dict.fromkeys(extracted_narratives))

        annotations.append({
            'id': history['file'],
            'text': read_text(history['file'], base_path),
            'narratives': extracted_narratives
        })
    # Explicit columns keep the frame usable when there is nothing to annotate.
    df = pd.DataFrame(annotations, columns=['id', 'text', 'narratives'])
    df['narratives'] = df['narratives'].apply(lambda x: ';'.join(x))
    return df

In [None]:
# Predicted narratives (joined with ';') for the documents classified above.
test_annotation_df = create_annotations_df(histories)

In [64]:
def create_subnarrative_agents(subnarratives_list):
    """
    Create one binary-classifier assistant agent per subnarrative.

    Each agent is named "Agent_<position>", where <position> is the index of
    its subnarrative in ``subnarratives_list``; extract_recognized_subnarratives
    relies on that suffix to map a reply back to its label.

    Args:
        subnarratives_list (list): Subnarrative labels; each one must be a key
            of the module-level ``subnarratives_dfs``.

    Returns:
        dict: Mapping of subnarrative label to its agent.
    """
    subnarrative_agents = {}
    # enumerate gives the position directly; list.index would rescan the list
    # for every label and return the wrong position for a duplicated label.
    for position, subnarrative in enumerate(subnarratives_list):
        # NOTE(review): unlike create_narrative_agents, no 'temperature' is set
        # here -- confirm whether these classifiers should also use 0.
        agent = autogen.AssistantAgent(
            name="Agent_" + str(position),
            system_message=system_prompt.format(subnarrative, subnarratives_dfs[subnarrative]['definition'], subnarratives_dfs[subnarrative]['examples']),
            llm_config={
                "config_list": [
                    {
                        "base_url": "https://api.deepseek.com",
                        "model": "deepseek-chat",
                        "api_key": os.environ.get("DEEPSEEK_API_KEY")
                    }
                ]
            }
        )

        # Short self-description attached to the agent for the group chat.
        agent.description = 'I am a classification model trained to do binary classification by detecting whether a given text is related to the following subnarrative: {}'.format(get_subnarrative_short_name(subnarrative))
        subnarrative_agents[subnarrative] = agent

    return subnarrative_agents

def create_group_chat(agent_list):
    """
    Assemble a group chat made of a user proxy and the given classifier agents.

    The chat runs for ``len(agent_list) + 1`` rounds and forbids every
    transition from a classifier agent to the user proxy.

    Args:
        agent_list (list): Agents taking part in the chat besides the proxy.

    Returns:
        tuple: ``(group_chat, manager, user_proxy_agent)``.
    """
    def _deepseek_llm_config():
        # A new dict on every call, so the proxy and the manager never share
        # a configuration object.
        return {
            "config_list": [
                {
                    "base_url": "https://api.deepseek.com",
                    "model": "deepseek-chat",
                    "api_key": os.environ.get("DEEPSEEK_API_KEY")
                }
            ]
        }

    user_proxy_agent = autogen.UserProxyAgent(
        name="user",
        code_execution_config=False,
        llm_config=_deepseek_llm_config(),
        human_input_mode='NEVER'
    )

    # Passed with speaker_transitions_type='disallowed': these are the
    # transitions that must NOT happen (classifier agent -> user proxy).
    blocked_transitions = {agent: [user_proxy_agent] for agent in agent_list}
    participants = [user_proxy_agent] + agent_list

    group_chat = autogen.GroupChat(
        agents=participants,
        messages=[],
        max_round=len(agent_list) + 1,
        send_introductions=True,
        allowed_or_disallowed_speaker_transitions=blocked_transitions,
        speaker_transitions_type='disallowed',
    )

    manager = autogen.GroupChatManager(
        groupchat=group_chat,
        llm_config=_deepseek_llm_config(),
        system_message="You are a group chat manager. You are asked to give the classification task to the agents that look relevant to the topic"
    )

    print('Created group chat with the following agents: ', agent_list)
    return group_chat, manager, user_proxy_agent

In [18]:
# group subnarrative agents by narrative

def group_subnarrative_agents_by_narrative(subnarrative_agents):
    """
    Group subnarrative agents under their parent narrative label.

    The parent label is made of the first two ': '-separated parts of the
    subnarrative label. The bare 'Other' entry has no parent and is skipped.

    Args:
        subnarrative_agents (dict): Mapping of subnarrative label to agent.

    Returns:
        dict: Mapping of "main: narrative" label to the list of its agents,
        in the iteration order of the input.
    """
    grouped_agents = {}
    for label, agent in subnarrative_agents.items():
        if label != 'Other':
            parts = label.split(': ')
            parent = parts[0] + ': ' + parts[1]
            grouped_agents.setdefault(parent, []).append(agent)
    return grouped_agents

In [19]:
# One agent per subnarrative label, then the same agents grouped by their
# parent narrative label.
subnarrative_agents = create_subnarrative_agents(subnarratives_list)
grouped_agents = group_subnarrative_agents_by_narrative(subnarrative_agents)


In [20]:
def create_group_chat_for_narrative(narrative):
    """Build a group chat from the subnarrative agents stored in ``grouped_agents`` for ``narrative``."""
    agents_for_narrative = grouped_agents[narrative]
    return create_group_chat(agents_for_narrative)

def extract_recognized_subnarratives(chat_history):
    """
    Turn a group-chat history into the list of recognized subnarratives.

    Messages from 'user' and 'chat_manager' are ignored. Every other speaker
    is expected to be named "Agent_<index>"; a reply of '1' (surrounding
    whitespace ignored) selects ``subnarratives_list[index]``.

    Args:
        chat_history (list): Message dicts with ``name`` and ``content`` keys.

    Returns:
        list: Recognized subnarrative labels, or ``['Other']`` when none was
        recognized. 'Other' is dropped when other labels are present.
    """
    recognized_subnarratives = []
    for message in chat_history:
        speaker = message.get('name') or ''
        if speaker in ('user', 'chat_manager'):
            continue
        try:
            index = int(speaker.split('_')[-1])
        except ValueError:
            # Not one of the "Agent_<index>" classifiers: nothing to map.
            continue
        # Models often pad the digit with whitespace or a newline; an exact
        # comparison would silently discard those positive answers.
        verdict = str(message.get('content') or '').strip()
        if verdict == '1':
            recognized_subnarratives.append(subnarratives_list[index])
    if len(recognized_subnarratives) == 0:
        recognized_subnarratives.append('Other')

    if len(recognized_subnarratives) > 1 and 'Other' in recognized_subnarratives:
        recognized_subnarratives.remove('Other')
    return recognized_subnarratives

def extract_subnarratives_for_one_narrative(narrative, text):
    """
    Classify ``text`` against the subnarratives of a single narrative.

    A new group chat is created for the narrative on every call; the text is
    sent to its manager and the resulting chat history is converted into
    subnarrative labels.

    Returns:
        list: Output of extract_recognized_subnarratives for that chat.
    """
    # Only the manager and the proxy are needed; the chat object is unused.
    _, chat_manager, proxy = create_group_chat_for_narrative(narrative)
    prompt = "Here is the text that needs to be classified: \n```{}```\nYou are ONLY allowed to reply with '0' or '1'".format(text)
    outcome = proxy.initiate_chat(
        chat_manager,
        message=prompt,
        summary_method='reflection_with_llm'
    )
    return extract_recognized_subnarratives(outcome.chat_history)

def extract_subnarratives_for_narratives(narratives_list, text):
    """
    Extract the subnarratives of ``text`` for each of the given narratives.

    Returns:
        list: ``['Other']`` (a flat list) when the only narrative is 'Other';
        otherwise one list of subnarratives per narrative, in input order.
    """
    only_other = len(narratives_list) == 1 and narratives_list[0] == 'Other'
    if only_other:
        return ['Other']
    return [
        extract_subnarratives_for_one_narrative(narrative, text)
        for narrative in narratives_list
    ]

In [21]:
def get_subnarratives_for_df(df):
    """
    Add a ``subnarratives`` column to ``df`` and return the same frame.

    For every row, the ';'-separated ``narratives`` string is split and passed
    with the row's ``text`` to extract_subnarratives_for_narratives. The frame
    is modified in place.
    """
    df['subnarratives'] = [
        extract_subnarratives_for_narratives(row['narratives'].split(';'), row['text'])
        for _, row in df.iterrows()
    ]
    return df


In [None]:
# Adds the 'subnarratives' column to test_annotation_df in place.
get_subnarratives_for_df(test_annotation_df)

In [22]:
# concat arrays
def concat_arrays(arr):
    """
    Concatenate a sequence of sequences into one flat list.

    An empty input gives ``[]``. A single-element input returns that element
    itself, unchanged and not copied.
    """
    count = len(arr)
    if count == 0:
        return []
    if count == 1:
        return arr[0]
    merged = []
    for sublist in arr:
        merged.extend(sublist)
    return merged

def update_narratives_for_only_other(df):
    """
    Set ``narratives`` to 'Other' on rows whose ``subnarratives`` equal ['Other'].

    Parameters:
    df (pd.DataFrame): Frame with 'narratives' and 'subnarratives' columns.

    Returns:
    pd.DataFrame: The frame produced by applying the rule row by row.
    """
    def _collapse_to_other(row):
        only_other = row['subnarratives'] == ['Other']
        if only_other:
            row['narratives'] = 'Other'
        return row

    return df.apply(_collapse_to_other, axis=1)

def remove_parasit_other(df):
    """
    Drop a redundant 'Other' from each list of the 'subnarratives' column.

    'Other' is removed only when the list holds more than one element; a list
    that is exactly ['Other'] is left untouched. The first occurrence is
    removed and the lists are modified in place.

    Parameters:
    df (pd.DataFrame): Frame with a 'subnarratives' column of lists.

    Returns:
    pd.DataFrame: The same frame, with its 'subnarratives' column reassigned.
    """
    def _strip_redundant_other(labels):
        if len(labels) <= 1:
            return labels
        if 'Other' in labels:
            labels.remove('Other')
        return labels

    df['subnarratives'] = df['subnarratives'].apply(_strip_redundant_other)
    return df

def flatten_and_deduplicate_subnarratives(df, column_name):
    """
    Flatten and deduplicate the (possibly nested) lists of one column.

    Each list cell may mix plain items and inner lists; inner lists are
    expanded one level. Duplicates are dropped and the first-seen order is
    kept, so the result is the same on every run.

    Parameters:
    df (pd.DataFrame): The input DataFrame (modified in place).
    column_name (str): The name of the column to process.

    Returns:
    pd.DataFrame: The same DataFrame with the processed column.
    """
    def process_row(value):
        if not isinstance(value, list):
            return value  # Return as-is if not a list
        # A dict is used as an ordered set: it drops duplicates while keeping
        # insertion order, unlike a set whose order for strings changes
        # between interpreter runs.
        ordered = {}
        for item in value:
            if isinstance(item, list):
                ordered.update(dict.fromkeys(item))  # Add elements from inner lists
            else:
                ordered[item] = None  # Add single elements
        return list(ordered)

    # Apply the processing function to the specified column
    df[column_name] = df[column_name].apply(process_row)
    return df

In [23]:
def get_df_to_right_format(df):
    """
    Convert a predictions frame to the (id, narratives, subnarratives) layout.

    Steps, in order: flatten and deduplicate the subnarratives, drop a
    redundant 'Other', relabel the narratives of rows whose subnarratives are
    only 'Other', join the subnarratives with ';', keep the three columns.

    Returns:
    pd.DataFrame: Frame with the columns 'id', 'narratives', 'subnarratives'.
    """
    flattened = flatten_and_deduplicate_subnarratives(df, 'subnarratives')
    cleaned = remove_parasit_other(flattened)
    relabelled = update_narratives_for_only_other(cleaned)
    # Serialise each list of labels as one ';'-separated string.
    relabelled['subnarratives'] = relabelled['subnarratives'].apply(lambda labels: ';'.join(labels))
    return relabelled[['id', 'narratives', 'subnarratives']]

In [24]:
def calculate_classification_accuracy(predictions_df, ground_truth_df):
    """
    Score subnarrative predictions against ground truth with a per-row IoU.

    Label cells may be a list of labels, a list of lists of labels, or a
    ';'-separated string; all are normalised to a set of labels before the
    comparison.

    Parameters:
    predictions_df (pd.DataFrame): Predictions, including 'id' and 'subnarratives'.
    ground_truth_df (pd.DataFrame): Ground truth, including 'id' and 'subnarratives'.

    Returns:
    float: Mean IoU over the rows present in both frames (NaN when none).
    pd.DataFrame: The merged frame with an added 'iou' column.
    """
    def _as_label_set(value):
        # Missing cell (None or NaN): no labels.
        if value is None or isinstance(value, float):
            return set()
        # A joined string must be split first: set('A;B') would otherwise be a
        # set of characters and produce a meaningless score.
        if isinstance(value, str):
            return {label.strip() for label in value.split(';') if label.strip()}
        labels = set()
        for item in value:
            # Inner lists are unhashable, so expand them instead of adding them.
            if isinstance(item, (list, tuple, set)):
                labels.update(item)
            else:
                labels.add(item)
        return labels

    def calculate_iou(predicted, true):
        predicted_set = _as_label_set(predicted)
        true_set = _as_label_set(true)

        # Handle edge case: both are empty
        if not predicted_set and not true_set:
            return 1.0

        # Intersection over union (IoU)
        intersection = len(predicted_set & true_set)
        union = len(predicted_set | true_set)
        return intersection / union if union > 0 else 0.0

    # Merge predictions and ground truth on 'id'
    merged_df = predictions_df.merge(ground_truth_df, on='id', suffixes=('_predicted', '_true'))

    # A plain list is used instead of a row-wise apply so that an empty merge
    # still produces a valid (empty) column.
    merged_df['iou'] = [
        calculate_iou(predicted, true)
        for predicted, true in zip(merged_df['subnarratives_predicted'], merged_df['subnarratives_true'])
    ]

    # Calculate overall accuracy (average IoU)
    overall_accuracy = merged_df['iou'].mean()

    return overall_accuracy, merged_df


In [None]:
# Ground truth for the evaluation: the first 50 rows of annotations_df,
# matching the 50 documents passed to classify.
#test_annotation_df['narratives'] = test_annotation_df['narratives'].apply(lambda x: x.split(';'))
#test_annotation_df['subnarratives'] = test_annotation_df['subnarratives'].apply(lambda x: x.split(';'))
truth_annotation_df = annotations_df.head(50)

In [None]:
# Mean IoU over the evaluated rows, plus the merged frame with per-row scores.
overall_accuracy, merged_df = calculate_classification_accuracy(test_annotation_df, truth_annotation_df)

In [25]:
def load_files(folder):
    """
    List the '.txt' file names found directly inside ``folder``.

    Args:
        folder (str): Directory to scan (not recursive).

    Returns:
        list: File names (not full paths), sorted alphabetically.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order and the exported file reproducible.
    return sorted(name for name in os.listdir(folder) if name.endswith('.txt'))

def create_text_df(files, base_path='devset/EN/subtask-2-documents'):
    """
    Read the given documents into a DataFrame.

    Args:
        files (list): File names to read.
        base_path (str): Folder that contains the files. Defaults to the
            English dev-set folder that was previously hard-coded.

    Returns:
        pd.DataFrame: Columns ``id`` (file name) and ``text`` (file content);
        both columns are present even when ``files`` is empty.
    """
    texts = [
        {'id': file, 'text': read_text(file, base_path=base_path)}
        for file in files
    ]
    # Explicit columns keep downstream column access valid for an empty folder.
    return pd.DataFrame(texts, columns=['id', 'text'])

# Dev-set documents: one row per '.txt' file, with 'id' and 'text' columns.
files = load_files('devset/EN/subtask-2-documents')
dev_df = create_text_df(files)

In [26]:
# Reset every narrative agent, then every subnarrative agent (looked up by
# label), before running on the dev set.
reset_agents(narrative_agents)
for subnarrative in subnarratives_list:
    subnarrative_agents[subnarrative].reset()

In [27]:
# NOTE(review): this rebinds narrative_agents to new agent objects, but
# narratives_group_chat was built from the previous list and is not rebuilt
# here -- confirm whether the group chat should be re-created as well.
narrative_agents = create_narrative_agents(narratives_list)
#histories = classify(narratives_user_proxy_agent, narratives_manager, dev_df, len(dev_df))
#dev_df = create_annotations_df(histories, base_path='devset/EN/subtask-2-documents')

In [28]:
# Re-create the subnarrative agents and their grouping by parent narrative.
# (The former `subnarrative_agents = []` was a dead store of the wrong type:
# the name is a dict and was overwritten on the very next line.)
subnarrative_agents = create_subnarrative_agents(subnarratives_list)
grouped_agents = group_subnarrative_agents_by_narrative(subnarrative_agents)


In [None]:
# Adds the 'subnarratives' column to dev_df in place.
# NOTE(review): get_subnarratives_for_df reads a 'narratives' column, which
# create_text_df does not produce; the cell that would add it (classify +
# create_annotations_df) is commented out above -- confirm the run order.
get_subnarratives_for_df(dev_df)

In [None]:
# Reduce dev_df to the id / narratives / subnarratives columns, with the
# subnarratives joined by ';'.
dev_df = get_df_to_right_format(dev_df)

In [None]:
# Export the predictions as a tab-separated file without header or index.
# The column selection repeats the one done by get_df_to_right_format.
dev_df = dev_df[['id', 'narratives', 'subnarratives']]
dev_df.to_csv('devset/EN/subtask-2-annotations.txt', sep='\t', header=False, index=False)

In [51]:
def generate_predictions_for_texts(folder):
    """
    Run the full narrative and subnarrative pipeline on the documents of ``folder``.

    The module-level agents and group chat are used as they are:
    ``narratives_user_proxy_agent`` / ``narratives_manager`` for narratives
    and ``grouped_agents`` (through get_subnarratives_for_df) for
    subnarratives.

    Args:
        folder (str): Directory that contains the '.txt' documents.

    Returns:
        pd.DataFrame: Columns ``id``, ``text``, ``narratives`` (';'-joined
        string) and ``subnarratives`` (flat, deduplicated list of labels).
        ``narratives`` is 'Other' on rows whose subnarratives are only 'Other'.
    """
    # Sorted so that the processing order and the row order are reproducible.
    files = sorted(name for name in os.listdir(folder) if name.endswith('.txt'))
    predictions_df = pd.DataFrame(
        [{'id': name, 'text': read_text(name, base_path=folder)} for name in files],
        columns=['id', 'text'],
    )

    # Stage 1: narratives. The previous local re-creation of narrative_agents
    # was removed: a local name never reached the group chat behind
    # narratives_manager, so it had no effect.
    histories = classify(narratives_user_proxy_agent, narratives_manager, predictions_df, len(predictions_df))
    predictions_df = create_annotations_df(histories, base_path=folder)

    # Stage 2: subnarratives. The local subnarrative_agents / grouped_agents
    # were removed for the same reason: the helpers read the module-level
    # grouped_agents.
    predictions_df = get_subnarratives_for_df(predictions_df)

    # Post-processing. The result of every step is kept: the narratives update
    # returns a new frame, so discarding it silently lost the relabelling.
    # Subnarratives stay as lists because the caller joins them when exporting.
    predictions_df = flatten_and_deduplicate_subnarratives(predictions_df, 'subnarratives')
    predictions_df = remove_parasit_other(predictions_df)
    predictions_df = update_narratives_for_only_other(predictions_df)
    return predictions_df

In [52]:
# Run the full pipeline on the English dev-set documents.
test_df = generate_predictions_for_texts('devset/EN/subtask-2-documents')

[33muser[0m (to chat_manager):

Here is the text that needs to be classified: 
```Biden’s green policies are making housing no longer affordable for the average American 

 This is according to Edward Ring, an author and a senior fellow with the conservative think tank, the California Policy Center, who warned of the far-reaching consequences of Biden's "globalist green agenda."

"To allegedly save us all from a 'climate crisis,' development and use of oil, natural gas and coal is being halted," wrote Ring for American Greatness, while at the same time pointing out that the development of renewable energy sources like hydroelectric and nuclear power plants is also being slowed down, if not completely stopped.

Ring added that newer energy sources will come from more inefficient and expensive wind, solar and biomass energy generation. These are far more destructive to the environment, require the extraction and importing of more raw materials for their construction and maintenance "an

AuthenticationError: Error code: 401 - {'error': {'message': 'Authentication Fails (no such user)', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}

In [53]:
# Keep only the submission columns and serialise each list of subnarratives as
# a ';'-separated string. Selecting and assigning in one chained expression
# builds a new frame, which avoids pandas' SettingWithCopyWarning raised when
# a column is written into a slice of another frame.
test_df = test_df[['id', 'narratives', 'subnarratives']].assign(
    subnarratives=lambda frame: frame['subnarratives'].apply(';'.join)
)
test_df.to_csv('devset/EN/subtask-2-annotations-4o-mini.txt', sep='\t', header=False, index=False)

# Computing the confusion matrix

In [56]:
import csv

def load_annotation_dicts(filepath):
    """
    Parse a tab-separated annotation file into two label dictionaries.

    Each usable row holds a file id, ';'-separated narratives and
    ';'-separated subnarratives. Rows with fewer than three fields are
    skipped; labels are whitespace-trimmed and empty labels are dropped.
    A later row with the same file id replaces the earlier one.

    Returns:
        tuple: ``(narratives_dict, subnarratives_dict)``, each mapping a file
        id to a set of labels.
    """
    def _labels(field):
        return {part.strip() for part in field.split(";") if part.strip()}

    narratives_dict, subnarratives_dict = {}, {}

    with open(filepath, "r", encoding="utf-8") as handle:
        for fields in csv.reader(handle, delimiter="\t"):
            if len(fields) >= 3:
                key = fields[0].strip()
                narratives_dict[key] = _labels(fields[1])
                subnarratives_dict[key] = _labels(fields[2])

    return narratives_dict, subnarratives_dict

In [77]:
from sklearn.metrics import confusion_matrix

def compute_multilabel_confusion_matrices(pred_dict, gold_dict):
    """
    Builds one binary (label present / label absent) confusion matrix per label.

    pred_dict: {file_id: set_of_labels}
    gold_dict: {file_id: set_of_labels}

    A file id that appears in only one of the two dicts is treated as having
    no labels in the other one.

    Returns a dict: { label: confusion_matrix(2x2) }
                    where confusion_matrix is [[TN, FP],
                                               [FN, TP]]
    """
    # 1) Collect all unique labels
    all_labels = set()
    for labels in pred_dict.values():
        all_labels.update(labels)
    for labels in gold_dict.values():
        all_labels.update(labels)

    # 2) Fix one ordering of the file IDs (union of both key sets) so that
    #    y_true and y_pred are aligned row by row.
    all_file_ids = list(set(pred_dict.keys()).union(gold_dict.keys()))

    label_to_confmat = {}
    for label in sorted(all_labels):
        # 1 if the label is in the gold / predicted set of the file, else 0
        y_true = [1 if label in gold_dict.get(file_id, set()) else 0 for file_id in all_file_ids]
        y_pred = [1 if label in pred_dict.get(file_id, set()) else 0 for file_id in all_file_ids]

        # 3) Compute scikit-learn confusion matrix.
        # labels=[0, 1] forces the 2x2 layout [[TN, FP], [FN, TP]]. Without it,
        # a label that is gold AND predicted for every file makes both vectors
        # all ones and scikit-learn returns a 1x1 matrix, which breaks callers
        # that unpack cm.ravel() into four values.
        label_to_confmat[label] = confusion_matrix(y_true, y_pred, labels=[0, 1])

    return label_to_confmat


In [57]:
pred_file = "devset/EN/subtask-2-annotations-4o-mini.txt"
gold_file = "devset_gold_labels/EN/subtask-2-annotations.txt"

# Predicted and gold annotations, each as {file_id: set_of_labels}
pred_narr, pred_subnarr = load_annotation_dicts(pred_file)
gold_narr, gold_subnarr = load_annotation_dicts(gold_file)

# Per-label confusion matrices at both levels of the taxonomy
narr_conf_mats = compute_multilabel_confusion_matrices(pred_narr, gold_narr)
subnarr_conf_mats = compute_multilabel_confusion_matrices(pred_subnarr, gold_subnarr)

# Same report layout for both levels: a heading, then one line per label
for heading, conf_mats in (
    ("=== Confusion Matrices for Narratives ===", narr_conf_mats),
    ("\n=== Confusion Matrices for Subnarratives ===", subnarr_conf_mats),
):
    print(heading)
    for label, cm in conf_mats.items():
        tn, fp, fn, tp = cm.ravel()
        print(f"{label} => TN={tn}, FP={fp}, FN={fn}, TP={tp}")

=== Confusion Matrices for Narratives ===
CC: Amplifying Climate Fears => TN=39, FP=2, FN=0, TP=0
CC: Climate change is beneficial => TN=40, FP=0, FN=0, TP=1
CC: Controversy about green technologies => TN=34, FP=5, FN=0, TP=2
CC: Criticism of climate movement => TN=25, FP=8, FN=1, TP=7
CC: Criticism of climate policies => TN=24, FP=14, FN=2, TP=1
CC: Criticism of institutions and authorities => TN=27, FP=6, FN=3, TP=5
CC: Downplaying climate change => TN=36, FP=3, FN=2, TP=0
CC: Green policies are geopolitical instruments => TN=37, FP=1, FN=1, TP=2
CC: Hidden plots by secret schemes of powerful groups => TN=36, FP=1, FN=4, TP=0
CC: Questioning the measurements and science => TN=36, FP=1, FN=1, TP=3
Other => TN=28, FP=2, FN=6, TP=5
URW: Amplifying war-related fears => TN=36, FP=2, FN=3, TP=0
URW: Blaming the war on others rather than the invader => TN=35, FP=0, FN=2, TP=4
URW: Discrediting Ukraine => TN=34, FP=0, FN=2, TP=5
URW: Discrediting the West, Diplomacy => TN=32, FP=0, FN=4, TP=

In [58]:
from sklearn.metrics import classification_report

def compute_classification_report(pred_dict, gold_dict):
    """
    Prints scikit-learn's classification report for multi-label predictions.

    Args:
        pred_dict: {file_id: set_of_labels} predicted labels
        gold_dict: {file_id: set_of_labels} gold labels

    A file id missing from one of the dicts counts as having no labels there.
    The report is printed; nothing is returned.
    """
    # Sorted union of every label seen on either side: the column order
    gold_labels = {lab for labs in gold_dict.values() for lab in labs}
    pred_labels = {lab for labs in pred_dict.values() for lab in labs}
    all_labels = sorted(gold_labels.union(pred_labels))

    def multi_hot(label_set):
        # One 0/1 entry per label, following the order of all_labels
        return [1 if label in label_set else 0 for label in all_labels]

    # One row per file ID found in either dict
    file_ids = list(set(pred_dict.keys()).union(gold_dict.keys()))
    y_true_multi = [multi_hot(gold_dict.get(file_id, set())) for file_id in file_ids]
    y_pred_multi = [multi_hot(pred_dict.get(file_id, set())) for file_id in file_ids]

    report = classification_report(y_true_multi, y_pred_multi, target_names=all_labels, zero_division=0)
    print(report)


In [59]:
def compute_global_confusion_matrix(pred_dict, gold_dict):
    """
    Computes the sum of TP, FP, FN, TN across all labels.

    Every (file, label) pair is one binary decision, where the labels are all
    labels seen in either dict and the files are all file ids seen in either
    dict. A file id missing from one dict counts as having no labels there.

    The counts are derived with set arithmetic per file instead of one
    scikit-learn confusion matrix per label. This avoids the 1x1 matrix that
    `confusion_matrix` returns when a label is gold and predicted for every
    file, which made the four-way unpacking of `.ravel()` raise ValueError.

    Args:
        pred_dict: {file_id: set_of_labels} predicted labels
        gold_dict: {file_id: set_of_labels} gold labels

    Returns: (total_TP, total_FP, total_FN, total_TN)
    """
    # Union of all file IDs
    all_file_ids = set(pred_dict.keys()).union(gold_dict.keys())

    all_labels = set()
    total_TP, total_FP, total_FN = 0, 0, 0

    for file_id in all_file_ids:
        # set() also collapses duplicates if a caller passes lists
        gold_labels = set(gold_dict.get(file_id, ()))
        pred_labels = set(pred_dict.get(file_id, ()))

        all_labels |= gold_labels | pred_labels

        total_TP += len(gold_labels & pred_labels)   # predicted and correct
        total_FP += len(pred_labels - gold_labels)   # predicted but not gold
        total_FN += len(gold_labels - pred_labels)   # gold but not predicted

    # Every remaining (file, label) decision is a true negative
    total_TN = len(all_labels) * len(all_file_ids) - total_TP - total_FP - total_FN

    return total_TP, total_FP, total_FN, total_TN


In [62]:
# Paths
pred_file = "devset/EN/subtask-2-annotations.txt"
gold_file = "devset_gold_labels/EN/subtask-2-annotations.txt"

# Predicted and gold annotations, each as {file_id: set_of_labels}
pred_narr, pred_subnarr = load_annotation_dicts(pred_file)
gold_narr, gold_subnarr = load_annotation_dicts(gold_file)

# Counts summed over all labels, for narratives and for subnarratives
global_TP_narr, global_FP_narr, global_FN_narr, global_TN_narr = compute_global_confusion_matrix(pred_narr, gold_narr)
global_TP_subnarr, global_FP_subnarr, global_FN_subnarr, global_TN_subnarr = compute_global_confusion_matrix(pred_subnarr, gold_subnarr)

print("=== Global Confusion Matrix for Narratives ===")
print(f"TP: {global_TP_narr}, FP: {global_FP_narr}, FN: {global_FN_narr}, TN: {global_TN_narr}")

print("\n=== Global Confusion Matrix for Subnarratives ===")
print(f"TP: {global_TP_subnarr}, FP: {global_FP_subnarr}, FN: {global_FN_subnarr}, TN: {global_TN_subnarr}")


def _precision_recall_f1(tp, fp, fn):
    """Precision, recall and F1 from summed counts; each falls back to 0 on a zero denominator."""
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    return precision, recall, f1


# Global Performance Metrics
precision_narr, recall_narr, f1_narr = _precision_recall_f1(global_TP_narr, global_FP_narr, global_FN_narr)
precision_subnarr, recall_subnarr, f1_subnarr = _precision_recall_f1(global_TP_subnarr, global_FP_subnarr, global_FN_subnarr)

print("\n=== Global Performance Metrics ===")
print(f"Narrative Precision: {precision_narr:.4f}, Recall: {recall_narr:.4f}, F1-score: {f1_narr:.4f}")
print(f"Subnarrative Precision: {precision_subnarr:.4f}, Recall: {recall_subnarr:.4f}, F1-score: {f1_subnarr:.4f}")


=== Global Confusion Matrix for Narratives ===
TP: 43, FP: 39, FN: 42, TN: 778

=== Global Confusion Matrix for Subnarratives ===
TP: 52, FP: 62, FN: 62, TN: 2407

=== Global Performance Metrics ===
Narrative Precision: 0.5244, Recall: 0.5059, F1-score: 0.5150
Subnarrative Precision: 0.4561, Recall: 0.4561, F1-score: 0.4561


In [45]:
def extract_narrative_for_text(text):
    """
    Runs the narratives group chat on one text and returns the recognized narratives.

    Args:
        text (str): The text to classify; it is embedded in the chat message
            between triple backticks.

    Returns:
        Whatever `extract_recognized_narratives` produces from the chat history.
    """
    prompt = "Here is the text that needs to be classified: \n```{}```\n ### \n You are ONLY allowed to reply with '0' or '1'".format(text)
    chat_result = narratives_user_proxy_agent.initiate_chat(
        narratives_manager,
        message=prompt,
        summary_method='reflection_with_llm',
    )
    return extract_recognized_narratives(chat_result.chat_history)

def extract_subnarratives_for_text(narratives, text):
    """
    Collects the subnarrative extraction result for each given narrative.

    Args:
        narratives: Iterable of narratives to look up subnarratives for.
        text: The text being classified, passed through to each extraction.

    Returns:
        list: One `extract_subnarratives_for_one_narrative` result per
              narrative, in the order the narratives were given.
    """
    return [
        extract_subnarratives_for_one_narrative(narrative, text)
        for narrative in narratives
    ]

In [49]:
# Example input text that is run through the narrative and subnarrative extraction below.
text = "The study, published in Environmental Research Letters, reveals significant changes in the relationship between vegetation growth and water availability in the Northern Hemisphere's mid-latitudes over the past three decades. The research, led by Yang Song and colleagues, highlights the impact of elevated carbon dioxide (CO2) levels on this relationship, suggesting a closer relationship between vegetation growth and water availability than previously understood. The very compound that the Democrats are targeting – CO2 – is actually the solution to preserving croplands, grasslands, forests and water supplies for growing populations."

# set() removes duplicate narratives; the order of the resulting list is not guaranteed.
extracted_narratives = list(set(extract_narrative_for_text(text)))
# One subnarrative result per recognized narrative, in the order of extracted_narratives.
extracted_subnarratives = extract_subnarratives_for_text(extracted_narratives, text)

[33muser[0m (to chat_manager):

Here is the text that needs to be classified: 
```The study, published in Environmental Research Letters, reveals significant changes in the relationship between vegetation growth and water availability in the Northern Hemisphere's mid-latitudes over the past three decades. The research, led by Yang Song and colleagues, highlights the impact of elevated carbon dioxide (CO2) levels on this relationship, suggesting a closer relationship between vegetation growth and water availability than previously understood. The very compound that the Democrats are targeting – CO2 – is actually the solution to preserving croplands, grasslands, forests and water supplies for growing populations.```
 ### 
 You are ONLY allowed to reply with '0' or '1'

--------------------------------------------------------------------------------


AuthenticationError: Error code: 401 - {'error': {'message': 'Authentication Fails (no such user)', 'type': 'authentication_error', 'param': None, 'code': 'invalid_request_error'}}

In [50]:
# Show the recognized narratives, then their subnarrative results, one per line
print(extracted_narratives, extracted_subnarratives, sep="\n")

['CC: Climate change is beneficial']
[['CC: Climate change is beneficial: CO2 is beneficial']]
