In [1]:
import pandas as pd
import json

def load_annotations_df(path):
    """
    Read a headerless, tab-separated annotation file into a DataFrame.

    Args:
        path (str): Location of the annotation file.

    Returns:
        pd.DataFrame: Columns 'id', 'narratives' and 'subnarratives'; the two
        label columns hold lists obtained by splitting each cell on ';'.
    """
    frame = pd.read_csv(path, sep='\t', header=None)
    frame.columns = ['id', 'narratives', 'subnarratives']

    # Both label columns are stored as ';'-separated strings in the file.
    for label_column in ('narratives', 'subnarratives'):
        frame[label_column] = frame[label_column].apply(lambda cell: cell.split(';'))

    return frame

# Gold annotations for the English training documents, with 'id', 'narratives'
# and 'subnarratives' columns (the label columns are lists of strings).
annotations_df = load_annotations_df('data/EN/subtask-2-annotations.txt')

In [2]:
# method to extract the list of all unique subnarratives
def get_subnarratives_list(file):
    """
    Build the flat list of fully-qualified subnarrative labels from a taxonomy file.

    Args:
        file (str): Path to a JSON file shaped as
            {main_category: {narrative: [subnarrative, ...]}}.

    Returns:
        list: "Other" followed by one "<main_category>: <narrative>: <subnarrative>"
        entry per subnarrative. Every narrative also gets an
        "<main_category>: <narrative>: Other" entry if the file does not list one.
    """
    subnarratives = ["Other"]
    # Decode explicitly as UTF-8: labels contain non-ASCII characters (e.g. the
    # typographic apostrophe in "Ukraine’s"), which the platform default
    # encoding may not decode correctly.
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for main_category, subcategories in data.items():
        for subcategory, narratives in subcategories.items():
            labels = list(narratives)
            # Guarantee a catch-all label for every narrative.
            if "Other" not in labels:
                labels.append("Other")

            for label in labels:
                subnarratives.append(f"{main_category}: {subcategory}: {label}")

    return subnarratives

def get_narratives_list(file):
    """
    Build the flat list of fully-qualified narrative labels from a taxonomy file.

    Args:
        file (str): Path to a JSON file shaped as
            {main_category: {narrative: [subnarrative, ...]}}.

    Returns:
        list: "Other" followed by one "<main_category>: <narrative>" entry per
        narrative, in file order.
    """
    narratives = ["Other"]
    # Decode explicitly as UTF-8 so non-ASCII labels are read the same way on
    # every platform.
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    for main_category, subcategories in data.items():
        # Only the narrative names (the keys) are needed here.
        for subcategory, _ in subcategories.items():
            narratives.append(f"{main_category}: {subcategory}")

    return narratives

# Flat label lists used to create one classifier per narrative / subnarrative.
narratives_list = get_narratives_list('data/taxonomy.json')
subnarratives_list = get_subnarratives_list('data/taxonomy.json')

# Raw nested taxonomy, used to look up sibling subnarratives. The context
# manager closes the file handle, and UTF-8 is requested explicitly because the
# labels contain non-ASCII characters.
with open('data/taxonomy.json', 'r', encoding='utf-8') as taxonomy_file:
    taxonomy = json.load(taxonomy_file)

In [3]:
import os
def read_text(file_id, base_path='data/EN/raw-documents'):
    """
    Return the whole content of one document, decoded as UTF-8.

    Args:
        file_id: File name of the document inside `base_path`.
        base_path (str): Directory that holds the documents.

    Returns:
        str: The text of the file.
    """
    document_path = os.path.join(base_path, f'{file_id}')
    with open(document_path, 'r', encoding='utf-8') as document:
        contents = document.read()
    return contents
    
def get_sibling_subnarratives(subnarrative):
    """
    Look up the subnarratives stored under the same narrative in `taxonomy`.

    Args:
        subnarrative (str): Label of the form
            "<main_category>: <narrative>: <subnarrative>".

    Returns:
        list: The taxonomy entry for that narrative, i.e. the subnarratives
        listed under it (the given one included when it is part of the taxonomy).
    """
    path_parts = subnarrative.split(': ')
    category_branch = taxonomy[path_parts[0]]
    return category_branch[path_parts[1]]

def get_narrative_definition(narrative):
    """
    Return the textual definition of a narrative.

    'Other' gets a generated definition that enumerates the short names of all
    known narratives; every other narrative is looked up by its short name (the
    part after the last ':') in 'data/narratives definition.csv'.

    Raises:
        IndexError: If the CSV has no row for the narrative.
    """
    if narrative == 'Other':
        known_topics = [
            get_narrative_short_name(name)
            for name in narratives_list
            if name != 'Other'
        ]
        return 'Statements that are NOT related to anyone of these topics : {}'.format(', '.join(known_topics))

    definitions_table = pd.read_csv('data/narratives definition.csv')
    short_name = narrative.split(':')[-1].strip()
    matching_rows = definitions_table[definitions_table['narrative'] == short_name]
    return matching_rows['definition'].values[0]

def get_narrative_examples(narrative):
    """
    Return the example text stored for a narrative, or None for 'Other'.

    The narrative is matched by its short name (the part after the last ':')
    against the 'narrative' column of 'data/narratives definition.csv'.

    Raises:
        IndexError: If the CSV has no row for the narrative.
    """
    if narrative != 'Other':
        definitions_table = pd.read_csv('data/narratives definition.csv')
        short_name = narrative.split(':')[-1].strip()
        matching_rows = definitions_table[definitions_table['narrative'] == short_name]
        return matching_rows['example'].values[0]
    return None

def get_narrative_short_name(narrative):
    """Return the text after the last ':' of a narrative label, stripped of whitespace."""
    _, _, tail = narrative.rpartition(':')
    return tail.strip()

def get_subnarrative_definition(subnarrative):
    """
    Return the textual definition of a subnarrative.

    Three cases are handled:
      * the global 'Other' label: a generated sentence listing every narrative;
      * "<category>: <narrative>: Other": a generated sentence built from the
        parent narrative's definition and its sibling subnarratives;
      * any other label: the 'definition' cell of the row in
        'data/subnarrative definitions.csv' whose 'subnarrative' equals the
        short name (the part after the last ':').

    Raises:
        IndexError: If the CSV has no row for the subnarrative.
    """
    if subnarrative == 'Other':
        return 'Statements that are NOT related to anyone of these narratives : {}'.format(', '.join([narrative for narrative in narratives_list if narrative != 'Other']))

    parts = subnarrative.split(':')
    short_name = parts[-1].strip()
    if short_name == 'Other':
        # The parent narrative's definition is only needed for this branch, so
        # it is looked up here rather than on every call (it reads a CSV file).
        narrative = parts[-2].strip()
        narrative_definition = get_narrative_definition(narrative)
        return 'Statement that are related to the narrative "{}", defined as {} but are not related to anyone of these subnarratives : {}'.format(get_narrative_short_name(narrative), narrative_definition, get_sibling_subnarratives(subnarrative))

    subnarrative_definitions = pd.read_csv('data/subnarrative definitions.csv')
    return subnarrative_definitions[subnarrative_definitions['subnarrative'] == short_name]['definition'].values[0]


def get_subnarrative_examples(subnarrative):
    """
    Return the example text stored for a subnarrative.

    Any label whose short name (the part after the last ':') is 'Other' has no
    examples and yields None. Other labels are looked up by short name in
    'data/subnarrative definitions.csv'.

    Raises:
        IndexError: If the CSV has no row for the subnarrative.
    """
    short_name = subnarrative.split(':')[-1].strip()
    # Covers both the global 'Other' and every "<...>: Other" catch-all.
    if short_name == 'Other':
        return None
    definitions_table = pd.read_csv('data/subnarrative definitions.csv')
    matching_rows = definitions_table[definitions_table['subnarrative'] == short_name]
    return matching_rows['examples'].values[0]

def get_subnarrative_short_name(subnarrative):
    """Return the text after the last ':' of a subnarrative label, stripped of whitespace."""
    last_segment = subnarrative.rsplit(':', 1)[-1]
    return last_segment.strip()
        

In [4]:
subnarratives_dfs = {}

# The document texts do not depend on the subnarrative, so read them from disk
# once instead of once per label. The 'text' column is deliberately kept on
# annotations_df: later cells pass that frame to classify(), which reads it.
annotations_df['text'] = annotations_df['id'].apply(read_text)

for subnarrative in subnarratives_list:
    # assign() builds a fresh frame per label, so the binary 'label' column
    # never leaks into the shared annotations_df.
    temp_df = annotations_df[['id', 'text']].assign(
        label=annotations_df['subnarratives'].apply(lambda labels: int(subnarrative in labels))
    )
    print('Attempting to create dataframe for subnarrative:', subnarrative)
    subnarratives_dfs[subnarrative] = {
        'df': temp_df,
        'definition': get_subnarrative_definition(subnarrative),
        'examples': get_subnarrative_examples(subnarrative)
    }

Attempting to create dataframe for subnarrative: Other
Attempting to create dataframe for subnarrative: URW: Blaming the war on others rather than the invader: Ukraine is the aggressor
Attempting to create dataframe for subnarrative: URW: Blaming the war on others rather than the invader: The West are the aggressors
Attempting to create dataframe for subnarrative: URW: Blaming the war on others rather than the invader: Other
Attempting to create dataframe for subnarrative: URW: Discrediting Ukraine: Rewriting Ukraine’s history
Attempting to create dataframe for subnarrative: URW: Discrediting Ukraine: Discrediting Ukrainian nation and society
Attempting to create dataframe for subnarrative: URW: Discrediting Ukraine: Discrediting Ukrainian military
Attempting to create dataframe for subnarrative: URW: Discrediting Ukraine: Discrediting Ukrainian government and officials and policies
Attempting to create dataframe for subnarrative: URW: Discrediting Ukraine: Ukraine is a puppet of the W

In [5]:
# sort the subnarratives dataframe by label to group positive and negative examples
for subnarrative, data in subnarratives_dfs.items():
    # `data` is the very dict stored under this key, so updating it in place
    # replaces the stored frame with its label-sorted version (positives first).
    data['df'] = data['df'].sort_values(by='label', ascending=False)

In [6]:
import autogen

# System prompt template for one subnarrative classifier agent. The three '{}'
# placeholders are filled, in order, with the subnarrative label, its
# definition and its examples. Every fragment ends with a space so that the
# implicit string concatenation does not glue two sentences together.
system_prompt = (
    "You are a classification model trained to do binary classification by detecting whether a given text is related to a specific subnarrative or not. "
    "You have been trained to recognize the subnarrative: '{}'. "
    "This subnarrative is defined as: {}. "
    "Here are some examples of statements related to this subnarrative: {}. "
    "If the text is related to the subnarrative, please respond with '1'. Otherwise, respond with '0'. Do not try to make sentences, just respond with '1' or '0'. "
    "You are ONLY allowed to answer with '1' or '0' and NOTHING else. "
    "Only answer with 1 if there are explicit and clear mentions of the subnarrative in the text. If you are slightly unsure, classify as 0."
)

# Per-document request template: the first '{}' takes the subnarrative label,
# the second the document text (wrapped in a fenced block).
user_prompt = "".join([
    "Please classify the following text as related to the subnarrative '{}' or not. ",
    "Text : \n ```{}```",
])

Patching name='__init__', member=<function LLMLingua.__init__ at 0x7fcaedf63880>, patched=<function function.__call__ at 0x7fcaedf637e0>
Patching name='compress_text', member=<function LLMLingua.compress_text at 0x7fcaedf63920>, patched=<function function.__call__ at 0x7fcaedf63b00>


In [7]:
narratives_dfs = {}

# The document texts do not depend on the narrative, so read them from disk
# once instead of once per label. The 'text' column is deliberately kept on
# annotations_df: later cells pass that frame to classify(), which reads it.
annotations_df['text'] = annotations_df['id'].apply(read_text)

for narrative in narratives_list:
    # assign() builds a fresh frame per label, so the binary 'label' column
    # never leaks into the shared annotations_df.
    temp_df = annotations_df[['text']].assign(
        label=annotations_df['narratives'].apply(lambda labels: int(narrative in labels))
    )
    print('Attempting to create dataframe for narrative:', narrative)
    narratives_dfs[narrative] = {
        'df': temp_df,
        'definition': get_narrative_definition(narrative),
        'examples': get_narrative_examples(narrative)
    }

Attempting to create dataframe for narrative: Other
Attempting to create dataframe for narrative: URW: Blaming the war on others rather than the invader
Attempting to create dataframe for narrative: URW: Discrediting Ukraine
Attempting to create dataframe for narrative: URW: Russia is the Victim
Attempting to create dataframe for narrative: URW: Praise of Russia
Attempting to create dataframe for narrative: URW: Overpraising the West
Attempting to create dataframe for narrative: URW: Speculating war outcomes
Attempting to create dataframe for narrative: URW: Discrediting the West, Diplomacy
Attempting to create dataframe for narrative: URW: Negative Consequences for the West
Attempting to create dataframe for narrative: URW: Distrust towards Media
Attempting to create dataframe for narrative: URW: Amplifying war-related fears
Attempting to create dataframe for narrative: URW: Hidden plots by secret schemes of powerful groups
Attempting to create dataframe for narrative: CC: Criticism o

In [8]:
# System prompt template for one narrative classifier agent. The three '{}'
# placeholders are filled, in order, with the narrative label, its definition
# and its examples. Every fragment ends with a space so that the implicit
# string concatenation does not glue two sentences together.
narrative_system_prompt = (
    "You are a classification model trained to do binary classification by detecting whether a given text is related to a specific narrative or not. "
    "You have been trained to recognize the narrative: '{}' "
    "defined as: {}. "
    "Here are some examples of statements related to this narrative: {}. "
    "If the text is related to the narrative, you MUST respond with '1' only. Otherwise, you MUST respond with '0' only. "
    "You are ONLY allowed to answer with '1' or '0' and NOTHING else. "
    "Only answer with 1 if there are EXPLICIT and CLEAR mentions of the narrative in the text. Some text will be ambiguous so if you are slightly unsure, answer 0."
)

# Per-document request template: the first '{}' takes the narrative label, the
# second the document text.
narrative_user_prompt = (
    "Please classify the following text as related to the narrative '{}' or not. "
    + "Text : {}"
)

In [9]:
def create_narrative_agents(narratives_list):
    """
    Create one binary-classifier assistant agent per narrative.

    Each agent is named "Agent_<i>", where <i> is the position of its narrative
    in `narratives_list`; extract_recognized_narratives() relies on that index
    to map a reply back to its label. Definitions and examples come from the
    module-level `narratives_dfs`.

    Args:
        narratives_list (list): Fully-qualified narrative labels; each must be
            a key of `narratives_dfs`.

    Returns:
        list: The agents, in the same order as `narratives_list`.
    """
    narrative_agents = []

    # enumerate() gives the position directly: no linear list.index() scan per
    # item, and duplicate labels still receive distinct agent names.
    for position, narrative in enumerate(narratives_list):
        narrative_info = narratives_dfs[narrative]
        agent = autogen.AssistantAgent(
            name="Agent_" + str(position),
            system_message=narrative_system_prompt.format(narrative, narrative_info['definition'], narrative_info['examples']),
            llm_config={
                "config_list": [
                    {
                        "model": "gpt-4o",
                        "api_key": os.environ.get("OPENAI_API_KEY")
                    }
                ],
                'temperature': 0
            }
        )

        # The description is set separately from the system prompt. It reuses
        # the definition already cached in narratives_dfs, which avoids
        # re-reading the definitions CSV for every agent.
        agent.description = (
            "I am a classification model trained to do binary classification by detecting whether a given text is related to the following narrative: {}. "
            "I will be looking for {}"
        ).format(get_narrative_short_name(narrative), narrative_info['definition'])
        narrative_agents.append(agent)
    return narrative_agents

# One classifier agent per entry of narratives_list, in the same order.
narrative_agents = create_narrative_agents(narratives_list)

In [10]:
def reset_agents(agent_list):
    """Call `reset()` on every agent in `agent_list`, in order."""
    for member in agent_list:
        member.reset()

In [11]:
# Proxy that sends each document into the narrative group chat. It never asks
# for human input and has code execution switched off.
narratives_user_proxy_agent = autogen.UserProxyAgent(
    name="user",
    code_execution_config=False,
    llm_config={
        "config_list": [
            {
                "model": "gpt-4o-mini",
                "api_key": os.environ.get("OPENAI_API_KEY")
            }
        ]
    },
    human_input_mode='NEVER'
)

# Maps every narrative agent to [user proxy]. Despite the name, this dict is
# passed below with speaker_transitions_type="disallowed".
# NOTE(review): presumably this forbids handing the turn from a classifier
# back to the user proxy — confirm against the autogen GroupChat documentation.
allowed_transitions = {}

for agent in narrative_agents:
    allowed_transitions[agent] = [narratives_user_proxy_agent]

# The agent list is built here from the current `narrative_agents` objects;
# rebinding `narrative_agents` later does not change the members of this chat.
# NOTE(review): max_round=6 is much smaller than the number of narrative
# agents, so at most a few classifiers can answer per document — confirm this
# is intended.
narratives_group_chat = autogen.GroupChat(
    agents= [narratives_user_proxy_agent] + narrative_agents,
    messages=[],
    max_round=6,
    send_introductions=True,
    allowed_or_disallowed_speaker_transitions=allowed_transitions,
    speaker_transitions_type="disallowed",
)

# Manager that drives the narrative group chat defined above.
narratives_manager = autogen.GroupChatManager(
    groupchat=narratives_group_chat,
    llm_config = {
        "config_list": [
            {
                "model": "gpt-4o",
                "api_key": os.environ.get("OPENAI_API_KEY")
            }
        ]
    },
)



In [12]:
def classify(user_proxy_agent, manager, df, n = 5):
    """
    Run the group chat on the first `n` documents of `df` and collect the chat logs.

    Args:
        user_proxy_agent: Agent that starts each chat with `manager`.
        manager: Group chat manager the text is sent to.
        df (pd.DataFrame): Must provide 'text' and 'id' columns.
        n (int): Number of leading rows to classify. Values larger than
            len(df) are clamped, so the run no longer aborts with an
            IndexError after already-paid-for API calls.

    Returns:
        list: One dict per document with 'chat_result' (the chat history) and
        'file' (the document id).
    """
    histories = []
    total = min(n, len(df))
    for i in range(total):
        chat_result = user_proxy_agent.initiate_chat(
            manager,
            message="Here is the text that needs to be classified: \n```{}```\nYou are ONLY allowed to reply with '0' or '1'".format(df['text'].iloc[i]),
            summary_method='reflection_with_llm'
        )
        histories.append({
            'chat_result': chat_result.chat_history,
            'file': df['id'].iloc[i]
        })
        # Reset the proxy that actually ran the chat (the argument, not a
        # module-level one) so no state carries over to the next document.
        user_proxy_agent.reset()
        reset_agents(narrative_agents)
        print('Completed ', i+1, ' out of ', total, ' texts')
    return histories

In [13]:
def extract_recognized_narratives(chat_history):
    """
    Turn a narrative group-chat history into the list of recognized narratives.

    Messages from 'user' and 'chat_manager' are skipped. Every other message
    must come from an agent named "Agent_<i>"; a reply of '1' marks
    narratives_list[i] as recognized.

    Args:
        chat_history (list): Message dicts with 'name' and 'content' keys.

    Returns:
        list: Recognized narrative labels in reply order; ['Other'] when no
        agent answered '1'. 'Other' is dropped when other labels are present.
    """
    recognized_narratives = []
    for message in chat_history:
        if message['name'] == 'user' or message['name'] == 'chat_manager':
            continue
        index = int(message['name'].split('_')[-1])
        content = message['content']
        # Ignore surrounding whitespace: a reply such as "1\n" is still a
        # positive answer and must not be silently dropped.
        if isinstance(content, str) and content.strip() == '1':
            recognized_narratives.append(narratives_list[index])
    if len(recognized_narratives) == 0:
        recognized_narratives.append('Other')

    # 'Other' is only meaningful when it is the sole label.
    if len(recognized_narratives) > 1 and 'Other' in recognized_narratives:
        recognized_narratives.remove('Other')
    return recognized_narratives

In [14]:
def get_accuracy(extracted_narratives, true_narratives):
    """
    Return the fraction of extracted narratives that appear in the true ones.

    Returns 0 when nothing was extracted.
    """
    predicted_total = len(extracted_narratives)
    if predicted_total == 0:
        return 0
    hits = 0
    for narrative in extracted_narratives:
        if narrative in true_narratives:
            hits += 1
    return hits / predicted_total

In [None]:
# Narrative-level classification of the first 50 rows of annotations_df;
# classify() reads that frame's 'text' and 'id' columns.
histories = classify(narratives_user_proxy_agent, narratives_manager, annotations_df, 50)

In [None]:
# Score every classified document against its gold narratives
import pprint

accuracies = []
for history in histories:
    extracted_narratives = extract_recognized_narratives(history['chat_result'])
    # Gold labels of the document this chat history belongs to.
    matching_rows = annotations_df[annotations_df['id'] == history['file']]
    true_narratives = matching_rows['narratives'].values[0]
    accuracies.append(get_accuracy(extracted_narratives, true_narratives))

pprint.pprint(accuracies)

In [15]:
def create_annotations_df(histories, base_path='data/EN/raw-documents'):
    """
    Convert classification chat histories into a narrative prediction frame.

    Args:
        histories (list): Dicts with 'chat_result' (chat history) and 'file'
            (document id), as returned by classify().
        base_path (str): Directory the document texts are read from.

    Returns:
        pd.DataFrame: Columns 'id', 'text' and 'narratives'; 'narratives' is a
        ';'-joined string of the unique recognized labels. An empty `histories`
        yields an empty frame that still has these three columns.
    """
    annotations = []
    for history in histories:
        extracted_narratives = extract_recognized_narratives(history['chat_result'])
        # Remove 'Other' if there are additional narratives
        if 'Other' in extracted_narratives and len(extracted_narratives) > 1:
            extracted_narratives = [narrative for narrative in extracted_narratives if narrative != 'Other']

        # dict.fromkeys de-duplicates while keeping first-seen order; going
        # through a set made the label order differ from run to run.
        extracted_narratives = list(dict.fromkeys(extracted_narratives))

        annotations.append({
            'id': history['file'],
            'text': read_text(history['file'], base_path),
            'narratives': extracted_narratives
        })
    # Explicit columns keep the frame well-formed when there are no histories.
    df = pd.DataFrame(annotations, columns=['id', 'text', 'narratives'])
    df['narratives'] = df['narratives'].apply(lambda x: ';'.join(x))
    return df

In [None]:
# Narrative predictions (id, text, ';'-joined narratives) for the classified documents.
test_annotation_df = create_annotations_df(histories)

In [16]:
def create_subnarrative_agents(subnarratives_list):
    """
    Create one binary-classifier assistant agent per subnarrative.

    Each agent is named "Agent_<i>", where <i> is the position of its
    subnarrative in `subnarratives_list`; extract_recognized_subnarratives()
    relies on that index to map a reply back to its label. Definitions and
    examples come from the module-level `subnarratives_dfs`.

    Args:
        subnarratives_list (list): Fully-qualified subnarrative labels; each
            must be a key of `subnarratives_dfs`.

    Returns:
        dict: Maps each subnarrative label to its agent.
    """
    subnarrative_agents = {}
    # enumerate() gives the position directly instead of a linear
    # list.index() scan for every one of the labels.
    for position, subnarrative in enumerate(subnarratives_list):
        subnarrative_info = subnarratives_dfs[subnarrative]
        agent = autogen.AssistantAgent(
            name="Agent_" + str(position),
            system_message=system_prompt.format(subnarrative, subnarrative_info['definition'], subnarrative_info['examples']),
            llm_config={
                "config_list": [
                    {
                        "model": "gpt-4o",
                        "api_key": os.environ.get("OPENAI_API_KEY")
                    }
                ]
            }
        )

        agent.description = 'I am a classification model trained to do binary classification by detecting whether a given text is related to the following subnarrative: {}'.format(get_subnarrative_short_name(subnarrative))
        subnarrative_agents[subnarrative] = agent

    return subnarrative_agents

def create_group_chat(agent_list):
    """
    Assemble a round-robin group chat around the given classifier agents.

    A new user proxy (no human input, no code execution) is created and placed
    first in the chat, followed by `agent_list`. The chat runs for at most
    len(agent_list) + 1 rounds.

    Args:
        agent_list (list): Agents that take part in the chat.

    Returns:
        tuple: (group_chat, manager, user_proxy_agent).
    """
    def gpt4o_config():
        # A new dict per consumer, as each component gets its own config object.
        return {
            "config_list": [
                {
                    "model": "gpt-4o",
                    "api_key": os.environ.get("OPENAI_API_KEY")
                }
            ]
        }

    user_proxy_agent = autogen.UserProxyAgent(
        name="user",
        code_execution_config=False,
        llm_config=gpt4o_config(),
        human_input_mode='NEVER'
    )

    # Passed with speaker_transitions_type='disallowed' below: each agent is
    # mapped to a list holding only the user proxy.
    allowed_transitions = {member: [user_proxy_agent] for member in agent_list}

    group_chat = autogen.GroupChat(
        agents=[user_proxy_agent] + agent_list,
        messages=[],
        max_round=len(agent_list) + 1,
        send_introductions=True,
        allowed_or_disallowed_speaker_transitions=allowed_transitions,
        speaker_transitions_type='disallowed',
        speaker_selection_method='round_robin'
    )

    manager = autogen.GroupChatManager(
        groupchat=group_chat,
        llm_config=gpt4o_config(),
        system_message="You are a group chat manager. You are asked to give the classification task to the agents that look relevant to the topic"
    )

    print('Created group chat with the following agents: ', agent_list)
    return group_chat, manager, user_proxy_agent

In [17]:
# group subnarrative agents by narrative

def group_subnarrative_agents_by_narrative(subnarrative_agents):
    """
    Bucket subnarrative agents under their parent narrative.

    Args:
        subnarrative_agents (dict): Maps "<category>: <narrative>: <subnarrative>"
            labels (plus the global 'Other') to agents.

    Returns:
        dict: Maps "<category>: <narrative>" to the list of agents of that
        narrative, in input order. The global 'Other' entry is left out.
    """
    grouped_agents = {}
    for label, agent in subnarrative_agents.items():
        if label != 'Other':
            pieces = label.split(': ')
            parent = pieces[0] + ': ' + pieces[1]
            grouped_agents.setdefault(parent, []).append(agent)
    return grouped_agents

In [18]:
# One classifier agent per subnarrative, keyed by label ...
subnarrative_agents = create_subnarrative_agents(subnarratives_list)
# ... and the same agents bucketed by their parent narrative.
grouped_agents = group_subnarrative_agents_by_narrative(subnarrative_agents)


In [19]:
def create_group_chat_for_narrative(narrative):
    """
    Build a group chat from the subnarrative agents of one narrative.

    Args:
        narrative (str): Key of the module-level `grouped_agents`.

    Returns:
        tuple: Whatever create_group_chat() returns for those agents.

    Raises:
        KeyError: If `grouped_agents` has no entry for `narrative`.
    """
    narrative_members = grouped_agents[narrative]
    return create_group_chat(narrative_members)

def extract_recognized_subnarratives(chat_history):
    """
    Turn a subnarrative group-chat history into the list of recognized subnarratives.

    Messages from 'user' and 'chat_manager' are skipped. Every other message
    must come from an agent named "Agent_<i>"; a reply of '1' marks
    subnarratives_list[i] as recognized.

    Args:
        chat_history (list): Message dicts with 'name' and 'content' keys.

    Returns:
        list: Recognized subnarrative labels in reply order; ['Other'] when no
        agent answered '1'. 'Other' is dropped when other labels are present.
    """
    recognized_subnarratives = []
    for message in chat_history:
        if message['name'] == 'user' or message['name'] == 'chat_manager':
            continue
        index = int(message['name'].split('_')[-1])
        content = message['content']
        # Ignore surrounding whitespace: a reply such as "1\n" is still a
        # positive answer and must not be silently dropped.
        if isinstance(content, str) and content.strip() == '1':
            recognized_subnarratives.append(subnarratives_list[index])
    if len(recognized_subnarratives) == 0:
        recognized_subnarratives.append('Other')

    # 'Other' is only meaningful when it is the sole label.
    if len(recognized_subnarratives) > 1 and 'Other' in recognized_subnarratives:
        recognized_subnarratives.remove('Other')
    return recognized_subnarratives

def extract_subnarratives_for_one_narrative(narrative, text):
    """
    Classify `text` against the subnarratives of one narrative.

    A new group chat is created for the narrative, the text is sent to it, and
    the resulting chat history is converted to subnarrative labels.

    Returns:
        list: Output of extract_recognized_subnarratives() for that chat.
    """
    # Only the manager and the proxy are needed to run the chat.
    _, chat_manager, proxy = create_group_chat_for_narrative(narrative)
    request = "Here is the text that needs to be classified: \n```{}```\nYou are ONLY allowed to reply with '0' or '1'".format(text)
    outcome = proxy.initiate_chat(
        chat_manager,
        message=request,
        summary_method='reflection_with_llm'
    )
    return extract_recognized_subnarratives(outcome.chat_history)

def extract_subnarratives_for_narratives(narratives_list, text):
    """
    Collect subnarrative predictions for every narrative assigned to a text.

    Args:
        narratives_list (list): Narrative labels predicted for the text.
        text (str): The document text.

    Returns:
        list: ['Other'] (a flat list) when the only narrative is 'Other';
        otherwise one list of subnarratives per narrative, in the same order.
    """
    only_other = len(narratives_list) == 1 and narratives_list[0] == 'Other'
    if only_other:
        return ['Other']
    return [
        extract_subnarratives_for_one_narrative(narrative, text)
        for narrative in narratives_list
    ]

In [20]:
def get_subnarratives_for_df(df):
    """
    Add a 'subnarratives' column with the predictions for every row.

    Args:
        df (pd.DataFrame): Needs 'narratives' (';'-joined labels) and 'text'.

    Returns:
        pd.DataFrame: The same `df` object, modified in place.
    """
    per_document = [
        extract_subnarratives_for_narratives(record['narratives'].split(';'), record['text'])
        for _, record in df.iterrows()
    ]

    df['subnarratives'] = per_document
    return df


In [None]:
# Adds the 'subnarratives' column to test_annotation_df in place.
get_subnarratives_for_df(test_annotation_df)

In [21]:
# concat arrays
def concat_arrays(arr):
    """
    Flatten a sequence of sequences by one level.

    An empty input gives []; a single-element input returns that element
    itself (not a copy); otherwise the items of all elements are concatenated
    into a new list.
    """
    size = len(arr)
    if size == 0:
        return []
    if size == 1:
        return arr[0]
    merged = []
    for chunk in arr:
        merged.extend(chunk)
    return merged

def update_narratives_for_only_other(df):
    """
    Set 'narratives' to 'Other' on rows whose 'subnarratives' equals ['Other'].

    Parameters:
    df (pd.DataFrame): Frame with 'narratives' and 'subnarratives' columns.

    Returns:
    pd.DataFrame: Result of applying the rule row by row.
    """
    def relabel(record):
        only_other = record['subnarratives'] == ['Other']
        if only_other:
            record['narratives'] = 'Other'
        return record

    relabelled = df.apply(relabel, axis=1)
    return relabelled

def remove_parasit_other(df):
    """
    Drop a stray 'Other' from 'subnarratives' lists that hold further labels.

    A list consisting only of 'Other' is kept as is. When 'Other' appears next
    to other labels, its first occurrence is removed from the list in place.

    Parameters:
    df (pd.DataFrame): Frame with a 'subnarratives' column of lists.

    Returns:
    pd.DataFrame: The same `df` object with the column updated.
    """
    def strip_other(labels):
        if len(labels) <= 1 or 'Other' not in labels:
            return labels
        labels.remove('Other')
        return labels

    cleaned = df['subnarratives'].apply(strip_other)
    df['subnarratives'] = cleaned
    return df

def flatten_and_deduplicate_subnarratives(df, column_name):
    """
    Flatten and de-duplicate the lists stored in one column, preserving order.

    Each list cell may mix plain items and inner lists; inner lists are
    expanded by one level and duplicates are removed. The first occurrence of
    every label keeps its position, so the output order is the same on every
    run (de-duplicating through a set made it vary with string hashing).

    Parameters:
    df (pd.DataFrame): The input DataFrame; it is modified in place.
    column_name (str): The name of the column to process.

    Returns:
    pd.DataFrame: The same `df` object with the column updated. Cells that are
    not lists are left untouched.
    """
    def process_row(value):
        if not isinstance(value, list):
            return value  # Return as-is if not a list
        # A dict keeps insertion order, giving an ordered de-duplication.
        unique = {}
        for item in value:
            if isinstance(item, list):
                unique.update(dict.fromkeys(item))  # Add elements from inner lists
            else:
                unique[item] = None  # Add single elements
        return list(unique)

    # Apply the processing function to the specified column
    df[column_name] = df[column_name].apply(process_row)
    return df

In [22]:
def get_df_to_right_format(df):
    """
    Convert a prediction frame into the three-column submission layout.

    The 'subnarratives' lists are flattened and de-duplicated, a stray 'Other'
    is removed, rows left with only 'Other' get 'Other' as narrative, and the
    lists are joined with ';'.

    Returns:
        pd.DataFrame: Columns 'id', 'narratives' and 'subnarratives'. Callers
        must use the returned frame; the joined strings are not written back
        to the frame that was passed in.
    """
    formatted = flatten_and_deduplicate_subnarratives(df, 'subnarratives')
    formatted = remove_parasit_other(formatted)
    formatted = update_narratives_for_only_other(formatted)
    # Serialise each label list as a single ';'-separated string.
    formatted['subnarratives'] = formatted['subnarratives'].apply(lambda labels: ';'.join(labels))
    submission_columns = ['id', 'narratives', 'subnarratives']
    return formatted[submission_columns]

In [23]:
def calculate_classification_accuracy(predictions_df, ground_truth_df):
    """
    Calculates the accuracy of subnarrative classification by comparing predictions with ground truth.

    The 'subnarratives' cells of either frame may be a ';'-joined string, a
    flat list of labels, or a list of lists; all three are normalised to a set
    of labels before comparison. (Calling set() directly on a joined string
    compared single characters, and on a list of lists raised TypeError.)

    Parameters:
    predictions_df (pd.DataFrame): DataFrame with your classification results, including 'id' and 'subnarratives'.
    ground_truth_df (pd.DataFrame): DataFrame with the ground truth classifications, including 'id' and 'subnarratives'.

    Returns:
    float: Overall accuracy (average IoU) for all rows; NaN when no ids match.
    pd.DataFrame: The frames merged on 'id', with a per-row 'iou' column.
    """
    def to_label_set(value):
        # Missing cell (None / NaN) -> no labels.
        if value is None or (isinstance(value, float) and value != value):
            return set()
        if isinstance(value, str):
            return {part.strip() for part in value.split(';') if part.strip()}
        labels = set()
        for item in value:
            if isinstance(item, (list, tuple, set)):
                labels.update(item)
            else:
                labels.add(item)
        return labels

    def calculate_iou(predicted, true):
        predicted_set = to_label_set(predicted)
        true_set = to_label_set(true)

        # Handle edge case: both are empty
        if not predicted_set and not true_set:
            return 1.0

        # Intersection over union (IoU); the union is non-empty here.
        return len(predicted_set & true_set) / len(predicted_set | true_set)

    # Merge predictions and ground truth on 'id'
    merged_df = predictions_df.merge(ground_truth_df, on='id', suffixes=('_predicted', '_true'))

    # Build the column from a plain list so an empty merge gives an empty
    # float column instead of failing inside DataFrame.apply(axis=1).
    merged_df['iou'] = pd.Series(
        [
            calculate_iou(predicted, true)
            for predicted, true in zip(merged_df['subnarratives_predicted'], merged_df['subnarratives_true'])
        ],
        index=merged_df.index,
        dtype=float,
    )

    # Calculate overall accuracy (average IoU)
    overall_accuracy = merged_df['iou'].mean()

    return overall_accuracy, merged_df


In [355]:
# Ground truth for the evaluation: the first 50 rows of annotations_df.
# (Leftover experiment, kept disabled: splitting the prediction columns on ';'.)
#test_annotation_df['narratives'] = test_annotation_df['narratives'].apply(lambda x: x.split(';'))
#test_annotation_df['subnarratives'] = test_annotation_df['subnarratives'].apply(lambda x: x.split(';'))
truth_annotation_df = annotations_df.head(50)

In [356]:
# Mean IoU of predicted vs. gold subnarratives, plus the per-document scores.
overall_accuracy, merged_df = calculate_classification_accuracy(test_annotation_df, truth_annotation_df)

In [391]:
def load_files(folder):
    """Return the names of the entries in `folder` that end with '.txt', in os.listdir order."""
    return [entry for entry in os.listdir(folder) if entry.endswith('.txt')]

def create_text_df(files, base_path='devset/EN/subtask-2-documents'):
    """
    Load documents into a DataFrame with 'id' and 'text' columns.

    Args:
        files (list): File names to load.
        base_path (str): Directory the files are read from. Defaults to the
            dev-set directory that used to be hard-coded, so existing calls
            behave as before while other document sets can reuse the function.

    Returns:
        pd.DataFrame: One row per file; an empty `files` list yields an empty
        frame that still has the 'id' and 'text' columns.
    """
    texts = [
        {'id': file, 'text': read_text(file, base_path=base_path)}
        for file in files
    ]
    return pd.DataFrame(texts, columns=['id', 'text'])

# Names of the dev-set documents (.txt files) and a frame with their ids and texts.
files = load_files('devset/EN/subtask-2-documents')
dev_df = create_text_df(files)

In [392]:
# Reset every narrative agent and every subnarrative agent before the dev-set run.
reset_agents(narrative_agents)
for subnarrative in subnarratives_list:
    subnarrative_agents[subnarrative].reset()

In [396]:
# NOTE(review): this rebinds `narrative_agents` to brand-new agents, but
# `narratives_group_chat` was built from the previous list, so the chat driven
# by `narratives_manager` presumably still uses the old agent objects — confirm
# whether the group chat and manager should be rebuilt here as well.
narrative_agents = create_narrative_agents(narratives_list)
# Narrative-level classification of every dev-set document.
histories = classify(narratives_user_proxy_agent, narratives_manager, dev_df, len(dev_df))
# dev_df is replaced by the prediction frame built from the chat histories.
dev_df = create_annotations_df(histories, base_path='devset/EN/subtask-2-documents')

[33muser[0m (to chat_manager):

Here is the text that needs to be classified: 
```Biden’s green policies are making housing no longer affordable for the average American 

 This is according to Edward Ring, an author and a senior fellow with the conservative think tank, the California Policy Center, who warned of the far-reaching consequences of Biden's "globalist green agenda."

"To allegedly save us all from a 'climate crisis,' development and use of oil, natural gas and coal is being halted," wrote Ring for American Greatness, while at the same time pointing out that the development of renewable energy sources like hydroelectric and nuclear power plants is also being slowed down, if not completely stopped.

Ring added that newer energy sources will come from more inefficient and expensive wind, solar and biomass energy generation. These are far more destructive to the environment, require the extraction and importing of more raw materials for their construction and maintenance "an

In [24]:
# The empty list is overwritten immediately by the next line.
subnarrative_agents = []
# Recreate the subnarrative agents and regroup them by parent narrative;
# create_group_chat_for_narrative() reads the module-level `grouped_agents`.
subnarrative_agents = create_subnarrative_agents(subnarratives_list)
grouped_agents = group_subnarrative_agents_by_narrative(subnarrative_agents)


In [400]:
# Adds the 'subnarratives' column to dev_df in place.
get_subnarratives_for_df(dev_df)

Created group chat with the following agents:  [<autogen.agentchat.assistant_agent.AssistantAgent object at 0x7f3dcb2e53a0>, <autogen.agentchat.assistant_agent.AssistantAgent object at 0x7f3dcb2e4380>, <autogen.agentchat.assistant_agent.AssistantAgent object at 0x7f3dcb1a0110>, <autogen.agentchat.assistant_agent.AssistantAgent object at 0x7f3ddd09eab0>]
[33muser[0m (to chat_manager):

Here is the text that needs to be classified: 
```Biden’s green policies are making housing no longer affordable for the average American 

 This is according to Edward Ring, an author and a senior fellow with the conservative think tank, the California Policy Center, who warned of the far-reaching consequences of Biden's "globalist green agenda."

"To allegedly save us all from a 'climate crisis,' development and use of oil, natural gas and coal is being halted," wrote Ring for American Greatness, while at the same time pointing out that the development of renewable energy sources like hydroelectric an

Unnamed: 0,id,text,narratives,subnarratives
0,EN_CC_200071.txt,Biden’s green policies are making housing no l...,CC: Criticism of climate movement;CC: Criticis...,"[[Other], [CC: Criticism of climate policies: ..."
1,EN_UA_DEV_100036.txt,Opinion: The unseen scars of Ukraine's mental ...,Other,[Other]
2,EN_CC_200085.txt,The Great California Wind Energy Ripoff… \n\n ...,Other,[Other]
3,EN_CC_200049.txt,Alarmists Warn of U.S. ‘Heat Dome’ Tied to Hum...,CC: Questioning the measurements and science,[[Other]]
4,EN_CC_200034.txt,If we “just stop oil” like climate protesters ...,CC: Amplifying Climate Fears;CC: Criticism of ...,[[CC: Amplifying Climate Fears: Doomsday scena...
5,EN_CC_200035.txt,Gretchen Whitmer Orders Michigan’s State Fleet...,CC: Criticism of institutions and authorities,[[CC: Criticism of institutions and authoritie...
6,EN_CC_200078.txt,Greta Thunberg Detained Again At Anti-Coal Pro...,CC: Criticism of institutions and authorities;...,"[[Other], [Other]]"
7,EN_UA_DEV_100033.txt,Wild Kremlin TV hosts threaten the U.S. with n...,Other,[Other]
8,EN_UA_DEV_100012.txt,Ukraine's Minerals: What the West is Fighting ...,URW: Negative Consequences for the West;URW: D...,[[URW: Negative Consequences for the West: Oth...
9,EN_CC_200060.txt,French state bank to end financing of oil and ...,CC: Criticism of institutions and authorities,[[Other]]


In [401]:
# Keep the returned frame: it is the one holding the ';'-joined subnarratives
# and only the id / narratives / subnarratives columns.
dev_df = get_df_to_right_format(dev_df)

In [402]:
# Export the dev-set predictions as a headerless, tab-separated file.
# Only id, narratives and subnarratives are written (this selection repeats
# the one already done by get_df_to_right_format).
dev_df = dev_df[['id', 'narratives', 'subnarratives']]
dev_df.to_csv('devset/EN/subtask-2-annotations.txt', sep='\t', header=False, index=False)

In [25]:
def generate_predictions_for_texts(folder):
    """
    Run the two-stage pipeline (narratives, then subnarratives) on a folder.

    Args:
        folder (str): Directory containing the '.txt' documents to classify.

    Returns:
        pd.DataFrame: Columns 'id', 'text', 'narratives' (';'-joined string)
        and 'subnarratives' (flat, de-duplicated list of labels). The list is
        intentionally not joined here, because callers join it themselves
        before exporting.
    """
    # Only .txt entries are treated as documents.
    files = [name for name in os.listdir(folder) if name.endswith('.txt')]
    predictions_df = pd.DataFrame(
        [{'id': name, 'text': read_text(name, base_path=folder)} for name in files]
    )

    # Stage 1: narrative-level classification with the module-level group chat.
    # No agents are re-created here: binding new agents to local names (as was
    # done before) had no effect, since the group chat, its manager and
    # `grouped_agents` are module-level objects that keep using the existing ones.
    histories = classify(narratives_user_proxy_agent, narratives_manager, predictions_df, len(predictions_df))
    predictions_df = create_annotations_df(histories, base_path=folder)

    # Stage 2: subnarrative-level classification (adds the column in place).
    get_subnarratives_for_df(predictions_df)

    # Post-processing. Each step's result is kept: previously the output of
    # get_df_to_right_format() was discarded, which silently dropped the
    # "only 'Other' subnarratives => narrative 'Other'" update.
    predictions_df = flatten_and_deduplicate_subnarratives(predictions_df, 'subnarratives')
    predictions_df = remove_parasit_other(predictions_df)
    predictions_df = update_narratives_for_only_other(predictions_df)
    return predictions_df

In [None]:
# Full narrative + subnarrative prediction run over the test-set documents.
test_df = generate_predictions_for_texts('testset/EN/subtask-2-documents')

[33muser[0m (to chat_manager):

Here is the text that needs to be classified: 
```Kurt Volker to Newsmax: A Ukraine Win Would Help Solidify War Crime Trials Against Russia 

 Kurt Volker, the former United States ambassador to NATO and one-time member of the Trump White House, doesn't envision Russia being overly cooperative with Geneva Convention officials anytime soon, regarding the Russians' alleged conduct in their six-month war with Ukraine. "I don't think so," Volker pointedly told Newsmax Wednesday evening, while appearing on " In a nutshell, the Geneva Convention involves four globally recognized treaties and three additional protocols that comprise the international legal standards for humanitarian treatment in war. It extensively covers the basic rights of wartime prisoners (civilians and military personnel), established protections for the wounded and sick, and offers protections for the civilians in and around a war zone. Also, the Geneva Convention document defines the r

In [28]:
# Keep only the submission columns. copy() gives an independent frame, so the
# column assignment below does not write into a slice of the original
# (SettingWithCopyWarning).
test_df = test_df[['id', 'narratives', 'subnarratives']].copy()
# Join label lists with ';'. Cells that are already strings are left alone, so
# re-running this cell does not join the individual characters of a string.
test_df['subnarratives'] = test_df['subnarratives'].apply(
    lambda labels: labels if isinstance(labels, str) else ';'.join(labels)
)
test_df.to_csv('testset/EN/subtask-2-annotations.txt', sep='\t', header=False, index=False)