# Util functions

In [5]:
import pandas as pd
import json

In [6]:
# method to extract the list of all unique subnarratives
def get_subnarratives_list(file):
    """
    Extracts the fully qualified subnarrative labels from a taxonomy JSON file.

    The file is read as a two-level mapping
    ``{main_category: {narrative: [subnarrative, ...]}}``. Every narrative
    gets an extra "Other" subnarrative when it does not already list one.

    Args:
        file (str): Path to the taxonomy JSON file.

    Returns:
        list: "Other" followed by one "main_category: narrative: subnarrative"
            label per subnarrative, in file order.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    subnarratives = ["Other"]
    for main_category, subcategories in data.items():
        for subcategory, narratives in subcategories.items():
            # Copy before appending so the parsed taxonomy is left untouched.
            leaves = list(narratives)
            if "Other" not in leaves:
                leaves.append("Other")
            subnarratives.extend(
                f"{main_category}: {subcategory}: {leaf}" for leaf in leaves
            )

    return subnarratives

def get_narratives_list(file):
    """
    Extracts the fully qualified narrative labels from a taxonomy JSON file.

    The file is read as a mapping ``{main_category: {narrative: ...}}``; only
    the two outer key levels are used here.

    Args:
        file (str): Path to the taxonomy JSON file.

    Returns:
        list: "Other" followed by one "main_category: narrative" label per
            narrative, in file order.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    narratives = ["Other"]
    for main_category, subcategories in data.items():
        # Only the narrative names (keys) matter at this level.
        narratives.extend(
            f"{main_category}: {subcategory}" for subcategory in subcategories
        )

    return narratives

import os
def read_text(file_id, base_path='data/EN/raw-documents'):
    """Return the whole UTF-8 content of the document `file_id` found under `base_path`."""
    document_path = os.path.join(base_path, str(file_id))
    with open(document_path, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents
    
def get_sibling_subnarratives(subnarrative, taxonomy):
    """
    Look up everything the taxonomy stores under the narrative of `subnarrative`.

    Args:
        subnarrative (str): Label whose first two ': '-separated components
            are the main category and the narrative.
        taxonomy (dict): Nested mapping indexed by main category, then narrative.

    Returns:
        The taxonomy entry found for that main category and narrative.
    """
    components = subnarrative.split(': ')
    category_branch = taxonomy[components[0]]
    return category_branch[components[1]]

def get_narrative_definition(narrative, narratives_list):
    """
    Return the definition text for `narrative`.

    For any label other than 'Other', the last ':'-separated component is
    looked up in the 'narrative' column of 'data/narratives definition.csv'
    and the first matching 'definition' value is returned. For 'Other', a
    sentence listing the short names of all other narratives is built instead.
    """
    if narrative != 'Other':
        definitions_table = pd.read_csv('data/narratives definition.csv')
        wanted = narrative.split(':')[-1].strip()
        matching_rows = definitions_table[definitions_table['narrative'] == wanted]
        return matching_rows['definition'].values[0]

    known_topics = [
        get_narrative_short_name(label)
        for label in narratives_list
        if label != 'Other'
    ]
    return 'Statements that are NOT related to anyone of these topics : {}'.format(', '.join(known_topics))

def get_narrative_examples(narrative):
    """
    Return the 'example' value stored for `narrative`, or None for 'Other'.

    The last ':'-separated component of the label is matched against the
    'narrative' column of 'data/narratives definition.csv'.
    """
    if narrative != 'Other':
        definitions_table = pd.read_csv('data/narratives definition.csv')
        wanted = narrative.split(':')[-1].strip()
        matching_rows = definitions_table[definitions_table['narrative'] == wanted]
        return matching_rows['example'].values[0]
    return None

def get_narrative_short_name(narrative):
    """Return the text after the last ':' in `narrative`, without surrounding whitespace."""
    _, _, tail = narrative.rpartition(':')
    return tail.strip()

def get_subnarrative_definition(subnarrative, narratives_list, taxonomy):
    """
    Return the definition text for `subnarrative`.

    Three cases are handled:
      * 'Other': a sentence listing every narrative except 'Other'.
      * '<...>: <narrative>: Other': a sentence built from the parent
        narrative's definition and the taxonomy entry of its subnarratives.
      * anything else: the 'definition' value whose 'subnarrative' column in
        'data/subnarrative definitions.csv' equals the last ':'-separated
        component of the label.

    Args:
        subnarrative (str): Subnarrative label.
        narratives_list (list): All narrative labels.
        taxonomy (dict): Nested taxonomy mapping, used for the sibling lookup.

    Returns:
        The definition text.
    """
    if subnarrative == 'Other':
        return 'Statements that are NOT related to anyone of these narratives : {}'.format(
            ', '.join(label for label in narratives_list if label != 'Other')
        )

    components = subnarrative.split(':')
    short_name = components[-1].strip()

    if short_name == 'Other':
        # The parent narrative's definition is only needed in this branch, so
        # it is looked up here rather than on every call.
        narrative = components[-2].strip()
        narrative_definition = get_narrative_definition(narrative, narratives_list)
        return 'Statement that are related to the narrative "{}", defined as {} but are not related to anyone of these subnarratives : {}'.format(
            get_narrative_short_name(narrative),
            narrative_definition,
            get_sibling_subnarratives(subnarrative, taxonomy),
        )

    subnarrative_definitions = pd.read_csv('data/subnarrative definitions.csv')
    return subnarrative_definitions[subnarrative_definitions['subnarrative'] == short_name]['definition'].values[0]


def get_subnarrative_examples(subnarrative):
    """
    Return the 'examples' value stored for `subnarrative`.

    Labels whose last ':'-separated component is 'Other' (including the bare
    'Other' label) have no examples and yield None. Otherwise that component
    is matched against the 'subnarrative' column of
    'data/subnarrative definitions.csv'.
    """
    leaf = subnarrative.split(':')[-1].strip()
    if leaf == 'Other':
        return None

    definitions_table = pd.read_csv('data/subnarrative definitions.csv')
    matching_rows = definitions_table[definitions_table['subnarrative'] == leaf]
    return matching_rows['examples'].values[0]

def get_subnarrative_short_name(subnarrative):
    """Return the last ':'-separated component of `subnarrative`, stripped of whitespace."""
    leaf = subnarrative.rsplit(':', 1)[-1]
    return leaf.strip()
        

In [7]:
def build_narratives_with_definitions_and_examples(taxonomy_file='data/taxonomy.json'):
    """
    Builds a lookup table of narratives with their definitions and examples.

    Args:
        taxonomy_file (str): Path to the JSON taxonomy file

    Returns:
        dict: Maps every narrative label from the taxonomy to a dictionary
            with "definition" and "examples" entries.
    """
    all_labels = get_narratives_list(taxonomy_file)

    return {
        label: {
            "definition": get_narrative_definition(label, all_labels),
            "examples": get_narrative_examples(label),
        }
        for label in all_labels
    }

def build_subnarratives_with_definitions_and_examples(taxonomy_file='data/taxonomy.json'):
    """
    Builds a lookup table of subnarratives with their definitions and examples.

    Args:
        taxonomy_file (str): Path to the JSON taxonomy file

    Returns:
        dict: Maps every subnarrative label from the taxonomy to a dictionary
            with "definition" and "examples" entries.
    """
    # Get the list of narratives and subnarratives
    narratives_list = get_narratives_list(taxonomy_file)
    subnarratives_list = get_subnarratives_list(taxonomy_file)

    # The raw taxonomy is passed along for the sibling lookup done when
    # defining "...: Other" subnarratives. JSON is UTF-8, so say so explicitly.
    with open(taxonomy_file, 'r', encoding='utf-8') as f:
        taxonomy = json.load(f)

    # One entry per subnarrative label
    return {
        subnarrative: {
            "definition": get_subnarrative_definition(subnarrative, narratives_list, taxonomy),
            "examples": get_subnarrative_examples(subnarrative),
        }
        for subnarrative in subnarratives_list
    }

In [50]:
# Build both lookup tables once, from the default taxonomy file:
# label -> {"definition": ..., "examples": ...}.
narratives = build_narratives_with_definitions_and_examples()
subnarratives = build_subnarratives_with_definitions_and_examples()

# Prompts

In [9]:
# System prompt template for a per-narrative binary classifier.
# Three positional `{}` placeholders, filled in this order by
# create_narrative_agents: narrative label, definition, examples.
narrative_system_prompt = (
    "<instruction>"
    "You are a highly precise binary classification model trained to determine whether a given text explicitly relates to the narrative: '{}'. "
    "This narrative is defined as follows: ```{}```. "
    "Here are clear and representative examples of statements that are related to this narrative: ```{}```. "
    "Your task is to classify the given text strictly based on whether it contains explicit, unambiguous references to this narrative. "
    "If the text directly aligns with the narrative's definition and examples, you MUST respond with '1'. "
    "If there is any uncertainty, indirect reference, or ambiguity, you MUST respond with '0'. "
    "Output only with a single character. Your answer MUST be strictly '1' or '0'—no explanations, no justifications, and no additional text. "
    "If the narrative is only weakly implied or suggested, you MUST classify it as '0'. "
    "</instruction>"
)

# User message template with two positional `{}` placeholders; going by the
# wording they are the narrative label followed by the text to classify.
# NOTE(review): not referenced by any of the visible cells.
narrative_user_prompt = (
    "Please classify the following text as related to the narrative '{}' or not. "
    "Text\n"
    "```{}```"
)

# System prompt template for a per-subnarrative binary classifier.
# Three positional `{}` placeholders, filled in this order by
# create_subnarratives_agents: subnarrative label, definition, examples.
subnarrative_system_prompt = (
    "<instruction>"
    "You are a highly precise binary classification model trained to determine whether a given text explicitly relates to the subnarrative: '{}'. "
    "This subnarrative is defined as follows: ```{}```. "
    "Here are clear and representative examples of statements that are related to this subnarrative: ```{}```. "
    "Your task is to classify the given text strictly based on whether it contains explicit, unambiguous references to this subnarrative. "
    "Output only with a single character. If the text directly aligns with the subnarrative's definition and examples, respond with '1'. Otherwise, respond with '0'. "
    "You MUST classify as '0' if the connection is weak, indirect, implicit, or uncertain. "
    "You are STRICTLY FORBIDDEN from providing any explanation, justification, or additional text. "
    "ONLY respond with '1' or '0' based on clear and explicit evidence in the text."
    "</instruction>"
)

# User message template with two positional `{}` placeholders; going by the
# wording they are the subnarrative label followed by the text to classify.
# NOTE(review): not referenced by any of the visible cells.
subnarrative_user_prompt = (
    "Please classify the following text as related to the subnarrative '{}' or not. "
    "Text\n"
    "```{}```"
)

# Creating group chats

In [53]:
import autogen

def create_narrative_agents(narratives_dict):
    """
    Creates one binary-classifier assistant agent per narrative.

    Args:
        narratives_dict (dict): Maps each narrative label to a dictionary
            with "definition" and "examples" entries.

    Returns:
        dict: Maps each generated agent name to
            {"agent": <AssistantAgent>, "narrative": <narrative label>}.

    Raises:
        ValueError: If two narratives produce the same agent name.
    """
    import hashlib

    llm_config = {
        "config_list": [
            {
                "model": "gpt-4o",
                "api_key": os.environ.get("OPENAI_API_KEY")
            }
        ],
        'temperature': 0
    }

    narrative_agents = {}
    for narrative, data in narratives_dict.items():
        # Agents are named by a short hash of the label rather than the label
        # itself, which keeps the name short and free of label punctuation.
        short_hash = hashlib.md5(narrative.encode()).hexdigest()[:6]
        key = f"agent_{short_hash}"
        if key in narrative_agents:
            # A truncated hash can collide; silently overwriting would drop a class.
            raise ValueError(
                f"Agent name collision for {key!r}: "
                f"{narrative_agents[key]['narrative']!r} and {narrative!r}"
            )

        agent = autogen.AssistantAgent(
            name=key,
            system_message=narrative_system_prompt.format(narrative, data['definition'], data['examples']),
            llm_config=llm_config
        )

        # Reuse the definition already stored in the dictionary, so the
        # description matches the system prompt and no files are re-read.
        agent.description = (
            "I am a classification model trained to do classify whether a given text is related to the following narrative: {}. "
            "I will be looking for {}"
        ).format(get_narrative_short_name(narrative), data['definition'])

        narrative_agents[key] = {"agent": agent, "narrative": narrative}

    return narrative_agents
    
def create_narratives_group_chat(narrative_agents, max_round=6):
    """
    Creates the group chat in which the narrative agents classify a text.

    Args:
        narrative_agents (dict): Maps agent names to dictionaries holding the
            agent object under the "agent" key.
        max_round (int): Value passed to the group chat as ``max_round``.
            Defaults to 6, the value that used to be hard-coded.

    Returns:
        tuple: (group chat, group chat manager, user proxy agent).
    """
    narratives_user_proxy_agent = autogen.UserProxyAgent(
        name="user",
        code_execution_config=False,
        llm_config={
            "config_list": [
                {
                    "model": "gpt-4o-mini",
                    "api_key": os.environ.get("OPENAI_API_KEY")
                }
            ]
        },
        human_input_mode='NEVER'
    )

    # Extract just the agent objects from the dictionary values
    narrative_agent_objects = [agent_data["agent"] for agent_data in narrative_agents.values()]

    # The transitions are registered as "disallowed": each classifier agent
    # is mapped to the user proxy as a speaker it must not hand over to.
    disallowed_transitions = {
        agent: [narratives_user_proxy_agent] for agent in narrative_agent_objects
    }

    narratives_group_chat = autogen.GroupChat(
        agents=[narratives_user_proxy_agent] + narrative_agent_objects,
        messages=[],
        max_round=max_round,
        send_introductions=True,
        allowed_or_disallowed_speaker_transitions=disallowed_transitions,
        speaker_transitions_type="disallowed",
    )

    narratives_manager = autogen.GroupChatManager(
        groupchat=narratives_group_chat,
        llm_config={
            "config_list": [
                {
                    "model": "gpt-4o",
                    "api_key": os.environ.get("OPENAI_API_KEY")
                }
            ]
        },
    )

    return narratives_group_chat, narratives_manager, narratives_user_proxy_agent


In [55]:
def create_subnarratives_agents(subnarratives_dict):
    """
    Creates one binary-classifier assistant agent per subnarrative.

    Args:
        subnarratives_dict (dict): Maps each subnarrative label to a
            dictionary with "definition" and "examples" entries.

    Returns:
        dict: Maps each generated agent name to
            {"agent": <AssistantAgent>, "subnarrative": <subnarrative label>}.

    Raises:
        ValueError: If two subnarratives produce the same agent name.
    """
    import hashlib

    llm_config = {
        "config_list": [
            {
                "model": "gpt-4o",
                "api_key": os.environ.get("OPENAI_API_KEY")
            }
        ],
        'temperature': 0
    }

    subnarrative_agents = {}
    for subnarrative, data in subnarratives_dict.items():
        # Agents are named by a short hash of the label rather than the label
        # itself, which keeps the name short and free of label punctuation.
        short_hash = hashlib.md5(subnarrative.encode()).hexdigest()[:6]
        key = f"agent_{short_hash}"
        if key in subnarrative_agents:
            # A truncated hash can collide; silently overwriting would drop a class.
            raise ValueError(
                f"Agent name collision for {key!r}: "
                f"{subnarrative_agents[key]['subnarrative']!r} and {subnarrative!r}"
            )

        agent = autogen.AssistantAgent(
            name=key,
            system_message=subnarrative_system_prompt.format(subnarrative, data['definition'], data['examples']),
            llm_config=llm_config
        )

        # Reuse the definition already stored in the dictionary, so the
        # description matches the system prompt and no files are re-read.
        agent.description = (
            "I am a classification model trained to do classify whether a given text is related to the following subnarrative: {}. "
            "I will be looking for {}"
        ).format(get_subnarrative_short_name(subnarrative), data['definition'])

        subnarrative_agents[key] = {"agent": agent, "subnarrative": subnarrative}

    return subnarrative_agents

def create_subnarratives_group_chat(subnarrative_agents):
    """
    Creates the group chat in which the subnarrative agents classify a text.

    Args:
        subnarrative_agents (dict): Maps agent names to dictionaries holding
            the agent object under the "agent" key.

    Returns:
        tuple: (group chat, group chat manager, user proxy agent).
    """
    def openai_config(model_name):
        # Same configuration shape for the proxy and the manager; only the model differs.
        return {
            "config_list": [
                {
                    "model": model_name,
                    "api_key": os.environ.get("OPENAI_API_KEY")
                }
            ]
        }

    proxy = autogen.UserProxyAgent(
        name="user",
        code_execution_config=False,
        llm_config=openai_config("gpt-4o-mini"),
        human_input_mode='NEVER'
    )

    classifiers = [entry["agent"] for entry in subnarrative_agents.values()]

    # Registered as "disallowed" transitions: classifier agent -> user proxy.
    blocked = {}
    for classifier in classifiers:
        blocked[classifier] = [proxy]

    group_chat = autogen.GroupChat(
        agents=[proxy] + classifiers,
        messages=[],
        max_round=len(subnarrative_agents) + 1,
        send_introductions=True,
        allowed_or_disallowed_speaker_transitions=blocked,
        speaker_transitions_type='disallowed',
    )

    manager = autogen.GroupChatManager(
        groupchat=group_chat,
        llm_config=openai_config("gpt-4o"),
        system_message="You are a group chat manager. You are asked to give the classification task to the agents that look relevant to the topic"
    )

    return group_chat, manager, proxy


In [56]:
def group_subnarrative_agents_by_narrative(subnarrative_agents):
    """
    Groups subnarrative agent entries by their parent narrative.

    Args:
        subnarrative_agents (dict): Maps agent names to entries that carry
            the subnarrative label under the "subnarrative" key.

    Returns:
        dict: Keys are "main_category: narrative" labels; values are lists of
            the entries whose subnarrative belongs to that narrative. The bare
            'Other' entry is left out.
    """
    by_narrative = {}

    for entry in subnarrative_agents.values():
        label = entry["subnarrative"]
        if label != 'Other':
            pieces = label.split(': ')
            parent = pieces[0] + ': ' + pieces[1]
            by_narrative.setdefault(parent, []).append(entry)

    return by_narrative

In [57]:
def create_groupchat_for_narrative(narrative, grouped_agents):
    """
    Creates a subnarrative group chat restricted to the agents of `narrative`.

    Args:
        narrative (str): Narrative label used as key into `grouped_agents`.
        grouped_agents (dict): Maps narrative labels to the agent entries of
            that narrative, either as a list of entries or as a dictionary
            keyed by agent name.

    Returns:
        tuple: Whatever create_subnarratives_group_chat returns
            (group chat, manager, user proxy agent).
    """
    members = grouped_agents[narrative]
    if not isinstance(members, dict):
        # group_subnarrative_agents_by_narrative stores plain lists, while
        # create_subnarratives_group_chat calls .values() and indexes by key.
        members = {entry["agent"].name: entry for entry in members}
    return create_subnarratives_group_chat(members)

In [31]:
def reset_agents(agent_list):
    """Call ``reset()`` on each element of `agent_list`, in iteration order."""
    for current_agent in agent_list:
        current_agent.reset()

# Narrative extraction

In [None]:
def extract_recognized_narratives(chat_history, narrative_agents=None):
    """
    Collects the narratives whose agent answered '1' in a chat history.

    Args:
        chat_history (list): Message dictionaries with 'name' and 'content'.
            Messages from 'user' and 'chat_manager', and messages without a
            name, are ignored.
        narrative_agents (dict, optional): Maps agent names to dictionaries
            with a "narrative" entry. When given, a speaker found in it is
            reported by that narrative label. Speakers not found (or all
            speakers when omitted) are reported by their name with '_'
            replaced by ' ' and '-' by ': '.

    Returns:
        list: Recognized narratives in message order; ['Other'] when none
            was recognized. 'Other' is dropped when anything else is present.
    """
    recognized_narratives = []
    for message in chat_history:
        name = message.get('name')
        if name is None or name in ('user', 'chat_manager'):
            continue

        # Tolerate stray whitespace around the verdict and non-text content.
        content = message.get('content')
        if not isinstance(content, str) or content.strip() != '1':
            continue

        if narrative_agents is not None and name in narrative_agents:
            narrative = narrative_agents[name]['narrative']
        else:
            narrative = name.replace('_', ' ').replace('-', ': ')
        recognized_narratives.append(narrative)

    if not recognized_narratives:
        recognized_narratives.append('Other')

    if len(recognized_narratives) > 1 and 'Other' in recognized_narratives:
        recognized_narratives.remove('Other')
    return recognized_narratives

def extract_recognized_subnarratives(chat_history, subnarrative_agents=None):
    """
    Collects the subnarratives whose agent answered '1' in a chat history.

    Args:
        chat_history (list): Message dictionaries with 'name' and 'content'.
            Messages from 'user' and 'chat_manager', and messages without a
            name, are ignored.
        subnarrative_agents (dict, optional): Maps agent names to dictionaries
            with a "subnarrative" entry. When given, a speaker found in it is
            reported by that subnarrative label. Speakers not found (or all
            speakers when omitted) are reported by their name with '_'
            replaced by ' ' and '-' by ': '.

    Returns:
        list: Recognized subnarratives in message order; ['Other'] when none
            was recognized. 'Other' is dropped when anything else is present.
    """
    recognized_subnarratives = []
    for message in chat_history:
        name = message.get('name')
        if name is None or name in ('user', 'chat_manager'):
            continue

        # Tolerate stray whitespace around the verdict and non-text content.
        content = message.get('content')
        if not isinstance(content, str) or content.strip() != '1':
            continue

        if subnarrative_agents is not None and name in subnarrative_agents:
            subnarrative = subnarrative_agents[name]['subnarrative']
        else:
            subnarrative = name.replace('_', ' ').replace('-', ': ')
        recognized_subnarratives.append(subnarrative)

    if not recognized_subnarratives:
        recognized_subnarratives.append('Other')

    if len(recognized_subnarratives) > 1 and 'Other' in recognized_subnarratives:
        recognized_subnarratives.remove('Other')
    return recognized_subnarratives

In [46]:
def extract_subnarratives_for_one_narrative(narrative, text, grouped_agents):
    """
    Classifies `text` against the subnarratives of a single narrative.

    Args:
        narrative (str): Narrative label used as key into `grouped_agents`.
        text (str): The text to classify.
        grouped_agents (dict): Maps narrative labels to the agent entries
            (each with "agent" and "subnarrative") of that narrative.

    Returns:
        list: Labels of the recognized subnarratives, or ['Other'] when the
            extraction step recognized none.
    """
    _, manager, user_proxy_agent = create_groupchat_for_narrative(narrative, grouped_agents)
    # Only the agents of this narrative take part in the chat.
    chat_result = user_proxy_agent.initiate_chat(
        manager,
        message="Here is the text that needs to be classified: \n```{}```\nYou are ONLY allowed to reply with '0' or '1'".format(text),
        summary_method='reflection_with_llm'
    )
    recognized = extract_recognized_subnarratives(chat_result.chat_history)

    # The extraction step reports speakers by their agent name ('_' -> ' ',
    # '-' -> ': '). Agent names are not the labels, so translate them back;
    # anything unknown (e.g. the 'Other' fallback) passes through unchanged.
    members = grouped_agents[narrative]
    entries = members.values() if isinstance(members, dict) else members
    label_by_reported_name = {
        entry["agent"].name.replace('_', ' ').replace('-', ': '): entry["subnarrative"]
        for entry in entries
    }
    return [label_by_reported_name.get(name, name) for name in recognized]

In [58]:
# One classifier agent per narrative, then the group chat, its manager and
# the user proxy that will start each classification chat.
narrative_agents = create_narrative_agents(narratives)
narratives_group_chat, narratives_manager, narratives_user_proxy_agent = create_narratives_group_chat(narrative_agents)

In [59]:
# One classifier agent per subnarrative, plus a single group chat over all of them.
# NOTE(review): the visible cells below build a per-narrative chat from
# grouped_agents instead; this all-subnarratives chat looks unused — confirm.
subnarrative_agents = create_subnarratives_agents(subnarratives)
subnarrative_group_chat, subnarrative_manager, subnarratives_user_proxy_agent = create_subnarratives_group_chat(subnarrative_agents)

In [60]:
# "main_category: narrative" label -> entries of the subnarrative agents under it.
grouped_agents = group_subnarrative_agents_by_narrative(subnarrative_agents)

In [39]:
def classify_text_with_narratives(text, narrative_agents, narratives_manager, narratives_user_proxy_agent):
    """
    Classifies the text using narrative agents.

    Args:
        text (str): The text to classify.
        narrative_agents (dict): Maps agent names to dictionaries with
            "agent" and "narrative" entries.
        narratives_manager (GroupChatManager): The manager for the group chat.
        narratives_user_proxy_agent (UserProxyAgent): The user proxy agent for the group chat.

    Returns:
        list: List of recognized narratives.
    """
    chat_result = narratives_user_proxy_agent.initiate_chat(
        narratives_manager,
        message="Here is the text that needs to be classified: \n```{}```\n ### \n You are ONLY allowed to reply with '0' or '1'".format(text),
        summary_method='reflection_with_llm'
    )

    recognized = extract_recognized_narratives(chat_result.chat_history)

    # The extraction step reports speakers by their agent name ('_' -> ' ',
    # '-' -> ': '). Agent names are not the labels, so translate them back;
    # anything unknown (e.g. the 'Other' fallback) passes through unchanged.
    label_by_reported_name = {
        entry["agent"].name.replace('_', ' ').replace('-', ': '): entry["narrative"]
        for entry in narrative_agents.values()
    }
    narratives = [label_by_reported_name.get(name, name) for name in recognized]
    # 'Other' is only meaningful on its own.
    if len(narratives) > 1 and 'Other' in narratives:
        narratives.remove('Other')

    # Reset the agent objects (not the dictionary keys) and the chat owned by
    # the manager that was passed in, rather than a module-level global.
    reset_agents([entry["agent"] for entry in narrative_agents.values()])
    narratives_manager.groupchat.reset()
    narratives_user_proxy_agent.reset()

    return narratives


In [40]:
def read_text(file_id, base_path='data/EN/raw-documents'):
    """Read the document named `file_id` inside `base_path` and return its UTF-8 text."""
    target = os.path.join(base_path, str(file_id))
    with open(target, mode='r', encoding='utf-8') as source:
        text_content = source.read()
    return text_content

In [44]:
# Load one document from the test-set folder instead of read_text's default base path.
text = read_text('CC_TEST_00000.txt', base_path='testset/EN/subtask-2-documents')

In [47]:
# Two-stage classification of the sample document: narratives first, then
# the subnarratives of the first recognized narrative only.
# NOTE(review): this rebinds the module-level `narratives` and `subnarratives`
# lookup tables to the classification results.
# NOTE(review): grouped_agents never has an 'Other' key (the grouping skips
# it), so an 'Other' result would presumably fail in the follow-up call — confirm.
narratives = classify_text_with_narratives(text, narrative_agents, narratives_manager, narratives_user_proxy_agent)
narrative = narratives[0]
subnarratives = extract_subnarratives_for_one_narrative(narrative, text, grouped_agents)
print('Narrative:', narrative)
print('Subnarratives:', subnarratives)

[33muser[0m (to chat_manager):

Here is the text that needs to be classified: 
```‘Absolute Genius’: How Three Alarmist Billionaires Bankrolled The Fake Climate Catastrophe 

 Do you think that the constant catastrophizing of weather and climate in the mainstream media, politics, and science has just appeared by accident? [emphasis, links added]

Over the last few years, the BBC and the Guardian, [working] as of one mind, decided to float improbable ‘tipping point’ scares under cover of ‘scientists say’, while UN officials concluded that we had two years to save a ‘boiling’ planet and the ubiquitous ‘Jim’ Dale has been given free rein to make it up as he goes along on Talk TV and GB News.

Of course, all this didn’t suddenly happen.

Each of these examples is a testament to an extraordinary corruption of the true scientific process – an “amazing tale” according to political science writer Roger Pielke Jr., “a story of how wealth and power sought to shape climate science in pursuit of

BadRequestError: Error code: 400 - {'error': {'message': "Invalid 'messages[2].name': string too long. Expected a string with maximum length 64, but got a string with length 74 instead.", 'type': 'invalid_request_error', 'param': 'messages[2].name', 'code': 'string_above_max_length'}}