In [1]:
import pandas as pd
import os
import sys
import numpy as np

# Resolve the project root (the parent of the directory the kernel started in)
# exactly once per kernel session. The path is derived from the *current*
# working directory, and this cell changes that directory, so recomputing it on
# a re-run would climb one level further each time. Reusing the value from the
# first run keeps the cell idempotent.
if "ROOT_DIR" not in globals():
    ROOT_DIR = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Make the project importable (e.g. `src.*`) without stacking duplicate
# entries on sys.path when the cell is executed again.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

# Set the current working directory to the project root so the relative data
# paths used in later cells resolve from there.
os.chdir(ROOT_DIR)

In [2]:
import pandas as pd
import json
from tqdm.auto import tqdm
import time
import os

from src.llm.chains import create_narrative_generator_chain, create_subnarrative_generator_chain

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from src.data_management.label_parser import parse_json_for_narratives_subnarratives
import pandas as pd

# NOTE(review): this first banner announces a label-format update, but nothing
# in this cell rewrites any CSV -- it looks like a leftover; confirm and drop.
print("--- Updating label formats in source CSVs ---")

# --- Load data for augmentation ---
print("\n--- Loading data for augmentation ---")

# Labels selected for augmentation (narrative level).
print("Loading target narratives...")
target_narratives_df = pd.read_csv("least_perf_narratives.csv")

# Labels selected for augmentation (sub-narrative level).
print("Loading target subnarratives...")
target_subnarratives_df = pd.read_csv("least_perf_subnarratives.csv")

# Plain Python lists of the label strings, one per target row.
target_narratives_list, target_subnarratives_list = (
    frame['label'].tolist()
    for frame in (target_narratives_df, target_subnarratives_df)
)

# Reference tables holding the definition / example text for each label.
narrative_definitions_df, subnarrative_definitions_df = map(
    pd.read_csv,
    ("data/narrative_definitions.csv", "data/subnarrative_definitions.csv"),
)

print("Data loaded.")



--- Updating label formats in source CSVs ---

--- Loading data for augmentation ---
Loading target narratives...
Loading target subnarratives...
Data loaded.


In [4]:
def _first_match_lookup(definitions_df, key_col, value_col):
    """Build a ``{key: value}`` dict using the first row for each distinct key.

    Equivalent to filtering ``definitions_df`` by ``key_col == key`` and taking
    ``[value_col].values[0]``, but computed once instead of once per label.
    """
    first_rows = definitions_df.drop_duplicates(subset=key_col, keep="first")
    return dict(zip(first_rows[key_col], first_rows[value_col]))


def _attach_definitions(target_df, definitions_df, key_col, example_col):
    """Add 'definition' and 'examples' columns to ``target_df`` in place.

    Each value is looked up by ``target_df['label']`` in ``definitions_df``
    (matching on ``key_col``). Labels that have no matching row get ``None``.
    """
    definition_by_label = _first_match_lookup(definitions_df, key_col, 'definition')
    examples_by_label = _first_match_lookup(definitions_df, key_col, example_col)

    # dict.get returns None for unknown labels, matching the previous fallback.
    target_df['definition'] = target_df['label'].apply(
        lambda label: definition_by_label.get(label)
    )
    target_df['examples'] = target_df['label'].apply(
        lambda label: examples_by_label.get(label)
    )


# NOTE(review): the narrative table is read through an 'example' column while
# the sub-narrative table uses 'examples'; both names are kept exactly as the
# original cell had them -- confirm against the CSV headers.
_attach_definitions(target_narratives_df, narrative_definitions_df, 'narrative', 'example')
_attach_definitions(target_subnarratives_df, subnarrative_definitions_df, 'subnarrative', 'examples')

In [5]:
# How many synthetic texts to request from the generator for each label.
NUM_EXAMPLES_PER_LABEL = 10

# Build both generator chains; the narrative chain is created first, then the
# sub-narrative chain (tuple elements are evaluated left to right).
narrative_generator, subnarrative_generator = (
    create_narrative_generator_chain(),
    create_subnarrative_generator_chain(),
)

Creating generator chain...
Generator chain created successfully.
Creating sub-narrative generator chain...
Sub-narrative generator chain created successfully.


In [6]:
# from tqdm.auto import tqdm

# generated_narrative_texts = []

# for idx, row in tqdm(target_narratives_df.iterrows(), total=len(target_narratives_df)):
#     print(f"Generating narratives for label: {row['label']}")
#     texts = []
#     # Prepare input for the generator
#     input_dict = {
#         "narrative_name": row['label'],
#         "narrative_def": row['definition'],
#         "num_examples": NUM_EXAMPLES_PER_LABEL,
#         "narrative_example": row['examples'] if pd.notnull(row['examples']) else "",
#         "format_instructions": "Return only the generated text as a string."
#     }
    
#     result = narrative_generator.invoke(input_dict)
#     # Extract the generated text from the result
#     articles = result.articles  # Access the articles from the Pydantic object
#     texts = [article.generated_text for article in articles]  # Extract the generated text from each article
#     generated_narrative_texts.append(texts)

# # Add the generated texts to the DataFrame
# target_narratives_df['generated_texts'] = generated_narrative_texts



In [7]:
from tqdm.auto import tqdm

# Generate NUM_EXAMPLES_PER_LABEL synthetic texts for every target
# sub-narrative and export them as JSON. Every label costs one slow LLM call,
# so the export file is rewritten after each label: if a later call fails, the
# texts generated so far are already on disk instead of being lost.
subnarrative_export_path = "generated_subnarrative_texts.json"


def _blank_if_missing(value):
    """Return ``value`` unchanged, or "" when it is None/NaN.

    Keeps a literal 'None' / 'nan' from being rendered into the prompt.
    """
    return value if pd.notnull(value) else ""


def _write_subnarrative_export(records, path):
    """Write ``records`` to ``path`` as indented, non-ASCII-preserving UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(records, f, ensure_ascii=False, indent=2)


# One {"subnarrative", "generated_texts"} record per label, in row order.
subnarrative_texts_json = []

for row in tqdm(target_subnarratives_df.to_dict("records"), total=len(target_subnarratives_df)):
    # tqdm.write prints the message without corrupting the progress bar.
    tqdm.write(f"Generating subnarratives for label: {row['label']}")

    # Prepare input for the generator
    input_dict = {
        "subnarrative_name": row['label'],
        "subnarrative_def": _blank_if_missing(row['definition']),
        "num_examples": NUM_EXAMPLES_PER_LABEL,
        "subnarrative_example": _blank_if_missing(row['examples']),
        "format_instructions": "Return only the generated text as a string."
    }

    # The chain returns an object exposing `.articles`; each article carries
    # its text in `.generated_text`.
    result = subnarrative_generator.invoke(input_dict)
    subnarrative_texts_json.append({
        "subnarrative": row["label"],
        "generated_texts": [article.generated_text for article in result.articles]
    })

    # Checkpoint the progress made so far.
    _write_subnarrative_export(subnarrative_texts_json, subnarrative_export_path)

# Add the generated texts to the DataFrame (one list of texts per row).
generated_subnarrative_texts = [record["generated_texts"] for record in subnarrative_texts_json]
target_subnarratives_df['generated_texts'] = generated_subnarrative_texts

# Final save; also covers the case of an empty target table, where the loop
# body never ran and the file must still contain an empty list.
_write_subnarrative_export(subnarrative_texts_json, subnarrative_export_path)

  0%|          | 0/28 [00:00<?, ?it/s]

Generating subnarratives for label: CC: Controversy about green technologies: Nuclear energy is not climate friendly


  4%|▎         | 1/28 [00:33<15:09, 33.70s/it]

Generating subnarratives for label: CC: Amplifying Climate Fears: Earth will be uninhabitable soon


  7%|▋         | 2/28 [01:04<13:54, 32.10s/it]

Generating subnarratives for label: CC: Amplifying Climate Fears: Whatever we do it is already too late


 11%|█         | 3/28 [01:44<14:55, 35.80s/it]

Generating subnarratives for label: CC: Climate change is beneficial: CO2 is beneficial


 14%|█▍        | 4/28 [02:15<13:25, 33.56s/it]

Generating subnarratives for label: CC: Climate change is beneficial: Other


 18%|█▊        | 5/28 [02:47<12:44, 33.23s/it]

Generating subnarratives for label: CC: Controversy about green technologies: Other


 21%|██▏       | 6/28 [03:24<12:37, 34.44s/it]

Generating subnarratives for label: CC: Climate change is beneficial: Temperature increase is beneficial


 25%|██▌       | 7/28 [03:56<11:46, 33.64s/it]

Generating subnarratives for label: CC: Downplaying climate change: Human activities do not impact climate change


 29%|██▊       | 8/28 [04:27<10:56, 32.83s/it]

Generating subnarratives for label: CC: Downplaying climate change: Humans and nature will adapt to the changes


 32%|███▏      | 9/28 [05:05<10:53, 34.39s/it]

Generating subnarratives for label: CC: Downplaying climate change: Ice is not melting


 36%|███▌      | 10/28 [05:31<09:35, 31.95s/it]

Generating subnarratives for label: CC: Downplaying climate change: Other


 39%|███▉      | 11/28 [06:09<09:33, 33.74s/it]

Generating subnarratives for label: CC: Downplaying climate change: Sea levels are not rising


 43%|████▎     | 12/28 [06:44<09:05, 34.07s/it]

Generating subnarratives for label: CC: Criticism of institutions and authorities: Other


 46%|████▋     | 13/28 [07:19<08:34, 34.30s/it]

Generating subnarratives for label: CC: Criticism of climate policies: Climate policies are only for profit


 50%|█████     | 14/28 [07:51<07:51, 33.71s/it]

Generating subnarratives for label: CC: Criticism of institutions and authorities: Criticism of international entities


 54%|█████▎    | 15/28 [08:38<08:09, 37.68s/it]

Generating subnarratives for label: CC: Criticism of institutions and authorities: Criticism of the EU


 57%|█████▋    | 16/28 [09:09<07:06, 35.52s/it]

Generating subnarratives for label: CC: Criticism of climate movement: Climate movement is corrupt


 61%|██████    | 17/28 [09:41<06:20, 34.62s/it]

Generating subnarratives for label: CC: Criticism of climate movement: Ad hominem attacks on key activists


 64%|██████▍   | 18/28 [10:14<05:40, 34.08s/it]

Generating subnarratives for label: CC: Controversy about green technologies: Renewable energy is unreliable


 68%|██████▊   | 19/28 [10:42<04:50, 32.31s/it]

Generating subnarratives for label: CC: Controversy about green technologies: Renewable energy is dangerous


 71%|███████▏  | 20/28 [11:11<04:09, 31.23s/it]

Generating subnarratives for label: CC: Controversy about green technologies: Renewable energy is costly


 75%|███████▌  | 21/28 [11:43<03:40, 31.56s/it]

Generating subnarratives for label: CC: Downplaying climate change: Weather suggests the trend is global cooling


 79%|███████▊  | 22/28 [12:18<03:15, 32.61s/it]

Generating subnarratives for label: CC: Green policies are geopolitical instruments: Climate-related international relations are abusive/exploitative


 82%|████████▏ | 23/28 [12:47<02:36, 31.39s/it]

Generating subnarratives for label: CC: Green policies are geopolitical instruments: Green activities are a form of neo-colonialism


 86%|████████▌ | 24/28 [13:26<02:15, 33.87s/it]

Generating subnarratives for label: CC: Green policies are geopolitical instruments: Other


 89%|████████▉ | 25/28 [13:54<01:36, 32.03s/it]

Generating subnarratives for label: CC: Questioning the measurements and science: Data shows no temperature increase


 93%|█████████▎| 26/28 [14:22<01:01, 30.70s/it]

Generating subnarratives for label: CC: Hidden plots by secret schemes of powerful groups: Other


 96%|█████████▋| 27/28 [14:50<00:29, 29.92s/it]

Generating subnarratives for label: CC: Downplaying climate change: Temperature increase does not have significant impact


100%|██████████| 28/28 [15:12<00:00, 32.61s/it]

