In [None]:
import pandas as pd
import ast


# Input CSV of model predictions. The code further down reads its 'p_system',
# 'language' and 'article_id' columns.
csv_file_path = 'gpt4o-mini_preds.csv'
data = pd.read_csv(csv_file_path)

In [None]:
# Maps each known narrative label to the list of prefixes it is emitted under.
# A narrative listed with more than one prefix is emitted once per prefix, and
# an empty list means the label is emitted unchanged.
# NOTE(review): "CC" and "URW" presumably stand for the climate-change and
# Ukraine-Russia-war topic groups -- confirm against the task taxonomy.
narrative_prefix_mapping = {
    "Amplifying Climate Fears": ["CC"],
    "Climate change is beneficial": ["CC"],
    "Controversy about green technologies": ["CC"],
    "Criticism of climate movement": ["CC"],
    "Criticism of climate policies": ["CC"],
    "Criticism of institutions and authorities": ["CC"],
    "Downplaying climate change": ["CC"],
    "Green policies are geopolitical instruments": ["CC"],
    "Hidden plots by secret schemes of powerful groups": ["CC", "URW"],
    "Questioning the measurements and science": ["CC"],
    "Amplifying war-related fears": ["URW"],
    "Blaming the war on others rather than the invader": ["URW"],
    "Discrediting Ukraine": ["URW"],
    "Discrediting the West, Diplomacy": ["URW"],
    "Distrust towards Media": ["URW"],
    "Negative Consequences for the West": ["URW"],
    "Overpraising the West": ["URW"],
    "Praise of Russia": ["URW"],
    "Russia is the Victim": ["URW"],
    "Speculating war outcomes": ["URW"],
    "Other": []  # 'Other' does not get a prefix
}

def get_prefixes(narrative):
    """Return the prefixes registered for *narrative*.

    Looks the label up in ``narrative_prefix_mapping``. A label that is not a
    key of the mapping yields a new empty list.
    """
    try:
        return narrative_prefix_mapping[narrative]
    except KeyError:
        return []

def prefix_narratives(narratives):
    """Return the sorted, de-duplicated prefixed forms of *narratives*.

    Each label becomes ``"<prefix>: <label>"`` once for every prefix that
    ``get_prefixes`` returns for it. A label with no prefixes ('Other' or an
    unrecognized narrative) is kept unchanged.
    """
    labelled = set()
    for label in narratives:
        tags = get_prefixes(label)
        if not tags:
            # No prefix for 'Other' or unrecognized narratives
            labelled.add(label)
        else:
            labelled.update(f"{tag}: {label}" for tag in tags)
    return sorted(labelled)

def prefix_subnarratives(narratives, subnarratives):
    """Return the sorted, de-duplicated prefixed forms of *subnarratives*.

    The two inputs are paired by position with ``zip``, so any surplus items
    in the longer one are ignored. For each (narrative, subnarrative) pair:

    * narrative ``"Other"`` contributes the single label ``"Other"``;
    * a narrative with prefixes contributes, per prefix, either
      ``"<prefix>: <narrative>: Other"`` when the subnarrative is "other"
      (case-insensitive) or ``"<prefix>: <subnarrative>"`` otherwise;
    * a narrative without prefixes contributes ``"Other"`` or the
      subnarrative itself, unprefixed.
    """
    labels = set()
    for parent, child in zip(narratives, subnarratives):
        if parent == "Other":
            labels.add("Other")
            continue

        tags = get_prefixes(parent)
        child_is_other = child.lower() == 'other'

        if not tags:
            # Unrecognized narrative: nothing to prefix with
            labels.add("Other" if child_is_other else f"{child}")
        elif child_is_other:
            # An "Other" subnarrative is qualified by its parent narrative
            labels.update(f"{tag}: {parent}: Other" for tag in tags)
        else:
            # Prefix only the subnarrative
            labels.update(f"{tag}: {child}" for tag in tags)
    return sorted(labels)

def _as_label_list(value):
    """Coerce one parsed field to a sequence of labels: None -> [], 'x' -> ['x']."""
    if value is None:
        return []
    if isinstance(value, str):
        # A bare string would otherwise be iterated character by character.
        return [value]
    return value


def process_narratives_and_subnarratives(p_system):
    """Turn one raw prediction into ';'-joined prefixed label strings.

    Args:
        p_system: Text of a Python dict literal that may contain the keys
            'narrative' and 'subnarrative'. Each value is a list of labels;
            a single bare string or ``None`` is also accepted.

    Returns:
        A ``(narratives, subnarratives)`` tuple of ';'-joined strings. When
        *p_system* cannot be parsed into a dict, the problem is printed and
        ``('', '')`` is returned so that one bad row does not abort a whole
        ``apply`` run.
    """
    try:
        parsed_data = ast.literal_eval(p_system)
        raw_narratives = parsed_data.get('narrative', [])
        raw_subnarratives = parsed_data.get('subnarrative', [])
    except (ValueError, TypeError, SyntaxError, MemoryError,
            RecursionError, AttributeError) as e:
        # The first five are what ast.literal_eval is documented to raise on
        # malformed input; AttributeError covers a literal that is not a dict.
        print(f"Error parsing p_system: {p_system}, error: {e}")
        return '', ''

    raw_narratives = _as_label_list(raw_narratives)
    raw_subnarratives = _as_label_list(raw_subnarratives)

    prefixed_narratives = prefix_narratives(raw_narratives)
    prefixed_subnarratives = prefix_subnarratives(raw_narratives, raw_subnarratives)

    return ';'.join(prefixed_narratives), ';'.join(prefixed_subnarratives)

# Parse each row's prediction once and store the two ';'-joined label strings.
# The columns are built from a list instead of unpacking zip(*...): for a CSV
# without rows zip(*[]) yields nothing, and the two-target unpack would then
# raise ValueError.
label_pairs = [process_narratives_and_subnarratives(raw) for raw in data['p_system']]
data['narrative'] = [pair[0] for pair in label_pairs]
data['subnarrative'] = [pair[1] for pair in label_pairs]

# Header names written to the TSV files in place of the working column names
output_header = {
    'narrative': 'narrative_1;...;narrative_N',
    'subnarrative': 'subnarrative_1;...;subnarrative_N',
}

# Group by the 'language' column and save separate TSV files
# NOTE(review): rows whose 'language' is missing never compare equal to the
# NaN returned by unique(), so they end up in no file -- confirm this is
# acceptable for the input data.
languages = data['language'].unique()
for lang in languages:
    lang_data = data[data['language'] == lang]

    # Create the final output DataFrame
    output_data = lang_data[['article_id', 'narrative', 'subnarrative']].rename(columns=output_header)

    # Save as TSV
    output_tsv_path = f'{lang}_validation_predictions_transformed_6.tsv'
    output_data.to_csv(output_tsv_path, sep='\t', index=False)
    print(f"TSV file saved to {output_tsv_path}")

# For demonstration purposes, let's print the resulting DataFrame
print("\nProcessed DataFrame:")
print(data[['article_id', 'narrative', 'subnarrative']])