In [None]:
import pandas as pd

# Input CSV of predictions; later code reads its `article_id` and
# `predicted_labels` columns.
csv_file_path = 'final_predictions.csv'
# Destination of the tab-separated file written at the end of the script.
output_tsv_path = "final_predictions_transformed.tsv"
data = pd.read_csv(filepath_or_buffer=csv_file_path)

In [None]:
# Narratives that receive the "CC" prefix, listed in table order.
_CC_NARRATIVES = (
    "Amplifying Climate Fears",
    "Climate change is beneficial",
    "Controversy about green technologies",
    "Criticism of climate movement",
    "Criticism of climate policies",
    "Criticism of institutions and authorities",
    "Downplaying climate change",
    "Green policies are geopolitical instruments",
    "Hidden plots by secret schemes of powerful groups",
    "Questioning the measurements and science",
)

# Narratives that receive the "URW" prefix. "Hidden plots ..." appears in
# both groups, so it ends up with both prefixes (CC first, then URW).
_URW_NARRATIVES = (
    "Hidden plots by secret schemes of powerful groups",
    "Amplifying war-related fears",
    "Blaming the war on others rather than the invader",
    "Discrediting Ukraine",
    "Discrediting the West, Diplomacy",
    "Distrust towards Media",
    "Negative Consequences for the West",
    "Overpraising the West",
    "Praise of Russia",
    "Russia is the Victim",
    "Speculating war outcomes",
)

# Maps a narrative name to the list of prefixes it is emitted under.
narrative_prefix_mapping = {}
for _prefix, _names in (("CC", _CC_NARRATIVES), ("URW", _URW_NARRATIVES)):
    for _name in _names:
        narrative_prefix_mapping.setdefault(_name, []).append(_prefix)

# 'Other' is a known narrative but is never prefixed.
narrative_prefix_mapping["Other"] = []

def get_prefixes(narrative):
    """Return the prefixes registered for *narrative*.

    Looks the name up in ``narrative_prefix_mapping``; a name that is not a
    key of that table yields an empty list.
    """
    try:
        return narrative_prefix_mapping[narrative]
    except KeyError:
        return []

def _split_label_entries(label):
    """Split a comma-separated label string into individual label entries.

    A comma can also occur inside a label (e.g. ``West,_Diplomacy``). A piece
    that starts with ``_`` is therefore treated as the continuation of the
    previous piece and glued back onto it with the comma restored. Empty
    pieces are dropped.
    """
    entries = []
    pending = ""
    for piece in label.split(','):
        piece = piece.strip()
        if pending and piece.startswith('_'):
            # Continuation of the label currently being assembled.
            pending += ',' + piece
            continue
        if pending:
            entries.append(pending)
        pending = piece
    if pending:
        entries.append(pending)
    return entries


def process_labels(label):
    """Turn a raw predicted-label string into narrative/subnarrative columns.

    Each entry of the comma-separated *label* has the form
    ``Narrative__Subnarrative`` or just ``Narrative``, with ``_`` standing in
    for spaces. Narratives known to ``get_prefixes`` are emitted once per
    prefix as ``"<prefix>: <narrative>"``; the matching subnarrative is
    ``"<prefix>: <narrative>: <subnarrative>"``. An entry without a
    subnarrative gets ``Other`` as its subnarrative.

    Args:
        label: The raw label string. Any non-string value (for example the
            NaN that pandas produces for an empty cell, or ``None``) is
            treated like an empty string.

    Returns:
        A ``(narratives, subnarratives)`` tuple of ``;``-joined strings, each
        de-duplicated and sorted independently of the other.
    """
    # An empty CSV cell is read by pandas as float NaN, which has no
    # ``.split``; report it the same way as an empty label string.
    if not isinstance(label, str):
        return '', ''

    narratives = set()
    subnarratives = set()

    for entry in _split_label_entries(label):
        # Plain "Other" (any case) and the exact "Other__Other" both collapse
        # to the unprefixed Other/Other pair.
        if entry == "Other__Other" or entry.lower() == 'other':
            narratives.add("Other")
            subnarratives.add("Other")
            continue

        has_subnarrative = '__' in entry
        if has_subnarrative:
            raw_narrative, raw_subnarrative = entry.split('__', 1)
            narrative = raw_narrative.replace('_', ' ').strip()
            subnarrative = raw_subnarrative.replace('_', ' ').strip()
            # Normalise the capitalisation of an explicit "other".
            if subnarrative.lower() == 'other':
                subnarrative = "Other"
        else:
            narrative = entry.replace('_', ' ').strip()
            subnarrative = "Other"

        prefixes = get_prefixes(narrative)
        if prefixes:
            for prefix in prefixes:
                prefixed_narrative = f"{prefix}: {narrative}"
                narratives.add(prefixed_narrative)
                subnarratives.add(f"{prefixed_narrative}: {subnarrative}")
            continue

        # 'Other' or unrecognized narrative: emitted without a prefix.
        narratives.add(narrative)
        if has_subnarrative and subnarrative == "Other":
            # An explicit "<narrative>__Other" yields a bare "Other" here,
            # whereas a narrative given alone yields "<narrative>: Other".
            subnarratives.add("Other")
        else:
            subnarratives.add(f"{narrative}: {subnarrative}")

    # Sets removed duplicates; sorting makes the output deterministic.
    return ';'.join(sorted(narratives)), ';'.join(sorted(subnarratives))

# Apply the processing function to the 'predicted_labels' column. Each result
# is a (narratives, subnarratives) pair; the two columns are built by
# iterating the pairs rather than unpacking zip(*...), which raises
# ValueError when the frame has no rows.
processed_labels = data['predicted_labels'].apply(process_labels)
data['narrative'] = [narratives for narratives, _ in processed_labels]
data['subnarrative'] = [subnarratives for _, subnarratives in processed_labels]

# Create the final output DataFrame with the header names used in the TSV.
# rename() returns a new frame, so `data` keeps its own column names.
output_data = data[['article_id', 'narrative', 'subnarrative']].rename(
    columns={
        'narrative': 'narrative_1;...;narrative_N',
        'subnarrative': 'subnarrative_1;...;subnarrative_N',
    }
)

# Save as TSV (tab-separated, without the index column)
output_data.to_csv(output_tsv_path, sep='\t', index=False)

print(f"TSV file saved to {output_tsv_path}")


