# In [1]:
import pandas as pd
import json

# Groups exported when the caller does not supply a mapping.
# Keys are the identifiers written to the JSON output; values are the exact
# strings matched against the 'gname' column.
_DEFAULT_GROUP_MAPPING = {
    'ISIL': 'Islamic State of Iraq and the Levant (ISIL)',
    'taliban': 'Taliban',
    'SL': 'Shining Path (SL)',
}


def _build_group_sankey(group_df, top_n):
    """Build the ``{"nodes": [...], "links": [...]}`` structure for one group.

    Args:
        group_df: Rows of a single group, with the columns
            ``attacktype1_txt`` and ``targtype1_txt``.
        top_n: How many of the most frequent attack types and target types
            to keep as nodes.

    Returns:
        A dict with ``"nodes"`` (attack nodes first, then target nodes) and
        ``"links"`` whose ``source``/``target`` are positions in ``"nodes"``.
        Both lists are empty when ``group_df`` has no rows.
    """
    if group_df.empty:
        return {"nodes": [], "links": []}

    # value_counts() orders by descending frequency, so head(top_n) yields
    # the most common categories in rank order.
    top_attacks = group_df['attacktype1_txt'].value_counts().head(top_n).index.tolist()
    top_targets = group_df['targtype1_txt'].value_counts().head(top_n).index.tolist()

    # Keep only events whose attack type AND target type are both top-ranked.
    in_top = (
        group_df['attacktype1_txt'].isin(top_attacks)
        & group_df['targtype1_txt'].isin(top_targets)
    )
    flows = (
        group_df[in_top]
        .groupby(['attacktype1_txt', 'targtype1_txt'])
        .size()
        .reset_index(name='value')
    )

    nodes = [{"name": name, "type": "attack"} for name in top_attacks]
    nodes += [{"name": name, "type": "target"} for name in top_targets]

    # Separate lookups per side: the same label may legitimately appear as
    # both an attack type and a target type. Target indices continue after
    # the attack nodes because both share the single "nodes" list.
    attack_index = {name: i for i, name in enumerate(top_attacks)}
    target_index = {
        name: i for i, name in enumerate(top_targets, start=len(top_attacks))
    }

    # Every (attack, target) pair in `flows` comes from the filtered rows,
    # so both lookups are guaranteed to succeed.
    links = [
        {
            "source": attack_index[attack],
            "target": target_index[target],
            "value": int(count),  # numpy ints are not JSON serializable
        }
        for attack, target, count in zip(
            flows['attacktype1_txt'], flows['targtype1_txt'], flows['value']
        )
    ]

    return {"nodes": nodes, "links": links}


def process_group_sankey_data(file_path, output_json, top_n=5, group_mapping=None):
    """Export attack-type -> target-type Sankey data per group as JSON.

    Reads the columns ``gname``, ``attacktype1_txt`` and ``targtype1_txt``
    from a CSV file, and for every configured group counts how often each of
    its most frequent attack types is paired with each of its most frequent
    target types. The result is written to ``output_json`` as an object
    mapping each group key to ``{"nodes": [...], "links": [...]}``.

    Args:
        file_path: Path of the input CSV (read as ISO-8859-1).
        output_json: Path of the JSON file to write (UTF-8, indent 2).
        top_n: Number of top attack types and top target types kept per
            group. Defaults to 5.
        group_mapping: Optional dict mapping an output key to the exact
            ``gname`` value to select. Defaults to ISIL, Taliban and
            Shining Path.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")
    if group_mapping is None:
        group_mapping = _DEFAULT_GROUP_MAPPING

    cols = ['gname', 'attacktype1_txt', 'targtype1_txt']
    df = pd.read_csv(file_path, encoding='ISO-8859-1', usecols=cols)

    # Groups with no matching rows still get an entry (with empty lists) so
    # consumers can rely on every key being present.
    output_data = {
        key: _build_group_sankey(df[df['gname'] == gtd_name], top_n)
        for key, gtd_name in group_mapping.items()
    }

    with open(output_json, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2)

# Build the Sankey JSON for the default groups from the terrorism CSV.
process_group_sankey_data(
    file_path='terrorism_dataset.csv',
    output_json='groups_flows.json',
)