# In [1]:
import json
from pathlib import Path

import pandas as pd

# CSV file that the script loads via load_and_clean_data().
INPUT_FILE = "terrorism_dataset.csv"

# Full group name as it appears in the dataset's "gname" column, mapped to
# the short key used for that group in the generated JSON.
GROUPS = {
    "Islamic State of Iraq and the Levant (ISIL)": "ISIL",
    "Shining Path (SL)": "SL",
    "Taliban": "taliban",
}

def load_and_clean_data(filepath):
    """Read the dataset CSV and add an integer ``year`` column.

    Args:
        filepath: Location of the CSV file; it is decoded as ISO-8859-1.

    Returns:
        The loaded DataFrame with an extra ``year`` column holding the
        values of ``iyear`` cast to ``int``.

    Raises:
        KeyError: If the file has no ``iyear`` column.
        ValueError: If an ``iyear`` value cannot be cast to ``int``.
    """
    raw = pd.read_csv(filepath, low_memory=False, encoding="ISO-8859-1")
    years = raw["iyear"].astype(int)
    return raw.assign(year=years)

def generate_gtd_groups_json(
    df,
    *,
    groups=None,
    output_path="JSON/gtd_groups.json",
):
    """Write per-group, per-year incident counts by country to a JSON file.

    Rows of ``df`` whose ``gname`` is one of the configured groups are
    counted per (group, year, country). The resulting JSON has the shape::

        {short_name: {"<year>": {"total_count": int,
                                 "total_percentage": float,
                                 "countries": [{"country": str,
                                                "count": int}, ...]}}}

    ``total_percentage`` is, despite its name, the year's share of the
    group's overall count expressed as a fraction rounded to 3 decimals.

    Args:
        df: DataFrame with ``gname``, ``year`` and ``country_txt`` columns.
        groups: Mapping of full group name (as found in ``gname``) to the
            short key used in the output. Defaults to the module-level
            ``GROUPS``.
        output_path: Destination of the JSON file. Missing parent
            directories are created.

    Returns:
        None. The result is written to ``output_path``.
    """
    if groups is None:
        groups = GROUPS

    tracked = df[df["gname"].isin(list(groups))]
    counts = (
        tracked.groupby(["gname", "year", "country_txt"])
        .size()
        .reset_index(name="count")
    )

    # Every configured group gets an entry, even when it has no rows.
    output = {short_name: {} for short_name in groups.values()}

    for gname, short_name in groups.items():
        group_df = counts[counts["gname"] == gname]
        # Summing the group's own rows (instead of indexing a precomputed
        # dict) avoids a KeyError for groups absent from the data. When the
        # group has no rows the year loop below simply does not run, so the
        # division never sees a zero total.
        group_total = int(group_df["count"].sum())

        for year, year_df in group_df.groupby("year"):
            year_total = int(year_df["count"].sum())
            countries = [
                {"country": country, "count": int(count)}
                for country, count in zip(
                    year_df["country_txt"], year_df["count"]
                )
            ]
            output[short_name][str(year)] = {
                "total_count": year_total,
                "total_percentage": round(year_total / group_total, 3),
                "countries": countries,
            }

    destination = Path(output_path)
    # Opening for write fails if the target directory is missing.
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)

# Script entry: load the CSV named by INPUT_FILE, then write the per-group
# JSON summary. Both calls run as soon as this code is executed.
clean_df = load_and_clean_data(INPUT_FILE)
generate_gtd_groups_json(clean_df)