In [7]:
import os

import pandas as pd

In [None]:
def load_data(path=r"globalterrorismdb_2021Jan-June_1222dist.xlsx"):
    """Read the incident spreadsheet into a DataFrame.

    Parameters
    ----------
    path : str, optional
        Location of the Excel workbook.  Defaults to the January-June 2021
        extract named in the original notebook; a relative path is resolved
        against the current working directory.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pandas.read_excel`` returns for the workbook (first sheet,
        default options).
    """
    return pd.read_excel(path)

- Computes the total number of kills (`nkill`) for each unique value in a specified column of the dataset, ranks the values by severity (total kills), and saves the results to a CSV file; these results are then integrated into the **preprocessing function**.

In [None]:
def calculate_kill_sum(dataset_cleaned, column_name, output_dir="metrics"):
    """Rank the values of one column by total kills and save the ranking as CSV.

    Values of ``column_name`` are compared as lower-cased strings, rows whose
    value is ``'unknown'`` are ignored, and ``nkill`` is summed per remaining
    value.  The table is ordered from most to fewest kills and carries a dense
    ``severity_score`` (1 = smallest kill sum; equal sums share a score).

    The input frame is not modified: the normalised labels live in a separate
    Series instead of overwriting ``dataset_cleaned[column_name]``.

    Parameters
    ----------
    dataset_cleaned : pandas.DataFrame
        Must contain ``column_name`` and ``nkill``.
    column_name : str
        Column whose distinct values are ranked.
    output_dir : str, default "metrics"
        Directory that receives ``kill_sum_<column_name>.csv``.  It is created
        when missing, so the call no longer fails on a fresh checkout.

    Returns
    -------
    pandas.DataFrame
        Columns ``column_name``, ``kill_sum`` and ``severity_score`` -- the
        same table that is written to disk.
    """
    # Normalise on a detached Series so the caller's frame keeps its values.
    labels = dataset_cleaned[column_name].astype(str).str.lower()
    keep = labels != 'unknown'

    kill_sum = (
        dataset_cleaned.loc[keep, 'nkill']
        .groupby(labels[keep])  # the key Series is named column_name, so the index is too
        .sum()
        .sort_values(ascending=False)
    )

    metrics = kill_sum.rename('kill_sum').reset_index()
    metrics['severity_score'] = metrics['kill_sum'].rank(ascending=True, method='dense')

    os.makedirs(output_dir, exist_ok=True)
    metrics.to_csv(os.path.join(output_dir, f"kill_sum_{column_name}.csv"), index=False)
    return metrics


def precompute_values(
    dataset,
    columns=('gname', 'city', 'country', 'attacktype1', 'targtype1', 'weaptype1'),
):
    """Write one kill-sum ranking CSV per column of interest.

    Rows with a missing or empty ``gname`` or ``city`` are discarded first;
    that filter applies to every column's ranking, not only to those two.
    ``calculate_kill_sum`` is then called once per entry in ``columns``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Raw incident table; must contain ``gname``, ``city`` and every name
        listed in ``columns``.
    columns : iterable of str, optional
        Columns to rank.  Defaults to the six columns the notebook originally
        processed, in the same order.

    Returns
    -------
    None
        The function is called for its side effect (the CSV files).
    """
    dataset_cleaned = dataset.dropna(subset=['gname', 'city'])
    has_names = (dataset_cleaned['gname'] != '') & (dataset_cleaned['city'] != '')
    # Explicit copy: the cleaned frame is handed to helpers, so it must be an
    # independent object rather than a slice that still refers to `dataset`.
    dataset_cleaned = dataset_cleaned.loc[has_names].copy()

    for column_name in columns:
        calculate_kill_sum(dataset_cleaned, column_name)

# Run the pipeline once: read the spreadsheet, then write the per-column
# kill-sum CSV files (a side effect of precompute_values; it returns nothing).
dataset = load_data()
precompute_values(dataset)
