In [12]:
import pandas as pd
import numpy as np
from datetime import datetime

In [13]:
# Function for Normalization
def normalize(series):
    min_val, max_val = series.min(), series.max()
    return (series - min_val) / (max_val - min_val) if max_val > min_val else 0

In [14]:
#global variables
current_date = datetime.now().strftime("%Y%m%d")
COUNTRY_SCORE_WEIGHTS = {'Exports': 0.57, 'Imports': 0.43, 'Gross_Deals': 0.0}
CATEGORY_SCORE_WEIGHTS = {'Exports': 0.57, 'Imports': 0.43, 'Gross_Deals': 0.0}


# Load the dataset
df = pd.read_csv("final_data/20241122one_row_one_country_data.csv")

# Filter the data
df = df[df['Year of order'] >= 1996]

df.describe()

Unnamed: 0.1,Unnamed: 0,transaction_id,Year of order,Number ordered,SIPRI TIV per unit,SIPRI TIV for total order,SIPRI TIV for total order - Pos_Neg,Armed forces personnel (% of total labor force),"Armed forces personnel, total",Arms exports (SIPRI trend indicator values),Arms imports (SIPRI trend indicator values),Military expenditure (% of GDP),Military expenditure (% of general government expenditure),Military expenditure (current LCU),Primary Latitude,Primary Longitude,Partner Latitude,Partner Longitude
count,23756.0,23756.0,23756.0,23300.0,23752.0,23752.0,23752.0,17097.0,17107.0,13696.0,17888.0,18549.0,15213.0,18587.0,23283.0,23283.0,23283.0,23283.0
mean,29356.736403,14828.736403,2010.470029,104.656481,8.661398,66.573915,4.901285e-15,1.459537,621733.3,2723054000.0,508608200.0,3.011649,7.821516,3.318316e+17,33.02837,14.222618,33.02837,14.222618
std,16863.854878,8560.892135,7.978137,611.16426,34.466537,246.281041,255.1208,1.471211,777861.1,3713252000.0,666975100.0,3.794553,6.254524,3.198827e+19,21.507556,68.206959,21.507556,68.206959
min,0.0,1.0,1996.0,0.0,0.0,0.0,-8280.0,0.0,0.0,0.0,0.0,0.151017,0.53912,0.0,-40.900557,-175.198242,-40.900557,-175.198242
25%,15034.25,7511.0,2004.0,3.0,0.18,3.6,-12.5,0.584123,71000.0,84000000.0,71000000.0,1.377908,3.296392,21428000000.0,23.684994,-3.74922,23.684994,-3.74922
50%,29057.5,15034.5,2011.0,10.0,1.0,12.5,0.0,0.981261,244000.0,656000000.0,266000000.0,2.088531,5.359466,63966000000.0,37.09024,19.145136,37.09024,19.145136
75%,44090.75,22057.0,2017.0,43.0,5.0,40.0,12.5,1.812401,1347300.0,5336000000.0,731000000.0,3.481428,10.848141,679229000000.0,48.379433,53.847818,48.379433,53.847818
max,58115.0,29058.0,2023.0,25000.0,1250.0,8280.0,8280.0,17.222284,3910000.0,14515000000.0,5367000000.0,33.546573,43.155187,3.083842e+21,64.963051,179.414413,64.963051,179.414413


In [15]:
# Compute country_metrics
exports = df[df['Country Role'] == 'Supplier'].groupby('Country')['SIPRI TIV for total order'].sum()
imports = df[df['Country Role'] == 'Recipient'].groupby('Country')['SIPRI TIV for total order'].sum().abs()
gross_deals = exports.abs() + imports

# Normalize country_metrics
country_metrics = pd.DataFrame({'Exports': exports, 'Imports': imports, 'Gross_Deals': gross_deals}).fillna(0)
country_metrics['Normalized_Exports'] = normalize(country_metrics['Exports'])
country_metrics['Normalized_Imports'] = normalize(country_metrics['Imports'])
country_metrics['Normalized_Gross'] = normalize(country_metrics['Gross_Deals'])

# Compute composite score

country_metrics['Country_Score'] = round(
    COUNTRY_SCORE_WEIGHTS['Exports'] * country_metrics['Normalized_Exports'] +
    COUNTRY_SCORE_WEIGHTS['Imports'] * country_metrics['Normalized_Imports'] +
    COUNTRY_SCORE_WEIGHTS['Gross_Deals'] * country_metrics['Normalized_Gross']
    , 5
) * 100

# Sort by score
country_metrics = country_metrics.sort_values(by='Country')
display(country_metrics)


Unnamed: 0_level_0,Exports,Imports,Gross_Deals,Normalized_Exports,Normalized_Imports,Normalized_Gross,Country_Score
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Afghanistan,0.00,4242.95,0.00,0.000000e+00,0.058314,0.000000,2.507
African Union**,0.00,88.58,0.00,0.000000e+00,0.001217,0.000000,0.052
Albania,1.20,75.51,76.71,3.867896e-06,0.001038,0.000231,0.045
Algeria,0.30,17665.33,17665.63,9.669741e-07,0.242786,0.053287,10.440
Angola,1.72,2197.82,2199.54,5.543985e-06,0.030206,0.006635,1.299
...,...,...,...,...,...,...,...
Zimbabwe,3.60,143.50,147.10,1.160369e-05,0.001972,0.000444,0.085
eSwatini,0.00,4.44,0.00,0.000000e+00,0.000061,0.000000,0.003
unknown rebel group*,0.00,0.20,0.00,0.000000e+00,0.000003,0.000000,0.000
unknown recipient(s),0.00,310.30,0.00,0.000000e+00,0.004265,0.000000,0.183


In [16]:
# Initialize an empty list to store rows for the final DataFrame
rows = []

# Get the unique categories
categories = df['Category'].unique()

# Loop through each category to compute metrics
for category in categories:
    # Filter data for the current category
    category_data = df[df['Category'] == category]
    
    # Compute Exports, Imports, and Gross Deals for the current category
    exports = category_data[category_data['Country Role'] == 'Supplier'].groupby('Country')['SIPRI TIV for total order'].sum()
    imports = category_data[category_data['Country Role'] == 'Recipient'].groupby('Country')['SIPRI TIV for total order'].sum().abs()
    gross_deals = exports.abs() + imports
    
    # Normalize category_metrics
    category_metrics = pd.DataFrame({'Exports': exports, 'Imports': imports, 'Gross_Deals': gross_deals}).fillna(0)
    category_metrics['Normalized_Exports'] = (category_metrics['Exports'] - category_metrics['Exports'].min()) / (category_metrics['Exports'].max() - category_metrics['Exports'].min())
    category_metrics['Normalized_Imports'] = (category_metrics['Imports'] - category_metrics['Imports'].min()) / (category_metrics['Imports'].max() - category_metrics['Imports'].min())
    category_metrics['Normalized_Gross'] = (category_metrics['Gross_Deals'] - category_metrics['Gross_Deals'].min()) / (category_metrics['Gross_Deals'].max() - category_metrics['Gross_Deals'].min())
    
    # Define weights and calculate composite score
    category_metrics['Country_Score'] = round(
        CATEGORY_SCORE_WEIGHTS['Exports'] * category_metrics['Normalized_Exports'] +
        CATEGORY_SCORE_WEIGHTS['Imports'] * category_metrics['Normalized_Imports'] +
        CATEGORY_SCORE_WEIGHTS['Gross_Deals'] * category_metrics['Normalized_Gross']
        , 5
    ) * 100
    
    # Add rows for each country in the category
    for country, data in category_metrics.iterrows():
        rows.append({'Country': country, 'Metric Scope': 'Import', 'Metric Name': category, 'Value': data['Normalized_Imports']*CATEGORY_SCORE_WEIGHTS['Imports']*100})
        rows.append({'Country': country, 'Metric Scope': 'Export', 'Metric Name': category, 'Value': data['Normalized_Exports']*CATEGORY_SCORE_WEIGHTS['Exports']*100})
        rows.append({'Country': country, 'Metric Scope': 'Overall', 'Metric Name': category, 'Value': data['Country_Score']})

# Convert rows into a DataFrame
final_metrics_df = pd.DataFrame(rows)

# Display the final DataFrame
display(final_metrics_df)

Unnamed: 0,Country,Metric Scope,Metric Name,Value
0,Afghanistan,Import,Aircraft,0.530260
1,Afghanistan,Export,Aircraft,0.000000
2,Afghanistan,Overall,Aircraft,0.530000
3,Algeria,Import,Aircraft,6.856307
4,Algeria,Export,Aircraft,0.000000
...,...,...,...,...
3418,Yemen,Export,Naval Vessels,0.000000
3419,Yemen,Overall,Naval Vessels,0.491000
3420,unknown supplier(s),Import,Naval Vessels,0.000000
3421,unknown supplier(s),Export,Naval Vessels,0.010765


In [17]:
# Adding loop score copy of Aircraft for RADAR chart
aircraft_rows = final_metrics_df[final_metrics_df['Metric Name'] == 'Aircraft'].copy()
aircraft_rows['Metric Name'] = 'ZZ_loop_score_Aircraft'
final_metrics_df = pd.concat([final_metrics_df,aircraft_rows], ignore_index=True)
display(final_metrics_df)

Unnamed: 0,Country,Metric Scope,Metric Name,Value
0,Afghanistan,Import,Aircraft,0.530260
1,Afghanistan,Export,Aircraft,0.000000
2,Afghanistan,Overall,Aircraft,0.530000
3,Algeria,Import,Aircraft,6.856307
4,Algeria,Export,Aircraft,0.000000
...,...,...,...,...
3907,unknown recipient(s),Export,ZZ_loop_score_Aircraft,0.000000
3908,unknown recipient(s),Overall,ZZ_loop_score_Aircraft,0.008000
3909,unknown supplier(s),Import,ZZ_loop_score_Aircraft,0.000000
3910,unknown supplier(s),Export,ZZ_loop_score_Aircraft,0.107583


In [192]:
#Exporting the result to a csv file
final_metrics_df.to_csv('final_data/'+current_date+'_country_category_score_data.csv')

#Test Code Below

In [191]:
# Inner join based on 'Country'
result = pd.merge(final_metrics_df, country_metrics, on='Country', how='outer')
# result_with_loop = pd.concat([result, final_metrics_df['Aircraft'].rename('ZZ_loop_score')], axis=1)
display(result)

Unnamed: 0,Country,Metric Scope,Metric Name,Value,Exports,Imports,Gross_Deals,Normalized_Exports,Normalized_Imports,Normalized_Gross,Country_Score
0,ANC (South Africa)*,Import,Missiles/Rockets/Bombs,0.000505,0.00,0.24,0.0,0.000000,0.000002,0.0,0.000
1,ANC (South Africa)*,Export,Missiles/Rockets/Bombs,0.000000,0.00,0.24,0.0,0.000000,0.000002,0.0,0.000
2,ANC (South Africa)*,Overall,Missiles/Rockets/Bombs,0.001000,0.00,0.24,0.0,0.000000,0.000002,0.0,0.000
3,Afghanistan,Import,Aircraft,3.161037,0.00,15079.26,0.0,0.000000,0.107573,0.0,4.626
4,Afghanistan,Export,Aircraft,0.000000,0.00,15079.26,0.0,0.000000,0.107573,0.0,4.626
...,...,...,...,...,...,...,...,...,...,...,...
4672,unknown supplier(s),Export,Naval Vessels,0.018716,1260.08,0.00,0.0,0.001675,0.000000,0.0,0.095
4673,unknown supplier(s),Overall,Naval Vessels,0.019000,1260.08,0.00,0.0,0.001675,0.000000,0.0,0.095
4674,unknown supplier(s),Import,ZZ_loop_score_Aircraft,0.000000,1260.08,0.00,0.0,0.001675,0.000000,0.0,0.095
4675,unknown supplier(s),Export,ZZ_loop_score_Aircraft,0.053452,1260.08,0.00,0.0,0.001675,0.000000,0.0,0.095


In [160]:
# Melt the result DataFrame
melted_result = result_with_loop.reset_index().melt(
    id_vars=['Country'],  # Columns to keep as identifiers
    var_name='Metric Name',  # Name for the new column with metric names
    value_name='Metric Value'  # Name for the new column with metric values
).sort_values(by=['Country', 'Metric Name'])

# Display the melted DataFrame
display(melted_result)
melted_result.to_csv('final_data/'+current_date+'_country_score_data_melted.csv')

Unnamed: 0,Country,Metric Name,Metric Value
2088,ANC (South Africa)*,Aircraft_Normalized_Exports,
2610,ANC (South Africa)*,Aircraft_Normalized_Gross,
2349,ANC (South Africa)*,Aircraft_Normalized_Imports,
1827,ANC (South Africa)*,Aircraft_Score,
1566,ANC (South Africa)*,Country_Score,0.000000
...,...,...,...
6785,unknown supplier(s),Unmanned Vehicles_Normalized_Imports,0.000000
6263,unknown supplier(s),Unmanned Vehicles_Score,0.212000
10700,unknown supplier(s),ZZ_loop_export_score,0.093776
10961,unknown supplier(s),ZZ_loop_import_score,0.000000
