In [None]:
import pandas as pd
import numpy as np
from datetime import datetime

In [None]:
# Function for Normalization
def normalize(series):
    min_val, max_val = series.min(), series.max()
    return (series - min_val) / (max_val - min_val) if max_val > min_val else 0

In [None]:
#global variables
current_date = datetime.now().strftime("%Y%m%d")
COUNTRY_SCORE_WEIGHTS = {'Exports': 0.4, 'Imports': 0.3, 'Gross_Deals': 0.3}
CATEGORY_SCORE_WEIGHTS = {'Exports': 0.4, 'Imports': 0.3, 'Gross_Deals': 0.3}


# Load the dataset
df = pd.read_csv("final_data/20241122one_row_one_country_data.csv")

In [None]:
# Compute country_metrics
exports = df[df['Country Role'] == 'Supplier'].groupby('Country')['SIPRI TIV for total order'].sum()
imports = df[df['Country Role'] == 'Recipient'].groupby('Country')['SIPRI TIV for total order'].sum().abs()
gross_deals = exports.abs() + imports

# Normalize country_metrics
country_metrics = pd.DataFrame({'Exports': exports, 'Imports': imports, 'Gross_Deals': gross_deals}).fillna(0)
country_metrics['Normalized_Exports'] = normalize(country_metrics['Exports'])
country_metrics['Normalized_Imports'] = normalize(country_metrics['Imports'])
country_metrics['Normalized_Gross'] = normalize(country_metrics['Gross_Deals'])

# Compute composite score

country_metrics['Country_Score'] = round(
    COUNTRY_SCORE_WEIGHTS['Exports'] * country_metrics['Normalized_Exports'] +
    COUNTRY_SCORE_WEIGHTS['Imports'] * country_metrics['Normalized_Imports'] +
    COUNTRY_SCORE_WEIGHTS['Gross_Deals'] * country_metrics['Normalized_Gross']
    , 5
) * 100

# Sort by score
country_metrics = country_metrics.sort_values(by='Country_Score', ascending=False)
display(country_metrics)


In [None]:
# Initialize an empty dictionary to store category-wise scores
category_scores = {}

# Get the unique categories
categories = df['Category'].unique()

for category in categories:
    # Filter data for the current category
    category_data = df[df['Category'] == category]
    
    # Compute Exports, Imports, and Gross Deals for the current category
    exports = category_data[category_data['Country Role'] == 'Supplier'].groupby('Country')['SIPRI TIV for total order'].sum()
    imports = category_data[category_data['Country Role'] == 'Recipient'].groupby('Country')['SIPRI TIV for total order'].sum().abs()
    gross_deals = exports.abs() + imports
    
    # Normalize category_metrics
    category_metrics = pd.DataFrame({'Exports': exports, 'Imports': imports, 'Gross_Deals': gross_deals}).fillna(0)
    category_metrics['Normalized_Exports'] = normalize(category_metrics['Exports'])
    category_metrics['Normalized_Imports'] = normalize(category_metrics['Imports'])
    category_metrics['Normalized_Gross'] = normalize(category_metrics['Gross_Deals'])
    
    # Define weights and calculate composite score
    category_metrics['Country_Score'] = round(
        CATEGORY_SCORE_WEIGHTS['Exports'] * category_metrics['Normalized_Exports'] +
        CATEGORY_SCORE_WEIGHTS['Imports'] * category_metrics['Normalized_Imports'] +
        CATEGORY_SCORE_WEIGHTS['Gross_Deals'] * category_metrics['Normalized_Gross']
        , 5
    ) * 100
    
    # Store the scores in the dictionary
    category_scores[category] = category_metrics[['Country_Score']]
    
# Combine all category scores into one DataFrame for analysis
final_category_scores = pd.concat(category_scores, axis=1)
final_category_scores.columns = [f"{category}_Score" for category in category_scores.keys()]

# Display the final scores
display(final_category_scores)

In [86]:
# Inner join based on 'Country'
result = pd.merge(country_metrics, final_category_scores, on='Country')
result_with_loop = pd.concat([result, final_category_scores['Aircraft_Score'].rename('ZZ_loop_score')], axis=1)
display(result_with_loop)

Unnamed: 0_level_0,Exports,Imports,Gross_Deals,Normalized_Exports,Normalized_Imports,Normalized_Gross,Country_Score,Aircraft_Score,Specialized Equipment_Score,Missiles/Rockets/Bombs_Score,Helicopters_Score,Unmanned Vehicles_Score,Ground Vehicles/Artillery_Score,Radar/Detection Systems_Score,Naval Vessels_Score,ZZ_loop_score
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
United States,752270.38,42010.47,794280.85,1.000000,0.299696,1.000000,78.991,75.299,100.000,81.471,78.385,70.946,58.469,79.706,59.521,75.299
Soviet Union,373140.03,21974.87,395114.90,0.496019,0.156765,0.497450,39.467,30.641,1.421,21.605,23.605,21.845,75.672,21.230,47.469,30.641
India,1028.96,140176.72,141205.68,0.001368,1.000000,0.177778,35.388,34.443,17.726,33.551,18.269,30.000,36.725,39.884,40.934,34.443
Saudi Arabia,170.04,84180.94,84350.98,0.000226,0.600534,0.106198,21.211,21.109,4.623,34.481,12.800,12.466,19.034,17.351,8.707,21.109
United Kingdom,102732.99,42200.34,144933.33,0.136564,0.301051,0.182471,19.968,16.682,53.010,25.799,19.370,23.418,11.528,20.239,31.678,16.682
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PRC (Israel/Palestine)*,0.00,0.25,0.00,0.000000,0.000002,0.000000,0.000,,,0.000,,,,,,
Provisional IRA (UK)*,0.00,1.20,0.00,0.000000,0.000009,0.000000,0.000,,,0.002,,,,,,
RPF (Rwanda)*,0.00,0.35,0.00,0.000000,0.000002,0.000000,0.000,,,0.001,,,,,,
RUF (Sierra Leone)*,0.00,0.35,0.00,0.000000,0.000002,0.000000,0.000,,,0.001,,,,,,


In [87]:
#Exporting the result to a csv file
result_with_loop.to_csv('final_data/'+current_date+'_country_score_data.csv')

In [88]:
result_with_loop.info()

<class 'pandas.core.frame.DataFrame'>
Index: 261 entries, United States to MTA (Myanmar)*
Data columns (total 16 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Exports                          261 non-null    float64
 1   Imports                          261 non-null    float64
 2   Gross_Deals                      261 non-null    float64
 3   Normalized_Exports               261 non-null    float64
 4   Normalized_Imports               261 non-null    float64
 5   Normalized_Gross                 261 non-null    float64
 6   Country_Score                    261 non-null    float64
 7   Aircraft_Score                   194 non-null    float64
 8   Specialized Equipment_Score      156 non-null    float64
 9   Missiles/Rockets/Bombs_Score     193 non-null    float64
 10  Helicopters_Score                183 non-null    float64
 11  Unmanned Vehicles_Score          140 non-null    float64
 12  Grou

In [89]:
result_with_loop.describe()

Unnamed: 0,Exports,Imports,Gross_Deals,Normalized_Exports,Normalized_Imports,Normalized_Gross,Country_Score,Aircraft_Score,Specialized Equipment_Score,Missiles/Rockets/Bombs_Score,Helicopters_Score,Unmanned Vehicles_Score,Ground Vehicles/Artillery_Score,Radar/Detection Systems_Score,Naval Vessels_Score,ZZ_loop_score
count,261.0,261.0,261.0,261.0,261.0,261.0,261.0,194.0,156.0,193.0,183.0,140.0,205.0,137.0,157.0,194.0
mean,7654.255632,7654.255632,15069.958123,0.01017487,0.054604,0.018973,2.614322,2.97816,3.021263,3.437166,3.194497,3.856071,3.635361,4.418839,5.657631,2.97816
std,54333.239315,16671.941776,59919.88672,0.07222568,0.118935,0.075439,7.083542,7.35454,10.129503,8.361781,7.515988,9.64984,8.668256,9.656414,11.309874,7.35454
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001,0.007,0.0
25%,0.0,30.99,0.0,0.0,0.000221,0.0,0.008,0.02325,0.026,0.006,0.121,0.095,0.038,0.129,0.076,0.02325
50%,0.64,402.28,14.26,8.507579e-07,0.00287,1.8e-05,0.091,0.237,0.153,0.219,0.636,0.535,0.404,0.867,0.673,0.237
75%,103.0,7164.06,7069.18,0.0001369189,0.051107,0.0089,1.805,2.5685,1.42025,2.344,3.0265,1.78025,2.958,3.961,6.23,2.5685
max,752270.38,140176.72,794280.85,1.0,1.0,1.0,78.991,75.299,100.0,81.471,78.385,70.946,75.672,79.706,73.271,75.299


In [90]:
# Melt the result DataFrame
melted_result = result_with_loop.reset_index().melt(
    id_vars=['Country'],  # Columns to keep as identifiers
    var_name='Metric Name',  # Name for the new column with metric names
    value_name='Metric Value'  # Name for the new column with metric values
).sort_values(by=['Country', 'Metric Name'])

# Display the melted DataFrame
display(melted_result)
melted_result.to_csv('final_data/'+current_date+'_country_score_data_melted.csv')

Unnamed: 0,Country,Metric Name,Metric Value
2062,ANC (South Africa)*,Aircraft_Score,
1801,ANC (South Africa)*,Country_Score,0.000
235,ANC (South Africa)*,Exports,0.000
757,ANC (South Africa)*,Gross_Deals,0.000
3367,ANC (South Africa)*,Ground Vehicles/Artillery_Score,
...,...,...,...
1181,unknown supplier(s),Normalized_Imports,0.000
3530,unknown supplier(s),Radar/Detection Systems_Score,0.002
2225,unknown supplier(s),Specialized Equipment_Score,0.266
3008,unknown supplier(s),Unmanned Vehicles_Score,0.149
