In [46]:
import pandas as pd  
import os


In [47]:
# Path to raw_gmi_data.csv inside the 'Web Scrapping' folder, one directory up
# from the notebook's working directory.
csv_path = os.path.join(
    '..',
    'Web Scrapping',
    'raw_gmi_data.csv',
)

# Load the raw records into a DataFrame.
gmi_df = pd.read_csv(filepath_or_buffer=csv_path)

In [48]:
# Preview the first 10 rows of the loaded data.
gmi_df.head(10)

Unnamed: 0,Year,Country,Military Expenditure,Military Personnel,Heavy Weapons,GMI Score,GMI Rank,Score Trend
0,1990,Kuwait,6.4,0.81,2.85,413,1,-
1,1990,Israel,3.52,2.96,4.0,387,2,-
2,1990,Singapore,2.61,3.07,2.71,344,3,-
3,1990,Syria,2.8,1.91,3.04,318,4,-
4,1990,Jordan,2.86,1.74,2.98,311,5,-
5,1990,Saudi Arabia,4.86,0.41,2.28,309,6,-
6,1990,Qatar,3.57,0.84,2.79,295,7,-
7,1990,Mongolia,2.67,1.39,3.07,292,8,-
8,1990,Brunei,3.29,1.3,2.3,282,9,-
9,1990,Cyprus,3.11,1.83,2.61,279,10,-


In [49]:
# List every column name in the loaded frame.
gmi_df.columns.tolist()

['Year',
 'Country',
 'Military Expenditure',
 'Military Personnel',
 'Heavy Weapons',
 'GMI Score',
 'GMI Rank',
 'Score Trend']

In [50]:
# Columns to drop from gmi_df, leaving Year, Country and GMI Score.
columns_to_drop = [
    'Military Expenditure',
    'Military Personnel',
    'Heavy Weapons',
    'Score Trend',
    'GMI Rank',
]

In [51]:
# Remove the unneeded columns by rebinding gmi_df to a new frame rather than
# mutating it in place. errors='ignore' skips labels that are already absent,
# so re-running this cell is a no-op instead of raising KeyError.
gmi_df = gmi_df.drop(columns=columns_to_drop, errors='ignore')

In [52]:
# Preview the first 10 rows of the frame.
gmi_df.head(10)

Unnamed: 0,Year,Country,GMI Score
0,1990,Kuwait,413
1,1990,Israel,387
2,1990,Singapore,344
3,1990,Syria,318
4,1990,Jordan,311
5,1990,Saudi Arabia,309
6,1990,Qatar,295
7,1990,Mongolia,292
8,1990,Brunei,282
9,1990,Cyprus,279


In [40]:
# List the column names again to confirm which ones the frame holds now.
gmi_df.columns.tolist()

['Year', 'Country', 'GMI Score']

In [53]:
# Count missing values in each column.
gmi_df.isna().sum()

Year         0
Country      0
GMI Score    0
dtype: int64

In [54]:
# Compare the number of distinct years with the span between the earliest and
# latest year; equal values mean no year inside the range is missing.
years = gmi_df['Year']
first_year, last_year = years.min(), years.max()

print(len(years.unique()))         # number of distinct years
print(last_year)                   # latest year
print(first_year)                  # earliest year
print(last_year - first_year + 1)  # length of the inclusive year range

32
2021
1990
32


In [55]:
# Years the wide table must have a column for (2021 inclusive).
all_years = range(1990, 2022)

# Reshape from long format (one row per Country/Year pair) to wide format:
# one row per country, one column per year, GMI Score as the cell value.
pivot_df = (
    gmi_df.pivot_table(
        index='Country',
        columns='Year',
        values='GMI Score',
        aggfunc='first'  # if a Country/Year pair repeats, keep its first score
    )
    .reindex(columns=all_years)  # guarantee one column per year in all_years
    .sort_index(axis=0)  # sort countries alphabetically
    .reset_index()       # move Country from the index back to a column
)

# Remove the 'Year' label that pivot_table leaves on the column axis.
pivot_df.columns.name = None

 Handle missing values: replace each country's missing yearly scores with that country's mean score.

In [56]:
# Replace each missing yearly score with the mean of that country's available scores.
scores_by_country = pivot_df.set_index("Country")
country_means = scores_by_country.mean(axis=1)  # mean skips NaN entries by default

# DataFrame.fillna matches a Series of fill values against column labels, so
# transpose to put countries on the columns, fill, then transpose back.
pivot_df = scores_by_country.T.fillna(country_means).T.reset_index()

In [57]:
# Count the missing values left in each column of the wide table.
pivot_df.isna().sum()

Country    0
1990       0
1991       0
1992       0
1993       0
1994       0
1995       0
1996       0
1997       0
1998       0
1999       0
2000       0
2001       0
2002       0
2003       0
2004       0
2005       0
2006       0
2007       0
2008       0
2009       0
2010       0
2011       0
2012       0
2013       0
2014       0
2015       0
2016       0
2017       0
2018       0
2019       0
2020       0
2021       0
dtype: int64

In [58]:
# Write the wide table to a CSV in the working directory, omitting the row index.
pivot_df.to_csv('global_military_index.csv',index=False)