In [17]:
import pandas as pd  
import os


In [18]:
# Location of the scraped GPI data, expressed relative to the current working directory
raw_gpi_location = ('..', 'Web Scrapping', 'raw_gpi_data.csv')
csv_path = os.path.join(*raw_gpi_location)

# Load the scraped records into a pandas DataFrame
gpi_df = pd.read_csv(filepath_or_buffer=csv_path)

In [19]:
# Preview the loaded records (at most the first 50 rows)
gpi_df.head(50)

Unnamed: 0,Year,Rank,Country,GPI
0,2024,1,Iceland,1.112
1,2024,2,Ireland,1.303
2,2024,3,Austria,1.313
3,2024,4,New Zealand,1.323
4,2024,5,Singapore,1.339
5,2024,6,Switzerland,1.35
6,2024,7,Portugal,1.372
7,2024,8,Denmark,1.382
8,2024,9,Slovenia,1.395
9,2024,10,Malaysia,1.427


In [20]:
# Remove the 'Rank' column; the Country x Year pivot built later only needs
# 'Country', 'Year' and 'GPI'.
# Rebinding the name (rather than inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second execution no longer raises KeyError
# for the column that has already been removed.
gpi_df = gpi_df.drop(columns='Rank', errors='ignore')

In [21]:
# Count the missing entries in every column
gpi_df.isnull().sum()

Year       0
Country    0
GPI        0
dtype: int64

In [22]:
# Year coverage check: when the number of distinct years equals the inclusive
# span between the earliest and the latest year, no year is missing in between.
year_values = gpi_df['Year']
earliest_year = year_values.min()
latest_year = year_values.max()

print(len(year_values.unique()))        # number of distinct years
print(latest_year)                      # latest year in the data
print(earliest_year)                    # earliest year in the data
print(latest_year - earliest_year + 1)  # length of the inclusive year span

17
2024
2008
17


In [23]:
# Years that must appear as columns in the wide table: 2008 through 2024
# (range() excludes its stop value, hence 2025).
all_years = range(2008, 2025)

# Reshape the long (Country, Year, GPI) records into one row per country and
# one column per year.
pivot_df = (
    gpi_df.pivot_table(
        index='Country',
        columns='Year',
        values='GPI',
        aggfunc='first'  # keep a single value if a (Country, Year) pair is duplicated
    )
    .reindex(columns=all_years)  # columns become exactly all_years; years absent from the data are NaN
    .sort_index(axis=0)          # order countries alphabetically
    .reset_index()               # move Country from the index back to a regular column
)

# Clear the 'Year' label that the pivot leaves on the column axis.
# (The former rename of 'Country' to 'Country' was a no-op and has been removed.)
pivot_df.columns.name = None

In [24]:
# Preview the wide Country x Year table (at most the first 50 rows)
pivot_df.head(50)

Unnamed: 0,Country,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,Afghanistan,3.218,3.296,3.201,3.212,3.265,3.293,3.272,3.363,3.457,3.552,3.614,3.619,3.657,3.663,3.62,3.287,3.294
1,Albania,1.84,1.859,1.85,1.873,1.922,1.944,1.944,1.888,1.851,1.82,1.808,1.742,1.809,1.796,1.786,1.795,1.809
2,Algeria,2.269,2.251,2.293,2.345,2.307,2.253,2.227,2.169,2.192,2.186,2.193,2.195,2.143,2.174,2.068,2.102,2.11
3,Angola,2.124,2.042,2.124,2.113,2.068,2.105,2.122,1.975,2.036,2.024,2.012,1.973,2.008,2.01,1.979,2.119,2.043
4,Argentina,1.918,1.984,2.01,1.95,1.892,2.051,1.957,2.056,2.068,2.025,1.997,2.01,1.97,1.961,1.948,1.857,1.855
5,Armenia,2.223,2.254,2.332,2.238,2.197,2.248,2.138,2.118,2.18,2.154,2.164,2.123,1.975,2.069,1.991,2.044,2.052
6,Australia,1.466,1.474,1.455,1.492,1.513,1.485,1.422,1.46,1.49,1.489,1.503,1.513,1.501,1.549,1.614,1.581,1.536
7,Austria,1.344,1.356,1.378,1.406,1.384,1.319,1.313,1.286,1.265,1.335,1.291,1.292,1.296,1.338,1.308,1.299,1.313
8,Azerbaijan,2.323,2.399,2.353,2.373,2.399,2.414,2.393,2.424,2.422,2.424,2.372,2.359,2.106,2.428,2.219,2.187,2.248
9,Bahrain,1.931,1.888,1.875,2.154,2.16,2.108,2.117,2.172,2.207,2.247,2.263,2.219,2.134,2.12,2.148,2.143,2.072


 Handle missing values. Fill null values with the respective country's mean GPI

In [25]:
def _fill_with_row_mean(gpi_row):
    """Return the row with its NaN entries replaced by the mean of the row's other values."""
    return gpi_row.fillna(gpi_row.mean())


# Index by country so each row holds only that country's yearly values, impute
# the gaps row by row, then restore Country as a regular column.
gpi_by_country = pivot_df.set_index("Country")
pivot_df = gpi_by_country.apply(_fill_with_row_mean, axis=1).reset_index()

In [26]:
# Write the wide table to disk; index=False leaves the row index out of the file
output_csv = 'global_peace_index.csv'
pivot_df.to_csv(output_csv, index=False)