In [187]:
import pandas as pd

In [188]:
# List of all CSV files
csv_files = [
    "../Data Preprocessing/global_crime_index.csv",
    "../Data Preprocessing/global_health_index.csv",
    "../Data Preprocessing/homicide_justice_rate.csv",
    "../Data Preprocessing/global_military_index.csv",
    "../Data Preprocessing/global_peace_index.csv",
    "../Data Preprocessing/global_press_freedom_index.csv"
]


In [189]:
# Load all CSV files into separate dataframes
global_crime_index = pd.read_csv("../Data Preprocessing/global_crime_index.csv")
global_health_index = pd.read_csv("../Data Preprocessing/global_health_index.csv")
homicide_justice_rate = pd.read_csv("../Data Preprocessing/homicide_justice_rate.csv")
global_military_index = pd.read_csv("../Data Preprocessing/global_military_index.csv")
global_peace_index = pd.read_csv("../Data Preprocessing/global_peace_index.csv")
global_press_freedom_index = pd.read_csv("../Data Preprocessing/global_press_freedom_index.csv")

In [245]:
global_crime_index.sort_values(by=['2021'],ascending=True).head(50) #checking if all csv is loaded properly

Unnamed: 0,Country,2013,2014,2015,2016,2017,2018,2019,2020,2021
122,South Africa,0.114206,0.013029,0.014587,0.0,0.042139,0.048632,0.0,0.0,0.0
53,Honduras,0.165738,0.169381,0.071313,0.031949,0.0,0.0,0.0,0.022654,0.037277
107,Peru,0.374652,0.345277,0.351702,0.230032,0.243112,0.227964,0.18225,0.150485,0.170178
4,Argentina,0.356546,0.355049,0.273906,0.241214,0.254457,0.259878,0.22504,0.254045,0.223663
70,Kenya,0.147632,0.006515,0.10859,0.142173,0.215559,0.290274,0.348653,0.255663,0.254457
13,Belarus,0.809192,0.7557,0.790924,0.811502,0.907618,0.794833,0.844691,0.849515,0.272285
35,Dominican Republic,0.373259,0.210098,0.188006,0.242812,0.254457,0.264438,0.244057,0.263754,0.275527
50,Guatemala,0.0,0.0,0.0,0.238019,0.303079,0.297872,0.33122,0.31877,0.306321
80,Malaysia,0.239554,0.210098,0.150729,0.15655,0.218801,0.240122,0.253566,0.302589,0.311183
140,Uganda,0.263231,0.278502,0.322528,0.322329,0.34684,0.314252,0.372425,0.299353,0.340357


In [191]:
# Create a dictionary for easier management
dataframes = {
    "global_crime_index": global_crime_index,
    "global_health_index": global_health_index,
    "homicide_justice_rate": homicide_justice_rate,
    "global_military_index": global_military_index,
    "global_peace_index": global_peace_index,
    "global_press_freedom_index": global_press_freedom_index
}

In [192]:
# Get the largest leftmost year and smallest rightmost year
year_bounds = []
for df in dataframes.values():
    # Extract year columns (excluding the 'Country' column)
    year_columns = [int(col) for col in df.columns if col != 'Country']
    year_bounds.append((min(year_columns), max(year_columns)))

# Determine the new year bounds
new_start_year = max(bound[0] for bound in year_bounds)
new_end_year = min(bound[1] for bound in year_bounds)

# Get the common countries among all dataframes
common_countries = set(global_crime_index['Country'])
for df in dataframes.values():
    common_countries.intersection_update(df['Country'])

# Create a new dataframe with zero values
columns = ['Country'] + list(range(new_start_year, new_end_year + 1))
new_df = pd.DataFrame(columns=columns)
new_df['Country'] = list(common_countries)
new_df.iloc[:, 1:] = 0


In [193]:
new_df.head()

Unnamed: 0,Country,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Slovakia,0,0,0,0,0,0,0,0,0
1,Lithuania,0,0,0,0,0,0,0,0,0
2,Belarus,0,0,0,0,0,0,0,0,0
3,Australia,0,0,0,0,0,0,0,0,0
4,Finland,0,0,0,0,0,0,0,0,0


In [194]:
new_df.sort_values(by=["Country"],inplace=True)

In [195]:
new_df[new_df['Country']=="Egypt"]

Unnamed: 0,Country,2013,2014,2015,2016,2017,2018,2019,2020,2021
52,Egypt,0,0,0,0,0,0,0,0,0


In [196]:
# Modify the dataframes
def normalize_dataframe(df, start_year, end_year, common_countries):
    # Filter the dataframe by year bounds
    year_columns = [col for col in df.columns if col != 'Country']
    year_columns = [col for col in year_columns if start_year <= int(col) <= end_year]
    filtered_df = df[['Country'] + year_columns]

    # Filter by common countries
    filtered_df = filtered_df[filtered_df['Country'].isin(common_countries)]

    # Normalize values to 0-1
    for col in year_columns:
        max_value = filtered_df[col].max()
        min_value = filtered_df[col].min()
        if max_value != min_value:  # Avoid division by zero
            filtered_df[col] = (filtered_df[col] - min_value) / (max_value - min_value)
        else:
            filtered_df[col] = 0  # If all values are the same, set to 0

    return filtered_df

# Apply modifications to each dataframe
for name, df in dataframes.items():
    dataframes[name] = normalize_dataframe(df, new_start_year, new_end_year, common_countries)

# Subtract values of global_crime_index from 1
global_crime_index = dataframes["global_crime_index"]
for col in global_crime_index.columns[1:]:
    global_crime_index[col] = 1 - global_crime_index[col]
dataframes["global_crime_index"] = global_crime_index

# Subtract values of global_peace_index from 1
global_peace_index = dataframes["global_peace_index"]
for col in global_peace_index.columns[1:]:
    global_peace_index[col] = 1 - global_peace_index[col]
dataframes["global_peace_index"] = global_peace_index

In [197]:
# Define the weights
weights = {
    "global_crime_index": 1/6,
    "global_health_index": 1/6,
    "homicide_justice_rate": 1/6,
    "global_military_index": 1/6,
    "global_peace_index": 1/6,
    "global_press_freedom_index": 1/6
}


In [198]:
# Add values of all dataframes into new_df
for year in range(new_start_year, new_end_year + 1):
    for country in common_countries:
        total_value = sum(
            df.loc[df['Country'] == country, str(year)].values[0]*weights[name]
            for names,df in dataframes.items()
            if str(year) in df.columns
        )
        new_df.loc[new_df['Country'] == country, year] = total_value

In [199]:
new_df.columns = ['Country'] + list(map(str, range(new_start_year, new_end_year + 1)))


In [200]:
pivoted_df = new_df.melt(id_vars=["Country"], var_name="Year", value_name="Index")

# Step 2: Ensure the 'Year' column is treated as numeric for sorting (optional)
pivoted_df["Year"] = pd.to_numeric(pivoted_df["Year"])

In [203]:
pivoted_df[pivoted_df['Year']==2018].sort_values(by=['Index'],ascending=False).tail(50)

Unnamed: 0,Country,Year,Index
426,Turkmenistan,2018,0.669828
394,Italy,2018,0.669524
405,New Zealand,2018,0.652828
418,Slovakia,2018,0.651464
421,Spain,2018,0.650653
425,Tunisia,2018,0.65025
389,Hungary,2018,0.649527
411,Poland,2018,0.645227
368,Belarus,2018,0.638953
424,Thailand,2018,0.632228


In [204]:
new_df.to_csv('Global Safety and Stability Index.csv',index=False)

In [205]:
new_df.to_excel('Global Safety and Stability Index.xlsx', index = False)

In [206]:
pivoted_df.to_excel('Global Safety and Stability Index(Pivoted).xlsx', index = False)

In [215]:
pivoted_gdp_df=pd.read_csv("../Data Preprocessing/GDP_per_capita.csv")

In [223]:
# Merge the pivoted_df and pivoted_gdp_df on the 'Country' column
refined_df = pivoted_df.copy()


In [225]:
pivoted_gdp_df[pivoted_gdp_df['Country']=='Egypt'] #checking if some countries exist

Unnamed: 0,Country,Year,GDP per capita
55,Egypt,2013,3088.890834
248,Egypt,2014,3196.861381
441,Egypt,2015,3370.382447
634,Egypt,2016,3331.612461
827,Egypt,2017,2439.967284
1020,Egypt,2018,2531.200079
1213,Egypt,2019,3017.258336
1406,Egypt,2020,3571.556907
1599,Egypt,2021,3886.722498


In [226]:
# Initialize an empty list to store the GDP per capita values
gdp_values = []

# Iterate through each row of pivoted_df
for index, row in pivoted_df.iterrows():
    country = row['Country']
    year = row['Year']
    
    # Find the matching row in pivoted_gdp_df
    matched_row = pivoted_gdp_df[(pivoted_gdp_df['Country'] == country) & (pivoted_gdp_df['Year'] == year)]
    
    # If a matching row is found, append the GDP per capita value to the list
    if not matched_row.empty:
        gdp_values.append(matched_row['GDP per capita'].values[0])
    else:
        # If no match found, append NaN (or handle as you need)
        gdp_values.append(None)

# Add the GDP per capita values to the original pivoted_df as a new column
refined_df['GDP per capita'] = gdp_values

In [227]:
refined_df.isna().sum()

Country           0
Year              0
Index             0
GDP per capita    9
dtype: int64

In [229]:
refined_df[refined_df['Country']=='Egypt']

Unnamed: 0,Country,Year,Index,GDP per capita
20,Egypt,2013,0.544175,3088.890834
92,Egypt,2014,0.54635,3196.861381
164,Egypt,2015,0.52994,3370.382447
236,Egypt,2016,0.506693,3331.612461
308,Egypt,2017,0.51008,2439.967284
380,Egypt,2018,0.497389,2531.200079
452,Egypt,2019,0.509246,3017.258336
524,Egypt,2020,0.51897,3571.556907
596,Egypt,2021,0.51386,3886.722498


In [230]:
refined_df = refined_df.dropna(subset=['GDP per capita'])

In [231]:
refined_df

Unnamed: 0,Country,Year,Index,GDP per capita
0,Albania,2013,0.459649,4413.063397
1,Algeria,2013,0.589104,5519.777576
2,Argentina,2013,0.508337,13080.254732
3,Armenia,2013,0.584497,3833.157071
4,Australia,2013,0.646574,68156.386105
...,...,...,...,...
643,Uganda,2021,0.309224,883.465728
644,Ukraine,2021,0.593235,4827.845703
645,United Arab Emirates,2021,0.746869,44315.554184
646,United States,2021,0.574822,70219.472454


In [232]:
refined_df[refined_df['Country']=='Egypt']

Unnamed: 0,Country,Year,Index,GDP per capita
20,Egypt,2013,0.544175,3088.890834
92,Egypt,2014,0.54635,3196.861381
164,Egypt,2015,0.52994,3370.382447
236,Egypt,2016,0.506693,3331.612461
308,Egypt,2017,0.51008,2439.967284
380,Egypt,2018,0.497389,2531.200079
452,Egypt,2019,0.509246,3017.258336
524,Egypt,2020,0.51897,3571.556907
596,Egypt,2021,0.51386,3886.722498


In [233]:
refined_df.to_excel('Safety and Stability Index vs GDP per capita.xlsx', index = False)