In [1]:
import pandas as pd

In [39]:
# List of all CSV files
csv_files = [
    "../Data Preprocessing/global_crime_index.csv",
    "../Data Preprocessing/global_health_index.csv",
    "../Data Preprocessing/homicide_justice_rate.csv",
    "../Data Preprocessing/global_military_index.csv",
    "../Data Preprocessing/global_peace_index.csv",
    "../Data Preprocessing/global_press_freedom_index.csv"
]


In [40]:
# Load all CSV files into separate dataframes
global_crime_index = pd.read_csv("../Data Preprocessing/global_crime_index.csv")
global_health_index = pd.read_csv("../Data Preprocessing/global_health_index.csv")
homicide_justice_rate = pd.read_csv("../Data Preprocessing/homicide_justice_rate.csv")
global_military_index = pd.read_csv("../Data Preprocessing/global_military_index.csv")
global_peace_index = pd.read_csv("../Data Preprocessing/global_peace_index.csv")
global_press_freedom_index = pd.read_csv("../Data Preprocessing/global_press_freedom_index.csv")

In [42]:
# Create a dictionary for easier management
dataframes = {
    "global_crime_index": global_crime_index,
    "global_health_index": global_health_index,
    "homicide_justice_rate": homicide_justice_rate,
    "global_military_index": global_military_index,
    "global_peace_index": global_peace_index,
    "global_press_freedom_index": global_press_freedom_index
}

In [43]:
# Get the largest leftmost year and smallest rightmost year
year_bounds = []
for df in dataframes.values():
    # Extract year columns (excluding the 'Country' column)
    year_columns = [int(col) for col in df.columns if col != 'Country']
    year_bounds.append((min(year_columns), max(year_columns)))

# Determine the new year bounds
new_start_year = max(bound[0] for bound in year_bounds)
new_end_year = min(bound[1] for bound in year_bounds)

# Get the common countries among all dataframes
common_countries = set(global_crime_index['Country'])
for df in dataframes.values():
    common_countries.intersection_update(df['Country'])

# Create a new dataframe with zero values
columns = ['Country'] + list(range(new_start_year, new_end_year + 1))
new_df = pd.DataFrame(columns=columns)
new_df['Country'] = list(common_countries)
new_df.iloc[:, 1:] = 0


In [44]:
new_df.head()

Unnamed: 0,Country,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Romania,0,0,0,0,0,0,0,0,0
1,Hungary,0,0,0,0,0,0,0,0,0
2,Spain,0,0,0,0,0,0,0,0,0
3,Canada,0,0,0,0,0,0,0,0,0
4,Croatia,0,0,0,0,0,0,0,0,0


In [45]:
new_df.sort_values(by=["Country"],inplace=True)

In [46]:
new_df.head()

Unnamed: 0,Country,2013,2014,2015,2016,2017,2018,2019,2020,2021
29,Albania,0,0,0,0,0,0,0,0,0
15,Algeria,0,0,0,0,0,0,0,0,0
16,Argentina,0,0,0,0,0,0,0,0,0
31,Armenia,0,0,0,0,0,0,0,0,0
26,Australia,0,0,0,0,0,0,0,0,0


In [47]:
# Modify the dataframes
def normalize_dataframe(df, start_year, end_year, common_countries):
    # Filter the dataframe by year bounds
    year_columns = [col for col in df.columns if col != 'Country']
    year_columns = [col for col in year_columns if start_year <= int(col) <= end_year]
    filtered_df = df[['Country'] + year_columns]

    # Filter by common countries
    filtered_df = filtered_df[filtered_df['Country'].isin(common_countries)]

    # Normalize values to 0-1
    for col in year_columns:
        max_value = filtered_df[col].max()
        min_value = filtered_df[col].min()
        if max_value != min_value:  # Avoid division by zero
            filtered_df[col] = (filtered_df[col] - min_value) / (max_value - min_value)
        else:
            filtered_df[col] = 0  # If all values are the same, set to 0

    return filtered_df

# Apply modifications to each dataframe
for name, df in dataframes.items():
    dataframes[name] = normalize_dataframe(df, new_start_year, new_end_year, common_countries)

# Subtract values of global_crime_index from 1
global_crime_index = dataframes["global_crime_index"]
for col in global_crime_index.columns[1:]:
    global_crime_index[col] = 1 - global_crime_index[col]
dataframes["global_crime_index"] = global_crime_index

In [48]:
# Add values of all dataframes into new_df
for year in range(new_start_year, new_end_year + 1):
    for country in common_countries:
        total_value = sum(
            df.loc[df['Country'] == country, str(year)].values[0]
            for df in dataframes.values()
            if str(year) in df.columns
        )
        new_df.loc[new_df['Country'] == country, year] = total_value

In [55]:
new_df.sort_values(by=2013,ascending=False).head(50) 

Unnamed: 0,Country,2013,2014,2015,2016,2017,2018,2019,2020,2021
62,Cyprus,4.377943,4.023323,3.764315,3.942189,4.184282,3.33875,3.628189,3.767999,3.859072
38,Israel,4.322645,4.547757,4.496023,4.466409,4.470295,4.407777,4.560325,4.592093,4.379692
35,Estonia,4.312211,4.088472,4.079999,4.223144,4.275098,4.280205,4.322104,4.02208,4.24312
24,Georgia,4.280885,3.657916,4.046624,4.124952,4.173616,4.083619,4.065706,3.900297,3.839038
61,Thailand,4.191979,4.31117,4.132381,3.480803,3.780902,3.821557,3.831554,3.949186,3.861756
33,Singapore,3.997086,3.923932,4.176661,3.820904,4.01902,4.071661,3.357168,3.724352,3.660519
47,Sweden,3.890845,3.878546,3.862754,3.800716,3.779454,3.719193,3.703802,3.725825,3.717978
12,Turkmenistan,3.889284,4.012297,3.928826,3.917461,3.973482,3.970589,4.007199,4.059879,4.006544
18,Finland,3.815313,3.972306,3.992346,3.91457,3.98022,3.707893,3.920024,3.915275,3.998714
68,Belarus,3.813252,3.827667,3.770376,3.732134,3.887226,3.778777,3.964457,3.995312,3.265511


In [None]:
new_df.to_csv('Global Safety and Stability Index.csv',index=False)