In [33]:
import pandas as pd
# Read population as int
df = pd.read_csv('Homicides.csv', dtype={"Population": "int64"})
df.drop(['Region', 'Subregion', 'Dimension', 'Category', 'Year', 'Unit of measurement', 'Source'], axis=1, inplace=True)
df.head()

country = df.groupby(by='Country')['VALUE'].sum()
country

Country
Afghanistan                           2570.0
Albania                                394.0
Algeria                               5353.0
Andorra                                  4.0
Antigua and Barbuda                     88.0
                                       ...  
Uruguay                               3978.0
Uzbekistan                            1258.0
Vanuatu                                  4.0
Venezuela (Bolivarian Republic of)    8384.0
Zimbabwe                              1560.0
Name: VALUE, Length: 139, dtype: float64

In [25]:
# Religion
import numpy as np

religion = pd.read_csv('religion.csv', thousands=',', dtype={"Population":"int64"})
religion.drop(['Region', 'Level', 'Countrycode'], axis=1, inplace=True)
religion.query('Year == 2020', inplace=True)

religion['Population'] = (
    religion['Population']
    .astype(str)
    .str.replace(',', '', regex=False)         
)
religion['Religiously_unaffiliated'] = (
    religion['Religiously_unaffiliated']
    .astype(str)
    .str.replace(',', '', regex=False)
)

religion['Religion Density'] = 1 - (religion['Religiously_unaffiliated'].astype(int) / religion['Population'].astype(int))
religion

Unnamed: 0,Country,Year,Population,Christians,Muslims,Religiously_unaffiliated,Buddhists,Hindus,Jews,Other_religions,Religion Density
1,All World,2020,7885807101,2268860679,2022590461,1905359982,324185581,1177861089,14779638,172169677,0.758381
3,All Asia-Pacific,2020,4544798237,268839155,1187663747,1492753842,316111123,1166709664,185058,112535650,0.671547
5,All Europe,2020,752957406,505138687,45505934,190322687,2539594,2152636,1277202,6020665,0.747233
7,All Latin America-Caribbean,2020,646240245,546922208,753352,77150335,326820,695342,390861,20001329,0.880617
9,All Middle East-North Africa,2020,439686888,12918830,414112362,1858569,88708,3223451,6788404,696565,0.995773
...,...,...,...,...,...,...,...,...,...,...,...
407,Vietnam,2020,98079191,8170016,70694,66373765,22578634,40495,125,845463,0.323264
409,Western Sahara,2020,549365,847,546903,1390,0,0,0,225,0.997470
411,Yemen,2020,36134864,20138,36085366,22243,197,2798,73,4047,0.999384
413,Zambia,2020,19059395,18731646,97449,11389,363,3061,191,215296,0.999402


In [26]:
# Sex
sex = pd.read_csv('Sex.csv', thousands=',', dtype={"Population":"int64"})
sex.drop(['Iso3_code', 'Region', 'Subregion', 'Indicator', 'Dimension', 'Category'], axis=1, inplace=True)
sex = sex.groupby(by='Country')['VALUE'].sum()
sex

Country
Albania                                 1055
Algeria                                11948
Antigua and Barbuda                      183
Argentina                             191104
Armenia                                  412
                                      ...   
United Kingdom (Northern Ireland)      21977
United Kingdom (Scotland)              29045
United States of America             1047935
Uruguay                                 6998
Uzbekistan                              1382
Name: VALUE, Length: 108, dtype: int64

In [27]:
# Corruption 
corruption = pd.read_csv('Corruption.csv', thousands=',', dtype={"Population":"int64"})
corruption.query('`Unit of measurement` == "Counts"', inplace=True)
corruption = corruption.groupby(by='Country')['VALUE'].sum()
corruption

Country
Albania                                7276.0
Algeria                                4208.0
Antigua and Barbuda                     919.0
Argentina                            229808.0
Armenia                                2279.0
                                       ...   
United Kingdom (Northern Ireland)     25063.0
United Kingdom (Scotland)            108546.0
United States of America               9393.0
Uruguay                              146276.0
Uzbekistan                              158.0
Name: VALUE, Length: 107, dtype: float64

In [29]:
# Merging
merged_df = pd.merge(religion, country, how='inner', on=['Country'])
merged_df = pd.merge(merged_df, sex, how='inner', on=['Country'], suffixes=('_hom', '_sex'))
merged_df = pd.merge(merged_df, corruption, how='inner', on=['Country'])
merged_df['Homicide Density'] = merged_df['VALUE_hom'].astype(int) / merged_df['Population'].astype(int) * 100
merged_df['Sex Assault Density'] = merged_df['VALUE_sex'].astype(int) / merged_df['Population'].astype(int) * 100
merged_df = merged_df.rename(columns={'VALUE': 'VALUES_corr'})
merged_df['Corruption Density'] = merged_df['VALUES_corr'].astype(int) / merged_df['Population'].astype(int) * 100
merged_df = merged_df.sort_values(by='Population', ascending=True)
merged_df


Unnamed: 0,Country,Year,Population,Christians,Muslims,Religiously_unaffiliated,Buddhists,Hindus,Jews,Other_religions,Religion Density,VALUE_hom,VALUE_sex,VALUES_corr,Homicide Density,Sex Assault Density,Corruption Density
34,Honduras,2020,10119641,8953090,280,986048,550,130,600,178943,0.902561,38572.0,7378,15036.0,0.381160,0.072908,0.148582
6,Azerbaijan,2020,10181730,42730,9644874,484645,208,693,8580,0,0.952401,2329.0,1037,9658.0,0.022874,0.010185,0.094856
72,Sweden,2020,10353687,6292071,838566,2992876,86937,46800,16396,80041,0.710936,1346.0,139553,744733.0,0.013000,1.347858,7.192926
64,Portugal,2020,10370519,8822654,42398,1429155,20659,22715,3390,29549,0.862191,321.0,6197,131562.0,0.003095,0.059756,1.268615
30,Greece,2020,10699369,9573407,547831,507404,2640,9160,4173,54754,0.952576,574.0,2912,98311.0,0.005365,0.027217,0.918849
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,Switzerland,2020,8640582,5320913,525613,2658287,53493,46609,19529,16137,0.692349,625.0,11480,189863.0,0.007233,0.132861,2.197340
38,Israel,2020,8800376,165383,1296427,384987,3770,14620,6780636,154552,0.956253,428.0,80236,113895.0,0.004863,0.911734,1.294206
5,Austria,2020,8921405,6083568,740723,1998162,26496,10103,5384,56968,0.776026,681.0,24646,221730.0,0.007633,0.276257,2.485371
77,United Arab Emirates,2020,9448524,1352428,6891776,31731,15792,1110535,1418,44843,0.996642,280.0,644,394.0,0.002963,0.006816,0.004170
