# Child Mortality and the Economic, Geographical and Religious Features of Countries

For this analysis four data sets will be used:
CHILD MORTALITY BY CAUSE: http://apps.who.int/gho/data/node.main.ghe1002015-by-cause?lang=en
WORLD COUNTRIES' GDP (IMF 2017): http://www.imf.org/external/pubs/ft/weo/2017/01/weodata/download.aspx
COUNTRIES' RELIGIONS: https://www.cia.gov/library/publications/the-world-factbook/fields/2122.html AND WIKIPEDIA
GEO INFO: http://download.geonames.org/export/dump/
All these datasets have been prepared and are available at https://github.com/MassimoSchiappa/datascience.
The same repository also contains the JSON file downloaded from https://raw.githubusercontent.com/datasets/geo-boundaries-world-110m/master/countries.geojson (it is used to bind pandas DataFrames to folium maps).


In [1]:
import folium
import numpy as np
import pandas as pd

# ============================== WHO ==============================
# Child-mortality counts by cause. Column names follow the pattern
# <age band>-<cause code>, e.g. N0-27D-ALRI.
#
# Cause codes:
#   ALRI   - Acute lower respiratory infections
#   BABT   - Birth asphyxia and birth trauma
#   OCPNC  - Other communicable, perinatal and nutritional conditions
#   SOICN  - Sepsis and other infectious conditions of the newborn
#   CA     - Congenital anomalies
#   DD     - Diarrhoeal diseases
#   AIDS   - HIV/AIDS
#   INJ    - Injuries
#   MAL    - Malaria
#   MEA    - Measles
#   MEN    - Meningitis/encephalitis
#   OND    - Other noncommunicable diseases
#   PER    - Pertussis
#   PRE    - Prematurity
#
# Age bands:
#   N0-27D - from 0 to 27 days of life
#   N1-59M - from 1 to 59 months of life
#   N0-4Y  - from 0 to 4 years of life (N0-27D + N1-59M)
df_who = pd.read_csv(
    './MORT_CHILD_DS.csv',
    sep=';',
    encoding="ISO-8859-1",
)

In [2]:
# Preview the first rows of the WHO table (head() defaults to 5 rows).
df_who.head()

Unnamed: 0.1,Unnamed: 0,Country,Year,N0-27D-ALRI,N1-59M-ALRI,N0-4Y-ALRI,N0-27D-BABT,N1-59M-BABT,N0-4Y-BABT,N0-27D-OCPNC,...,N0-4Y-MEN,N0-27D-OND,N1-59M-OND,N0-4Y-OND,N0-27D-PER,N1-59M-PER,N0-4Y-PER,N0-27D-PRE,N1-59M-PRE,N0-4Y-PRE
0,0,Afghanistan,2015,2341,16330,18671,9730,606,10336,2196,...,2367,22,6018,6040,49,1055,1104,11323,2426,13749
1,1,Afghanistan,2014,2432,17046,19477,10063,632,10695,2259,...,2477,23,6161,6184,51,1103,1153,11367,2528,13895
2,2,Afghanistan,2013,2552,19552,22104,10511,624,11135,2342,...,3132,23,5959,5982,52,1150,1203,11568,2495,14063
3,3,Afghanistan,2012,2685,20561,23247,11018,649,11667,2442,...,3343,24,6082,6106,54,1206,1260,11710,2595,14304
4,4,Afghanistan,2011,2837,21327,24164,11630,674,12304,2567,...,3497,25,6294,6320,56,1247,1303,11672,2696,14368


In [3]:
# ============================== IMF ==============================
# IMF World Economic Outlook export: ';'-separated, Latin-1 encoded,
# with ',' treated as the thousands separator in numeric fields.
df_imf = pd.read_csv(
    './imf_weo_ds.csv',
    sep=';',
    encoding="ISO-8859-1",
    thousands=',',
)

In [4]:
# Show the column labels of the raw IMF frame.
df_imf.columns

Index(['WEO Country Code', 'ISO', 'WEO Subject Code', 'Country',
       'Subject Descriptor', 'Subject Notes', 'Units', 'Scale',
       'Country/Series-specific Notes', '1980', '1981', '1982', '1983', '1984',
       '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993',
       '1994', '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002',
       '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011',
       '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020',
       '2021', '2022', 'Estimates Start After'],
      dtype='object')

In [5]:

# Keep the identifying columns, the yearly values for 2000-2015 and the
# indicator metadata; the result is an independent copy of the raw frame.
id_columns = ['ISO', 'Country', 'Units', 'Scale']
year_columns = [str(year) for year in range(2000, 2016)]
meta_columns = ['Estimates Start After', 'WEO Subject Code',
                'Subject Descriptor', 'Subject Notes']
df_imf_filt = df_imf.loc[:, id_columns + year_columns + meta_columns].copy()

In [6]:
# List the WEO indicators present in the table, using Italy's rows as a sample.
italy_rows = df_imf_filt['Country'] == 'Italy'
df_imf_filt.loc[italy_rows, ['WEO Subject Code', 'Subject Descriptor', 'Subject Notes']]

Unnamed: 0,WEO Subject Code,Subject Descriptor,Subject Notes
3520,NGDP_R,"Gross domestic product, constant prices",Expressed in billions of national currency uni...
3521,NGDP_RPCH,"Gross domestic product, constant prices",Annual percentages of constant price GDP are y...
3522,NGDP,"Gross domestic product, current prices",Expressed in billions of national currency uni...
3523,NGDPD,"Gross domestic product, current prices",Values are based upon GDP in national currency...
3524,NGDP_D,"Gross domestic product, deflator",The GDP deflator is derived by dividing curren...
3525,NGDPRPC,"Gross domestic product per capita, constant pr...",GDP is expressed in constant national currency...
3526,NGDPPC,"Gross domestic product per capita, current prices",GDP is expressed in current national currency ...
3527,NGDPDPC,"Gross domestic product per capita, current prices",GDP is expressed in current U.S. dollars per p...
3528,NGAP_NPGDP,Output gap in percent of potential GDP,Output gaps for advanced economies are calcula...
3529,PPPGDP,Gross domestic product based on purchasing-pow...,These data form the basis for the country weig...


In [31]:
# Split the filtered IMF table into one frame per WEO indicator.
# Each subset is an explicit copy (as df_imf_filt itself is), so it is an
# independent frame rather than a bare filter result of df_imf_filt.

# Population (WEO code LP)
df_imf_pop = df_imf_filt[df_imf_filt['WEO Subject Code']=='LP'].copy()

# Employment (WEO code LE)
df_imf_empl = df_imf_filt[df_imf_filt['WEO Subject Code']=='LE'].copy()

# Unemployment rate (WEO code LUR)
df_imf_unempl_rate = df_imf_filt[df_imf_filt['WEO Subject Code']=='LUR'].copy()

# GDP per capita, current prices, in U.S. dollars (WEO code NGDPDPC).
# NOTE: despite the variable name this is a per-capita figure, not total GDP.
df_imf_gdp_cp = df_imf_filt[df_imf_filt['WEO Subject Code']=='NGDPDPC'].copy()

# GDP per capita, constant prices, in national currency (WEO code NGDPRPC)
df_imf_gdp_xc_cp = df_imf_filt[df_imf_filt['WEO Subject Code']=='NGDPRPC'].copy()

In [32]:

# ============================== REL ==============================
# Religions-by-country table: ';'-separated, Latin-1 encoded.
df_rel = pd.read_csv(
    './religions_corr_ds.csv',
    sep=';',
    encoding="ISO-8859-1",
)

In [9]:
# Preview the first rows of the religions table (head() defaults to 5 rows).
df_rel.head()

Unnamed: 0,Country,TOT,Muslim,Catholic,Protestant,Buddhism,Orthodox Christian,Others,Hinduism,Shintoism,Jewish,Christians,Atheism,Jehova's Witness,Taoism,None,Unspecified,NOTE
0,Afghanistan,100,997.0,,,,,3,,,,,,,,,,
1,Albania,100,588.0,10.0,,,68.0,57,,,,,25.0,,,,162.0,
2,Algeria,100,99.0,,,,,1,,,,,,,,,,
3,American Samoa,100,,,,,,1,,,,983.0,,,,7.0,,
4,Andorra,100,,90.0,9.0,,,1,,,,,,,,,,


In [10]:
# ============================== GEO ==============================
# Geographic reference table: ';'-separated, Latin-1 encoded.
df_geo_ds = pd.read_csv(
    './geo_ds.csv',
    sep=';',
    encoding="ISO-8859-1",
)

In [11]:
# Preview the first rows of the geographic table (head() defaults to 5 rows).
df_geo_ds.head()

Unnamed: 0.1,Unnamed: 0,ISO,ISO3,Country,Capital,Area(in sq km),Population,Continent,CurrencyCode,CurrencyName,latitude,longitude,TimeZoneId,rawOffset (independant of DST)
0,0,AD,AND,Andorra,Andorra la Vella,468.0,84000,EU,EUR,Euro,42546245,1601554,Europe/Andorra,1.0
1,1,AE,ARE,United Arab Emirates,Abu Dhabi,82880.0,4975593,AS,AED,Dirham,23424076,53847818,Asia/Dubai,4.0
2,2,AF,AFG,Afghanistan,Kabul,647500.0,29121286,AS,AFN,Afghani,3393911,67709953,Asia/Kabul,4.5
3,3,AG,ATG,Antigua and Barbuda,St. John's,443.0,86754,,XCD,Dollar,17060816,-61796428,America/Antigua,-4.0
4,4,AI,AIA,Anguilla,The Valley,102.0,13254,,XCD,Dollar,18220554,-63068615,America/Anguilla,-4.0


In [33]:
# Build the frame plotted on the year-2000 map from df_imf_gdp_cp
# (WEO code NGDPDPC: GDP per capita, current prices).

# ISO codes left out of the map.
# NOTE(review): presumably countries/territories that have no shape in the
# GeoJSON file - confirm.
excluded_iso = [
    'ATG', 'BHR', 'BRB', 'CPV', 'COM', 'DMA', 'GRD', 'HKG', 'KIR', 'UVK',
    'MAC', 'MDV', 'MLT', 'MHL', 'MUS', 'FSM', 'NRU', 'PLW', 'WSM', 'SMR',
    'STP', 'SYC', 'SGP', 'SSD', 'KNA', 'LCA', 'VCT', 'TON', 'TUV',
]

# Select rows and columns in one .loc call and take an explicit copy, so the
# assignments below modify an independent frame (no SettingWithCopyWarning).
df_data_plot = df_imf_gdp_cp.loc[
    ~df_imf_gdp_cp['ISO'].isin(excluded_iso), ['ISO', 'Country', '2000']
].copy()

# Cells equal to 'n/a' become 0 (same behaviour as before).
# NOTE(review): this makes missing data indistinguishable from a true 0.
df_data_plot = df_data_plot.replace('n/a', 0)

# Strip ',' separators and convert to float. The builtins str/float replace
# the np.str/np.float aliases, which were removed in NumPy 1.24.
df_data_plot['2000'] = (
    df_data_plot['2000'].astype(str).str.replace(',', '').astype(float)
)

# NOTE(review): the saved output shows 535473.0 for Angola next to values in
# the low thousands, which looks like a mis-parsed decimal - verify the
# number format in the CSV.
df_data_plot.head(5)

Unnamed: 0,ISO,Country,2000
7,AFG,Afghanistan,0.0
51,ALB,Albania,1127.64
95,DZA,Algeria,1794.695
139,AGO,Angola,535473.0
227,ARG,Argentina,8386.586


In [34]:
# Build the frame plotted on the year-2015 map from df_imf_gdp_cp
# (WEO code NGDPDPC: GDP per capita, current prices).

# ISO codes left out of the map.
# NOTE(review): presumably countries/territories that have no shape in the
# GeoJSON file - confirm.
excluded_iso = [
    'ATG', 'BHR', 'BRB', 'CPV', 'COM', 'DMA', 'GRD', 'HKG', 'KIR', 'UVK',
    'MAC', 'MDV', 'MLT', 'MHL', 'MUS', 'FSM', 'NRU', 'PLW', 'WSM', 'SMR',
    'STP', 'SYC', 'SGP', 'SSD', 'KNA', 'LCA', 'VCT', 'TON', 'TUV',
]

# Select rows and columns in one .loc call and take an explicit copy, so the
# assignments below modify an independent frame (no SettingWithCopyWarning).
df_data_plot2 = df_imf_gdp_cp.loc[
    ~df_imf_gdp_cp['ISO'].isin(excluded_iso), ['ISO', 'Country', '2015']
].copy()

# Cells equal to 'n/a' become 0 (same behaviour as before).
# NOTE(review): this makes missing data indistinguishable from a true 0.
df_data_plot2 = df_data_plot2.replace('n/a', 0)

# Strip ',' separators and convert to float. The builtins str/float replace
# the np.str/np.float aliases, which were removed in NumPy 1.24.
df_data_plot2['2015'] = (
    df_data_plot2['2015'].astype(str).str.replace(',', '').astype(float)
)

# NOTE(review): the saved output shows 615091.0 for Afghanistan next to values
# in the low thousands, which looks like a mis-parsed decimal - verify the
# number format in the CSV.
df_data_plot2.head(5)

Unnamed: 0,ISO,Country,2015
7,AFG,Afghanistan,615091.0
51,ALB,Albania,3943.217
95,DZA,Algeria,4123.297
139,AGO,Angola,3876.197
227,ARG,Argentina,14643.922


In [35]:
# Build the frame plotted on the 2015 population map from df_imf_pop
# (WEO code LP).

# ISO codes left out of the map.
# NOTE(review): presumably countries/territories that have no shape in the
# GeoJSON file - confirm.
excluded_iso = [
    'ATG', 'BHR', 'BRB', 'CPV', 'COM', 'DMA', 'GRD', 'HKG', 'KIR', 'UVK',
    'MAC', 'MDV', 'MLT', 'MHL', 'MUS', 'FSM', 'NRU', 'PLW', 'WSM', 'SMR',
    'STP', 'SYC', 'SGP', 'SSD', 'KNA', 'LCA', 'VCT', 'TON', 'TUV',
]

# Select rows and columns in one .loc call and take an explicit copy, so the
# assignments below modify an independent frame (no SettingWithCopyWarning).
df_data_plot3 = df_imf_pop.loc[
    ~df_imf_pop['ISO'].isin(excluded_iso), ['ISO', 'Country', '2015']
].copy()

# Cells equal to 'n/a' become 0 (same behaviour as before).
# NOTE(review): this makes missing data indistinguishable from a true 0.
df_data_plot3 = df_data_plot3.replace('n/a', 0)

# Strip ',' separators and convert to float. The builtins str/float replace
# the np.str/np.float aliases, which were removed in NumPy 1.24.
df_data_plot3['2015'] = (
    df_data_plot3['2015'].astype(str).str.replace(',', '').astype(float)
)
df_data_plot3.head(5)

Unnamed: 0,ISO,Country,2015
26,AFG,Afghanistan,32007.0
70,ALB,Albania,2889.0
114,DZA,Algeria,39963.0
158,AGO,Angola,26563.0
246,ARG,Argentina,43132.0


In [36]:
# Choropleth of the '2000' column of df_data_plot, joined to the GeoJSON
# shapes through each feature's `adm0_a3` property (matched against 'ISO').
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# other cells may refer to it.
map = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered.geojson"

# NOTE(review): `Map.choropleth` / `geo_path` belong to older folium releases;
# newer ones expect `folium.Choropleth(geo_data=...)` - confirm against the
# installed version before changing.
map.choropleth(
    geo_path=json_geo,
    data=df_data_plot,
    columns=['ISO', '2000'],
    key_on='feature.properties.adm0_a3',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
)
map

In [25]:
# Choropleth of the '2015' column of df_data_plot2, joined to the GeoJSON
# shapes through each feature's `adm0_a3` property (matched against 'ISO').
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# other cells may refer to it.
map = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered.geojson"

# NOTE(review): `Map.choropleth` / `geo_path` belong to older folium releases;
# newer ones expect `folium.Choropleth(geo_data=...)` - confirm against the
# installed version before changing.
map.choropleth(
    geo_path=json_geo,
    data=df_data_plot2,
    columns=['ISO', '2015'],
    key_on='feature.properties.adm0_a3',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
)
map

In [19]:
# Choropleth of the '2015' column of df_data_plot3, joined to the GeoJSON
# shapes through each feature's `adm0_a3` property (matched against 'ISO').
# NOTE(review): the name `map` shadows the Python builtin; it is kept because
# other cells may refer to it.
map = folium.Map(location=[41.87, 12.57], zoom_start=1.5)
json_geo = "filtered.geojson"

# NOTE(review): `Map.choropleth` / `geo_path` belong to older folium releases;
# newer ones expect `folium.Choropleth(geo_data=...)` - confirm against the
# installed version before changing.
map.choropleth(
    geo_path=json_geo,
    data=df_data_plot3,
    columns=['ISO', '2015'],
    key_on='feature.properties.adm0_a3',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
)
map