## Introduction

## Import

In [None]:
#Downloading and preprocessing
import os
from datetime import datetime

import numpy as np
import pandas as pd

#Warnings

#Visualization
# NOTE: `plt` is bound to the top-level matplotlib package (not to pyplot);
# the plotting cells call `plt.pyplot...`, so pyplot is imported explicitly
# here to guarantee that the submodule is loaded.
import matplotlib as plt
import matplotlib.pyplot
import seaborn as sns
import geopandas as gpd
import geoplot as gpt
import libpysal as ps
from mpl_toolkits.axes_grid1 import make_axes_locatable

#os
# Print every file available under the Kaggle input directory (once)
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

## Functions

In [None]:
# Data preview
def get_preview(df):
    """Display a per-column summary of ``df``.

    The summary has one row per column of ``df`` and the columns
    ``column_name``, ``type`` (dtype), ``missing`` (count of NaN values),
    ``%_missing`` (share of NaN values, rounded to 2 decimals) and
    ``sample`` (one example value).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.

    Returns
    -------
    None
        The summary is rendered with ``display``; nothing is returned.
    """
    n_rows = len(df)

    # Column names, dtypes and missing counts all follow the order of df.columns
    data_prev = pd.DataFrame({'column_name': df.columns,
                              'type': df.dtypes.to_numpy(),
                              'missing': df.isna().sum().to_numpy()})
    data_prev['%_missing'] = round((data_prev['missing'] / n_rows), 2)

    # Sample of each column's content: the 6th row by POSITION (not by index
    # label), falling back to the last row for short frames and to None for
    # empty ones, so the preview never fails on a filtered/re-indexed frame.
    if n_rows:
        sample_pos = min(5, n_rows - 1)
        samples = [df.iloc[sample_pos, col_pos] for col_pos in range(df.shape[1])]
    else:
        samples = [None] * df.shape[1]
    data_prev['sample'] = samples

    display(data_prev)

## Data downloading

In [None]:
# Read the raw UNIGME 2021 table and print its column / dtype summary
data = pd.read_csv(
    filepath_or_buffer="../input/neonatal-mortality-rate/UNIGME-2021.csv"
)
data.info()

In [None]:
# Work on an independent copy so the raw `data` frame stays untouched
mortality = data.copy(deep=True)

In [None]:
# Show dtype, missing-value count/share and a sample value for every column
df = mortality  # alias of the working frame (same object, not a copy)
get_preview(df=df)

## Content description

- Geographic area - countries and region agglomerations,
- Indicator - age group of the deceased child,
- Sex - gender,
- Wealth Quintile - GDP per quintile,
- Series Name - name of survey/population study/admin data etc., 
- Series Year - year of publication of survey/census/VR,
- TIME_PERIOD - year and month of observation,
- OBS_VALUE - quantity of deaths,
- Observation status - included or excluded in Inter-agency Group for Child Mortality Estimation,
- Unit of measure - measurement unit,
- Series Category - type of survey,
- STD_ERR - sampling standard error of the observation value,
- REF_DATE - mid-point of reference period of observation value,
- Age Group of Women - mothers age,
- Time Since First Birth - corresponds to the nomination,
- DEFINITION - classification of definition used for stillbirth rate,
- INTERVAL - length (in years) of the reference interval,
- Series Method - data collection method,
- LOWER_BOUND - corresponds to the nomination,
- UPPER_BOUND - corresponds to the nomination,
- STATUS - considering progress towards SDG in neonatal mortality rate,
- YEAR_TO_ACHIEVE - corresponds to the nomination,
- Model Used -  estimation method for child, adolescent and youth mortality.

For further work, we will take data that will allow us to trace statistics by geographic region, as well as the ages of children and mothers over several decades.

In [None]:
# Keep only the columns used in the rest of the analysis
need_cols = [
    'Geographic area',
    'Indicator',
    'TIME_PERIOD',
    'OBS_VALUE',
    'Age Group of Women',
    'Unit of measure',
]

mortality = mortality.loc[:, need_cols]

In [None]:
# Five random rows of the reduced frame
mortality.sample(n=5)

## Preprocessing

In [None]:
# Drop exact duplicate rows, then rows without an observed value
mortality = (
    mortality
    .drop_duplicates()
    .dropna(subset=['OBS_VALUE'])
)

In [None]:
# Lower-case every column name
mortality.columns = mortality.columns.str.lower()

### - indicator

In [None]:
# Distinct indicator labels present in the data
mortality['indicator'].unique()

In [None]:
# Exclude the two "progress towards SDG" indicators from the analysis
to_drop = [
    'Progress towards SDG in neonatal mortality rate',
    'Progress towards SDG in under-five mortality rate',
]
mortality = mortality.loc[~mortality['indicator'].isin(to_drop)]

In [None]:
# Collapse the indicator labels into age-group labels.
# Both "rate" and "deaths" indicators map to the same age-group label, and
# some source labels appear twice (with and without a trailing space).
indicator_age_groups = {
    'Under-five mortality rate': '1 - 4 years',
    'Infant mortality rate': '0 years',
    'Under-five deaths': '1 - 4 years',
    'Child Mortality rate age 1-4': '1 - 4 years',
    'Infant deaths': '0 years',
    'Child deaths age 1 to 4': '1 - 4 years',
    'Neonatal mortality rate ': 'neonatal',
    'Mortality rate age 5-9': '5 - 9 years',
    'Mortality rate age 5-14 ': '5 - 14 years',
    'Mortality rate age 15-24': '15 - 24 years',
    'Mortality rate age 15-19': '15 - 19 years',
    'Neonatal deaths': 'neonatal',
    'Stillbirth rate': 'stillbirth',
    'Mortality rate 1-59 months': '0 - 5 years',
    'Mortality rate age 20-24': '20 - 24 years',
    'Deaths age 5 to 14': '5 - 14 years',
    'Mortality rate age 10-19 ': '10 - 19 years',
    'Deaths age 10 to 14': '10 - 14 years',
    'Deaths age 20 to 24': '20 - 24 years',
    'Deaths 1-59 months': '0 - 5 years',
    'Deaths age 5 to 24': '5 - 24 years',
    'Mortality rate age 5-24': '5 - 24 years',
    'Deaths age 10 to 19': '10 - 19 years',
    'Deaths age 15 to 24': '15 - 24 years',
    'Deaths age 15 to 19': '15 - 19 years',
    'Deaths age 5 to 9': '5 - 9 years',
    'Mortality rate age 10-14': '10 - 14 years',
    'Stillbirths': 'stillbirth',
    'Neonatal mortality rate': 'neonatal',
    'Mortality rate age 5-14': '5 - 14 years',
    'Mortality rate age 10-19': '10 - 19 years',
}

mortality = mortality.replace({'indicator': indicator_age_groups})

### - time_period

In [None]:
# Replace each observation date by its calendar year
mortality = mortality.assign(
    time_period=lambda frame: pd.to_datetime(frame['time_period']).dt.year
)

### - obs_value

In [None]:
# Round the observed values to the nearest whole number and store as integers
mortality['obs_value'] = mortality['obs_value'].round().astype('int64')

### - age group of women

In [None]:
# Distinct mother age groups present in the data
pd.unique(mortality['age group of women'])

### - unit of measure

In [None]:
# Standard measurement label.
# NOTE(review): every row receives the same label, whatever unit the source
# row carried - confirm this is intended for the "deaths" indicators too.
mortality = mortality.assign(**{'unit of measure': 'per 1000 children'})

In [None]:
# Short column names used from here on
rename_col = dict([
    ('geographic area', 'area'),
    ('indicator', 'mortality_rate'),
    ('time_period', 'year'),
    ('obs_value', 'value'),
    ('age group of women', 'women_age'),
    ('unit of measure', 'unit'),
])

mortality = mortality.rename(columns=rename_col)

# World Data Visualization

In [None]:
# Seaborn theme for all following charts
sns.set_style(style="whitegrid")

In [None]:
# Overall distribution of observations per year.
# Only `year` is plotted (x axis): the histogram counts rows per year bin,
# the `value` column is not used as a weight.
g = sns.displot(
    data=mortality[['year', 'value']],
    x='year',
    bins=30,
    element="step",
    alpha=0.15,
    kde=True,
)
g.set(title="Children mortality over the years")
g.fig.set_size_inches(10, 4)


In [None]:
#Creating data subset
# Names of the aggregate regions found in the `area` column.
# Every entry must end with a comma: two adjacent string literals are silently
# concatenated by Python into a single (non-matching) name.
regions_list = ['East Asia and Pacific',
                'Europe and Central Asia',
                'Eastern Europe and Central Asia',
                'Eastern and Southern Africa',
                'Latin America and Caribbean',
                'Middle East and North Africa',
                'North America',
                'South Asia',
                'Sub-Saharan Africa',
                'West and Central Africa',
                'Western Europe',
                'Australia and New Zealand',
                'Central Asia',
                'Central Asia and Southern Asia',
                'Eastern Asia',
                'Eastern Asia and South-Eastern Asia',
                'Europe',
                'Northern America and Europe',
                'Latin America and the Caribbean',
                'Northern Africa',
                'Europe, Australia and New Zealand',
                'Northern America',
                'Oceania',
                'Oceania excluding Australia and New Zealand',
                'Small island developing States',
                'Southern Asia',
                'South-Eastern Asia',
                'South Eastern Asia and Oceania (excl. Australia and New Zealand)',
                'Western Asia',
                'Western Asia and Northern Africa']

region_mortality = mortality[mortality.area.isin(regions_list)]

#After 2000
region_mortality = region_mortality[region_mortality.year>1999]

#Aggregation and sorting: total `value` per region, largest first
region_mortality = region_mortality[['area', 'value']].groupby('area', as_index=False).sum()
region_mortality = region_mortality.sort_values(by=['value'], ascending=False)

In [None]:
# Horizontal bar chart of the summed values per region
f, ax = plt.pyplot.subplots(figsize=(8, 15))
sns.barplot(data=region_mortality,
            x="value",
            y="area",
            palette="plasma",
            ax=ax)
ax.set(title="All regions 2000 - 2020")

In [None]:
# Sub-Saharan Africa (aggregate region row) from 2000 onwards
sub_saharian = mortality[(mortality.area == 'Sub-Saharan Africa')
                         & (mortality.year > 1999)]

# Distribution of the region's observations per year
g = sns.displot(
    data=sub_saharian[['year', 'value']],
    x='year',
    bins=20,
    element="step",
    alpha=0.15,
    color='#6E35DB',
    kde=True,
)
g.set(title="Sub-Saharan Africa 2000 - 2020")
g.fig.set_size_inches(10, 4)

In [None]:
#Sub-Saharan countries chart
# Country names used to select Sub-Saharan rows from the `area` column.
# Some countries are listed under two spellings ('Cape Verde' / 'Cabo Verde',
# 'Eswatini' / 'Swaziland') so that either one matches.
sub_sach_countries = ['Angola',
                      'Benin',
                      'Botswana',
                      'Burkina Faso',
                      'Burundi',
                      'Cameroon',
                      'Cape Verde',
                      'Central African Republic',
                      'Chad',
                      'Cabo Verde',
                      'Comoros',
                      'Democratic Republic of the Congo',
                      'Congo',
                      "Côte d'Ivoire",
                      'Djibouti',
                      'Equatorial Guinea',
                      'Eritrea',
                      'Eswatini',
                      'Ethiopia',
                      'Gabon',
                      'Gambia',
                      'Ghana',
                      'Guinea',
                      'Guinea-Bissau',
                      'Kenya',
                      'Lesotho',
                      'Liberia',
                      'Madagascar',
                      'Malawi',
                      'Mali',
                      'Mauritania',
                      'Mauritius',
                      'Mozambique',
                      'Namibia',
                      'Niger',
                      'Nigeria',
                      'Réunion',
                      'Rwanda',
                      'Sao Tome and Principe',
                      'Senegal',
                      'Seychelles',
                      'Sierra Leone',
                      'Somalia',
                      'South Africa',
                      'South Sudan',
                      'Sudan',
                      'Swaziland',
                      'Togo',
                      'Uganda',
                      'United Republic of Tanzania',
                      'Western Sahara',
                      'Zambia',
                      'Zimbabwe']

# Rows of the listed countries, from 2000 onwards
SS_africa = mortality[mortality.area.isin(sub_sach_countries)
                      & (mortality.year > 1999)]

In [None]:
# Total `value` per country, largest first
SS_africa_agg = (
    SS_africa
    .groupby('area', as_index=False)['value']
    .sum()
    .sort_values(by='value', ascending=False)
)

In [None]:
# Horizontal bar chart of the summed values per Sub-Saharan country
f, ax = plt.pyplot.subplots(figsize=(10, 20))
sns.barplot(data=SS_africa_agg,
            x="value",
            y="area",
            palette="plasma",
            ax=ax)
ax.set(title="Sub-Saharan Africa 2000 - 2020 per countries")

In the chart, we see the three countries with the highest child mortality in the region: Nigeria, the Democratic Republic of the Congo and Ethiopia. Let's see whether these countries have anything in common geographically and how much they differ from their neighbors.

In [None]:
#Checking country names in the geo frame
# `world` was never defined in the notebook, so a fresh kernel failed here.
# The abbreviated names used in the following cells ('Dem. Rep. Congo',
# 'eSwatini', 'S. Sudan', ...) are those of the Natural Earth low-resolution
# country layer.
# NOTE(review): `gpd.datasets` exists in geopandas < 1.0 only; on newer
# versions read the Natural Earth "admin 0 countries" file with gpd.read_file.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

africa = world[world.continent=='Africa']
world_africa_subset = africa.name.unique().tolist()

In [None]:
# Rename countries to the spellings used in the geo frame
geo_names = {
    'Democratic Republic of the Congo': 'Dem. Rep. Congo',
    'Central African Republic': 'Central African Rep.',
    'Equatorial Guinea': 'Eq. Guinea',
    'United Republic of Tanzania': 'Tanzania',
    'Eswatini': 'eSwatini',
    'South Sudan': 'S. Sudan',
}
SS_africa = SS_africa.replace({'area': geo_names})

In [None]:
# Total `value` per Sub-Saharan country, keyed by `name` for the geo merge
SS_mort_stat = (
    SS_africa
    .groupby('area', as_index=False)['value']
    .sum()
    .rename(columns={'area': 'name'})
)

# Sub-Saharan Africa Visualization


In [None]:
# Overall Africa plotting: white background without grid lines for the maps
sns.set_style("whitegrid", {'axes.grid': False})
africa = world.loc[world.continent == 'Africa']

# Attach the per-country totals to the African geometries; countries without
# a matching total keep a missing `value` (left join)
SS_africa_viz = pd.merge(left=africa, right=SS_mort_stat, how='left', on='name')

In [None]:
# Choropleth of the summed values per country
fig, ax = plt.pyplot.subplots(1, 1, figsize=(8, 10))

# Dedicated narrow axes to the right of the map for the colorbar
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

SS_africa_viz.plot(column='value',
                   cmap="plasma",
                   edgecolor="gray",
                   legend=True,
                   ax=ax,
                   cax=cax)
g = ax.set(title="Sub-Saharan Africa 2000 - 2020")


In [None]:
# The three countries with the largest totals (geo-frame spellings)
extra_mort_subset = ['Nigeria', 'Dem. Rep. Congo', 'Ethiopia']
SS_extra_mort = SS_africa.loc[SS_africa['area'].isin(extra_mort_subset)]

# Overlaid per-country histograms of observations per year
g = sns.displot(
    data=SS_extra_mort[['area', 'year', 'value']],
    x='year',
    hue='area',
    multiple='layer',
    palette="magma",
    bins=20,
)
g.fig.set_size_inches(10, 4)


In [None]:
# One frame per country: Nigeria, Dem. Rep. Congo, Ethiopia
nigeria, dem_congo, ethiopia = (
    SS_africa[SS_africa.area == country]
    for country in ("Nigeria", "Dem. Rep. Congo", "Ethiopia")
)

In [None]:
# Nigeria: stacked histogram of observations per year, split by age group
g = sns.displot(
    data=nigeria[['mortality_rate', 'year', 'value']],
    x='year',
    hue='mortality_rate',
    multiple="stack",
    palette="icefire",
    bins=20,
)
g.set(title="Nigeria mortality rate 2000 - 2020")
g.fig.set_size_inches(12, 6)

In [None]:
# Dem. Rep. Congo: stacked histogram of observations per year, split by age group
g = sns.displot(
    data=dem_congo[['mortality_rate', 'year', 'value']],
    x='year',
    hue='mortality_rate',
    multiple="stack",
    palette="icefire",
    bins=20,
)
g.set(title="Democratic Republic of the Congo mortality rate 2000 - 2020")
g.fig.set_size_inches(12, 6)

In [None]:
# Ethiopia: stacked histogram of observations per year, split by age group
g = sns.displot(
    data=ethiopia[['mortality_rate', 'year', 'value']],
    x='year',
    hue='mortality_rate',
    multiple="stack",
    palette="icefire",
    bins=20,
)
g.set(title="Ethiopia mortality rate 2000 - 2020")
g.fig.set_size_inches(12, 6)

In [None]:
# All years for the Sub-Saharan countries, keyed by `name` and using the
# country spellings of the geo frame
SS_africa_hist = (
    mortality[mortality.area.isin(sub_sach_countries)]
    .rename(columns={'area': 'name'})
    .replace({'name': {
        'Democratic Republic of the Congo': 'Dem. Rep. Congo',
        'Central African Republic': 'Central African Rep.',
        'Equatorial Guinea': 'Eq. Guinea',
        'United Republic of Tanzania': 'Tanzania',
        'Eswatini': 'eSwatini',
        'South Sudan': 'S. Sudan',
    }})
)

In [None]:
def _decade_totals(first_year):
    """Return the summed `value` per country for the ten years from `first_year`."""
    years = SS_africa_hist.year
    in_decade = (years >= first_year) & (years < first_year + 10)
    return (SS_africa_hist.loc[in_decade, ['name', 'value']]
            .groupby('name', as_index=False)
            .sum())


# Per-country totals for the 1960s, 1970s, 1980s and 1990s
SS_africa_60 = _decade_totals(1960)
SS_africa_70 = _decade_totals(1970)
SS_africa_80 = _decade_totals(1980)
SS_africa_90 = _decade_totals(1990)

In [None]:
# Geo frames for visualisation: African geometries left-joined with each
# decade's per-country totals
africa60_viz, africa70_viz, africa80_viz, africa90_viz = (
    pd.merge(africa, decade_totals, on='name', how='left')
    for decade_totals in (SS_africa_60, SS_africa_70, SS_africa_80, SS_africa_90)
)

In [None]:
# 2x2 grid of choropleths, one per decade
fig, axes = plt.pyplot.subplots(2, 2, figsize=(10, 10))

decade_frames = [(africa60_viz, "1960s"),
                 (africa70_viz, "1970s"),
                 (africa80_viz, "1980s"),
                 (africa90_viz, "1990s")]

for panel_ax, (decade_frame, decade_label) in zip(axes.flat, decade_frames):
    # Each map gets its own colorbar axes attached to its own panel; a single
    # `cax` built from an axes of a previous figure would receive all four
    # colorbars and none would appear next to these maps.
    divider = make_axes_locatable(panel_ax)
    cax = divider.append_axes("right", size="5%", pad=0.1)

    decade_frame.plot(column='value',
                      edgecolor="gray",
                      cmap="plasma",
                      legend=True,
                      ax=panel_ax,
                      cax=cax)
    panel_ax.set(title=f"Sub-Saharan Africa {decade_label} per countries")

In [None]:
# Basic mothers dataset: rows that carry a mother age group
mothers = mortality.dropna(subset=['women_age'])
mothers.info()

In [None]:
# Distinct mother age groups in the subset
mothers['women_age'].unique()

In [None]:
# Encode the mother age groups as ranks 1..8, in the order listed below
# (note that '25 to 34 years old' sits between '25 to 29' and '30 to 34')
age_brackets = [
    '15 to 19 years old',
    '20 to 24 years old',
    '25 to 29 years old',
    '25 to 34 years old',
    '30 to 34 years old',
    '35 to 39 years old',
    '40 to 44 years old',
    '45 to 49 years old',
]
age_scale = {label: rank for rank, label in enumerate(age_brackets, start=1)}

mothers = mothers.replace({'women_age': age_scale})
mothers.sample(5)

In [None]:
# Mothers' rows for the three countries with the largest totals.
# `mothers` is derived from `mortality`, whose `area` values keep the original
# spellings, hence the long name for the Democratic Republic of the Congo.
extra_mort_subset = ['Nigeria', 'Democratic Republic of the Congo', 'Ethiopia']
extra_mort_mothers = mothers.loc[mothers['area'].isin(extra_mort_subset)]

In [None]:
# Mother age rank by year for the three countries; marker size follows `value`
sns.relplot(
    data=extra_mort_mothers,
    x="year",
    y="women_age",
    hue="area",
    size="value",
    sizes=(40, 400),
    alpha=.8,
    palette="plasma",
    height=6,
).set(title="Age of mothers in the most risky countries")

In [None]:
# Mother age rank by year for every area; marker size follows `value`
sns.relplot(
    data=mothers,
    x="year",
    y="women_age",
    size="value",
    sizes=(40, 400),
    alpha=.8,
    color='red',
    height=6,
).set(title="Age of mothers in the all world")

# Conclusion

Using various visualization methods, we examined the child mortality data and concluded that Sub-Saharan Africa is the region with the highest child mortality. Within this region, three countries stand out far ahead of the others:

- Nigeria,
- Democratic Republic Of The Congo,
- Ethiopia.

Comparing mortality in these countries over 6 decades, from the 1960s to 2020, we can say that the increase in mortality is more related to political events in these countries and economic crises than to natural conditions. This hypothesis can be proved more thoroughly by comparing these data with datasets on wars and the economic history of Sub-Saharan Africa.

At the same time, it should be noted that great expectations of the world economy are connected with the African continent. This allows us to hope that by 2030 the mortality rate in this region will decrease significantly, and the demographic situation as a whole will become healthier and more stable.