**Current Variables**
- country
- year 
- number of suicides
- age category
- gdp
- gdp per capita
- share of population with mental health disorders
- literacy
- rural population
- unemployment
- population aged 65 and above (% of total population)
- fertility rate
- health expenditure per capita
- depression rates

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import functools

In [2]:
# Base table that every other source is eventually joined onto.
master = pd.read_csv('../archive/master.csv')

# These two files (later merged as the "our world in data" set) are read
# with only the first line of the file skipped.
depression = pd.read_csv('../data/depression.csv', skiprows=1)
m_health = pd.read_csv('../data/m_health.csv', skiprows=1)

# The remaining files (later treated as the world bank set) are all read
# with the first five lines of the file skipped.
elderly, fertility, health_exp, literacy, rural, unemployment = (
    pd.read_csv(f'../data/{name}.csv', skiprows=5)
    for name in ('elderly', 'fertility', 'health_exp',
                 'literacy', 'rural', 'unemployment')
)

#merge our world in data dataset
# Outer join keeps every (Entity, Year) pair that appears in either file.
owid = pd.merge(m_health, depression, on=['Year', 'Entity'], how='outer')

# Keep only the join keys and the two prevalence columns.
# The explicit .copy() makes `owid` an independent frame: columns are
# assigned on it further down, and doing that on a bare column-subset
# selection raises pandas' SettingWithCopyWarning.
owid = owid[['Entity', 'Year',
             'Prevalence - Mental disorders - Sex: Both - Age: Age-standardized (Percent)',
             'Prevalence - Depressive disorders - Sex: Both - Age: Age-standardized (Percent)']].copy()

# Short names; 'country' and 'year' are the keys used by the final merge.
owid.columns = ['country', 'year', 'pct_mdisorder', 'pct_depression']

#transform world bank data to fit master and owid
# Year column labels '1960'..'2020', built once instead of three times per
# loop iteration.
year_cols = [str(year) for year in range(1960, 2021)]

wb = [elderly, fertility, health_exp, literacy, rural, unemployment]
new_wb = []
for frame in wb:
    #only keep country, indicator and dates as columns
    frame = frame[['Country Name', 'Indicator Name'] + year_cols]
    frame.columns = ['country', 'indicator'] + year_cols

    # The first row's indicator name becomes the header of the value column
    # (presumably each file carries a single indicator -- verify against the
    # data). Positional .iloc[0] takes the first row whatever the index
    # labels are.
    indicator_name = str(frame['indicator'].iloc[0])

    # Wide -> long: one row per (country, year), named directly by melt.
    new = pd.melt(frame, id_vars=['country'], value_vars=year_cols,
                  var_name='year', value_name=indicator_name,
                  ignore_index=False)
    new = new.sort_values('country')
    new_wb.append(new)
    
#merge everything
wb_owid = [*new_wb, owid, master]

# Normalise the join keys in place so every frame agrees on their dtypes:
# year as int, country as str.
for frame in wb_owid:
    frame['year'] = frame['year'].astype(int)
    frame['country'] = frame['country'].astype(str)
wb_owid_new = list(wb_owid)

# Fold the frames left to right with an outer join on (country, year).
outer_join = functools.partial(pd.merge, on=['country', 'year'], how='outer')
df_merged = functools.reduce(outer_join, wb_owid_new)

# NOTE(review): filling gaps with the string 'void' leaves otherwise-numeric
# columns holding a mix of numbers and text -- confirm this is intended.
df_merged = df_merged.fillna('void')
df_merged.to_csv('../archive/merged_master.csv')

In [13]:
# Show only the first row of the merged table.
df_merged.iloc[:1]

Unnamed: 0,country,year,Population ages 65 and above (% of total population),"Fertility rate, total (births per woman)",Current health expenditure per capita (current US$),"Literacy rate, adult total (% of people ages 15 and above)",Rural population (% of total population),"Unemployment, total (% of total labor force) (modeled ILO estimate)",pct_mdisorder,pct_depression,sex,age,suicides_no,population,suicides/100k pop,country-year,HDI for year,gdp_for_year ($),gdp_per_capita ($),generation
0,Afghanistan,1988,2.302779,7.461,void,void,79.002,void,void,void,void,void,void,void,void,void,void,void,void,void


In [14]:
# First five rows whose year is strictly after 2000.
df_merged.loc[df_merged['year'] > 2000].head(5)

Unnamed: 0,country,year,Population ages 65 and above (% of total population),"Fertility rate, total (births per woman)",Current health expenditure per capita (current US$),"Literacy rate, adult total (% of people ages 15 and above)",Rural population (% of total population),"Unemployment, total (% of total labor force) (modeled ILO estimate)",pct_mdisorder,pct_depression,sex,age,suicides_no,population,suicides/100k pop,country-year,HDI for year,gdp_for_year ($),gdp_per_capita ($),generation
4,Afghanistan,2016,2.519923,4.8,60.188671,void,74.98,11.158,17.110129,5.11176,void,void,void,void,void,void,void,void,void,void
5,Afghanistan,2005,2.227488,6.875,23.890501,void,77.297,11.217,17.824287,5.097348,void,void,void,void,void,void,void,void,void,void
7,Afghanistan,2002,2.255848,7.272,15.803164,void,77.739,11.257,17.916547,5.115216,void,void,void,void,void,void,void,void,void,void
12,Afghanistan,2008,2.287168,6.373,38.102661,void,76.68,11.093,17.581372,5.096508,void,void,void,void,void,void,void,void,void,void
15,Afghanistan,2020,2.64907,void,void,void,73.974,11.71,void,void,void,void,void,void,void,void,void,void,void,void
