In [1]:
# Import the required Python modules
import numpy as np
import pandas as pd
import datetime
from sqlalchemy import create_engine


# STEP 1: EXTRACT

In [2]:
# DATASOURCE 1: per-country average malnutrition indicators.
# File: datasets_673762_1185224_country-wise-average.csv - source kaggle.com

countrywise_avg_file = "Resources/datasets_673762_1185224_country-wise-average.csv"
df_countrywise_avg = pd.read_csv(countrywise_avg_file)

# Report the number of rows extracted. len() counts every row, whereas
# Series.count() on 'Country' would silently skip rows with a missing country
# and under-report the total.
print("Total records ", len(df_countrywise_avg))
df_countrywise_avg.head()

Total records  152


Unnamed: 0,Country,Income Classification,Severe Wasting,Wasting,Overweight,Stunting,Underweight,U5 Population ('000s)
0,AFGHANISTAN,0.0,3.033333,10.35,5.125,47.775,30.375,4918.5615
1,ALBANIA,2.0,4.075,7.76,20.8,24.16,7.7,232.8598
2,ALGERIA,2.0,2.733333,5.942857,12.833333,19.571429,7.342857,3565.213143
3,ANGOLA,1.0,2.4,6.933333,2.55,42.633333,23.6,3980.054
4,ARGENTINA,2.0,0.2,2.15,11.125,10.025,2.6,3613.65175


In [3]:
# DATASOURCE 2: survey-level malnutrition estimates (one row per country survey).
# File: datasets_673762_1185224_malnutrition-estimates.csv - source kaggle.com

malnutrition_est_file = "Resources/datasets_673762_1185224_malnutrition-estimates.csv"
df_malnutrition_est = pd.read_csv(malnutrition_est_file)

# Report the number of rows extracted. len() counts every row, whereas
# Series.count() on 'Country' would silently skip rows with a missing country
# and under-report the total.
print("Total records ", len(df_malnutrition_est))
df_malnutrition_est.head()

Total records  924


Unnamed: 0,Sequence Number,ISO code,Country,Survey Year,Year,Income Classification,LDC,LIFD,LLDC or SID2,Survey Sample (N),Severe Wasting,Wasting,Overweight,Stunting,Underweight,Notes,Report Author,Source,Short Source,U5 Population ('000s)
0,0,AFG,AFGHANISTAN,1997,1997,0,1,1,1,4846.0,,18.2,6.5,53.2,44.9,Converted estimates,CIET International,Afghanistan 1997 multiple indicator baseline (...,MICS,3838.877
1,1,AFG,AFGHANISTAN,2004,2004,0,1,1,1,946.0,3.5,8.6,4.6,59.3,32.9,,"Ministry of Public Health (Afghanistan), UNICE...",Summary report of the national nutrition surve...,NNS,4789.353
2,2,AFG,AFGHANISTAN,2013,2013,0,1,1,1,4426469.0,4.0,9.5,5.3,40.4,24.6,,"Ministry of Public Health, UNICEF and the Aga ...",Afghanistan National Nutrition Survey 2013.,SMART,5444.573
3,3,AFG,AFGHANISTAN,2018,2018,0,1,1,1,,1.6,5.1,4.1,38.2,19.1,,KIT Royal Tropical Institute,Afghanistan Health Survey 2018,Other,5601.443
4,4,ALB,ALBANIA,1996-98,1997,2,0,0,0,7642.0,,8.1,9.5,20.4,7.1,Converted estimates,"Institute of Public Health, Food and Nutrition...",National study on nutrition in Albania. Instit...,Other,309.225


In [4]:
# Lookup ("fact") tables maintained by the team rather than downloaded from kaggle.
# File locations are declared together, then each file is loaded.
dev_countries_fact_file = "Resources/datasets_673762_1185224_developing_countries_fact.csv"
inc_class_fact_file = "Resources/datasets_673762_1185224_income_classification_fact.csv"

# DATASOURCE 3: developing-country lookup - source: team
df_dev_countries_fact = pd.read_csv(dev_countries_fact_file)

# DATASOURCE 4: income-classification lookup - source: team
df_inc_class_fact = pd.read_csv(inc_class_fact_file)


Unnamed: 0,Income Classification ID,Income Classification Description
0,0,Low Income
1,1,Lower Middle Income
2,2,Upper Middle Income
3,3,High Income


# STEP 2: TRANSFORM

In [5]:
# Reshape the survey-level estimates in a single chain:
#   1. drop the note/source columns and the raw 'Survey Year' column,
#   2. replace abbreviated headers with readable names,
#   3. left-join the two lookup tables so every estimate row is kept and gains
#      the lookup columns,
#   4. drop the original code columns; the lookup key columns
#      ('Developing Country Flag', 'Income Classification ID') stay in the frame.
df_malnutrition_est = (
    df_malnutrition_est
    .drop(columns=['Notes', 'Source', 'Short Source', 'Survey Year'])
    .rename(columns={
        "U5 Population ('000s)": "Under 5 Y/O Population",
        "LDC": "Least Developed Countries",
        "LIFD": "Low Income Food Deficient",
        "LLDC or SID2": "Developing Country Status",
        "Survey Sample (N)": "Survey Sample",
    })
    .merge(
        df_dev_countries_fact,
        how='left',
        left_on=['Developing Country Status'],
        right_on=['Developing Country Flag'],
    )
    .merge(
        df_inc_class_fact,
        how='left',
        left_on=['Income Classification'],
        right_on=['Income Classification ID'],
    )
    .drop(columns=['Developing Country Status', 'Income Classification'])
)

# Give the country-level averages the same population header as the estimates.
df_countrywise_avg = df_countrywise_avg.rename(
    columns={"U5 Population ('000s)": "Under 5 Y/O Population"}
)


In [6]:
# Preview the first five rows to confirm the population column was renamed.
df_countrywise_avg.head(n=5)

Unnamed: 0,Country,Income Classification,Severe Wasting,Wasting,Overweight,Stunting,Underweight,Under 5 Y/O Population
0,AFGHANISTAN,0.0,3.033333,10.35,5.125,47.775,30.375,4918.5615
1,ALBANIA,2.0,4.075,7.76,20.8,24.16,7.7,232.8598
2,ALGERIA,2.0,2.733333,5.942857,12.833333,19.571429,7.342857,3565.213143
3,ANGOLA,1.0,2.4,6.933333,2.55,42.633333,23.6,3980.054
4,ARGENTINA,2.0,0.2,2.15,11.125,10.025,2.6,3613.65175


In [7]:
# Preview the first five rows to inspect the columns after the drops, renames and joins.
df_malnutrition_est.head(n=5)

Unnamed: 0,Sequence Number,ISO code,Country,Year,Least Developed Countries,Low Income Food Deficient,Survey Sample,Severe Wasting,Wasting,Overweight,Stunting,Underweight,Report Author,Under 5 Y/O Population,Developing Country Flag,Developing Country Type,Developing Country Description,Income Classification ID,Income Classification Description
0,0,AFG,AFGHANISTAN,1997,1,1,4846.0,,18.2,6.5,53.2,44.9,CIET International,3838.877,1,LLDC,Land Locked Developing Countries,0,Low Income
1,1,AFG,AFGHANISTAN,2004,1,1,946.0,3.5,8.6,4.6,59.3,32.9,"Ministry of Public Health (Afghanistan), UNICE...",4789.353,1,LLDC,Land Locked Developing Countries,0,Low Income
2,2,AFG,AFGHANISTAN,2013,1,1,4426469.0,4.0,9.5,5.3,40.4,24.6,"Ministry of Public Health, UNICEF and the Aga ...",5444.573,1,LLDC,Land Locked Developing Countries,0,Low Income
3,3,AFG,AFGHANISTAN,2018,1,1,,1.6,5.1,4.1,38.2,19.1,KIT Royal Tropical Institute,5601.443,1,LLDC,Land Locked Developing Countries,0,Low Income
4,4,ALB,ALBANIA,1997,0,0,7642.0,,8.1,9.5,20.4,7.1,"Institute of Public Health, Food and Nutrition...",309.225,0,OTH,Other,2,Upper Middle Income


# STEP 3: LOAD