Data collected from: https://globalnutritionreport.org/resources/nutrition-profiles/asia/

In [155]:
# Imports 

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import seaborn as sns
sns.set()
sns.set_context('talk')

import warnings
warnings.filterwarnings('ignore')
pd.set_option("display.max_columns", 104)
import patsy
import statsmodels.api as sm
import scipy.stats as stats
from scipy.stats import ttest_ind, chisquare, normaltest
# Note: the statsmodels import may print a 'FutureWarning'. That's fine.

In [156]:
# Of the available tables, these were the ones that seemed to have useful/enough info.
# Only country_glance is read here; region_diet and region_social are read in the
# cells that clean them further down.
country_glance = pd.read_csv(
    "./datasets/country_nutrition_profiles_datasets/country_glance.csv"
)

In [157]:
# Quick look at the first rows of the table as loaded.
country_glance.head()

Unnamed: 0,iso3,country,disaggregation,disagg.value,region,subregion,section,adult_fem_diabetes_track,adult_fem_obesity_track,adult_mal_diabetes_track,adult_mal_obesity_track,anaemia_track,ebf_track,lbw_track,rbp_female_track,rbp_male_track,sodium_track,under_5_overweight_track,under_5_stunting_track,under_5_wasting_track
0,AFG,Afghanistan,all,,Asia,Southern Asia,The burden of malnutrition at a glance,Off course,Off course,Off course,Off course,No progress or worsening,On course,No data,Off course,Off course,Off course,On course,Some progress,Some progress
1,AGO,Angola,all,,Africa,Middle Africa,The burden of malnutrition at a glance,Off course,Off course,Off course,Off course,No progress or worsening,No data,Some progress,Off course,Off course,Off course,Off course,No progress or worsening,On course
2,ALB,Albania,all,,Europe,Southern Europe,The burden of malnutrition at a glance,Off course,Off course,Off course,Off course,No progress or worsening,No progress or worsening,On course,Off course,Off course,Off course,On course,On course,On course
3,AND,Andorra,all,,Europe,Southern Europe,The burden of malnutrition at a glance,On course,Off course,Off course,Off course,No progress or worsening,No data,No progress or worsening,On course,On course,No data,No data,No data,No data
4,ARE,United Arab Emirates,all,,Asia,Western Asia,The burden of malnutrition at a glance,Off course,Off course,Off course,Off course,No progress or worsening,No data,No progress or worsening,On course,On course,Off course,No data,No data,No data


In [158]:
def clean_results(str_in, col_name):
    """Convert one progress-label cell into its numeric score.

    Label -> score mapping:
        'Off course'               -> 0
        'On course'                -> 1
        'Some progress'            -> 0.5
        'No progress or worsening' -> -1.0
        'No data'                  -> NaN

    Parameters
    ----------
    str_in : str or number
        The raw cell value. Numbers (Python or NumPy scalars, including NaN)
        are returned unchanged. Strings are matched against the labels above
        after stripping surrounding whitespace; a string that is not a label
        is parsed as a number (int when it is all digits, float otherwise).
    col_name : str
        Name of the column being cleaned; only used in the error message.

    Returns
    -------
    int or float
        The numeric score, or ``np.nan`` when the value is 'No data' or
        cannot be interpreted (in which case a message is printed).
    """
    # Already numeric: pass through. NumPy integer/floating scalars are listed
    # explicitly because e.g. np.int64 is not a subclass of Python's int.
    if isinstance(str_in, (int, float, np.integer, np.floating)):
        return str_in

    replacements = {
        'Off course': 0,
        'On course': 1,
        'Some progress': 0.5,
        # Kept as a float so the returned type matches what parsing '-1' produced before.
        'No progress or worsening': -1.0,
        'No data': np.nan,
    }

    try:
        text = str_in.strip()
        # Exact lookup instead of substring substitution, so a label embedded in
        # other text (e.g. '1On course') can no longer be spliced into a bogus number.
        if text in replacements:
            return replacements[text]
        return int(text) if text.isdigit() else float(text)
    except (AttributeError, TypeError, ValueError) as e:
        # Best effort: report the bad cell and treat it as missing rather than abort.
        print(f"An error occurred in column '{col_name}': {e}")
        return np.nan

In [159]:
# CLEANING UP TABLE
# Drop 'disaggregation' and 'disagg.value', since they seem to be irrelevant, and
# 'section', since its values all seem to be the same and not that relevant.
# errors='ignore' keeps this cell re-runnable: a second run finds the columns
# already gone instead of raising KeyError.
country_glance = country_glance.drop(
    columns=['disaggregation', 'disagg.value', 'section'], errors='ignore'
)

# Convert the label strings in every '*_track' column into numeric scores
# (see the replacements table in clean_results for the mapping). Selecting by
# the '_track' suffix rather than by position means the identifying columns
# (iso3, country, region, subregion) are never touched, whatever their order.
track_cols = [c for c in country_glance.columns if str(c).endswith('_track')]
for col in track_cols:
    country_glance[col] = country_glance[col].apply(clean_results, args=(col,))

country_glance.head()

Unnamed: 0,iso3,country,region,subregion,adult_fem_diabetes_track,adult_fem_obesity_track,adult_mal_diabetes_track,adult_mal_obesity_track,anaemia_track,ebf_track,lbw_track,rbp_female_track,rbp_male_track,sodium_track,under_5_overweight_track,under_5_stunting_track,under_5_wasting_track
0,AFG,Afghanistan,Asia,Southern Asia,0.0,0.0,0.0,0.0,-1.0,1.0,,0.0,0.0,0.0,1.0,0.5,0.5
1,AGO,Angola,Africa,Middle Africa,0.0,0.0,0.0,0.0,-1.0,,0.5,0.0,0.0,0.0,0.0,-1.0,1.0
2,ALB,Albania,Europe,Southern Europe,0.0,0.0,0.0,0.0,-1.0,-1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0
3,AND,Andorra,Europe,Southern Europe,1.0,0.0,0.0,0.0,-1.0,,-1.0,1.0,1.0,,,,
4,ARE,United Arab Emirates,Asia,Western Asia,0.0,0.0,0.0,0.0,-1.0,,-1.0,1.0,1.0,0.0,,,


In [160]:
# CLEANING UP REGION_DIET TABLE
region_diet = pd.read_csv("./datasets/country_nutrition_profiles_datasets/region_diet.csv")
# Drop 'section' when the file has it; a missing column is not an error.
region_diet = region_diet.drop(columns=['section'], errors='ignore')

# Build the full old-name -> new-name mapping first, then rename once.
# The year suffix in each column name is swapped for a bracketed label:
# '_2014-2020' becomes ' (%)', otherwise '_2018' becomes ' (g/day)'.
renamed_cols = {}
for col in region_diet.columns:
    if '_2014-2020' in col:
        renamed_cols[col] = col.replace('_2014-2020', ' (%)')
    elif '_2018' in col:
        renamed_cols[col] = col.replace('_2018', ' (g/day)')
region_diet = region_diet.rename(columns=renamed_cols)

region_diet.head()

Unnamed: 0,region,disaggregation,disagg.value,continued_breastfeeding_1yr (%),Dairy (g/day),early_initiation (%),exclusive_breastfeeding (%),Fish (g/day),Fruit (g/day),Legumes (g/day),minimum_accept_diet (%),minimum_diet_diversity (%),minimum_meal (%),Nuts (g/day),Red meat (g/day),solid_foods (%),Vegetables (g/day),Whole grains (g/day)
0,Africa,location,Target,,1.0,,,1.0,1.0,1.0,,,,1.0,1.0,,1.0,1.0
1,Africa,location,Within target,,1.0,,,1.0,1.0,1.0,,,,1.0,1.0,,1.0,1.0
2,Africa,sex,Both,65.84,,51.71,44.38,,,,13.21,23.81,44.74,,,75.41,,
3,Asia,location,Target,,1.0,,,1.0,1.0,1.0,,,,1.0,1.0,,1.0,1.0
4,Asia,location,Within target,,1.0,,,1.0,1.0,1.0,,,,1.0,1.0,,1.0,1.0


In [161]:
# CLEANING UP REGION_SOCIAL TABLE
# One pipeline: read the file, keep only the rows whose disaggregation is 'all'
# (generalizing to all genders), drop the columns that are irrelevant here,
# then drop every column that is left entirely empty.
region_social = (
    pd.read_csv("./datasets/country_nutrition_profiles_datasets/region_social.csv")
    .loc[lambda frame: frame['disaggregation'] == 'all']
    .drop(columns=['disaggregation', 'disagg.value', 'section'])
    .dropna(axis=1, how='all')
)

region_social.head()

Unnamed: 0,region,190_percent_2002,190_percent_2003,190_percent_2004,190_percent_2005,190_percent_2006,190_percent_2007,190_percent_2008,190_percent_2009,190_percent_2010,190_percent_2011,190_percent_2012,190_percent_2013,190_percent_2014,190_percent_2015,190_percent_2016,190_percent_2017,190_percent_2018,190_percent_2019,190_percent_2020,190_percent_2021,320_percent_2002,320_percent_2003,320_percent_2004,320_percent_2005,320_percent_2006,320_percent_2007,320_percent_2008,320_percent_2009,320_percent_2010,320_percent_2011,320_percent_2012,320_percent_2013,320_percent_2014,320_percent_2015,320_percent_2016,320_percent_2017,320_percent_2018,320_percent_2019,320_percent_2020,320_percent_2021,65_years_2021,at_least_basic_sanitation_2000,at_least_basic_sanitation_2001,at_least_basic_sanitation_2002,at_least_basic_sanitation_2003,at_least_basic_sanitation_2004,at_least_basic_sanitation_2005,at_least_basic_sanitation_2006,at_least_basic_sanitation_2007,at_least_basic_sanitation_2008,at_least_basic_sanitation_2009,...,undernourishment_prev_2009,undernourishment_prev_2010,undernourishment_prev_2011,undernourishment_prev_2012,undernourishment_prev_2013,undernourishment_prev_2014,undernourishment_prev_2015,undernourishment_prev_2016,undernourishment_prev_2017,undernourishment_prev_2018,unimproved_sanitation_2000,unimproved_sanitation_2001,unimproved_sanitation_2002,unimproved_sanitation_2003,unimproved_sanitation_2004,unimproved_sanitation_2005,unimproved_sanitation_2006,unimproved_sanitation_2007,unimproved_sanitation_2008,unimproved_sanitation_2009,unimproved_sanitation_2010,unimproved_sanitation_2011,unimproved_sanitation_2012,unimproved_sanitation_2013,unimproved_sanitation_2014,unimproved_sanitation_2015,unimproved_sanitation_2016,unimproved_sanitation_2017,unimproved_sanitation_2018,unimproved_sanitation_2019,unimproved_sanitation_2020,unimproved_water_2000,unimproved_water_2001,unimproved_water_2002,unimproved_water_2003,unimproved_water_2004,unimproved_water_2005,unimp
roved_water_2006,unimproved_water_2007,unimproved_water_2008,unimproved_water_2009,unimproved_water_2010,unimproved_water_2011,unimproved_water_2012,unimproved_water_2013,unimproved_water_2014,unimproved_water_2015,unimproved_water_2016,unimproved_water_2017,unimproved_water_2018,unimproved_water_2019,unimproved_water_2020
0,Africa,46.99,47.0,44.96,43.81,42.92,42.16,41.38,41.36,40.15,38.48,37.08,36.77,35.84,35.6,35.64,35.11,34.71,34.16,34.39,33.32,69.0,69.22,68.25,67.24,65.98,65.23,64.48,64.02,62.92,61.92,60.52,60.29,59.58,59.42,59.66,59.22,58.99,58.51,58.74,57.4,48363.1,,,,,,,,,,,...,19.3,18.8,18.4,18.1,17.9,18.0,18.2,18.5,18.6,18.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Asia,30.3,28.57,26.14,23.15,22.78,20.94,19.91,18.48,16.01,12.78,11.43,8.89,7.88,6.96,6.1,5.39,4.83,4.48,5.08,4.19,58.95,57.43,54.74,51.46,50.47,47.98,46.53,44.71,41.51,37.73,35.46,31.53,29.39,27.35,25.29,23.31,21.79,20.94,22.09,20.17,440663.68,,,,,,,,,,,...,10.7,10.1,9.6,9.4,9.2,8.9,8.7,8.5,8.4,8.3,20.37,19.86,18.84,17.89,16.95,16.03,15.13,14.24,13.37,12.52,11.7,10.89,10.11,9.35,8.6,7.87,7.16,6.3,5.62,4.96,4.29,12.94,12.58,11.94,11.35,10.77,10.26,9.76,9.27,8.78,8.31,7.85,7.41,6.98,6.56,6.14,5.74,5.35,4.96,4.58,4.21,3.81
2,Australia and New Zealand,0.84,1.0,0.5,0.46,0.43,0.4,0.36,0.36,0.35,0.39,0.42,0.46,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,1.12,1.24,0.71,0.7,0.69,0.68,0.67,0.67,0.68,0.69,0.71,0.73,0.74,0.74,0.74,0.74,0.74,0.74,0.74,0.74,5111.98,,,,,,,,,,,...,2.4,2.4,2.4,2.4,2.4,2.4,2.4,2.4,2.4,2.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,Caribbean,28.12,28.05,28.91,27.37,26.67,26.19,25.97,24.16,25.06,23.81,11.46,10.87,10.93,10.47,9.91,9.12,8.68,9.01,9.85,9.67,39.53,40.23,42.26,39.46,38.16,37.65,37.92,36.21,37.01,35.3,26.62,25.28,24.63,23.27,22.36,21.12,20.31,20.61,21.65,21.4,4567.74,,,,,,,,,,,...,19.3,18.4,17.9,18.0,17.7,17.4,17.1,17.0,16.9,16.7,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,Central America,8.53,8.86,8.18,8.54,6.19,6.01,6.52,6.43,5.73,5.35,5.26,4.98,4.75,4.06,3.48,3.24,3.11,3.01,3.44,3.15,21.29,20.96,19.21,19.16,16.05,15.73,16.38,16.54,15.04,14.41,14.69,14.2,13.93,12.35,10.54,9.81,9.42,9.3,10.62,9.78,13403.38,,,,,,,,,,,...,7.9,7.7,7.6,7.3,7.2,7.3,7.9,8.3,8.4,8.7,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
