<a href="https://colab.research.google.com/github/ArunK-ML/Project---Nutrition-Paradox-A-Global-View-on-Obesity-and-Malnutrition/blob/main/Nutrition_Paradox1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **⚖️ Nutrition Paradox: A Global View on Obesity and Malnutrition**

In [1]:
pip install pycountry

Collecting pycountry
  Downloading pycountry-24.6.1-py3-none-any.whl.metadata (12 kB)
Downloading pycountry-24.6.1-py3-none-any.whl (6.3 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/6.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.7/6.3 MB[0m [31m22.5 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/6.3 MB[0m [31m38.9 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━[0m [32m6.0/6.3 MB[0m [31m57.4 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m6.3/6.3 MB[0m [31m57.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m38.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pycountry
Successfully installed pycountry-24.6.1


In [2]:
import requests
import pandas as pd
import pycountry

# -----------------------------
# Step 1: Fetch Data from APIs
# -----------------------------
url1 = "https://ghoapi.azureedge.net/api/NCD_BMI_30C"
url2 = "https://ghoapi.azureedge.net/api/NCD_BMI_PLUS2C"
url3 = "https://ghoapi.azureedge.net/api/NCD_BMI_18C"
url4 = "https://ghoapi.azureedge.net/api/NCD_BMI_MINUS2C"

response1 = requests.get(url1)
response2 = requests.get(url2)
response3 = requests.get(url3)
response4 = requests.get(url4)

Obesity_adults = response1.json()
Obesity_children = response2.json()
Malnutrition_adults = response3.json()
Malnutrition_children = response4.json()

# -----------------------------
# Step 2: Convert JSON to DataFrames
# -----------------------------
Obesity_adults_df = pd.DataFrame(Obesity_adults["value"])
Obesity_children_df = pd.DataFrame(Obesity_children["value"])
Malnutrition_adults_df = pd.DataFrame(Malnutrition_adults["value"])
Malnutrition_children_df = pd.DataFrame(Malnutrition_children["value"])

# Add "age_group" column
Obesity_adults_df["age_group"] = "Adult"
Obesity_children_df["age_group"] = "Child"
Malnutrition_adults_df["age_group"] = "Adult"
Malnutrition_children_df["age_group"] = "Child"

# -----------------------------
# Step 3: Combine datasets
# -----------------------------
df_obesity = pd.concat([Obesity_adults_df, Obesity_children_df], ignore_index=True)
df_malnutrition = pd.concat([Malnutrition_adults_df, Malnutrition_children_df], ignore_index=True)

# -----------------------------
# Step 4: Select and Rename Columns
# -----------------------------
columns_map = {
    "ParentLocationCode": "Region",
    "Dim1": "Gender",
    "TimeDim": "Year",
    "Low": "LowerBound",
    "High": "UpperBound",
    "NumericValue": "Mean_Estimate",
    "SpatialDim": "Country"
}

selected_cols = list(columns_map.keys()) + ["age_group"]

df_obesity_subset = df_obesity[selected_cols].rename(columns=columns_map)
df_malnutrition_subset = df_malnutrition[selected_cols].rename(columns=columns_map)

# -----------------------------
# Step 5: Data Type Conversion
# -----------------------------
df_obesity_subset["Gender"] = df_obesity_subset["Gender"].astype("category")
df_obesity_subset["age_group"] = df_obesity_subset["age_group"].astype("category")
df_obesity_subset["Year"] = df_obesity_subset["Year"].astype(int)
df_malnutrition_subset["Year"] = df_malnutrition_subset["Year"].astype(int)

# -----------------------------
# Step 6: Filter Year 2012–2022
# -----------------------------
df_obesity_subset_filter = df_obesity_subset[
    (df_obesity_subset["Year"] >= 2012) & (df_obesity_subset["Year"] <= 2022)
].reset_index(drop=True)

df_malnutrition_subset_filter = df_malnutrition_subset[
    (df_malnutrition_subset["Year"] >= 2012) & (df_malnutrition_subset["Year"] <= 2022)
].reset_index(drop=True)

# -----------------------------
# Step 7: Replace Gender Codes
# -----------------------------
gender_map = {
    "SEX_FMLE": "Female",
    "SEX_BTSX": "Both",
    "SEX_MLE": "Male"
}

df_obesity_subset_filter.loc[:, "Gender"] = df_obesity_subset_filter["Gender"].replace(gender_map)
df_malnutrition_subset_filter.loc[:, "Gender"] = df_malnutrition_subset_filter["Gender"].replace(gender_map)

# -----------------------------
# Step 8: Replace Country Codes with Country Names (in-place)
# -----------------------------
def code_to_country(code):
    try:
        return pycountry.countries.lookup(code).name
    except:
        # Fallback mapping for special WHO or WB codes
        special_codes = {
            'GLOBAL': 'Global',
            'WB_LMI': 'Low & Middle Income',
            'WB_HI': 'High Income',
            'WB_LI': 'Low Income',
            'EMR': 'Eastern Mediterranean Region',
            'EUR': 'Europe',
            'AFR': 'Africa',
            'SEAR': 'South-East Asia Region',
            'WPR': 'Western Pacific Region',
            'AMR': 'Americas Region',
            'WB_UMI': 'Upper Middle Income',
            'WLD': 'World',
            'ENG': 'England',
            'SCT': 'Scotland',
            'XK': 'Kosovo',
            'WLS': 'Wales',
            'NIR': 'Northern Ireland',
            'PS': 'Palestine',
            'EU': 'European Union'
        }
        return special_codes.get(code, "Unknown")

df_obesity_subset_filter["Country"] = df_obesity_subset_filter["Country"].apply(code_to_country)
df_malnutrition_subset_filter["Country"] = df_malnutrition_subset_filter["Country"].apply(code_to_country)

# -----------------------------
# Step 9: Output Preview
# -----------------------------
print("✅ Obesity Data (2012–2022):")
print(df_obesity_subset_filter.head())

print("\n✅ Malnutrition Data (2012–2022):")
print(df_malnutrition_subset_filter.head())


  df_obesity_subset_filter.loc[:, "Gender"] = df_obesity_subset_filter["Gender"].replace(gender_map)
Length: 27720
Categories (3, object): ['Both', 'Female', 'Male']' has dtype incompatible with category, please explicitly cast to a compatible dtype first.
  df_obesity_subset_filter.loc[:, "Gender"] = df_obesity_subset_filter["Gender"].replace(gender_map)


✅ Obesity Data (2012–2022):
  Region  Gender  Year  LowerBound  UpperBound  Mean_Estimate  \
0   SEAR    Male  2013    3.004463    4.399506       3.682037   
1    EMR    Male  2020   15.793777   19.245727      17.487641   
2    EMR    Male  2020   23.089589   33.815336      28.335530   
3    EUR  Female  2014   24.924766   31.213260      27.948704   
4    EUR  Female  2019   28.160682   35.574925      31.893221   

                     Country age_group  
0                  Sri Lanka     Adult  
1  Iran, Islamic Republic of     Adult  
2                       Iraq     Adult  
3                  Greenland     Adult  
4                    Armenia     Adult  

✅ Malnutrition Data (2012–2022):
  Region  Gender  Year  LowerBound  UpperBound  Mean_Estimate  \
0    AFR  Female  2021    3.972036    8.077021       5.795110   
1    WPR    Both  2017    2.428230    3.842155       3.090652   
2    AFR  Female  2016    6.655668   10.253121       8.371724   
3   None    Both  2021   12.242323   14.4

In [3]:
df_obesity_subset_filter["CI_Width"] = df_obesity_subset_filter["UpperBound"] - df_obesity_subset_filter["LowerBound"]

df_malnutrition_subset_filter["CI_Width"] = df_malnutrition_subset_filter["UpperBound"] - df_malnutrition_subset_filter["LowerBound"]



In [4]:
df_obesity_subset_filter

Unnamed: 0,Region,Gender,Year,LowerBound,UpperBound,Mean_Estimate,Country,age_group,CI_Width
0,SEAR,Male,2013,3.004463,4.399506,3.682037,Sri Lanka,Adult,1.395044
1,EMR,Male,2020,15.793777,19.245727,17.487641,"Iran, Islamic Republic of",Adult,3.451950
2,EMR,Male,2020,23.089589,33.815336,28.335530,Iraq,Adult,10.725747
3,EUR,Female,2014,24.924766,31.213260,27.948704,Greenland,Adult,6.288494
4,EUR,Female,2019,28.160682,35.574925,31.893221,Armenia,Adult,7.414243
...,...,...,...,...,...,...,...,...,...
27715,EUR,Both,2020,0.964176,2.197432,1.475931,Tajikistan,Child,1.233255
27716,EMR,Female,2012,0.302039,5.872414,1.966190,Somalia,Child,5.570375
27717,WPR,Male,2021,3.325899,34.877816,16.634887,Vanuatu,Child,31.551917
27718,AFR,Male,2014,0.236481,6.341185,2.032189,Madagascar,Child,6.104704


In [5]:
df_malnutrition_subset_filter

Unnamed: 0,Region,Gender,Year,LowerBound,UpperBound,Mean_Estimate,Country,age_group,CI_Width
0,AFR,Female,2021,3.972036,8.077021,5.795110,Gabon,Adult,4.104985
1,WPR,Both,2017,2.428230,3.842155,3.090652,Mongolia,Adult,1.413925
2,AFR,Female,2016,6.655668,10.253121,8.371724,Sierra Leone,Adult,3.597453
3,,Both,2021,12.242323,14.442866,13.336353,South-East Asia Region,Adult,2.200543
4,EMR,Male,2014,1.253843,3.516819,2.208070,Qatar,Adult,2.262976
...,...,...,...,...,...,...,...,...,...
27715,AMR,Female,2013,2.795499,5.714345,4.140693,Haiti,Child,2.918846
27716,EUR,Both,2018,5.428173,10.218395,7.574986,Tajikistan,Child,4.790222
27717,WPR,Both,2016,0.621721,3.507061,1.703645,Papua New Guinea,Child,2.885340
27718,WPR,Female,2020,0.064152,2.810176,0.734066,Samoa,Child,2.746023


In [9]:
# Define conditions and choices
import numpy as np

obesity_conditions = [
    df_obesity_subset_filter["CI_Width"] >= 30,
    (df_obesity_subset_filter["CI_Width"] >= 25) & (df_obesity_subset_filter["CI_Width"] < 29.9),
    df_obesity_subset_filter["CI_Width"] < 25
]
obesity_choices = ['High', 'Moderate', 'Low']

# Create new column

df_obesity_subset_filter["obesity_level"] = np.select(obesity_conditions, obesity_choices, default='Unknown')

In [10]:
df_obesity_subset_filter

Unnamed: 0,Region,Gender,Year,LowerBound,UpperBound,Mean_Estimate,Country,age_group,CI_Width,obesity_level
0,SEAR,Male,2013,3.004463,4.399506,3.682037,Sri Lanka,Adult,1.395044,Low
1,EMR,Male,2020,15.793777,19.245727,17.487641,"Iran, Islamic Republic of",Adult,3.451950,Low
2,EMR,Male,2020,23.089589,33.815336,28.335530,Iraq,Adult,10.725747,Low
3,EUR,Female,2014,24.924766,31.213260,27.948704,Greenland,Adult,6.288494,Low
4,EUR,Female,2019,28.160682,35.574925,31.893221,Armenia,Adult,7.414243,Low
...,...,...,...,...,...,...,...,...,...,...
27715,EUR,Both,2020,0.964176,2.197432,1.475931,Tajikistan,Child,1.233255,Low
27716,EMR,Female,2012,0.302039,5.872414,1.966190,Somalia,Child,5.570375,Low
27717,WPR,Male,2021,3.325899,34.877816,16.634887,Vanuatu,Child,31.551917,High
27718,AFR,Male,2014,0.236481,6.341185,2.032189,Madagascar,Child,6.104704,Low


In [11]:
# Define conditions and choices
import numpy as np

malnutrition_conditions = [
    df_malnutrition_subset_filter["CI_Width"] >= 20,
    (df_malnutrition_subset_filter["CI_Width"] >= 10) & (df_malnutrition_subset_filter["CI_Width"] < 19.9),
    df_malnutrition_subset_filter["CI_Width"] < 10
]
malnutrition_choices = ['High', 'Moderate', 'Low']

# Create new column

df_malnutrition_subset_filter["malnutrition_level"] = np.select(malnutrition_conditions, malnutrition_choices, default='Unknown')

In [12]:
df_malnutrition_subset_filter

Unnamed: 0,Region,Gender,Year,LowerBound,UpperBound,Mean_Estimate,Country,age_group,CI_Width,malnutrition_level
0,AFR,Female,2021,3.972036,8.077021,5.795110,Gabon,Adult,4.104985,Low
1,WPR,Both,2017,2.428230,3.842155,3.090652,Mongolia,Adult,1.413925,Low
2,AFR,Female,2016,6.655668,10.253121,8.371724,Sierra Leone,Adult,3.597453,Low
3,,Both,2021,12.242323,14.442866,13.336353,South-East Asia Region,Adult,2.200543,Low
4,EMR,Male,2014,1.253843,3.516819,2.208070,Qatar,Adult,2.262976,Low
...,...,...,...,...,...,...,...,...,...,...
27715,AMR,Female,2013,2.795499,5.714345,4.140693,Haiti,Child,2.918846,Low
27716,EUR,Both,2018,5.428173,10.218395,7.574986,Tajikistan,Child,4.790222,Low
27717,WPR,Both,2016,0.621721,3.507061,1.703645,Papua New Guinea,Child,2.885340,Low
27718,WPR,Female,2020,0.064152,2.810176,0.734066,Samoa,Child,2.746023,Low
