In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Obesity and GDP per capita dataset for all countries

In [None]:
# Load the obesity / GDP-per-capita CSV (path is relative to the notebook)
# and display the resulting frame.
obesity_gdp = pd.read_csv('../data/obesity_gdp.csv')
obesity_gdp

In [None]:
# Use 'Country' as the column label in place of 'Entity'.
entity_to_country = {'Entity': 'Country'}
obesity_gdp = obesity_gdp.rename(columns=entity_to_country)


In [None]:
# Display the frame again to check the renamed column.
obesity_gdp

In [None]:
# List the column labels (note: some labels used later contain spaces).
obesity_gdp.columns

Obesity is defined as having a body-mass index (BMI) ≥ 30. BMI is a person's weight (in kilograms) divided by their
height (in meters) squared. GDP per capita measured in constant international-$ means it is adjusted for price
differences between countries and adjusted for inflation to allow comparisons between countries and over time.

In [None]:
# Print dtypes and non-null counts per column.
obesity_gdp.info()

In [None]:
# Count missing values per column (there are missing values).
obesity_gdp.isna().sum()

In [None]:
# Keep only rows that have a value in every column.
obesity_gdp = obesity_gdp.dropna(axis=0, how='any')

In [None]:
# Re-check missing values per column after dropping incomplete rows.
obesity_gdp.isna().sum()

In [None]:
# (rows, columns) remaining after the dropna step.
obesity_gdp.shape

In [None]:
# Scatter of GDP per capita against year with a fitted regression line.
fig, ax = plt.subplots(figsize=(12, 10))
sns.regplot(
    data=obesity_gdp,
    x="Year",
    y="Gdp_ per_capita",
    color="purple",
    line_kws={"color": "black"},
    ax=ax,
)
plt.show()

## Obesity and daily intake of protein dataset for all countries

In [None]:
# Load the adult obesity / daily protein supply CSV and display it.
adult_obesity_protein= pd.read_csv('../data/adult_obesity_protein.csv')
adult_obesity_protein

In [None]:
# Use 'Country' as the column label in place of 'Entity'.
entity_to_country = {'Entity': 'Country'}
adult_obesity_protein = adult_obesity_protein.rename(columns=entity_to_country)


In [None]:
# Remove the OWID world-region column from the frame.
adult_obesity_protein = adult_obesity_protein.drop(
    columns=['World regions according to OWID']
)

In [None]:
# Preview the first five rows.
adult_obesity_protein.head(5)

In [None]:
# Print dtypes and non-null counts per column.
adult_obesity_protein.info()

In [None]:
# Count missing values per column.
adult_obesity_protein.isna().sum()

In [None]:
# Keep only rows that have a value in every column.
adult_obesity_protein = adult_obesity_protein.dropna(axis=0, how='any')

In [None]:
# (rows, columns) remaining after the dropna step.
adult_obesity_protein.shape

## Prevalence of obesity among adults, BMI >= 30 (crude estimate) (%) - Sex: both sexes - Age group: 18+ years with all countries

In [None]:
# Load the adult obesity prevalence CSV and display it.
Obesity_adult = pd.read_csv('../data/Obesity_adult.csv')
Obesity_adult 

In [None]:
# Latest year present in the table.
Obesity_adult.Year.max()

In [None]:
# Tabulate the prevalence column with value_counts, then show the first ten
# rows after sorting on 'Obesity_Prevalence ' in descending order.
(
    Obesity_adult['Obesity_Prevalence ']
    .value_counts()
    .reset_index()
    .sort_values(by='Obesity_Prevalence ', ascending=False)
    .head(10)
)

In [None]:
# Count missing values per column.
Obesity_adult.isna().sum()

In [None]:
# Keep only rows that have a value in every column.
Obesity_adult = Obesity_adult.dropna(axis=0, how='any')

In [None]:
# (rows, columns) remaining after the dropna step.
Obesity_adult.shape

In [None]:
# Use 'Country' as the column label in place of 'Entity'.
entity_to_country = {'Entity': 'Country'}
Obesity_adult = Obesity_adult.rename(columns=entity_to_country)

In [None]:
# Show one row to confirm the renamed column.
Obesity_adult.head(1)

In [None]:
# List column labels; 'Obesity_Prevalence ' is referenced below with a trailing space.
Obesity_adult.columns

In [None]:
# One line per country: obesity prevalence against year, legend placed
# outside the axes on the right.
fig, ax = plt.subplots(figsize=(12, 8))
sns.lineplot(
    data=Obesity_adult,
    x='Year',
    y='Obesity_Prevalence ',
    hue='Country',
    marker='o',
    ax=ax,
)
ax.set_title('Obesity Prevalence Over Time by Country')
ax.set_xlabel('Year')
ax.set_ylabel('Obesity Prevalence (%)')
ax.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')

In [None]:
# Restrict the prevalence table to three countries of interest.
countries_of_interest = ['Mexico', 'Peru', 'Colombia']
Obesity_adult_filtered = Obesity_adult.loc[
    Obesity_adult['Country'].isin(countries_of_interest)
]

In [None]:
# Prevalence over time for the filtered countries only.
fig, ax = plt.subplots(figsize=(12, 8))
sns.lineplot(
    data=Obesity_adult_filtered,
    x='Year',
    y='Obesity_Prevalence ',
    hue='Country',
    marker='s',
    ax=ax,
)
ax.set_title('Obesity Prevalence Over Time (Mexico, Peru, Colombia)')
ax.set_xlabel('Year')
ax.set_ylabel('Obesity Prevalence (%)')
ax.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')

In [None]:
# Small multiples: one panel per country, four panels per row.
g = sns.FacetGrid(data=Obesity_adult, col="Country", col_wrap=4, height=4)
(
    g.map(sns.lineplot, 'Year', 'Obesity_Prevalence ')
    .set_axis_labels('Year', 'Obesity Prevalence (%)')
    .set_titles("{col_name}")
)
plt.tight_layout()
plt.show()


In [None]:
# Distinct (Country, prevalence) pairs with their row counts, then the ten
# rows with the highest prevalence.
Country_df = (
    Obesity_adult[['Country', 'Obesity_Prevalence ']]
    .value_counts()
    .reset_index()
)
Country_df.sort_values(by='Obesity_Prevalence ', ascending=False).head(10)

In [None]:
# Same table as the previous cell, showing the ten rows with the lowest
# prevalence (the tail of the descending sort).
Country_df = (
    Obesity_adult[['Country', 'Obesity_Prevalence ']]
    .value_counts()
    .reset_index()
)
Country_df.sort_values(by='Obesity_Prevalence ', ascending=False).tail(10)

In [None]:
# Ten (Country, GDP per capita) rows with the highest GDP per capita.
Country_gdp = (
    obesity_gdp[['Country', 'Gdp_ per_capita']]
    .value_counts()
    .reset_index()
    .sort_values(by='Gdp_ per_capita', ascending=False)
    .head(10)
)
Country_gdp

In [None]:
# Same top-ten GDP table as the previous cell, displayed without assignment.
(
    obesity_gdp[['Country', 'Gdp_ per_capita']]
    .value_counts()
    .reset_index()
    .sort_values(by='Gdp_ per_capita', ascending=False)
    .head(10)
)

## Obesity countries dataset with Population

In [None]:
# Load the obesity / daily calories CSV (it also carries a population column,
# renamed in the next cell) and display it.
Obesity_daily_calories_pop= pd.read_csv('../data/Obesity_daily_calories.csv')
Obesity_daily_calories_pop

In [None]:
# Shorten two column labels: 'Entity' and 'Population (historical)'.
column_renames = {
    'Entity': 'Country',
    'Population (historical)': 'Population',
}
Obesity_daily_calories_pop = Obesity_daily_calories_pop.rename(columns=column_renames)

In [None]:
# Remove two columns from the frame before the merge further down.
Obesity_daily_calories_pop = Obesity_daily_calories_pop.drop(
    columns=[
        'Overweight or Obese (NCDRisC (2017))',
        'World regions according to OWID',
    ]
)

In [None]:
# Print dtypes and non-null counts per column.
Obesity_daily_calories_pop.info()

In [None]:
# Distinct (Country, Population) pairs, largest population first.
Obesity_pop = (
    Obesity_daily_calories_pop[['Country', 'Population']]
    .value_counts()
    .reset_index()
    .sort_values(by='Population', ascending=False)
)
Obesity_pop

In [None]:
# Exclude the 'World' rows so only the remaining entities are listed.
Countries_population = Obesity_pop.loc[Obesity_pop['Country'].ne('World')]
Countries_population

## Obesity and life expectancy (period life expectancy at birth - Sex: total - Age: 0 - life expectancy column)

In [None]:
# Load the life expectancy CSV and display it.
ob_life_expectancy = pd.read_csv('../data/life_expectancy.csv')
ob_life_expectancy

In [None]:
# Count missing values per column.
ob_life_expectancy.isna().sum()

In [None]:
# Keep only rows that have a value in every column.
ob_life_expectancy = ob_life_expectancy.dropna(axis=0, how='any')

In [None]:
# (rows, columns) remaining after the dropna step.
ob_life_expectancy.shape

In [None]:
# Use 'Country' as the column label in place of 'Entity', then show one row
# to confirm the new label.
entity_to_country = {'Entity': 'Country'}
ob_life_expectancy = ob_life_expectancy.rename(columns=entity_to_country)
ob_life_expectancy.head(1)

## Merge all the datasets on Country, Code and Year

In [None]:
# Peek at one row of the left-hand table before merging.
obesity_gdp.head(1)

In [None]:
# Peek at one row of the right-hand table before merging.
adult_obesity_protein.head(1)

In [None]:
# Inner-join GDP and protein tables on the shared Country / Code / Year keys.
obesity_gdp_protein = obesity_gdp.merge(
    adult_obesity_protein,
    on=['Country', 'Code', 'Year'],
    how='inner',
)
obesity_gdp_protein

In [None]:
# Drop the 'Obesity_prevalence ' column (label has a trailing space) from the merged frame.
obesity_gdp_protein = obesity_gdp_protein.drop(columns=['Obesity_prevalence '])
                                                            

In [None]:
# Add life expectancy: inner join on the Country / Code / Year keys.
obesity_gdp_protein_lex = obesity_gdp_protein.merge(
    ob_life_expectancy,
    on=['Country', 'Code', 'Year'],
    how='inner',
)
obesity_gdp_protein_lex

In [None]:
# Add the calories/population table: inner join on the Country / Code / Year keys.
obesity_gdp_protein_lex_pop = obesity_gdp_protein_lex.merge(
    Obesity_daily_calories_pop,
    on=['Country', 'Code', 'Year'],
    how='inner',
)
obesity_gdp_protein_lex_pop

In [None]:
# List the column labels of the fully merged frame.
obesity_gdp_protein_lex_pop.columns

## Questions 

## Which country had the greatest increase, and did any countries have a decrease, in obesity rates?

## Which country had the greatest increase in obesity rates?

In [None]:
# The question is about the greatest *increase*, so compare each country's
# prevalence in its first and last available year. Picking the single highest
# prevalence value (as before) finds the most obese country, not the one whose
# rate grew the most.
_prevalence_by_country = (
    obesity_gdp_protein_lex_pop
    .sort_values('Year')
    .groupby('Country')['Obesity_Prevalence _both']
)
# Change in prevalence (last year minus first year) per country.
obesity_change = _prevalence_by_country.last() - _prevalence_by_country.first()
# Kept as a one-element list because the next cell filters with `.isin(...)`.
Country_high_obesity = [obesity_change.idxmax()]
Country_high_obesity

In [None]:
# Rows for the selected country, plotted as prevalence against year.
Obese_high_country = obesity_gdp_protein_lex_pop[
    obesity_gdp_protein_lex_pop['Country'].isin(Country_high_obesity)
]
Obese_high_country.plot.line(
    x='Year', y='Obesity_Prevalence _both', color='purple', marker='s'
);

In [None]:
# Seaborn version of the same trend line for the selected country.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=Obese_high_country,
    x='Year',
    y='Obesity_Prevalence _both',
    color='purple',
    marker='s',
    ax=ax,
)
ax.set_xlabel('Year')
ax.set_ylabel('Obesity Prevalence')
ax.set_title('Obesity Prevalence Trend Over Years')
plt.show()

## Which country had a decrease in obesity rates?

In [None]:
# The question is about a *decrease*, so compare each country's prevalence in
# its first and last available year. Picking the lowest single prevalence
# value (as before) finds the least obese country, not one whose rate fell.
_prevalence_by_country = (
    obesity_gdp_protein_lex_pop
    .sort_values('Year')
    .groupby('Country')['Obesity_Prevalence _both']
)
# Change in prevalence (last year minus first year) per country.
obesity_change = _prevalence_by_country.last() - _prevalence_by_country.first()
# Country with the smallest change; kept as a one-element list because the
# next cell filters with `.isin(...)`.
Country_low_obesity = [obesity_change.idxmin()]
# Show the change itself: a positive value here means no country decreased.
obesity_change.loc[Country_low_obesity]

In [None]:
# Rows for the selected country, plotted as prevalence against year.
Obese_low_country = obesity_gdp_protein_lex_pop[
    obesity_gdp_protein_lex_pop['Country'].isin(Country_low_obesity)
]
Obese_low_country.plot.line(
    x='Year', y='Obesity_Prevalence _both', color='purple', marker='s'
);

In [None]:
# Seaborn version of the same trend line for the selected country.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=Obese_low_country,
    x='Year',
    y='Obesity_Prevalence _both',
    color='purple',
    marker='s',
    ax=ax,
)
ax.set_xlabel('Year')
ax.set_ylabel('Obesity Prevalence')
ax.set_title('Obesity Prevalence Trend Over Years')
plt.show()

# `Countries_high_obesity` was not defined anywhere above this cell, so the
# plot raised NameError on a fresh kernel. Build it here: the ten
# (Country, Year) rows with the highest obesity prevalence.
Countries_high_obesity = (
    obesity_gdp_protein_lex_pop[['Country', 'Year', 'Obesity_Prevalence _both']]
    .drop_duplicates()
    .sort_values('Obesity_Prevalence _both', ascending=False)
    .head(10)
)

plt.figure(figsize=(12, 8))
sns.lineplot(data=Countries_high_obesity , x='Year', y='Obesity_Prevalence _both', hue='Country', marker='s')
plt.title('Obesity Prevalence Over Time ')
plt.xlabel('Year')
plt.ylabel('Obesity Rates (%)')
# Legend sits outside the axes so it does not cover the lines.
plt.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')

# Ten (Country, Year) rows with the lowest obesity prevalence: the tail of
# the descending sort.
Countries_low_obesity = (
    obesity_gdp_protein_lex_pop[['Country', 'Year', 'Obesity_Prevalence _both']]
    .value_counts()
    .reset_index()
    .sort_values(by='Obesity_Prevalence _both', ascending=False)
    .tail(10)
)
Countries_low_obesity

# Prevalence against year for the lowest-prevalence rows, one hue per country.
fig, ax = plt.subplots(figsize=(12, 8))
sns.lineplot(
    data=Countries_low_obesity,
    x='Year',
    y='Obesity_Prevalence _both',
    hue='Country',
    marker='s',
    ax=ax,
)
ax.set_title('Obesity Prevalence Over Time ')
ax.set_xlabel('Year')
ax.set_ylabel('Obesity Rates (%)')
ax.legend(title='Country', bbox_to_anchor=(1.05, 1), loc='upper left')

# GDP per capita against year; seaborn aggregates the rows that share a year
# into a single point on the line.
plt.figure(figsize=(10, 6))
# `errorbar=None` replaces the deprecated `ci=None` (seaborn >= 0.12) and
# likewise turns off the confidence band.
sns.lineplot(data=obesity_gdp_protein_lex, x='Year', y='Gdp_ per_capita', color='purple', marker='o', errorbar=None)
plt.xticks(rotation=45)
plt.xlabel('Year')
plt.ylabel('GDP per Capita')
plt.title('GDP per Capita Trend Over Years')
plt.show()

In [None]:
# Display the merged frame without the calories/population columns.
obesity_gdp_protein_lex

In [None]:
# Number of distinct values per column (e.g. how many countries and years).
obesity_gdp_protein_lex_pop.nunique() 

## 125 Countries have data for all years 

In [None]:
# Count the countries that have a row for every year in the merged data.
# The year count is derived from the data instead of the hard-coded 27, so the
# cell stays correct if the year range of the inputs changes.
# NOTE(review): assumes one row per (Country, Year) - confirm.
n_years = obesity_gdp_protein_lex_pop.Year.nunique()
# Name the counts column explicitly: `value_counts().to_frame()` only calls it
# 'count' from pandas 2.0 onwards.
Countries_df = (
    obesity_gdp_protein_lex_pop.Country
    .value_counts()
    .rename('count')
    .to_frame()
)
len(Countries_df.loc[Countries_df['count'] == n_years])

In [None]:
# Keep only the three decade-start years for the comparisons below.
my_list = [1990, 2000, 2010]
ob_decades = obesity_gdp_protein_lex_pop[
    obesity_gdp_protein_lex_pop['Year'].isin(my_list)
]
ob_decades

 ## Comparing Obesity Prevalence for 1990,2000 and 2010

In [None]:
# Distribution of obesity prevalence across countries for each selected year.
ax = sns.boxplot(data=ob_decades, x='Year', y='Obesity_Prevalence _both', color="purple")
ax.set_xlabel('Year')
ax.set_ylabel('Obesity Prevalence');

## Comparing GDP Per capita for 1990,2000 and 2010

In [None]:
# Distribution of GDP per capita across countries for each selected year.
ax = sns.boxplot(data=ob_decades, x='Year', y='Gdp_ per_capita', color="purple")
ax.set_xlabel('Year')
ax.set_ylabel('GDP Per Capita');

## Comparing Life Expectancy for 1990,2000 and 2010

In [None]:
# Distribution of life expectancy across countries for each selected year.
ax = sns.boxplot(data=ob_decades, x='Year', y='Life_expectancy ', color="purple")
ax.set_xlabel('Year')
ax.set_ylabel('Life Expectancy ');

## Comparing Daily protein supply per person for 1990,2000,2010

In [None]:
# Distribution of daily protein supply across countries for each selected year.
ax = sns.boxplot(data=ob_decades, x='Year', y='Daily protein supply per person', color="purple")
ax.set_xlabel('Year')
ax.set_ylabel('Daily protein supply per person');

## Comparing Population For 1990,2000,2010 Years

In [None]:
# Bar height is seaborn's per-year aggregate of Population over the rows in
# `ob_decades`. `errorbar=None` replaces the deprecated `ci=None`
# (seaborn >= 0.12) and likewise hides the error bars.
sns.barplot(data=ob_decades, y="Population", x="Year", errorbar=None, color="purple")
# Show full numbers on the y axis instead of scientific notation.
plt.ticklabel_format(style='plain', axis='y');

from matplotlib.ticker import FuncFormatter
# Spread of population across countries for each selected year, with plain
# (non-scientific) numbers on the y axis.
ax = sns.boxplot(data=ob_decades, x='Year', y='Population', color="purple")
ax.ticklabel_format(style='plain', axis='y')

  ## GDP per capita changed over time 1990-2015 

# GDP per capita against year over the fully merged frame; seaborn aggregates
# the rows that share a year into a single point on the line.
plt.figure(figsize=(10, 6))
# `errorbar=None` replaces the deprecated `ci=None` (seaborn >= 0.12) and
# likewise turns off the confidence band.
sns.lineplot(data=obesity_gdp_protein_lex_pop , x='Year', y='Gdp_ per_capita', color='purple', marker='o', errorbar=None)
plt.xticks(rotation=45)
plt.xlabel('Year')
plt.ylabel('GDP per Capita')
plt.title('GDP per Capita Trend Over Years')
plt.show()

# Obesity prevalence against year; seaborn aggregates the rows that share a
# year into a single point on the line.
plt.figure(figsize=(10, 6))
# `errorbar=None` replaces the deprecated `ci=None` (seaborn >= 0.12) and
# likewise turns off the confidence band.
sns.lineplot(data=obesity_gdp_protein_lex, x='Year', y='Obesity_Prevalence _both', color='purple', marker='o', errorbar=None)
plt.xticks(rotation=45)
plt.xlabel('Year')
plt.ylabel('Obesity Prevalence')
plt.title('Obesity Prevalence Trend Over Years')
plt.show()

import plotly.express as px
# Interactive plotly line chart of Population against Year for the
# decade-start rows.
obs = px.line(
    ob_decades,
    x='Year',
    y='Population',
    title='Population Trend Over Years',
    labels={'Population': 'Population', 'Year': 'Year'},
)
obs.show()

In [None]:
# Column labels of the merged frame, for reference in the plots below.
obesity_gdp_protein_lex_pop.columns

# Distribution of GDP per capita across countries, one box per year.
fig, ax = plt.subplots(figsize=(12, 11))
sns.boxplot(data=obesity_gdp_protein_lex, x="Year", y="Gdp_ per_capita", color="purple", ax=ax)
plt.xticks(rotation=45);

## How does GDP Per Capita relate to Obesity Prevalence across Countries

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) for GDP per capita.
obesity_gdp_protein_lex_pop['Gdp_ per_capita'].describe()

# One point per row of the merged frame: GDP per capita on x, prevalence on y.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=obesity_gdp_protein_lex_pop,
    x='Gdp_ per_capita',
    y='Obesity_Prevalence _both',
    color='purple',
    ax=ax,
)
ax.set_title('GDP per Capita vs Obesity Prevalence')
ax.set_xlabel('GDP per Capita')
ax.set_ylabel('Obesity Prevalence (%)')

# Same relationship with a fitted regression line drawn in black.
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(
    data=obesity_gdp_protein_lex_pop,
    x="Gdp_ per_capita",
    y="Obesity_Prevalence _both",
    color="purple",
    line_kws={"color": "black"},
    ax=ax,
)
plt.show()

In [None]:
# Pearson correlation between GDP per capita and obesity prevalence.
correlation = obesity_gdp_protein_lex_pop['Gdp_ per_capita'].corr(
    other=obesity_gdp_protein_lex_pop['Obesity_Prevalence _both'],
    method='pearson',
)
correlation

## Is there a relationship between daily protein supply and obesity prevalence in different countries?

Correlation is not extremely strong (moderate); other variables in the dataset, such as daily protein intake, daily calories, and life_expectancy