# Title: Life Expectancy vs GDP
### Author: KlasshDev
### Date: 2023.01.16
### Source: World Health Organization and the World Bank


In [2]:
# Imports
# NOTE: `plt` must be the pyplot submodule; the bare `matplotlib` package
# does not expose plotting functions such as subplots() or show().
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Initial Data Exploration

In [31]:
# Read the CSV and, in the same chained expression, shorten the verbose
# life-expectancy column label so it is easier to reference later.
healthStats = pd.read_csv('all_data.csv').rename(
    columns={'Life expectancy at birth (years)': 'Life_Expectancy'}
)

# Preview the first rows to confirm the load and the renamed column
print(healthStats.head())

  Country  Year  Life_Expectancy           GDP
0   Chile  2000             77.3  7.786093e+10
1   Chile  2001             77.3  7.097992e+10
2   Chile  2002             77.8  6.973681e+10
3   Chile  2003             77.9  7.564346e+10
4   Chile  2004             78.0  9.921039e+10


In [5]:
# Initial data exploration.
# info() prints the frame's structure: index range, column names,
# non-null counts, dtypes and memory usage.
healthStats.info()
# describe() is the last expression in the cell, so its summary table
# (count, mean, std, min, quartiles and max of the numeric columns) is displayed.
healthStats.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96 entries, 0 to 95
Data columns (total 4 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Country                           96 non-null     object 
 1   Year                              96 non-null     int64  
 2   Life expectancy at birth (years)  96 non-null     float64
 3   GDP                               96 non-null     float64
dtypes: float64(2), int64(1), object(1)
memory usage: 3.1+ KB


Unnamed: 0,Year,Life expectancy at birth (years),GDP
count,96.0,96.0,96.0
mean,2007.5,72.789583,3880499000000.0
std,4.633971,10.672882,5197561000000.0
min,2000.0,44.3,4415703000.0
25%,2003.75,74.475,173301800000.0
50%,2007.5,76.75,1280220000000.0
75%,2011.25,78.9,4067510000000.0
max,2015.0,81.0,18100000000000.0


In [32]:
# Distinct values found in each column of the frame
for column in healthStats.columns:
    print(healthStats[column].unique())

# For GDP and then life expectancy, show the row(s) holding the maximum
# followed by the row(s) holding the minimum. An equality mask is used so
# that tied rows are all reported.
for metric in ('GDP', 'Life_Expectancy'):
    values = healthStats[metric]
    for extreme in (values.max(), values.min()):
        print(healthStats[values == extreme])

# Per-country mean / min / max of life expectancy
life_expectancy_stats = {'Life_Expectancy': ['mean', 'min', 'max']}
print(healthStats.groupby('Country').agg(life_expectancy_stats))

['Chile' 'China' 'Germany' 'Mexico' 'United States of America' 'Zimbabwe']
[2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
 2014 2015]
[77.3 77.8 77.9 78.  78.4 78.9 79.6 79.3 79.1 79.8 79.9 80.1 80.3 80.5
 71.7 72.2 72.7 73.1 73.5 73.9 74.2 74.4 74.5 74.9 75.  75.2 75.4 75.6
 75.8 76.1 78.3 78.5 79.2 80.  80.6 80.9 81.  74.8 75.3 76.  75.7 76.3
 76.6 76.7 76.8 76.9 77.  77.2 77.5 78.1 78.2 78.7 78.8 46.  45.3 44.8
 44.5 44.3 44.6 45.4 46.6 48.2 50.  52.4 54.9 56.6 58.  59.2 60.7]
[7.78609322e+10 7.09799240e+10 6.97368114e+10 7.56434598e+10
 9.92103929e+10 1.22965000e+11 1.54788000e+11 1.73606000e+11
 1.79638000e+11 1.72389000e+11 2.18538000e+11 2.52252000e+11
 2.67122000e+11 2.78384000e+11 2.60990000e+11 2.42518000e+11
 1.21135000e+12 1.33940000e+12 1.47055000e+12 1.66029000e+12
 1.95535000e+12 2.28597000e+12 2.75213000e+12 3.55218000e+12
 4.59821000e+12 5.10995000e+12 6.10062000e+12 7.57255000e+12
 8.56055000e+12 9.60722000e+12 1.04824000e+13 1.10647000e+13
 1.

### Initial Findings:
- Only 96 entries in data source
- From years 2000 - 2015
- Countries: 'Chile' 'China' 'Germany' 'Mexico' 'United States of America' 'Zimbabwe'
- Min life expectancy: 44.3 years
- Max life expectancy: 81 years
- Min GDP: 4.4 Billion (Zimbabwe 2008)
- Max GDP: 18.1 Trillion (USA 2015)
- Highest life expectancy: 81, Germany 2015
- Lowest life expectancy: 44.3, Zimbabwe 2004
- Zimbabwe is in rough shape: its mean life expectancy is only about 50 (ranging from 44.3 to 60.7)
- In contrast, Germany's mean life expectancy is 79 (ranging from 78 to 81)

# Visualizations

In [None]:
# TODO: Plot average life expectancy by country over time

# TODO: Barplot by country

# TODO: Plot GDP against life expectancy
#   - for the whole DataFrame
#   - per country