# HUMAN LIFE EXPECTANCY ANALYSIS

### Modules

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly # to save the graphs
from sklearn.impute import SimpleImputer
import seaborn as sns
import datetime as dt

import warnings
# Silence all Python warnings for the rest of the kernel session.
warnings.filterwarnings('ignore')

%matplotlib inline

### Loading the dataset

In [2]:
# Read the life-expectancy CSV (expected in the working directory) and preview its first rows.
dataset_path = "Life Expectancy Data.csv"
df = pd.read_csv(dataset_path)
df.head()

Unnamed: 0,Country,Year,Status,Life expectancy,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,...,Polio,Total expenditure,Diphtheria,HIV/AIDS,GDP,Population,thinness 1-19 years,thinness 5-9 years,Income composition of resources,Schooling
0,Afghanistan,2015,Developing,65.0,263.0,62,0.01,71.279624,65.0,1154,...,6.0,8.16,65.0,0.1,584.25921,33736494.0,17.2,17.3,0.479,10.1
1,Afghanistan,2014,Developing,59.9,271.0,64,0.01,73.523582,62.0,492,...,58.0,8.18,62.0,0.1,612.696514,327582.0,17.5,17.5,0.476,10.0
2,Afghanistan,2013,Developing,59.9,268.0,66,0.01,73.219243,64.0,430,...,62.0,8.13,64.0,0.1,631.744976,31731688.0,17.7,17.7,0.47,9.9
3,Afghanistan,2012,Developing,59.5,272.0,69,0.01,78.184215,67.0,2787,...,67.0,8.52,67.0,0.1,669.959,3696958.0,17.9,18.0,0.463,9.8
4,Afghanistan,2011,Developing,59.2,275.0,71,0.01,7.097109,68.0,3013,...,68.0,7.87,68.0,0.1,63.537231,2978599.0,18.2,18.2,0.454,9.5


## Cleaning The Dataframe

#### Observing the details of the dataframe

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Year,Life expectancy,Adult Mortality,infant deaths,Alcohol,percentage expenditure,Hepatitis B,Measles,BMI,under-five deaths,Polio,Total expenditure,Diphtheria,HIV/AIDS,GDP,Population,thinness 1-19 years,thinness 5-9 years,Income composition of resources,Schooling
count,2938.0,2928.0,2928.0,2938.0,2744.0,2938.0,2385.0,2938.0,2904.0,2938.0,2919.0,2712.0,2919.0,2938.0,2490.0,2286.0,2904.0,2904.0,2771.0,2775.0
mean,2007.51872,69.224932,164.796448,30.303948,4.602861,738.251295,80.940461,2419.59224,38.321247,42.035739,82.550188,5.93819,82.324084,1.742103,7483.158469,12753380.0,4.839704,4.870317,0.627551,11.992793
std,4.613841,9.523867,124.292079,117.926501,4.052413,1987.914858,25.070016,11467.272489,20.044034,160.445548,23.428046,2.49832,23.716912,5.077785,14270.169342,61012100.0,4.420195,4.508882,0.210904,3.35892
min,2000.0,36.3,1.0,0.0,0.01,0.0,1.0,0.0,1.0,0.0,3.0,0.37,2.0,0.1,1.68135,34.0,0.1,0.1,0.0,0.0
25%,2004.0,63.1,74.0,0.0,0.8775,4.685343,77.0,0.0,19.3,0.0,78.0,4.26,78.0,0.1,463.935626,195793.2,1.6,1.5,0.493,10.1
50%,2008.0,72.1,144.0,3.0,3.755,64.912906,92.0,17.0,43.5,4.0,93.0,5.755,93.0,0.1,1766.947595,1386542.0,3.3,3.3,0.677,12.3
75%,2012.0,75.7,228.0,22.0,7.7025,441.534144,97.0,360.25,56.2,28.0,97.0,7.4925,97.0,0.8,5910.806335,7420359.0,7.2,7.2,0.779,14.3
max,2015.0,89.0,723.0,1800.0,17.87,19479.91161,99.0,212183.0,87.3,2500.0,99.0,17.6,99.0,50.6,119172.7418,1293859000.0,27.7,28.6,0.948,20.7


#### Looking for NULL values in the dataframe

In [4]:
# Number of missing values in each column.
df.isnull().sum()

Country                              0
Year                                 0
Status                               0
Life expectancy                     10
Adult Mortality                     10
infant deaths                        0
Alcohol                            194
percentage expenditure               0
Hepatitis B                        553
Measles                              0
 BMI                                34
under-five deaths                    0
Polio                               19
Total expenditure                  226
Diphtheria                          19
 HIV/AIDS                            0
GDP                                448
Population                         652
 thinness  1-19 years               34
 thinness 5-9 years                 34
Income composition of resources    167
Schooling                          163
dtype: int64

#### Replacing the null/empty values with the column mean

In [5]:
# Columns that `df.isnull().sum()` reported as containing missing values.
# The raw CSV headers carry stray leading/trailing spaces, and this cell runs
# before the columns are renamed, so the names below must match them exactly.
columns_to_impute = [
    "Life expectancy ",
    "Adult Mortality",
    "Alcohol",
    "Hepatitis B",
    " BMI ",
    "Polio",
    "Total expenditure",
    "Diphtheria ",
    "GDP",
    "Population",
    " thinness  1-19 years",
    " thinness 5-9 years",
    "Income composition of resources",
    "Schooling",
]

# SimpleImputer computes its statistic column by column, so a single fit over
# all the columns fills each column's NaNs with that column's own mean.
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
df[columns_to_impute] = imputer.fit_transform(df[columns_to_impute])

# Sanity check: nothing should be missing in the imputed columns any more.
assert not df[columns_to_impute].isnull().any().any(), "imputation left missing values"

#### Renaming the columns

In [6]:
# Map the raw CSV headers (including their stray spaces) to clean names.
# Columns whose names are already clean (Country, Year, Status, Alcohol,
# Polio, GDP, Population, Schooling) need no entry and are left untouched.
column_renames = {
    'Life expectancy ': 'Life_Expectancy',
    'Adult Mortality': 'Adult_Mortality',
    'infant deaths': 'Infant_Deaths',
    'percentage expenditure': 'Percentage_Expenditure',
    'Hepatitis B': 'Hepatitis_B',
    'Measles ': 'Measles',
    ' BMI ': 'BMI',
    'under-five deaths ': 'Under_Five_Deaths',
    'Total expenditure': 'Total_Expenditure',
    'Diphtheria ': 'Diphtheria',
    ' HIV/AIDS': 'HIV/AIDS',
    ' thinness  1-19 years': 'Thinness_1_To_19',
    ' thinness 5-9 years': 'Thinness_5_To_9',
    'Income composition of resources': 'Income_Composition_Of_Resources',
}

# Rebind `df` rather than renaming in place; keys that are not present are
# ignored by `rename`, so re-running this cell is harmless.
df = df.rename(columns=column_renames)

## Questions & Answers

### Question #1: What age group has the highest life expectancy?

In [17]:
# Histogram of the Life_Expectancy column over every row of the dataframe.
histogram_options = dict(
    x="Life_Expectancy",
    template="seaborn",
    color_discrete_sequence=['#3D1766'],
    title='<b>Life Expectancy Distribution',
)
fig = px.histogram(df, **histogram_options)

# Place an x-axis tick at every multiple of 5 from 0 to 95.
five_step_ticks = np.arange(0, 100, 5)
fig.update_xaxes(tickvals=five_step_ticks)
fig.show()

# Write the figure to an HTML file; the returned file name is the cell's output.
plotly.offline.plot(fig, filename='Life Expectancy Distribution.html')

'Life Expectancy Distribution.html'

#### Answer: Life expectancy values most frequently fall in the 70 to 75 year range.

### Question #2: Compare the life expectancy between Developing and Developed Countries.

In [18]:
# Violin plot (with an embedded box plot) of life expectancy, one violin per
# development status.
fig = px.violin(df, x='Status', y='Life_Expectancy', template='seaborn', color='Status',
                box=True, title='<b>Life Expectancy between Developing and Developed Countries')
fig.update_traces(width=0.5)
fig.show()

# Write the figure to an HTML file; the returned file name is the cell's output.
plotly.offline.plot(fig, filename='Life Expectancy between Developing and Developed Countries.html')

'Life Expectancy between Developing and Developed Countries.html'

#### Answer: Developing countries have lower life expectancy and the developed countries have higher life expectancy.

### Question #3: Analyze the country wise life expectancies over the years. 

In [19]:
# Animated line chart: one frame per country, life expectancy plotted against year.
line_options = dict(
    x='Year',
    y='Life_Expectancy',
    animation_frame='Country',
    animation_group='Year',
    color='Country',
    markers=True,
    template="seaborn",
    title='<b> Country wise Life Expectancy over Years',
)
fig = px.line(df, **line_options)

# Keep the y-axis fixed so that frames for different countries are comparable.
life_expectancy_range = [30, 90]
fig.update_yaxes(range=life_expectancy_range)
fig.update_layout(height=700, width=900, showlegend=False)
fig.show()

# Write the figure to an HTML file; the returned file name is the cell's output.
plotly.offline.plot(fig, filename='Country-wise Life Expectancy over Years.html')

'Country-wise Life Expectancy over Years.html'

#### Answer: The graph above shows the country-wise life expectancy over the years. Since the data for some of the countries is absent, the graph isn't constructed for them.

### Question #4: Analyze the life expectancy over the infant death for the countries.

In [20]:
# Scatter plot of infant deaths against life expectancy, coloured by country,
# with the marker size driven by the Year column.
infant_scatter_options = dict(
    x='Life_Expectancy',
    y='Infant_Deaths',
    color='Country',
    size='Year',
    template='seaborn',
    opacity=0.6,
    title='<b>Life Expectancy over Infant Deaths',
)
fig = px.scatter(df, **infant_scatter_options)
fig.update_layout(width=900, height=700)
fig.show()

# Write the figure to an HTML file; the returned file name is the cell's output.
plotly.offline.plot(fig, filename='Life Expectancy over Infant Deaths.html')

'Life Expectancy over Infant Deaths.html'

#### Answer: The graph above shows that higher infant deaths go together with lower life expectancy. For example, India had 1800 infant deaths in the year 2000, when its life expectancy was 62.5; by 2015 infant deaths had decreased to 910 and life expectancy had increased to 68.3. So life expectancy is inversely related to the number of infant deaths per year.

### Question #5: Analyze the overall GDP over Mean Years of Schooling.

In [21]:
# Scatter plot of GDP against years of schooling, coloured by country,
# with the marker size driven by the Year column.
schooling_scatter_options = dict(
    x='Schooling',
    y='GDP',
    color='Country',
    opacity=0.6,
    size='Year',
    template='seaborn',
    title='<b>GDP vs Mean Years of Schooling over the years',
)
fig = px.scatter(df, **schooling_scatter_options)
fig.update_layout(width=900, height=700)
fig.show()

# Write the figure to an HTML file; the returned file name is the cell's output.
plotly.offline.plot(fig, filename='GDP vs Mean Years of Schooling over the years.html')

'GDP vs Mean Years of Schooling over the years.html'

#### Answer: From the graph above, countries with the fewest mean years of schooling also have the lowest GDP. This suggests that a nation's GDP is strongly associated with the educational attainment of its citizens.

### Question #6: Visualize the Life Expectancy of the countries via a World Map

In [22]:
# Map graph (using scatter plot).
# This cell uses Plotly Express' built-in gapminder dataset, not `df`.
# gapminder holds one row per country per year, so the unfiltered frame would
# stack every year on the same marker. Keep only 2007, the year the written
# answer refers to, so each country shows exactly one life-expectancy value.
country_data = px.data.gapminder().query("year == 2007")

fig = px.scatter_geo(country_data, locations='iso_alpha', projection='orthographic', opacity=0.8, color='country',
                    hover_name='country', hover_data=['lifeExp', 'year'], template='plotly_dark',
                    title='<b>Life Expectancy all over the World')
fig.update_layout(width=900, height=700)
fig.show()

# Write the figure to an HTML file; the returned file name is the cell's output.
plotly.offline.plot(fig, filename='Life Expectancy all over the World.html')

'Life Expectancy all over the World.html'

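#### Answer: The graph above shows the life expectancy of the countries in the year 2007 CE over a world map. The map is drawn from Plotly's built-in gapminder dataset, and each country's life expectancy is displayed when hovering over its marker.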