## Import libraries

In [2]:
# We always import libraries first
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn

# configure our plots 
%matplotlib inline

### This notebook follows the framework below:

* data exploration
* choose the right model
* fit the model
* evaluate the model
* improve the model
* save/import the model

In [3]:
# Ignore warnings for now, due to some module deprecation issues.
# NOTE: this filter applies to every warning raised after this cell runs.
import warnings
warnings.filterwarnings("ignore") # you can change "ignore" to "default" to show warnings again

In [4]:
# Read the life-expectancy CSV (expected in the working directory) into a
# DataFrame, then preview its first ten rows.
life_expectancy = pd.read_csv(filepath_or_buffer="LifeExpectancy.csv")
life_expectancy.head(n=10)

Unnamed: 0.1,Unnamed: 0,country,year,status,life_expectancy,adult_mortality,infant_deaths,alcohol,percentage_expenditure,hepatitis_b,...,polio,total_expenditure,diphtheria,hiv_aids,gdp,population,thinness__1_19_years,thinness_5_9_years,income_composition_of_resources,schooling
0,0,Afghanistan,2015,Developing,65.0,263.0,62,0.01,71.279624,65.0,...,6.0,8.16,65.0,0.1,584.25921,33736494.0,17.2,17.3,0.479,10.1
1,1,Afghanistan,2014,Developing,59.9,271.0,64,0.01,73.523582,62.0,...,58.0,8.18,62.0,0.1,612.696514,327582.0,17.5,17.5,0.476,10.0
2,2,Afghanistan,2013,Developing,59.9,268.0,66,0.01,73.219243,64.0,...,62.0,8.13,64.0,0.1,631.744976,31731688.0,17.7,17.7,0.47,9.9
3,3,Afghanistan,2012,Developing,59.5,272.0,69,0.01,78.184215,67.0,...,67.0,8.52,67.0,0.1,669.959,3696958.0,17.9,18.0,0.463,9.8
4,4,Afghanistan,2011,Developing,59.2,275.0,71,0.01,7.097109,68.0,...,68.0,7.87,68.0,0.1,63.537231,2978599.0,18.2,18.2,0.454,9.5
5,5,Afghanistan,2010,Developing,58.8,279.0,74,0.01,79.679367,66.0,...,66.0,9.2,66.0,0.1,553.32894,2883167.0,18.4,18.4,0.448,9.2
6,6,Afghanistan,2009,Developing,58.6,281.0,77,0.01,56.762217,63.0,...,63.0,9.42,63.0,0.1,445.893298,284331.0,18.6,18.7,0.434,8.9
7,7,Afghanistan,2008,Developing,58.1,287.0,80,0.03,25.873925,64.0,...,64.0,8.33,64.0,0.1,373.361116,2729431.0,18.8,18.9,0.433,8.7
8,8,Afghanistan,2007,Developing,57.5,295.0,82,0.02,10.910156,63.0,...,63.0,6.73,63.0,0.1,369.835796,26616792.0,19.0,19.1,0.415,8.4
9,9,Afghanistan,2006,Developing,57.3,295.0,84,0.03,17.171518,64.0,...,58.0,7.43,58.0,0.1,272.56377,2589345.0,19.2,19.3,0.405,8.1


In [5]:
# Number of rows in the dataframe (first entry of its shape)
life_expectancy.shape[0]

2938

In [7]:
# Count the missing (NaN) entries in each column
life_expectancy.isna().sum(axis=0)

Unnamed: 0                           0
country                              0
year                                 0
status                               0
life_expectancy                     10
adult_mortality                     10
infant_deaths                        0
alcohol                            194
percentage_expenditure               0
hepatitis_b                        553
measles                              0
bmi                                 34
under_five_deaths                    0
polio                               19
total_expenditure                  226
diphtheria                          19
hiv_aids                             0
gdp                                448
population                         652
thinness__1_19_years                34
thinness_5_9_years                  34
income_composition_of_resources    167
schooling                          163
dtype: int64

In [8]:
# Print a summary of the dataframe: each column's non-null count and dtype
life_expectancy.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2938 entries, 0 to 2937
Data columns (total 23 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Unnamed: 0                       2938 non-null   int64  
 1   country                          2938 non-null   object 
 2   year                             2938 non-null   int64  
 3   status                           2938 non-null   object 
 4   life_expectancy                  2928 non-null   float64
 5   adult_mortality                  2928 non-null   float64
 6   infant_deaths                    2938 non-null   int64  
 7   alcohol                          2744 non-null   float64
 8   percentage_expenditure           2938 non-null   float64
 9   hepatitis_b                      2385 non-null   float64
 10  measles                          2938 non-null   int64  
 11  bmi                              2904 non-null   float64
 12  under_five_deaths   