In [1]:
# Uncomment this template (and the display(HTML(...)) line inside chart_1) if you prefer to render the chart inline
# html_pygal = """
# <!DOCTYPE html>
# <html>
#   <head>
#   <script type="text/javascript" src="http://kozea.github.com/pygal.js/javascripts/svg.jquery.js"></script>
#   <script type="text/javascript" src="http://kozea.github.com/pygal.js/javascripts/pygal-tooltips.js"></script>
#     <!-- ... -->
#   </head>
#   <body>
#     <figure>
#       {pygal_render}
#     </figure>
#   </body>
# </html>
# """

In [2]:
import pandas as pd
import numpy as np

# Allow up to 200 characters per cell before pandas truncates displayed text.
pd.set_option('display.max_colwidth', 200)
# Display floats with a thousands separator and two decimals (e.g. 1,234.57).
pd.options.display.float_format = '{:,.2f}'.format

import pygal
from IPython.display import SVG, display, HTML
from pygal.style import BlueStyle

In [3]:
# Years (as strings) used for the initial selection of Singapore's GDP records.
# range(1960, 2020) yields '1960' .. '2019'; they are strings because they are
# matched against the GDP file's column headers after the row is transposed.
range_years = [str(year) for year in range(1960, 2020)]

# --- GDP per capita --------------------------------------------------------
gdp = pd.read_csv('datasets/gdp_per_capital/gdp_.csv')
# Keep only Singapore's row and turn it on its side: the former column headers
# (metadata fields and years) become the index, which reset_index then exposes
# as an ordinary column named 'index'.
# NOTE(review): the two-name column assignment below only works if exactly one
# row has 'Country Name' == 'Singapore' - confirm against the dataset.
gdp_sg = (
    gdp[gdp['Country Name'] == 'Singapore']
    .reset_index(drop=True)
    .T
    .reset_index(drop=False)
)
# Keep only the rows whose label is one of the requested years. The explicit
# .copy() makes gdp_sg an independent frame, so the column assignment further
# down does not trigger pandas' SettingWithCopyWarning.
gdp_sg = gdp_sg[gdp_sg['index'].isin(range_years)].copy()
gdp_sg.columns = ['year', 'gdp_per_capital']  # renaming the columns
# The transposed column holds mixed objects; cast the GDP values to float.
gdp_sg['gdp_per_capital'] = gdp_sg['gdp_per_capital'].astype(float)

# --- Life expectancy -------------------------------------------------------
le = pd.read_csv('datasets/life-expectancy-by-sex-annual/life-expectancy-at-birth-and-age-65-years.csv')
# Keep only the "total life expectancy at birth" series.
le = le[le['level_1'] == 'Total Life Expectancy At Birth (Residents)'].reset_index(drop=True)
# Positional rename: assumes the file has exactly three columns in the order
# (year, level_1, value) - TODO confirm against the CSV header.
le.columns = ['year', 'life_expectancy_1', 'life_expectancy']
# Cast the year to str so it matches the string year labels in gdp_sg.
le['year'] = le['year'].astype(str)

# Inner join: only years present in BOTH datasets survive.
df = pd.merge(gdp_sg, le, how='inner', on='year')

In [4]:
# Preview the merged frame: its first row stacked on top of its last row.
first_row = df.iloc[:1]
last_row = df.iloc[-1:]
check_ = pd.concat([first_row, last_row], axis=0)
check_

Unnamed: 0,year,gdp_per_capital,life_expectancy_1,life_expectancy
0,1960,428.06,Total Life Expectancy At Birth (Residents),62.9
42,2018,66188.78,Total Life Expectancy At Birth (Residents),83.2


## Exploratory Data Analysis (EDA)

In [5]:
df.info() #summary of the combined dataset: dtypes, non-null counts and memory usage

<class 'pandas.core.frame.DataFrame'>
Int64Index: 43 entries, 0 to 42
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   year               43 non-null     object 
 1   gdp_per_capital    43 non-null     float64
 2   life_expectancy_1  43 non-null     object 
 3   life_expectancy    43 non-null     float64
dtypes: float64(2), object(2)
memory usage: 1.7+ KB


In [6]:
df.shape #(number of rows, number of columns) of the merged dataset

(43, 4)

In [7]:
df.columns #column labels of the merged dataset

Index(['year', 'gdp_per_capital', 'life_expectancy_1', 'life_expectancy'], dtype='object')

In [8]:
df.index #row labels of the merged dataset

Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
            34, 35, 36, 37, 38, 39, 40, 41, 42],
           dtype='int64')

In [9]:
df.isnull().sum() #count of missing values per column; all zeros means there are no NaNs

year                 0
gdp_per_capital      0
life_expectancy_1    0
life_expectancy      0
dtype: int64

In [10]:
df.describe() #descriptive statistics (count, mean, std, min, quartiles, max) of the numeric columns

Unnamed: 0,gdp_per_capital,life_expectancy
count,43.0,43.0
mean,25369.69,76.73
std,19511.36,5.07
min,428.06,62.9
25%,7383.67,74.35
50%,21829.3,76.9
75%,39180.07,80.75
max,66188.78,83.2


## Function to plot the line chart

In [11]:
#function to produce the chart
def chart_1(y1=1960, y2=2018, data=None, output_file='1_line_chart_pygal.svg'):
    """Plot GDP per capita and life expectancy for a range of years and save it as SVG.

    Both series are drawn on one pygal line chart; life expectancy uses the
    secondary y-axis (fixed range 60-90), much like ``twinx`` in matplotlib.

    Parameters
    ----------
    y1 : int, default 1960
        First year to include (inclusive).
    y2 : int, default 2018
        Last year to include (inclusive).
    data : pandas.DataFrame, optional
        Frame with a string ``'year'`` column plus ``'gdp_per_capital'`` and
        ``'life_expectancy'`` columns. Defaults to the notebook-level ``df``.
    output_file : str, default '1_line_chart_pygal.svg'
        Path of the SVG file to write.

    Raises
    ------
    ValueError
        If ``y1`` is greater than ``y2`` (the selection would be empty and an
        empty chart would be written silently).
    """
    if y1 > y2:
        raise ValueError(f'y1 ({y1}) must not be greater than y2 ({y2})')
    if data is None:
        data = df  # fall back to the merged frame built earlier in the notebook

    # Years are stored as strings in the frame, so compare against strings.
    range_of_years = [str(i) for i in range(y1, y2 + 1)]
    selected_years = data[data['year'].isin(range_of_years)]

    line_chart = pygal.Line(secondary_range=(60, 90), x_label_rotation=45, explicit_size=False)  #similar to twinx matplotlib
    # NOTE(review): "Capital" is probably a typo for "Capita"; the title is left
    # unchanged here so the rendered chart matches previously generated output.
    line_chart.title = 'GDP Per Capital and Life Expectancy in Singapore'

    # Hand pygal plain Python lists rather than pandas Series.
    line_chart.add('gdp_per_capital', selected_years['gdp_per_capital'].tolist())
    line_chart.add('life_expectancy', selected_years['life_expectancy'].tolist(), secondary=True) #similar to twinx matplotlib
    line_chart.x_labels = selected_years['year'].tolist()
    # Inline alternative; needs the html_pygal template from the first cell to be uncommented:
    #display(HTML(html_pygal.format(pygal_render=line_chart.render())))
    line_chart.render_to_file(output_file) #write to svg file. open in chrome for best viewing

In [12]:
chart_1() #default range 1960-2018; writes 1_line_chart_pygal.svg
#chart_1(2010,2018) #alternative: restrict the chart to a selection of years --> try this