In [1]:
#Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import plotly.io as pio


In [2]:
# Load the life-expectancy / GDP dataset from the local CSV and preview its first five rows
df = pd.read_csv(filepath_or_buffer='life exp and GDP.csv')
print(df.head(n=5))

   Unnamed: 0      country  year  population continent  life_exp       gdp_cap
0          11  Afghanistan  2007    31889923      Asia    43.828    974.580338
1          23      Albania  2007     3600523    Europe    76.423   5937.029526
2          35      Algeria  2007    33333216    Africa    72.301   6223.367465
3          47       Angola  2007    12420476    Africa    42.731   4797.231267
4          59    Argentina  2007    40301927  Americas    75.320  12779.379640


### This section focuses on data wrangling, where necessary.

In [3]:
# Dimensions of the frame (rows, columns) followed by summary statistics of the numeric columns
print(df.shape, df.describe(), sep="\n")

(142, 7)
        Unnamed: 0    year    population    life_exp       gdp_cap
count   142.000000   142.0  1.420000e+02  142.000000    142.000000
mean    857.000000  2007.0  4.402122e+07   67.007423  11680.071820
std     493.631441     0.0  1.476214e+08   12.073021  12859.937337
min      11.000000  2007.0  1.995790e+05   39.613000    277.551859
25%     434.000000  2007.0  4.508034e+06   57.160250   1624.842248
50%     857.000000  2007.0  1.051753e+07   71.935500   6124.371108
75%    1280.000000  2007.0  3.121004e+07   76.413250  18008.835640
max    1703.000000  2007.0  1.318683e+09   82.603000  49357.190170


In [4]:
# Count the missing entries in each column of df
df_miss = df.isna().sum()
print(df_miss)

Unnamed: 0    0
country       0
year          0
population    0
continent     0
life_exp      0
gdp_cap       0
dtype: int64


In [5]:
# Count the rows that are exact duplicates of an earlier row
df_dupli = df.duplicated(keep="first").sum()
print(df_dupli)

0


In [6]:
# Show the dtype pandas assigned to each column
dtypes_by_column = df.dtypes
print(dtypes_by_column)

Unnamed: 0      int64
country        object
year            int64
population      int64
continent      object
life_exp      float64
gdp_cap       float64
dtype: object


### The section below explores the data with the support of visualisations.

In [7]:
# Scatter plot: life expectancy (x) against GDP per capita (y);
# markers are sized by life expectancy and coloured by continent
scatter_options = dict(x="life_exp", y="gdp_cap", size="life_exp", color="continent")
fig1 = px.scatter(df, **scatter_options)

fig1.show()

In [8]:
# Save the scatter plot as a standalone HTML page and open it in a web browser
fig1.write_html('figure1.html', auto_open=True)


In [9]:
# Scatter plot with an ordinary-least-squares trendline:
# markers are sized by GDP per capita and coloured by life expectancy
trend_options = dict(x="life_exp", y="gdp_cap", size="gdp_cap", color="life_exp", trendline="ols")
fig2 = px.scatter(df, **trend_options)

fig2.show()


In [10]:
# Save the trendline scatter plot as a standalone HTML page and open it in a web browser
fig2.write_html('figure2.html', auto_open=True)

In [11]:
# Pie chart with one sector per continent, using the gdp_cap column for sector values
fig3 = px.pie(df, names="continent", values="gdp_cap")

fig3.show()

In [12]:
# Save the continent pie chart as a standalone HTML page and open it in a web browser
fig3.write_html('figure3.html', auto_open=True)


In [13]:
# Multiple pie plot display: one pie per continent, with one sector per country
# whose value is that country's life expectancy.
europe = df.query("continent=='Europe'")
asia = df.query("continent=='Asia'")
africa = df.query("continent=='Africa'")
america = df.query("continent=='Americas'")
oceania = df.query("continent=='Oceania'")

# (subplot title, rows) pairs in row-major grid order. Oceania was previously
# filtered but never drawn, leaving the third grid row empty.
continent_frames = [
    ("Asia", asia),
    ("Africa", africa),
    ("Americas", america),
    ("Europe", europe),
    ("Oceania", oceania),
]

n_cols = 2
n_rows = -(-len(continent_frames) // n_cols)  # ceiling division -> 3 rows for 5 pies

fig = make_subplots(rows=n_rows,
                    cols=n_cols,
                    # pie traces need 'domain' cells rather than the default 'xy' axes
                    specs=[[{'type': 'domain'} for _ in range(n_cols)] for _ in range(n_rows)],
                    subplot_titles=[title for title, _ in continent_frames])

for position, (title, frame) in enumerate(continent_frames):
    # Convert the 0-based list position into 1-based (row, col) grid coordinates
    grid_row, grid_col = divmod(position, n_cols)
    fig.add_trace(go.Pie(labels=frame["country"], values=frame["life_exp"], name=title),
                  grid_row + 1, grid_col + 1)

# Applies to every trace in the figure, so a single call after all pies are added is enough
fig.update_traces(textposition='inside')

fig.update_layout(height=1500, width=1000,
                  title_text="Pie Chart Subplot of Life Expectancy for each Continent per Country")

fig.show()

In [14]:
# Save the continent pie subplots as a standalone HTML page and open it in a web browser
fig.write_html('figure4.html', auto_open=True)


In [15]:
# The three countries with the highest and the three with the lowest life expectancy
ranking_column = "life_exp"
life_exp_max = df.nlargest(3, ranking_column)
life_exp_min = df.nsmallest(3, ranking_column)
print(life_exp_max, life_exp_min, sep="\n")

    Unnamed: 0           country  year  population continent  life_exp  \
66         803             Japan  2007   127467972      Asia    82.603   
55         671  Hong Kong, China  2007     6980412      Asia    82.208   
57         695           Iceland  2007      301931    Europe    81.757   

        gdp_cap  
66  31656.06806  
55  39724.97867  
57  36180.78919  
     Unnamed: 0     country  year  population continent  life_exp      gdp_cap
121        1463   Swaziland  2007     1133066    Africa    39.613  4513.480643
86         1043  Mozambique  2007    19951656    Africa    42.082   823.685621
140        1691      Zambia  2007    11746035    Africa    42.384  1271.211593


In [16]:
# Select the rows for the six countries shown in the bar chart
japan = df.query("country=='Japan'")
china = df.query("country=='Hong Kong, China'")
iceland = df.query("country=='Iceland'")
sawziland = df.query("country=='Swaziland'")
mozambique = df.query("country=='Mozambique'")
zambia = df.query("country=='Zambia'")

fig5 = make_subplots(rows=1, cols=1)

# One bar trace per country, added in the order the bars should appear
for country_rows in (japan, china, iceland, sawziland, mozambique, zambia):
    fig5.add_trace(go.Bar(x=country_rows["country"], y=country_rows["life_exp"]), row=1, col=1)

fig5.update_layout(
    title_text="Top 3 Highest and Lowest Life Expectancy Countries",
    height=550,
    width=750,
    showlegend=False,
)

fig5.show()

In [17]:
# Save the life-expectancy bar chart as a standalone HTML page and open it in a web browser
fig5.write_html('figure5.html', auto_open=True)


In [18]:
# Box plot of the life_exp column
fig6 = px.box(data_frame=df['life_exp'])
fig6.update_layout(width=800, height=1000)
fig6.show()

In [19]:
# Save the box plot as a standalone HTML page and open it in a web browser
fig6.write_html('figure6.html', auto_open=True)


In [20]:
#Map chart with colors
# NOTE: this rebinds `df` to the gapminder sample bundled with Plotly Express
# (columns `lifeExp`, `pop`, `iso_alpha`), replacing the CSV-backed frame used by
# the earlier cells. Re-run the CSV load cell before re-running those cells.
df = px.data.gapminder().query("year == 2007")

# Population-weighted mean life expectancy; used as the midpoint of the diverging
# colour scale and reported in the figure title.
avg_lifeExp = (df['lifeExp']*df['pop']).sum()/df['pop'].sum()

fig = px.choropleth(df, locations="iso_alpha", color="lifeExp",
                    color_continuous_scale=px.colors.diverging.Geyser,
                    color_continuous_midpoint=avg_lifeExp,
                    # The original title had no conversion specifier, so applying `%`
                    # raised "TypeError: not all arguments converted during string formatting".
                    title="World Average Life Expectancy in 2007: %.2f years" % avg_lifeExp)
fig.show()

In [21]:
# Save the choropleth map as a standalone HTML page and open it in a web browser
fig.write_html('figure7.html', auto_open=True)
