In [131]:
import pandas as pd
import altair as alt
import pycountry as pc
from pycountry_convert import (
    country_alpha2_to_continent_code,
    country_alpha3_to_country_alpha2,
    country_name_to_country_alpha2
)

# Dataset preparation

In [132]:
# Read the life-expectancy table from the CSV file under ../data.
dataset_df = pd.read_csv(filepath_or_buffer="../data/Life Expectancy Data.csv")

In [133]:
# Dataset spellings rewritten before the country-code lookup below.
# NOTE(review): presumably these are the names country_name_to_country_alpha2
# cannot resolve as written in the CSV -- confirm against pycountry_convert.
country_renames = {
    "Bolivia (Plurinational State of)": "Bolivia, Plurinational State of",
    "Iran (Islamic Republic of)": "Iran, Islamic Republic of",
    "Micronesia (Federated States of)": "Micronesia, Federated States of",
    "Republic of Korea": "Korea, Republic of",
    "The former Yugoslav republic of Macedonia": "North Macedonia",
    "Venezuela (Bolivarian Republic of)": "Venezuela, Bolivarian Republic of",
}
# assign() builds a new frame, so the previously loaded frame is never edited
# in place and re-running this cell yields the same result.
dataset_df = dataset_df.assign(country=dataset_df["country"].replace(country_renames))

# Two-letter continent codes returned by pycountry_convert -> display names.
continent_map = {
    "AS": "Asia",
    "EU": "Europe",
    "AF": "Africa",
    "NA": "North America",
    "SA": "South America",
    "OC": "Oceania",
}

# Resolve each distinct country once instead of twice per row: the lookup
# result depends only on the country name, and the frame repeats every
# country for each year it covers.
continent_by_country = {}
for country_name in dataset_df["country"].unique():
    alpha2 = country_name_to_country_alpha2(country_name)
    # "TL" is looked up under "TP" instead.
    # NOTE(review): presumably country_alpha2_to_continent_code has no entry
    # for "TL" -- confirm.
    if alpha2 == "TL":
        alpha2 = "TP"
    continent_by_country[country_name] = continent_map[country_alpha2_to_continent_code(alpha2)]

# Row-aligned list of continent names, one entry per row of dataset_df.
continent = dataset_df["country"].map(continent_by_country).tolist()
continent_df = pd.DataFrame(data={"country": dataset_df["country"].tolist(), "continent": continent})

In [137]:
# Attach the continent column; assign() aligns the Series with dataset_df on
# the row index and returns a new frame.
dataset_df = dataset_df.assign(continent=continent_df["continent"])

# Cards - For Summary

## Worldwide

In [138]:
# Worldwide mean life expectancy at the four milestone years, plus the
# percentage change up to 2015 over the preceding 5, 10 and 15 years.
temp = dataset_df[dataset_df["year"].isin([2000, 2005, 2010, 2015])]
# Select the column BEFORE aggregating: a frame-wide .mean() would also try to
# average the text columns (country, status, continent), which raises a
# TypeError on pandas >= 2.0 and is wasted work on older versions.
temp = temp.groupby("year")[["life_expectancy"]].mean().T
temp = temp.assign(
    perc_last_5=(temp[2015] - temp[2010]) / temp[2010] * 100,
    perc_last_10=(temp[2015] - temp[2005]) / temp[2005] * 100,
    perc_last_15=(temp[2015] - temp[2000]) / temp[2000] * 100,
)
temp

year,2000,2005,2010,2015,perc_last_5,perc_last_10,perc_last_15
life_expectancy,66.750273,68.20929,70.048634,71.61694,2.238882,4.995874,7.290857


## Continent wise

In [139]:
# Per-continent mean life expectancy at the four milestone years (one row per
# continent, one column per year), plus the percentage change up to 2015 over
# the preceding 5, 10 and 15 years.
temp = dataset_df[dataset_df["year"].isin([2000, 2005, 2010, 2015])]
# Select the column BEFORE aggregating: a frame-wide .mean() would also try to
# average the remaining text columns (country, status), which raises a
# TypeError on pandas >= 2.0.
temp = temp.groupby(["continent", "year"])[["life_expectancy"]].mean().reset_index()
temp = temp.pivot(index="continent", columns="year", values="life_expectancy")
temp = temp.assign(
    perc_last_5=(temp[2015] - temp[2010]) / temp[2010] * 100,
    perc_last_10=(temp[2015] - temp[2005]) / temp[2005] * 100,
    perc_last_15=(temp[2015] - temp[2000]) / temp[2000] * 100,
)
temp

year,2000,2005,2010,2015,perc_last_5,perc_last_10,perc_last_15
continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Africa,54.72037,57.12963,60.075926,62.666667,4.312444,9.692058,14.521642
Asia,69.044681,70.351064,72.114894,73.185106,1.484038,4.028429,5.996734
Europe,75.148718,76.458974,78.138462,79.041026,1.155083,3.377041,5.179473
North America,72.680952,73.07619,73.142857,75.014286,2.558594,2.652157,3.210378
Oceania,69.42,70.72,72.05,72.16,0.152672,2.036199,3.946989
South America,72.0,72.258333,73.458333,75.225,2.404991,4.105639,4.479167


## By Continent - Trend

In [140]:
# Summary card grouped by the `status` column (Developed / Developing), not by
# continent: mean life expectancy at the four milestone years, plus the
# percentage change up to 2015 over the preceding 5, 10 and 15 years.
temp = dataset_df[dataset_df["year"].isin([2000, 2005, 2010, 2015])]
# Select the column BEFORE aggregating: a frame-wide .mean() would also try to
# average the remaining text columns (country, continent), which raises a
# TypeError on pandas >= 2.0.
temp = temp.groupby(["status", "year"])[["life_expectancy"]].mean().reset_index()
temp = temp.pivot(index="status", columns="year", values="life_expectancy")
temp = temp.assign(
    perc_last_5=(temp[2015] - temp[2010]) / temp[2010] * 100,
    perc_last_10=(temp[2015] - temp[2005]) / temp[2005] * 100,
    perc_last_15=(temp[2015] - temp[2000]) / temp[2000] * 100,
)
temp

year,2000,2005,2010,2015,perc_last_5,perc_last_10,perc_last_15
status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Developed,76.803125,78.590625,80.146875,80.709375,0.701836,2.695932,5.086056
Developing,64.619868,66.009272,67.908609,69.690066,2.623315,5.576178,7.846192


In [156]:
# Line chart of the yearly mean life expectancy, one line per continent.
a = alt.Chart(
    # Select the column before aggregating so text columns (country, status)
    # are never passed to .mean(), which raises a TypeError on pandas >= 2.0.
    dataset_df.groupby(["continent", "year"])["life_expectancy"].mean().reset_index()
).mark_line().encode(
    x="year",
    # The frame already holds exactly one mean per (continent, year), so the
    # field is plotted directly; wrapping it in sum(...) drew the same values
    # but labelled the axis as a sum.
    y=alt.Y("life_expectancy", title="Mean life expectancy", scale=alt.Scale(zero=False)),
    color="continent",
    tooltip="continent",
)
a

## Country vs its continent vs worldwide - Trend

In [230]:
# Country to highlight in the comparison chart.
chosen_country = "Canada"
# Continent stored on the first row that belongs to the chosen country.
sel_continent = dataset_df.loc[dataset_df["country"] == chosen_country, "continent"].iloc[0]

In [238]:
# Build three labelled yearly series and stack them into one long frame:
# the worldwide mean, the mean of the chosen country's continent, and the
# chosen country's own values.
# The column is selected before aggregating so text columns (country, status,
# continent) are never passed to .mean(), which raises a TypeError on
# pandas >= 2.0.
worldwide_trend = (
    dataset_df.groupby("year")["life_expectancy"]
    .mean()
    .reset_index()
    .assign(label="Worldwide")
)
continent_trend = (
    dataset_df[dataset_df["continent"] == sel_continent]
    .groupby("year")["life_expectancy"]
    .mean()
    .reset_index()
    .assign(label=sel_continent)
)
country_trend = dataset_df.loc[
    dataset_df["country"] == chosen_country, ["year", "life_expectancy"]
].assign(label=chosen_country)

# A single concat keeps the same row order as before:
# worldwide, then continent, then country.
temp = pd.concat(
    [worldwide_trend, continent_trend, country_trend],
    ignore_index=True,
)
alt.Chart(temp).mark_line().encode(
    x="year",
    y="life_expectancy",
    color="label"
)

## Developed vs Developing

In [157]:
# Line chart of the yearly mean life expectancy, one line per value of the
# `status` column (Developed / Developing).
b = alt.Chart(
    # Select the column before aggregating so text columns (country,
    # continent) are never passed to .mean(), which raises a TypeError on
    # pandas >= 2.0.
    dataset_df.groupby(["status", "year"])["life_expectancy"].mean().reset_index()
).mark_line().encode(
    x="year",
    # The frame already holds exactly one mean per (status, year), so the
    # field is plotted directly; wrapping it in sum(...) drew the same values
    # but labelled the axis as a sum.
    y=alt.Y("life_expectancy", title="Mean life expectancy", scale=alt.Scale(zero=False)),
    color="status",
    tooltip="status",
)
b

## Effect of other factors

In [206]:
# Bubble chart restricted to the rows for 2013: the hiv_aids column on the
# x-axis against life expectancy, coloured by continent, circle size driven
# by population, with the country name shown on hover.
alt.Chart(dataset_df[dataset_df["year"] == 2013]).mark_circle().encode(
    x="hiv_aids",
    y=alt.Y("life_expectancy", title="Life Expectancy"),
    color=alt.Color("continent"),
    size=alt.Size("population", scale=alt.Scale(range=(10, 1000))),
    tooltip=alt.Tooltip("country"),
)