In [None]:
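# Life expectancy data sources: OECD (https://www.oecd.org/en/data/indicators/life-expectancy-at-birth.html)
# for per-country values and Our World in Data (https://ourworldindata.org) for the world values.
# Requires pandas: run `pip install pandas` before executing this notebook.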

In [None]:
# Add the parent directory to the import path (presumably where the Django
# project packages live relative to this notebook — confirm).
import sys; sys.path.append("../")
import os
import django
# Point Django at the project settings unless DJANGO_SETTINGS_MODULE is already
# set, then initialise Django so that models can be imported in this kernel.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "lifetime_in_weeks.settings")
django.setup()

from datetime import date
import pandas as pd
# Show up to 50 columns when a DataFrame is displayed.
pd.set_option("display.max_columns", 50)
# Keep this import below django.setup(): model modules cannot be imported
# before Django's app registry is ready.
from lifetime.models import LifetimeExpectancy

In [None]:
# Load the raw OECD life-expectancy export from the notebook's working directory.
# Download it from https://www.oecd.org/en/data/indicators/life-expectancy-at-birth.html
# or https://data-explorer.oecd.org/vis?lc=en&ac=false&tm=DF_LE&pg=0&snb=1&vw=tb&df[ds]=dsDisseminateFinalDMZ&df[id]=DSD_HEALTH_STAT%40DF_LE&df[ag]=OECD.ELS.HD&df[vs]=&pd=%2C&dq=.A...Y0........&to[TIME_PERIOD]=false
oecd = pd.read_csv("oecd_life_expectancy.csv")
# Preview the first rows.
oecd.head()

In [None]:
# Load the ISO 3166 country-code table from the notebook's working directory.
# It is used further down to map three-letter area codes ("Alpha-3 code") to
# two-letter codes ("Alpha-2 code").
# Download it from https://www.iso.org/iso-3166-country-codes.html
# or https://www.iso.org/obp/ui/#search
#
# pandas' default missing-value markers include the literal string "NA", which
# is also Namibia's Alpha-2 code. Restrict NA parsing to empty fields so that
# code is kept as text instead of silently becoming NaN.
iso_codes = pd.read_csv("iso_codes.csv", keep_default_na=False, na_values=[""])
# Preview the first rows.
iso_codes.head()

In [None]:
# Keep only rows whose Measure is "Life expectancy" and whose AGE code is "Y0"
# (presumably age 0, i.e. at birth — confirm against the OECD code list), and
# only the four columns used downstream.
oecd = oecd.loc[
    (oecd["Measure"] == "Life expectancy") & (oecd["AGE"] == "Y0"),
    ["REF_AREA", "SEX", "TIME_PERIOD", "OBS_VALUE"],
]
oecd.head()

In [None]:
# Attach the ISO two-letter code to every OECD row by matching REF_AREA against
# the ISO three-letter code. This is an inner join (pandas' default), so rows
# whose REF_AREA has no match in iso_codes are not kept.
oecd_columns = list(oecd.columns)
oecd = oecd.merge(
    iso_codes,
    how="inner",
    left_on="REF_AREA",
    right_on="Alpha-3 code",
)
# Of the columns brought in by the merge, only the two-letter code is needed.
oecd = oecd.loc[:, [*oecd_columns, "Alpha-2 code"]]
oecd.head()

In [None]:
# Rename the columns to snake_case, discard the three-letter area code now that
# country_code is available, and put the columns in a fixed order.
oecd = (
    oecd
    .rename(
        columns={
            "Alpha-2 code": "country_code",
            "SEX": "sex",
            "TIME_PERIOD": "year",
            "OBS_VALUE": "life_expectancy",
        }
    )
    .drop(columns=["REF_AREA"])
    .loc[:, ["country_code", "sex", "year", "life_expectancy"]]
)
oecd.head()

In [None]:
# Build the complete (country_code, sex, year) grid that the OECD frame is
# reindexed onto in the next step.
# The extra None entry adds grid rows that have no country code.
countries = oecd["country_code"].unique().tolist() + [None]
sexes = oecd["sex"].unique()
# End the grid at the current calendar year rather than a hard-coded 2024, so
# the per-country series cover the same span as the world series, which this
# notebook also extends up to date.today().year.
years = range(1960, date.today().year + 1)
index = pd.MultiIndex.from_product(
    [countries, sexes, years],
    names=["country_code", "sex", "year"],
)
index

In [None]:
# Expand the frame onto the full (country_code, sex, year) grid; combinations
# that are absent from the data become rows with NaN life_expectancy.
oecd = oecd.set_index(["country_code", "sex", "year"]).reindex(index)
# Bring country_code, sex and year back as ordinary columns.
oecd = oecd.reset_index()
# Fill gaps within each (country_code, sex) series: carry the last known value
# forward, then back-fill whatever is still missing at the start.
# transform() returns a Series aligned to oecd's own row index, so it can be
# assigned directly; this avoids depending on the auto-generated "level_2"
# column name that apply(...).reset_index() only produces under some pandas
# group_keys behaviours. Rows with a null country_code are not part of any
# group (groupby drops null keys by default) and therefore stay NaN.
oecd["life_expectancy"] = (
    oecd.groupby(["country_code", "sex"])["life_expectancy"]
    .transform(lambda values: values.ffill().bfill())
)
oecd.head()

In [None]:
# Inspect the rows that have no country code.
oecd.loc[oecd["country_code"].isna()]

In [None]:
# Load the Our World in Data life-expectancy export from the working directory.
# Download it from https://ourworldindata.org/grapher/life-expectancy?time=1960..latest&country=~OWID_WRL
# NOTE(review): later cells merge this table on "Year" alone and take a single
# row per year, which presumably assumes the export contains only the World
# entity — confirm the downloaded file is filtered that way.
world = pd.read_csv("world_life_expectancy.csv")
# Preview the first rows.
world.head()

In [None]:
# Load the Our World in Data life-expectancy-by-sex export from the working directory.
# Download it from https://ourworldindata.org/grapher/life-expectation-at-birth-by-sex?time=1960..latest
# NOTE(review): the file name below is spelled "wolrd"; it has to match the file
# on disk, so confirm whether the spelling is intentional before renaming either.
# NOTE(review): the variable name suggests world-level data only — confirm the
# export holds one row per year, since it is later merged on "Year" alone.
world_by_sex = pd.read_csv("wolrd_life_expectancy_by_sex.csv")
# Preview the first rows.
world_by_sex.head()

In [None]:
# Combine the all-sexes and by-sex tables on the year (inner join, pandas'
# default). Non-key columns present in both tables keep their name on the left
# side and receive a "_by_sex" suffix on the right side.
world = pd.merge(
    world,
    world_by_sex,
    how="inner",
    on="Year",
    suffixes=("", "_by_sex"),
)
world

In [None]:
# Extend the world table up to the current calendar year by repeating the most
# recent available row with only "Year" changed.
last_year = world[world["Year"] == world["Year"].max()].iloc[0]
this_year = date.today().year
# int() keeps range() working even if the row Series holds Year as a float.
missing_years = range(int(last_year["Year"]) + 1, this_year + 1)
if missing_years:
    # Build every filler row first and append them with a single concat instead
    # of enlarging the frame one .loc assignment at a time.
    filler_rows = pd.DataFrame(
        [{**last_year.to_dict(), "Year": year} for year in missing_years]
    )
    world = pd.concat([world, filler_rows], ignore_index=True)
world.tail()

In [None]:
# Rename the year column and the three value columns to short names
# ("_T" for all sexes, "F" for female, "M" for male) and keep only those.
world = (
    world
    .rename(
        columns={
            "Year": "year",
            "Period life expectancy at birth - Sex: all - Age: 0": "_T",
            "Period life expectancy - Sex: female - Age: 0": "F",
            "Period life expectancy - Sex: male - Age: 0": "M",
        }
    )
    .loc[:, ["year", "_T", "F", "M"]]
)
world.head()

In [None]:
# World rows for the all-sexes series, reshaped to the same four columns as the
# per-country frame. country_code is None because these rows belong to no
# single country.
# rename()/assign() return new frames, so no column is written into a slice of
# `world` (which is what triggers pandas' SettingWithCopyWarning).
_T = (
    world[["year", "_T"]]
    .rename(columns={"_T": "life_expectancy"})
    .assign(country_code=None, sex="_T")
    .loc[:, ["country_code", "sex", "year", "life_expectancy"]]
)
_T

In [None]:
# World rows for the male series, reshaped to the same four columns as the
# per-country frame. country_code is None because these rows belong to no
# single country.
# rename()/assign() return new frames, so no column is written into a slice of
# `world` (which is what triggers pandas' SettingWithCopyWarning).
M = (
    world[["year", "M"]]
    .rename(columns={"M": "life_expectancy"})
    .assign(country_code=None, sex="M")
    .loc[:, ["country_code", "sex", "year", "life_expectancy"]]
)
M

In [None]:
# World rows for the female series, reshaped to the same four columns as the
# per-country frame. country_code is None because these rows belong to no
# single country.
# rename()/assign() return new frames, so no column is written into a slice of
# `world` (which is what triggers pandas' SettingWithCopyWarning).
F = (
    world[["year", "F"]]
    .rename(columns={"F": "life_expectancy"})
    .assign(country_code=None, sex="F")
    .loc[:, ["country_code", "sex", "year", "life_expectancy"]]
)
F

In [None]:
# Stack the per-country rows and the three world series into one frame.
# The reindex grid was built with a None entry in `countries`, which leaves
# placeholder rows with a null country_code and no value of their own. Drop
# them before concatenating: otherwise every (None, sex, year) key would appear
# twice, once with NaN and once with the real world value. Filtering on the
# country code also makes this cell safe to re-run, because world rows appended
# by a previous run are removed before being added again.
oecd = pd.concat(
    [oecd[oecd["country_code"].notna()], _T, M, F],
    ignore_index=True,  # avoid repeated index labels from the stacked frames
)
# Store the all-sexes code "_T" as "O".
oecd["sex"] = oecd["sex"].replace("_T", "O")
oecd

In [None]:
# One plain dict per row, keyed by column name.
oecd_dicts = oecd.to_dict("records")

In [None]:
# Turn every record into an unsaved LifetimeExpectancy instance; nothing is
# written to the database in this cell.
objects_to_create = []
for record in oecd_dicts:
    objects_to_create.append(
        LifetimeExpectancy(
            country=record["country_code"],
            sex=record["sex"],
            birth_year=record["year"],
            life_expectancy=record["life_expectancy"],
        )
    )
# Preview the first few instances.
objects_to_create[:5]

In [None]:
# Insert all prepared instances through Django's asynchronous bulk-create API
# (top-level await is supported by the notebook kernel).
# NOTE(review): re-running this cell inserts the same rows again unless the
# model enforces uniqueness — confirm before executing it twice.
await LifetimeExpectancy.objects.abulk_create(objects_to_create)

In [None]:
# Spot check: fetch the last row according to the queryset's ordering and show
# its sex value. alast() yields None when the table is empty, in which case the
# attribute access below would fail.
last_obj = await LifetimeExpectancy.objects.alast()
last_obj.sex