In [60]:
# Data Sources: World Health Organization (WHO) Global Health Observatory Data / http://pophealthmetrics.biomedcentral.com/articles/10.1186/s12963-016-0094-0#Sec7

In [5]:
import pandas as pd
from scipy import stats

In [42]:
def get_life_exp_vals(country, input_age, sex,
                      STD_MALES=5.6, STD_FEMALES=3.6,
                      life_exp_data=None):
    """Fit a straight line through two WHO life-expectancy figures.

    For the given country and sex, the most recent "Life expectancy at birth
    (years)" and "Life expectancy at age 60 (years)" values are looked up and
    a line is fitted through the points (0, birth value) and (60, age-60 value).

    Parameters
    ----------
    country : str
        Value matched exactly against the "COUNTRY (DISPLAY)" column.
    input_age : float
        Currently unused; kept so existing callers keep working.
    sex : str
        Value matched exactly against the "SEX (DISPLAY)" column. 'Male'
        selects STD_MALES; any other value selects STD_FEMALES.
    STD_MALES, STD_FEMALES : float
        Standard deviations returned unchanged as ``life_std``.
    life_exp_data : pandas.DataFrame, optional
        Pre-loaded table with the columns "GHO (DISPLAY)", "YEAR (CODE)",
        "COUNTRY (DISPLAY)", "SEX (DISPLAY)" and "Numeric". When omitted the
        table is downloaded from the WHO endpoint on every call.

    Returns
    -------
    tuple
        ``(slope, intercept, life_std)`` where slope and intercept describe
        the fitted line.

    Raises
    ------
    IndexError
        If no row matches the country/sex for one of the two indicators
        (same exception type the lookup raised before, now with a message).
    """
    if life_exp_data is None:
        # load WHO longevity data
        life_exp_data = pd.read_csv('https://apps.who.int/gho/athena/data/GHO/WHOSIS_000001,WHOSIS_000015?filter=COUNTRY:*&x-sideaxis=COUNTRY;YEAR&x-topaxis=GHO;SEX&profile=verbose&format=csv')

    # Keep only the rows for this country and sex, and only the columns used below.
    is_match = ((life_exp_data["COUNTRY (DISPLAY)"] == country)
                & (life_exp_data["SEX (DISPLAY)"] == sex))
    sub_data = life_exp_data.loc[is_match, ["GHO (DISPLAY)", "YEAR (CODE)", "Numeric"]]
    # Newest year first. Sorting into a new frame avoids mutating a slice of
    # the caller's data in place.
    sub_data = sub_data.sort_values(by='YEAR (CODE)', ascending=False)

    def _latest_value(indicator):
        # Most recent "Numeric" value for one GHO indicator.
        values = sub_data.loc[sub_data['GHO (DISPLAY)'] == indicator, "Numeric"]
        if values.empty:
            raise IndexError(
                "no '{}' data for country={!r}, sex={!r}".format(indicator, country, sex))
        return values.iloc[0]

    birth_expectancy = _latest_value("Life expectancy at birth (years)")
    sixty_year_expectancy = _latest_value("Life expectancy at age 60 (years)")

    ages = [0, 60]
    life_expectancies = [birth_expectancy, sixty_year_expectancy]
    slope, intercept, _, _, _ = stats.linregress(ages, life_expectancies)

    life_std = STD_MALES if sex == 'Male' else STD_FEMALES
    return slope, intercept, life_std

In [43]:
# Inputs for the example run: the country and sex labels to look up,
# and the person's current age.
country, sex, age = "United States of America", "Male", 50.0

In [44]:
# Fit the life-expectancy line for the configured country/sex
# (this call downloads the WHO table).
life_slope, life_intercept, life_std = get_life_exp_vals(
    country=country, input_age=age, sex=sex
)

In [46]:
def survival_prob_next_year(age, life_slope, life_intercept, life_std):
    """Conditional probability of surviving one more year, given survival to `age`.

    Age at death is modelled as a normal distribution with standard deviation
    `life_std`, centred on ``life_slope * age + life_intercept + age``.

    Parameters
    ----------
    age : float
        Current age.
    life_slope, life_intercept : float
        Coefficients of the fitted life-expectancy line.
    life_std : float
        Standard deviation of the age-at-death distribution.

    Returns
    -------
    float
        P(alive at age + 1) / P(alive at age).
    """
    # The line value is presumably the expected *remaining* years at `age`;
    # adding `age` turns it into an expected age at death.
    life_mean = life_slope * age + life_intercept + age
    # sf() evaluates the upper tail directly. `1 - cdf()` rounds to exactly 0
    # far in the tail, which made this ratio 0/0 = nan at very high ages.
    prob_survival_till_now = stats.norm.sf(age, loc=life_mean, scale=life_std)
    prob_survival_next_year = stats.norm.sf(age + 1, loc=life_mean, scale=life_std)
    return prob_survival_next_year / prob_survival_till_now

def survival_prob_next_month(age, life_slope, life_intercept, life_std):
    """Conditional probability of surviving one more month, given survival to `age`.

    Age at death is modelled as a normal distribution with standard deviation
    `life_std`, centred on ``life_slope * age + life_intercept + age`` -- the
    same centre used by `survival_prob_next_year`.

    Parameters
    ----------
    age : float
        Current age.
    life_slope, life_intercept : float
        Coefficients of the fitted life-expectancy line.
    life_std : float
        Standard deviation of the age-at-death distribution.

    Returns
    -------
    float
        P(alive at age + 1/12) / P(alive at age).
    """
    # `+ age` was missing here: without it the distribution was centred on the
    # line value alone while still being compared against an age, which
    # disagreed with the yearly version of this calculation.
    life_mean = life_slope * age + life_intercept + age
    # sf() evaluates the upper tail directly. `1 - cdf()` rounds to exactly 0
    # far in the tail, which turns the ratio into 0/0 = nan.
    prob_survival_till_now = stats.norm.sf(age, loc=life_mean, scale=life_std)
    prob_survival_next_month = stats.norm.sf(age + (1 / 12), loc=life_mean, scale=life_std)
    return prob_survival_next_month / prob_survival_till_now

In [None]:
# Print the one-year survival probability for each age from 124 to 149.
# The loop uses its own variable name: a `for` target stays bound after the
# loop ends, so iterating with `age` would overwrite the notebook-level `age`
# input for every cell that runs afterwards.
for extreme_age in range(124, 150):
    print("{}: {}".format(extreme_age, survival_prob_next_year(extreme_age, life_slope, life_intercept, life_std)))

In [None]:
def survival_array():
    """Placeholder -- not implemented yet; currently returns None.

    Intended behaviour: return an array that is True for every datapoint on
    which the person is alive and False for every datapoint on which the
    person is dead.
    """
    # TODO: implement.
    return None