In [73]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler
import pandas as pd
import joblib

In [75]:
# Numeric menu code -> model feature name for each selectable region.
_REGION_FEATURES = {
    1: 'Region_Asia',
    2: 'Region_Central America and Caribbean',
    3: 'Region_European Union',
    4: 'Region_Middle East',
    5: 'Region_North America',
    6: 'Region_Oceania',
    7: 'Region_Rest of Europe',
    8: 'Region_South America',
}


def map_region(value):
    """Translate a region menu code into the matching model feature name.

    Args:
        value: Region code, 1 through 8. A float equal to one of those
            integers (e.g. ``3.0``) is accepted as well.

    Returns:
        The feature name for the region, e.g. ``'Region_European Union'``.

    Raises:
        ValueError: If ``value`` is not one of the known region codes.
    """
    try:
        return _REGION_FEATURES[value]
    except (KeyError, TypeError):
        # An unknown code used to fall through every branch and crash with
        # UnboundLocalError; report it as a validation error instead.
        raise ValueError("Must be a region number between 1 and 8.") from None

In [77]:
def validate_region(input_value):
    """Validate a region menu choice and return its model feature name.

    Args:
        input_value: Raw user input; must parse to a whole number from 1 to 8.

    Returns:
        The feature name produced by ``map_region`` for the chosen code.

    Raises:
        ValueError: If the input is not numeric, is outside 1..8, or is not
            a whole number.
    """
    code = validate_ranged(input_value, 1, 8)
    # validate_ranged returns a float; without this check "3.7" would be
    # truncated to 3 and silently select the wrong region.
    if not code.is_integer():
        raise ValueError("Must be a whole number between 1 and 8.")
    return map_region(int(code))
    
def validate_school(input_value):
    """Validate a schooling answer: a number from 0 to 20 inclusive."""
    years = validate_ranged(input_value, minv=0, maxv=20)
    return years

def validate_bmi(input_value):
    """Validate a BMI answer: a number from 1 to 40 inclusive."""
    bmi = validate_ranged(input_value, minv=1, maxv=40)
    return bmi

def validate_positive_float(input_value):
    """Parse ``input_value`` as a strictly positive, finite float.

    Args:
        input_value: Raw user input (typically a string).

    Returns:
        The parsed value as a ``float``.

    Raises:
        ValueError: If the input cannot be parsed, is zero or negative, or is
            NaN / infinite.
    """
    message = "Must be a positive number."
    try:
        value = float(input_value)
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric input such as None.
        raise ValueError(message) from None
    # Explicit comparison rather than `assert`, which is removed when Python
    # runs with -O. The chained bound rejects NaN and +inf as well.
    if not (0 < value < float('inf')):
        raise ValueError(message)
    return value

def validate_ranged(input_value, minv=0, maxv=1000):
    """Parse ``input_value`` as a float and require ``minv <= value <= maxv``.

    Args:
        input_value: Raw user input (typically a string).
        minv: Inclusive lower bound.
        maxv: Inclusive upper bound.

    Returns:
        The parsed value as a ``float``.

    Raises:
        ValueError: If the input cannot be parsed as a number or lies outside
            the inclusive range.
    """
    message = f"Must be a number between {minv} and {maxv}."
    try:
        value = float(input_value)
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric input such as None.
        raise ValueError(message) from None
    # Explicit comparison rather than `assert`, which is removed when Python
    # runs with -O. NaN fails every comparison, so it is rejected here too.
    if not (minv <= value <= maxv):
        raise ValueError(message)
    return value


In [79]:
# Coefficient tables consumed by compute_prediction: 'const' is the starting
# value and every other key is a feature name whose coefficient is multiplied
# by the matching (scaled) response.
# NOTE(review): these look like fitted linear-regression coefficients on the
# scaled features — confirm against the training notebook.

# Used when the "Precise" model is selected in app(); includes one coefficient
# per region feature name returned by map_region.
model_1_params = {
    'const': 69.21197568956534, 
    'Under_five_deaths': -4.182220994190924, 
    'Adult_mortality': -6.24779898044286, 
    'BMI': -0.5529768222701559, 
    'Schooling': 0.54699705573051, 
    'GDP_per_capita_log': 5.062474585988401, 
    'Incidents_HIV_log': -1.2380416175750413, 
    'Region_Asia': -0.258756248647668, 
    'Region_Central America and Caribbean': 1.513269339213396, 
    'Region_European Union': 0.7187216306389358, 
    'Region_Middle East': -0.22551325872249164, 
    'Region_North America': 1.427932115236103, 
    'Region_Oceania': -0.7937502455802756, 
    'Region_Rest of Europe': 0.3350217991614595, 
    'Region_South America': 1.0844795471719326
}    
# Used when the "Minimalistic" model is selected in app(); only three features.
model_2_params = {
    'const': 72.1525761376697, 
    'Under_five_deaths': -5.494643384419958, 
    'Adult_mortality': -6.658005120051611, 
    'BMI': -0.13039081436056832
}

In [81]:
def scale_responses(responses):
    """Apply the log transforms and saved scalers to one set of responses.

    Args:
        responses: Mapping of feature name to raw numeric value. The mapping
            itself is not modified. Features without a scaler (e.g. the
            region indicator) pass through unchanged.

    Returns:
        dict mapping each feature name to its transformed value.

    Notes:
        Scaler files are loaded only for features that are actually present,
        so the minimal model does not need the schooling/GDP/HIV pickles.
        SECURITY: ``joblib.load`` unpickles the file and can execute arbitrary
        code; only load scaler files from a trusted location.
    """
    frame = pd.DataFrame([responses])

    # These two features are fed to their scalers on a natural-log scale.
    for column in ('Incidents_HIV_log', 'GDP_per_capita_log'):
        if column in frame:
            frame[column] = np.log(frame[column])

    # Feature name -> pickled scaler applied to that feature.
    scaler_paths = {
        'BMI': 'scr/standard_scaler_bmi.pkl',
        'Schooling': 'scr/standard_scaler_schooling.pkl',
        'GDP_per_capita_log': 'scr/minmax_scaler_gdp.pkl',
        'Incidents_HIV_log': 'scr/minmax_scaler_hiv.pkl',
        'Under_five_deaths': 'scr/robust_scaler_under_five.pkl',
        'Adult_mortality': 'scr/robust_scaler_adult_mortality.pkl',
    }
    for column, path in scaler_paths.items():
        if column in frame:
            scaler = joblib.load(path)
            frame[column] = scaler.transform(frame[[column]])

    # Take the single row explicitly: squeeze() collapses a one-column frame
    # to a bare scalar, which has no to_dict().
    return frame.iloc[0].to_dict()

In [83]:
def compute_prediction(responses, model_params):
    """Evaluate the linear model described by ``model_params``.

    Starts from ``model_params['const']`` and adds coefficient * value for
    every response whose name has a coefficient; other responses are ignored.
    """
    total = model_params['const']

    for name in responses:
        if name not in model_params:
            continue
        weight = model_params[name]
        total += weight * responses[name]

    return total
    

In [85]:
# Questionnaire for the precise model. Each entry is a
# (prompt, validator, response key) triple; app() asks them in this order.
precise_qv = [
    (
        "Under five deaths (Number of under-five deaths per 1000 population)\n",
        validate_positive_float,
        "Under_five_deaths",
    ),
    (
        "Adult mortality (Probability of dying between 15 and 60 years per 1000 population)\n",
        validate_positive_float,
        "Adult_mortality",
    ),
    (
        "BMI (Average Body Mass Index of entire population)\n",
        validate_bmi,
        "BMI",
    ),
    (
        "Schooling (Number of years of Schooling(years)):\n",
        validate_school,
        "Schooling",
    ),
    (
        "Incidents of HIV (Deaths per 1,000 live births HIV/AIDS (0-4 years))\n",
        validate_positive_float,
        "Incidents_HIV_log",
    ),
    (
        "GDP per capita (Gross Domestic Product per capita (in USD))\n$",
        validate_positive_float,
        "GDP_per_capita_log",
    ),
    (
        # The validator for this entry returns the region feature name itself.
        "Select a region for:\n"
        "  1: Asia\n"
        "  2: Central America and Caribbean\n"
        "  3: European Union\n"
        "  4: Middle East\n"
        "  5: North America\n"
        "  6: Oceania\n"
        "  7: Rest of Europe\n"
        "  8: South America\n",
        validate_region,
        "Region",
    ),
]

In [87]:
# Worked example kept for reference. The region key appears here as
# 'R_European Union'; map_region currently produces 'Region_European Union'.
# Scaled responses:
#  {'R_European Union': 1.0, 'Under_five_deaths': -0.3336306868867083, 'Adult_mortality': -0.44910356720395617, 'BMI': 0.2602316599807751, 'Incidents_HIV_log': 0.23325462336548392, 'GDP_per_capita_log': 0.9706438107276493, 'Schooling': 0.8451610438231039}
# Your responses are: 
#  {'R_European Union': 1, 'Under_five_deaths': 4.5, 'Adult_mortality': 100.072, 'BMI': 25.6, 'Incidents_HIV_log': 0.06, 'GDP_per_capita_log': 92528, 'Schooling': 10.3}

In [89]:
# Questionnaire for the minimalistic model. Each entry is a
# (prompt, validator, response key) triple; app() asks them in this order.
minimal_qv = [
    (
        "Under five deaths (Number of under-five deaths per 1000 population)\n",
        validate_positive_float,
        "Under_five_deaths",
    ),
    (
        "Adult mortality (Probability of dying between 15 and 60 years per 1000 population)\n",
        validate_positive_float,
        "Adult_mortality",
    ),
    (
        "BMI (Average Body Mass Index of entire population)\n",
        validate_bmi,
        "BMI",
    ),
]

In [91]:
def get_model_wanted():
    """Prompt until the user picks a model and return the choice.

    Returns:
        int: 1 for the precise model, 2 for the minimalistic model.
    """
    prompt = (
        "Select the model you want to use:\n"
        "  1: Precise\n"
        "  2: Minimalistic\n"
    )
    while True:
        try:
            choice = validate_ranged(input(prompt), 1, 2)
            # In-range non-integers such as "1.5" must re-prompt. Converting
            # the raw text with int() after the loop used to raise an
            # uncaught ValueError for "1.5" and even for "1.0".
            if not choice.is_integer():
                raise ValueError("Must be either 1 or 2.")
            return int(choice)
        except ValueError as e:
            print(e)

In [93]:
def app():
    """Run the interactive questionnaire and print the predicted life expectancy.

    Asks which model to use, collects and validates each answer (re-prompting
    on invalid input), scales the answers and evaluates the chosen model.

    Returns:
        The prediction from ``compute_prediction``, so that callers writing
        ``result = app()`` receive the value instead of ``None``.
    """
    model_choice = get_model_wanted()
    if model_choice == 1:
        print("***** Precise Model ***** \n\n\n")
        questions_and_validators = precise_qv
        model_param = model_1_params
    else:
        print("***** Minimalistic Model *****\n\n\n")
        questions_and_validators = minimal_qv
        model_param = model_2_params

    responses = {}
    for question, validator, name in questions_and_validators:
        while True:
            try:
                validated_response = validator(input(f"{question} "))
            except ValueError as e:
                print(e)
                continue
            if name == 'Region':
                # The region validator returns the feature name itself; store
                # it as an indicator set to 1.
                responses[validated_response] = 1
            else:
                responses[name] = validated_response
            break

    scaled_responses = scale_responses(responses)
    prediction = compute_prediction(scaled_responses, model_param)

    print(f"Your responses are: \n {responses}")
    print(f"\n\nPredicted life expectancy is: \n\n{prediction:.2f}")
    return prediction

In [71]:
# Launch the interactive questionnaire, then echo whatever app() returned as
# this cell's output.
data = app()
data

Select the model you want to use:
  1: Precise
  2: Minimalistic
 4.5


Must be a number between 1 and 2.


Select the model you want to use:
  1: Precise
  2: Minimalistic
 100.072


Must be a number between 1 and 2.


Select the model you want to use:
  1: Precise
  2: Minimalistic
 1


***** Precise Model ***** 





Under five deaths (Number of under-five deaths per 1000 population)
  4.5
Adult mortality (Probability of dying between 15 and 60 years per 1000 population)
  100.072
BMI (Average Body Mass Index of entire population)
  25.6
Schooling (Number of years of Schooling(years)):
  10.3
Incidents of HIV (Deaths per 1,000 live births HIV/AIDS (0-4 years))
  0.06
GDP per capita (Gross Domestic Product per capita (in USD))
$  92528
Select a region for:
  1: Asia
  2: Central America and Caribbean
  3: European Union
  4: Middle East
  5: North America
  6: Oceania
  7: Rest of Europe
  8: South America
  3


Your responses are: 
 {'Under_five_deaths': 4.5, 'Adult_mortality': 100.072, 'BMI': 25.6, 'Schooling': 10.3, 'Incidents_HIV_log': 0.06, 'GDP_per_capita_log': 92528.0, 'Region_European Union': 1}


Predicted life expectancy is: 

79.08


In [40]:
# responses = {'R_European Union': 1, 'Under_five_deaths': 4.5, 'Adult_mortality': 100.072, 'BMI': 25.6, 'Schooling': 10.3, 'Incidents_HIV_log': 0.06, 'GDP_per_capita_log': 92528}
# Expected prediction for these responses with the precise model: 79.075403