In [143]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler

In [117]:
def map_region(value):
    if value == 1:
        name = 'Region_Asia'
    elif value == 2:
        name = 'Region_Central America and Caribbean'
    elif value == 3:
        name = 'Region_European Union'
    elif value == 4:
        name = 'Region_Middle East'
    elif value == 5:
        name = 'Region_North America'
    elif value == 6:
        name = 'Region_Oceania'
    elif value == 7:
        name = 'Region_Rest of Europe'
    elif value == 8:
        name = 'Region_South America'
    return name
    

In [113]:
def validate_region(input_value):
    enc_map = int(validate_ranged(input_value, 1, 8))
    return map_region(enc_map)
    

def validate_school(input_value):
    return validate_ranged(input_value, 0, 20)

def validate_bmi(input_value):
    return validate_ranged(input_value, 1, 40)

def validate_positive_float(input_value):
    try:
        value = float(input_value)
        assert value > 0
        return value
    except (ValueError, AssertionError):
        raise ValueError(f"Must be a positive number.")

def validate_ranged(input_value, minv=0, maxv=1000):
    try:
        value = float(input_value)
        assert minv <= value <= maxv  
        return value
    except (ValueError, AssertionError):
        raise ValueError(f"Must be a number between {minv} and {maxv}.")


In [115]:
model_1_params = {
    'const': 69.21197568956532,
    'Under_five_deaths': -4.182220994190937,
    'Adult_mortality': -6.247798980442859,
    'BMI': -0.5529768222701692,
    'Schooling': 0.5469970557304917,
    'Region_Asia': -0.25875624864766067,
    'Region_Central America and Caribbean': 1.5132693392134056,
    'Region_European Union': 0.7187216306389193,
    'Region_Middle East': -0.22551325872248373,
    'Region_North America': 1.427932115236123,
    'Region_Oceania': -0.7937502455802825,
    'Region_Rest of Europe': 0.3350217991614568,
    'Region_South America': 1.08447954717193,
    'GDP_per_capita_log': 5.06247458598834,
    'Incidents_HIV_log': -1.238041617575024
}
    
model_2_params = {
    'const': 71.40584843623562,
    'Under_five_deaths': -5.049564269608951,
    'Adult_mortality': -6.593371037005651,
    'BMI': -0.16656731993787366,
    'Region_Asia': -0.10879640464582607,
    'Region_Central America and Caribbean': 1.4164636610858503,
    'Region_European Union': 2.0697209069556006,
    'Region_Middle East': 0.062062452026476755,
    'Region_North America': 2.556911571495162,
    'Region_Oceania': -1.0521467455711573,
    'Region_Rest of Europe': 0.8807208206819866,
    'Region_South America': 1.1284349917369587
}

In [245]:
def scale_responses(responses):


    responses['Incidents_HIV_log'] = np.log(responses['Incidents_HIV_log'])
    responses['GDP_per_capita_log'] = np.log(responses['GDP_per_capita_log'])  

    standard_scaler = StandardScaler()
    minmax_scaler = MinMaxScaler()
    robust_scaler = RobustScaler()
        
    # Normally distributed features
    standard_cols = ['BMI', 'Schooling']
    minmax_cols = ['GDP_per_capita_log', 'Incidents_HIV_log']
    robust_cols = ['Under_five_deaths', 'Adult_mortality']
    

    # for name, value in responses.items():
    #     if name in standard_cols:
    #         responses[name] = standard_scaler.fit_transform(responses[name])
    
    # # MinMax scaling for bounded features
    # 
    # df[minmax_cols] = minmax_scaler.fit_transform(df[minmax_cols])
    
    # # Robust scaling for features with outliers
    # 
    # for col in robust_cols:
    #         df[[col]] = robust_scaler.fit_transform(df[[col]])  

    # # Add the constant (statsmodels)
    # df = sm.add_constant(df)

    # # Return the feature engineered result
    

    return responses
    

In [247]:
def model_1():
    questions_and_validators = [       
        ("Under five deaths (Number of under-five deaths per 1000 population)\n",
             validate_positive_float,
            "Under_five_deaths"
        ),
        ("Adult mortality (Probability of dying between 15 and 60 years per 1000 population)\n", 
             validate_positive_float,
             "Adult_mortality",
        ),
        ("BMI (Average Body Mass Index of entire population)\n", 
             validate_bmi,
            "BMI"
        ),
        ("Schooling (Number of years of Schooling(years)):\n", 
             validate_school,
                "Schooling"
        ),
        ("Incidents of HIV (Deaths per 1,000 live births HIV/AIDS (0-4 years))\n", 
             validate_positive_float,
            "Incidents_HIV_log"),
        ("GDP per capita (Gross Domestic Product per capita (in USD))\n$", 
             validate_positive_float,
        "GDP_per_capita_log"),
        (
            "Select a region for:\n"
            "  1: Asia\n"
            "  2: Central America and Caribbean\n"
            "  3: European Union\n"
            "  4: Middle East\n"
            "  5: North America\n"
            "  6: Oceania\n"
            "  7: Rest of Europe\n"
            "  8: South America\n",
            validate_region,
            "Region"
        )
        
    ]
    
    responses = {}
    responses = {'Under_five_deaths': 13.0, 'Adult_mortality': 105.824, 'BMI': 27.8, 'Schooling': 7.8, 'Incidents_HIV_log': 0.08, 'GDP_per_capita_log': 11006.0, 'Region_Middle East': 1}
    # for question, validator, name in questions_and_validators:
    #     while True:
    #         try:
    #             response = input(f"{question} ")
    #             validated_response = validator(response)
    #             if name == 'Region':
    #                 responses[validated_response] = 1
    #             else:
    #                 responses[name] = validated_response
                
    #             break
    #         except ValueError as e:
    #             print(e)
    result = scale_responses(responses.copy())

    
    return result, responses

In [249]:
# def model_2():
#     questions_and_validators = [       
#         ("Under five deaths (Number of under-five deaths per 1000 population)\n",
#              validate_positive_float,
#             "Under_five_deaths"
#         ),
#         ("Adult mortality (Probability of dying between 15 and 60 years per 1000 population)\n", 
#              validate_positive_float,
#              "Adult_mortality",
#         ),
#         ("BMI (Average Body Mass Index of entire population)\n", 
#              validate_bmi,
#             "BMI"
#         ),
#           (
#             "Select a region for:\n"
#             "  1: Asia\n"
#             "  2: Central America and Caribbean\n"
#             "  3: European Union\n"
#             "  4: Middle East\n"
#             "  5: North America\n"
#             "  6: Oceania\n"
#             "  7: Rest of Europe\n"
#             "  8: South America\n",
#             validate_region,
#             "Region"
#         )
#     ] 
#     responses = {}
#     for question, validator, name in questions_and_validators:
#         while True:
#             try:
#                 response = input(f"{question} ")
#                 validated_response = validator(response)
#                 responses[name] = validated_response
#                 break
#             except ValueError as e:
#                 print(e)
    
#     result = scale_responses(responses)
#     return result, responses



In [251]:

def app():
    # while True:
    #     try:
    #         response = input(
    #             "Select the model you want to use:\n"
    #             "  1: Precise\n"
    #             "  2: Minimalistic\n"
    #         )
    #         validated_response = validate_ranged(response, 1, 2)
    #         break
    #     except ValueError as e:
    #             print(e)
    # if int(response) == 1:
    if True:
        print("***** Precise Model ***** \n\n\n")
        result, responses =  model_1()
    else:
        print("***** Minimalistic Model *****\n\n\n")
        result, responses = model_2()

    
    print(f"Your responses are: \n {responses}")
    print(f"\n\nPredicted life expectancy is: \n\n{result}")
    


In [253]:
    
data = app()
data

***** Precise Model ***** 



Your responses are: 
 {'Under_five_deaths': 13.0, 'Adult_mortality': 105.824, 'BMI': 27.8, 'Schooling': 7.8, 'Incidents_HIV_log': 0.08, 'GDP_per_capita_log': 11006.0, 'Region_Middle East': 1}


Predicted life expectancy is: 

{'Under_five_deaths': 13.0, 'Adult_mortality': 105.824, 'BMI': 27.8, 'Schooling': 7.8, 'Incidents_HIV_log': -2.5257286443082556, 'GDP_per_capita_log': 9.306195857619704, 'Region_Middle East': 1}
