In [400]:
import numpy as np
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler
import pandas as pd
import joblib

In [402]:
def map_region(value):
    if value == 1:
        name = 'Region_Asia'
    elif value == 2:
        name = 'Region_Central America and Caribbean'
    elif value == 3:
        name = 'Region_European Union'
    elif value == 4:
        name = 'Region_Middle East'
    elif value == 5:
        name = 'Region_North America'
    elif value == 6:
        name = 'Region_Oceania'
    elif value == 7:
        name = 'Region_Rest of Europe'
    elif value == 8:
        name = 'Region_South America'
    return name
    

In [404]:
def validate_region(input_value):
    enc_map = int(validate_ranged(input_value, 1, 8))
    return map_region(enc_map)
    

def validate_school(input_value):
    return validate_ranged(input_value, 0, 20)

def validate_bmi(input_value):
    return validate_ranged(input_value, 1, 40)

def validate_positive_float(input_value):
    try:
        value = float(input_value)
        assert value > 0
        return value
    except (ValueError, AssertionError):
        raise ValueError(f"Must be a positive number.")

def validate_ranged(input_value, minv=0, maxv=1000):
    try:
        value = float(input_value)
        assert minv <= value <= maxv  
        return value
    except (ValueError, AssertionError):
        raise ValueError(f"Must be a number between {minv} and {maxv}.")


In [186]:
model_1_params = {
    'const': 69.21197568956532,
    'Under_five_deaths': -4.182220994190937,
    'Adult_mortality': -6.247798980442859,
    'BMI': -0.5529768222701692,
    'Schooling': 0.5469970557304917,
    'Region_Asia': -0.25875624864766067,
    'Region_Central America and Caribbean': 1.5132693392134056,
    'Region_European Union': 0.7187216306389193,
    'Region_Middle East': -0.22551325872248373,
    'Region_North America': 1.427932115236123,
    'Region_Oceania': -0.7937502455802825,
    'Region_Rest of Europe': 0.3350217991614568,
    'Region_South America': 1.08447954717193,
    'GDP_per_capita_log': 5.06247458598834,
    'Incidents_HIV_log': -1.238041617575024
}
    
model_2_params = {
    'const': 71.40584843623562,
    'Under_five_deaths': -5.049564269608951,
    'Adult_mortality': -6.593371037005651,
    'BMI': -0.16656731993787366,
    'Region_Asia': -0.10879640464582607,
    'Region_Central America and Caribbean': 1.4164636610858503,
    'Region_European Union': 2.0697209069556006,
    'Region_Middle East': 0.062062452026476755,
    'Region_North America': 2.556911571495162,
    'Region_Oceania': -1.0521467455711573,
    'Region_Rest of Europe': 0.8807208206819866,
    'Region_South America': 1.1284349917369587
}

In [420]:
def scale_responses(responses):
    responses = pd.DataFrame.from_dict([responses])
    
    # Check and apply log transformation if columns are present
    if 'Incidents_HIV_log' in responses:
        responses['Incidents_HIV_log'] = np.log(responses['Incidents_HIV_log'])
    if 'GDP_per_capita_log' in responses:
        responses['GDP_per_capita_log'] = np.log(responses['GDP_per_capita_log'])

    # Load the scalers
    standard_scaler_bmi = joblib.load('scr/standard_scaler_bmi.pkl')
    standard_scaler_schooling = joblib.load('scr/standard_scaler_schooling.pkl')
    minmax_scaler_gdp = joblib.load('scr/minmax_scaler_gdp.pkl')
    minmax_scaler_hiv = joblib.load('scr/minmax_scaler_hiv.pkl')
    robust_scaler_under_five = joblib.load('scr/robust_scaler_under_five.pkl')
    robust_scaler_adult_mortality = joblib.load('scr/robust_scaler_adult_mortality.pkl')
    
    # Apply appropriate scalers based on the columns available in the responses
    if 'BMI' in responses:
        responses['BMI'] = standard_scaler_bmi.transform(responses[['BMI']])  # Passing as DataFrame
    if 'Schooling' in responses:
        responses['Schooling'] = standard_scaler_schooling.transform(responses[['Schooling']])  # Passing as DataFrame
    
    if 'GDP_per_capita_log' in responses:
        responses['GDP_per_capita_log'] = minmax_scaler_gdp.transform(responses[['GDP_per_capita_log']])  # Passing as DataFrame
    if 'Incidents_HIV_log' in responses:
        responses['Incidents_HIV_log'] = minmax_scaler_hiv.transform(responses[['Incidents_HIV_log']])  # Passing as DataFrame

    if 'Under_five_deaths' in responses:
        responses['Under_five_deaths'] = robust_scaler_under_five.transform(responses[['Under_five_deaths']])  # Passing as DataFrame
    if 'Adult_mortality' in responses:
        responses['Adult_mortality'] = robust_scaler_adult_mortality.transform(responses[['Adult_mortality']])  # Passing as DataFrame

    scaled = responses.squeeze().to_dict()
   
    return scaled  

In [422]:
def compute_prediction(responses, model_params):
    prediction = model_params['const']
    
    for feature, value in responses.items():
        if feature in model_params:
            prediction += model_params[feature] * value
    
    return prediction
    

In [434]:
def model_1():
    questions_and_validators = [       
        ("Under five deaths (Number of under-five deaths per 1000 population)\n",
             validate_positive_float,
            "Under_five_deaths"
        ),
        ("Adult mortality (Probability of dying between 15 and 60 years per 1000 population)\n", 
             validate_positive_float,
             "Adult_mortality",
        ),
        ("BMI (Average Body Mass Index of entire population)\n", 
             validate_bmi,
            "BMI"
        ),
        ("Schooling (Number of years of Schooling(years)):\n", 
             validate_school,
                "Schooling"
        ),
        ("Incidents of HIV (Deaths per 1,000 live births HIV/AIDS (0-4 years))\n", 
             validate_positive_float,
            "Incidents_HIV_log"),
        ("GDP per capita (Gross Domestic Product per capita (in USD))\n$", 
             validate_positive_float,
        "GDP_per_capita_log"),
        (
            "Select a region for:\n"
            "  1: Asia\n"
            "  2: Central America and Caribbean\n"
            "  3: European Union\n"
            "  4: Middle East\n"
            "  5: North America\n"
            "  6: Oceania\n"
            "  7: Rest of Europe\n"
            "  8: South America\n",
            validate_region,
            "Region"
        )
        
    ]
    
    responses = {}
    for question, validator, name in questions_and_validators:
        while True:
            try:
                response = input(f"{question} ")
                validated_response = validator(response)
                if name == 'Region':
                    responses[validated_response] = 1
                else:
                    responses[name] = validated_response
                
                break
            except ValueError as e:
                print(e)
    scaled_responses = scale_responses(responses)
    prediction = compute_prediction(scaled_responses, model_1_params)

    
    return prediction, responses

In [436]:
def model_2():
    questions_and_validators = [       
        ("Under five deaths (Number of under-five deaths per 1000 population)\n",
             validate_positive_float,
            "Under_five_deaths"
        ),
        ("Adult mortality (Probability of dying between 15 and 60 years per 1000 population)\n", 
             validate_positive_float,
             "Adult_mortality",
        ),
        ("BMI (Average Body Mass Index of entire population)\n", 
             validate_bmi,
            "BMI"
        ),
          (
            "Select a region for:\n"
            "  1: Asia\n"
            "  2: Central America and Caribbean\n"
            "  3: European Union\n"
            "  4: Middle East\n"
            "  5: North America\n"
            "  6: Oceania\n"
            "  7: Rest of Europe\n"
            "  8: South America\n",
            validate_region,
            "Region"
        )
    ] 
    responses = {}
    for question, validator, name in questions_and_validators:
        while True:
            try:
                response = input(f"{question} ")
                validated_response = validator(response)
                if name == 'Region':
                    responses[validated_response] = 1
                else:
                    responses[name] = validated_response
                
                break
            except ValueError as e:
                print(e)
    scaled_responses = scale_responses(responses)
    prediction = compute_prediction(scaled_responses, model_1_params)

    
    return prediction, responses



In [438]:

def app():
    while True:
        try:
            response = input(
                "Select the model you want to use:\n"
                "  1: Precise\n"
                "  2: Minimalistic\n"
            )
            validated_response = validate_ranged(response, 1, 2)
            break
        except ValueError as e:
                print(e)
    if int(response) == 1:
        print("***** Precise Model ***** \n\n\n")
        result, responses =  model_1()
    else:
        print("***** Minimalistic Model *****\n\n\n")
        result, responses = model_2()

    
    print(f"Your responses are: \n {responses}")
    print(f"\n\nPredicted life expectancy is: \n\n{result}")
    


In [440]:
    
data = app()
data

***** Precise Model ***** 





Under five deaths (Number of under-five deaths per 1000 population)
  1
Adult mortality (Probability of dying between 15 and 60 years per 1000 population)
  1
BMI (Average Body Mass Index of entire population)
  1
Schooling (Number of years of Schooling(years)):
  1
Incidents of HIV (Deaths per 1,000 live births HIV/AIDS (0-4 years))
  1
GDP per capita (Gross Domestic Product per capita (in USD))
$  1
Select a region for:
  1: Asia
  2: Central America and Caribbean
  3: European Union
  4: Middle East
  5: North America
  6: Oceania
  7: Rest of Europe
  8: South America
  1


Your responses are: 
 {'Under_five_deaths': 1.0, 'Adult_mortality': 1.0, 'BMI': 1.0, 'Schooling': 1.0, 'Incidents_HIV_log': 1.0, 'GDP_per_capita_log': 1.0, 'Region_Asia': 1}


Predicted life expectancy is: 

78.2218590645223


In [432]:
path = "scr/precise.pkl"
import statsmodels.api as sm

import pickle
with open(path, 'rb') as file:
    loaded_model = pickle.load(file)

# Use the loaded model for predictions



# Load the saved StandardScaler
standard_scaler = joblib.load('scr/standard_scaler.pkl')

# Single BMI entry to scale
bmi_value = 25.6  # Example of a single BMI value

# Transform the single BMI entry (add a dummy second column, like NaN, to match the expected number of features)
bmi_scaled = standard_scaler.transform(np.array([[bmi_value]])) # Take the scaled 'BMI' column only

# Print the scaled BMI value
print(bmi_scaled)

FileNotFoundError: [Errno 2] No such file or directory: 'scr/standard_scaler.pkl'