# This notebook documents the final function: an interactive life expectancy predictor

## Setup

In [1]:
import sys
sys.path.append('../src') # Add src to path

from user_interaction import welcome_message, model_decider, collect_values
from preprocessing import load_data 
from models import full_pipeline
from features import feature_engineering, scaling, add_constant_column

## Global variables

In [2]:
# Columns selected by final_function when the user's answer is 'n'.
# NOTE(review): 'Life_expectancy' looks like the prediction target rather than
# an input feature — confirm how full_pipeline / collect_values treat it.
minimal_cols = [
    'Region',
    'Under_five_deaths',
    'Adult_mortality',
    'GDP_per_capita',
    'Schooling',
    'Economy_status_Developed',
    'Life_expectancy',
]

# Columns selected for any other answer: every minimal column, in the same
# order, followed by the additional columns below. This is a new list, so
# changing it does not affect minimal_cols.
elaborate_cols = [
    *minimal_cols,
    'Alcohol_consumption',
    'Hepatitis_B',
    'Measles',
    'BMI',
    'Polio',
    'Diphtheria',
    'Incidents_HIV',
    'Thinness_ten_nineteen_years',
    'Thinness_five_nine_years',
]

## Final Function

In [3]:
def final_function():
    """Run the interactive life expectancy prediction from start to finish.

    Steps, in order:
      1. Show the welcome message and ask the user which model to use.
      2. Select ``minimal_cols`` when the answer is 'n' (compared
         case-insensitively, ignoring surrounding whitespace), otherwise
         ``elaborate_cols``.
      3. Load the data and collect the user's values for the selected columns.
      4. Call ``full_pipeline`` and take the model, the scaler and the
         training columns from positions 4, 5 and 6 of its result.
      5. Feature-engineer the user's values, align them with the training
         columns, scale them and add the constant column.
      6. Print the first prediction, formatted to three decimals.

    Returns:
        None. The prediction is printed, not returned.
    """
    welcome_message()
    response = model_decider()

    # Decide columns in use. The answer is normalised before comparing so that
    # 'N' or ' n ' selects the minimal columns just like 'n' does; the output
    # captured in this notebook echoes the answer in upper case.
    wants_minimal = str(response).strip().lower() == 'n'
    # Copy the chosen list so the helpers below cannot modify the
    # notebook-level lists and change the result of a later run.
    features = list(minimal_cols if wants_minimal else elaborate_cols)

    df = load_data()
    # The user's original answer is passed through unchanged.
    user_values = collect_values(df, response, features)  # Get user data

    # Get model and scaler
    pipeline_results = full_pipeline(features)  # This is a tuple
    model = pipeline_results[4]  # Extract model from results
    scaler = pipeline_results[5]  # Extract scaler used on training data
    training_columns = pipeline_results[6]

    # Apply feature engineering on user_values
    user_values_fe = feature_engineering(user_values)

    # Align columns with the training data so that both have the same columns
    # (given that a single user row one-hot encodes into fewer columns).
    # 'const' is left out here because it is added after scaling.
    user_values_fe = user_values_fe.reindex(
        columns=[col for col in training_columns if col != 'const'],
        fill_value=0
    )

    # Scale and add constant column
    user_values_fe, _ = scaling(user_values_fe, scaler)
    user_values_fe = add_constant_column(user_values_fe)

    # Put the columns in the training order. Any column still missing at this
    # point is filled with 1 — presumably only 'const' could be; TODO confirm.
    user_values_fe = user_values_fe.reindex(columns=training_columns, fill_value=1)

    # Get prediction
    prediction = model.predict(user_values_fe)
    print(f"Predicted life expectancy: {prediction[0]:.3f} years")
   

In [4]:
# Run the interactive predictor; it prompts for input and prints the prediction.
final_function()

Welcome to the WHOligans life expectancy predictor

Do you consent to using advanced population data, 
which may include protected information, for better accuracy? (Y/N): 
Thank you, your response was: N 

Current feature is Region
Please enter the value for Region. Pick from the following list ['Middle East', 'European Union', 'Asia', 'South America', 'Central America and Caribbean', 'Rest of Europe', 'Africa', 'Oceania', 'North America']
Current feature is Under_five_deaths
Please enter the value for Under_five_deaths. Please enter a number, expected range is 2.3 to 224.9
Current feature is Adult_mortality
Please enter the value for Adult_mortality. Please enter a number, expected range is 49.384 to 719.3605
Current feature is GDP_per_capita
Please enter the value for GDP_per_capita. Please enter a number, expected range is 148 to 112418
Current feature is Schooling
Please enter the value for Schooling. Please enter a number, expected range is 1.1 to 14.1
Current feature is Economy_