# Model Testing

## Setup

In [1]:
import sys
sys.path.append('../src')

from preprocessing import load_data
from features import splitting_data, feature_engineering, scaling, add_constant_column
from models import train_model, full_pipeline, calculate_rmse

## Train a model

In [2]:
# Column selection for the smaller of the two models compared in this notebook.
# NOTE(review): 'Life_expectancy' sits in this list alongside the predictors;
# presumably it is the target that full_pipeline separates out - confirm in models.py.
minimal_cols = [
    'Region',
    'Under_five_deaths',
    'Adult_mortality',
    'GDP_per_capita',
    'Schooling',
    'Economy_status_Developed',
    'Life_expectancy',
]

# Column selection for the larger model: every minimal column (same order),
# followed by the additional columns listed here.
elaborate_cols = [
    *minimal_cols,
    'Alcohol_consumption',
    'Hepatitis_B',
    'Measles',
    'BMI',
    'Polio',
    'Diphtheria',
    'Incidents_HIV',
    'Thinness_ten_nineteen_years',
    'Thinness_five_nine_years',
]

In [9]:
# Run the project pipeline once per column selection. Each call yields five
# values, unpacked here as: train features, test features, train target,
# test target, and the results object whose .predict() is used in the cells below.
# (Names reflect how the values are used in this notebook; see models.full_pipeline
# for the authoritative contract.)

# Minimal column set -> "_m" variables.
(
    X_train_m_fe,
    X_test_m_fe,
    y_train_m,
    y_test_m,
    minimal_results,
) = full_pipeline(minimal_cols)

# Elaborate column set -> "_e" variables.
(
    X_train_e_fe,
    X_test_e_fe,
    y_train_e,
    y_test_e,
    elaborate_results,
) = full_pipeline(elaborate_cols)

## Test our minimal model

In [7]:
# Predict on the training features using the minimal model's results object
# (the fifth value returned by full_pipeline(minimal_cols)).
y_train_m_pred = minimal_results.predict(X_train_m_fe)

# Score those predictions against the training targets to get the training RMSE.
minimal_rmse_train = calculate_rmse(y_train_m, y_train_m_pred)

print(f'Root Mean Squared Error for training data: {minimal_rmse_train}')

Root Mean Squared Error for training data: 1.4146794518920598


But how does it fare on our testing data?

In [8]:
# Predict on the held-out TEST features with the same minimal results object,
# then score against the test targets to get the test RMSE.
y_test_m_pred = minimal_results.predict(X_test_m_fe)

minimal_rmse_test = calculate_rmse(y_test_m, y_test_m_pred)

print(f'Root Mean Squared Error for testing data: {minimal_rmse_test}')

Root Mean Squared Error for testing data: 1.7057435503581893


## Test our elaborate model

In [10]:
# Predict on the training features using the elaborate model's results object
# (the fifth value returned by full_pipeline(elaborate_cols)).
y_train_e_pred = elaborate_results.predict(X_train_e_fe)

# Score those predictions against the training targets to get the training RMSE.
elaborate_rmse_train = calculate_rmse(y_train_e, y_train_e_pred)

print(f'Root Mean Squared Error for training data: {elaborate_rmse_train}')

Root Mean Squared Error for training data: 1.388716500898901


In [11]:
# Predict on the held-out TEST features with the same elaborate results object,
# then score against the test targets to get the test RMSE.
y_test_e_pred = elaborate_results.predict(X_test_e_fe)

elaborate_rmse_test = calculate_rmse(y_test_e, y_test_e_pred)

print(f'Root Mean Squared Error for testing data: {elaborate_rmse_test}')

Root Mean Squared Error for testing data: 1.6946588444932533


## Let's compare!