# DEMO

In this notebook, the previously built model is demonstrated using the test data.

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import mlflow
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [10]:
def calc_metrics(actual_data: pd.Series, prediction_data: np.ndarray):
    """
    Calculate the metrics used to evaluate the model accuracy (mean squared error,
        root mean squared error, mean absolute error and mean absolute percentage error),
        print them and return them.
    
    Params:
        actual_data (Series): the actual (ground truth) target values
        prediction_data (ndarray): the values predicted by the model, given in the
            same order as actual_data
    
    Returns:
        mse (Float): the calculated mean squared error value
        rmse (Float): the calculated root mean squared error value
        mae (Float): the calculated mean absolute error value
        mape (Float): the calculated mean absolute percentage error value (in percent);
            not finite when actual_data contains zeros
    """
    mse = mean_squared_error(actual_data, prediction_data)
    # Take the root of the MSE instead of passing `squared=False`: that keyword was
    # deprecated in scikit-learn 1.4 and removed afterwards, so relying on it breaks
    # on current releases. For a single target column both forms give the same value.
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual_data, prediction_data)

    # Strip any pandas index from the predictions so the subtraction below is purely
    # positional; a Series with a different index would otherwise be re-aligned by
    # label and produce NaN.
    predicted = np.asarray(prediction_data)
    mape = np.mean(np.abs((actual_data - predicted) / actual_data)) * 100
    
    print('########## METRICS ##########')
    print('MSE:\t%s\nRMSE:\t%s\nMAE:\t%s\nMAPE:\t%s' % (mse, rmse, mae, mape))
    
    return mse, rmse, mae, mape

In [3]:
# Load the 2019 World Happiness Report and use the country/region name as row index
df = pd.read_csv('./data/WorldHappinessReport/2019.csv').set_index('Country or region')
# Preview the first rows
df.head()

Unnamed: 0_level_0,Overall rank,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
Country or region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Finland,1,7.769,1.34,1.587,0.986,0.596,0.153,0.393
Denmark,2,7.6,1.383,1.573,0.996,0.592,0.252,0.41
Norway,3,7.554,1.488,1.582,1.028,0.603,0.271,0.341
Iceland,4,7.494,1.38,1.624,1.026,0.591,0.354,0.118
Netherlands,5,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [4]:
# define target feature and create feature list
target_feature = 'Score'
# Every column except the target is used as a model input. Compare with `!=`:
# `not in target_feature` would be a substring test against the string 'Score'
# and would also drop any column whose name happens to be part of that word.
features = [column for column in df.columns if column != target_feature]

In [5]:
# Rebuild the hold-out set: 90 % of the rows go to training, 10 % to testing.
# The fixed random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df[features],
    df[target_feature],
    test_size=0.1,
    random_state=1,
)

In [8]:
# MLflow URI of the model to demonstrate (logged by a tracked run)
best_model = 'runs:/52896f1ea0d5442984343ea2a16f15bd/model'

# Load it through the generic pyfunc interface
model = mlflow.pyfunc.load_model(model_uri=best_model)

# Predict the target for the held-out feature rows
pred = model.predict(X_test)

In [9]:
# Display the predictions returned by the model for the test rows
pred

array([4.52480257, 5.4515138 , 5.98106308, 6.42235414, 6.33409593,
       6.22377316, 6.79745154, 4.701319  , 5.93693397, 3.88493053,
       3.79667232, 5.38532014, 5.0764164 , 5.12054551, 5.14261006,
       6.99603252])

In [13]:
# Compare the predictions with the actual test targets: prints the metrics and
# returns them as a (mse, rmse, mae, mape) tuple, which the cell also displays
calc_metrics(y_test, pred)

########## METRICS ##########
MSE:	0.03830085990963802
RMSE:	0.1957060548619741
MAE:	0.13137445717715995
MAPE:	2.6255197134238664


(0.03830085990963802,
 0.1957060548619741,
 0.13137445717715995,
 2.6255197134238664)