# Model comparison
This notebook evaluates a model according to the agreed-upon error metrics (MAE and MAPE, sampled at 20%, 40%, 60% and 80% of each segment) so that its results can be compared with those of the GP models.

In [1]:
import numpy as np
import pandas as pds
import matplotlib.pyplot as plt

In [2]:
"""
Some loss functions
"""
def basic_mae(pred, label):
    """Return the absolute error ``|pred - label|`` wrapped in a one-element list.

    The list wrapper allows the result to be used directly as a column value
    when a one-row DataFrame is built from a dict of such results.
    """
    return [abs(pred - label)]

def mape(pred, label):
    """Return the absolute relative error ``|(label - pred) / label|`` in a one-element list.

    The value is a fraction, not a percentage; it is multiplied by 100 where
    the results are printed. The result is undefined for ``label == 0``.
    """
    relative_error = (label - pred) / label
    return [abs(relative_error)]

Set the name of the model to evaluate. Its predictions are read from `<model_name>.pkl`.

In [3]:
# Base name of the model under evaluation; it is used to build the names of
# the pickle files that this notebook reads ('<name>.pkl') and writes
# ('<name>_errors.pkl').
model_name = 'ANN_M1_203_benchmark'

In [4]:
# Load the stored predictions of the selected model and preview the first rows.
# NOTE: unpickling can execute arbitrary code - only load files from a trusted source.
data = pds.read_pickle(f'{model_name}.pkl')
data.head()

Unnamed: 0,seg,journey,speed,pred,label
0,1,12,0.0,185.085464,188.0
1,1,12,0.0,184.085464,187.0
2,1,12,0.0,183.085464,186.0
3,1,12,0.0,182.085464,185.0
4,1,12,0.0,181.085464,184.0


In [5]:
# For every (journey, segment) pair, sample the segment at fixed fractions of
# its length and record the absolute and relative error of the prediction at
# each sample. The result is one DataFrame per journey, one row per segment.
checkpoints = (('20', 0.2), ('40', 0.4), ('60', 0.6), ('80', 0.8))
jour_frames = []

for ij, jour in data.groupby('journey'):
    seg_frames = []

    for iss, seg in jour.groupby('seg'):
        # Position of the last row; sample positions are fractions of it,
        # rounded to the nearest row.
        last_pos = len(seg) - 1
        record = {'journey': [ij], 'segment': [iss]}

        for tag, fraction in checkpoints:
            sample = seg.iloc[np.int_(np.round(last_pos * fraction))]
            record['e_' + tag] = basic_mae(sample.pred, sample.label)
            record['ep_' + tag] = mape(sample.pred, sample.label)

        seg_frames.append(pds.DataFrame(record))

    jour_frames.append(pds.concat(seg_frames))
        

In [6]:
# Stack the per-journey frames into a single error table with a fresh 0..n-1 index.
ers = pds.concat(jour_frames, ignore_index=True)

In [7]:
#ers = pds.read_pickle('ANN_M2_downsampled_203_errors.pkl')

In [8]:
# Preview the first rows of the error table (one row per journey/segment pair).
ers.head(30)

Unnamed: 0,journey,segment,e_20,ep_20,e_40,ep_40,e_60,ep_60,e_80,ep_80
0,12,1,2.914536,0.01943,2.914536,0.025792,2.914536,0.03886,2.914536,0.076698
1,12,2,9.349342,0.203247,9.349342,0.267124,9.349342,0.406493,9.349342,0.779112
2,12,3,6.650581,0.133012,6.650581,0.179745,6.650581,0.266023,6.650581,0.554215
3,12,4,28.741142,1.59673,28.741142,2.052939,28.741142,3.19346,28.741142,5.748228
4,12,5,20.037827,0.556606,20.037827,0.742142,20.037827,1.113213,20.037827,2.226425
5,12,6,1.740799,0.034133,1.740799,0.04581,1.740799,0.066954,1.740799,0.133908
6,12,7,2.168976,0.026777,2.168976,0.035557,2.168976,0.054224,2.168976,0.108449
7,12,8,5.427628,0.146693,5.427628,0.193844,5.427628,0.301535,5.427628,0.60307
8,12,9,12.036713,0.102878,12.036713,0.136781,12.036713,0.20753,12.036713,0.415059
9,12,10,1.327339,0.022885,1.327339,0.030868,1.327339,0.04577,1.327339,0.09481


In [11]:
# Mean of each error column over all (journey, segment) rows, printed as LaTeX
# table cells: first the absolute errors, then the relative errors in percent.
# Columns are addressed by name rather than by integer position: integer keys
# on a label-indexed Series only work through a deprecated positional fallback,
# and names keep the output independent of the column order of `ers`.
m = ers.mean(axis=0)
for col in ('e_20', 'e_40', 'e_60', 'e_80'):
    print('&',str(round(m[col], 2)), end='')

for col in ('ep_20', 'ep_40', 'ep_60', 'ep_80'):
    print('&',str(round(m[col]*100, 2)), end='')

& 14.79& 14.8& 14.79& 14.79& 30.8& 41.08& 61.58& 123.27

In [10]:
# Per-segment MAE and MAPE, pooled over all journeys and all four sample points.
# The result lists are deliberately NOT called `mae` / `mape`: binding a list to
# the name `mape` would overwrite the `mape` loss function, and re-running the
# error-computation cell afterwards would fail with "'list' object is not callable".
mae_cols = ['e_20', 'e_40', 'e_60', 'e_80']
mape_cols = ['ep_20', 'ep_40', 'ep_60', 'ep_80']
seg_mae = []
seg_mape = []
# sort=False keeps the segments in their order of first appearance in `ers`.
for _, seg_data in ers.groupby('segment', sort=False):
    # Number of pooled values: rows of this segment times sample points per row.
    n_values = len(seg_data) * len(mae_cols)
    seg_mae.append(sum(seg_data[col].sum() for col in mae_cols) / n_values)
    seg_mape.append(sum(seg_data[col].sum() for col in mape_cols) / n_values)

#LaTeX formatting
for value in seg_mae:
    print('&',str(round(value, 2)), end='')
print('\n')
for value in seg_mape:
    print('&',str(round(value*100, 2)), end='')

& 27.17& 11.39& 10.55& 13.11& 10.39& 11.87& 14.65& 11.94& 18.57& 15.43& 17.63

& 43.97& 66.35& 52.54& 125.04& 48.16& 59.76& 35.99& 101.22& 29.96& 59.15& 83.87

In [10]:
# Persist the error table next to the predictions as '<model_name>_errors.pkl'.
ers.to_pickle(f'{model_name}_errors.pkl')