# Model comparison
This notebook evaluates a model's predictions with the agreed-upon error metrics (MAE and MAPE) so that its results can be compared with those of the GP models.

In [11]:
import numpy as np
import pandas as pds
import matplotlib.pyplot as plt

In [20]:
"""
Some loss functions
"""
def basic_mae(preds, labels):
    """Mean absolute error between predictions and labels.

    Parameters
    ----------
    preds : array-like of numbers
        Predicted values.
    labels : array-like of numbers
        Ground-truth values. Only the first ``len(preds)`` entries are used.

    Returns
    -------
    list
        One-element list ``[mae]``. ``[nan]`` when ``preds`` is empty.

    Raises
    ------
    IndexError
        If ``labels`` has fewer entries than ``preds``.
    """
    preds = np.asarray(preds, dtype=float)
    n = len(preds)
    if n == 0:
        # The mean of an empty slice is undefined; report NaN rather than
        # dividing by zero.
        return [float('nan')]

    labels = np.asarray(labels, dtype=float)
    if len(labels) < n:
        # Guard explicitly so a length-1 label array is never silently broadcast.
        raise IndexError('labels has fewer entries than preds')

    return [float(np.mean(np.abs(preds - labels[:n])))]

def mape(preds, labels):
    """Mean absolute percentage error, expressed as a fraction (0.1 == 10 %).

    Parameters
    ----------
    preds : array-like of numbers
        Predicted values.
    labels : array-like of numbers
        Ground-truth values, used as the denominator of each relative error.
        Only the first ``len(preds)`` entries are used.

    Returns
    -------
    list
        One-element list ``[mape]``. ``[nan]`` when ``preds`` is empty.
        A label equal to zero makes the result non-finite (inf or nan).

    Raises
    ------
    IndexError
        If ``labels`` has fewer entries than ``preds``.
    """
    preds = np.asarray(preds, dtype=float)
    n = len(preds)
    if n == 0:
        # The mean of an empty slice is undefined; report NaN rather than
        # dividing by zero.
        return [float('nan')]

    labels = np.asarray(labels, dtype=float)
    if len(labels) < n:
        # Guard explicitly so a length-1 label array is never silently broadcast.
        raise IndexError('labels has fewer entries than preds')
    labels = labels[:n]

    return [float(np.mean(np.abs((labels - preds) / labels)))]

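Pick the name of your model. It is used as the base name of the input file (`<model_name>.pkl`) and of the saved error table (`<model_name>_errors.pkl`).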

In [21]:
# Base file name (without extension) of the model results to evaluate.
model_name = 'ANN_M4_203_downsampled_benchmark'

In [22]:
# Load the model's results table from '<model_name>.pkl' and preview its first rows.
# NOTE(review): unpickling can execute arbitrary code - only load pickle files
# that come from a trusted source.
data = pds.read_pickle(model_name + '.pkl')
data.head()

Unnamed: 0,seg,journey,speed,pred,label
0,1,12,0.671067,17.630503,20.0
1,1,12,0.678218,16.53756,19.0
2,1,12,0.680968,15.687718,18.0
3,1,12,0.684268,14.78297,17.0
4,1,12,0.689219,13.821644,16.0


In [23]:
# Fractions of each segment's leading rows over which the errors are evaluated,
# keyed by the suffix used in the output column names (e_20, ep_20, ...).
prefix_fractions = {'20': 0.2, '40': 0.4, '60': 0.6, '80': 0.8}

# One error DataFrame per journey, each holding one row per segment.
jour_frames = []

for ij, jour in data.groupby('journey'):
    seg_records = []

    for iss, seg in jour.groupby('seg'):
        # Cut-offs are taken relative to the last positional index (len - 1).
        tot = len(seg) - 1
        errs = {'journey': ij, 'segment': iss}

        for suffix, frac in prefix_fractions.items():
            cutoff = int(np.round(tot * frac))
            prefix = seg.iloc[0:cutoff]
            preds = prefix.pred.values
            labels = prefix.label.values
            # The metric helpers return one-element lists; unwrap to scalars.
            errs['e_' + suffix] = basic_mae(preds, labels)[0]    # MAE
            errs['ep_' + suffix] = mape(preds, labels)[0]        # MAPE (fraction)

        seg_records.append(errs)

    # Build the journey's frame once from all of its segment records instead of
    # concatenating a one-row DataFrame per segment.
    jour_frames.append(pds.DataFrame(seg_records))
        

In [24]:
# Stack the per-journey frames into one table with a fresh 0..n-1 row index.
ers = pds.concat(jour_frames, ignore_index=True)

In [7]:
#ers = pds.read_pickle('ANN_M2_downsampled_203_errors.pkl')

In [25]:
# Preview the first 30 rows of the error table (one row per journey/segment pair).
ers.head(30)

Unnamed: 0,journey,segment,e_20,ep_20,e_40,ep_40,e_60,ep_60,e_80,ep_80
0,12,1,2.340312,0.126738,2.303402,0.142124,2.153214,0.154044,2.063593,0.186884
1,12,2,2.473087,0.07599,2.077112,0.072773,1.807012,0.074554,1.896699,0.107603
2,12,3,2.386609,0.059863,1.676261,0.045508,1.585614,0.05212,1.529981,0.065203
3,12,4,1.00325,0.04986,1.529191,0.090963,1.624426,0.11297,1.741394,0.165264
4,12,5,1.27895,0.03084,1.565394,0.044517,1.600077,0.053316,1.308188,0.048415
5,12,6,3.301879,0.078528,3.341284,0.092419,3.41833,0.114639,3.303648,0.136327
6,12,7,1.786196,0.033889,3.278906,0.074579,3.344624,0.085559,3.098446,0.099743
7,12,8,1.826206,0.062026,1.490391,0.059889,1.725758,0.087962,1.915303,0.135893
8,12,9,9.22286,0.080794,9.653747,0.095127,7.432264,0.079399,6.647825,0.089133
9,12,10,2.640528,0.064808,1.788125,0.04827,2.020421,0.071415,2.30475,0.118184


In [26]:
# Column-wise mean over all journey/segment rows.
# NOTE: the 'journey' and 'segment' identifier columns are averaged too; their
# means carry no meaning and should be ignored.
ers.mean(axis=0)

journey    1818.397222
segment       6.000000
e_20          4.629005
ep_20         0.085431
e_40          4.111307
ep_40         0.088713
e_60          3.762966
ep_60         0.097499
e_80          3.374168
ep_80         0.111144
dtype: float64

In [27]:
# Per-segment error averages, pooled over all journeys and over the four
# prefix lengths (20/40/60/80 %).
# The result lists are deliberately not named `mae` / `mape`: a list called
# `mape` would rebind the name of the mape() loss function, and any later
# re-run of a cell that calls it would fail with "'list' object is not callable".
mae_cols = ['e_20', 'e_40', 'e_60', 'e_80']
mape_cols = ['ep_20', 'ep_40', 'ep_60', 'ep_80']

segment_mae = []
segment_mape = []
# sort=False keeps the segments in order of first appearance in `ers`.
for _, seg_data in ers.groupby('segment', sort=False):
    mae_total = sum(seg_data[col].sum() for col in mae_cols)
    mape_total = sum(seg_data[col].sum() for col in mape_cols)
    segment_mae.append(mae_total / (len(seg_data) * len(mae_cols)))
    segment_mape.append(mape_total / (len(seg_data) * len(mape_cols)))

# Print each metric as one '&'-separated row (presumably for pasting into a
# LaTeX table - confirm with the report).
for value in segment_mae:
    print('&', str(round(value, 2)), end='')
print('\n')
# MAPE is stored as a fraction; it is printed as a percentage.
for value in segment_mape:
    print('&', str(round(value*100, 2)), end='')

& 3.27& 2.17& 2.64& 2.23& 2.95& 3.3& 5.12& 1.73& 10.12& 4.12& 6.0

& 9.5& 8.19& 8.33& 10.83& 7.47& 8.53& 9.73& 8.22& 8.95& 10.7& 14.81

In [10]:
# Persist the per-journey/segment error table as '<model_name>_errors.pkl'.
ers.to_pickle(model_name+'_errors.pkl')