# Model comparison
This notebook evaluates a model according to agreed-upon metrics (MAE and MAPE over the first 20%, 40%, 60% and 80% of each segment) so that its results can be compared with the GP models.

In [1]:
import numpy as np
import pandas as pds
import matplotlib.pyplot as plt

In [2]:
"""
Some loss functions
"""

def mae(preds, labels):
    """Return the mean absolute error between predictions and labels.

    Args:
        preds: Array-like of predicted values.
        labels: Array-like of true values, element-wise compatible with
            ``preds``.

    Returns:
        The sum of ``|preds - labels|`` divided by ``len(preds)``.
    """
    # Convert up front so plain Python lists are accepted as well as arrays.
    abs_err = np.absolute(np.asarray(preds) - np.asarray(labels))
    # No debug printing here: the function is a pure computation.
    return np.sum(abs_err) / len(preds)

def basic_mae(preds, labels):
    """Return the mean absolute error as a one-element list.

    The result is wrapped in a list so it can be used directly as a
    single-row column when building a DataFrame from a dict.

    Args:
        preds: 1-D array-like of predicted values.
        labels: 1-D array-like of true values, same length as ``preds``.

    Returns:
        ``[mean(|preds - labels|)]``, or ``[nan]`` when the input is empty.

    Raises:
        ValueError: If ``preds`` and ``labels`` do not have the same shape.
    """
    preds = np.asarray(preds, dtype=float)
    labels = np.asarray(labels, dtype=float)
    if preds.shape != labels.shape:
        raise ValueError(
            'preds and labels must have the same shape, got {} and {}'.format(
                preds.shape, labels.shape))

    # An empty input has no defined mean; report NaN instead of dividing by zero.
    if preds.size == 0:
        return [float('nan')]

    return [float(np.mean(np.abs(preds - labels)))]

def mape(preds, labels):
    """Return the mean absolute percentage error as a one-element list.

    The value is a fraction (it is not multiplied by 100). The result is
    wrapped in a list so it can be used directly as a single-row column when
    building a DataFrame from a dict.

    Args:
        preds: 1-D array-like of predicted values.
        labels: 1-D array-like of true values, same length as ``preds``.

    Returns:
        ``[mean(|(labels - preds) / labels|)]``, or ``[nan]`` when the input
        is empty.

    Raises:
        ValueError: If ``preds`` and ``labels`` do not have the same shape.
    """
    preds = np.asarray(preds, dtype=float)
    labels = np.asarray(labels, dtype=float)
    if preds.shape != labels.shape:
        raise ValueError(
            'preds and labels must have the same shape, got {} and {}'.format(
                preds.shape, labels.shape))

    # An empty input has no defined mean; report NaN instead of dividing by zero.
    if preds.size == 0:
        return [float('nan')]

    # A label of exactly zero makes its term infinite (NaN if the prediction
    # is zero too), following floating-point division rules.
    rel_err = np.abs((labels - preds) / labels)
    return [float(np.mean(rel_err))]

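Pick the name of your model. It is used as the stem of the pickle file that is loaded (`<model_name>.pkl`) and of the error file that is written at the end (`<model_name>_203_errors.pkl`).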

In [3]:
# File stem shared by the input pickle ('<model_name>.pkl') and the error
# file written at the end of the notebook.
model_name = "ANN_M3"

In [4]:
# Load the predictions stored for the selected model.
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
predictions_path = '{}.pkl'.format(model_name)
data = pds.read_pickle(predictions_path)
data.head()

Unnamed: 0,seg,journey,speed,pred,label
0,1,14,0.0,127.827164,209.0
1,1,14,0.0,127.827164,208.0
2,1,14,0.0,127.827164,207.0
3,1,14,0.0,127.827164,206.0
4,1,14,0.0,127.827164,205.0


In [5]:
# Each entry is (column suffix, fraction of the segment). For every segment the
# errors are computed over the leading rows up to that fraction.
prefix_fractions = ((20, 0.2), (40, 0.4), (60, 0.6), (80, 0.8))

# One DataFrame per journey, holding one row of error metrics per segment.
jour_frames = []

for ij, jour in data.groupby('journey'):
    seg_records = []

    for iss, seg in jour.groupby('seg'):
        # NOTE(review): fractions are taken of len(seg) - 1 rather than
        # len(seg); presumably the last row is meant to be excluded - confirm.
        tot = len(seg) - 1
        errs = {'journey': ij, 'segment': iss}

        for pct, frac in prefix_fractions:
            cutoff = int(np.round(tot * frac))
            prefix = seg.iloc[0:cutoff]
            mae_key = 'e_{}'.format(pct)
            mape_key = 'ep_{}'.format(pct)

            if len(prefix) == 0:
                # Very short segments round to an empty prefix; record NaN
                # instead of letting the error functions divide by zero.
                errs[mae_key] = np.nan
                errs[mape_key] = np.nan
                continue

            # Both helpers return a one-element list; unwrap the scalar.
            errs[mae_key] = basic_mae(prefix.pred.values, prefix.label.values)[0]
            errs[mape_key] = mape(prefix.pred.values, prefix.label.values)[0]

        seg_records.append(errs)

    # Build the journey frame once from all records rather than concatenating
    # many one-row frames.
    jour_frames.append(pds.DataFrame(seg_records))
        

In [6]:
# Stack the per-journey frames into a single table with a fresh 0..n-1 index.
ers = pds.concat(jour_frames, ignore_index=True)

In [7]:
# Preview the first 30 rows of the error table.
ers.head(n=30)

Unnamed: 0,journey,segment,e_20,ep_20,e_40,ep_40,e_60,ep_60,e_80,ep_80
0,14,1,60.672911,0.319047,39.736075,0.22073,34.189972,0.224828,36.286486,0.328745
1,14,2,3.270547,0.061715,6.540682,0.153696,5.024845,0.124462,3.999491,0.104693
2,14,3,5.12998,0.099573,5.502921,0.117946,4.006997,0.090285,3.153392,0.075071
3,14,4,4.757675,0.125943,5.053812,0.151177,3.408849,0.105175,2.825399,0.101461
4,14,5,10.352041,0.141202,8.190961,0.12523,8.478311,0.159118,8.036103,0.191212
5,14,6,2.339331,0.06168,1.650536,0.048503,1.388791,0.046232,1.131266,0.042671
6,14,7,3.669889,0.042382,10.407999,0.149282,7.769501,0.115767,5.931781,0.091494
7,14,8,4.776344,0.119267,6.652654,0.19681,5.652252,0.182691,4.651919,0.166015
8,14,9,10.906591,0.082736,7.451508,0.059279,6.105131,0.055184,4.991795,0.05029
9,14,10,14.381081,0.256919,15.526553,0.316761,12.828451,0.283431,9.9753,0.235727


In [8]:
# Column-wise averages over all journeys and segments.
ers.mean(axis='index')

journey    1863.087500
segment       6.000000
e_20         12.543861
ep_20         0.137459
e_40         10.503873
ep_40         0.137599
e_60          9.271104
ep_60         0.135903
e_80          8.151079
ep_80         0.136152
dtype: float64

In [9]:
# Column-wise averages again, this time leaving out the rows for segment 1.
not_first_segment = ers['segment'] != 1
ers.loc[not_first_segment].mean(axis='index')

journey    1863.087500
segment       6.500000
e_20          8.651322
ep_20         0.122886
e_40          7.907544
ep_40         0.129292
e_60          6.741132
ep_60         0.123276
e_80          5.624530
ep_80         0.118454
dtype: float64

In [10]:
# Persist the error table next to the model's predictions.
errors_path = '{}_203_errors.pkl'.format(model_name)
ers.to_pickle(errors_path)