# init

## imports

In [1]:
import math
import numpy as np
import os
import pandas as pd
import warnings

from tabulate import tabulate

# Silence every warning category for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation/runtime warnings
# raised by pandas or numpy are hidden as well -- consider narrowing it.
warnings.filterwarnings('ignore')

## configs

In [3]:
# Location of the CSV that is scored below; the leading `~` is expanded to the
# current user's home directory, so the file must exist under that home.
PREDICTION_DATA_PATH = os.path.expanduser('~/class/new_york/data/test.csv')

# load data

In [4]:
# Read the prediction CSV into a DataFrame; every metric below is computed
# from columns of this frame.
pred_df = pd.read_csv(PREDICTION_DATA_PATH)
# Preview the first rows as the cell output.
pred_df.head()

Unnamed: 0,Date,PULocationID,Daily_trips,1_day_lag,2_day_lag,3_day_lag,4_day_lag,5_day_lag,6_day_lag,7_day_lag,8_day_lag,9_day_lag,10_day_lag,Pred_dt,Pred_rf
0,2023-04-01,13,518,672.0,753.0,760.0,746.0,556.0,402.0,480.0,605.0,740.0,710.0,469.793651,458.082
1,2023-04-01,24,335,318.0,316.0,275.0,238.0,250.0,268.0,269.0,300.0,266.0,272.0,267.230769,302.6
2,2023-04-01,43,1888,1776.0,1686.0,1489.0,1537.0,1435.0,1568.0,1985.0,1714.0,1831.0,1348.0,1891.365079,1970.318
3,2023-04-01,48,3868,3224.0,3112.0,2934.0,2628.0,2392.0,3034.0,3780.0,3073.0,3114.0,2880.0,3651.555556,3615.644
4,2023-04-01,50,842,774.0,675.0,668.0,613.0,564.0,712.0,772.0,639.0,645.0,595.0,803.0,758.004


# evaluation

## utility functions

In [9]:
def mape(real, pred):
    """Mean absolute percentage error between `real` and `pred`.

    Both arguments must support element-wise arithmetic and expose `.mean()`
    (pandas Series or numpy arrays). The result is a fraction, i.e. it is not
    multiplied by 100. There is no guard for zeros in `real`: those entries
    are divided by zero.
    """
    relative_error = abs(pred - real) / real
    return relative_error.mean()

def rmse(real, pred):
    """Root mean squared error between `real` and `pred`.

    Both arguments must support element-wise arithmetic and expose `.mean()`
    (pandas Series or numpy arrays). The result is in the same units as the
    inputs.
    """
    squared_error = (pred - real) ** 2
    return math.sqrt(squared_error.mean())

## methods

In [14]:
# Collects one entry per forecasting method: method name -> {'mape': ..., 'rmse': ...}.
results_dict = dict()

### yesterday

In [15]:
# Naive baseline: use the 1_day_lag column as the forecast for Daily_trips.
results_dict['yesterday'] = {
    metric_name: metric_fn(pred_df['Daily_trips'], pred_df['1_day_lag'])
    for metric_name, metric_fn in (('mape', mape), ('rmse', rmse))
}

### last week

In [16]:
# Naive baseline: use the 7_day_lag column as the forecast for Daily_trips.
results_dict['last_week'] = {
    metric_name: metric_fn(pred_df['Daily_trips'], pred_df['7_day_lag'])
    for metric_name, metric_fn in (('mape', mape), ('rmse', rmse))
}

### decision tree 

In [18]:
# Score the Pred_dt column against Daily_trips (presumably the decision
# tree's predictions, going by the section title).
results_dict['decision_tree'] = {
    metric_name: metric_fn(pred_df['Daily_trips'], pred_df['Pred_dt'])
    for metric_name, metric_fn in (('mape', mape), ('rmse', rmse))
}

### random forest

In [19]:
# Score the Pred_rf column against Daily_trips (presumably the random
# forest's predictions, going by the section title).
results_dict['random_forest'] = {
    metric_name: metric_fn(pred_df['Daily_trips'], pred_df['Pred_rf'])
    for metric_name, metric_fn in (('mape', mape), ('rmse', rmse))
}

## summarize

In [22]:
# One table row per method, in the order the methods were added to results_dict.
result_table = [
    [method_name, scores['mape'], scores['rmse']]
    for method_name, scores in results_dict.items()
]
# Render the comparison as a boxed text grid.
print(tabulate(result_table, headers=["Method","MAPE", "RMSE"], tablefmt="simple_grid"))

┌───────────────┬───────────┬─────────┐
│ Method        │      MAPE │    RMSE │
├───────────────┼───────────┼─────────┤
│ yesterday     │ 0.177427  │ 493.059 │
├───────────────┼───────────┼─────────┤
│ last_week     │ 0.0964221 │ 279.786 │
├───────────────┼───────────┼─────────┤
│ decision_tree │ 0.0958253 │ 274.928 │
├───────────────┼───────────┼─────────┤
│ random_forest │ 0.0822316 │ 238.989 │
└───────────────┴───────────┴─────────┘
