# init

## imports

In [4]:
import math
import numpy as np
import os
import pandas as pd
import warnings

from tabulate import tabulate

warnings.filterwarnings('ignore')

## configs

In [5]:
# PULocationID value used to select the airport rows from the prediction data.
# NOTE(review): presumably the taxi-zone id of the airport under study - confirm.
AIRPORT_LOCATIONID = 132

# CSV holding the actual daily trips, lag features and model predictions;
# '~' is expanded to the current user's home directory.
PREDICTION_DATA_PATH = os.path.expanduser('~/class/new_york/data/test.csv')

# load data

In [3]:
# Read the full prediction table (all pickup locations).
pred_df = pd.read_csv(PREDICTION_DATA_PATH)

# Keep only the rows whose pickup location is the airport zone.
airport_pred_df = pred_df.loc[pred_df['PULocationID'].eq(AIRPORT_LOCATIONID)]
airport_pred_df.head()

Unnamed: 0,Date,PULocationID,Daily_trips,1_day_lag,2_day_lag,3_day_lag,4_day_lag,5_day_lag,6_day_lag,7_day_lag,8_day_lag,9_day_lag,10_day_lag,Pred_dt,Pred_rf
16,2023-04-01,132,5677,6058.0,5907.0,5367.0,5445.0,6481.0,6520.0,5190.0,5188.0,5422.0,4905.0,5395.361702,5482.9
66,2023-04-02,132,6897,5677.0,6058.0,5907.0,5367.0,5445.0,6481.0,6520.0,5190.0,5188.0,5422.0,6139.97561,6324.83
116,2023-04-03,132,6313,6897.0,5677.0,6058.0,5907.0,5367.0,5445.0,6481.0,6520.0,5190.0,5188.0,6139.97561,6473.132
166,2023-04-04,132,5330,6313.0,6897.0,5677.0,6058.0,5907.0,5367.0,5445.0,6481.0,6520.0,5190.0,5395.361702,5331.028
216,2023-04-05,132,5244,5330.0,6313.0,6897.0,5677.0,6058.0,5907.0,5367.0,5445.0,6481.0,6520.0,5395.361702,5297.502


# evaluation

## utility functions

In [9]:
def mape(real, pred):
    """Return the mean absolute percentage error of ``pred`` against ``real``.

    Computed as ``mean(|pred - real| / |real|)`` and returned as a fraction
    (``0.1`` means 10 %); it is not multiplied by 100.

    Parameters
    ----------
    real : pandas Series or numpy array
        Observed values. Must support elementwise arithmetic and ``abs``.
    pred : pandas Series or numpy array
        Predicted values, elementwise compatible with ``real``.

    Returns
    -------
    float
        The result of ``.mean()`` over the elementwise relative errors.

    Notes
    -----
    The denominator is ``abs(real)``: dividing by the signed value would give
    negative "absolute" errors for negative actuals, letting them cancel out
    positive ones. For strictly positive actuals the result is unchanged.
    MAPE is undefined where ``real`` is 0; such entries are not filtered here.
    """
    return (abs(pred - real) / abs(real)).mean()

def rmse(real, pred):
    """Return the root mean squared error between ``real`` and ``pred``.

    Parameters
    ----------
    real : pandas Series or numpy array
        Observed values.
    pred : pandas Series or numpy array
        Predicted values, elementwise compatible with ``real``.

    Returns
    -------
    float
        ``sqrt(mean((pred - real) ** 2))``, in the same units as the inputs.
    """
    squared_errors = (pred - real) ** 2
    mean_squared_error = squared_errors.mean()
    return math.sqrt(mean_squared_error)

## methods

In [10]:
# Collects the scores per method: method name -> {'mape': ..., 'rmse': ...}.
results_dict = dict()

### yesterday

In [11]:
# Naive baseline: use the `1_day_lag` column (previous day's trips) as the prediction.
results_dict['yesterday'] = {
    metric_name: metric_fn(airport_pred_df['Daily_trips'], airport_pred_df['1_day_lag'])
    for metric_name, metric_fn in (('mape', mape), ('rmse', rmse))
}

### last week

In [12]:
# Naive baseline: use the `7_day_lag` column as the prediction.
results_dict['last_week'] = {
    metric_name: metric_fn(airport_pred_df['Daily_trips'], airport_pred_df['7_day_lag'])
    for metric_name, metric_fn in (('mape', mape), ('rmse', rmse))
}

### decision tree 

In [13]:
# Score the predictions stored in the `Pred_dt` column against the actual daily trips.
results_dict['decision_tree'] = {
    metric_name: metric_fn(airport_pred_df['Daily_trips'], airport_pred_df['Pred_dt'])
    for metric_name, metric_fn in (('mape', mape), ('rmse', rmse))
}

### random forest

In [14]:
# Score the predictions stored in the `Pred_rf` column against the actual daily trips.
results_dict['random_forest'] = {
    metric_name: metric_fn(airport_pred_df['Daily_trips'], airport_pred_df['Pred_rf'])
    for metric_name, metric_fn in (('mape', mape), ('rmse', rmse))
}

## summarize

In [15]:
# One row per evaluated method, [name, MAPE, RMSE], in the order the methods
# were added to results_dict.
result_table = [
    [method, scores['mape'], scores['rmse']]
    for method, scores in results_dict.items()
]
print(
    tabulate(
        result_table,
        headers=["Method","MAPE", "RMSE"],
        tablefmt="simple_grid",
    )
)

┌───────────────┬───────────┬─────────┐
│ Method        │      MAPE │    RMSE │
├───────────────┼───────────┼─────────┤
│ yesterday     │ 0.105091  │ 772.483 │
├───────────────┼───────────┼─────────┤
│ last_week     │ 0.0882864 │ 658.54  │
├───────────────┼───────────┼─────────┤
│ decision_tree │ 0.086866  │ 659.9   │
├───────────────┼───────────┼─────────┤
│ random_forest │ 0.0791774 │ 580.542 │
└───────────────┴───────────┴─────────┘
