In [1]:
import numpy as np
import pandas as pd

In [16]:
from sklearn.metrics import (
    mean_absolute_error,
    mean_absolute_percentage_error,
    mean_squared_error,
    r2_score,
    root_mean_squared_error,
)

In [4]:
# Load the SPY price table; the 'Date' column becomes the index and is parsed as dates.
df = pd.read_csv(
    'Datasets/SPY.csv',
    index_col='Date',
    parse_dates=True,
)

In [5]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04,112.370003,113.389999,111.510002,113.330002,92.246048,118944600
2010-01-05,113.260002,113.68,112.849998,113.629997,92.490204,111579900
2010-01-06,113.519997,113.989998,113.43,113.709999,92.555328,116074400
2010-01-07,113.5,114.330002,113.18,114.190002,92.94606,131091100
2010-01-08,113.889999,114.620003,113.660004,114.57,93.255348,126402800


In [6]:
# Naive baseline: the prediction for each row is the Close of the row before it.
# The first row has no predecessor, so its prediction is NaN.
df['ClosePrediction'] = df['Close'].shift(periods=1)

In [7]:
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,ClosePrediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2010-01-04,112.370003,113.389999,111.510002,113.330002,92.246048,118944600,
2010-01-05,113.260002,113.68,112.849998,113.629997,92.490204,111579900,113.330002
2010-01-06,113.519997,113.989998,113.43,113.709999,92.555328,116074400,113.629997
2010-01-07,113.5,114.330002,113.18,114.190002,92.94606,131091100,113.709999
2010-01-08,113.889999,114.620003,113.660004,114.57,93.255348,126402800,114.190002


In [8]:
# Row 0 has no prediction (the shifted column is NaN there), so score from row 1 onward.
y_true = df['Close'].iloc[1:]
y_pred = df['ClosePrediction'].iloc[1:]

## **Metrics**
Main idea: get a feel for how the values relate to one another. What's "good"? What's "bad"? If the $R^2$ is "good", will the MAE also be "good"?

In [12]:
# SSE: the residual vector dotted with itself, i.e. the sum of squared errors
residuals = y_true - y_pred
residuals.dot(residuals)

6330.3742894926045

In [13]:
# MSE (scikit-learn)
mean_squared_error(y_true=y_true, y_pred=y_pred)

2.798573956451196

In [14]:
# MSE again, by hand: sum of squared errors divided by the number of samples
# Don't be afraid to implement things yourself!
# It should be easy (and good exercise for your brain)
residuals = y_true - y_pred
residuals.dot(residuals) / len(y_true)

2.7985739564511958

In [18]:
# RMSE (scikit-learn)
root_mean_squared_error(y_true=y_true, y_pred=y_pred)

1.672893886787562

In [19]:
# RMSE again, by hand: square root of the mean squared error
residuals = y_true - y_pred
np.sqrt(residuals.dot(residuals) / len(y_true))

1.6728938867875618

In [20]:
# MAE (scikit-learn): mean absolute error, in the same units as Close
mean_absolute_error(y_true=y_true, y_pred=y_pred)

1.1457559803120336

In [21]:
# R^2
# Wow, what a great prediction!
# Why is it so good? Be suspicious...
# Hint: y_pred is just the previous row's Close, and R^2 scores it against
# a baseline that always predicts the mean of y_true.
r2_score(y_true=y_true, y_pred=y_pred)

0.9989603259063914

In [22]:
# MAPE (scikit-learn); the result is a fraction, so multiply by 100 for a percentage
mean_absolute_percentage_error(y_true=y_true, y_pred=y_pred)

0.006494073151422373

## sMAPE
$$
E = \frac{1}{N} \sum_{i=1}^{N} \frac{| y_i - \hat{y}_i |}{\frac{| y_i | + | \hat{y}_i |}{2}}
$$

In [23]:
# sMAPE
# scikit-learn's metrics module does not provide sMAPE, oh well...
# Good thing we are brave and know how to implement things!
def smape(y_true, y_pred):
  """Symmetric mean absolute percentage error, returned as a fraction.

  Computes mean(|y - y_hat| / ((|y| + |y_hat|) / 2)).

  Args:
    y_true: array-like of actual values (list, ndarray, or pandas Series).
    y_pred: array-like of predicted values, same length as y_true.

  Returns:
    The mean symmetric percentage error as a float; multiply by 100 for
    a percentage.

  Notes:
    Inputs are converted to float NumPy arrays, so values are compared
    position by position rather than aligned by index label.
    A pair where both values are zero would be 0/0; the denominator is
    floored at machine epsilon so such a pair contributes 0, not NaN.
    NaN values in the inputs propagate to the result.
  """
  y_true = np.asarray(y_true, dtype=float)
  y_pred = np.asarray(y_pred, dtype=float)
  numerator = np.abs(y_true - y_pred)
  half_sum = (np.abs(y_true) + np.abs(y_pred)) / 2
  # Floor the denominator so an exact-zero pair yields 0 / eps == 0.
  denominator = np.maximum(half_sum, np.finfo(float).eps)
  return np.mean(numerator / denominator)

smape(y_true, y_pred)

0.006491365814068417