In [None]:
import numpy as np
import pandas as pd

In [None]:
# MAPE implemented in latest versions of SKLearn!
# Not yet in Colab!
!pip install -U scikit-learn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scikit-learn
  Downloading scikit_learn-1.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.0.2
    Uninstalling scikit-learn-1.0.2:
      Successfully uninstalled scikit-learn-1.0.2
Successfully installed scikit-learn-1.2.0


In [None]:
from sklearn.metrics import mean_absolute_percentage_error, \
  mean_absolute_error, r2_score, mean_squared_error

In [None]:
# Load the CoinDesk BTC/USD price history; the 'Date' column is used as the
# index and parsed as dates.
df = pd.read_csv(
  'BTC_USD_2013-10-01_2021-05-18-CoinDesk.csv',
  index_col='Date',
  parse_dates=True,
)

In [None]:
# Peek at the first rows to check the columns and the date index.
df.head()

Unnamed: 0_level_0,Currency,Closing Price (USD),24h Open (USD),24h High (USD),24h Low (USD)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-10-01,BTC,123.65499,124.30466,124.75166,122.56349
2013-10-02,BTC,125.455,123.65499,125.7585,123.63383
2013-10-03,BTC,108.58483,125.455,125.66566,83.32833
2013-10-04,BTC,118.67466,108.58483,118.675,107.05816
2013-10-05,BTC,121.33866,118.67466,121.93633,118.00566


In [None]:
# Naive (persistence) forecast: the prediction for each row is the previous
# row's closing price. The first row has no predecessor, so it becomes NaN.
df['ClosePrediction'] = df['Closing Price (USD)'].shift(periods=1)

In [None]:
# Check that ClosePrediction lags the closing price by exactly one row.
df.head()

Unnamed: 0_level_0,Currency,Closing Price (USD),24h Open (USD),24h High (USD),24h Low (USD),ClosePrediction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-10-01,BTC,123.65499,124.30466,124.75166,122.56349,
2013-10-02,BTC,125.455,123.65499,125.7585,123.63383,123.65499
2013-10-03,BTC,108.58483,125.455,125.66566,83.32833,125.455
2013-10-04,BTC,118.67466,108.58483,118.675,107.05816,108.58483
2013-10-05,BTC,121.33866,118.67466,121.93633,118.00566,118.67466


In [None]:
# Skip the first row: shift(1) left it without a prediction (NaN).
y_true = df['Closing Price (USD)'].iloc[1:]
y_pred = df['ClosePrediction'].iloc[1:]

## Metrics

Main idea: get a feel for how the values relate to one another. What's "good"? What's "bad"? If the $R^2$ is "good", will the MAE also be "good"?

In [None]:
# SSE: sum of squared errors, i.e. the residual vector dotted with itself
(y_true - y_pred) @ (y_true - y_pred)

792986952.351397

In [None]:
# MSE: mean squared error, computed by scikit-learn
mean_squared_error(y_true, y_pred)

284632.78978872823

In [None]:
# MSE again, by hand: the SSE divided by the number of samples
# Don't be afraid to implement things yourself!
# It should be easy (and good exercise for your brain)
(y_true - y_pred) @ (y_true - y_pred) / len(y_true)

284632.7897887283

In [None]:
# RMSE: root mean squared error
# Take the square root explicitly instead of passing squared=False: that
# keyword was deprecated in scikit-learn 1.4 and removed in 1.6, so it raises
# a TypeError once the unpinned `pip install -U scikit-learn` pulls in a
# recent release. This form works on every scikit-learn version.
np.sqrt(mean_squared_error(y_true, y_pred))

533.509877873623

In [None]:
# RMSE again, by hand: square root of the hand-computed MSE
np.sqrt((y_true - y_pred) @ (y_true - y_pred) / len(y_true))

533.5098778736232

In [None]:
# MAE: mean absolute error
mean_absolute_error(y_true, y_pred)

197.66592863122122

In [None]:
# R^2: coefficient of determination
# Wow, what a great prediction!
# Why is it so good? Be suspicious...
# (Hint: the "prediction" is simply the previous row's closing price.)
r2_score(y_true, y_pred)

0.9975986533166566

In [None]:
# MAPE: mean absolute percentage error
# (scikit-learn returns a fraction, not a value in percent)
mean_absolute_percentage_error(y_true, y_pred)

0.027526664563160583

## sMAPE

$$ E = \frac{1}{N} \sum_{i=1}^N \frac{|y_i - \hat{y}_i|}{(|y_i| + |\hat{y}_i|)/2} $$

In [None]:
# sMAPE
# Not implemented, oh well...
# Good thing we are brave and know how to implement things!
def smape(y_true, y_pred):
  """Symmetric mean absolute percentage error (sMAPE).

  Averages |y - y_hat| / ((|y| + |y_hat|) / 2) over all samples.

  Args:
    y_true: actual values (NumPy array or pandas Series).
    y_pred: predicted values, same length as y_true.

  Returns:
    The mean ratio as a fraction (multiply by 100 for percent). A sample
    where actual and prediction are both zero counts as an error of 0.
  """
  numerator = np.abs(y_true - y_pred)
  denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
  # The denominator is zero only when actual and prediction are both zero, in
  # which case the numerator is zero as well. Dividing by 1 there scores the
  # sample as a perfect prediction (0) instead of producing 0/0 = NaN, which
  # would poison the mean for arrays or be silently dropped for Series.
  safe_denominator = denominator + (denominator == 0)
  ratio = numerator / safe_denominator
  return ratio.mean()

# sMAPE of the naive forecast (a fraction, like the MAPE computed earlier)
smape(y_true, y_pred)

0.027487479886646685