In [1]:
import numpy as np
import pandas as pd

In [None]:
from sklearn.metrics import mean_absolute_percentage_error, \
  mean_absolute_error, r2_score, mean_squared_error, accuracy_score,precision_score,f1_score,roc_auc_score

In [None]:
# Table used for the naive forecast and the metric cells, indexed by its parsed 'Date' column.
df = pd.read_csv(
  'IBRX-returns.csv',
  parse_dates=True,
  index_col='Date',
)

In [None]:
# Second, independent load of the same file for the trading simulation;
# here the first CSV column becomes the (date-parsed) index.
df_returns = pd.read_csv(
  'IBRX-returns.csv',
  parse_dates=True,
  index_col=0,
)


In [None]:
# Preview the first five rows of the simulation frame.
df_returns.head(n=5)

In [None]:
# Preview the first five rows of the forecasting frame.
df.head(n=5)

In [None]:
# Naive persistence forecast: each row's prediction is the previous row's BOVA11.SA value
# (the first row therefore has no prediction and is NaN).
df['PREDICTION'] = df['BOVA11.SA'].shift(periods=1)

In [None]:
# Training window: skip the first two rows and hold out the final 1000 rows.
ytrain_true = df['BOVA11.SA'].iloc[2:-1000]
ytrain_pred = df['PREDICTION'].iloc[2:-1000]

In [None]:
# Direction labels for the training window: True where the value is strictly positive.
Ctrain_true = ytrain_true.gt(0)
Ctrain_pred = ytrain_pred.gt(0)

In [None]:
# Test window: the final 1000 rows.
ytest_true = df['BOVA11.SA'].iloc[-1000:]
ytest_pred = df['PREDICTION'].iloc[-1000:]

In [None]:
# Direction labels for the test window: True where the value is strictly positive.
Ctest_true = ytest_true.gt(0)
Ctest_pred = ytest_pred.gt(0)

In [None]:
# Move each BOVA11.SA value up one row, so every row holds the NEXT row's value
# (the last row becomes NaN).
# The shifted column is rebuilt from `df`, which was loaded from the same CSV and whose
# BOVA11.SA column is never modified, instead of shifting df_returns from itself.
# That keeps the cell idempotent: re-running it can no longer shift the column a second time.
# `.to_numpy()` assigns by position, so the result does not depend on the two frames
# sharing identical index labels, only on their identical row order.
df_returns['BOVA11.SA'] = df['BOVA11.SA'].shift(-1).to_numpy()
df_returns['BOVA11.SA'].tail()

In [None]:
# Boolean row masks over df's index, split at the last training timestamp.
train_idx, test_idx = (
  df.index <= Ctrain_pred.index[-1],
  df.index > Ctrain_pred.index[-1],
)

# Exclude the very first row from the train mask and the very last row from the test mask.
train_idx[0] = False
test_idx[-1] = False

In [None]:
# Long-or-flat position per row: 1.0 when the predicted direction is up, 0.0 otherwise.
# The column is created as float and filled with float values, so no bool/object values
# are pushed into an integer column (deprecated silent upcasting in pandas >= 2.1).
# Each signal is reindexed explicitly to the rows selected by its mask: the train mask
# starts one row earlier than Ctrain_pred, and that unmatched row is set to flat (0.0)
# rather than being left as NaN by implicit label alignment.
# Values are assigned by position via `.to_numpy()`.
df_returns['Position'] = 0.0 # create new column
df_returns.loc[train_idx, 'Position'] = (
  Ctrain_pred.reindex(df.index[train_idx], fill_value=False).to_numpy(dtype=float)
)
df_returns.loc[test_idx, 'Position'] = (
  Ctest_pred.reindex(df.index[test_idx], fill_value=False).to_numpy(dtype=float)
)

In [None]:
# Strategy return per row: the position held times that row's BOVA11.SA value.
df_returns['AlgoReturn'] = df_returns['Position'].mul(df_returns['BOVA11.SA'])

In [None]:
# Train period: sum of the strategy's per-row returns (rows 1 up to the last 1000).
# NOTE(review): the sum is a cumulative return only if the inputs are log returns - confirm.
df_returns['AlgoReturn'].iloc[1:-1000].sum()

In [None]:
# Test period: sum of the strategy's per-row returns (last 1000 rows, excluding the final one).
# NOTE(review): the sum is a cumulative return only if the inputs are log returns - confirm.
df_returns['AlgoReturn'].iloc[-1000:-1].sum()

In [None]:
# Whole sample (train and test together): sum of the strategy's per-row returns.
df_returns.loc[:, 'AlgoReturn'].sum()

In [None]:
# Train period: standard deviation of the strategy returns, and their mean/std ratio
# (a per-row, unannualised Sharpe-style ratio).
(
  df_returns['AlgoReturn'].iloc[1:-1000].std(),
  df_returns['AlgoReturn'].iloc[1:-1000].mean()
  / df_returns['AlgoReturn'].iloc[1:-1000].std(),
)

In [None]:
# Test period: standard deviation of the strategy returns, and their mean/std ratio
# (a per-row, unannualised Sharpe-style ratio).
(
  df_returns['AlgoReturn'].iloc[-1000:-1].std(),
  df_returns['AlgoReturn'].iloc[-1000:-1].mean()
  / df_returns['AlgoReturn'].iloc[-1000:-1].std(),
)

## Metrics

Main idea: get a feel for how the values relate to one another. What's "good"? What's "bad"? If the $R^2$ is "good", will the MAE also be "good"?

In [None]:
# Directional accuracy as (train, test).
(
  accuracy_score(Ctrain_true, Ctrain_pred),
  accuracy_score(Ctest_true, Ctest_pred),
)

In [None]:
# Precision of the "up" class as (train, test).
(
  precision_score(Ctrain_true, Ctrain_pred),
  precision_score(Ctest_true, Ctest_pred),
)

In [None]:
# F1 score of the "up" class as (train, test).
(
  f1_score(Ctrain_true, Ctrain_pred),
  f1_score(Ctest_true, Ctest_pred),
)

In [None]:
# ROC AUC as (train, test). The second argument here is a hard True/False label,
# not a continuous score.
(
  roc_auc_score(Ctrain_true, Ctrain_pred),
  roc_auc_score(Ctest_true, Ctest_pred),
)

In [None]:
# Mean squared error as (train, test).
(
  mean_squared_error(ytrain_true, ytrain_pred),
  mean_squared_error(ytest_true, ytest_pred),
)

In [None]:
# Root mean squared error as (train, test), computed as the square root of the MSE.
# The former `squared=False` shortcut of mean_squared_error was deprecated in
# scikit-learn 1.4 and later removed, so it fails on current releases; taking the
# square root explicitly works on every version.
(
  np.sqrt(mean_squared_error(ytrain_true, ytrain_pred)),
  np.sqrt(mean_squared_error(ytest_true, ytest_pred)),
)

In [None]:
# Mean absolute error as (train, test).
(
  mean_absolute_error(ytrain_true, ytrain_pred),
  mean_absolute_error(ytest_true, ytest_pred),
)

In [None]:
# Coefficient of determination (R^2) as (train, test).
(
  r2_score(ytrain_true, ytrain_pred),
  r2_score(ytest_true, ytest_pred),
)

In [None]:
# Mean absolute percentage error as (train, test).
(
  mean_absolute_percentage_error(ytrain_true, ytrain_pred),
  mean_absolute_percentage_error(ytest_true, ytest_pred),
)

## sMAPE

$$ E = \frac{1}{N} \sum_{i=1}^N \frac{|y_i - \hat{y}_i|}{(|y_i| + |\hat{y}_i|)/2} $$

In [None]:
# sMAPE
def smape(y_true, y_pred):
  """Symmetric mean absolute percentage error.

  Computes mean(|y - y_hat| / ((|y| + |y_hat|) / 2)) over all pairs.

  Args:
    y_true: array-like of actual values (list, ndarray, Series, ...).
    y_pred: array-like of predicted values, same length as ``y_true``.

  Returns:
    The sMAPE as a float in [0, 2]. Pairs where both values are exactly zero
    contribute 0 (a perfect prediction) instead of the undefined 0/0.
    NaN inputs propagate to a NaN result.
  """
  # Compare values position by position, like the sklearn metrics do.
  y_true = np.asarray(y_true, dtype=float)
  y_pred = np.asarray(y_pred, dtype=float)
  numerator = np.abs(y_true - y_pred)
  denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
  # Divide only where the denominator is non-zero; elsewhere keep the pre-filled 0.
  ratio = np.divide(
    numerator,
    denominator,
    out=np.zeros_like(numerator),
    where=denominator != 0,
  )
  return ratio.mean()

# sMAPE as (train, test).
(
  smape(y_true=ytrain_true, y_pred=ytrain_pred),
  smape(y_true=ytest_true, y_pred=ytest_pred),
)