<a href="https://colab.research.google.com/github/0zym4nd145/bovespa-index-prediction/blob/main/TGII.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

 # Trabalho de Graduação para Engenharia de Informação
 Lucas Ortega Venzel @ UFABC

 # Previsão do Índice Bovespa Utilizando Indicadores Técnicos e Algoritmos de Machine Learning

## Preparo do Ambiente

In [None]:
# Instalando bibliotecas utilizadas no projeto
_ = !pip -qq install ta yfinance sklearn pandas pmdarima optuna

## Importando Dados e Criando Features

In [None]:
from yfinance import download

# Download daily ^BVSP (Ibovespa) bars for the window 2006-01-01 .. 2021-01-01.
download_options = dict(
    tickers="^BVSP",        # a single ticker string (a list of tickers is also accepted)
    start='2006-01-01',
    end='2021-01-01',
    interval="1d",          # valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
    auto_adjust=True,       # adjust all OHLC values automatically
    prepost=True,           # also request pre/post regular market hours data
)
data = download(**download_options)

[*********************100%***********************]  1 of 1 completed


In [None]:
# Preview the downloaded price frame (Open/High/Low/Close/Volume indexed by Date)
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2006-01-02,33462.0,33519.0,32860.0,33507.0,0
2006-01-03,33507.0,34563.0,33507.0,34541.0,0
2006-01-04,34540.0,35223.0,34540.0,35002.0,0
2006-01-05,35006.0,35088.0,34681.0,34936.0,0
2006-01-06,35170.0,35529.0,34940.0,35475.0,0
...,...,...,...,...,...
2020-12-22,115825.0,116903.0,115648.0,116348.0,6947900
2020-12-23,116636.0,118311.0,116636.0,117857.0,6483300
2020-12-28,117806.0,119213.0,117805.0,119051.0,7171700
2020-12-29,119130.0,119861.0,118750.0,119475.0,6769700


In [None]:
# 'ano' (year): calendar year of each row, taken from the date index
data['ano'] = data.index.year

In [None]:
# Year-over-year change of the last Close of each year, from 2015 onward (the first entry is NaN: no previous year in the slice)
data.groupby('ano').last().loc[2015:, 'Close'].pct_change()

ano
2015         NaN
2016    0.389319
2017    0.268567
2018    0.150323
2019    0.319467
2020    0.028819
Name: Close, dtype: float64

In [None]:
import pandas as pd

# Target: Close one calendar week ahead divided by the current Close (a value above 1 means the index rose).
# Re-label every close one week back, so that date D carries the close observed at D + 7 days.
# A new Series is built from the raw values, so the 'Close' column of `data` is never touched.
future_close = pd.Series(data['Close'].to_numpy(), index=data.index + pd.DateOffset(weeks=-1))

# Dates whose D + 7 days is not a trading day have no match after re-indexing;
# they inherit the most recent available value (forward fill).
one_week_ahead = future_close.reindex(data.index).ffill()

# Plain column assignment keeps this cell re-runnable (it overwrites instead of joining a duplicate column).
data['1w target'] = one_week_ahead / data['Close']

In [None]:
# Preview the one-week-ahead target ratio
data[['1w target']]

Unnamed: 0_level_0,1w target
Date,Unnamed: 1_level_1
2006-01-02,1.054615
2006-01-03,1.014707
2006-01-04,1.027141
2006-01-05,1.024130
2006-01-06,1.011896
...,...
2020-12-22,1.026876
2020-12-23,1.012295
2020-12-28,1.002142
2020-12-29,0.998585


In [None]:
# Build the model features from technical indicators computed with the `ta` library
from ta import trend, momentum

# NOTE(review): the 'MACD' column stores the MACD *signal* line (macd_signal()), not the raw MACD line — confirm this is intended.
data['MACD'] = trend.MACD(close=data['Close'], window_slow=26, window_fast=12, window_sign=9, fillna=False).macd_signal()
data['RSI'] = momentum.RSIIndicator(close=data['Close'], window=14, fillna=False).rsi()
data['ADX'] = trend.ADXIndicator(high=data['High'], low=data['Low'], close=data['Close'], window=14, fillna=False).adx()
# NOTE(review): newer `ta` releases appear to expect high/low instead of close for AroonIndicator — TODO confirm against the installed version.
data['Aroon'] = trend.AroonIndicator(close=data['Close'], window=25, fillna=False).aroon_indicator()
data['CCI'] = trend.CCIIndicator(high=data['High'], low=data['Low'], close=data['Close'], window=20, constant=0.015, fillna=False).cci()
data['DPO'] = trend.DPOIndicator(close=data['Close'], window=20, fillna=False).dpo()
# The Mass Index is computed from the high-low range, so `high` must receive the High column.
data['MI'] = trend.MassIndex(high=data['High'], low=data['Low'], window_fast=9, window_slow=25, fillna=False).mass_index()
data['TRIX'] = trend.TRIXIndicator(close=data['Close'], window=15, fillna=False).trix()

# Column names fed to the models as predictors
features = ['MACD', 'RSI', 'ADX', 'Aroon', 'CCI', 'DPO', 'MI', 'TRIX']

  dip[i] = 100 * (self._dip[i] / self._trs[i])
  din[i] = 100 * (self._din[i] / self._trs[i])


In [None]:
# Last rows of the frame, now including the target and the raw indicator columns
data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,ano,1w target,MACD,RSI,ADX,Aroon,CCI,DPO,MI,TRIX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-12-22,115825.0,116903.0,115648.0,116348.0,6947900,2020,1.026876,3250.703691,66.606606,43.814811,80.0,75.166411,-228.7,23.986947,0.449624
2020-12-23,116636.0,118311.0,116636.0,117857.0,6483300,2020,1.012295,3214.888656,70.142446,43.414537,80.0,102.265759,-668.9,23.907152,0.443078
2020-12-28,117806.0,119213.0,117805.0,119051.0,7171700,2020,1.002142,3192.560554,72.613427,43.352187,96.0,121.544637,-1959.1,24.014231,0.437784
2020-12-29,119130.0,119861.0,118750.0,119475.0,6769700,2020,0.998585,3179.344458,73.453597,43.505391,96.0,127.950633,-134.1,24.079572,0.433384
2020-12-30,119410.0,120150.0,118919.0,119306.0,8235700,2020,1.0,3162.589735,72.498898,43.741509,76.0,119.084701,-324.0,24.015525,0.428551


In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize every feature column, one column at a time, re-fitting the same scaler object on each.
# NOTE(review): the scaler is fitted on the whole history, test years included, so the
# statistics used for early years already "see" later data (look-ahead) — confirm this is acceptable.
scaler = StandardScaler()
for feature_name in features:
  column_2d = data[feature_name].values.reshape(-1, 1)  # the scaler expects a 2-D (n_samples, 1) array
  data[feature_name] = scaler.fit_transform(column_2d)

In [None]:
# Last rows of the feature columns after standardization
data[features].tail()

Unnamed: 0_level_0,MACD,RSI,ADX,Aroon,CCI,DPO,MI,TRIX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-12-22,2.547982,1.18486,2.232344,1.110852,0.565066,-0.143085,-0.758782,1.569097
2020-12-23,2.51864,1.483132,2.186685,1.110852,0.813995,-0.466188,-0.822943,1.544618
2020-12-28,2.500348,1.691577,2.179572,1.366404,0.991087,-1.413185,-0.736844,1.52482
2020-12-29,2.48952,1.762451,2.197049,1.366404,1.049931,-0.073649,-0.684305,1.508366
2020-12-30,2.475794,1.681916,2.223983,1.046964,0.968491,-0.213035,-0.735803,1.490293


In [None]:
from pmdarima.arima import auto_arima
from tqdm import trange
import numpy as np

predictions_arima = {}

# Walk-forward evaluation: one ARIMA model (with the indicators as exogenous regressors) per test year.
# The year range matches the GBR cell (2016..2020) so that the evaluation cells,
# which index every model's predictions by the ARIMA keys, find the same years everywhere.
for i in trange(2016, 2021):
  train_mask = (data.index.year >= i - 8) & (data.index.year < i - 1)
  test_mask = data.index.year == i

  X_train = data.loc[train_mask, features]
  y_train = data.loc[train_mask, '1w target']

  X_test = data.loc[test_mask, features]
  y_test = data.loc[test_mask, '1w target']

  # NOTE(review): training stops at year i - 2 while the forecast is compared with year i,
  # so the forecast horizon does not directly follow the training sample — confirm this gap is intended.
  auto_fitted = auto_arima(y_train, X_train)

  # Convert to a plain array before building the frame: if predict() returns a pandas object
  # with its own index, aligning it against X_test.index would turn every prediction into NaN.
  forecast = np.asarray(auto_fitted.predict(len(X_test), X_test))
  predictions_arima[i] = pd.DataFrame(forecast, index=X_test.index, columns=['Predictions'])
  predictions_arima[i]['True'] = y_test

100%|██████████| 1/1 [00:49<00:00, 49.68s/it]


In [None]:
# Text summary of `auto_fitted`, i.e. the model left over from the last iteration of the ARIMA loop
print(auto_fitted.summary())

                               SARIMAX Results                                
Dep. Variable:                      y   No. Observations:                 1731
Model:               SARIMAX(1, 0, 0)   Log Likelihood                4280.214
Date:                Mon, 02 Aug 2021   AIC                          -8538.429
Time:                        13:55:41   BIC                          -8478.408
Sample:                             0   HQIC                         -8516.229
                               - 1731                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept      0.0261      0.005      4.781      0.000       0.015       0.037
MACD          -0.0088      0.026     -0.336      0.737      -0.060       0.042
RSI           -0.0371      0.002    -18.363      0.0

In [None]:
import optuna
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score
from tqdm import trange

# One fixed seed for the Optuna sampler and for the boosting model: with subsample < 1 or a
# restricted max_features the regressor is stochastic, so without a seed the final refit is
# not the model that obtained the best validation score.
GBR_SEED = 42


def make_gbr_objective(X_train, y_train, X_valid, y_valid):
  """Build an Optuna objective that fits on the training split and returns the validation R².

  The splits are passed explicitly so the objective does not depend on loop globals.
  """
  def objective(trial):
    # NOTE(review): 'ls', 'lad', 'mse', 'mae' and 'auto' are option names of older scikit-learn
    # releases; newer releases renamed or removed them — confirm against the installed version.
    params = {
        'loss': trial.suggest_categorical('loss', ['ls', 'lad', 'huber', 'quantile']),
        'learning_rate': trial.suggest_float('learning_rate', 1e-6, 1e-1, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 10, 1000),
        'subsample': trial.suggest_float('subsample', 0.1, 1),
        'criterion': trial.suggest_categorical('criterion', ['friedman_mse', 'mse', 'mae']),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 50),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 2, 10),
        'max_depth': trial.suggest_int('max_depth', 2, 15),
        'max_features': trial.suggest_categorical('max_features', ['auto', 'sqrt', 'log2']),
    }
    regressor_obj = GradientBoostingRegressor(random_state=GBR_SEED, **params)
    regressor_obj.fit(X_train, y_train)
    return r2_score(y_valid, regressor_obj.predict(X_valid))

  return objective


predictions_GBR = {}

# Walk-forward evaluation: tune on year i - 1, report on year i.
for i in trange(2016, 2021):
  train_mask = (data.index.year >= i - 8) & (data.index.year < i - 1)
  valid_mask = data.index.year == i - 1
  test_mask = data.index.year == i

  X_train = data.loc[train_mask, features]
  y_train = data.loc[train_mask, '1w target']

  X_valid = data.loc[valid_mask, features]
  y_valid = data.loc[valid_mask, '1w target']

  X_test = data.loc[test_mask, features]
  y_test = data.loc[test_mask, '1w target']

  study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=GBR_SEED))
  study.optimize(make_gbr_objective(X_train, y_train, X_valid, y_valid), n_trials=50)

  # Refit with the best hyper-parameters and the same seed, which reproduces the best trial's model.
  GBR = GradientBoostingRegressor(random_state=GBR_SEED, **study.best_params)
  GBR.fit(X_train, y_train)
  predictions_GBR[i] = pd.DataFrame(GBR.predict(X_test), index=X_test.index, columns=['Predictions'])
  predictions_GBR[i]['True'] = y_test

In [None]:
from keras import backend as K


def coeff_determination(y_true, y_pred):
    """R² metric written with Keras backend ops: 1 - SS_res / (SS_tot + epsilon).

    The backend epsilon in the denominator avoids a division by zero when SS_tot is 0.
    """
    squared_residuals = K.square(y_true - y_pred)
    squared_deviations = K.square(y_true - K.mean(y_true))
    SS_res = K.sum(squared_residuals)
    SS_tot = K.sum(squared_deviations)
    return 1 - SS_res / (SS_tot + K.epsilon())

In [None]:
import tensorflow as tf

# Fixed seed so weight initialisation and dropout are repeatable between runs
MLP_SEED = 42

predictions_MLP = {}

# Walk-forward evaluation with the same test years as the GBR cell (2016..2020), so the
# evaluation cells can look up every model's predictions for every year.
for i in range(2016, 2021):
  train_mask = (data.index.year >= i - 8) & (data.index.year < i - 1)
  valid_mask = data.index.year == i - 1
  test_mask = data.index.year == i

  X_train = data.loc[train_mask, features]
  y_train = data.loc[train_mask, '1w target']

  X_valid = data.loc[valid_mask, features]
  y_valid = data.loc[valid_mask, '1w target']

  X_test = data.loc[test_mask, features]
  y_test = data.loc[test_mask, '1w target']

  # Re-seed before building each model so every year starts from the same random state
  tf.random.set_seed(MLP_SEED)

  # Small fully connected regressor: n_features -> 12 -> 6 -> 1, with light dropout between layers
  model = tf.keras.models.Sequential()
  model.add(tf.keras.layers.Dense(units=X_train.shape[1], input_dim=X_train.shape[1], activation='relu'))
  model.add(tf.keras.layers.Dropout(0.1))
  model.add(tf.keras.layers.Dense(units=12, activation='relu'))
  model.add(tf.keras.layers.Dropout(0.1))
  model.add(tf.keras.layers.Dense(units=6, activation='relu'))
  model.add(tf.keras.layers.Dropout(0.1))
  model.add(tf.keras.layers.Dense(1, activation='linear'))

  # Metrics are passed as a list, the form documented for compile()
  model.compile(tf.keras.optimizers.Adam(learning_rate=0.005), loss='mse', metrics=[coeff_determination])

  # The validation year is only monitored here; it does not stop or select the training
  model.fit(X_train, y_train,
            validation_data=(X_valid, y_valid),
            epochs=100,
            batch_size=2000,
            verbose=1)
  predictions_MLP[i] = pd.DataFrame(model.predict(X_test), index=X_test.index, columns=['Predictions'])
  predictions_MLP[i]['True'] = y_test

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Layer-by-layer summary of whichever Keras `model` was built last.
# NOTE(review): the saved output below lists an LSTM layer, so this cell was last executed
# after the LSTM cell further down — confirm where it should live.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, None, 6)           312       
_________________________________________________________________
dense (Dense)                (None, None, 1)           7         
Total params: 319
Trainable params: 319
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Number of consecutive trading days in each LSTM input window.
# The window length actually used by this cell has always been 50; the constant now drives it.
lookback = 50
lstm_columns = ['MACD', 'RSI', 'ADX', 'Aroon', 'CCI', 'DPO', '1w target']

# Every row from position `lookback` onward receives, per column, the list of the last
# `lookback` values ending at (and including) that row.
window_index = data.index[lookback:]
window_columns = {}
for column in lstm_columns:
  values = data[column].tolist()
  windows = [values[position - lookback + 1: position + 1] for position in range(lookback, len(values))]
  # dtype=object keeps each cell as one Python list instead of expanding it
  window_columns[column] = pd.Series(windows, index=window_index, dtype=object)

data_lstm = pd.DataFrame(window_columns, columns=lstm_columns)

In [None]:
import tensorflow as tf
import numpy as np

LSTM_FEATURES = ['MACD', 'RSI', 'ADX', 'Aroon', 'CCI', 'DPO']
# Fixed seed so weight initialisation is repeatable between runs
LSTM_SEED = 42


def _lstm_arrays(frame):
  """Convert rows of per-column window lists into (X, y) arrays.

  X has shape (samples, timesteps, features) and y has shape (samples, timesteps).
  """
  # Stacking the columns yields (features, samples, timesteps). The axes have to be
  # transposed: a reshape to the target shape keeps the memory order and would mix
  # values from different features, samples and timesteps.
  stacked = np.array([frame[column].tolist() for column in LSTM_FEATURES])
  X = np.transpose(stacked, (1, 2, 0))
  y = np.array(frame['1w target'].tolist())
  return X, y


predictions_LSTM = {}

# Walk-forward evaluation with the same test years as the GBR cell (2016..2020).
# NOTE(review): 1000 epochs per year makes this cell slow — consider fewer epochs or early stopping.
for i in range(2016, 2021):
  years = data_lstm.index.year
  train_frame = data_lstm.loc[(years >= i - 8) & (years < i - 1)]
  valid_frame = data_lstm.loc[years == i - 1]
  test_frame = data_lstm.loc[years == i]

  X_train, y_train = _lstm_arrays(train_frame)
  X_valid, y_valid = _lstm_arrays(valid_frame)
  X_test, y_test = _lstm_arrays(test_frame)

  tf.random.set_seed(LSTM_SEED)

  # One LSTM layer emitting a value per timestep, followed by a linear read-out
  model = tf.keras.models.Sequential()
  model.add(tf.keras.layers.LSTM(units=6, input_shape=(None, len(LSTM_FEATURES)), return_sequences=True))
  model.add(tf.keras.layers.Dense(units=1, activation='linear'))

  model.compile(tf.keras.optimizers.Adam(learning_rate=0.15), loss='mse', metrics=[coeff_determination])
  model.fit(tf.convert_to_tensor(X_train), tf.convert_to_tensor(y_train),
            validation_data=(tf.convert_to_tensor(X_valid), tf.convert_to_tensor(y_valid)),
            epochs=1000,
            batch_size=2000,
            verbose=1)

  # Only the last timestep of every window is the forecast for that window's own date;
  # the frame is indexed by the dates of the test windows themselves.
  predictions_LSTM[i] = pd.DataFrame(model.predict(X_test)[:, -1], index=test_frame.index, columns=['Predictions'])
  predictions_LSTM[i]['True'] = y_test[:, -1]

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [None]:
import pickle

In [None]:
# Persist the ARIMA predictions; the context manager flushes and closes the file handle.
with open('predictions_arima.pkl', 'wb') as handle:
  pickle.dump(predictions_arima, handle)

In [None]:
# Persist the GBR predictions; the context manager flushes and closes the file handle.
with open('predictions_GBR.pkl', 'wb') as handle:
  pickle.dump(predictions_GBR, handle)

In [None]:
# Persist the MLP predictions; the context manager flushes and closes the file handle.
with open('predictions_MLP.pkl', 'wb') as handle:
  pickle.dump(predictions_MLP, handle)

In [None]:
# Persist the LSTM predictions; the context manager flushes and closes the file handle.
with open('predictions_LSTM.pkl', 'wb') as handle:
  pickle.dump(predictions_LSTM, handle)

In [None]:
import pickle


def _load_predictions(path):
  """Read one pickled predictions dictionary and close the file afterwards.

  pickle.load can execute arbitrary code, so only files written by this notebook should be read here.
  """
  with open(path, 'rb') as handle:
    return pickle.load(handle)


performance_arima = _load_predictions('predictions_arima.pkl')
performance_GBR = _load_predictions('predictions_GBR.pkl')
performance_MLP = _load_predictions('predictions_MLP.pkl')
performance_LSTM = _load_predictions('predictions_LSTM.pkl')

In [None]:
from sklearn.metrics import r2_score

# Yearly R² per model: every output template is paired with the predictions it reports on.
r2_report = [
    ('  ARIMA R²: {}', performance_arima),
    ('  GBR R²: {}', performance_GBR),
    ('  MLP R²: {}', performance_MLP),
    ('  LSTM R²: {}', performance_LSTM),
]
for year in performance_arima.keys():
  print(year)
  for template, store in r2_report:
    frame = store[year]
    print(template.format(r2_score(frame['True'], frame['Predictions'])))

2016
  ARIMA R²: -4.246892372903152
  GBR R²: -0.1800458281249706
  MLP R²: -0.09629945103809634
  LSTM R²: -0.07218930822297409
2017
  ARIMA R²: -8.919811509448587
  GBR R²: -0.06905152468533626
  MLP R²: -0.21617821213859711
  LSTM R²: -0.03901385754202247
2018
  ARIMA R²: -2.0645822502134767
  GBR R²: -0.006581012049825086
  MLP R²: -0.21700895606625292
  LSTM R²: 0.01573141373493725
2019
  ARIMA R²: -5.158439686763038
  GBR R²: -0.031708638836401626
  MLP R²: -0.14216882201557457
  LSTM R²: -0.018285343865868553
2020
  ARIMA R²: -0.41543380878503644
  GBR R²: 0.012617553001281867
  MLP R²: -0.12974276382234207
  LSTM R²: 0.002590414661739482


In [None]:
from sklearn.metrics import mean_absolute_error

# Yearly mean absolute error per model (the printed label names the metric actually computed).
for i in performance_arima.keys():
  print(i)
  print('  ARIMA MAE: {}'.format(mean_absolute_error(performance_arima[i]['True'],performance_arima[i]['Predictions'])))
  print('  GBR MAE: {}'.format(mean_absolute_error(performance_GBR[i]['True'],performance_GBR[i]['Predictions'])))
  print('  MLP MAE: {}'.format(mean_absolute_error(performance_MLP[i]['True'],performance_MLP[i]['Predictions'])))
  print('  LSTM MAE: {}'.format(mean_absolute_error(performance_LSTM[i]['True'],performance_LSTM[i]['Predictions'])))

2016
  ARIMA R²: 0.068952604868917
  GBR R²: 0.030161548030626754
  MLP R²: 0.030490699553411226
  LSTM R²: 0.029776099705592977
2017
  ARIMA R²: 0.06344605828013736
  GBR R²: 0.020068506125639953
  MLP R²: 0.02169596622639913
  LSTM R²: 0.02026265200356603
2018
  ARIMA R²: 0.03936845859778431
  GBR R²: 0.022869793881852396
  MLP R²: 0.02508369338164655
  LSTM R²: 0.02262313597122211
2019
  ARIMA R²: 0.04581118139899686
  GBR R²: 0.01862699270170303
  MLP R²: 0.018728956902975803
  LSTM R²: 0.018172120828464394
2020
  ARIMA R²: 0.04302165747979368
  GBR R²: 0.03411370040296588
  MLP R²: 0.03782485979444368
  LSTM R²: 0.033810285969967956


In [None]:
# One empty, independent dictionary per model; each will map a year to its up/down label frame.
classification_arima, classification_GBR, classification_MLP, classification_LSTM = {}, {}, {}, {}

In [None]:
def _to_direction(frame):
  """Map every ratio to a direction label: 0 when it is below 1, 1 otherwise.

  Written as `~(frame < 1)` so that anything that is not strictly below 1 (NaN included) becomes 1.
  The comparison is vectorised, which avoids the deprecated element-wise DataFrame.applymap.
  """
  return (~(frame < 1)).astype(int)


# Convert predicted and realised ratios of every model and year into 0/1 labels.
for i in performance_arima.keys():
  classification_arima[i] = _to_direction(performance_arima[i])
  classification_GBR[i] = _to_direction(performance_GBR[i])
  classification_MLP[i] = _to_direction(performance_MLP[i])
  classification_LSTM[i] = _to_direction(performance_LSTM[i])

In [None]:
from sklearn.metrics import f1_score

# Yearly F1 score per model: every output template is paired with the label frames it reports on.
f1_report = [
    ('  ARIMA F1: {}', classification_arima),
    ('  GBR F1: {}', classification_GBR),
    ('  MLP F1: {}', classification_MLP),
    ('  LSTM F1: {}', classification_LSTM),
]
for year in classification_arima.keys():
  print(year)
  for template, store in f1_report:
    labels = store[year]
    print(template.format(f1_score(labels['True'], labels['Predictions'])))

2016
  ARIMA F1: 0.3474178403755869
  GBR F1: 0.6184210526315789
  MLP F1: 0.47104247104247104
  LSTM F1: 0.20408163265306123
2017
  ARIMA F1: 0.36771300448430494
  GBR F1: 0.6067415730337079
  MLP F1: 0.40740740740740744
  LSTM F1: 0.5785714285714286
2018
  ARIMA F1: 0.45045045045045046
  GBR F1: 0.7172774869109948
  MLP F1: 0.4873949579831932
  LSTM F1: 0.6119402985074628
2019
  ARIMA F1: 0.5179282868525896
  GBR F1: 0.6540880503144654
  MLP F1: 0.7628865979381443
  LSTM F1: 0.7892156862745099
2020
  ARIMA F1: 0.47058823529411764
  GBR F1: 0.5866666666666667
  MLP F1: 0.36097560975609755
  LSTM F1: 0.739795918367347


In [None]:
from sklearn.metrics import accuracy_score

# Yearly accuracy per model: every output template is paired with the label frames it reports on.
accuracy_report = [
    ('  ARIMA Accu: {}', classification_arima),
    ('  GBR Accu: {}', classification_GBR),
    ('  MLP Accu: {}', classification_MLP),
    ('  LSTM Accu: {}', classification_LSTM),
]
for year in classification_arima.keys():
  print(year)
  for template, store in accuracy_report:
    labels = store[year]
    print(template.format(accuracy_score(labels['True'], labels['Predictions'])))

2016
  ARIMA Accu: 0.44176706827309237
  GBR Accu: 0.5341365461847389
  MLP Accu: 0.4497991967871486
  LSTM Accu: 0.37349397590361444
2017
  ARIMA Accu: 0.4314516129032258
  GBR Accu: 0.5766129032258065
  MLP Accu: 0.4838709677419355
  LSTM Accu: 0.5241935483870968
2018
  ARIMA Accu: 0.5020408163265306
  GBR Accu: 0.5591836734693878
  MLP Accu: 0.5020408163265306
  LSTM Accu: 0.5755102040816327
2019
  ARIMA Accu: 0.5101214574898786
  GBR Accu: 0.5546558704453441
  MLP Accu: 0.6275303643724697
  LSTM Accu: 0.6518218623481782
2020
  ARIMA Accu: 0.41700404858299595
  GBR Accu: 0.4979757085020243
  MLP Accu: 0.46963562753036436
  LSTM Accu: 0.5870445344129555


In [None]:
# Weekly backtest driven by the ARIMA direction signal, starting each year with 100.
for i in classification_arima.keys():
  # Build a separate frame so the 0/1 labels stored in classification_arima stay intact
  # (the F1 / accuracy cells can then be re-run after this one).
  trades = pd.DataFrame({
      'Predictions': classification_arima[i]['Predictions'],
      'True': performance_arima[i]['True'],
  })
  # ISO week number of every date; isocalendar() replaces DatetimeIndex.week, which current pandas no longer provides.
  week_of_year = trades.index.isocalendar().week
  # One trade per week, taken on the first available row of that week
  result = trades.groupby(week_of_year).first()
  patrimonio = 100   # long-only: hold the index during weeks predicted to rise, stay out otherwise
  patrimonio2 = 100  # long-short: additionally earn (2 - ratio) during weeks predicted to fall
  for signal, weekly_ratio in zip(result['Predictions'], result['True']):
    if signal == 1:
      patrimonio *= weekly_ratio
      patrimonio2 *= weekly_ratio
    else:
      patrimonio2 *= 2 - weekly_ratio
  print('{}: patrimonio final long-only: {}, long-short: {}'.format(i, patrimonio, patrimonio2))

2016: patrimonio final long-only: 101.9784605240005, long-short: 68.16837358536087
2017: patrimonio final long-only: 101.64651692973983, long-short: 82.42998776890738
2018: patrimonio final long-only: 107.5422253557783, long-short: 95.099516143715
2019: patrimonio final long-only: 132.02720232285984, long-short: 130.53951307796643
2020: patrimonio final long-only: 75.98302705970492, long-short: 50.39176331418891


  


In [None]:
# Weekly backtest driven by the GBR direction signal, starting each year with 100.
for i in classification_GBR.keys():
  # Build a separate frame so the 0/1 labels stored in classification_GBR stay intact
  # (the F1 / accuracy cells can then be re-run after this one).
  trades = pd.DataFrame({
      'Predictions': classification_GBR[i]['Predictions'],
      'True': performance_GBR[i]['True'],
  })
  # ISO week number of every date; isocalendar() replaces DatetimeIndex.week, which current pandas no longer provides.
  week_of_year = trades.index.isocalendar().week
  # One trade per week, taken on the first available row of that week
  result = trades.groupby(week_of_year).first()
  patrimonio = 100   # long-only: hold the index during weeks predicted to rise, stay out otherwise
  patrimonio2 = 100  # long-short: additionally earn (2 - ratio) during weeks predicted to fall
  for signal, weekly_ratio in zip(result['Predictions'], result['True']):
    if signal == 1:
      patrimonio *= weekly_ratio
      patrimonio2 *= weekly_ratio
    else:
      patrimonio2 *= 2 - weekly_ratio
  print('{}: patrimonio final long-only: {}, long-short: {}'.format(i, patrimonio, patrimonio2))

2016: patrimonio final long-only: 102.12950331778256, long-short: 70.61106107715094
2017: patrimonio final long-only: 109.50694177041723, long-short: 96.95555227428966
2018: patrimonio final long-only: 117.0938944257049, long-short: 117.0938944257049
2019: patrimonio final long-only: 113.79671526011471, long-short: 97.51736667804091
2020: patrimonio final long-only: 107.25554867257412, long-short: 101.45785359518707


  


In [None]:
# Weekly backtest driven by the MLP direction signal, starting each year with 100.
for i in classification_MLP.keys():
  # Build a separate frame so the 0/1 labels stored in classification_MLP stay intact
  # (the F1 / accuracy cells can then be re-run after this one).
  trades = pd.DataFrame({
      'Predictions': classification_MLP[i]['Predictions'],
      'True': performance_MLP[i]['True'],
  })
  # ISO week number of every date; isocalendar() replaces DatetimeIndex.week, which current pandas no longer provides.
  week_of_year = trades.index.isocalendar().week
  # One trade per week, taken on the first available row of that week
  result = trades.groupby(week_of_year).first()
  patrimonio = 100   # long-only: hold the index during weeks predicted to rise, stay out otherwise
  patrimonio2 = 100  # long-short: additionally earn (2 - ratio) during weeks predicted to fall
  for signal, weekly_ratio in zip(result['Predictions'], result['True']):
    if signal == 1:
      patrimonio *= weekly_ratio
      patrimonio2 *= weekly_ratio
    else:
      patrimonio2 *= 2 - weekly_ratio
  print('{}: patrimonio final long-only: {}, long-short: {}'.format(i, patrimonio, patrimonio2))

2016: patrimonio final long-only: 127.17303308712144, long-short: 110.42135588447219
2017: patrimonio final long-only: 110.67418337804301, long-short: 97.79599795764076
2018: patrimonio final long-only: 99.77805083165866, long-short: 81.57871369929335
2019: patrimonio final long-only: 124.61338040759036, long-short: 118.15362562329261
2020: patrimonio final long-only: 117.48539799587718, long-short: 113.93506114006072


  


In [None]:
# Weekly backtest driven by the LSTM direction signal, starting each year with 100.
# The loop runs over the LSTM's own years, so it does not depend on which years the ARIMA model covered.
for i in classification_LSTM.keys():
  # Build a separate frame so the 0/1 labels stored in classification_LSTM stay intact
  # (the F1 / accuracy cells can then be re-run after this one).
  trades = pd.DataFrame({
      'Predictions': classification_LSTM[i]['Predictions'],
      'True': performance_LSTM[i]['True'],
  })
  # ISO week number of every date; isocalendar() replaces DatetimeIndex.week, which current pandas no longer provides.
  week_of_year = trades.index.isocalendar().week
  # One trade per week, taken on the first available row of that week
  result = trades.groupby(week_of_year).first()
  patrimonio = 100   # long-only: hold the index during weeks predicted to rise, stay out otherwise
  patrimonio2 = 100  # long-short: additionally earn (2 - ratio) during weeks predicted to fall
  for signal, weekly_ratio in zip(result['Predictions'], result['True']):
    if signal == 1:
      patrimonio *= weekly_ratio
      patrimonio2 *= weekly_ratio
    else:
      patrimonio2 *= 2 - weekly_ratio
  print('{}: patrimonio final long-only: {}, long-short: {}'.format(i, patrimonio, patrimonio2))

2016: patrimonio final long-only: 108.49344439463161, long-short: 77.02006025337093
2017: patrimonio final long-only: 127.63363888375058, long-short: 131.0187080916043
2018: patrimonio final long-only: 120.97868779652373, long-short: 120.39866646380614
2019: patrimonio final long-only: 131.20879466027634, long-short: 131.20879466027634
2020: patrimonio final long-only: 107.46291495402866, long-short: 107.46291495402866


  
