### Gera os gráficos das bases de dados coletadas

In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
dataNames = [
    "PETR3_B_0_30min", 
    "WDOFUT_F_0_30min", 
    "WINFUT_F_0_30min",
]

setDivision = [0.1, 0.7, 0.2]              # Size of the [optimization, train, test] set
outputName  = "Fechamento"                 # Name of the output variable0

dir_name = f"../Results/datasPlots"
if not os.path.exists(dir_name):
    os.makedirs(dir_name)

for dataName in dataNames:
    Y = pd.read_csv(f'../Data/Generated/{dataName}_OUT.csv', sep=";")['OutPut |T+1|']
    plt.figure(figsize=(10, 5))
    plt.plot(Y)
    plt.axvline(x=len(Y) * 0.1, color='red', linestyle='--')
    plt.axvline(x=len(Y) * 0.8, color='blue', linestyle='--')
    plt.text(len(Y) * -0.03, Y.max() * 1, 'Otimização', color='red')
    plt.text(len(Y) * 0.38, Y.max() * 1, 'Treinamento', color='black')
    plt.text(len(Y) * 0.90, Y.max() * 1, 'Teste', color='blue')
    plt.legend()
    # plt.title(f"Ativo {dataName.split('_')[0]}")
    plt.xticks([])
    plt.xlabel("Amostras")
    plt.ylabel("Valor (R$)")
    plt.savefig(f"{dir_name}/{dataName.split('_')[0]}_fechamento.png")
    plt.close()

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.


### Verifica os melhores modelos para cada base de dados

In [2]:
from models_classification import GetModelsClassification
from models_regression import GetModelsRegression

dataNames = [
    "PETR3_B_0_30min", 
    "PETR3_B_0_60min", 

    "WDOFUT_F_0_30min", 
    "WDOFUT_F_0_60min", 

    "WINFUT_F_0_30min",
    "WINFUT_F_0_60min",
]

for dataName in dataNames:
    SVM, KNN, LR = GetModelsClassification(dataName)        # Obtém os modelos de classificação
    LSTM, MLP, RNN = GetModelsRegression(dataName)          # Obtém os modelos de regressão

    print(f"-------- Modelos definidos para a base de dados {dataName} --------")
    print(f"Modelos de classificação: ")
    print(f"Modelo SVM: {SVM.get_params()}")
    print(f"Modelo KNN: {KNN.get_params()}")
    print(f"Modelo LR: {LR.get_params()}", end="\n\n")
    print(f"Modelos de regressão: ")
    print(f"Modelo MLP: {[camada.units for camada in MLP.layers]}")
    print(f"Modelo RNN: {[camada.units for camada in RNN.layers]}")
    print(f"Modelo LSTM: {[camada.units for camada in LSTM.layers]}")
    print(f"-------------------------------------------------------------------", end="\n\n")

Using TensorFlow backend


2023-11-22 06:53:02.402041: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-22 06:53:02.484486: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-22 06:53:02.486057: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-22 06:53:06.442549: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-22 06:53:06.443736: W tensorflow/core/common_runtime/gpu/gpu_device.

-------- Modelos definidos para a base de dados PETR3_B_0_30min --------
Modelos de classificação: 
Modelo SVM: {'C': 15.01, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 1, 'decision_function_shape': 'ovr', 'degree': 4, 'gamma': 'scale', 'kernel': 'poly', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Modelo KNN: {'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 34, 'p': 1, 'weights': 'distance'}
Modelo LR: {'C': 10, 'class_weight': 'balanced', 'dual': False, 'fit_intercept': False, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 50, 'multi_class': 'auto', 'n_jobs': None, 'penalty': 'l2', 'random_state': None, 'solver': 'newton-cg', 'tol': 0.0001, 'verbose': 0, 'warm_start': True}

Modelos de regressão: 
Modelo MLP: [28, 44, 44, 12, 1]
Modelo RNN: [80, 80, 96, 64, 1]
Modelo LSTM: [60, 40, 80, 1]
-------------------------------

### Gera tabelas de comparação para modelos de regressão

In [3]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd
import os

dataNames = [
    "PETR3_B_0_30min", 
    "WDOFUT_F_0_30min", 
    "WINFUT_F_0_30min",

    "PETR3_B_0_60min", 
    #"WDOFUT_F_0_60min", 
    "WINFUT_F_0_60min",
]

setDivision = [0.1, 0.7, 0.2]              # Size of the [optimization, train, test] set
outputName  = "Fechamento"                 # Name of the output variable0

dir_name = f"../Results/test/logs/regression"
if not os.path.exists(dir_name):
    os.makedirs(dir_name)

for dataName in dataNames:
    res = pd.DataFrame(columns=['Model', 'MAE', 'RMSE'])
    datasRegression = pd.read_csv(f'../Results/test/regression/{dataName}_predictions.csv', sep=";")
    datasStatistics = pd.read_csv(f'../Results/test/statistic/{dataName}_predictions.csv', sep=";")
    outDatas = pd.read_csv(f'../Data/Cut/statistic/Y/Test_{dataName}.csv', sep=";")['OutPut |T+1|']
    datas = pd.concat([datasRegression, datasStatistics], axis=1)
    print("Data Name:       ", dataName)
    print("Datas Shape:     ", datas.shape)
    print("OutDatas Shape:  ", outDatas.shape)
    for colum in datas.columns:
        mae = mean_absolute_error(outDatas, datas[colum])
        rmse = np.sqrt(mean_squared_error(outDatas, datas[colum]))
        res = pd.concat([res, pd.DataFrame([[colum, mae, rmse]], columns=['Model', 'MAE', 'RMSE'])], axis=0)
    res.to_csv(f"{dir_name}/{dataName}.csv", sep=";", index=False)

Data Name:        PETR3_B_0_30min
Datas Shape:      (1342, 6)
OutDatas Shape:   (1342,)
Data Name:        WDOFUT_F_0_30min
Datas Shape:      (1610, 6)
OutDatas Shape:   (1610,)
Data Name:        WINFUT_F_0_30min
Datas Shape:      (1610, 6)
OutDatas Shape:   (1610,)
Data Name:        PETR3_B_0_60min
Datas Shape:      (683, 6)
OutDatas Shape:   (683,)
Data Name:        WINFUT_F_0_60min
Datas Shape:      (802, 6)
OutDatas Shape:   (802,)


## Exibe logs para modelos treinados

In [4]:
import os 
import pandas as pd
# print("Operations History:        \n", operationsHistory.to_string(index=False))

path = "../Results/test/logs/class"
files = os.listdir(path)
for file in files:
    if file.endswith(".csv"):
        print(f"================================================ {file} ===========================================================")
        df = pd.read_csv(path+"/"+file, sep=";")
        print(df.to_string(index=False), end="\n\n")


                  model  qtdBuying  inirialValue  finalValue  percentual  accuracy       f1  truePositives  trueNegatives  falsePositives  falseNegatives
                    SVM         81          1000 1036.126467    3.612647  0.547382 0.643768          111.0          328.0           281.0            82.0
                    KNN        182          1000 1085.472523    8.547252  0.513716 0.471545          238.0          174.0           154.0           236.0
                     LR         78          1000 1089.834954    8.983495  0.554863 0.508941          260.0          185.0           132.0           225.0
                   LSTM          0          1000 1000.000000    0.000000  0.488778 0.000000          392.0            0.0             0.0           410.0
                    MLP          1          1000 1100.112004   10.011200  0.512469 0.677126            1.0          410.0           391.0             0.0
                    RNN          0          1000 1000.000000    0.000000  0.

                  model  qtdBuying  inirialValue  finalValue  percentual  accuracy       f1  truePositives  trueNegatives  falsePositives  falseNegatives
                    SVM         24          1000 1051.718816    5.171882  0.484627 0.260504          269.0           62.0            67.0           285.0
                    KNN        152          1000 1208.388424   20.838842  0.510981 0.510264          175.0          174.0           161.0           173.0
                     LR         37          1000  905.284687   -9.471531  0.474378 0.425600          191.0          133.0           145.0           214.0
                   LSTM          1          1000  805.210184  -19.478982  0.506589 0.672498            0.0          346.0           336.0             1.0
                    MLP          1          1000  805.210184  -19.478982  0.506589 0.672498            0.0          346.0           336.0             1.0
                    RNN          1          1000  805.210184  -19.478982  0.

### Exibe logs de regressão

In [5]:
import os 
import pandas as pd
# print("Operations History:        \n", operationsHistory.to_string(index=False))

path = "../Results/test/logs/regression"
files = os.listdir(path)
for file in files:
    if file.endswith(".csv"):
        print(f"================================================ {file} ===========================================================")
        df = pd.read_csv(path+"/"+file, sep=";")
        print(df.to_string(index=False), end="\n\n")

 Model      MAE     RMSE
  LSTM 4.501107 4.715391
   MLP 4.571041 4.781941
   RNN 4.340650 4.562272
 ARIMA 0.079205 0.114771
SARIMA 0.079692 0.114520
 GARCH 0.604997 0.836969

 Model         MAE        RMSE
  LSTM 4957.349252 4959.809258
   MLP  440.029471  472.735178
   RNN 4062.172952 4065.174702
 ARIMA    9.663642   13.786633
SARIMA    9.837974   13.986968
 GARCH   50.555641   64.577240

 Model      MAE     RMSE
  LSTM 4.266493 4.513349
   MLP 4.503871 4.737022
   RNN 4.516052 4.747392
 ARIMA 0.055946 0.080992
SARIMA 0.057220 0.081907
 GARCH 0.418511 0.594305

 Model         MAE        RMSE
  LSTM    5.859477    6.815682
   MLP    5.726649    6.568410
   RNN 4185.750690 4930.722987
 ARIMA    0.334360    0.472185
SARIMA    0.340123    0.480152
 GARCH    1.894884    2.525112

 Model         MAE        RMSE
  LSTM    8.338608   10.567698
   MLP    5.851314    6.627775
   RNN 2500.733991 3082.418599
 ARIMA    0.501761    0.689591
SARIMA    0.503149    0.689728
 GARCH    2.670904    3.44

## Exibe logs para estratégias implementadas

In [6]:
import os 
import pandas as pd
# print("Operations History:        \n", operationsHistory.to_string(index=False))

path = "../Results/test/logs/economic"
files = os.listdir(path)
for file in files:
    if file.endswith(".csv"):
        print(f"================================================ {file} ===========================================================")
        df = pd.read_csv(path+"/"+file, sep=";")
        print(df.to_string(index=False), end="\n\n")

                         model  qtdBuying  inirialValue  finalValue  percentual
     strat_TCC_PETR3_B_0_60min        152          1000 1208.388424   20.838842
 strat_hurwicz_PETR3_B_0_60min          1          1000  805.210184  -19.478982
                  Buy and Hold          1          1000  803.307738  -19.669226
     strat_TCC_PETR4_B_0_60min        139          1000  591.442413  -40.855759
 strat_hurwicz_PETR4_B_0_60min          1          1000  513.129103  -48.687090
                  Buy and Hold          1          1000  513.129103  -48.687090
     strat_TCC_PETR4_B_0_15min        572          1000  952.374055   -4.762595
 strat_hurwicz_PETR4_B_0_15min          1          1000  797.723292  -20.227671
                  Buy and Hold          1          1000  801.231310  -19.876869
      strat_TCC_PETR3_B_0_5min        332          1000  823.566918  -17.643308
  strat_hurwicz_PETR3_B_0_5min          1          1000  856.312292  -14.368771
                  Buy and Hold          