In [39]:
import pandas as pd
import numpy as np
import yfinance as yf
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
import time
import matplotlib.pyplot as plt
import math
import copy
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ProcessPoolExecutor
from tensorflow.keras.layers import Input
from sklearn.preprocessing import StandardScaler
from keras.layers import Dropout

In [40]:
# HIPERPARÂMETROS:
start_date = '2014-08-21'
W = 300  # Lembrando que o tamanho do treinamento será W - w
w = 30

In [41]:
# Criptomoedas que vamos escolher para nosso portifólio
cryptos = [
    'BTC-USD', 'ETH-USD','LTC-USD', 'ADA-USD',
    'DOT-USD', 'LINK-USD','SOL-USD',
    'TRX-USD'
]

In [42]:
## Função para calcular o retorno logarítmico
def log_return(series):
    return np.log(series['Close'] / series['Open'])

In [43]:
def fracdiff_weights(d, size):
    we = [1.0]
    for k in range(1, size):
        we.append(-we[-1] * ((d - k + 1)) / k)
    return np.array(we[::-1])

In [44]:
data = {}
for crypto in cryptos:
    # Baixar dados e calcular retorno logarítmico
    df = yf.download(crypto, start=start_date, end=pd.to_datetime("today").strftime("%Y-%m-%d"), interval='1d')
    df['Return'] = log_return(df)

    # Construir DataFrame final, pegando o retorno do dia anterior e o atual
    df_final = pd.DataFrame({
        'Crypto_Price': np.log(df['Open'].squeeze()),
        'Crypto_Return_Today': df['Return']
    }).dropna()


    data[crypto] = df_final

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [45]:
data[crypto]

Unnamed: 0_level_0,Crypto_Price,Crypto_Return_Today
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-11-09,-6.073109,0.017212
2017-11-10,-6.059743,-0.148386
2017-11-11,-6.201692,-0.011417
2017-11-12,-6.211613,-0.117845
2017-11-13,-6.322750,0.162630
...,...,...
2024-11-12,-1.784732,0.113839
2024-11-13,-1.670925,-0.060059
2024-11-14,-1.730984,-0.003898
2024-11-15,-1.734882,0.070814


In [46]:
aux = yf.download('BTC-USD', start=start_date, end=pd.to_datetime("today").strftime("%Y-%m-%d"), interval='1d')
aux['Return'] = np.log(aux['Open'])
aux = pd.DataFrame({'Aux_Price': aux['Return']})

for crypto in cryptos:
    if crypto != 'BTC-USD':
        data[crypto] = pd.merge(aux, data[crypto],  left_index=True, right_index=True, how='outer').dropna()

[*********************100%%**********************]  1 of 1 completed


In [47]:
aux = yf.download('ETH-USD', start=start_date, end=pd.to_datetime("today").strftime("%Y-%m-%d"), interval='1d')
aux['Return'] = np.log(aux['Open'])
aux = pd.DataFrame({'Aux_Price': aux['Return']})

data['BTC-USD'] = pd.merge(aux, data['BTC-USD'],  left_index=True, right_index=True, how='outer').dropna()


[*********************100%%**********************]  1 of 1 completed


In [48]:
from statsmodels.tsa.stattools import adfuller

window_size = 10  # Ajuste para o tamanho desejado do histórico
def fracdiff(series, weights):
    diff_series = []
    for i in range(window_size, len(series)):
        window = series[i - window_size:i]
        diff_value = np.dot(weights, window)
        diff_series.append(diff_value)
    return pd.Series(diff_series, index=series.index[window_size:])

def find_optimal_d(series, window_size=10, threshold=0.00001, d_step=0.01, max_d=1.0):
    d = 0.6  # Inicie com d = 0.2
    while d <= max_d:
        # Calcula os pesos e a série diferenciada fracionariamente
        weights = fracdiff_weights(d, window_size)
        diff_series = fracdiff(series, weights).dropna()

        # Executa o teste ADF
        adf_result = adfuller(diff_series)
        p_value = adf_result[1]

        # Verifica se o p_value é menor que o threshold
        if p_value < threshold:
            return d, p_value  # Retorna o d encontrado e o p_value correspondente

        # Aumenta d pelo passo definido
        d += d_step

    # Caso não encontre um d adequado
    return 1, None

In [49]:
data[crypto]

Unnamed: 0_level_0,Aux_Price,Crypto_Price,Crypto_Return_Today
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-11-09,8.915544,-6.073109,0.017212
2017-11-10,8.878181,-6.059743,-0.148386
2017-11-11,8.797641,-6.201692,-0.011417
2017-11-12,8.747582,-6.211613,-0.117845
2017-11-13,8.689170,-6.322750,0.162630
...,...,...,...
2024-11-12,11.393078,-1.784732,0.113839
2024-11-13,11.384296,-1.670925,-0.060059
2024-11-14,11.413932,-1.730984,-0.003898
2024-11-15,11.376925,-1.734882,0.070814


In [50]:
data_complete = {}
for crypto in cryptos:
    # Baixar dados e calcular retorno logarítmico
    df = yf.download(crypto, start=start_date, end=pd.to_datetime("today").strftime("%Y-%m-%d"), interval='1d')
    df['Return'] = log_return(df)

    # Construir DataFrame final, pegando o retorno do dia anterior e o atual
    df_final = pd.DataFrame({
        'Crypto_Return_Today': df['Return']
    }).dropna()

    # Adicionar a coluna 'exp_value' com valores NaN
    df_final['exp_value'] = np.nan

    # Armazenar no dicionário
    data_complete[crypto] = df_final

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [51]:
print(len(data_complete['ETH-USD']))
data_complete['ETH-USD']

2565


Unnamed: 0_level_0,Crypto_Return_Today,exp_value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-11-09,0.038888,
2017-11-10,-0.069126,
2017-11-11,0.052501,
2017-11-12,-0.021787,
2017-11-13,0.031076,
...,...,...
2024-11-12,-0.038938,
2024-11-13,-0.016140,
2024-11-14,-0.042739,
2024-11-15,0.014122,


In [52]:
# Criando a função First Transformation, que recebe uma tabela contendo os retornos de todos os dias dos ultimos X anos e retorno um vetor, que cada elemento
# é uma tabela do retorno de W+1 dias.
def FirstTransform(df, W):
    vet = []
    Linhas, Colunas = df.shape
    for i in range(Linhas, W+1, -1):
        vet.append(df.iloc[(i-W-1):i])
    return vet

In [53]:
# Aqui eu perco W+1 dias do meu dataset
for crypto in cryptos:
    data[crypto] = FirstTransform(data[crypto], W)

In [54]:
print(len(data['ETH-USD']))
data['ETH-USD'][0]

2264


Unnamed: 0_level_0,Aux_Price,Crypto_Price,Crypto_Return_Today
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-21,10.637572,7.811892,-0.006453
2024-01-22,10.634741,7.805877,-0.060516
2024-01-23,10.584530,7.745415,-0.030877
2024-01-24,10.593570,7.715012,-0.003659
2024-01-25,10.598522,7.711536,-0.007305
...,...,...,...
2024-11-12,11.393078,8.124196,-0.038938
2024-11-13,11.384296,8.084730,-0.016140
2024-11-14,11.413932,8.068566,-0.042739
2024-11-15,11.376925,8.026015,0.014122


## 2.2 Separar em treinamento e teste - Permitir Bagging

In [55]:
optimal_d = {}
p_value = {}
for crypto in cryptos:
    optimal_d[crypto] = {}
    p_value[crypto] = {}

In [56]:
for crypto in cryptos:
    del data[crypto][0]

In [57]:
for crypto in cryptos:
    for day in range(len(data[crypto])):
        data[crypto][day] =pd.read_csv(f"C:\\Users\\delve\\OneDrive\\Eu\\GitHub\\Quantamental\\Código\\Backtest\\BackTest LSTMinator\\series_fracionada\\{crypto}_{day}.csv", index_col = 'Date')

In [58]:
data['BTC-USD'][0].dropna()

Unnamed: 0_level_0,diff_Aux_Price,diff_Crypto_Price,Crypto_Return_Today
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-30,0.943695,1.301644,-0.008060
2024-01-31,0.940170,1.274320,-0.008503
2024-02-01,0.904486,1.266025,0.011817
2024-02-02,0.927151,1.281540,0.002509
2024-02-03,0.924965,1.276756,-0.004473
...,...,...,...
2024-11-10,1.034182,1.354057,0.047050
2024-11-11,1.010710,1.395153,0.097375
2024-11-12,1.042769,1.461348,-0.008488
2024-11-13,0.960464,1.386409,0.029739


In [59]:
test_target = {}
for crypto in cryptos:
    row = []
    for i in range(len(data[crypto])):
        row.append((pd.DataFrame(data[crypto][i].iloc[-1])).T)
        data[crypto][i] = data[crypto][i].drop(data[crypto][i].index[-1])
    test_target[crypto] = row

In [60]:
data['ETH-USD'][0], test_target['ETH-USD'][0]['Crypto_Return_Today']

(            diff_Aux_Price  diff_Crypto_Price  Crypto_Return_Today
 Date                                                              
 2024-01-30        1.301644           0.943695             0.011608
 2024-01-31        1.274320           0.940170            -0.026380
 2024-02-01        1.266025           0.904486             0.009442
 2024-02-02        1.281540           0.927151             0.001879
 2024-02-03        1.276756           0.924965            -0.005188
 ...                    ...                ...                  ...
 2024-11-09        1.361528           1.005684             0.055267
 2024-11-10        1.354057           1.034182             0.019172
 2024-11-11        1.395153           1.010710             0.055800
 2024-11-12        1.461348           1.042769            -0.038938
 2024-11-13        1.386409           0.960464            -0.016140
 
 [289 rows x 3 columns],
 2024-11-14   -0.042739
 Name: Crypto_Return_Today, dtype: float64)

## 2.3 Bagging

In [61]:
# Definindo uma função bagging, que recebe um dataframe e retorna um vetor de dataframes.
def Bagging(df, n, gamma):
    df_bagged = []
    for i in range(gamma):
        aux = df.sample(n = n, random_state = i)
        aux = aux.sort_index()
        df_bagged.append(aux)
    return df_bagged

# 3. Machine Learning

* vamos peimeiro fazer do modo 1 para todos os dias
* depois aplicar para os ultimos 290 dias o modo 2 (contém bagging e purge K-Fold-CV)

## 3.1 Função do Modelo


In [62]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Conv1D, MaxPooling1D
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau


def create_model(a, b):
    model = Sequential([
        Input(shape=(a, b)),
        Conv1D(15, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        LSTM(units=50, return_sequences=False),
        BatchNormalization(),
        Dropout(0.5),
        Dense(units=128, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(units=1)
    ])
    optimizer = RMSprop(learning_rate=1e-4)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model


In [63]:
# Função do modelo LSTM
def Model(df, time_step, features, model=None):
    # Normalizando o dataframe
    #scaler = StandardScaler()
    #df_scaled = scaler.fit_transform(df)
    #df_array = np.array(df_scaled)
    df_array = np.array(df)

    # Transformando no formato LSTM
    X_lstm = []
    y_lstm = []
    for i in range(time_step, len(df_array)):
        X_lstm.append(df_array[i-time_step:i+1, :-1])  # Pega 'timesteps' linhas anteriores
        y_lstm.append(df_array[i, -1])  # Alvo é o valor do dia seguinte
    X_lstm = np.array(X_lstm)
    y_lstm = np.array(y_lstm)


    # Conjunto de validação - primeiros 30 elementos
    X_val = X_lstm[-20:-1]
    y_val = y_lstm[-20:-1]
    y_val = pd.DataFrame(y_val).to_numpy().reshape((len(y_val), 1))

    # Separar o X_train (todos exceto o último elemento)
    X_train = X_lstm[:-20]
    y_train = y_lstm[:-20]
    y_train = pd.DataFrame(y_train).to_numpy()
    y_train = y_train.reshape((len(y_train), 1))
    y_train.shape

    # X_test e y_test
    X_test = X_lstm[-1:]
    y_test = y_lstm[-1:]
    y_test = y_lstm[-1:]
    y_test = pd.DataFrame(y_test).to_numpy()
    y_test = y_test.reshape((len(y_test), 1))

    # Treinar o modelo específico para a criptomoeda

    # Arquitetura 1:
    #reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=20, min_lr=1e-6, verbose=0)
    #model.fit(X_train, y_train, epochs=200, batch_size=32, verbose=0, callbacks=[reduce_lr])

    early_stopping = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)
    # Arquitetura 2:
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs= 300, verbose=0, callbacks=[early_stopping])
    # Fazer a previsão
    y_hat_scaled = model.predict(X_test)


    return y_hat_scaled

##  3.2 Treinamento com Paralelismo e Geração dos Expected Values

In [64]:
def Add(y_hat, data_complete, ind, crypto):
    data_complete[crypto].at[ind, 'exp_value'] = y_hat

In [65]:
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
    print("Usando GPU:", physical_devices[0])
else:
    print("Usando CPU")

Usando CPU


In [66]:
for crypto in cryptos:
    print(len(data[crypto]))
len(data['ETH-USD'][0])

2263
2263
3412
2263
1248
2263
1380
2263


289

In [67]:
test_target['ETH-USD'][319]
#test_target['ETH-USD'][2138]

Unnamed: 0,diff_Aux_Price,diff_Crypto_Price,Crypto_Return_Today
2023-12-31,0.66965,0.915014,-0.00458


In [None]:
aux = data_complete['ETH-USD'].loc['2019-01-05':'2019-04-10']
num_positive = (aux['Crypto_Return_Today'] > 0).sum()
num_negative = (aux['Crypto_Return_Today'] < 0).sum()

print("\n\n--------------------------------------------------------")
print("\n\nCriptomoeda: ", 'ETH')
print("\n                              Real")
print("\nNumero de Subidas: ",num_positive,"   |   ",100*num_positive / (num_negative + num_positive),"%")
print("\nNumero de Quedas : ",num_negative,"   |   ",100*num_negative / (num_negative + num_positive),"%")



--------------------------------------------------------


Criptomoeda:  ETH

                              Real

Numero de Subidas:  48    |    50.0 %

Numero de Quedas :  48    |    50.0 %


: 

In [None]:
# Função que processa cada criptomoeda (paralelismo será aplicado aqui)
def process_crypto(crypto, data, test_target, data_complete, time_step, features):
    # Criar um modelo para cada criptomoeda
    for day in range(350, 500):
        model = create_model(time_step, features)

        df_train = data[crypto][day].copy()  # df é uma tabela de 300 linhas e uma coluna
        df_test = test_target[crypto][day].copy()  # df_test é uma tabela com 1 linha e uma coluna
        df = pd.concat([df_train, df_test], ignore_index=True)  # Unindo as duas tabelas

        scaler = StandardScaler()
        df = scaler.fit_transform(df)

        y_hat_scaled = Model(df, time_step, features, model=model)  # Chamar o modelo para previsão

        y_hat_scaled = np.repeat(y_hat_scaled, features+1, axis=1)
        y_hat_scaled = pd.DataFrame(y_hat_scaled, columns=(['Predictions_1'], ['Predictions_2'],  ['Predictions_3']))

        y_hat = scaler.inverse_transform(y_hat_scaled)[0][features]

        Add(y_hat, data_complete, df_test.index[0], crypto)  # Adicionar o resultado na tabela final

    print("\nMoeda: ", crypto, " processada")

# Número de samples que vamos testar o modelo
time_step = w
features = 2

# Criar o executor para rodar as criptomoedas em paralelo
with ThreadPoolExecutor() as executor:
    # Executar o processo para cada criptomoeda simultaneamente
    futures = [
        executor.submit(process_crypto, crypto, data, test_target, data_complete, time_step, features)
        for crypto in cryptos
    ]

    # Aguardar a conclusão de todas as threads
    for future in futures:
        future.result()

print("Processamento em paralelo finalizado.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 849ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 965ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 870ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 872ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 738ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 933ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 981ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step   
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 634ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 784ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 887ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 870ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 878ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

16min para 31


In [None]:
import winsound

# Frequência em Hertz e duração em milissegundos
frequencia = 1000  # 1 kHz
duracao = 500      # 500 ms

# Emite o som
winsound.Beep(frequencia, duracao)

In [33]:
df = data_complete['BTC-USD']
df.dropna()

Unnamed: 0_level_0,Crypto_Return_Today,exp_value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-12-01,0.025411,0.000544
2023-12-02,0.020139,0.003122
2023-12-03,0.012742,0.004571
2023-12-04,0.048851,0.001759
2023-12-05,0.048678,0.002455
2023-12-06,-0.007596,0.001618
2023-12-07,-0.010946,0.000718
2023-12-08,0.019975,0.000392
2023-12-09,-0.01033,0.004919
2023-12-10,0.001173,0.010059


In [34]:
data_aux = {}
for crypto in cryptos:
    data_aux[crypto] = pd.read_csv(f"{crypto}.csv", index_col='Date')

FileNotFoundError: [Errno 2] No such file or directory: 'BTC-USD.csv'

In [None]:
data_aux['BTC-USD'].dropna()

Unnamed: 0_level_0,Crypto_Return_Today,exp_value,ae
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-02-13,-0.005911,-0.000663,0.005248
2019-02-14,-0.003943,-0.000381,0.003561
2019-02-15,0.000951,-0.002385,0.003336
2019-02-16,0.004008,-0.000742,0.004749
2019-02-17,0.011079,-0.000184,0.011263
...,...,...,...
2019-10-29,0.019196,0.005857,0.013339
2019-10-30,-0.023271,0.004103,0.027373
2019-10-31,-0.000312,0.005313,0.005625
2019-11-01,0.007273,-0.000666,0.007939


In [None]:
data_aux['BTC-USD'][-2100:]
#data_complete['BTC-USD'][:-2100]

Unnamed: 0_level_0,Crypto_Return_Today,exp_value,ae
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-02-13,-0.005911,-0.000663,0.005248
2019-02-14,-0.003943,-0.000381,0.003561
2019-02-15,0.000951,-0.002385,0.003336
2019-02-16,0.004008,-0.000742,0.004749
2019-02-17,0.011079,-0.000184,0.011263
...,...,...,...
2024-11-08,0.008431,,
2024-11-09,0.002904,,
2024-11-10,0.047050,,
2024-11-11,0.097375,,


In [None]:
for crypto in cryptos:
    df_aux = data_complete[crypto]
    df_aux = pd.concat([df_aux[:-2100], data_aux[crypto][-2100:]])
    df_aux.index = pd.to_datetime(df_aux.index)
    data_complete[crypto] = df_aux


In [None]:
for crypto in cryptos:
    data_complete[crypto]= data_complete[crypto][~data_complete[crypto].index.duplicated(keep='first')]

In [37]:
data_complete['BTC-USD'].dropna()

Unnamed: 0_level_0,Crypto_Return_Today,exp_value,ae
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-12-01,0.025411,0.000544,0.024867
2023-12-02,0.020139,0.003122,0.017017
2023-12-03,0.012742,0.004571,0.008172
2023-12-04,0.048851,0.001759,0.047091
2023-12-05,0.048678,0.002455,0.046223
2023-12-06,-0.007596,0.001618,0.009215
2023-12-07,-0.010946,0.000718,0.011664
2023-12-08,0.019975,0.000392,0.019583
2023-12-09,-0.01033,0.004919,0.015249
2023-12-10,0.001173,0.010059,0.008887


In [38]:
for crypto in cryptos:
    # Define o nome do arquivo como o nome da criptomoeda seguido de .csv
    filename = f"{crypto}.csv"

    # Salva o DataFrame data_complete[crypto] no arquivo .csv
    data_complete[crypto].to_csv(filename, index=True)

    print(f"Arquivo {filename} salvo com sucesso!")

Arquivo BTC-USD.csv salvo com sucesso!
Arquivo ETH-USD.csv salvo com sucesso!
Arquivo LTC-USD.csv salvo com sucesso!
Arquivo ADA-USD.csv salvo com sucesso!
Arquivo DOT-USD.csv salvo com sucesso!
Arquivo LINK-USD.csv salvo com sucesso!
Arquivo SOL-USD.csv salvo com sucesso!
Arquivo TRX-USD.csv salvo com sucesso!


In [35]:
data_visual = data_complete.copy()

In [36]:
results = []

for crypto in cryptos:
    df = data_visual[crypto]
    df['ae'] = df['exp_value'] - df['Crypto_Return_Today']
    df['ae'] = abs(df['ae'])
    df = df.dropna()

    # Cálculos dos valores
    num_positive = (df['Crypto_Return_Today'] > 0).sum()
    num_negative = (df['Crypto_Return_Today'] < 0).sum()
    hat_pos = (df['exp_value'] > 0).sum()
    hat_neg = (df['exp_value'] < 0).sum()

    # Calcula a porcentagem de subidas e quedas reais e previstas
    total_real = num_positive + num_negative
    total_pred = hat_pos + hat_neg
    perc_positive_real = (100 * num_positive / total_real) if total_real > 0 else None
    perc_positive_pred = (100 * hat_pos / total_pred) if total_pred > 0 else None

    # Calcular métricas de avaliação
    TP = ((df['Crypto_Return_Today'] > 0) & (df['exp_value'] > 0)).sum()
    TN = ((df['Crypto_Return_Today'] < 0) & (df['exp_value'] < 0)).sum()
    FP = ((df['Crypto_Return_Today'] < 0) & (df['exp_value'] > 0)).sum()
    FN = ((df['Crypto_Return_Today'] > 0) & (df['exp_value'] < 0)).sum()
    mae = df['ae'].mean()
    acc = (TP + TN) / (TP + FP + FN + TN) if (TP + FP + FN + FP) > 0 else None
    sen = TP / (TP + FN) if (TP + FN) > 0 else None
    spe = TN / (TN + FP) if (TN + FP) > 0 else None
    pre = TP / (TP + FP) if (TP + FP) > 0 else None

    # Lower bound dos 10% maiores
    limite_90 = df['exp_value'].quantile(0.75)
    menor_valor_10_maiores = df[df['exp_value'] >= limite_90]['exp_value'].min()

    # Precisão para valores maiores que 0.008
    TP_threshold = ((df['Crypto_Return_Today'] > 0) & (df['exp_value'] > 0) & (df['exp_value'] >= limite_90)).sum()
    FP_threshold = ((df['Crypto_Return_Today'] < 0) & (df['exp_value'] > 0) & (df['exp_value'] >= limite_90)).sum()
    prec_maior_008 = TP_threshold / (TP_threshold + FP_threshold) if (TP_threshold + FP_threshold) > 0 else None

    # Adicionando os resultados para a criptomoeda atual
    results.append({
        "Crypto": crypto,
        "Num Subidas (Real)": num_positive,
        "Perc Subidas (Real)": perc_positive_real,
        "Num Quedas (Real)": num_negative,
        "Num Subidas (Pred)": hat_pos,
        "Perc Subidas (Pred)": perc_positive_pred,
        "Num Quedas (Pred)": hat_neg,
        "MAE": mae,
        "Acc": acc,
        "Sen": sen,
        "Spe": spe,
        "Pre": pre,
        "Lower bound dos 10% maiores": menor_valor_10_maiores,
        "Precisao para maiores que 0.008": prec_maior_008
    })

# Criando um DataFrame com os resultados
results_df = pd.DataFrame(results)

# Exibindo a tabela com os resultados

# Exibindo a tabela com os resultados
#results_df.to_csv("resultado_arquitetura_2.csv", index = True)
results_df

Unnamed: 0,Crypto,Num Subidas (Real),Perc Subidas (Real),Num Quedas (Real),Num Subidas (Pred),Perc Subidas (Pred),Num Quedas (Pred),MAE,Acc,Sen,Spe,Pre,Lower bound dos 10% maiores,Precisao para maiores que 0.008
0,BTC-USD,19,61.290323,12,28,90.322581,3,0.018914,0.580645,0.894737,0.083333,0.607143,0.004497,0.375
1,ETH-USD,17,54.83871,14,27,87.096774,4,0.020706,0.483871,0.823529,0.071429,0.518519,0.004858,0.625
2,LTC-USD,15,48.387097,16,17,54.83871,14,0.018874,0.483871,0.533333,0.4375,0.470588,0.001981,0.5
3,ADA-USD,17,54.83871,14,24,77.419355,7,0.041554,0.451613,0.705882,0.142857,0.5,0.009803,0.5
4,DOT-USD,19,61.290323,12,24,77.419355,7,0.041876,0.516129,0.736842,0.166667,0.583333,0.007743,0.5
5,LINK-USD,15,48.387097,16,24,77.419355,7,0.034703,0.451613,0.733333,0.1875,0.458333,0.005759,0.375
6,SOL-USD,17,54.83871,14,30,96.774194,1,0.047594,0.580645,1.0,0.071429,0.566667,0.017734,0.5
7,TRX-USD,17,54.83871,14,23,74.193548,8,0.011777,0.483871,0.705882,0.214286,0.521739,0.00259,0.375


In [None]:
Pre_media = results_df['Pre'].mean()
Acc_media = results_df['Acc'].mean()
MAE_media = results_df['MAE'].mean()
Precisao_25_media = results_df['Precisao para maiores que 0.008'].mean()
print("Pre_media ", Pre_media)
print("Acc_media ", Acc_media)
print("MAE_media ", MAE_media)
print("Precisao_25_media", Precisao_25_media)

Pre_media  0.5117948717948718
Acc_media  0.5172786177105831
MAE_media  0.029441449071043672
Precisao_25_media 0.5086206896551724


## 3.3 Método 2 para os ultimos X intervalos

* Aplicar bagging
* Aplicar CV e Otimização de HP
* Fazer treinamento para uma janela W maior de tempo

In [None]:
# for day in data[crypto]:
#       day = bagging(day)

# for day in data[crypto]:
#       for sample in day:
#               sample = CV(sample) # transforma sample em um vetor de folds. Ultimos 10% do fold são validation e 90% são pure train

# for day in data[crypto]:
#       for sample in day:
#               for fold in sample:
#                       y_hat = model(fold)
#               y_hat_mean = y_hat / len(sample)
#       y_hat_mean_mean = y_hat_mean / len(day)