In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler
import time
import matplotlib.pyplot as plt
import math
import copy
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ProcessPoolExecutor
from tensorflow.keras.layers import Input
from sklearn.preprocessing import StandardScaler
from keras.layers import Dropout

In [2]:
# HIPERPARÂMETROS:
start_date = '2014-08-21'
W = 250  # Lembrando que o tamanho do treinamento será W - w
w = 14  

In [3]:
# Criptomoedas que vamos escolher para nosso portifólio
cryptos = [
    'BTC-USD', 'LTC-USD'
]

In [4]:
## Função para calcular o retorno logarítmico
def log_return(series):
    return np.log(series['Close'] / series['Open'])

In [5]:
data = {}
for crypto in cryptos:
    # Baixar dados e calcular retorno logarítmico
    df = yf.download(crypto, start=start_date, end=pd.to_datetime("today").strftime("%Y-%m-%d"), interval='1d')
    df['Return'] = log_return(df)

    # Construir DataFrame final, pegando o retorno do dia anterior e o atual
    df_final = pd.DataFrame({
        #'Volume': df['Volume'],
        'Crypto_Return_Today': df['Return']
    }).dropna()

    data[crypto] = df_final

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [6]:
aux = yf.download('BTC-USD', start=start_date, end=pd.to_datetime("today").strftime("%Y-%m-%d"), interval='1d')
aux['Return'] = log_return(aux)
aux = pd.DataFrame({'BTC-Return': aux['Return']})

data['LTC-USD'] = pd.merge(aux, data['LTC-USD'],  left_index=True, right_index=True, how='outer').dropna()

aux = yf.download('LTC-USD', start=start_date, end=pd.to_datetime("today").strftime("%Y-%m-%d"), interval='1d')
aux['Return'] = log_return(aux)
aux = pd.DataFrame({'LTC-Return': aux['Return']})

data['BTC-USD'] = pd.merge(aux, data['BTC-USD'],  left_index=True, right_index=True, how='outer').dropna()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [7]:
data['LTC-USD']

Unnamed: 0_level_0,BTC-Return,Crypto_Return_Today
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-09-17,-0.018480,-0.005390
2014-09-18,-0.073606,-0.078024
2014-09-19,-0.071607,-0.079802
2014-09-20,0.035423,-0.009926
2014-09-21,-0.022963,-0.004031
...,...,...
2024-11-07,0.003534,0.006597
2024-11-08,0.008431,0.018171
2024-11-09,0.002904,0.017163
2024-11-10,0.047050,0.032670


In [8]:
data_complete = {}
for crypto in cryptos:
    # Baixar dados e calcular retorno logarítmico
    df = yf.download(crypto, start=start_date, end=pd.to_datetime("today").strftime("%Y-%m-%d"), interval='1d')
    df['Return'] = log_return(df)

    # Construir DataFrame final, pegando o retorno do dia anterior e o atual
    df_final = pd.DataFrame({
        #'Volume': df['Volume'],
        'Crypto_Return_Today': df['Return']
    }).dropna()

    # Adicionar a coluna 'exp_value' com valores NaN
    df_final['exp_value'] = np.nan

    # Armazenar no dicionário
    data_complete[crypto] = df_final

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [9]:
print(len(data_complete['LTC-USD']))
data_complete['LTC-USD']

3709


Unnamed: 0_level_0,Crypto_Return_Today,exp_value
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-09-17,-0.005390,
2014-09-18,-0.078024,
2014-09-19,-0.079802,
2014-09-20,-0.009926,
2014-09-21,-0.004031,
...,...,...
2024-11-07,0.006597,
2024-11-08,0.018171,
2024-11-09,0.017163,
2024-11-10,0.032670,


In [10]:
# Criando a função First Transformation, que recebe uma tabela contendo os retornos de todos os dias dos ultimos X anos e retorno um vetor, que cada elemento
# é uma tabela do retorno de W+1 dias.
def FirstTransform(df, W):
    vet = []
    Linhas, Colunas = df.shape
    for i in range(Linhas, W+1, -1):
        vet.append(df.iloc[(i-W-1):i])
    return vet

In [11]:
# Aqui eu perco W+1 dias do meu dataset
for crypto in cryptos:
    data[crypto] = FirstTransform(data[crypto], W)

In [12]:
print(len(data['LTC-USD']))
data['LTC-USD'][0]

3458


Unnamed: 0_level_0,BTC-Return,Crypto_Return_Today
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-03-06,0.035894,0.045173
2024-03-07,0.012415,0.026196
2024-03-08,0.020143,0.005920
2024-03-09,0.002919,0.024694
2024-03-10,0.007556,-0.037443
...,...,...
2024-11-07,0.003534,0.006597
2024-11-08,0.008431,0.018171
2024-11-09,0.002904,0.017163
2024-11-10,0.047050,0.032670


## 2.2 Separar em treinamento e teste - Permitir Bagging

In [13]:
test_target = {}
for crypto in cryptos:
    row = []
    for i in range(len(data[crypto])):
        row.append((pd.DataFrame(data[crypto][i].iloc[-1])).T)
        data[crypto][i] = data[crypto][i].drop(data[crypto][i].index[-1])
    test_target[crypto] = row

In [14]:
data['LTC-USD'][0], test_target['LTC-USD'][0]['Crypto_Return_Today']

(            BTC-Return  Crypto_Return_Today
 Date                                       
 2024-03-06    0.035894             0.045173
 2024-03-07    0.012415             0.026196
 2024-03-08    0.020143             0.005920
 2024-03-09    0.002919             0.024694
 2024-03-10    0.007556            -0.037443
 ...                ...                  ...
 2024-11-06    0.086684             0.081865
 2024-11-07    0.003534             0.006597
 2024-11-08    0.008431             0.018171
 2024-11-09    0.002904             0.017163
 2024-11-10    0.047050             0.032670
 
 [250 rows x 2 columns],
 2024-11-11    0.045925
 Name: Crypto_Return_Today, dtype: float64)

## 2.3 Bagging

In [15]:
# Definindo uma função bagging, que recebe um dataframe e retorna um vetor de dataframes.
def Bagging(df, n, gamma):
    df_bagged = []
    for i in range(gamma):
        aux = df.sample(n = n, random_state = i)
        aux = aux.sort_index()
        df_bagged.append(aux)
    return df_bagged

# 3. Machine Learning

* vamos peimeiro fazer do modo 1 para todos os dias
* depois aplicar para os ultimos 290 dias o modo 2 (contém bagging e purge K-Fold-CV)

## 3.1 Função do Modelo


In [16]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau


def create_model(a, b):
    model = Sequential([
        Input(shape=(a, b)),
        LSTM(units=64, return_sequences=True),# kernel_regularizer=l2(0.002)),
        Dropout(0.3),
        BatchNormalization(),
        LSTM(units=32, return_sequences=False),
        Dropout(0.3),
        Dense(units=1)
    ])
    optimizer = RMSprop(learning_rate=1e-5)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model


In [17]:
# Função do modelo LSTM
def Model(df, time_step, features, model=None):
    # Normalizando o dataframe
    #scaler = StandardScaler()
    #df_scaled = scaler.fit_transform(df)
    #df_array = np.array(df_scaled)
    df_array = np.array(df)

    # Transformando no formato LSTM
    X_lstm = []
    y_lstm = []
    for i in range(time_step, len(df_array)):
        X_lstm.append(df_array[i-time_step:i, :])  # Pega 'timesteps' linhas anteriores
        y_lstm.append(df_array[i])  # Alvo é o valor do dia seguinte
    X_lstm = np.array(X_lstm)
    y_lstm = np.array(y_lstm)

    # Conjunto de validação - primeiros 30 elementos
    X_val = X_lstm[-20:-1]
    y_val = y_lstm[-20:-1]
    y_val = pd.DataFrame(y_val)[features - 1].to_numpy().reshape((len(y_val), 1))

    # Separar o X_train (todos exceto o último elemento)
    X_train = X_lstm[:-20]
    y_train = y_lstm[:-20]
    y_train = pd.DataFrame(y_train)[features - 1].to_numpy()
    y_train = y_train.reshape((len(y_train), 1))
    y_train.shape

    # X_test e y_test
    X_test = X_lstm[-1:]
    y_test = y_lstm[-1:]
    y_test = y_lstm[-1:]
    y_test = pd.DataFrame(y_test)[features - 1].to_numpy()
    y_test = y_test.reshape((len(y_test), 1))

    # Treinar o modelo específico para a criptomoeda

    # Arquitetura 1:
    #reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=3, min_lr=1e-6, verbose=0)
    #model.fit(X_train, y_train, epochs=200, batch_size=32, verbose=0, callbacks=[reduce_lr])

    early_stopping = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)
    # Arquitetura 2:
    model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs= 300, verbose=0, callbacks=[early_stopping])
    # Fazer a previsão
    y_hat_scaled = model.predict(X_test)


    return y_hat_scaled

##  3.2 Treinamento com Paralelismo e Geração dos Expected Values

In [18]:
def Add(y_hat, data_complete, ind, crypto):
    data_complete[crypto].at[ind, 'exp_value'] = y_hat

In [19]:
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
    print("Usando GPU:", physical_devices[0])
else:
    print("Usando CPU")

Usando CPU


In [20]:
for crypto in cryptos:
    print(len(data[crypto]))
len(data['LTC-USD'][0])

3458
3458


250

In [None]:
test_target['LTC-USD'][2718]
#test_target['LTC-USD'][1837]

Unnamed: 0,BTC-Return,Crypto_Return_Today
2017-06-03,0.008636,-0.048523


In [22]:
aux = data_complete['LTC-USD'].loc['2017-06-01':'2019-10-31']
num_positive = (aux['Crypto_Return_Today'] > 0).sum()
num_negative = (aux['Crypto_Return_Today'] < 0).sum()

print("\n\n--------------------------------------------------------")
print("\n\nCriptomoeda: ", 'LTC')
print("\n                              Real")
print("\nNumero de Subidas: ",num_positive,"   |   ",100*num_positive / (num_negative + num_positive),"%")
print("\nNumero de Quedas : ",num_negative,"   |   ",100*num_negative / (num_negative + num_positive),"%")



--------------------------------------------------------


Criptomoeda:  LTC

                              Real

Numero de Subidas:  418    |    47.338618346545864 %

Numero de Quedas :  465    |    52.661381653454136 %


In [23]:
test_target['LTC-USD'][2400]

Unnamed: 0,BTC-Return,Crypto_Return_Today
2018-04-17,-0.021232,0.032246


In [24]:
# Função que processa cada criptomoeda (paralelismo será aplicado aqui)
def process_crypto(crypto, data, test_target, data_complete, time_step, features):
    # Criar um modelo para cada criptomoeda
    for day in range(2200, 2400):
        model = create_model(time_step, features)

        df_train = data[crypto][day].copy()  # df é uma tabela de 300 linhas e uma coluna
        df_test = test_target[crypto][day].copy()  # df_test é uma tabela com 1 linha e uma coluna
        df = pd.concat([df_train, df_test], ignore_index=True)  # Unindo as duas tabelas

        scaler = StandardScaler()
        df = scaler.fit_transform(df)

        y_hat_scaled = Model(df, time_step, features, model=model)  # Chamar o modelo para previsão

        y_hat_scaled = np.repeat(y_hat_scaled, features, axis=1)
        y_hat_scaled = pd.DataFrame(y_hat_scaled, columns=(['Predictions_1'], ['Predictions_2']))

        y_hat = scaler.inverse_transform(y_hat_scaled)[0][features - 1]

        Add(y_hat, data_complete, df_test.index[0], crypto)  # Adicionar o resultado na tabela final

    print("\nMoeda: ", crypto, " processada")

# Número de samples que vamos testar o modelo
time_step = w
features = 2

# Criar o executor para rodar as criptomoedas em paralelo
with ThreadPoolExecutor() as executor:
    # Executar o processo para cada criptomoeda simultaneamente
    futures = [
        executor.submit(process_crypto, crypto, data, test_target, data_complete, time_step, features)
        for crypto in cryptos
    ]

    # Aguardar a conclusão de todas as threads
    for future in futures:
        future.result()

print("Processamento em paralelo finalizado.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 360ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 332ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 384ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 511ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 999ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 562ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 458ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 456ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 400ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 392ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 373ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 430ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 534ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

42min para 100 dias


5e-3 com paciencia 40 com kernel

Pre_media  0.7916666666666666
Acc_media  0.6333333333333333
MAE_media  0.04967512944185867

1e-3 com paciencia 40 e kernel

MAE: 0.053194381919197065 
Acc: 0.5081967213114754
Pre: 0.6666666666666666 

1e-3 paciencia 40 e sem kernel

MAE: 0.053384303365326284
Acc: 0.5238095238095238
Pre: 0.6739130434782609

1e-4 paciencia 20 e sem kernel

Pre_media  0.6304347826086957
Acc_media  0.45454545454545453
MAE_media  0.05325952124305713

5e-5 paciencia 40 e sem kernel

Pre_media  0.6444444444444445
Acc_media  0.484375
MAE_media  0.05461988293125862

1e-5 paciencia 40 com kernel

Pre_media  0.6326530612244898
Acc_media  0.45588235294117646
MAE_media  0.05405173270290354

1e-5 paciencia 40 sem kernel
Pre_media  0.6818181818181818
Acc_media  0.5666666666666667
MAE_media  0.05313811674901827

In [32]:
df = data_complete['BTC-USD']
df.dropna()

Unnamed: 0_level_0,Crypto_Return_Today,exp_value,ae
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-04-18,0.027192,0.004480,0.022712
2018-04-19,0.016415,0.002167,0.014248
2018-04-20,0.065273,0.002994,0.062279
2018-04-21,0.005274,0.001620,0.003653
2018-04-22,-0.013832,0.001453,0.015285
...,...,...,...
2018-10-30,-0.000437,-0.002288,0.001851
2018-10-31,-0.003063,-0.001773,0.001290
2018-11-01,0.009395,-0.001491,0.010886
2018-11-02,0.001491,-0.001967,0.003458


In [26]:
data_visual = data_complete

In [27]:
for crypto in cryptos:
    df = data_visual[crypto]
    df['ae'] = df['exp_value'] - df['Crypto_Return_Today']
    df['ae'] = abs(df['ae'])
    df = df.dropna()
    num_positive = (df['Crypto_Return_Today'] > 0).sum()
    num_negative = (df['Crypto_Return_Today'] < 0).sum()

    hat_pos = (df['exp_value'] > 0).sum()
    hat_neg = (df['exp_value'] < 0).sum()


    TP = ((df['Crypto_Return_Today'] > 0)  & (df['exp_value'] > 0)).sum()
    TN = ((df['Crypto_Return_Today'] < 0)  & (df['exp_value'] < 0)).sum()
    FP = ((df['Crypto_Return_Today'] < 0)  & (df['exp_value'] > 0)).sum()
    FN = ((df['Crypto_Return_Today'] > 0)  & (df['exp_value'] < 0)).sum()

    #rmse = np.sqrt(((df['Crypto_Return_Today'] - df['exp_value']) ** 2).mean())
    print("\n\n--------------------------------------------------------")
    print("\n\nCriptomoeda: ", crypto)
    print("\n                              Real")
    print("\nNumero de Subidas: ",num_positive,"   |   ",100*num_positive / (num_negative + num_positive),"%")
    print("\nNumero de Quedas : ",num_negative,"   |   ",100*num_negative / (num_negative + num_positive),"%")

    print("\n                              Predito")
    print("\nNumero de Subidas: ",hat_pos,"   |   ", 100*hat_pos / (hat_neg + hat_pos),"%")
    print("\nNumero de Quedas : ",hat_neg,"   |   ", 100*hat_neg / (hat_neg + hat_pos),"%")

    print("\nMean Absolute Error MAE: ", df['ae'].mean())
    #print("\nRoot Mean Squared Error RMSE: ", rmse)
    print("\nAcc: ", (TP+TN) / (TP + FP + FN + FP))
    print("\nSen: ", (TP) / (TP + FN)) # significa, entre os que subiram, quantos % eu consegui identificar que realmente subiu
    print("\nSpe: ", (TP) / (TP + FN)) # significa, entre os que cairam, quantos % eu consegui identificar que realmente caiu
    print("\nPre: ", (TP) / (TP + FP)) # significa, entre os que eu disse que ia subir, quantos % realmente subiram

    TP = ((df['Crypto_Return_Today'] > 0)  & (df['exp_value'] > 0) & (df['exp_value'] >= 0.0002)).sum()
    FP = (((df['Crypto_Return_Today'] < 0)  & (df['exp_value'] > 0)) & (df['exp_value'] >= 0.0002)).sum()

    limite_90 = df['exp_value'].quantile(0.9)
    menor_valor_10_maiores = df[df['exp_value'] >= limite_90]['exp_value'].min()
    print("\nLower bound dos 10% maiores: ", menor_valor_10_maiores)

    print("\nPrecisao para maiores que 0.008: ", (TP) / (TP + FP)) # significa, entre os que eu disse que ia subir, quantos % realmente subiram



--------------------------------------------------------


Criptomoeda:  BTC-USD

                              Real

Numero de Subidas:  107    |    53.5 %

Numero de Quedas :  93    |    46.5 %

                              Predito

Numero de Subidas:  66    |    33.0 %

Numero de Quedas :  134    |    67.0 %

Mean Absolute Error MAE:  0.020613947147483113

Acc:  0.52

Sen:  0.29906542056074764

Spe:  0.29906542056074764

Pre:  0.48484848484848486

Lower bound dos 10% maiores:  0.0033623194321990013

Precisao para maiores que 0.008:  0.5


--------------------------------------------------------


Criptomoeda:  LTC-USD

                              Real

Numero de Subidas:  86    |    43.0 %

Numero de Quedas :  114    |    57.0 %

                              Predito

Numero de Subidas:  72    |    36.0 %

Numero de Quedas :  128    |    64.0 %

Mean Absolute Error MAE:  0.031734112872896014

Acc:  0.6

Sen:  0.3488372093023256

Spe:  0.3488372093023256

Pre:  0.416666666666666

In [28]:
data_aux = {}
for crypto in cryptos:
    data_aux[crypto] = pd.read_csv(f"{crypto}.csv", index_col='Date')

In [29]:
data_aux['BTC-USD'].dropna()

Unnamed: 0_level_0,Crypto_Return_Today,exp_value,ae
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-06-04,-0.014223,0.006509,0.020732
2017-06-05,0.067116,0.004892,0.062224
2017-06-06,0.062086,0.004016,0.058070
2017-06-07,-0.049003,0.002745,0.051749
2017-06-08,0.030813,0.005202,0.025611
...,...,...,...
2018-04-13,-0.000649,0.003091,0.003740
2018-04-14,0.014069,0.004414,0.009655
2018-04-15,0.040399,0.000049,0.040350
2018-04-16,-0.034023,0.010514,0.044537


In [29]:
data_complete['BTC-USD'].dropna()

Unnamed: 0_level_0,Crypto_Return_Today,exp_value,ae
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-06-04,-0.014223,0.006509,0.020732
2017-06-05,0.067116,0.004892,0.062224
2017-06-06,0.062086,0.004016,0.058070
2017-06-07,-0.049003,0.002745,0.051749
2017-06-08,0.030813,0.005202,0.025611
...,...,...,...
2017-11-14,0.011256,-0.003516,0.014772
2017-11-15,0.097678,0.005719,0.091959
2017-11-16,0.072220,0.002352,0.069868
2017-11-17,-0.018581,0.008818,0.027399


In [39]:
data_aux['BTC-USD'][:-2400]
data_complete['BTC-USD'][-2400:]

Unnamed: 0_level_0,Crypto_Return_Today,exp_value,ae
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-04-18,0.027192,0.004480,0.022712
2018-04-19,0.016415,0.002167,0.014248
2018-04-20,0.065273,0.002994,0.062279
2018-04-21,0.005274,0.001620,0.003653
2018-04-22,-0.013832,0.001453,0.015285
...,...,...,...
2024-11-07,0.003534,,
2024-11-08,0.008431,,
2024-11-09,0.002904,,
2024-11-10,0.047050,,


In [40]:
for crypto in cryptos:
    df_aux = data_complete[crypto]
    df_aux = pd.concat([data_aux[crypto][:-2400], df_aux[-2400:]])
    df_aux.index = pd.to_datetime(df_aux.index)
    data_complete[crypto] = df_aux


In [41]:
for crypto in cryptos:
    data_complete[crypto]= data_complete[crypto][~data_complete[crypto].index.duplicated(keep='first')]

In [42]:
data_complete['BTC-USD'].dropna()

Unnamed: 0_level_0,Crypto_Return_Today,exp_value,ae
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-06-04,-0.014223,0.006509,0.020732
2017-06-05,0.067116,0.004892,0.062224
2017-06-06,0.062086,0.004016,0.058070
2017-06-07,-0.049003,0.002745,0.051749
2017-06-08,0.030813,0.005202,0.025611
...,...,...,...
2018-10-30,-0.000437,-0.002288,0.001851
2018-10-31,-0.003063,-0.001773,0.001290
2018-11-01,0.009395,-0.001491,0.010886
2018-11-02,0.001491,-0.001967,0.003458


In [43]:
for crypto in cryptos:
    # Define o nome do arquivo como o nome da criptomoeda seguido de .csv
    filename = f"{crypto}.csv"

    # Salva o DataFrame data_complete[crypto] no arquivo .csv
    data_complete[crypto].to_csv(filename, index=True)

    print(f"Arquivo {filename} salvo com sucesso!")

Arquivo BTC-USD.csv salvo com sucesso!
Arquivo LTC-USD.csv salvo com sucesso!


In [44]:
data_visual = data_complete.copy()

In [45]:
results = []

for crypto in cryptos:
    df = data_visual[crypto]
    df['ae'] = df['exp_value'] - df['Crypto_Return_Today']
    df['ae'] = abs(df['ae'])
    df = df.dropna()
    
    # Cálculos dos valores
    num_positive = (df['Crypto_Return_Today'] > 0).sum()
    num_negative = (df['Crypto_Return_Today'] < 0).sum()
    hat_pos = (df['exp_value'] > 0).sum()
    hat_neg = (df['exp_value'] < 0).sum()

    # Calcula a porcentagem de subidas e quedas reais e previstas
    total_real = num_positive + num_negative
    total_pred = hat_pos + hat_neg
    perc_positive_real = (100 * num_positive / total_real) if total_real > 0 else None
    perc_positive_pred = (100 * hat_pos / total_pred) if total_pred > 0 else None
    
    # Calcular métricas de avaliação
    TP = ((df['Crypto_Return_Today'] > 0) & (df['exp_value'] > 0)).sum()
    TN = ((df['Crypto_Return_Today'] < 0) & (df['exp_value'] < 0)).sum()
    FP = ((df['Crypto_Return_Today'] < 0) & (df['exp_value'] > 0)).sum()
    FN = ((df['Crypto_Return_Today'] > 0) & (df['exp_value'] < 0)).sum()
    mae = df['ae'].mean()
    acc = (TP + TN) / (TP + FP + FN + FP) if (TP + FP + FN + FP) > 0 else None
    sen = TP / (TP + FN) if (TP + FN) > 0 else None
    spe = TN / (TN + FP) if (TN + FP) > 0 else None
    pre = TP / (TP + FP) if (TP + FP) > 0 else None

    # Lower bound dos 10% maiores
    limite_90 = df['exp_value'].quantile(0.9)
    menor_valor_10_maiores = df[df['exp_value'] >= limite_90]['exp_value'].min()

    # Precisão para valores maiores que 0.008
    TP_threshold = ((df['Crypto_Return_Today'] > 0) & (df['exp_value'] > 0) & (df['exp_value'] >= 0.03)).sum()
    FP_threshold = ((df['Crypto_Return_Today'] < 0) & (df['exp_value'] > 0) & (df['exp_value'] >= 0.03)).sum()
    prec_maior_008 = TP_threshold / (TP_threshold + FP_threshold) if (TP_threshold + FP_threshold) > 0 else None

    # Adicionando os resultados para a criptomoeda atual
    results.append({
        "Crypto": crypto,
        "Num Subidas (Real)": num_positive,
        "Perc Subidas (Real)": perc_positive_real,
        "Num Quedas (Real)": num_negative,
        "Num Subidas (Pred)": hat_pos,
        "Perc Subidas (Pred)": perc_positive_pred,
        "Num Quedas (Pred)": hat_neg,
        "MAE": mae,
        "Acc": acc,
        "Sen": sen,
        "Spe": spe,
        "Pre": pre,
        "Lower bound dos 10% maiores": menor_valor_10_maiores,
        "Precisao para maiores que 0.008": prec_maior_008
    })

# Criando um DataFrame com os resultados
results_df = pd.DataFrame(results)

# Exibindo a tabela com os resultados

# Exibindo a tabela com os resultados
results_df.to_csv("resultado_arquitetura_1.csv", index = True)
results_df

Unnamed: 0,Crypto,Num Subidas (Real),Perc Subidas (Real),Num Quedas (Real),Num Subidas (Pred),Perc Subidas (Pred),Num Quedas (Pred),MAE,Acc,Sen,Spe,Pre,Lower bound dos 10% maiores,Precisao para maiores que 0.008
0,BTC-USD,282,54.440154,236,368,71.042471,150,0.033799,0.410828,0.691489,0.266949,0.529891,0.010514,
1,LTC-USD,241,46.525097,277,341,65.830116,177,0.045698,0.429285,0.66805,0.350181,0.472141,0.013157,0.5


In [44]:
Pre_media = results_df['Pre'].mean()
Acc_media = results_df['Acc'].mean()
MAE_media = results_df['MAE'].mean()
Precisao_25_media = results_df['Precisao para maiores que 0.008'].mean()
print("Pre_media ", Pre_media)
print("Acc_media ", Acc_media)
print("MAE_media ", MAE_media)
print("Precisao_25_media", Precisao_25_media)

Pre_media  0.5133619734606958
Acc_media  0.3653011887750137
MAE_media  0.0482851172743167
Precisao_25_media 0.5


gerar uns graficos aqui

## 3.3 Método 2 para os ultimos X intervalos

* Aplicar bagging
* Aplicar CV e Otimização de HP
* Fazer treinamento para uma janela W maior de tempo

In [None]:
# for day in data[crypto]:
#       day = bagging(day)

# for day in data[crypto]:
#       for sample in day:
#               sample = CV(sample) # transforma sample em um vetor de folds. Ultimos 10% do fold são validation e 90% são pure train

# for day in data[crypto]:
#       for sample in day:
#               for fold in sample:
#                       y_hat = model(fold)
#               y_hat_mean = y_hat / len(sample)
#       y_hat_mean_mean = y_hat_mean / len(day)