In [55]:
import pandas as pd
import pandas_ta as ta
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
import plotly.graph_objects as go

In [86]:
# Leitura do CSV
df = pd.read_csv('brlbtc.csv')

In [87]:
# Conversão do tempo de milissegundos para datetime
df['Open time'] = pd.to_datetime(df['Open time'], unit='ms')
df['Close time'] = pd.to_datetime(df['Close time'], unit='ms')

In [88]:
# Criação dos campos de variação total e variação real
df['Candle_Variation'] = df['High'] - df['Low']
df['Real_Variation'] = df['Close'] - df['Open']

In [59]:
# Adicionando indicadores de preço
df['SMA'] = ta.sma(df['Close'], length=14)
df['EMA'] = ta.ema(df['Close'], length=14)
df['RSI'] = ta.rsi(df['Close'], length=14)
df['MACD'] = ta.macd(df['Close'])['MACD_12_26_9']
df['Bollinger_Upper'], df['Bollinger_Lower'] = ta.bbands(df['Close'])['BBU_5_2.0'], ta.bbands(df['Close'])['BBL_5_2.0']
df['Parabolic-SAR'] = 

In [60]:
# Adicionando indicadores de volume
df['Volume_SMA'] = ta.sma(df['Volume'], length=14)
df['OBV'] = ta.obv(df['Close'], df['Volume'])

In [61]:
# Preencher valores NaN
df.fillna(df.mean(), inplace=True)

In [62]:
# Normalização dos dados
scaler = StandardScaler()
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'Candle_Variation', 'Real_Variation', 'SMA', 'EMA', 'RSI', 'MACD']
df[features] = scaler.fit_transform(df[features])

In [63]:
# Criando o alvo (target) para classificar (0 = Esperar, 1 = Comprar, 2 = Vender)
df['target'] = 0  # Inicialmente, marcar como "Esperar"
df.loc[df['Close'] > df['SMA'], 'target'] = 1  # Comprar
df.loc[df['Close'] < df['SMA'], 'target'] = 2  # Vender

In [64]:
# Verificar se o target contém exatamente 3 classes
print("Classes únicas no target antes do balanceamento:", df['target'].unique())

Classes únicas no target antes do balanceamento: [1 2]


In [65]:
# Definir as variáveis de entrada (X) e saída (y)
X = df[features].values
y = df['target'].values  # Saída agora categórica (0, 1, 2)

In [66]:
# Balanceamento das classes usando SMOTE
smote = SMOTE()
X_resampled, y_resampled = smote.fit_resample(X, y)

In [67]:
# Verificar se o balanceamento incluiu as 3 classes
print("Classes únicas após o balanceamento:", np.unique(y_resampled))

Classes únicas após o balanceamento: [1 2]


In [68]:
# Divisão dos dados em treino e validação
X_train, X_val, y_train, y_val = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [69]:
# Verificando se ainda temos as 3 classes após a divisão
print("Classes no conjunto de treino:", np.unique(y_train))
print("Classes no conjunto de validação:", np.unique(y_val))

Classes no conjunto de treino: [1 2]
Classes no conjunto de validação: [1 2]


In [70]:
# Convertendo a variável target para categórica (one-hot encoding)
from keras.utils import to_categorical
y_train = to_categorical(y_train, num_classes=3)  # Precisamos garantir 3 classes
y_val = to_categorical(y_val, num_classes=3)

In [71]:
# Garantindo que as formas dos dados estejam corretas
print(f"Formato de X_train: {X_train.shape}")
print(f"Formato de y_train: {y_train.shape}")
print(f"Formato de X_val: {X_val.shape}")
print(f"Formato de y_val: {y_val.shape}")

Formato de X_train: (409, 11)
Formato de y_train: (409, 3)
Formato de X_val: (103, 11)
Formato de y_val: (103, 3)


In [72]:
# Arquitetura da rede neural ajustada
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.2))  # Dropout para regularização
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))  # Mais Dropout
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='softmax'))  # Saída para 3 classes (comprar, vender, esperar)


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [73]:
# Compilação do modelo
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [74]:
# Treinamento do modelo
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val))

Epoch 1/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.1565 - loss: 1.1932 - val_accuracy: 0.7087 - val_loss: 0.9451
Epoch 2/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7397 - loss: 0.8978 - val_accuracy: 0.8058 - val_loss: 0.7110
Epoch 3/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8249 - loss: 0.6873 - val_accuracy: 0.8835 - val_loss: 0.4607
Epoch 4/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8259 - loss: 0.4713 - val_accuracy: 0.8835 - val_loss: 0.3291
Epoch 5/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8188 - loss: 0.3855 - val_accuracy: 0.8641 - val_loss: 0.2697
Epoch 6/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8637 - loss: 0.3077 - val_accuracy: 0.9029 - val_loss: 0.2360
Epoch 7/50
[1m13/13[0m [32m━━━━━━━━━

In [75]:
# Avaliação do modelo
y_pred = model.predict(X_val)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step


In [76]:
# Conversão das predições para rótulos
y_pred_classes = np.argmax(y_pred, axis=1)
y_val_classes = np.argmax(y_val, axis=1)

In [77]:
# Cálculo das métricas
rmse = np.sqrt(mean_squared_error(y_val_classes, y_pred_classes))
mae = mean_absolute_error(y_val_classes, y_pred_classes)
r2 = r2_score(y_val_classes, y_pred_classes)
accuracy = accuracy_score(y_val_classes, y_pred_classes)
precision = precision_score(y_val_classes, y_pred_classes, average='macro')
recall = recall_score(y_val_classes, y_pred_classes, average='macro')
f1 = f1_score(y_val_classes, y_pred_classes, average='macro')

In [78]:
# Exibindo as métricas
print(f"RMSE: {rmse}")
print(f"MAE: {mae}")
print(f"R²: {r2}")
print(f"Acurácia: {accuracy}")
print(f"Precisão: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")

RMSE: 0.2606936229533505
MAE: 0.06796116504854369
R²: 0.7275132275132274
Acurácia: 0.9320388349514563
Precisão: 0.9316981132075473
Recall: 0.9323507180650038
F1-Score: 0.9319361842726328


In [79]:
# Matriz de Confusão
conf_matrix = confusion_matrix(y_val_classes, y_pred_classes)
print(f"Matriz de Confusão:\n{conf_matrix}")

Matriz de Confusão:
[[46  3]
 [ 4 50]]


In [83]:
# Gráfico dos candles e indicadores usando Plotly
fig = go.Figure(data=[go.Candlestick(x=df['Open time'],
                open=df['Open'],
                high=df['High'],
                low=df['Low'],
                close=df['Close'])])

In [84]:
# Adicionando médias móveis
fig.add_trace(go.Scatter(x=df['Open time'], y=df['SMA'], mode='lines', name='SMA'))
fig.add_trace(go.Scatter(x=df['Open time'], y=df['EMA'], mode='lines', name='EMA'))

fig.update_layout(title='Candlestick com SMA e EMA', xaxis_title='Tempo', yaxis_title='Preço')
fig.show()