In [42]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Flatten, Dropout, LSTM
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt



  data = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed


In [205]:
import yfinance as yf
import numpy as np
import pandas as pd

symbol = "EQTL3.SA"
start = "2018-01-01"
end = "2025-06-01"
window_size = 60

data = yf.download(symbol, start=start, end=end)

# Selecionando colunas e garantindo cópia
data = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy()

# Adicionando mês e dia do ano como features
data['Month'] = data.index.month
data['DayOfYear'] = data.index.dayofyear

# 2. Cálculo dos indicadores técnicos
data['EMA9'] = data['Open'].ewm(span=9, adjust=False).mean()
data['SMA20'] = data['Open'].rolling(window=20).mean()
data['SMA200'] = data['Open'].rolling(window=200).mean()

# RSI
delta = data['Close'].diff()
gain = delta.clip(lower=0)
loss = -delta.clip(upper=0)
avg_gain = gain.rolling(window=14).mean()
avg_loss = loss.rolling(window=14).mean()
rs = avg_gain / avg_loss
data['RSI'] = 100 - (100 / (1 + rs))

# MACD
ema12 = data['Close'].ewm(span=12, adjust=False).mean()
ema26 = data['Close'].ewm(span=26, adjust=False).mean()
data['MACD'] = ema12 - ema26
data['MACD_signal'] = data['MACD'].ewm(span=9, adjust=False).mean()

# Bollinger Bands
rolling_mean = data['Close'].rolling(window=20).mean()
rolling_std = data['Close'].rolling(window=20).std()
data['BB_upper'] = rolling_mean + 2 * rolling_std
data['BB_lower'] = rolling_mean - 2 * rolling_std

# Momentum
data['Momentum'] = data['Close'] - data['Close'].shift(10)

# ATR
high_low = data['High'] - data['Low']
high_close = np.abs(data['High'] - data['Close'].shift())
low_close = np.abs(data['Low'] - data['Close'].shift())
data['ATR'] = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1).rolling(window=14).mean()

# Remover NaNs
data = data.dropna()

# 3. Preparação das janelas
X, y, window_dates = [], [], []

features = ['Close','High','Low','Open','Volume',
            'EMA9','SMA20','SMA200','RSI','MACD','MACD_signal',
            'BB_upper','BB_lower','Momentum','ATR',
            'Month','DayOfYear']  # adicionadas as novas features

data_values = data[features].values
open_prices = data['Open'].values
close_prices = data['Close'].values


for i in range(window_size, len(data_values)):
    # Janela dos últimos 60 dias antes de hoje
    window = data_values[i - window_size:i]
    X.append(window)
    
    # Dia atual
    open_today = open_prices[i]
    close_today = close_prices[i]
    
    # Label: 1 se Close > Open hoje, 0 caso contrário
    label = 1 if (close_today - open_today) > 0 else 0
    y.append(label)
    
    # Guardar a data de hoje
    window_dates.append(data.index[i])

# Converter para arrays numpy
X = np.array(X)
y = np.array(y)
window_dates = np.array(window_dates)

print("Formato de X:", X.shape)  # (n_amostras, 60, n_features)
print("Formato de y:", y.shape)
print(y


  data = yf.download(symbol, start=start, end=end)
[*********************100%***********************]  1 of 1 completed

Formato de X: (1581, 60, 17)
Formato de y: (1581,)





In [183]:
# Normalização dos dados
"""
n_samples, n_timesteps, n_features = X.shape
X_reshaped = X.reshape(-1, n_features)  # Achata para aplicar a normalização
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_reshaped)
X = X_scaled.reshape(n_samples, n_timesteps, n_features)

# Visualizando a distribuição dos rótulos
plt.figure(figsize=(6,4))
plt.hist(y, bins=2, edgecolor='k', align='mid')
plt.xticks([0, 1])
plt.title("Distribuição dos rótulos (0: queda, 1: alta)")
plt.xlabel("Rótulo")
plt.ylabel("Frequência")
plt.grid(True)
#plt.show()
print(X[-1][-1])  # primeira janela, primeira linha
"""

'\nn_samples, n_timesteps, n_features = X.shape\nX_reshaped = X.reshape(-1, n_features)  # Achata para aplicar a normalização\nscaler = StandardScaler()\nX_scaled = scaler.fit_transform(X_reshaped)\nX = X_scaled.reshape(n_samples, n_timesteps, n_features)\n\n# Visualizando a distribuição dos rótulos\nplt.figure(figsize=(6,4))\nplt.hist(y, bins=2, edgecolor=\'k\', align=\'mid\')\nplt.xticks([0, 1])\nplt.title("Distribuição dos rótulos (0: queda, 1: alta)")\nplt.xlabel("Rótulo")\nplt.ylabel("Frequência")\nplt.grid(True)\n#plt.show()\nprint(X[-1][-1])  # primeira janela, primeira linha\n'

In [193]:
# Dividindo os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=723235431)
print(len(X_train[0]))
print(y_train[0])

60
0


In [199]:

# 4. Construção do modelo CNN 1D para múltiplas features

model = Sequential([
    Conv1D(64, 3, activation='relu', input_shape=(window_size, n_features)),
    Dropout(0.2),
    LSTM(128),
    Dense(1, activation='sigmoid')
])

# Compilando o modelo
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# 5. Treinamento do modelo
history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_split=0.2)

# Plot do histórico de treinamento
plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
plt.plot(history.history['loss'], label='Treino')
plt.plot(history.history['val_loss'], label='Validação')
plt.title("Histórico de Loss")
plt.xlabel("Época")
plt.ylabel("Loss")
plt.legend()
plt.grid(True)

plt.subplot(1,2,2)
plt.plot(history.history['accuracy'], label='Treino')
plt.plot(history.history['val_accuracy'], label='Validação')
plt.title("Histórico de Acurácia")
plt.xlabel("Época")
plt.ylabel("Acurácia")
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

# 6. Avaliação do modelo no conjunto de teste
loss, acc = model.evaluate(X_test, y_test)
print("Acurácia no conjunto de teste:", acc)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/500
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 38ms/step - accuracy: 0.5124 - loss: 0.7031 - val_accuracy: 0.5415 - val_loss: 0.6932
Epoch 2/500
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.4857 - loss: 0.7176 - val_accuracy: 0.5217 - val_loss: 0.7002
Epoch 3/500
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.4857 - loss: 0.7128 - val_accuracy: 0.5375 - val_loss: 0.7079
Epoch 4/500
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.5005 - loss: 0.7081 - val_accuracy: 0.4822 - val_loss: 0.7014
Epoch 5/500
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.5005 - loss: 0.7101 - val_accuracy: 0.4466 - val_loss: 0.7070
Epoch 6/500
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 27ms/step - accuracy: 0.4906 - loss: 0.7195 - val_accuracy: 0.4783 - val_loss: 0.7023
Epoch 7/500
[1m32/32[0m [

KeyboardInterrupt: 

In [22]:
# Após o treinamento, para obter as probabilidades no conjunto de teste:
predicted_probabilities = model.predict(X_test)

# Exibir as probabilidades para os 5 primeiros exemplos:
for i, prob in enumerate(predicted_probabilities[:5]):
    print(f"Amostra {i}: Probabilidade de alta = {prob[0]:.2f}")
    
# Caso você queira converter para um sinal (subir/descender) utilizando um threshold de 0.5:
predicted_signals = (predicted_probabilities.flatten() > 0.5).astype(int)
print("Sinais previstos (0: baixa, 1: alta) para os 5 primeiros exemplos:", predicted_signals[:5])


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step
Amostra 0: Probabilidade de alta = 0.52
Amostra 1: Probabilidade de alta = 0.99
Amostra 2: Probabilidade de alta = 1.00
Amostra 3: Probabilidade de alta = 0.76
Amostra 4: Probabilidade de alta = 0.60
Sinais previstos (0: baixa, 1: alta) para os 5 primeiros exemplos: [1 1 1 1 1]


In [104]:
model.save("modelo_completo.keras")


In [26]:
model.summary()
loss, acc = model.evaluate(X_test, y_test)
print("Acurácia no conjunto de teste:", acc)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.4810 - loss: 1.6227
Acurácia no conjunto de teste: 0.4810126721858978


In [108]:
# from tensorflow.keras.utils import plot_model

# plot_model(model, to_file='modelo.png', show_shapes=True, show_layer_names=True)


In [None]:
# Plot dos indicadores técnicos e fundamentalistas em subplots
fig, axs = plt.subplots(3, 3, figsize=(18, 12))

# Preço de Fechamento
axs[0, 0].plot(data.index, data['Close'])
axs[0, 0].set_title("Preço de Fechamento")

# EMA9
axs[0, 1].plot(data.index, data['EMA9'])
axs[0, 1].set_title("EMA9")

# SMA20
axs[0, 2].plot(data.index, data['SMA20'])
axs[0, 2].set_title("SMA20")

# SMA200
axs[1, 0].plot(data.index, data['SMA200'])
axs[1, 0].set_title("SMA200")

# MACD
axs[1, 1].plot(data.index, data['MACD'])
axs[1, 1].set_title("MACD")

# RSI
axs[1, 2].plot(data.index, data['RSI'])
axs[1, 2].set_title("RSI")

# OBV
axs[2, 0].plot(data.index, data['OBV'])
axs[2, 0].set_title("OBV")


plt.tight_layout()
plt.show()
