In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM

2024-06-01 12:28:03.058974: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Carregar o dataframe de cotações de fechamento
stock_data = pd.read_csv('raw_data/stock_market_dataset.csv', parse_dates=['Date'])

In [3]:
# Pivotar o dataframe para ter os tickers como colunas
stock_data_pivoted = stock_data.pivot(index='Date', columns='Stock', values='Close')

In [4]:
stock_data_pivoted.head()

Stock,AAPL,AMZN,GOOGL,MSFT,NVDA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2008-06-09,23.257,79.43,278.94,23.238,21.978
2008-06-10,23.774,79.62,277.08,23.389,20.68
2008-06-11,23.156,77.28,272.6,22.744,19.714
2008-06-12,22.189,76.15,276.48,23.682,19.837
2008-06-13,22.074,79.17,285.76,24.377,19.771


In [21]:
stock_data[60, 0]

KeyError: (60, 0)

In [5]:
# Carregar o dataframe de notícias com análise de sentimento
news_data = pd.read_csv('raw_data/reddit_data.csv', parse_dates=['Date'])

In [6]:
# Mapear os valores de sentimento para números
sentiment_mapping = {'positive': 1, 'neutral': 0, 'negative': -1}
news_data['Sentiment'] = news_data['Label'].map(sentiment_mapping)

In [7]:
news_data.head(6)

Unnamed: 0,Date,News,Label,Score,Sentiment
0,2016-07-01,A 117-year-old woman in Mexico City finally re...,neutral,1.0,0
1,2016-07-01,IMF chief backs Athens as permanent Olympic host,neutral,1.0,0
2,2016-07-01,"The president of France says if Brexit won, so...",neutral,1.0,0
3,2016-07-01,British Man Who Must Give Police 24 Hours' Not...,neutral,1.0,0
4,2016-07-01,100+ Nobel laureates urge Greenpeace to stop o...,neutral,1.0,0
5,2016-07-01,Brazil: Huge spike in number of police killing...,negative,0.95,-1


In [8]:
# Normalizar os dados de preços de ações
scaler = MinMaxScaler()
scaled_stock_data = scaler.fit_transform(stock_data_pivoted)

In [9]:
scaled_stock_data

array([[0.11369724, 0.06404986, 0.22581324, 0.2525657 , 0.38649861],
       [0.11813592, 0.06432394, 0.22301726, 0.25618646, 0.35610106],
       [0.11283011, 0.06094834, 0.21628285, 0.24072031, 0.33347853],
       ...,
       [0.70332086, 0.98156403, 0.85152727, 0.87137924, 0.9564411 ],
       [0.71334867, 0.98179484, 0.8640641 , 0.8860541 , 0.96557442],
       [0.71576977, 0.99622048, 0.87416571, 0.88581431, 0.95737785]])

In [10]:
# Preparar dados para RNN
def prepare_data(stock_data, news_data, time_step=60):
    X, y = [], []
    for i in range(time_step, len(stock_data)):
        X.append(stock_data[i-time_step:i])
        y.append(stock_data[i, 0])  # Prevendo a ação da primeira coluna (provavelmente AAPL)

    X, y = np.array(X), np.array(y)

    # Incorporar dados de notícias
    sentiment_array = news_data['Sentiment'].values[:len(y)]
    sentiment_array = sentiment_array.reshape(-1, 1, 1)  # Reshape para 3D

    # Repetir o sentimento para se alinhar com o número de timesteps
    sentiment_array = np.repeat(sentiment_array, time_step, axis=1)

    # Concatenar o sentimento com os dados de preços
    X = np.concatenate((X, sentiment_array), axis=2)

    return X, y


In [11]:
# Preparar os dados
X, y = prepare_data(scaled_stock_data, news_data)

In [12]:
# Construir o modelo RNN com LSTM
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model.add(LSTM(units=50))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

In [13]:
# Treinar o modelo
model.fit(X, y, epochs=20, batch_size=32)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f3b941fd960>

In [None]:
# Previsão de novos dados (exemplo)
# new_data = ... (dados de entrada a serem previstos)
# predictions = model.predict(new_data)

In [15]:
# Avaliar o impacto das notícias (Análise de Sentimento)
impact_weights = model.get_weights()
print(impact_weights)

[array([[ 0.03758971,  0.18661875, -0.11829011, ...,  0.07422014,
         0.04752044,  0.00930904],
       [ 0.10766185,  0.03303375, -0.11221252, ..., -0.03335897,
        -0.1635173 , -0.08769655],
       [ 0.17470485,  0.14479712, -0.01975338, ...,  0.04591226,
        -0.11250095,  0.00678541],
       [ 0.15491395, -0.03868248,  0.04278235, ...,  0.03991653,
         0.15877108,  0.13263021],
       [-0.06864773,  0.07442369, -0.02666946, ...,  0.10223538,
        -0.1323253 , -0.01295845],
       [ 0.15221061, -0.02237999, -0.04146862, ...,  0.03982652,
        -0.12006549,  0.09525693]], dtype=float32), array([[-0.05562356,  0.05541499, -0.11051937, ..., -0.11190971,
        -0.02898152, -0.06123189],
       [ 0.00064422,  0.06079299,  0.05422154, ..., -0.08015168,
        -0.06441937,  0.09437943],
       [ 0.0356468 ,  0.03335513,  0.1289006 , ...,  0.09638293,
        -0.10691893, -0.02387763],
       ...,
       [ 0.12805435, -0.03677482, -0.04497025, ...,  0.03914598,
     