In [17]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import datetime

In [18]:
def load_data(path:str) -> pd.DataFrame:
    df = pd.read_csv(path, index_col=0)
    return df

In [19]:
def preprocess_data(df, column='Close', train_size=0.8):
    # Convert the date column to datetime
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)

    # Extract the relevant column for prediction
    data = df[[column]].values

    # Scale the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(data)

    # Split the data into training and test sets
    train_data_len = int(np.ceil(len(scaled_data) * train_size))
    train_data = scaled_data[0:int(train_data_len), :]
    test_data = scaled_data[train_data_len - 60:, :]

    return train_data, test_data, scaler

In [20]:
def create_dataset(dataset, time_step=60):
    X, y = [], []
    for i in range(len(dataset) - time_step):
        X.append(dataset[i:(i + time_step), 0])
        y.append(dataset[i + time_step, 0])
    return np.array(X), np.array(y)

In [21]:
def build_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(units=100, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=100, return_sequences=False))
    model.add(Dropout(0.2))
    model.add(Dense(units=50))
    model.add(Dense(units=1))

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [22]:
def plot_predictions(train_data, valid_data, predictions):
    plt.figure(figsize=(16, 8))
    plt.title('Model')
    plt.xlabel('Date')
    plt.ylabel('Close Price USD ($)')
    plt.plot(train_data['Close'])
    plt.plot(valid_data[['Close', 'Predictions']])
    plt.legend(['Train', 'Val', 'Predictions'], loc='lower right')
    plt.show()
    
def plot_predictions_with_plotly(train_data, valid_data, future_dates, future_predictions):
    # Create traces for training, validation, and prediction data
    train_trace = go.Scatter(x=train_data.index, y=train_data['Close'], mode='lines', name='Train')
    valid_trace = go.Scatter(x=valid_data.index, y=valid_data['Close'], mode='lines', name='Validation')
    prediction_trace = go.Scatter(x=valid_data.index, y=valid_data['Predictions'], mode='lines', name='Predictions')
    future_trace = go.Scatter(x=future_dates, y=future_predictions, mode='lines', name='Future Predictions')

    # Create the layout for the plot
    layout = go.Layout(
        title='LSTM Model Predictions',
        xaxis=dict(title='Date'),
        yaxis=dict(title='Close Price USD ($)'),
        hovermode='x unified'
    )

    # Combine the traces and layout into a figure
    fig = go.Figure(data=[train_trace, valid_trace, prediction_trace, future_trace], layout=layout)

    # Show the figure
    fig.show()

In [23]:
def main(predict_days:int, path:str):
    # Load and preprocess data
    df = load_data(path)
    train_data, test_data, scaler = preprocess_data(df)

    X_train, y_train = create_dataset(train_data)
    X_test, y_test = create_dataset(test_data)

    X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    model = build_lstm_model((X_train.shape[1], 1))
    model.fit(X_train, y_train, batch_size=64, epochs=10, verbose=1)

    predictions = model.predict(X_test)
    predictions = scaler.inverse_transform(predictions)

    train = df[:len(train_data)]
    valid = df[len(train_data):]
    valid['Predictions'] = predictions

    last_60_days = test_data[-60:]
    last_60_days = last_60_days.reshape(1, -1, 1)
    
    future_predictions = []
    last_date = df.index[-1]
    future_dates = [last_date + datetime.timedelta(days=i) for i in range(1, predict_days + 1)]

    for _ in range(predict_days):
        pred = model.predict(last_60_days)
        last_60_days = np.append(last_60_days[:, 1:], pred).reshape(1, -1, 1)
        future_pred = scaler.inverse_transform(pred)
        future_predictions.append(future_pred[0][0])
        print(f"Predicted Close: {future_pred[0][0]}")

    plot_predictions_with_plotly(train, valid, future_dates, future_predictions)

    return future_predictions

In [24]:
# Run the script
main(predict_days=7, path='../dados/raw/PETR4.SA.csv')

2024-12-14 23:31:26.418426: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-12-14 23:31:26.420221: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-12-14 23:31:26.421782: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 1/10


2024-12-14 23:31:26.842847: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-12-14 23:31:26.844926: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-12-14 23:31:26.846526: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


2024-12-14 23:39:44.840757: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_2_grad/concat/split_2/split_dim' with dtype int32
	 [[{{node gradients/split_2_grad/concat/split_2/split_dim}}]]
2024-12-14 23:39:44.842801: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/split_grad/concat/split/split_dim' with dtype int32
	 [[{{node gradients/split_grad/concat/split/split_dim}}]]
2024-12-14 23:39:44.844801: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You mus

Predicted Close: 38.36517333984375




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Predicted Close: 38.27064895629883
Predicted Close: 38.10898971557617
Predicted Close: 37.905643463134766
Predicted Close: 37.67646026611328
Predicted Close: 37.43160629272461
Predicted Close: 37.17784118652344


[38.365173, 38.27065, 38.10899, 37.905643, 37.67646, 37.431606, 37.17784]