## Import necessary libraries

In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense

## Load and preprocess the data

In [2]:
# Read the S&P 500 five-year price history into a DataFrame and preview
# the first rows.
csv_path = '/kaggle/input/sandp500/all_stocks_5yr.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,date,open,high,low,close,volume,Name
0,2013-02-08,15.07,15.12,14.63,14.75,8407500,AAL
1,2013-02-11,14.89,15.01,14.26,14.46,8882000,AAL
2,2013-02-12,14.45,14.51,14.1,14.27,8126000,AAL
3,2013-02-13,14.3,14.94,14.25,14.66,10259500,AAL
4,2013-02-14,14.94,14.96,13.16,13.99,31879900,AAL


In [3]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(619040, 7)

In [4]:
# Number of missing values in each column.
df.isna().sum()

date       0
open      11
high       8
low        8
close      0
volume     0
Name       0
dtype: int64

In [5]:
# Discard every row that contains at least one missing value.
df = df.dropna()

In [6]:
# Count rows that are exact duplicates of an earlier row.
df.duplicated().sum()

0

In [7]:
# Preprocess the data: scale the closing prices into the (0, 1) range.
# The scaler is fitted on the training portion only (the first 80% of rows,
# the same ratio the train/test split uses) so the held-out rows do not leak
# their min/max into the transformation.
close_prices = df['close'].values.reshape(-1, 1)
n_fit_rows = int(len(close_prices) * 0.8)  # keep in sync with the split ratio
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(close_prices[:n_fit_rows])
# All rows are transformed with the training statistics; held-out values may
# therefore fall slightly outside [0, 1].
scaled_data = scaler.transform(close_prices)

## Split the data into training and testing sets

In [8]:
# Ordered (unshuffled) 80/20 split: the first 80% of rows are used for
# training and the remaining rows are held out for testing.
n_rows = len(scaled_data)
train_size = int(n_rows * 0.8)
test_size = n_rows - train_size
train_data = scaled_data[:train_size]
test_data = scaled_data[train_size:]

## Create the LSTM model

In [9]:
# Two stacked 50-unit LSTM layers followed by a single-unit dense output.
# Each sample is declared as one timestep with one feature.
lstm_model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(1, 1)),  # passes the full sequence to the next LSTM
    LSTM(units=50),
    Dense(1),
])

## Compile the model

In [10]:
# Train with the Adam optimizer, minimising mean squared error.
lstm_model.compile(optimizer='adam', loss='mean_squared_error')

## Train the LSTM model

In [11]:
# One-step-ahead setup: each scaled close is the input and the scaled close
# of the following row is the target.
# The model declares input_shape=(1, 1), so the inputs are reshaped explicitly
# to (samples, timesteps=1, features=1) rather than passed as 2-D
# (samples, 1) arrays.
X_train = train_data[:-1].reshape(-1, 1, 1)
y_train = train_data[1:]
lstm_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=2)

Epoch 1/10
15476/15476 - 68s - loss: 2.9669e-05 - 68s/epoch - 4ms/step
Epoch 2/10
15476/15476 - 63s - loss: 9.2607e-06 - 63s/epoch - 4ms/step
Epoch 3/10
15476/15476 - 64s - loss: 9.0029e-06 - 64s/epoch - 4ms/step
Epoch 4/10
15476/15476 - 63s - loss: 9.1768e-06 - 63s/epoch - 4ms/step
Epoch 5/10
15476/15476 - 63s - loss: 8.8874e-06 - 63s/epoch - 4ms/step
Epoch 6/10
15476/15476 - 63s - loss: 8.6325e-06 - 63s/epoch - 4ms/step
Epoch 7/10
15476/15476 - 63s - loss: 8.9028e-06 - 63s/epoch - 4ms/step
Epoch 8/10
15476/15476 - 64s - loss: 8.3660e-06 - 64s/epoch - 4ms/step
Epoch 9/10
15476/15476 - 64s - loss: 8.2948e-06 - 64s/epoch - 4ms/step
Epoch 10/10
15476/15476 - 64s - loss: 8.1510e-06 - 64s/epoch - 4ms/step


<keras.src.callbacks.History at 0x7f7994155c90>

## Make predictions

In [12]:
# Predict the next scaled close for every row that has a successor.
# Inputs are reshaped to (samples, timesteps=1, features=1) to match the
# input_shape=(1, 1) declared on the first LSTM layer.
train_predictions = lstm_model.predict(train_data[:-1].reshape(-1, 1, 1))
test_predictions = lstm_model.predict(test_data[:-1].reshape(-1, 1, 1))



## Rescale the predictions to their original scale

In [13]:
# Map the scaled predictions back to the original price scale.
# NOTE: this overwrites the scaled predictions, so re-running this cell on its
# own would apply the inverse transform a second time; re-run the prediction
# cell first.
train_predictions, test_predictions = (
    scaler.inverse_transform(scaled_preds)
    for scaled_preds in (train_predictions, test_predictions)
)

## Evaluate the model

In [14]:
# Evaluate the model using root mean squared error (RMSE), computed on the
# original price scale. The targets are the rows following each input, so they
# are inverse-transformed once and reused.
train_actual = scaler.inverse_transform(train_data[1:])
test_actual = scaler.inverse_transform(test_data[1:])

train_rmse = np.sqrt(np.mean(np.square(train_predictions - train_actual)))
test_rmse = np.sqrt(np.mean(np.square(test_predictions - test_actual)))

In [15]:
# Report the RMSE of both splits.
for label, value in (("Train RMSE:", train_rmse), ("Test RMSE:", test_rmse)):
    print(label, value)

Train RMSE: 5.238089033541467
Test RMSE: 2.689821554206371


In [16]:
# Mean absolute error (MAE) on the original price scale; the targets are the
# rows following each input.
train_actual = scaler.inverse_transform(train_data[1:])
test_actual = scaler.inverse_transform(test_data[1:])

train_mae = np.abs(train_predictions - train_actual).mean()
test_mae = np.abs(test_predictions - test_actual).mean()

print("Train MAE:", train_mae)
print("Test MAE:", test_mae)

Train MAE: 1.1444933898659027
Test MAE: 1.0080819604753106
