# LSTM (Long Short-Term Memory)

 
### A popular neural network model used to predict future stock prices.


In [1]:
import pandas as pd

file_name='MCX:ZINCMINI24AUGFUT-INTRADAY.csv'
# Load stock price data
data = pd.read_csv(f'../data/raw/{file_name}', parse_dates=True)
data['datetime'] = pd.to_datetime(data['datetime'])

data.set_index('datetime', inplace=True)
data = data[['open', 'high', 'low', 'close', 'volume']]

data

Unnamed: 0_level_0,open,high,low,close,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-04-29 09:54:00,252.95,252.95,252.95,252.95,1
2024-04-29 09:57:00,252.95,252.95,252.95,252.95,0
2024-04-29 10:00:00,252.95,252.95,252.95,252.95,0
2024-04-29 10:03:00,252.95,252.95,252.95,252.95,0
2024-04-29 10:06:00,252.95,252.95,252.95,252.95,0
...,...,...,...,...,...
2024-08-23 23:12:00,267.00,267.00,266.40,266.40,8
2024-08-23 23:15:00,266.85,266.85,266.45,266.60,13
2024-08-23 23:18:00,266.80,267.00,266.60,267.00,10
2024-08-23 23:21:00,267.00,267.00,266.90,267.00,13


### Resample data to daily frequency using OHLC dictionary


In [2]:
ohlc_dict = {                                                                                                             
    'open': 'first',                                                                                                    
    'high': 'max',                                                                                                       
    'low': 'min',                                                                                                        
    'close': 'last',                                                                                                    
    'volume': 'sum',
}

data = data.resample('D', closed='left', label='left').apply(ohlc_dict)
# data = pd.DatetimeIndex(data, freq='D')

data.drop(data.tail(2).index, inplace=True)
data = data.asfreq('D')
data.dropna(inplace=True)


data

Unnamed: 0_level_0,open,high,low,close,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-04-29,252.95,258.20,252.20,258.20,9
2024-04-30,259.90,261.00,257.90,258.20,15
2024-05-01,255.10,255.65,251.90,253.55,5221
2024-05-02,254.80,257.85,252.25,256.55,12671
2024-05-03,257.15,258.40,254.80,257.50,7480
...,...,...,...,...,...
2024-08-14,255.80,260.50,255.35,257.60,5101
2024-08-16,261.50,263.20,260.00,262.50,4328
2024-08-19,263.45,265.60,263.20,264.20,3518
2024-08-20,263.80,267.10,261.85,264.55,4689


In [3]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# include candlestick with rangeselector
fig.add_trace(go.Candlestick(
                x=data.index,
                open=data['open'], 
                high=data['high'],
                low=data['low'], 
                close=data['close'],
                name='Candle'),
               secondary_y=True)

# include a go.Bar trace for volumes
fig.add_trace(go.Bar(x=data.index, y=data['volume'],marker_color='gray', opacity=0.5,name='Volume'), secondary_y=False)

# Update layout
fig.update_layout(
    title=f'{file_name} Stock High & Low Price', xaxis_title='Date',
    yaxis=dict(title='Volume'),
    yaxis2=dict(title='Price', overlaying='y', side='right'),
    xaxis_rangeslider_visible=False
)

fig.show()

In [4]:
# %% [markdown]
# # LSTM (Long Short-Term Memory)
# 
# ## A popular neural network model used to predict future stock prices.

# %%
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
import plotly.graph_objects as go

# from keras.layers import LSTM, Dense, Dropout, Input


2024-08-25 13:00:04.084014: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-25 13:00:04.200938: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-25 13:00:04.243403: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-25 13:00:04.256488: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-25 13:00:04.342477: I tensorflow/core/platform/cpu_feature_guar

In [5]:

# %%
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data)

# Define sequence length for LSTM
sequence_length = 5

# Prepare data for LSTM
x_train, y_train = [], []
for i in range(sequence_length, len(scaled_data)):
    x_train.append(scaled_data[i-sequence_length:i, :])
    y_train.append(scaled_data[i, [1, 2]])  # Predict high and low prices

x_train, y_train = np.array(x_train), np.array(y_train)
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], x_train.shape[2]))

# %%
# Build LSTM model
model = Sequential()
model.add(Input(shape=(x_train.shape[1], x_train.shape[2])))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(25, activation='relu'))
model.add(Dense(units=2))  # Predict high and low prices

# Compile the model
model.compile(loss='mse', optimizer='adam')

# Fit the model
model.fit(x_train, y_train, epochs=50, batch_size=5)


I0000 00:00:1724571005.658302   11565 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1724571005.740217   11565 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1724571005.740425   11565 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1724571005.741857   11565 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

Epoch 1/50


2024-08-25 13:00:07.688314: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.2824
Epoch 2/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0588 
Epoch 3/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0391
Epoch 4/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0303 
Epoch 5/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0281
Epoch 6/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0421
Epoch 7/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0303
Epoch 8/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0361
Epoch 9/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0283 
Epoch 10/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0291
Epoch 11/50
[1m1

<keras.src.callbacks.history.History at 0x76d621e6c490>

In [6]:

# %%
# Forecast future prices
forecast_length = 4
last_x = x_train[-1]
forecast_prices = []

for i in range(forecast_length):
    prediction = model.predict(np.array([last_x]))
    # Prepare the shape for inverse transformation
    prediction_scaled = np.zeros((1, scaled_data.shape[1]))
    prediction_scaled[0, 1:3] = prediction  # Set only the high and low prices
    prediction = scaler.inverse_transform(prediction_scaled)
    high_price, low_price = prediction[0][1], prediction[0][2]
    forecast_prices.append([high_price, low_price])
    last_x = np.append(last_x[1:], [prediction_scaled[0]], axis=0)

forecast_prices = np.array(forecast_prices)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step


In [7]:

# Prepare data for visualization
dates = pd.date_range(start=data.index[-1], periods=forecast_length+1, inclusive='right').to_pydatetime()
actual_prices_high = scaler.inverse_transform(scaled_data[-len(forecast_prices):])[:, 1]
actual_prices_low = scaler.inverse_transform(scaled_data[-len(forecast_prices):])[:, 2]

# Create a DataFrame for the actual and forecasted values
forecast_df = pd.DataFrame({
    'date': dates,
    'forecast_high': forecast_prices[:, 0],
    'forecast_low': forecast_prices[:, 1],
    'actual_high': np.concatenate((actual_prices_high, [np.nan] * (forecast_length - len(actual_prices_high)))),
    'actual_low': np.concatenate((actual_prices_low, [np.nan] * (forecast_length - len(actual_prices_low))))
}).set_index('date')

# %%
# Create Plotly graph
fig = go.Figure()
fig.add_trace(go.Scatter(x=forecast_df.index, y=forecast_df['actual_high'], name='Actual High Price'))
fig.add_trace(go.Scatter(x=forecast_df.index, y=forecast_df['forecast_high'], name='Forecast High Price'))
fig.add_trace(go.Scatter(x=forecast_df.index, y=forecast_df['actual_low'], name='Actual Low Price'))
fig.add_trace(go.Scatter(x=forecast_df.index, y=forecast_df['forecast_low'], name='Forecast Low Price'))
fig.update_layout(title=f'{file_name} Stock Price Prediction using LSTM', xaxis_title='Date', yaxis_title='Price')
fig.show()



In [13]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=forecast_df.index, y=forecast_df['forecast_high'], name='Forecast High Price'))
fig.add_trace(go.Scatter(x=forecast_df.index, y=forecast_df['forecast_low'], name='Forecast Low Price'))

fig.add_trace(go.Candlestick(
                x=data.index,
                open=data['open'], 
                high=data['high'],
                low=data['low'], 
                close=data['close'],
                name='Candle'),)

fig.update_layout(title=f'{file_name} Stock Price Prediction using LSTM', xaxis_title='Date', yaxis_title='Price',    xaxis_rangeslider_visible=False )
fig.show()


In [8]:
# %%
forecast_df


Unnamed: 0_level_0,forecast_high,forecast_low,actual_high,actual_low
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-08-22,265.721471,261.176157,263.2,260.0
2024-08-23,264.787369,260.119076,265.6,263.2
2024-08-24,261.920964,257.012671,267.1,261.85
2024-08-25,258.235887,253.15521,267.6,264.95
