In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

2025-02-28 17:32:25.801405: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Read the cleaned stock CSV into a DataFrame (path is relative to the working directory)
file_path = 'sorted_stock_data_cleaned_nica.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Preview the first rows of the loaded dataset to eyeball column names and value ranges
display(data.head())

Unnamed: 0,Open,High,Low,Close,per_change,Qty,Turnover,EPS,PE
0,678.0,666.0,658.0,660.0,0.0,3547.0,2347771.0,17.94,36.789298
1,660.0,660.0,647.0,651.0,0.0,5758.0,3758521.0,17.94,36.287625
2,651.0,655.0,644.0,646.0,0.0,6085.0,3939778.0,17.94,36.008919
3,646.0,651.0,642.0,648.0,0.0,6977.0,4505359.0,17.94,36.120401
4,648.0,651.0,643.0,648.0,0.0,3303.0,2133809.0,17.94,36.120401


In [4]:
# Guard: the scaler, the LSTM input shape and the Dense output layer below all
# assume exactly 9 feature columns, so fail early if the CSV layout differs.
assert data.shape[1] == 9, "Dataset should have exactly 9 features!"

In [5]:
# Min-max scale every column (MinMaxScaler with its default settings). The fitted
# scaler is kept around so predictions can be mapped back to original units later.
# NOTE(review): the scaler is fitted on the full dataset, including the rows that
# later act as validation data via validation_split — confirm this is acceptable.
scaler = MinMaxScaler()
scaler.fit(data)
data_scaled = scaler.transform(data)

In [6]:
# Convert data into sequences
def create_sequences(data, seq_length=50):
    """Slice a time-ordered array into overlapping windows and next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is the row that
    immediately follows it, ``data[i + seq_length]``.

    Parameters
    ----------
    data : array-like, shape (n_steps, ...)
        Observations ordered along the first axis.
    seq_length : int, default 50
        Number of consecutive steps in each input window; must be >= 1.

    Returns
    -------
    X : np.ndarray, shape (n_windows, seq_length, ...)
        Input windows.
    y : np.ndarray, shape (n_windows, ...)
        The row following each window.

    ``n_windows`` is ``max(n_steps - seq_length, 0)``. When the input is too
    short to form a single window, both arrays are empty but keep their
    trailing dimensions, so downstream shape checks still behave sensibly.

    Raises
    ------
    ValueError
        If ``seq_length`` is smaller than 1 (a non-positive length would
        otherwise wrap around through negative indexing).
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    data = np.asarray(data)
    n_windows = max(len(data) - seq_length, 0)

    # Row i of `window_idx` holds positions i, i+1, ..., i+seq_length-1, so one
    # fancy-indexing step gathers every window without a Python-level loop.
    window_idx = np.arange(n_windows)[:, None] + np.arange(seq_length)[None, :]
    X = data[window_idx]

    # Targets are the rows right after each window. Copy so the result does not
    # alias `data` (the returned arrays were independent copies before, too).
    y = data[seq_length:].copy()
    return X, y

# Lookback period: how many consecutive scaled rows make up one input window
seq_length = 50
X_train, y_train = create_sequences(data=data_scaled, seq_length=seq_length)

In [7]:
# Inspect the shapes of the targets and of the input windows (in that order)
y_train.shape, X_train.shape

((1870, 9), (1870, 50, 9))

In [8]:
# Build LSTM model: four stacked LSTM layers, each followed by dropout, and a
# final Dense layer with one output per feature of the next time step.
# The feature count is read from the training windows instead of being
# hard-coded, so the input and output widths always match the data.
n_features = X_train.shape[-1]

model = Sequential([
    LSTM(64, activation='tanh', return_sequences=True, input_shape=(seq_length, n_features)),
    Dropout(0.2),
    LSTM(128, activation='tanh', return_sequences=True),
    Dropout(0.3),
    LSTM(64, activation='tanh', return_sequences=True),
    Dropout(0.3),
    # return_sequences=False: only this layer's final output is passed on to Dense
    LSTM(32, activation='tanh', return_sequences=False),
    Dropout(0.2),
    Dense(n_features)  # Predicting all features (9 for this dataset)
])

In [9]:
# Compile with Adam and an MSE loss, since the targets are continuous scaled values
model.compile(loss='mean_squared_error', optimizer='adam')

# Train for 50 epochs in mini-batches of 32, with 20% of the samples set aside
# for validation (validation_split=0.2)
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_split=0.2,
    verbose=1,
)

# Persist the trained model to disk
model.save("lstm50.keras")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [10]:
# # Predict next 100 days
# def predict_next_n_days(model, last_100_days, n_days=100):
#     predictions = []
#     input_sequence = last_100_days.copy()
#     for _ in range(n_days):
#         input_data = np.expand_dims(input_sequence, axis=0)
#         next_day_features = model.predict(input_data, verbose=0)[0]
#         predictions.append(next_day_features)
#         input_sequence = np.append(input_sequence[1:], [next_day_features], axis=0)
#     return np.array(predictions)

In [11]:
# # Get the last `seq_length` rows from the dataset (50 with the current setting, despite the `last_100_days` name)
# last_100_days = data_scaled[-seq_length:]
# predicted_features = predict_next_n_days(model, last_100_days, n_days=100)

In [12]:
# # Convert predictions back to original scale
# predicted_features = scaler.inverse_transform(predicted_features)

In [13]:
# # Generate future dates
# import datetime
# future_dates = [datetime.date.today() + datetime.timedelta(days=i) for i in range(1, 101)]

In [14]:
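# # Plot predictions
# # NOTE(review): the column indices and labels below do not appear to match the column
# # order shown by data.head() (Open, High, Low, Close, per_change, Qty, Turnover, EPS, PE);
# # verify the index-to-label mapping before re-enabling this cell.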
# plt.figure(figsize=(12, 6))
# plt.plot(future_dates, predicted_features[:, 0], label="Predicted High", linestyle="dashed", color="blue")
# plt.plot(future_dates, predicted_features[:, 1], label="Predicted Low", linestyle="dashed", color="green")
# plt.plot(future_dates, predicted_features[:, 2], label="Predicted Quantity Traded", linestyle="dashed", color="purple")
# plt.plot(future_dates, predicted_features[:, 3], label="Predicted Turnover", linestyle="dashed", color="orange")
# plt.plot(future_dates, predicted_features[:, 4], label="Predicted EPS", linestyle="dashed", color="brown")
# plt.plot(future_dates, predicted_features[:, 5], label="Predicted PE Ratio", linestyle="dashed", color="cyan")
# plt.plot(future_dates, predicted_features[:, 6], label="Predicted Percentage Change", linestyle="dashed", color="red")
# plt.plot(future_dates, predicted_features[:, 7], label="Predicted Open", linestyle="dashed", color="black")
# plt.xlabel("Date")
# plt.ylabel("Stock Features")
# plt.title("Predicted Stock Features for the Next 100 Days")
# plt.legend()
# plt.show()