# 📈 Stock Price Prediction with LSTM (PyTorch)
### Predict future stock prices using an LSTM-based deep learning model.


In [None]:
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler

from StockETL.globalpath import GlobalPath

In [None]:
# Run configuration shared by the cells below.
time_step = 60  # number of past observations fed to the model per sample
epochs = 50  # number of optimisation passes in the training loop
today = datetime.now()  # naive local timestamp used as the reference "now"

In [None]:
# Load and prepare data
holding_gold_file_path = GlobalPath("DATA/GOLD/Holding/Holding_data.csv")
df = pd.read_csv(holding_gold_file_path)

# Coerce column types; anything unparseable becomes NaT/NaN and is dropped below.
df["date"] = pd.to_datetime(df["date"], errors="coerce")
df["close_amount"] = pd.to_numeric(df["close_amount"], errors="coerce")
df["holding_amount"] = pd.to_numeric(df["holding_amount"], errors="coerce")

# Modelling target: difference between the close amount and the holding amount.
df["value"] = df["close_amount"] - df["holding_amount"]

# Keep only the trailing two years and the two columns that are modelled.
# Selecting through a single .loc call returns a new frame, so no later
# assignment writes into a view of the original data.
cutoff = today - timedelta(days=365 * 2)
df = df.loc[df["date"] >= cutoff, ["date", "value"]]
df = df.dropna(subset=["date", "value"])

# Validate after cleaning so an all-NaN window is reported here rather than
# surfacing later as an opaque scaler/shape error.
if df.empty:
    raise ValueError("No data found for the selected date range.")

# Collapse multiple rows sharing a timestamp into one summed value per date;
# groupby also leaves the index sorted chronologically.
df = df.groupby("date").sum()
df.tail()

In [None]:
# Quick sanity check of the aggregated series before modelling.
# The label names the column that is actually inspected ("value").
print("\nMissing values in value column:", df["value"].isna().sum())
print("\nStatistical Summary:")
print(df.describe())

In [None]:
# Preprocess
raw_values = df["value"].values.reshape(-1, 1)
train_size = int(len(raw_values) * 0.8)

# Fit the scaler on the training portion only: fitting on the full series
# would leak the test set's min/max into training. Test values may therefore
# fall slightly outside [0, 1] after transformation, which is expected.
scaler = MinMaxScaler()
scaler.fit(raw_values[:train_size])
scaled_data = scaler.transform(raw_values)

# Chronological 80/20 split (no shuffling, to respect time order).
train_data, test_data = scaled_data[:train_size], scaled_data[train_size:]


def create_dataset(data, time_step):
    """Slice a series into supervised (window, next value) pairs.

    Only column 0 of ``data`` is used. Each sample in ``X`` holds
    ``time_step`` consecutive observations and the matching entry in ``y``
    is the observation that immediately follows that window.

    Args:
        data: 2-D array-like of shape ``(n_observations, n_features)``.
        time_step: Number of consecutive observations per input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays where ``X`` has shape
        ``(n, time_step)`` and ``y`` has shape ``(n,)``, with
        ``n = max(n_observations - time_step, 0)``. ``X`` keeps its second
        axis even when ``n`` is 0, so downstream reshaping stays valid.

    Raises:
        ValueError: If ``time_step`` is smaller than 1.
    """
    if time_step < 1:
        raise ValueError("time_step must be a positive integer.")

    series = np.asarray(data)[:, 0]
    n_windows = len(series) - time_step
    if n_windows <= 0:
        # Not enough history for a single window: return correctly shaped
        # empty arrays instead of a rank-collapsed (0,) array.
        return (
            np.empty((0, time_step), dtype=series.dtype),
            np.empty((0,), dtype=series.dtype),
        )

    # The sliding view yields one extra trailing window that has no target
    # after it, so keep only the first n_windows. Copy because the view is
    # read-only and shares memory with the input.
    windows = np.lib.stride_tricks.sliding_window_view(series, time_step)
    return windows[:n_windows].copy(), series[time_step:].copy()


# Window each split independently, then convert the resulting NumPy arrays
# to float32 tensors for PyTorch.
train_windows, train_targets = create_dataset(train_data, time_step)
test_windows, test_targets = create_dataset(test_data, time_step)

X_train = torch.tensor(train_windows, dtype=torch.float32)
y_train = torch.tensor(train_targets, dtype=torch.float32)
X_test = torch.tensor(test_windows, dtype=torch.float32)
y_test = torch.tensor(test_targets, dtype=torch.float32)

In [None]:
# LSTM model


class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head applied to the final time step."""

    def __init__(self, input_size=1, hidden_size=200, num_layers=2, output_size=1):
        """Build the recurrent layers and the output projection.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of each LSTM layer's hidden state.
            num_layers: Number of stacked LSTM layers.
            output_size: Number of values produced per sequence.
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence.

        Args:
            x: Tensor laid out batch-first, i.e. ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        sequence_output, _state = self.lstm(x)
        # Only the last time step's hidden output feeds the linear head.
        final_step = sequence_output[:, -1, :]
        return self.fc(final_step)


# Instantiate the network with its default architecture, a mean-squared-error
# objective, and an Adam optimiser over all of the model's parameters.
model = LSTMModel()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Train (full-batch: every epoch is a single optimisation step on all windows)
train_losses = []
model.train()  # nothing inside the loop leaves training mode, so set it once
train_inputs = X_train.unsqueeze(-1)  # add the trailing feature axis the LSTM expects
for epoch in range(epochs):
    optimizer.zero_grad()
    output = model(train_inputs)
    # Drop only the trailing output axis. A bare squeeze() would also remove
    # the batch axis when there is a single training sample, leaving a 0-d
    # tensor that no longer matches y_train's shape.
    loss = criterion(output.squeeze(-1), y_train)
    loss.backward()
    optimizer.step()
    train_losses.append(loss.item())

In [None]:
# Plot Training Loss
loss_fig, loss_ax = plt.subplots(figsize=(10, 4))
loss_ax.plot(train_losses, label="Training Loss")
loss_ax.set_title("Training Loss per Epoch")
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.legend()
loss_ax.grid()
plt.show()

In [None]:
# Test
model.eval()  # switch to inference mode before scoring the held-out windows
with torch.no_grad():
    # squeeze(-1) removes only the output axis, so predictions stays 1-D even
    # when the test split yields a single window; a bare squeeze() would give
    # a 0-d tensor on which len(predictions) fails.
    predictions = model(X_test.unsqueeze(-1)).squeeze(-1)
    test_loss = criterion(predictions, y_test).item()
print(f"\nTest Loss (MSE): {test_loss:.5f}")

In [None]:
# Actual vs Predicted
# Bring the whole series and the test predictions back to original units.
full_actual = scaler.inverse_transform(scaled_data)
predicted_values = scaler.inverse_transform(
    predictions.detach().numpy().reshape(-1, 1)
)

# The first test prediction corresponds to the observation that follows the
# first full window inside the test split.
test_start_index = train_size + time_step
test_end_index = test_start_index + len(predictions)

# NaN everywhere except the span covered by predictions, so plotting the
# array against the full index leaves the rest blank.
full_predicted = np.empty_like(full_actual)
full_predicted.fill(np.nan)
full_predicted[test_start_index:test_end_index] = predicted_values

In [None]:
# Plot Actual vs Predicted
cmp_fig, cmp_ax = plt.subplots(figsize=(14, 5))
cmp_ax.plot(df.index, full_actual, label="Actual", color="black")
cmp_ax.plot(df.index, full_predicted, label="Predicted", color="red")
# Vertical marker at the first observation of the test split.
split_date = df.index[train_size]
cmp_ax.axvline(split_date, color="blue", linestyle="--", label="Train/Test Split")
cmp_ax.set_title("Stock Price Prediction")
cmp_ax.legend()
cmp_ax.grid()
plt.show()

In [None]:
# Future Forecast
forecast_days = 30  # number of autoregressive steps to roll forward

if len(scaled_data) < time_step:
    raise ValueError(
        f"Need at least {time_step} observations to seed the forecast, "
        f"got {len(scaled_data)}."
    )

# Seed window: the most recent `time_step` scaled observations, laid out as
# (batch=1, seq_len=time_step, features=1). The name is historical; its
# length follows `time_step`.
last_60_days = scaled_data[-time_step:].reshape(1, time_step, 1)
future_predictions = []

model.eval()  # make sure inference mode is active even if this cell runs alone
with torch.no_grad():
    for _ in range(forecast_days):
        input_tensor = torch.tensor(last_60_days).float()
        predicted = model(input_tensor)
        next_value = predicted.item()
        future_predictions.append(next_value)
        # Slide the window one step along the time axis (axis=1) and place
        # the new prediction in the freed last slot. Rolling without an
        # explicit axis operates on the flattened array and is only correct
        # here because the other two axes have length 1.
        last_60_days = np.roll(last_60_days, -1, axis=1)
        last_60_days[0, -1, 0] = next_value

future_predictions = scaler.inverse_transform(
    np.array(future_predictions).reshape(-1, 1)
)

# Label the forecast from the day after the last observation in the data,
# not from wall-clock "today": the model's first step continues the series,
# which may end before today, and "today" also carries a time-of-day part.
# NOTE(review): assumes one step per calendar day, as the original did.
last_observed_date = df.index.max()
future_dates = pd.date_range(
    start=last_observed_date + timedelta(days=1), periods=forecast_days
)

In [None]:
# Plot future predictions
forecast_fig, forecast_ax = plt.subplots(figsize=(10, 4))
forecast_ax.plot(
    future_dates, future_predictions, color="orange", label="Future Prediction"
)
forecast_ax.set_title("Next 30 Days Stock Price Forecast")
forecast_ax.set_xlabel("Date")
forecast_ax.set_ylabel("Predicted Price")
forecast_ax.legend()
forecast_ax.grid()
plt.show()