<a href="https://colab.research.google.com/github/Aurora-Fund-Analytics/forecast-model/blob/main/notebooks/model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Stock Price Prediction Model

In [None]:
!pip install yfinance torch numpy pandas scikit-learn matplotlib

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt

# Download & Preprocess Stock Data

In [None]:
# -------------------
# 1. Load Stock Data
# -------------------
# NOTE(review): confirm this is the exact symbol Yahoo Finance lists for the
# fund -- non-US listings usually carry an exchange suffix.
ticker = "E1VFVN30"
raw_data = yf.download(ticker, start="2015-01-01", end="2024-12-31")

# Fail loudly here instead of with an obscure error in the scaler or the
# dataset when the download produced no rows.
if raw_data is None or raw_data.empty:
    raise ValueError(
        f"No price data returned for ticker {ticker!r}; "
        "check the symbol and the requested date range."
    )

# Use the closing price only, and drop rows without one: a single NaN would
# otherwise propagate through scaling into every window that contains it.
data = raw_data[['Close']].dropna()

In [None]:
# Rescale the closing prices into the [0, 1] interval.
# The fitted `scaler` is kept so predictions can later be mapped back to
# real prices with `scaler.inverse_transform`.
# NOTE(review): the scaler is fitted on the whole series, i.e. it also sees
# the rows that later end up in the evaluation split.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data)
scaled_data = scaler.transform(data)

In [None]:
# -------------------
# 2. Create Dataset
# -------------------
SEQ_LEN = 60  # lookback window (60 days)

class StockDataset(Dataset):
    """Sliding-window view over a sequence of observations.

    Sample ``i`` is the pair ``(data[i : i + seq_len], data[i + seq_len])``:
    a window of ``seq_len`` consecutive rows and the row that immediately
    follows it, both converted to ``float32`` tensors.

    Args:
        data: Indexable, sliceable sequence of rows (e.g. the array returned
            by the scaler). It is stored as-is, not copied.
        seq_len: Number of consecutive rows in each input window.
    """

    def __init__(self, data, seq_len=SEQ_LEN):
        self.data = data
        self.seq_len = seq_len

    def __len__(self):
        """Return the number of complete (window, next-row) pairs."""
        # A series shorter than the window yields no samples. Clamp at zero
        # because `len()` raises ValueError on a negative result.
        return max(0, len(self.data) - self.seq_len)

    def __getitem__(self, idx):
        """Return ``(window, label)`` tensors for sample ``idx``.

        Negative indices count from the end, as for a list.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            # Without this, a negative start would silently slice a
            # truncated (often empty) window.
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(f"sample index out of range for dataset of length {n_samples}")

        window_end = idx + self.seq_len
        seq = self.data[idx:window_end]
        label = self.data[window_end]
        return torch.tensor(seq, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)

In [None]:
# Turn the scaled series into (window, next value) samples.
dataset = StockDataset(scaled_data, SEQ_LEN)
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size

# Chronological split: the first 80% of the windows are used for training and
# the most recent 20% for testing. A random split would place almost
# identical, overlapping windows on both sides (leaking test information into
# training) and would return the test samples in arbitrary order, which makes
# a line plot of the predictions meaningless.
train_dataset = torch.utils.data.Subset(dataset, range(train_size))
test_dataset = torch.utils.data.Subset(dataset, range(train_size, train_size + test_size))

# Shuffling batches inside the training period is fine; the test loader keeps
# time order so predictions can be plotted as a series.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Build & Train LSTM Model

In [None]:
# -------------------
# 3. Build model
# -------------------
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per input sequence.
    """

    def __init__(self, input_size=1, hidden_size=64, num_layers=2, output_size=1):
        super().__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``
               (the LSTM is built with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        out, _ = self.lstm(x)
        out = out[:, -1, :]  # last time step
        out = self.fc(out)
        return out

# Seed PyTorch's global RNG so weight initialisation (and anything downstream
# that draws from the same generator) is repeatable across runs.
SEED = 42
torch.manual_seed(SEED)

model = LSTMModel()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training
EPOCHS = 20
for epoch in range(EPOCHS):
    model.train()
    running_loss, n_seen = 0.0, 0
    for seq, target in train_loader:
        # The LSTM needs (batch, seq_len, features). Windows cut from a 2-D
        # (n_rows, 1) array already have that shape; only add the feature
        # axis when the batch is 2-D. Unsqueezing unconditionally yields a
        # 4-D tensor, which nn.LSTM rejects.
        if seq.dim() == 2:
            seq = seq.unsqueeze(-1)
        # Match the model's (batch, 1) output so MSELoss compares element-wise
        # instead of broadcasting.
        target = target.reshape(-1, 1)

        optimizer.zero_grad()
        output = model(seq)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        batch_size = seq.size(0)
        running_loss += loss.item() * batch_size
        n_seen += batch_size

    # Report the sample-weighted mean over the epoch rather than the loss of
    # whichever batch happened to come last; also safe for an empty loader.
    epoch_loss = running_loss / n_seen if n_seen else float("nan")
    print(f"Epoch {epoch+1}/{EPOCHS}, Loss: {epoch_loss:.6f}")

# Evaluate & Plot Predictions

In [None]:
# Run the trained model over the test loader and collect scaled outputs.
model.eval()
pred_batches, actual_batches = [], []
with torch.no_grad():
    for seq, target in test_loader:
        # The LSTM needs (batch, seq_len, features); only add the feature
        # axis when the batch is 2-D. Unsqueezing a batch that is already
        # 3-D yields a 4-D tensor, which nn.LSTM rejects.
        if seq.dim() == 2:
            seq = seq.unsqueeze(-1)
        output = model(seq)
        pred_batches.append(output.reshape(-1, 1).numpy())
        actual_batches.append(target.reshape(-1, 1).numpy())

# Inverse transform back to real price
preds = scaler.inverse_transform(np.concatenate(pred_batches))
actuals = scaler.inverse_transform(np.concatenate(actual_batches))

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(actuals, label="Actual Price")
ax.plot(preds, label="Predicted Price")
ax.set_title("Actual vs. predicted closing price (test set)")
ax.set_xlabel("Test sample")
ax.set_ylabel("Close price")
ax.legend()
plt.show()