# LSTM Model – Stock Price Prediction

## Why LSTM?
Standard feedforward networks treat each input independently. Stock prices are
**sequential** – yesterday's price carries information about today's. Long Short-Term
Memory (LSTM) networks contain gating mechanisms (forget, input, and output gates) that
learn *which* past information to retain and which to discard, making them well suited
to time-series data.

## Overfitting Prevention
- **Dropout layers** randomly zero out neurons during training (regularisation).
- **Batch Normalisation** stabilises activations layer by layer.
- **Early Stopping** halts training when validation loss stops improving.
- **ReduceLROnPlateau** lowers the learning rate when the monitored loss stops improving.


In [None]:
import sys; sys.path.insert(0, '..')
import warnings; warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from src.data_loader import (
    fetch_stock_data, time_series_split,
    scale_features, build_sequences
)
from src.sentiment_analyzer import add_sentiment_to_df
from src.model_trainer import train_lstm
from src.evaluator import (
    regression_metrics, plot_predictions,
    plot_loss_curves, sharpe_ratio, max_drawdown
)

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply the dark-grid plotting style to every figure drawn after this point.
plt.style.use('seaborn-v0_8-darkgrid')
print('Setup complete')

In [None]:
# ── Experiment configuration ──────────────────────────────────────────────────
# Symbol and date range passed to the data loader.
TICKER = 'AAPL'
START, END = '2015-01-01', '2024-12-31'

# Sequence and training hyper-parameters.
SEQ_LEN = 60          # look-back window length, in trading days
TRAIN_RATIO = 0.80    # ratio passed to the train/test split
EPOCHS = 50
BATCH_SIZE = 32

# Input features for the LSTM, grouped by family. The target column 'Close'
# is deliberately not listed here.
FEATURE_COLS = (
    ['Open', 'High', 'Low', 'Volume']
    + ['SMA_10', 'SMA_20', 'SMA_50']
    + ['RSI_14', 'MACD', 'MACD_Signal', 'MACD_Hist']
    + ['BB_Width', 'ATR_14', 'Vol_Change', 'Log_Return', 'Sentiment']
)

In [None]:
# ── 1. Load & Prepare Data ────────────────────────────────────────────────────
# Fetch the price history, then pass it through add_sentiment_to_df
# (expected to add the 'Sentiment' column — confirm in src.sentiment_analyzer).
df = fetch_stock_data(TICKER, START, END)
df = add_sentiment_to_df(df, TICKER, START, END)

# Keep only the configured features that are actually present in the frame,
# preserving the order given in FEATURE_COLS.
feature_cols = list(filter(lambda name: name in df.columns, FEATURE_COLS))
print(f'Using {len(feature_cols)} features')

# Split into train and test portions according to TRAIN_RATIO.
train_df, test_df = time_series_split(df, TRAIN_RATIO)
print(f'Train: {len(train_df)} rows | Test: {len(test_df)} rows')

In [None]:
# ── 2. Scale Features (fit on train only!) ────────────────────────────────────
# Train and test frames are handed over separately, with 'Close' named as
# the target column.
train_scaled, test_scaled, scaler = scale_features(
    train_df,
    test_df,
    feature_cols,
    target_col='Close',
)

# Position of the target within a scaled row: 'Close' follows the
# len(feature_cols) feature columns, so its index equals the feature count.
target_idx = len(feature_cols)

# Build look-back windows for the train split first, then the test split.
(X_train, y_train), (X_test, y_test) = (
    build_sequences(scaled_part, SEQ_LEN, target_idx)
    for scaled_part in (train_scaled, test_scaled)
)

print(f'X_train: {X_train.shape}  |  y_train: {y_train.shape}')
print(f'X_test:  {X_test.shape}   |  y_test:  {y_test.shape}')

In [None]:
# ── 3. Train LSTM ──────────────────────────────────────────────────────────────
# Hold out the last 10 % of the training sequences as a validation set; the
# first 90 % are what the model is actually fitted on.
val_split = int(len(X_train) * 0.9)
X_train_, X_val = X_train[:val_split], X_train[val_split:]
y_train_, y_val = y_train[:val_split], y_train[val_split:]

# Fit the network; the model is saved under ../results/.
model, history = train_lstm(
    X_train_, y_train_, X_val, y_val,
    units=64,
    dropout=0.2,
    learning_rate=1e-3,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    save_path='../results/lstm_model.keras',
)

In [None]:
# ── 4. Loss Curves ────────────────────────────────────────────────────────────
# Visualise the `history` object returned by train_lstm in the previous step.
plot_loss_curves(history)

In [None]:
# ── 5. Predictions & Inverse Transform ───────────────────────────────────────
# NOTE(review): numpy is already imported in the setup cell, and MinMaxScaler
# is not referenced in any visible cell — both imports look redundant here;
# confirm before removing.
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Predict on the test sequences and collapse the output to a 1-D array.
# The values are still in scaled units at this point.
y_pred_scaled = model.predict(X_test).flatten()

# Inverse-transform predictions back to USD
# The scaler works on full-width rows, so a full-width array has to be rebuilt
# before the Close column can be picked out again.
n_cols  = len(feature_cols) + 1   # features + Close

def inverse_close(scaled_vals, scaler, close_col_idx, n_cols):
    """Map scaled target values back to the scaler's original units.

    The scaler is applied to full-width rows, so the values are placed in
    column ``close_col_idx`` of a zero-filled ``(n, n_cols)`` array, the whole
    array is inverse-transformed, and only that column is returned.  This
    relies on the scaler treating each column independently (assumed to be a
    per-column scaler such as MinMaxScaler — confirm in src.data_loader).

    Args:
        scaled_vals: Scaled values as any array-like.  The input is flattened
            first, so ``(n,)``, ``(n, 1)`` (e.g. raw model output) and nested
            lists are all accepted.
        scaler: Object exposing ``inverse_transform`` for ``(n, n_cols)``
            arrays.
        close_col_idx: Column position of the target; may be negative.
        n_cols: Total number of columns the scaler expects.

    Returns:
        1-D array of length ``n`` holding the inverse-transformed values.
    """
    # Flatten up front: assigning an (n, 1) array into a single column would
    # otherwise fail with a broadcasting error.
    flat_vals = np.asarray(scaled_vals, dtype=float).reshape(-1)
    dummy = np.zeros((flat_vals.shape[0], n_cols))
    dummy[:, close_col_idx] = flat_vals
    inv = scaler.inverse_transform(dummy)
    return inv[:, close_col_idx]

# Convert both the predictions and the ground-truth targets from scaled units
# back to the original price scale.
y_pred_usd = inverse_close(y_pred_scaled, scaler, target_idx, n_cols)
y_true_usd = inverse_close(y_test,        scaler, target_idx, n_cols)

# Dates for the test period (accounting for look-back)
# NOTE(review): assumes build_sequences yields len(test_df) - SEQ_LEN samples,
# so that test_dates lines up one-to-one with y_true_usd — confirm in
# src.data_loader.
test_dates = test_df.index[SEQ_LEN:]

# Regression metrics for the de-scaled series, labelled 'LSTM'. The returned
# object is extended with finance metrics and written to disk in the last cell.
metrics = regression_metrics(y_true_usd, y_pred_usd, 'LSTM')

In [None]:
# ── 6. Plot Predictions ───────────────────────────────────────────────────────
# Plot actual against predicted prices (both de-scaled) over the test dates.
plot_predictions(y_true_usd, y_pred_usd, label='LSTM', dates=test_dates)

In [None]:
# ── 7. Finance Metrics ────────────────────────────────────────────────────────
# Simple directional back-test. Step-over-step returns are computed for both
# the predicted and the actual price series; the 1e-10 term guards against
# division by zero.
pred_returns = (y_pred_usd[1:] - y_pred_usd[:-1]) / (y_pred_usd[:-1] + 1e-10)
actual_returns = (y_true_usd[1:] - y_true_usd[:-1]) / (y_true_usd[:-1] + 1e-10)

# Position rule: +1 (long) when the predicted series rises from one step to
# the next, otherwise -1 (short). The strategy earns the actual return with
# that sign.
signals = np.where(pred_returns > 0, 1, -1)
strat_returns = actual_returns * signals

print('\n── LSTM Strategy Finance Metrics ───────────')
sr = sharpe_ratio(strat_returns)
# Max drawdown is evaluated on the compounded equity curve of the strategy.
mdd = max_drawdown((1 + strat_returns).cumprod())

In [None]:
# ── 8. Save metrics to results/ ──────────────────────────────────────────────
import json
import os

# Make sure the output directory exists before writing into it.
os.makedirs('../results', exist_ok=True)

# Add the two finance metrics (rounded to 4 decimals) to the regression
# metrics, then write everything out as indented JSON.
metrics.update(Sharpe=round(sr, 4), MaxDrawdown=round(mdd, 4))
with open('../results/lstm_metrics.json', 'w') as f:
    json.dump(metrics, f, indent=2)
print('Metrics saved → ../results/lstm_metrics.json')