# Stock Price Trend Prediction with LSTM

**Objective:** Predict future stock prices using past trends.

**Tools:** Python, Keras (TensorFlow), Pandas, Matplotlib, yfinance

This notebook:
- Fetches data using the `yfinance` API
- Normalizes and prepares data
- Builds an LSTM model with Keras
- Trains and validates the model
- Plots predictions vs. actuals
- Integrates Moving Average (MA) & RSI indicators
- Saves the best model checkpoint, the scaler parameters, and the generated plots

> Tip: Run this end-to-end locally where internet is available.


In [None]:
# Install packages if needed (uncomment if running in a fresh environment)
# %pip install yfinance pandas numpy scikit-learn matplotlib tensorflow==2.* ta streamlit

import os
import math
from dataclasses import dataclass
from typing import Tuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

import yfinance as yf
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Technical indicators (RSI)
import ta

# Default look for every figure in this notebook: 10x5 inches with a grid.
plt.rcParams.update({'figure.figsize': (10, 5), 'axes.grid': True})

# Seed both NumPy and TensorFlow so repeated runs are comparable.
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Output folders for saved models/scalers and for figures.
for _out_dir in ('artifacts', 'plots'):
    os.makedirs(_out_dir, exist_ok=True)


## Parameters

In [None]:
# You can tweak these. Every later cell reads its configuration from here.
TICKER = 'AAPL'           # any Yahoo Finance ticker
START_DATE = '2015-01-01'  # first date requested from yfinance
END_DATE = None           # None = today
TEST_SIZE = 0.2           # last 20% for validation
LOOKBACK = 60             # LSTM sequence length (days)
PRED_STEPS = 1            # one-step ahead prediction
BATCH_SIZE = 32           # mini-batch size passed to model.fit
EPOCHS = 30               # upper bound on epochs; EarlyStopping may end training sooner
LEARNING_RATE = 1e-3      # Adam learning rate
DROPOUT = 0.2             # dropout rate applied after each LSTM layer
MODEL_PATH = 'artifacts/lstm_stock_model.h5'  # where ModelCheckpoint writes the best model
SCALER_PATH = 'artifacts/scaler.npy'  # we'll save scaler min/max
PLOTS_DIR = 'plots'       # directory that receives every saved figure


## Fetch data with `yfinance` and add indicators (MA & RSI)

In [None]:
def fetch_with_indicators(ticker: str, start: str, end: str | None) -> pd.DataFrame:
    """Download daily OHLCV data for one ticker and add MA20, MA50 and RSI14.

    Args:
        ticker: Yahoo Finance ticker symbol.
        start: First date to request (``YYYY-MM-DD``).
        end: Last date to request, or ``None`` to let yfinance pick its default.

    Returns:
        A DataFrame with the columns ``Open, High, Low, Close, Adj Close,
        Volume, MA20, MA50, RSI14`` and rows containing NaN removed.

    Raises:
        ValueError: If yfinance returns no rows for the request.
    """
    # auto_adjust=False keeps the raw OHLC prices plus a separate 'Adj Close'
    # column. Newer yfinance releases default to auto_adjust=True, which drops
    # 'Adj Close' and would make the column selection below raise KeyError.
    df = yf.download(ticker, start=start, end=end, progress=False, auto_adjust=False)
    if df is None or df.empty:
        raise ValueError(f'No data returned for {ticker}. Check the ticker or date range.')

    # Newer yfinance releases return (field, ticker) MultiIndex columns even for
    # a single ticker. Flatten to the field names so df['Close'] is a Series.
    if isinstance(df.columns, pd.MultiIndex):
        field_level = next(
            (
                level
                for level in range(df.columns.nlevels)
                if 'Close' in df.columns.get_level_values(level)
            ),
            0,
        )
        df.columns = df.columns.get_level_values(field_level)

    df = df[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']].copy()

    # Moving averages; min_periods=1 means the first rows average whatever
    # history exists instead of being NaN.
    close = df['Close']
    df['MA20'] = close.rolling(window=20, min_periods=1).mean()
    df['MA50'] = close.rolling(window=50, min_periods=1).mean()

    # RSI (14); fillna=True asks `ta` to fill the warm-up period.
    df['RSI14'] = ta.momentum.rsi(close, window=14, fillna=True)

    return df.dropna()

# Download the configured ticker's history and attach the MA/RSI columns.
data = fetch_with_indicators(TICKER, START_DATE, END_DATE)
data.tail()  # notebook display of the most recent rows as a quick sanity check


## Prepare sequences for LSTM (scaling + windowing)

In [None]:
# We'll predict next-day Close given recent history & indicators.
# NOTE: 'Close' must stay first: the windowing and inverse-scaling code treat
# column 0 as the target.
features = ['Close', 'MA20', 'MA50', 'RSI14', 'Open', 'High', 'Low', 'Volume']
target = 'Close'

# Work out how many leading rows the *training* windows can reach (inputs and
# targets), mirroring the chronological split that is applied to the sequences:
# the first (1 - TEST_SIZE) share of all windows is used for training.
n_sequences = max(len(data) - LOOKBACK - PRED_STEPS + 1, 0)
n_train_sequences = int(n_sequences * (1 - TEST_SIZE))
if n_train_sequences > 0:
    n_train_rows = LOOKBACK + n_train_sequences + PRED_STEPS - 1
else:
    n_train_rows = len(data)  # too little data to split; fall back to every row

# Fit the scaler on the training rows only, so validation-period minima and
# maxima do not leak into training, then transform the whole series with it.
# Values from the validation period may therefore fall outside [0, 1].
scaler = MinMaxScaler()
scaler.fit(data[features].iloc[:n_train_rows])
scaled = scaler.transform(data[features])

# Persist what is needed to undo the scaling later. Note that the 'min_' key
# stores scaler.data_min_ (the raw per-feature minimum), not scaler.min_.
np.save(SCALER_PATH, {'min_': scaler.data_min_, 'max_': scaler.data_max_, 'scale_': scaler.scale_, 'data_range_': scaler.data_range_, 'feature_names': features}, allow_pickle=True)

def make_sequences(values: np.ndarray, lookback: int, pred_steps: int = 1) -> Tuple[np.ndarray, np.ndarray]:
    """Slice a 2-D feature matrix into sliding input windows and their targets.

    Args:
        values: Array of shape ``(n_rows, n_features)``; column 0 is the target
            ('Close' scaled, the first entry of ``features``).
        lookback: Number of consecutive rows in each input window.
        pred_steps: Number of rows after each window used as the target.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_samples, lookback, n_features)``
        and ``y`` has shape ``(n_samples,)`` when ``pred_steps == 1`` or
        ``(n_samples, pred_steps)`` otherwise. ``n_samples`` is 0 when
        ``values`` is too short to hold a single window plus its target.

    Raises:
        ValueError: If ``lookback`` or ``pred_steps`` is smaller than 1.
    """
    if lookback < 1 or pred_steps < 1:
        raise ValueError('lookback and pred_steps must both be >= 1.')

    values = np.asarray(values)
    n_samples = max(len(values) - lookback - pred_steps + 1, 0)

    # Pre-allocate with explicit shapes so empty and single-sample results keep
    # their sample axis (a blanket squeeze() would drop it).
    X = np.empty((n_samples, lookback) + values.shape[1:], dtype=values.dtype)
    y = np.empty((n_samples, pred_steps), dtype=values.dtype)
    for k in range(n_samples):
        end = k + lookback
        X[k] = values[k:end]
        y[k] = values[end:end + pred_steps, 0]

    # One-step targets are returned flat, as callers expect an (n_samples,) vector.
    if pred_steps == 1:
        y = y[:, 0]
    return X, y

# Turn the scaled matrix into (window, next-step target) pairs.
X, y = make_sequences(scaled, LOOKBACK, PRED_STEPS)

# Chronological split: the earliest windows train, the latest validate (no shuffling).
split_idx = int(len(X) * (1 - TEST_SIZE))
X_train, y_train = X[:split_idx], y[:split_idx]
X_val, y_val = X[split_idx:], y[split_idx:]

X_train.shape, X_val.shape


## Build LSTM model

In [None]:
def build_lstm(input_shape, lr=LEARNING_RATE, dropout=DROPOUT):
    """Build and compile the two-layer LSTM regressor.

    Args:
        input_shape: Shape of one input sample, passed to the first LSTM layer.
        lr: Learning rate for the Adam optimizer.
        dropout: Dropout rate applied after each LSTM layer.

    Returns:
        A compiled Sequential model with MSE loss and an MAE metric.
    """
    net = Sequential()
    # First recurrent layer returns the full sequence so a second LSTM can follow.
    net.add(LSTM(64, return_sequences=True, input_shape=input_shape))
    net.add(Dropout(dropout))
    net.add(LSTM(32))
    net.add(Dropout(dropout))
    # Linear output, one unit per predicted step.
    net.add(Dense(PRED_STEPS))

    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    net.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
    return net

# One sample is LOOKBACK time steps by one column per feature.
input_shape = (LOOKBACK, len(features))
model = build_lstm(input_shape)
model.summary()


## Train with EarlyStopping & save the best model

In [None]:
# Stop once val_loss has not improved for 5 epochs and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Write the model to MODEL_PATH each time val_loss reaches a new best.
checkpoint = ModelCheckpoint(MODEL_PATH, monitor='val_loss', save_best_only=True, verbose=1)
callbacks = [early_stopping, checkpoint]

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    verbose=1,
    callbacks=callbacks,
)

# Plot training curves
plt.figure()
for history_key, curve_label in (('loss', 'train_loss'), ('val_loss', 'val_loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Training History (MSE)')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
history_plot_path = os.path.join(PLOTS_DIR, 'training_history.png')
plt.savefig(history_plot_path)
plt.show()


## Evaluate & plot predictions vs actual (on validation set)

In [None]:
# Validation-set predictions, still in scaled space.
# With PRED_STEPS == 1 the model output is (n, 1); squeezing gives flat vectors
# that are easier to plot.
val_pred_scaled = np.squeeze(model.predict(X_val))
y_val_scaled = np.squeeze(y_val)

# Reload the saved scaler statistics and keep only the 'Close' entries
# ('Close' is the first feature, hence index 0).
minmax = np.load(SCALER_PATH, allow_pickle=True).item()
min_, max_, data_range_ = (minmax[stat][0] for stat in ('min_', 'max_', 'data_range_'))


def inverse_scale_close(x_scaled):
    """Map scaled 'Close' values back to prices: data_min + x_scaled * data_range."""
    return min_ + data_range_ * x_scaled


val_pred, y_val_actual = inverse_scale_close(val_pred_scaled), inverse_scale_close(y_val_scaled)

# The validation targets are the last rows of the original data, so reuse that tail as the index.
val_index = data.index[-len(y_val_actual):]

mae = mean_absolute_error(y_val_actual, val_pred)
rmse = math.sqrt(mean_squared_error(y_val_actual, val_pred))
print(f'Validation RMSE: {rmse:.4f}  |  MAE: {mae:.4f}')

# Plot predictions vs actual
# Both series share val_index, so they line up date-for-date on the x-axis.
plt.figure()
plt.plot(val_index, y_val_actual, label='Actual Close')
plt.plot(val_index, val_pred, label='Predicted Close')
plt.title(f'{TICKER} — Actual vs Predicted Close (Validation)')
plt.xlabel('Date'); plt.ylabel('Price')
plt.legend()
plt.tight_layout()
# Save before show() so the file is written from the finished figure.
plt.savefig(os.path.join(PLOTS_DIR, 'val_pred_vs_actual.png'))
plt.show()

# Plot with indicators (MA & RSI) for context
# Full history of Close together with its 20- and 50-day moving averages.
fig, ax = plt.subplots()
ax.plot(data.index, data['Close'], label='Close')
ax.plot(data.index, data['MA20'], label='MA20')
ax.plot(data.index, data['MA50'], label='MA50')
ax.set_title(f'{TICKER} Close with MA20/MA50')
ax.set_xlabel('Date'); ax.set_ylabel('Price')
ax.legend()
fig.tight_layout()
fig.savefig(os.path.join(PLOTS_DIR, 'close_ma.png'))
plt.show()

# RSI plot
# Dashed horizontal lines mark the 70 and 30 levels as visual reference points.
plt.figure()
plt.plot(data.index, data['RSI14'], label='RSI14')
plt.axhline(70, linestyle='--')
plt.axhline(30, linestyle='--')
plt.title(f'{TICKER} RSI14')
plt.xlabel('Date'); plt.ylabel('RSI')
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(PLOTS_DIR, 'rsi.png'))
plt.show()


## Optional: Simple rolling forecast for the next N days

In [None]:
# Number of future steps produced by the rolling forecast below.
N_DAYS_AHEAD = 5

def forecast_next_days(model, last_values_scaled: np.ndarray, n_days: int) -> list[float]:
    """Roll the model forward ``n_days`` steps by feeding predictions back in.

    Only the Close feature (column 0) is updated with each prediction. Every
    other feature of the appended row is copied from the previous last row, so
    this is a deliberately simple approximation.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that accepts a batch
            of shape ``(1, lookback, n_features)``.
        last_values_scaled: Array of shape ``(LOOKBACK, n_features)`` in scaled
            space. It is not modified.
        n_days: Number of steps to forecast; values <= 0 give an empty list.

    Returns:
        The predicted Close values in scaled space, one per step.
    """
    window = np.array(last_values_scaled, copy=True)
    preds: list[float] = []
    for _ in range(n_days):
        raw = model.predict(window[np.newaxis, ...], verbose=0)
        # Take the first predicted step. This also works for multi-output
        # models (PRED_STEPS > 1), where calling .item() on the output would raise.
        next_close = float(np.asarray(raw).reshape(-1)[0])
        preds.append(next_close)

        # Shift window: drop the oldest row and append a copy of the newest row
        # with Close replaced by the prediction.
        new_row = window[-1].copy()
        new_row[0] = next_close
        window = np.vstack([window[1:], new_row])
    return preds

# Seed the rolling forecast with the most recent LOOKBACK rows of scaled data.
last_window = scaled[-LOOKBACK:]
future_preds_scaled = forecast_next_days(model, last_window, N_DAYS_AHEAD)

# Convert each scaled prediction back to price space.
future_preds = list(map(inverse_scale_close, future_preds_scaled))

print('Next days (predicted Close):', future_preds)

## Save artifacts

In [None]:
# Nothing is written here: ModelCheckpoint already saved the best model to
# MODEL_PATH during training, so this cell only reports where everything is.
for summary_line in (
    f"Saved best model to: {MODEL_PATH}",
    f"Scaler saved to: {SCALER_PATH}",
    f"Plots saved in: {PLOTS_DIR}/",
):
    print(summary_line)
