# Experiment: MSFT 1-Minute GRU Recursive Forecast (PRD)

Objective:
- Train a PyTorch GRU on MSFT 1-minute OHLC candles (target: last 60 days).
- Use 500-candle windows to predict the next candle.
- Recursively forecast 15 future candles from one seed window.
- Visualize predicted candles in a black-and-white candlestick style.

Success criteria:
- Notebook runs top-to-bottom and reports one-step and recursive metrics.
- Final chart clearly distinguishes historical, actual future, and predicted future candles in grayscale.


In [None]:
# Optional dependency bootstrap (kept lightweight).
import importlib.util
import subprocess
import sys

# Import name -> pip distribution name (the two differ for scikit-learn).
required = {
    'yfinance': 'yfinance',
    'numpy': 'numpy',
    'pandas': 'pandas',
    'matplotlib': 'matplotlib',
    'sklearn': 'scikit-learn',
}

# Gather the pip names of every module that cannot be located in this environment.
missing = []
for import_name, pip_name in required.items():
    if importlib.util.find_spec(import_name) is None:
        missing.append(pip_name)

if not missing:
    print('All required third-party packages are already installed.')
else:
    print('Installing missing packages:', missing)
    # Install with the interpreter running this notebook so packages land in the same environment.
    pip_command = [sys.executable, '-m', 'pip', 'install', '-q']
    pip_command.extend(missing)
    subprocess.check_call(pip_command)

# torch is deliberately not part of `required`; the user is told to install it by hand.
print('If torch is missing, install it separately with a CUDA wheel suitable for your RTX 3070 and driver.')


In [None]:
# Setup: imports and reproducibility
from __future__ import annotations

import random
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import yfinance as yf
from matplotlib import pyplot as plt
from IPython.display import display
from matplotlib.patches import Patch, Rectangle
from sklearn.metrics import mean_absolute_error, mean_squared_error
from torch.utils.data import DataLoader, Dataset

# Seed every random number generator the notebook relies on with one shared value.
SEED = 42
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(SEED)

cuda_ready = torch.cuda.is_available()
if cuda_ready:
    torch.cuda.manual_seed_all(SEED)

# NOTE(review): cuDNN benchmark mode trades determinism for speed; confirm this is
# acceptable given the reproducibility goal of this cell.
torch.backends.cudnn.benchmark = True
DEVICE = torch.device('cuda') if cuda_ready else torch.device('cpu')

print(f'Using device: {DEVICE}')
if cuda_ready:
    print('GPU:', torch.cuda.get_device_name(0))
    print('CUDA:', torch.version.cuda)


## Plan

- Data: download MSFT 1-minute OHLC candles over 60 days using chunked Yahoo requests.
- Features: raw OHLC only (no indicators).
- Model: 2-layer GRU (hidden=128, dropout=0.2) -> linear head to next OHLC.
- Training: time-aware split, train-only normalization, MSE loss, Adam, early stopping.
- Inference: recursive 15-step forecast using own predictions as future inputs.
- Visualization: candlestick plot on a black background, with green/red historical candles and predicted candles drawn in white (bullish) or black (bearish).


In [None]:
# Experiment configuration
# --- Data source ---
SYMBOL, INTERVAL = 'MSFT', '1m'
LOOKBACK_DAYS = 60  # requested history length, in days
CHUNK_DAYS = 6  # span of each individual download request, in days
FEATURES = ['Open', 'High', 'Low', 'Close']

# --- Sequence geometry ---
WINDOW = 500  # candles fed to the model per sample
HORIZON = 15  # candles forecast recursively

# --- Chronological split (the remainder after train + val is the test set) ---
TRAIN_RATIO = 0.70
VAL_RATIO = 0.15

# --- Model and optimisation ---
BATCH_SIZE = 256
HIDDEN_SIZE = 128
NUM_LAYERS = 2
DROPOUT = 0.2
LEARNING_RATE = 1e-3
EPOCHS = 12
PATIENCE = 3  # epochs without validation improvement before early stopping

# Echo the headline settings so each run records how it was configured.
print(dict(
    symbol=SYMBOL,
    interval=INTERVAL,
    lookback_days=LOOKBACK_DAYS,
    window=WINDOW,
    horizon=HORIZON,
    device=str(DEVICE),
))


In [None]:
# Data download and cleaning
def fetch_ohlc_1m(symbol: str, lookback_days: int, chunk_days: int = 6, pause_seconds: float = 0.25) -> pd.DataFrame:
    """Download 1-minute OHLC candles for ``symbol`` in consecutive date chunks.

    The range ``[now - lookback_days, now]`` (UTC, floored to the minute) is
    walked in steps of ``chunk_days`` and every step is requested separately
    through ``yf.download``. Chunks that come back empty are skipped.

    Args:
        symbol: Ticker passed to ``yf.download``.
        lookback_days: Length of the requested history in days.
        chunk_days: Length of each individual request in days; must be positive.
        pause_seconds: Sleep between consecutive requests.

    Returns:
        DataFrame holding the ``FEATURES`` columns, sorted by time,
        de-duplicated (last occurrence wins), without NaN rows, and with a
        timezone-naive index (tz-aware indexes are converted to UTC first).

    Raises:
        ValueError: If ``chunk_days`` is not positive, or a non-empty chunk
            lacks one of the ``FEATURES`` columns.
        RuntimeError: If no chunk returned any rows.
    """
    if chunk_days <= 0:
        # A non-positive step never advances the cursor, so the loop below would never end.
        raise ValueError(f'chunk_days must be positive, got {chunk_days}')

    # Timestamp.now(tz='UTC') is the non-deprecated equivalent of Timestamp.utcnow().
    end_ts = pd.Timestamp.now(tz='UTC').floor('min')
    start_ts = end_ts - pd.Timedelta(days=lookback_days)

    frames: list[pd.DataFrame] = []
    cursor = start_ts

    while cursor < end_ts:
        chunk_end = min(cursor + pd.Timedelta(days=chunk_days), end_ts)
        chunk = yf.download(
            tickers=symbol,
            start=cursor.to_pydatetime(),
            end=chunk_end.to_pydatetime(),
            interval='1m',
            auto_adjust=False,
            prepost=False,
            progress=False,
            threads=False,
        )

        if not chunk.empty:
            # Keep only the first column level when the download returns MultiIndex columns.
            if isinstance(chunk.columns, pd.MultiIndex):
                chunk.columns = chunk.columns.get_level_values(0)

            missing_cols = [c for c in FEATURES if c not in chunk.columns]
            if missing_cols:
                raise ValueError(f'Missing OHLC columns: {missing_cols}')

            frames.append(chunk[FEATURES].copy())

        cursor = chunk_end
        if cursor < end_ts:
            # Pause only when another request follows; sleeping after the last chunk is wasted time.
            time.sleep(pause_seconds)

    if not frames:
        raise RuntimeError('No 1-minute candles were returned. Try again during market hours or use another data source.')

    df = pd.concat(frames, axis=0).sort_index()
    # Adjacent chunks can share a boundary bar; keep the most recently downloaded copy.
    df = df[~df.index.duplicated(keep='last')]
    df = df.dropna(subset=FEATURES)

    idx = pd.DatetimeIndex(df.index)
    if idx.tz is not None:
        idx = idx.tz_convert('UTC').tz_localize(None)
    df.index = idx

    return df

# Download the candles and store them as float32, the dtype used for the model inputs.
raw_df = fetch_ohlc_1m(SYMBOL, LOOKBACK_DAYS, CHUNK_DAYS)
raw_df = raw_df.astype(np.float32)

# Calendar span actually covered by the returned bars (first to last timestamp).
span_days = (raw_df.index.max() - raw_df.index.min()).total_seconds() / 86400
print(f'Rows: {len(raw_df):,}')
print(f'Time span in returned data: {span_days:.1f} days')
if span_days < LOOKBACK_DAYS * 0.9:
    # Report the configured lookback rather than a hard-coded 60, so the message stays correct if it changes.
    print(f'Warning: Data vendor returned less than {LOOKBACK_DAYS} days of 1-minute bars. This is a provider limitation, not a modeling change.')

# One seed window plus the forecast horizon, with a 200-row margin on top.
min_needed = WINDOW + HORIZON + 200
if len(raw_df) < min_needed:
    raise RuntimeError(f'Not enough rows ({len(raw_df)}) for window={WINDOW} and horizon={HORIZON}. Need at least {min_needed}.')

raw_df.tail()


In [None]:
# Build sequences with train/val/test time split and train-only normalization
def split_indices(n_rows: int, train_ratio: float, val_ratio: float) -> tuple[int, int]:
    """Return the row positions where the validation and test segments begin.

    Rows ``[0, train_end)`` are training rows, ``[train_end, val_end)`` are
    validation rows, and everything from ``val_end`` onward is test data.
    Both boundaries are truncated toward zero.
    """
    cumulative_ratio = train_ratio + val_ratio
    first_val_row = int(n_rows * train_ratio)
    first_test_row = int(n_rows * cumulative_ratio)
    return first_val_row, first_test_row


def make_windows(values: np.ndarray, window: int) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Slice a time-ordered array into (input window, next-row target) samples.

    Sample ``k`` uses rows ``[k, k + window)`` as input and row ``k + window``
    as target.

    Args:
        values: Array whose first axis is time.
        window: Number of consecutive rows per input sample; must not be negative.

    Returns:
        ``(X, y, target_idx)`` where ``X`` has shape
        ``(n_samples, window, *values.shape[1:])``, ``y`` has shape
        ``(n_samples, *values.shape[1:])`` (both float32), and ``target_idx``
        (int64) holds the row position of each target inside ``values``.
        When there are not enough rows for a single sample, the arrays are
        empty but keep these trailing dimensions.

    Raises:
        ValueError: If ``window`` is negative.
    """
    if window < 0:
        raise ValueError(f'window must be non-negative, got {window}')

    values = np.asarray(values)
    feature_shape = values.shape[1:]
    target_idx = np.arange(window, len(values), dtype=np.int64)

    if target_idx.size == 0:
        # Keep the trailing dimensions so downstream shape logic still works on empty splits.
        empty_X = np.empty((0, window) + feature_shape, dtype=np.float32)
        empty_y = np.empty((0,) + feature_shape, dtype=np.float32)
        return empty_X, empty_y, target_idx

    # The view has shape (n_rows - window + 1, *features, window): the window axis comes last.
    views = np.lib.stride_tricks.sliding_window_view(values, window, axis=0)
    # Drop the final window (no row follows it to act as target) and move the
    # window axis next to the sample axis.
    windows = np.moveaxis(views[:-1], -1, 1)

    # np.array always copies, so the results are writable and do not alias `values`.
    X = np.array(windows, dtype=np.float32)
    y = np.array(values[window:], dtype=np.float32)
    return X, y, target_idx


class SequenceDataset(Dataset):
    """Dataset pairing each input window with its target row.

    Both arrays are converted to float32 tensors once, at construction time,
    and indexed along their first axis afterwards.
    """

    def __init__(self, X: np.ndarray, y: np.ndarray):
        inputs, targets = (torch.from_numpy(array).float() for array in (X, y))
        self.X = inputs
        self.y = targets

    def __len__(self) -> int:
        # The number of samples is the length of the input tensor's first axis.
        return len(self.X)

    def __getitem__(self, idx: int):
        window = self.X[idx]
        target = self.y[idx]
        return window, target


raw_values = raw_df[FEATURES].to_numpy(dtype=np.float32)
train_end, val_end = split_indices(len(raw_df), TRAIN_RATIO, VAL_RATIO)

# Normalization statistics come from the training rows only, so later rows
# never influence the scaling.
train_rows = raw_values[:train_end]
train_mean = train_rows.mean(axis=0)
train_std = train_rows.std(axis=0)
# A (near-)constant column would otherwise cause a division by ~0 below.
train_std = np.where(train_std < 1e-8, 1.0, train_std)

scaled_values = (raw_values - train_mean) / train_std

X_all, y_all, seq_idx = make_windows(scaled_values, WINDOW)

# A sample belongs to the split that contains its *target* row.
train_mask = seq_idx < train_end
test_mask = seq_idx >= val_end
val_mask = (seq_idx >= train_end) & (seq_idx < val_end)

X_train, y_train = X_all[train_mask], y_all[train_mask]
X_val, y_val = X_all[val_mask], y_all[val_mask]
X_test, y_test = X_all[test_mask], y_all[test_mask]

# Only the training loader shuffles; validation and test keep chronological order.
train_loader = DataLoader(
    SequenceDataset(X_train, y_train),
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=False,
)
val_loader = DataLoader(
    SequenceDataset(X_val, y_val),
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
)
test_loader = DataLoader(
    SequenceDataset(X_test, y_test),
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
)

print('Split rows:', {'train': train_end, 'val': val_end - train_end, 'test': len(raw_df) - val_end})
print('Windowed samples:', {'train': len(X_train), 'val': len(X_val), 'test': len(X_test)})
print('X_train shape:', X_train.shape, 'y_train shape:', y_train.shape)
if min(len(X_train), len(X_val), len(X_test)) == 0:
    raise RuntimeError('One or more splits are empty. Increase data range or adjust split ratios.')



In [None]:
# GRU model + training loop
class GRUForecaster(nn.Module):
    """Stacked GRU followed by a linear head that predicts the next feature row.

    The head maps the GRU output at the last time step back to ``input_size``
    values, so the prediction has the same width as one input row.
    """

    def __init__(self, input_size: int = 4, hidden_size: int = 128, num_layers: int = 2, dropout: float = 0.2):
        super().__init__()
        # Dropout is only passed on when more than one layer is stacked.
        inter_layer_dropout = 0.0 if num_layers <= 1 else dropout
        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
        )
        self.head = nn.Linear(in_features=hidden_size, out_features=input_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # batch_first=True, so axis 1 of the GRU output indexes time steps.
        sequence_states = self.gru(x)[0]
        final_state = sequence_states[:, -1, :]
        return self.head(final_state)


def run_epoch(model: nn.Module, loader: DataLoader, loss_fn: nn.Module, optimizer: torch.optim.Optimizer | None = None) -> float:
    """Run one pass over ``loader`` and return the sample-weighted mean loss.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch (gradients clipped to a global norm of 1.0). Without one the
    model is put in eval mode and no gradients are tracked.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    samples_seen = 0

    for inputs, targets in loader:
        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)

        if training:
            optimizer.zero_grad(set_to_none=True)
            with torch.enable_grad():
                batch_loss = loss_fn(model(inputs), targets)
            batch_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
        else:
            with torch.no_grad():
                batch_loss = loss_fn(model(inputs), targets)

        # Weight by batch size so a smaller final batch does not skew the epoch mean.
        n_in_batch = inputs.size(0)
        loss_sum += batch_loss.item() * n_in_batch
        samples_seen += n_in_batch

    # max(..., 1) keeps an empty loader from dividing by zero.
    return loss_sum / max(samples_seen, 1)


def train_model(model: nn.Module, train_loader: DataLoader, val_loader: DataLoader, epochs: int, lr: float, patience: int):
    """Train with Adam and MSE loss, using early stopping on validation loss.

    The weights from the epoch with the lowest validation loss are restored
    into ``model`` before returning.

    Returns:
        A list with one ``{'epoch', 'train_loss', 'val_loss'}`` dict per
        completed epoch.
    """

    def snapshot_weights():
        # CPU clones, so later optimizer steps cannot change the saved copy.
        return {name: tensor.detach().cpu().clone() for name, tensor in model.state_dict().items()}

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()

    best_val = float('inf')
    best_state = snapshot_weights()
    epochs_without_gain = 0
    history = []

    for epoch in range(1, epochs + 1):
        train_loss = run_epoch(model, train_loader, loss_fn, optimizer)
        val_loss = run_epoch(model, val_loader, loss_fn, optimizer=None)

        history.append(dict(epoch=epoch, train_loss=train_loss, val_loss=val_loss))
        print(f'Epoch {epoch:02d} | train_loss={train_loss:.6f} | val_loss={val_loss:.6f}')

        if val_loss < best_val:
            # New best: remember it and reset the patience counter.
            best_val = val_loss
            best_state = snapshot_weights()
            epochs_without_gain = 0
            continue

        epochs_without_gain += 1
        if epochs_without_gain >= patience:
            print(f'Early stopping at epoch {epoch}.')
            break

    model.load_state_dict(best_state)
    return history


In [None]:
# Train
# Build the network with the configured sizes, then move it to the selected device.
model = GRUForecaster(len(FEATURES), HIDDEN_SIZE, NUM_LAYERS, DROPOUT)
model = model.to(DEVICE)

history = train_model(model, train_loader, val_loader, EPOCHS, LEARNING_RATE, PATIENCE)

history_df = pd.DataFrame(history)
display(history_df.tail())

# Loss curves: training in black, validation in gray.
plt.figure(figsize=(8, 4))
for column, curve_label, curve_color in (
    ('train_loss', 'Train', 'black'),
    ('val_loss', 'Validation', 'gray'),
):
    plt.plot(history_df['epoch'], history_df[column], label=curve_label, color=curve_color)
plt.title('GRU Loss Curves')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.show()


In [None]:
# One-step evaluation on test set
def inverse_scale(arr: np.ndarray, mean: np.ndarray, std: np.ndarray) -> np.ndarray:
    """Undo ``(x - mean) / std`` standardization, returning values in original units."""
    rescaled = arr * std
    return rescaled + mean


@torch.no_grad()
def collect_preds(model: nn.Module, loader: DataLoader) -> tuple[np.ndarray, np.ndarray]:
    """Run ``model`` in eval mode over ``loader``.

    Returns:
        ``(predictions, targets)``: the per-batch arrays stacked row-wise, in
        the order the loader yields them.
    """
    model.eval()
    batch_preds = []
    batch_targets = []

    for inputs, expected in loader:
        outputs = model(inputs.to(DEVICE))
        batch_preds.append(outputs.cpu().numpy())
        # Targets never leave the CPU, so they convert directly.
        batch_targets.append(expected.numpy())

    stacked_preds = np.vstack(batch_preds)
    stacked_targets = np.vstack(batch_targets)
    return stacked_preds, stacked_targets


# Predict the whole test split and map predictions and targets back to price units.
pred_test_scaled, y_test_scaled = collect_preds(model, test_loader)
pred_test = inverse_scale(pred_test_scaled, train_mean, train_std)
y_test_raw = inverse_scale(y_test_scaled, train_mean, train_std)

# Look the Close column up by name instead of assuming it sits at position 3.
close_col = FEATURES.index('Close')

test_target_indices = seq_idx[test_mask]
# Close of the bar just before each target; the baseline for the direction check.
prev_close = raw_values[test_target_indices - 1, close_col]
actual_close = y_test_raw[:, close_col]
pred_close = pred_test[:, close_col]

# RMSE is the square root of the MSE. The `squared=False` shortcut is avoided
# because newer scikit-learn releases no longer accept it.
close_mae = mean_absolute_error(actual_close, pred_close)
close_rmse = np.sqrt(mean_squared_error(actual_close, pred_close))
ohlc_mae = mean_absolute_error(y_test_raw.reshape(-1), pred_test.reshape(-1))
ohlc_rmse = np.sqrt(mean_squared_error(y_test_raw.reshape(-1), pred_test.reshape(-1)))
# Share of bars where predicted and actual close move the same way relative to the previous close.
dir_acc = np.mean(np.sign(actual_close - prev_close) == np.sign(pred_close - prev_close))

one_step_metrics = {
    'close_mae': float(close_mae),
    'close_rmse': float(close_rmse),
    'ohlc_mae': float(ohlc_mae),
    'ohlc_rmse': float(ohlc_rmse),
    'directional_accuracy': float(dir_acc),
}

print('One-step test metrics:')
for k, v in one_step_metrics.items():
    print(f'  {k}: {v:.6f}')


In [None]:
# Recursive 15-step forecast
@torch.no_grad()
def recursive_forecast(model: nn.Module, seed_window_scaled: np.ndarray, horizon: int) -> np.ndarray:
    """Forecast ``horizon`` steps by feeding each prediction back as input.

    After every step the oldest row of the window is dropped and the new
    prediction is appended, so the window length stays constant. The seed
    array itself is not modified.

    Returns:
        float32 array with one predicted row per step (still in scaled units).
    """
    model.eval()
    rolling = seed_window_scaled.copy()
    steps = []

    for _ in range(horizon):
        # Add a leading batch axis of size 1 before calling the model.
        batch = torch.from_numpy(rolling).float()[None, ...].to(DEVICE)
        step = model(batch)[0].cpu().numpy()
        steps.append(step)
        rolling = np.concatenate([rolling[1:], step[None, :]], axis=0)

    return np.asarray(steps, dtype=np.float32)


# Last row of the seed window; exactly HORIZON real candles remain after it for comparison.
anchor_end = len(raw_df) - HORIZON - 1
if anchor_end - WINDOW + 1 < 0:
    raise RuntimeError('Not enough rows for recursive seed window.')

seed_scaled = scaled_values[anchor_end - WINDOW + 1 : anchor_end + 1]
recursive_scaled = recursive_forecast(model, seed_scaled, HORIZON)
recursive_pred = inverse_scale(recursive_scaled, train_mean, train_std)

future_actual = raw_values[anchor_end + 1 : anchor_end + 1 + HORIZON]
future_index = raw_df.index[anchor_end + 1 : anchor_end + 1 + HORIZON]

pred_future_df = pd.DataFrame(recursive_pred, index=future_index, columns=FEATURES)
actual_future_df = pd.DataFrame(future_actual, index=future_index, columns=FEATURES)

recursive_close_mae = mean_absolute_error(actual_future_df['Close'], pred_future_df['Close'])
# RMSE is the square root of the MSE. The `squared=False` shortcut is avoided
# because newer scikit-learn releases no longer accept it.
recursive_close_rmse = np.sqrt(mean_squared_error(actual_future_df['Close'], pred_future_df['Close']))

print('Recursive 15-step metrics (close):')
print(f'  mae:  {recursive_close_mae:.6f}')
print(f'  rmse: {recursive_close_rmse:.6f}')

# Side-by-side table of actual and predicted closes for each forecast step.
display(pd.concat({
    'actual_close': actual_future_df['Close'],
    'pred_close': pred_future_df['Close'],
}, axis=1))


In [None]:
# Candlestick rendering with requested color mapping
# - History: green (bullish) / red (bearish)
# - Predicted: white (bullish) / black (bearish)
def draw_candles(
    ax,
    ohlc: pd.DataFrame,
    start_x: int,
    up_edge: str,
    up_face: str,
    down_edge: str,
    down_face: str,
    wick_color: str,
    width: float = 0.62,
    lw: float = 1.0,
    alpha: float = 1.0,
):
    """Draw one candlestick per row of ``ohlc`` on ``ax``.

    Candle ``i`` is centred at ``start_x + i``. A candle counts as bullish
    when its close is at or above its open and uses the ``up_*`` colors;
    otherwise it uses the ``down_*`` colors. Wicks always use ``wick_color``.
    """
    bars = ohlc[['Open', 'High', 'Low', 'Close']].to_numpy()

    for position, (open_, high, low, close) in enumerate(bars):
        center = start_x + position

        # Wick: a vertical line spanning low to high, drawn beneath the body.
        ax.vlines(center, low, high, color=wick_color, linewidth=lw, alpha=alpha, zorder=2)

        if close >= open_:
            face, edge = up_face, up_edge
        else:
            face, edge = down_face, down_edge

        body_bottom = min(open_, close)
        body_height = abs(close - open_)
        # Give flat candles a tiny height so their body is still drawn.
        body_height = 1e-6 if body_height < 1e-8 else body_height

        body = Rectangle(
            (center - width / 2, body_bottom),
            width,
            body_height,
            facecolor=face,
            edgecolor=edge,
            linewidth=lw,
            alpha=alpha,
            zorder=3,
        )
        ax.add_patch(body)


# Number of historical candles shown to the left of the forecast.
context_bars = 170
context_start = max(0, anchor_end - context_bars + 1)
context_df = raw_df.iloc[context_start : anchor_end + 1][FEATURES].copy()

fig, ax = plt.subplots(figsize=(17, 8), facecolor='black')
ax.set_facecolor('black')

# Historical candles (actual): green/red
draw_candles(
    ax,
    context_df,
    start_x=0,
    up_edge='#2ECC71',
    up_face='#2ECC71',
    down_edge='#E74C3C',
    down_face='#E74C3C',
    wick_color='#D9D9D9',
    width=0.58,
    lw=1.0,
    alpha=0.95,
)

# Actual future candles (for comparison): muted green/red
draw_candles(
    ax,
    actual_future_df,
    start_x=len(context_df),
    up_edge='#1E8F4E',
    up_face='#1E8F4E',
    down_edge='#B03A2E',
    down_face='#B03A2E',
    wick_color='#9A9A9A',
    width=0.58,
    lw=1.0,
    alpha=0.75,
)

# Predicted future candles: white if bullish, black if bearish.
# Bearish bodies get a white outline: a black face with a black edge would be
# invisible on the black background, and the legend already shows a white edge.
draw_candles(
    ax,
    pred_future_df,
    start_x=len(context_df),
    up_edge='#FFFFFF',
    up_face='#FFFFFF',
    down_edge='#FFFFFF',
    down_face='#000000',
    wick_color='#F0F0F0',
    width=0.50,
    lw=1.35,
    alpha=1.0,
)

# Dashed divider between the last historical candle and the first forecast step.
split_x = len(context_df) - 0.5
ax.axvline(split_x, color='white', linestyle='--', linewidth=0.8, alpha=0.5)

# Label roughly ten evenly spaced bars, always including the last one.
combined_index = context_df.index.append(actual_future_df.index)
total_bars = len(combined_index)
tick_step = max(1, total_bars // 10)
ticks = list(range(0, total_bars, tick_step))
if ticks[-1] != total_bars - 1:
    ticks.append(total_bars - 1)

labels = [combined_index[i].strftime('%m-%d %H:%M') for i in ticks]
ax.set_xticks(ticks)
ax.set_xticklabels(labels, rotation=28, ha='right', color='white', fontsize=9)

ax.tick_params(axis='y', colors='white')
for spine in ax.spines.values():
    spine.set_color('#606060')

ax.grid(color='#222222', linewidth=0.6, alpha=0.35)
ax.set_title('MSFT 1-Minute Candles: History + 15-Step Recursive GRU Forecast', color='white', pad=16)
ax.set_ylabel('Price', color='white')

# One legend entry per candle style drawn above, including the actual-future candles.
legend_elements = [
    Patch(facecolor='#2ECC71', edgecolor='#2ECC71', label='History bullish (green)'),
    Patch(facecolor='#E74C3C', edgecolor='#E74C3C', label='History bearish (red)'),
    Patch(facecolor='#1E8F4E', edgecolor='#1E8F4E', label='Actual future bullish (muted green)'),
    Patch(facecolor='#B03A2E', edgecolor='#B03A2E', label='Actual future bearish (muted red)'),
    Patch(facecolor='#FFFFFF', edgecolor='#FFFFFF', label='Predicted bullish (white)'),
    Patch(facecolor='#000000', edgecolor='#FFFFFF', label='Predicted bearish (black)'),
]
leg = ax.legend(handles=legend_elements, facecolor='black', edgecolor='#707070', framealpha=1.0, loc='upper left')
for text in leg.get_texts():
    text.set_color('white')

plt.tight_layout()
plt.show()


## Results

- `one_step_metrics` reports t+1 quality over the test split.
- `recursive_close_mae/rmse` report 15-step recursive error accumulation.
- The final chart is drawn on a black background: historical candles are green/red, actual future candles are muted green/red, and predicted candles are white (bullish) or black (bearish).

## Next steps

- Increase training coverage with a data provider that guarantees full 60-day 1-minute history.
- Add walk-forward evaluation windows.
- After baseline lock-in, extend to multi-symbol training and then crypto.
