In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import talib
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import os
import math
import time

########################
# Configuration
########################
ticker = 'LPX'
start_date = '2020-01-01'
end_date = '2024-01-01'

# Hyperparameters
n_steps = 2            # rows (timesteps) per LSTM input sequence
window_size = 5        # rows in each rolling window
step_size = 2          # rows the window advances between iterations
initial_epochs = 100   # epoch cap when training on the first window
update_epochs = 100    # epoch cap when fine-tuning on later windows
initial_lr = 0.0005    # Adam learning rate for the first window
update_lr = 0.00005    # Adam learning rate for later windows
train_ratio = 0.9      # fraction of a window's samples used for fitting
n_units = 16           # units per LSTM layer
batch_size = 4         # mini-batch size passed to model.fit

# If you change features or other logic, do so below
# Weights and the results CSV are written here. exist_ok=True replaces the
# check-then-create pattern, which can fail if the directory appears between
# the check and the create.
os.makedirs("rolling_error_analysis_results", exist_ok=True)


def load_full_data_helper(ticker, start_date, end_date, window_size):
    """Download daily prices and build next-day-close prediction arrays.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start_date: Start of the download range.
        end_date: End of the download range.
        window_size: Minimum number of usable rows required.

    Returns:
        ``(X, y, dates)`` where ``X`` holds the Open/High/Low columns, ``y``
        is the following row's Close, and ``dates`` is the Date column, all
        row-aligned. ``(None, None, None)`` when the download is empty or
        fewer than ``window_size`` rows remain.
    """
    stock_data = yf.download(ticker, start=start_date, end=end_date)
    # Bail out before touching the frame when nothing usable came back.
    if stock_data is None or stock_data.empty:
        return None, None, None

    # NOTE(review): some yfinance versions return two-level (field, ticker)
    # columns even for a single ticker; keep only the field level so the
    # plain 'Close' / 'Open' lookups below select a single column.
    if isinstance(stock_data.columns, pd.MultiIndex):
        stock_data.columns = stock_data.columns.get_level_values(0)

    # Move the index into a regular column (read as 'Date' below).
    stock_data = stock_data.reset_index()

    # Next day prediction setup: the target for row t is the Close of row t+1.
    stock_data['Close_next_day'] = stock_data['Close'].shift(-1)
    # The final row has no following Close, so it is dropped.
    stock_data = stock_data.dropna(subset=['Close_next_day'])

    # Simple features
    X_feat = stock_data[['Open', 'High', 'Low']]
    y_raw = stock_data['Close_next_day'].values
    dates = stock_data['Date'].values

    if len(dates) < window_size:
        return None, None, None
    return X_feat.values, y_raw, dates


def load_full_data(ticker, start_date, end_date, window_size):
    """Return ``(X, y, dates)`` from ``load_full_data_helper``.

    Yields ``(None, None, None)`` when the helper reports no usable data
    (signalled by a ``None`` feature array).
    """
    loaded = load_full_data_helper(ticker, start_date, end_date, window_size)
    features, targets, dates = loaded
    if features is not None:
        return features, targets, dates
    return None, None, None


def _lstm_split(dataX, dataY, n_steps):
    """Slice row-aligned arrays into overlapping sequences of ``n_steps`` rows.

    Sample ``i`` is ``dataX[i:i + n_steps]``; its target is the ``dataY``
    value on the last row of that slice.
    """
    n_samples = len(dataX) - n_steps + 1
    X = [dataX[i:i + n_steps] for i in range(n_samples)]
    y = [dataY[i + n_steps - 1] for i in range(n_samples)]
    return np.array(X), np.array(y)


def extract_window(X_full, y_full, dates_full, start_idx, window_size, n_steps):
    """Cut one rolling window out of the full series and scale it.

    Args:
        X_full: 2-D feature array, one row per date.
        y_full: 1-D target array aligned with ``X_full``.
        dates_full: 1-D date array aligned with ``X_full``.
        start_idx: First row of the window.
        window_size: Number of rows in the window.
        n_steps: Rows per LSTM input sequence.

    Returns:
        A 6-tuple ``(X_samples_scaled, y_samples_scaled, sample_dates,
        feat_scaler, target_scaler, (X_window, y_window, dates_window))``.
        ``sample_dates[i]`` is the date of the last row of sample ``i``.
        Six ``None`` values are returned when the window would run past the
        end of the data or cannot hold a single ``n_steps``-row sequence.
    """
    invalid = (None, None, None, None, None, None)

    end_idx = start_idx + window_size
    if end_idx > len(X_full):
        return invalid
    # A window shorter than one sequence (or a non-positive n_steps) cannot
    # produce any sample; report it as invalid instead of returning empty or
    # mis-indexed arrays that fail later in training.
    if n_steps < 1 or window_size < n_steps:
        return invalid

    X_window = X_full[start_idx:end_idx]
    y_window = y_full[start_idx:end_idx]
    dates_window = dates_full[start_idx:end_idx]
    sample_dates = dates_window[n_steps-1:]

    # Scalers are fitted per window on every row of the window.
    # NOTE(review): this includes the rows later held out for validation, so
    # the scaling statistics see held-out data; confirm this is intended.
    feat_scaler = RobustScaler()
    target_scaler = RobustScaler()
    X_flat = X_window.reshape(len(X_window), -1)
    feat_scaler.fit(X_flat)
    y_window_reshaped = y_window.reshape(-1,1)
    target_scaler.fit(y_window_reshaped)

    X_scaled = feat_scaler.transform(X_flat).reshape(X_window.shape)
    y_scaled = target_scaler.transform(y_window_reshaped).flatten()
    X_samples_scaled, y_samples_scaled = _lstm_split(X_scaled, y_scaled, n_steps)

    return X_samples_scaled, y_samples_scaled, sample_dates, feat_scaler, target_scaler, (X_window, y_window, dates_window)


def create_lstm_model(n_units, learning_rate, n_steps, n_features, n_layers=1):
    """Build an uncompiled stacked-LSTM model with a single-unit Dense output.

    Args:
        n_units: Units in every LSTM layer.
        learning_rate: Not used here; kept for signature compatibility. The
            returned model is not compiled, so the caller sets the optimizer.
        n_steps: Timesteps per input sequence.
        n_features: Features per timestep.
        n_layers: Number of stacked LSTM layers. Values below 1 are treated
            as 1.

    Returns:
        A Keras ``Model`` mapping ``(n_steps, n_features)`` inputs to one
        output value.
    """
    depth = max(1, int(n_layers))

    inputs = Input(shape=(n_steps, n_features))
    x = inputs
    for layer_idx in range(depth):
        is_last = layer_idx == depth - 1
        # Every layer except the last returns the full sequence so the next
        # LSTM layer receives one vector per timestep.
        x = LSTM(n_units, activation='relu', return_sequences=not is_last)(x)
    outputs = Dense(1)(x)

    # We'll compile after loading weights if needed
    return Model(inputs, outputs)


def train_on_window(X_samples, y_samples, feat_scaler, target_scaler,
                    initial_training=False, prev_weights=None):
    """Fit a fresh LSTM on one window and report its validation MAPE.

    The samples are split in order: the leading ``train_ratio`` share is used
    for fitting and the rest for validation, with at least one sample always
    held out. ``feat_scaler`` is accepted but not used in this function.

    Args:
        X_samples: Scaled input sequences.
        y_samples: Scaled targets aligned with ``X_samples``.
        feat_scaler: Feature scaler for the window (unused here).
        target_scaler: Scaler used to map targets back to original units.
        initial_training: Selects the initial vs. update epochs and
            learning rate.
        prev_weights: Optional weights file loaded into the new model before
            it is compiled.

    Returns:
        ``(model, val_mape)`` with ``val_mape`` in percent, or
        ``(None, nan)`` when either side of the split is empty.
    """
    n_total = len(X_samples)
    # Cap the split point so that the validation side is never empty.
    split_at = min(int(math.floor(n_total * train_ratio)), n_total - 1)

    X_fit, X_hold = X_samples[:split_at], X_samples[split_at:]
    y_fit, y_hold = y_samples[:split_at], y_samples[split_at:]

    if not (X_fit.size and X_hold.size):
        return None, np.nan

    seq_len = X_fit.shape[1]
    feature_count = X_fit.shape[2]

    epochs, lr = (
        (initial_epochs, initial_lr) if initial_training
        else (update_epochs, update_lr)
    )

    model = create_lstm_model(n_units, lr, seq_len, feature_count, n_layers=1)

    # Warm-start from the previous window's weights when a path is given.
    if prev_weights is not None:
        model.load_weights(prev_weights)

    model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=lr))

    stopper = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(
        X_fit,
        y_fit,
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
        shuffle=False,
        validation_data=(X_hold, y_hold),
        callbacks=[stopper],
    )

    # Score in original units by undoing the target scaling on both sides.
    hold_pred_scaled = model.predict(X_hold, verbose=0)
    hold_true = target_scaler.inverse_transform(y_hold.reshape(-1,1)).flatten()
    hold_pred = target_scaler.inverse_transform(hold_pred_scaled).flatten()
    val_mape = mean_absolute_percentage_error(hold_true, hold_pred) * 100

    return model, val_mape


def rolling_training_early_stopping(ticker, window_size, step_size, n_steps):
    """Train on rolling windows, warm-starting each model from the previous one.

    The first window is trained from scratch. Each later window (advanced by
    ``step_size`` rows) loads the previously saved weights, is fine-tuned,
    and is then used to predict the last sample of its own window. Weights
    for every iteration and a results CSV are written under
    ``rolling_error_analysis_results``; progress and a summary are printed.

    NOTE(review): the sample predicted in each iteration comes from the same
    window that was used for training/early stopping, so the reported error
    may not be a true out-of-sample error; confirm against the split in
    ``train_on_window``.

    Args:
        ticker: Symbol to download.
        window_size: Rows per rolling window.
        step_size: Rows the window advances per iteration; must be >= 1.
        n_steps: Rows per LSTM input sequence.

    Returns:
        None. Returns early (after printing a warning) when ``step_size`` is
        not positive, data is insufficient, or the initial window cannot be
        extracted or trained.
    """
    # A non-positive step would never move the window forward and the loop
    # below would not terminate.
    if step_size < 1:
        print("[WARN] step_size must be a positive integer.")
        return

    X_full, y_full, dates_full = load_full_data(ticker, start_date, end_date, window_size)
    if X_full is None:
        print("[WARN] Not enough data.")
        return

    # Do not rely on the directory having been created elsewhere.
    os.makedirs("rolling_error_analysis_results", exist_ok=True)

    start_time = time.time()

    start_idx = 0
    window_data = extract_window(X_full, y_full, dates_full, start_idx, window_size, n_steps)
    if window_data[0] is None:
        print("[WARN] Initial window not valid.")
        return
    X_samples, y_samples, sample_dates, feat_scaler, target_scaler, _ = window_data

    print("[INFO] Training initial window with Early Stopping...")
    model, val_mape = train_on_window(X_samples, y_samples, feat_scaler, target_scaler, initial_training=True, prev_weights=None)
    # train_on_window returns no model when the window has too few samples to
    # split; there is nothing to save or to warm-start from in that case.
    if model is None:
        print("[WARN] Initial window has too few samples to train.")
        return
    print(f"[RESULT] Initial window validation MAPE: {val_mape:.2f}%")

    weights_path = "rolling_error_analysis_results/cost_initial_es.weights.h5"
    model.save_weights(weights_path)

    predictions = []
    actuals = []
    pred_dates = []

    iteration = 1
    while True:
        start_idx += step_size
        window_data = extract_window(X_full, y_full, dates_full, start_idx, window_size, n_steps)
        if window_data[0] is None:
            print("[INFO] No more windows can be extracted.")
            break
        X_samples, y_samples, sample_dates, feat_scaler, target_scaler, _ = window_data

        print(f"\n[INFO] Iteration {iteration}: training on new window with Early Stopping...")
        model, val_mape = train_on_window(X_samples, y_samples, feat_scaler, target_scaler, initial_training=False, prev_weights=weights_path)
        if model is None:
            # Keep the last good weights_path and move on to the next window.
            print(f"[WARN] Iteration {iteration}: window has too few samples to train; skipping.")
            iteration += 1
            continue
        print(f"[RESULT] Window validation MAPE: {val_mape:.2f}%")

        weights_path = f"rolling_error_analysis_results/cost_iteration_{iteration}_es.weights.h5"
        model.save_weights(weights_path)

        # Predict the last sample of this window and undo the target scaling.
        X_test_last = X_samples[-1:]
        y_test_last = y_samples[-1:]
        y_pred_scaled = model.predict(X_test_last, verbose=0)
        y_test_unscaled = target_scaler.inverse_transform(y_test_last.reshape(-1,1)).flatten()
        y_pred_unscaled = target_scaler.inverse_transform(y_pred_scaled).flatten()

        predictions.append(y_pred_unscaled[0])
        actuals.append(y_test_unscaled[0])
        pred_dates.append(sample_dates[-1])

        test_mape = mean_absolute_percentage_error(y_test_unscaled, y_pred_unscaled)*100
        print(f"[INFO] Predicted last sample of window: Actual={y_test_unscaled[0]:.2f}, Pred={y_pred_unscaled[0]:.2f}, MAPE={test_mape:.2f}%")

        iteration += 1

    df_results = pd.DataFrame({
        'Date': pred_dates,
        'Actual': actuals,
        'Predicted': predictions
    })
    df_results['Absolute_Error'] = np.abs(df_results['Actual'] - df_results['Predicted'])
    df_results['APE'] = df_results['Absolute_Error'] / np.abs(df_results['Actual'])
    # Per-row absolute percentage error expressed in percent.
    df_results['MAPE'] = df_results['APE'] * 100.0

    df_results.to_csv("rolling_error_analysis_results/cost_rolling_results_es.csv", index=False)
    overall_mape = df_results['MAPE'].mean()
    print(f"\n[INFO] Rolling training with Early Stopping completed for {ticker}. Overall MAPE: {overall_mape:.2f}%")
    elapsed = time.time() - start_time
    print(f"[INFO] Total elapsed time: {elapsed:.2f}s")

    worst_10 = df_results.sort_values('MAPE', ascending=False).head(10)
    print("[INFO] Top 10 worst predictions in rolling scenario (Early Stopping):")
    print(worst_10[['Date', 'Actual', 'Predicted', 'MAPE']])

if __name__ == "__main__":
    # Script entry point: run the rolling experiment with the module-level
    # configuration values defined at the top of the file.
    rolling_training_early_stopping(
        ticker=ticker,
        window_size=window_size,
        step_size=step_size,
        n_steps=n_steps,
    )


[*********************100%%**********************]  1 of 1 completed
[INFO] Training initial window with Early Stopping...
[RESULT] Initial window validation MAPE: 1.66%

[INFO] Iteration 1: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.06%
[INFO] Predicted last sample of window: Actual=31.07, Pred=31.09, MAPE=0.06%

[INFO] Iteration 2: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 1.32%
[INFO] Predicted last sample of window: Actual=31.50, Pred=31.08, MAPE=1.32%

[INFO] Iteration 3: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.19%
[INFO] Predicted last sample of window: Actual=31.88, Pred=31.94, MAPE=0.19%

[INFO] Iteration 4: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 1.32%
[INFO] Predicted last sample of window: Actual=31.15, Pred=31.56, MAPE=1.32%

[INFO] Iteration 5: training on new window with Early Stopping...
[RESULT] Window validation MAPE

[RESULT] Window validation MAPE: 7.29%
[INFO] Predicted last sample of window: Actual=18.44, Pred=17.10, MAPE=7.29%

[INFO] Iteration 38: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 10.02%
[INFO] Predicted last sample of window: Actual=21.14, Pred=19.02, MAPE=10.02%

[INFO] Iteration 39: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.41%
[INFO] Predicted last sample of window: Actual=19.51, Pred=19.59, MAPE=0.41%

[INFO] Iteration 40: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.12%
[INFO] Predicted last sample of window: Actual=19.61, Pred=19.59, MAPE=0.12%

[INFO] Iteration 41: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 1.80%
[INFO] Predicted last sample of window: Actual=20.01, Pred=19.65, MAPE=1.80%

[INFO] Iteration 42: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 2.80%
[INFO] Predicted last sample of window:

[RESULT] Window validation MAPE: 0.37%
[INFO] Predicted last sample of window: Actual=33.68, Pred=33.55, MAPE=0.37%

[INFO] Iteration 83: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 4.21%
[INFO] Predicted last sample of window: Actual=31.64, Pred=32.97, MAPE=4.21%

[INFO] Iteration 84: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 1.01%
[INFO] Predicted last sample of window: Actual=32.08, Pred=31.76, MAPE=1.01%

[INFO] Iteration 85: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.77%
[INFO] Predicted last sample of window: Actual=31.40, Pred=31.64, MAPE=0.77%

[INFO] Iteration 86: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 1.32%
[INFO] Predicted last sample of window: Actual=32.61, Pred=32.18, MAPE=1.32%

[INFO] Iteration 87: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 1.22%
[INFO] Predicted last sample of window: A

[RESULT] Window validation MAPE: 2.39%
[INFO] Predicted last sample of window: Actual=37.64, Pred=38.54, MAPE=2.39%

[INFO] Iteration 128: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.00%
[INFO] Predicted last sample of window: Actual=38.41, Pred=38.41, MAPE=0.00%

[INFO] Iteration 129: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.55%
[INFO] Predicted last sample of window: Actual=38.20, Pred=37.99, MAPE=0.55%

[INFO] Iteration 130: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 3.80%
[INFO] Predicted last sample of window: Actual=39.52, Pred=38.02, MAPE=3.80%

[INFO] Iteration 131: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 2.57%
[INFO] Predicted last sample of window: Actual=40.61, Pred=39.56, MAPE=2.57%

[INFO] Iteration 132: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 4.76%
[INFO] Predicted last sample of wind

[RESULT] Window validation MAPE: 0.00%
[INFO] Predicted last sample of window: Actual=65.15, Pred=65.15, MAPE=0.00%

[INFO] Iteration 173: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.72%
[INFO] Predicted last sample of window: Actual=64.74, Pred=65.21, MAPE=0.72%

[INFO] Iteration 174: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 3.95%
[INFO] Predicted last sample of window: Actual=68.05, Pred=65.36, MAPE=3.95%

[INFO] Iteration 175: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 2.27%
[INFO] Predicted last sample of window: Actual=68.89, Pred=67.32, MAPE=2.27%

[INFO] Iteration 176: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 3.54%
[INFO] Predicted last sample of window: Actual=64.94, Pred=67.24, MAPE=3.54%

[INFO] Iteration 177: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 4.89%
[INFO] Predicted last sample of wind

[RESULT] Window validation MAPE: 1.09%
[INFO] Predicted last sample of window: Actual=63.74, Pred=63.05, MAPE=1.09%

[INFO] Iteration 218: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.78%
[INFO] Predicted last sample of window: Actual=62.56, Pred=63.05, MAPE=0.78%

[INFO] Iteration 219: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.04%
[INFO] Predicted last sample of window: Actual=61.78, Pred=61.76, MAPE=0.04%

[INFO] Iteration 220: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.74%
[INFO] Predicted last sample of window: Actual=62.08, Pred=61.62, MAPE=0.74%

[INFO] Iteration 221: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 3.27%
[INFO] Predicted last sample of window: Actual=63.88, Pred=61.79, MAPE=3.27%

[INFO] Iteration 222: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 4.63%
[INFO] Predicted last sample of wind

[RESULT] Window validation MAPE: 1.22%
[INFO] Predicted last sample of window: Actual=67.89, Pred=68.72, MAPE=1.22%

[INFO] Iteration 263: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 3.33%
[INFO] Predicted last sample of window: Actual=71.50, Pred=69.12, MAPE=3.33%

[INFO] Iteration 264: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.77%
[INFO] Predicted last sample of window: Actual=68.04, Pred=68.56, MAPE=0.77%

[INFO] Iteration 265: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.73%
[INFO] Predicted last sample of window: Actual=69.10, Pred=68.59, MAPE=0.73%

[INFO] Iteration 266: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.29%
[INFO] Predicted last sample of window: Actual=67.99, Pred=68.19, MAPE=0.29%

[INFO] Iteration 267: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 1.53%
[INFO] Predicted last sample of wind

[RESULT] Window validation MAPE: 11.93%
[INFO] Predicted last sample of window: Actual=54.04, Pred=60.49, MAPE=11.93%

[INFO] Iteration 308: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 1.63%
[INFO] Predicted last sample of window: Actual=54.67, Pred=53.78, MAPE=1.63%

[INFO] Iteration 309: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 4.48%
[INFO] Predicted last sample of window: Actual=51.75, Pred=54.07, MAPE=4.48%

[INFO] Iteration 310: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.42%
[INFO] Predicted last sample of window: Actual=52.86, Pred=52.64, MAPE=0.42%

[INFO] Iteration 311: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.00%
[INFO] Predicted last sample of window: Actual=51.91, Pred=51.91, MAPE=0.00%

[INFO] Iteration 312: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 4.04%
[INFO] Predicted last sample of wi

[RESULT] Window validation MAPE: 2.85%
[INFO] Predicted last sample of window: Actual=57.73, Pred=56.08, MAPE=2.85%

[INFO] Iteration 353: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.20%
[INFO] Predicted last sample of window: Actual=58.05, Pred=57.93, MAPE=0.20%

[INFO] Iteration 354: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 2.40%
[INFO] Predicted last sample of window: Actual=56.65, Pred=58.01, MAPE=2.40%

[INFO] Iteration 355: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 6.64%
[INFO] Predicted last sample of window: Actual=52.58, Pred=56.07, MAPE=6.64%

[INFO] Iteration 356: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 1.94%
[INFO] Predicted last sample of window: Actual=53.27, Pred=52.24, MAPE=1.94%

[INFO] Iteration 357: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 1.07%
[INFO] Predicted last sample of wind

[RESULT] Window validation MAPE: 2.17%
[INFO] Predicted last sample of window: Actual=57.89, Pred=59.15, MAPE=2.17%

[INFO] Iteration 398: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.94%
[INFO] Predicted last sample of window: Actual=57.40, Pred=57.94, MAPE=0.94%

[INFO] Iteration 399: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.22%
[INFO] Predicted last sample of window: Actual=55.08, Pred=55.20, MAPE=0.22%

[INFO] Iteration 400: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 2.10%
[INFO] Predicted last sample of window: Actual=54.48, Pred=55.62, MAPE=2.10%

[INFO] Iteration 401: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.95%
[INFO] Predicted last sample of window: Actual=54.38, Pred=54.90, MAPE=0.95%

[INFO] Iteration 402: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 1.29%
[INFO] Predicted last sample of wind

[RESULT] Window validation MAPE: 0.08%
[INFO] Predicted last sample of window: Actual=78.60, Pred=78.54, MAPE=0.08%

[INFO] Iteration 443: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 1.50%
[INFO] Predicted last sample of window: Actual=77.43, Pred=78.59, MAPE=1.50%

[INFO] Iteration 444: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.63%
[INFO] Predicted last sample of window: Actual=76.02, Pred=76.50, MAPE=0.63%

[INFO] Iteration 445: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.37%
[INFO] Predicted last sample of window: Actual=76.37, Pred=76.09, MAPE=0.37%

[INFO] Iteration 446: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.23%
[INFO] Predicted last sample of window: Actual=75.93, Pred=76.11, MAPE=0.23%

[INFO] Iteration 447: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.03%
[INFO] Predicted last sample of wind

[RESULT] Window validation MAPE: 1.32%
[INFO] Predicted last sample of window: Actual=60.21, Pred=61.00, MAPE=1.32%

[INFO] Iteration 488: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.50%
[INFO] Predicted last sample of window: Actual=61.46, Pred=61.15, MAPE=0.50%

[INFO] Iteration 489: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.02%
[INFO] Predicted last sample of window: Actual=61.44, Pred=61.45, MAPE=0.02%

[INFO] Iteration 490: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.74%
[INFO] Predicted last sample of window: Actual=60.99, Pred=61.44, MAPE=0.74%

[INFO] Iteration 491: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 2.99%
[INFO] Predicted last sample of window: Actual=63.60, Pred=61.70, MAPE=2.99%

[INFO] Iteration 492: training on new window with Early Stopping...
[RESULT] Window validation MAPE: 0.54%
[INFO] Predicted last sample of wind