In [1]:
import numpy as np
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

In [2]:
# --- 1. Load & Preprocess Data ---
# Example: Load QBO data with a 'Date' and '25 hPa' column
data_df = pd.read_excel('QBO Data.xlsx')
data_df['Date'] = pd.to_datetime(data_df['Date'], format='%d%m%Y')
data_df.set_index('Date', inplace=True)
series = data_df['25 hPa'].values.astype(float)

In [3]:
# --- 2. Train-Test Split (90/10, sequential) ---
train_size = int(len(series) * 0.9)
train, test = series[:train_size], series[train_size:]

In [4]:
# --- 3. Fit SARIMA on Training Data ---
# (Tune your SARIMA parameters as needed)
sarima = SARIMAX(train, order=(2,0,2), seasonal_order=(1,0,1,12))
sarima_fit = sarima.fit(disp=False)
sarima_pred_train = sarima_fit.fittedvalues
sarima_pred_test = sarima_fit.forecast(steps=len(test))

In [5]:
# --- 4. Compute Residuals for Training (actual - SARIMA) ---
residual_train = train - sarima_pred_train

In [6]:
# --- 5. Feature Engineering for Transformer ---
# Example: Use lag features (feel free to add more features)
def create_lag_features(series, lags=[1,2,3]):
    df = pd.DataFrame({'y': series})
    for lag in lags:
        df[f'lag_{lag}'] = df['y'].shift(lag)
    return df.dropna()

lags = [1,2,3,12]  # Example lags
full_df = create_lag_features(series, lags)
full_df.reset_index(drop=True, inplace=True)

# Align residuals with lagged features
aligned_residual = residual_train[-len(full_df[:train_size-lags[-1]]):]  # For training
aligned_sarima_pred_test = sarima_pred_test[-len(test)+lags[-1]:]       # For hybrid output

In [7]:
# --- 6. Prepare Data for Transformer (windowed input, residual target) ---
# Split lag features into train/test
X = full_df.drop('y', axis=1).values
y = full_df['y'].values

X_train = X[:train_size-lags[-1]]
X_test = X[train_size-lags[-1]:]
y_train = aligned_residual  # Target: residual SARIMA
y_test = test[lags[-1]:]    # For evaluation

# Scale features and residuals
feature_scaler = StandardScaler()
X_train_scaled = feature_scaler.fit_transform(X_train)
X_test_scaled = feature_scaler.transform(X_test)

target_scaler = StandardScaler()
y_train_scaled = target_scaler.fit_transform(y_train.reshape(-1,1)).flatten()

In [8]:
# --- 7. Build Transformer Model (Simple Version) ---
def build_transformer(input_shape, head_size=8, num_heads=2, ff_dim=8, num_blocks=2, mlp_units=[32], dropout=0.1):
    inputs = keras.Input(shape=input_shape)
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    for _ in range(num_blocks):
        attn_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=head_size, dropout=dropout)(x, x)
        x = layers.Add()([x, attn_output])
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        ff_output = layers.Dense(ff_dim, activation="relu")(x)
        x = layers.Add()([x, ff_output])
    x = layers.Flatten()(x)
    for units in mlp_units:
        x = layers.Dense(units, activation="relu")(x)
        x = layers.Dropout(dropout)(x)
    outputs = layers.Dense(1)(x)
    return keras.Model(inputs, outputs)

# Reshape for transformer: (samples, time steps, features)
# Here each sample is a feature vector, not a sequence, so use (samples, 1, features)
X_train_tr = X_train_scaled[:, np.newaxis, :]
X_test_tr = X_test_scaled[:, np.newaxis, :]

model = build_transformer(X_train_tr.shape[1:])

model.compile(
    loss="mse",
    optimizer=keras.optimizers.Adam(1e-3),
    metrics=["mae"]
)

ValueError: Inputs have incompatible shapes. Received shapes (1, 4) and (1, 8)

In [None]:
# --- 8. Train Transformer on Residuals ---
checkpoint = ModelCheckpoint("best_hybrid_transformer.h5", save_best_only=True, monitor='val_loss', mode='min')
history = model.fit(
    X_train_tr, y_train_scaled,
    validation_split=0.1,
    epochs=100,
    batch_size=32,
    callbacks=[checkpoint],
    verbose=1
)

In [None]:
# --- 9. Predict Residuals on Test Data ---
residual_pred_scaled = model.predict(X_test_tr)
residual_pred = target_scaler.inverse_transform(residual_pred_scaled).flatten()

# --- 10. Hybrid Prediction on Test: SARIMA + Transformer Residual ---
hybrid_pred = aligned_sarima_pred_test + residual_pred

In [None]:
# --- 11. Evaluation ---
rmse_sarima = mean_squared_error(y_test, aligned_sarima_pred_test, squared=False)
rmse_hybrid = mean_squared_error(y_test, hybrid_pred, squared=False)
r2_sarima = r2_score(y_test, aligned_sarima_pred_test)
r2_hybrid = r2_score(y_test, hybrid_pred)

print(f"SARIMA RMSE: {rmse_sarima:.3f}, Hybrid RMSE: {rmse_hybrid:.3f}")
print(f"SARIMA R2: {r2_sarima:.3f}, Hybrid R2: {r2_hybrid:.3f}")

In [None]:
# --- 12. Plot Results ---
plt.figure(figsize=(12,6))
plt.plot(np.arange(len(series)), series, label="Actual")
plt.plot(np.arange(train_size, len(series)), aligned_sarima_pred_test, label="SARIMA", color="green")
plt.plot(np.arange(train_size, len(series)), hybrid_pred, label="Hybrid", color="red", linestyle="--")
plt.legend()
plt.title("Hybrid SARIMA–Transformer Forecast")
plt.xlabel("Time Index")
plt.ylabel("QBO (25 hPa)")
plt.show()