# In [None]:
# notebooks/FUSION_offline_notebook.py
# Combine embeddings from 3 trained models (ResNLS, GRU, SCSO-LSTM) and train a fusion head

import os, math, warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import tensorflow as tf
from tensorflow.keras import layers, Model

# -----------------------------
# Config
# -----------------------------
# Input data: the per-ticker CSV stored under data_dir.
data_dir = "../Data"
ticker = 'HDFCBANK.NS'
DATA_CSV = os.path.join(data_dir, f"{ticker}_features.csv")

# Column whose next-step value is predicted.
TARGET = "Close"

# Share of the series held out (chronologically) for validation and for test.
VAL_FRAC = 0.15
TEST_FRAC = 0.15

# Saved backbone models from the individual training notebooks.
# ResNLS and GRU are expected to expose 'resnls_embedding' / 'gru_embedding'
# layers; for SCSO-LSTM the penultimate layer is used if no named embedding exists.
RESNLS_PATH = "models/resnls_best.keras"
GRU_PATH = "models/gru_best.keras"
SCSO_PATH = "models/scso_lstm_best.keras"

# Window length each backbone was trained with.
RESNLS_SEQ = 5
GRU_SEQ = 20
SCSO_SEQ = 90

# Seed NumPy and TensorFlow from one shared value.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# -----------------------------
# Load and feature prep (same as your backbones)
# -----------------------------
def ema(series: pd.Series, span: int) -> pd.Series:
    """Return the exponential moving average of *series* for the given span.

    The smoothing factor is ``2 / (span + 1)`` and the recursive
    (``adjust=False``) form of the average is used, so the first output
    equals the first input.
    """
    smoothing = 2 / (1 + span)
    smoother = series.ewm(alpha=smoothing, adjust=False)
    return smoother.mean()

def wilder_ema(series: pd.Series, period: int) -> pd.Series:
    """Return Wilder's smoothing of *series*: a recursive EMA with alpha = 1/period."""
    weight = 1 / period
    return series.ewm(adjust=False, alpha=weight).mean()

def rsi_wilder(close: pd.Series, period: int = 14) -> pd.Series:
    """Return the Relative Strength Index of *close* using Wilder smoothing.

    Gains and losses are the positive and negative parts of the one-step
    price change; each is smoothed with ``wilder_ema`` and combined as
    ``100 - 100 / (1 + avg_gain / avg_loss)``.

    Positions where the value is undefined are returned as 0. That covers
    the first row (no previous close) and any row whose smoothed loss is
    exactly zero.

    NOTE(review): a textbook RSI reports 100, not 0, when there have been no
    losses. This is presumably kept so the features match what the saved
    backbones were trained on -- confirm before changing.
    """
    change = close.diff()
    gains = change.clip(lower=0)
    losses = -change.clip(upper=0)

    smoothed_gain = wilder_ema(gains, period)
    smoothed_loss = wilder_ema(losses, period)

    # A zero denominator is turned into NaN so the division never yields inf.
    safe_loss = smoothed_loss.replace(0, np.nan)
    strength = smoothed_gain / safe_loss
    index = 100 - (100 / (1 + strength))
    return index.fillna(0)

# Read the raw CSV and normalise header capitalisation (e.g. "close" -> "Close")
# before keeping only the OHLCV columns; rows with any missing value are dropped.
df = pd.read_csv(DATA_CSV)
df = df.rename(columns={name: name.title() for name in df.columns})
df = df[['Open','High','Low','Close','Volume']].dropna().reset_index(drop=True)

# Derived features are appended after the OHLCV columns, in this order:
# LogRet, RSI14, SMA10, SMA20, SMA50, SMA100.
features = df.copy()
close_px = features['Close']
features['LogRet'] = np.log(close_px).diff().fillna(0.0)
features['RSI14'] = rsi_wilder(close_px, 14)
# NOTE: bfill() copies the first available average into the warm-up rows,
# so those early rows carry values computed from later prices.
for sma_window in (10, 20, 50, 100):
    features[f'SMA{sma_window}'] = close_px.rolling(sma_window).mean().bfill()
features = features.dropna().reset_index(drop=True)

# predict next-step Close: row t of `features` is paired with Close at t+1,
# so the last feature row (which has no successor) is discarded.
target = features[TARGET].shift(-1).dropna().reset_index(drop=True)
features = features.iloc[:-1].reset_index(drop=True)

# -----------------------------
# Chronological split
# -----------------------------
# Sizes: validation and test each take floor(frac * n) rows from the end of
# the series; training gets whatever remains at the start.
n = len(features)
test_size = math.floor(TEST_FRAC * n)
val_size = math.floor(VAL_FRAC * n)
train_size = n - val_size - test_size
val_end = train_size + val_size

X_all = features.to_numpy()
y_all = target.to_numpy().reshape(-1, 1)

# Contiguous slices in time order: [train | val | test].
X_train_raw, y_train_raw = X_all[:train_size], y_all[:train_size]
X_val_raw, y_val_raw = X_all[train_size:val_end], y_all[train_size:val_end]
X_test_raw, y_test_raw = X_all[val_end:], y_all[val_end:]

# Both scalers are fitted on the training slice only and then applied
# unchanged to validation and test.
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train_s = scaler_X.fit_transform(X_train_raw)
X_val_s = scaler_X.transform(X_val_raw)
X_test_s = scaler_X.transform(X_test_raw)

y_train_s = scaler_y.fit_transform(y_train_raw)
y_val_s = scaler_y.transform(y_val_raw)
y_test_s = scaler_y.transform(y_test_raw)

def make_sequences_with_end_idx(X, y, seq_len):
    """Slice *X* into overlapping windows of *seq_len* rows.

    Returns a tuple ``(windows, labels, end_idx)`` where, for each window,
    the label is ``y`` at the window's last row and ``end_idx`` is that
    row's position within *X*. If *X* has fewer than *seq_len* rows all
    three arrays are empty.
    """
    # Enumerate windows by the position of their last row.
    last_rows = list(range(seq_len - 1, len(X)))
    windows = [X[end - seq_len + 1:end + 1] for end in last_rows]
    labels = [y[end] for end in last_rows]
    return np.array(windows), np.array(labels), np.array(last_rows)

# Build sequences per branch on each split (train, val, test)
def build_branch_sequences(seq_len):
    """Build train/val/test windows of length *seq_len* with global end indices.

    Reads the module-level scaled splits (``X_train_s``, ``X_val_s``,
    ``X_test_s`` and the matching ``y_*_s`` arrays) and windows them with
    ``make_sequences_with_end_idx``. Validation and test windows are given
    up to ``seq_len - 1`` rows of preceding history, so the first row of
    each split is already the end of a full window.

    Args:
        seq_len: Window length in rows; must be at least 1.

    Returns:
        Three tuples ``(X, y, end_idx)`` for train, validation and test.
        ``end_idx`` is the position of each window's last row in the full
        (train + val + test) series.

    Raises:
        ValueError: If *seq_len* is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")
    lookback = seq_len - 1  # rows of earlier history needed by the first window

    def _windows_with_history(hist_X, hist_y, cur_X, cur_y):
        # Use an explicit start offset rather than hist_X[-lookback:]:
        # with lookback == 0 the negative slice would return the whole
        # history, and with a history shorter than lookback the fixed
        # "size - lookback" offset would mislabel the global indices.
        start = max(len(hist_X) - lookback, 0)
        Xs, ys, end_idx = make_sequences_with_end_idx(
            np.vstack([hist_X[start:], cur_X]),
            np.vstack([hist_y[start:], cur_y]),
            seq_len,
        )
        # Row 0 of the stacked array is row `start` of the full series.
        return Xs, ys, start + end_idx

    # Train starts at row 0, so its per-split indices are already global.
    train = make_sequences_with_end_idx(X_train_s, y_train_s, seq_len)
    val = _windows_with_history(X_train_s, y_train_s, X_val_s, y_val_s)

    # Test context is drawn from train + val so that a validation split
    # shorter than the lookback still yields complete, correctly indexed
    # windows. When validation is long enough this is the validation tail.
    past_X = np.vstack([X_train_s, X_val_s])
    past_y = np.vstack([y_train_s, y_val_s])
    test = _windows_with_history(past_X, past_y, X_test_s, y_test_s)

    return train, val, test

# One (X, y, end_idx) triple per split, for each backbone's window length.
(res_tr, res_va, res_te), (gru_tr, gru_va, gru_te), (lstm_tr, lstm_va, lstm_te) = (
    build_branch_sequences(window_len)
    for window_len in (RESNLS_SEQ, GRU_SEQ, SCSO_SEQ)
)

# -----------------------------
# Load trained models and build embedding models
# -----------------------------
# The backbones are loaded without compiling (compile=False); in this script
# they are only run forward to produce embeddings.
res_model, gru_model, lstm_model = (
    tf.keras.models.load_model(saved_path, compile=False)
    for saved_path in (RESNLS_PATH, GRU_PATH, SCSO_PATH)
)

# Extract embeddings via named layer if present, else via penultimate layer
def build_embedding_model(m, preferred_layer_name):
    """Return a model mapping ``m.input`` to an intermediate embedding.

    The output of the layer named *preferred_layer_name* is used when the
    model has such a layer; otherwise the output of the second-to-last
    layer is used and a message is printed so the fallback is visible.

    Args:
        m: A loaded Keras model.
        preferred_layer_name: Name of the layer whose output is the embedding.

    Returns:
        A ``Model`` from ``m.input`` to the chosen layer's output.

    Raises:
        ValueError: If the named layer is missing and the model has fewer
            than two layers, so there is no penultimate layer to fall back to.
    """
    try:
        # Only the lookup is guarded: Keras raises ValueError for an unknown
        # layer name. Any other failure is a real error and must surface.
        embedding_layer = m.get_layer(preferred_layer_name)
    except ValueError:
        if len(m.layers) < 2:
            raise ValueError(
                f"Layer {preferred_layer_name!r} not found and model "
                f"{m.name!r} has no penultimate layer to fall back to"
            ) from None
        # fallback: second-to-last layer output
        embedding_layer = m.layers[-2]
        # Printed rather than issued as a warning because this script
        # silences warnings globally.
        print(
            f"[build_embedding_model] layer {preferred_layer_name!r} not found in "
            f"{m.name!r}; using penultimate layer {embedding_layer.name!r}"
        )
    return Model(m.input, embedding_layer.output)

# Embedding extractor for each backbone, keyed by its expected layer name.
res_embed, gru_embed, lstm_embed = (
    build_embedding_model(backbone, layer_name)
    for backbone, layer_name in (
        (res_model, "resnls_embedding"),
        (gru_model, "gru_embedding"),
        (lstm_model, "lstm_embedding"),  # may fallback
    )
)

# -----------------------------
# Align by global end indices (intersection across branches)
# -----------------------------
def align_by_indices(idx_a, idx_b, idx_c):
    """Find the end indices shared by three branches.

    Args:
        idx_a, idx_b, idx_c: 1-D arrays of end indices, one per branch.

    Returns:
        ``(common, selector)``. ``common`` is the sorted array of indices
        present in all three inputs. ``selector(idx_vec)`` returns, for each
        value in ``common``, its position inside ``idx_vec`` as an integer
        array that can be used directly for NumPy indexing. ``idx_vec`` must
        contain every value of ``common``.
    """
    common = np.intersect1d(np.intersect1d(idx_a, idx_b), idx_c)

    def selector(idx_vec):
        position_of = {v: i for i, v in enumerate(idx_vec)}
        # Force an integer dtype: with no common indices a plain np.array([])
        # is float64, which NumPy refuses to accept as an index array.
        return np.array([position_of[v] for v in common], dtype=np.intp)

    return common, selector

# For each split: the end indices shared by all three branches, plus a
# selector that maps a branch's end-index vector to the matching row positions.
common_tr, sel_tr = align_by_indices(res_tr[2], gru_tr[2], lstm_tr[2])
common_va, sel_va = align_by_indices(res_va[2], gru_va[2], lstm_va[2])
common_te, sel_te = align_by_indices(res_te[2], gru_te[2], lstm_te[2])

# Slice aligned sequences. Targets are taken from the ResNLS branch; each
# target is the label at the window's end index.

# Train
rows_res_tr = sel_tr(res_tr[2])
Xtr_res, ytr = res_tr[0][rows_res_tr], res_tr[1][rows_res_tr]
Xtr_gru = gru_tr[0][sel_tr(gru_tr[2])]
Xtr_lstm = lstm_tr[0][sel_tr(lstm_tr[2])]

# Validation
rows_res_va = sel_va(res_va[2])
Xva_res, yva = res_va[0][rows_res_va], res_va[1][rows_res_va]
Xva_gru = gru_va[0][sel_va(gru_va[2])]
Xva_lstm = lstm_va[0][sel_va(lstm_va[2])]

# Test
rows_res_te = sel_te(res_te[2])
Xte_res, yte = res_te[0][rows_res_te], res_te[1][rows_res_te]
Xte_gru = gru_te[0][sel_te(gru_te[2])]
Xte_lstm = lstm_te[0][sel_te(lstm_te[2])]

# -----------------------------
# Compute embeddings for each branch
# -----------------------------
# Every forward pass uses the same batch size and runs without progress output.
embed_kwargs = dict(batch_size=256, verbose=0)

# Train
Etr_res = res_embed.predict(Xtr_res, **embed_kwargs)
Etr_gru = gru_embed.predict(Xtr_gru, **embed_kwargs)
Etr_lstm = lstm_embed.predict(Xtr_lstm, **embed_kwargs)

# Validation
Eva_res = res_embed.predict(Xva_res, **embed_kwargs)
Eva_gru = gru_embed.predict(Xva_gru, **embed_kwargs)
Eva_lstm = lstm_embed.predict(Xva_lstm, **embed_kwargs)

# Test
Ete_res = res_embed.predict(Xte_res, **embed_kwargs)
Ete_gru = gru_embed.predict(Xte_gru, **embed_kwargs)
Ete_lstm = lstm_embed.predict(Xte_lstm, **embed_kwargs)

# Concatenate embeddings along axis 1, in the order ResNLS | GRU | LSTM.
# NOTE(review): assumes each embedding is 2-D (samples, dim) -- TODO confirm
# against the saved models' embedding layers.
Xtr_fuse = np.concatenate([Etr_res, Etr_gru, Etr_lstm], axis=1)
Xva_fuse = np.concatenate([Eva_res, Eva_gru, Eva_lstm], axis=1)
Xte_fuse = np.concatenate([Ete_res, Ete_gru, Ete_lstm], axis=1)

# -----------------------------
# Fusion head (MLP)
# -----------------------------
# Input width equals the concatenated embedding size; the head is
# Dense(128, relu) -> Dropout(0.2) -> Dense(64, relu) -> Dense(1, linear).
fused_dim = Xtr_fuse.shape[1]
inp = layers.Input(shape=(fused_dim,))
x = layers.Dense(128, activation='relu')(inp)
x = layers.Dropout(0.2)(x)
x = layers.Dense(64, activation='relu')(x)
out = layers.Dense(1, activation='linear')(x)
fusion = Model(inp, out)

fusion.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss='mse',
    metrics=['mae'],
)

# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights seen.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
cbs = [early_stop]
hist = fusion.fit(
    Xtr_fuse,
    ytr,
    validation_data=(Xva_fuse, yva),
    epochs=100,
    batch_size=128,
    callbacks=cbs,
    verbose=1,
)

# -----------------------------
# Evaluate and plot
# -----------------------------
# Predict on the aligned test embeddings, then map predictions and targets
# back to the original scale with the target scaler.
y_pred_s = fusion.predict(Xte_fuse, batch_size=256)
y_pred = scaler_y.inverse_transform(y_pred_s)
y_true = scaler_y.inverse_transform(yte)

# Error metrics on the de-scaled values.
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = math.sqrt(mse)
print(f"Fusion MSE={mse:.6f}  RMSE={rmse:.6f}  MAE={mae:.6f}")

# The x-axis is a plain running sample counter over the aligned test
# targets, not the global end indices.
plot_idx = np.arange(len(y_true))

fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(plot_idx, y_true.flatten(), label="Actual", color='black', linewidth=1.5)
ax.plot(plot_idx, y_pred.flatten(), label="Predicted", color='purple', linewidth=1.5)
ax.set_title("Fusion Head: Actual vs Predicted Close")
ax.set_xlabel("Aligned Test Samples")
ax.set_ylabel("Price")
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()
