In [1]:
import os
import math
import numpy as np
import pandas as pd
from datetime import datetime

import tensorflow as tf
from tensorflow.keras import layers, models, callbacks

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

# -----------------------------
# Config
# -----------------------------
CSV_FILE = "/content/XAUUSDh1.csv"   # ensure this file exists in your working directory
DATE_COL = "Date"
SEQ_LEN = 64
VAL_RATIO = 0.15
TEST_RATIO = 0.15
BATCH_SIZE = 256
EPOCHS = 100
LR = 1e-3
NUM_HEADS = 4
KEY_DIM = 64
FF_DIM = 128
NUM_ENCODER_BLOCKS = 2
DROPOUT = 0.1
SEED = 42

tf.random.set_seed(SEED)
np.random.seed(SEED)

# -----------------------------
# Utilities: Indicators & Feature Engineering
# -----------------------------
def ema(series: pd.Series, span: int) -> pd.Series:
    return series.ewm(span=span, adjust=False).mean()

def rsi(series: pd.Series, period: int = 14) -> pd.Series:
    delta = series.diff()
    gain = np.where(delta > 0, delta, 0.0)
    loss = np.where(delta < 0, -delta, 0.0)
    gain = pd.Series(gain, index=series.index)
    loss = pd.Series(loss, index=series.index)
    avg_gain = gain.ewm(alpha=1/period, min_periods=period, adjust=False).mean()
    avg_loss = loss.ewm(alpha=1/period, min_periods=period, adjust=False).mean()
    rs = avg_gain / (avg_loss + 1e-12)
    rsi = 100 - (100 / (1 + rs))
    return rsi

def true_range(high: pd.Series, low: pd.Series, close: pd.Series) -> pd.Series:
    prev_close = close.shift(1)
    tr1 = (high - low).abs()
    tr2 = (high - prev_close).abs()
    tr3 = (low - prev_close).abs()
    return pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)

def atr(high: pd.Series, low: pd.Series, close: pd.Series, period: int = 14) -> pd.Series:
    tr = true_range(high, low, close)
    return tr.ewm(alpha=1/period, adjust=False).mean()

def macd(close: pd.Series, fast: int = 12, slow: int = 26, signal: int = 9):
    ema_fast = ema(close, fast)
    ema_slow = ema(close, slow)
    macd_line = ema_fast - ema_slow
    signal_line = ema(macd_line, signal)
    hist = macd_line - signal_line
    return macd_line, signal_line, hist

def bollinger(close: pd.Series, period: int = 20, num_std: float = 2.0):
    ma = close.rolling(window=period, min_periods=period).mean()
    sd = close.rolling(window=period, min_periods=period).std()
    upper = ma + num_std * sd
    lower = ma - num_std * sd
    width = upper - lower
    pctb = (close - lower) / (upper - lower + 1e-12)
    return ma, upper, lower, width, pctb

# -----------------------------
# Load & Prepare Data
# -----------------------------
df = pd.read_csv(CSV_FILE)
# Basic validation
required_cols = {"open", "high", "low", "close", "tick_volume", DATE_COL}
missing = required_cols - set(df.columns)
if missing:
    raise ValueError(f"Missing columns in CSV: {missing}")

# Ensure time order
df[DATE_COL] = pd.to_datetime(df[DATE_COL])
df = df.sort_values(DATE_COL).reset_index(drop=True)

# -----------------------------
# Feature Engineering
# -----------------------------
open_ = df["open"].astype(float)
high = df["high"].astype(float)
low = df["low"].astype(float)
close = df["close"].astype(float)
vol = df["tick_volume"].astype(float)

# Core features
df_feat = pd.DataFrame(index=df.index)
df_feat["close"] = close
df_feat["return"] = close.pct_change().fillna(0.0)
df_feat["price_change"] = close.diff().fillna(0.0)
df_feat["hl_spread"] = (high - low)
df_feat["oc_spread"] = (open_ - close)
df_feat["vol_chg"] = vol.diff().fillna(0.0)
df_feat["log_vol"] = np.log1p(vol)

# Volatility
df_feat["ret_vol_24"] = df_feat["return"].rolling(window=24, min_periods=24).std().fillna(method="bfill")
df_feat["ret_vol_72"] = df_feat["return"].rolling(window=72, min_periods=72).std().fillna(method="bfill")

# ATR
df_feat["atr_14"] = atr(high, low, close, period=14).fillna(method="bfill")

# RSI
df_feat["rsi_14"] = rsi(close, period=14).fillna(method="bfill")

# MACD
macd_line, signal_line, hist = macd(close, fast=12, slow=26, signal=9)
df_feat["macd"] = macd_line.fillna(method="bfill")
df_feat["macd_signal"] = signal_line.fillna(method="bfill")
df_feat["macd_hist"] = hist.fillna(method="bfill")

# EMAs
df_feat["ema_20"] = ema(close, 20).fillna(method="bfill")
df_feat["ema_50"] = ema(close, 50).fillna(method="bfill")
df_feat["ema_ratio_20_50"] = (df_feat["ema_20"] / (df_feat["ema_50"] + 1e-12))

# Bollinger Bands
ma20, upper20, lower20, width20, pctb20 = bollinger(close, period=20, num_std=2.0)
df_feat["bb_ma20"] = ma20.fillna(method="bfill")
df_feat["bb_upper20"] = upper20.fillna(method="bfill")
df_feat["bb_lower20"] = lower20.fillna(method="bfill")
df_feat["bb_width20"] = width20.fillna(method="bfill")
df_feat["bb_pctb20"] = pctb20.fillna(method="bfill")

# Target: next close
df_feat["target_next_close"] = df["close"].shift(-1)

# Drop last row with NaN target
df_feat = df_feat.iloc[:-1].copy()

# -----------------------------
# Train/Val/Test Split (time-aware)
# -----------------------------
N = len(df_feat)
test_size = int(N * TEST_RATIO)
val_size = int(N * VAL_RATIO)
train_size = N - val_size - test_size

train_df = df_feat.iloc[:train_size]
val_df   = df_feat.iloc[train_size:train_size+val_size]
test_df  = df_feat.iloc[train_size+val_size:]

feature_cols = [c for c in df_feat.columns if c != "target_next_close"]
target_col = "target_next_close"

X_train = train_df[feature_cols].values
y_train = train_df[target_col].values

X_val = val_df[feature_cols].values
y_val = val_df[target_col].values

X_test = test_df[feature_cols].values
y_test = test_df[target_col].values

# -----------------------------
# Scaling (fit on train only)
# -----------------------------
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# -----------------------------
# Sequence Building
# -----------------------------
def build_sequences(X, y, seq_len=SEQ_LEN):
    X_seq, y_seq = [], []
    for i in range(len(X) - seq_len + 1):
        X_seq.append(X[i:i+seq_len])
        # predict next close at the time aligned with the last step in window
        y_seq.append(y[i+seq_len-1])
    return np.array(X_seq, dtype=np.float32), np.array(y_seq, dtype=np.float32)

Xtr_seq, ytr_seq = build_sequences(X_train_scaled, y_train, SEQ_LEN)
Xval_seq, yval_seq = build_sequences(X_val_scaled, y_val, SEQ_LEN)
Xte_seq, yte_seq = build_sequences(X_test_scaled, y_test, SEQ_LEN)

num_features = Xtr_seq.shape[-1]

# -----------------------------
# Positional Encoding
# -----------------------------
def positional_encoding(seq_len, d_model):
    pos = np.arange(seq_len)[:, np.newaxis]
    i = np.arange(d_model)[np.newaxis, :]
    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
    angles = pos * angle_rates
    pe = np.zeros((seq_len, d_model), dtype=np.float32)
    pe[:, 0::2] = np.sin(angles[:, 0::2])
    pe[:, 1::2] = np.cos(angles[:, 1::2])
    return tf.constant(pe)

class AddPositionalEncoding(layers.Layer):
    def __init__(self, seq_len, d_model, **kwargs):
        super().__init__(**kwargs)
        self.seq_len = seq_len
        self.d_model = d_model
        self.pe = positional_encoding(seq_len, d_model)

    def call(self, x):
        # x: (batch, seq_len, d_model)
        return x + self.pe

# -----------------------------
# Transformer Encoder Block
# -----------------------------
def transformer_encoder(x, num_heads=NUM_HEADS, key_dim=KEY_DIM, ff_dim=FF_DIM, dropout=DROPOUT):
    attn_out = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim, dropout=dropout)(x, x)
    x = layers.Add()([x, attn_out])
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    ff = layers.Dense(ff_dim, activation="relu")(x)
    ff = layers.Dropout(dropout)(ff)
    ff = layers.Dense(x.shape[-1], activation=None)(ff)
    x = layers.Add()([x, ff])
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    return x

# -----------------------------
# Build Model
# -----------------------------
inputs = layers.Input(shape=(SEQ_LEN, num_features))
# Project features to model dimension
x = layers.Dense(KEY_DIM, activation="linear")(inputs)
x = AddPositionalEncoding(seq_len=SEQ_LEN, d_model=KEY_DIM)(x)

for _ in range(NUM_ENCODER_BLOCKS):
    x = transformer_encoder(x, num_heads=NUM_HEADS, key_dim=KEY_DIM, ff_dim=FF_DIM, dropout=DROPOUT)

x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(DROPOUT)(x)
x = layers.Dense(64, activation="relu")(x)
x = layers.Dropout(DROPOUT)(x)
outputs = layers.Dense(1, activation="linear")(x)

model = models.Model(inputs, outputs)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LR),
              loss="mse",
              metrics=[tf.keras.metrics.MAE, tf.keras.metrics.MAPE])

model.summary()

# -----------------------------
# Training
# -----------------------------
es = callbacks.EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
rlrp = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, min_lr=1e-5)
ckpt = callbacks.ModelCheckpoint("xau_transformer_best.h5", monitor="val_loss", save_best_only=True)

history = model.fit(
    Xtr_seq, ytr_seq,
    validation_data=(Xval_seq, yval_seq),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[es, rlrp, ckpt],
    verbose=1
)

# -----------------------------
# Evaluation
# -----------------------------
y_pred = model.predict(Xte_seq, batch_size=BATCH_SIZE).ravel()
mae = mean_absolute_error(yte_seq, y_pred)
mape = mean_absolute_percentage_error(yte_seq, y_pred)
rmse = math.sqrt(np.mean((y_pred - yte_seq)**2))

print(f"Test MAE:  {mae:.4f}")
print(f"Test MAPE: {mape:.4f}")
print(f"Test RMSE: {rmse:.4f}")

# -----------------------------
# Inference: next-step prediction from latest window
# -----------------------------
# Build the last window from test set (or the latest available sequence)
latest_window = X_test_scaled[-SEQ_LEN:]
latest_window = latest_window[np.newaxis, ...]  # shape (1, SEQ_LEN, num_features)
next_close_pred = float(model.predict(latest_window))
print(f"Predicted next close: {next_close_pred:.2f}")


  df_feat["ret_vol_24"] = df_feat["return"].rolling(window=24, min_periods=24).std().fillna(method="bfill")
  df_feat["ret_vol_72"] = df_feat["return"].rolling(window=72, min_periods=72).std().fillna(method="bfill")
  df_feat["atr_14"] = atr(high, low, close, period=14).fillna(method="bfill")
  df_feat["rsi_14"] = rsi(close, period=14).fillna(method="bfill")
  df_feat["macd"] = macd_line.fillna(method="bfill")
  df_feat["macd_signal"] = signal_line.fillna(method="bfill")
  df_feat["macd_hist"] = hist.fillna(method="bfill")
  df_feat["ema_20"] = ema(close, 20).fillna(method="bfill")
  df_feat["ema_50"] = ema(close, 50).fillna(method="bfill")
  df_feat["bb_ma20"] = ma20.fillna(method="bfill")
  df_feat["bb_upper20"] = upper20.fillna(method="bfill")
  df_feat["bb_lower20"] = lower20.fillna(method="bfill")
  df_feat["bb_width20"] = width20.fillna(method="bfill")
  df_feat["bb_pctb20"] = pctb20.fillna(method="bfill")


Epoch 1/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 17282598912.0000 - mean_absolute_error: 130579.5312 - mean_absolute_percentage_error: 99.9803



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 78ms/step - loss: 17282598912.0000 - mean_absolute_error: 130579.4375 - mean_absolute_percentage_error: 99.9801 - val_loss: 25060179968.0000 - val_mean_absolute_error: 157118.6719 - val_mean_absolute_percentage_error: 99.8645 - learning_rate: 0.0010
Epoch 2/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 17187137536.0000 - mean_absolute_error: 130213.4297 - mean_absolute_percentage_error: 99.6967



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 17186476032.0000 - mean_absolute_error: 130210.6875 - mean_absolute_percentage_error: 99.6943 - val_loss: 24723961856.0000 - val_mean_absolute_error: 156045.0938 - val_mean_absolute_percentage_error: 99.1721 - learning_rate: 0.0010
Epoch 3/100
[1m156/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 16820592640.0000 - mean_absolute_error: 128797.2031 - mean_absolute_percentage_error: 98.5994



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 16817748992.0000 - mean_absolute_error: 128785.8203 - mean_absolute_percentage_error: 98.5901 - val_loss: 23900661760.0000 - val_mean_absolute_error: 153384.3750 - val_mean_absolute_percentage_error: 97.4561 - learning_rate: 0.0010
Epoch 4/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 16016974848.0000 - mean_absolute_error: 125635.5938 - mean_absolute_percentage_error: 96.1495



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - loss: 16013479936.0000 - mean_absolute_error: 125621.3828 - mean_absolute_percentage_error: 96.1382 - val_loss: 22445737984.0000 - val_mean_absolute_error: 148565.9219 - val_mean_absolute_percentage_error: 94.3484 - learning_rate: 0.0010
Epoch 5/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 14684663808.0000 - mean_absolute_error: 120208.3516 - mean_absolute_percentage_error: 91.9438



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 14679470080.0000 - mean_absolute_error: 120186.2656 - mean_absolute_percentage_error: 91.9264 - val_loss: 20317216768.0000 - val_mean_absolute_error: 141220.8281 - val_mean_absolute_percentage_error: 89.6111 - learning_rate: 0.0010
Epoch 6/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 12832392192.0000 - mean_absolute_error: 112224.6016 - mean_absolute_percentage_error: 85.7571



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 12825777152.0000 - mean_absolute_error: 112194.3359 - mean_absolute_percentage_error: 85.7334 - val_loss: 17598703616.0000 - val_mean_absolute_error: 131243.2969 - val_mean_absolute_percentage_error: 83.1760 - learning_rate: 0.0010
Epoch 7/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 10569101312.0000 - mean_absolute_error: 101617.3828 - mean_absolute_percentage_error: 77.5396



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 10561588224.0000 - mean_absolute_error: 101579.1719 - mean_absolute_percentage_error: 77.5097 - val_loss: 14486941696.0000 - val_mean_absolute_error: 118798.3281 - val_mean_absolute_percentage_error: 75.1495 - learning_rate: 0.0010
Epoch 8/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 8108400128.0000 - mean_absolute_error: 88642.9766 - mean_absolute_percentage_error: 67.4857



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 8100813312.0000 - mean_absolute_error: 88598.3906 - mean_absolute_percentage_error: 67.4509 - val_loss: 11270819840.0000 - val_mean_absolute_error: 104388.3047 - val_mean_absolute_percentage_error: 65.8556 - learning_rate: 0.0010
Epoch 9/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 5719867392.0000 - mean_absolute_error: 73881.7812 - mean_absolute_percentage_error: 56.0444



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 5713148928.0000 - mean_absolute_error: 73833.8281 - mean_absolute_percentage_error: 56.0070 - val_loss: 8270053376.0000 - val_mean_absolute_error: 88860.2969 - val_mean_absolute_percentage_error: 55.8407 - learning_rate: 0.0010
Epoch 10/100
[1m156/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 3672762368.0000 - mean_absolute_error: 58314.7188 - mean_absolute_percentage_error: 43.9839



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 3664805888.0000 - mean_absolute_error: 58241.8086 - mean_absolute_percentage_error: 43.9270 - val_loss: 5754133504.0000 - val_mean_absolute_error: 73350.0781 - val_mean_absolute_percentage_error: 45.8373 - learning_rate: 0.0010
Epoch 11/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 2125479296.0000 - mean_absolute_error: 42941.7617 - mean_absolute_percentage_error: 32.0750



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 2122059648.0000 - mean_absolute_error: 42898.0898 - mean_absolute_percentage_error: 32.0409 - val_loss: 3874053888.0000 - val_mean_absolute_error: 59162.0859 - val_mean_absolute_percentage_error: 36.6866 - learning_rate: 0.0010
Epoch 12/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 1148518784.0000 - mean_absolute_error: 29404.4727 - mean_absolute_percentage_error: 21.6053



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 1146635648.0000 - mean_absolute_error: 29369.2656 - mean_absolute_percentage_error: 21.5779 - val_loss: 2602097664.0000 - val_mean_absolute_error: 47203.7852 - val_mean_absolute_percentage_error: 28.9740 - learning_rate: 0.0010
Epoch 13/100
[1m155/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 631908544.0000 - mean_absolute_error: 19351.6270 - mean_absolute_percentage_error: 13.9232



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 630220224.0000 - mean_absolute_error: 19312.0664 - mean_absolute_percentage_error: 13.8933 - val_loss: 1858812928.0000 - val_mean_absolute_error: 38534.5664 - val_mean_absolute_percentage_error: 23.3827 - learning_rate: 0.0010
Epoch 14/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 412665504.0000 - mean_absolute_error: 14547.1436 - mean_absolute_percentage_error: 10.4259



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 412419296.0000 - mean_absolute_error: 14541.6885 - mean_absolute_percentage_error: 10.4221 - val_loss: 1461797376.0000 - val_mean_absolute_error: 32983.2852 - val_mean_absolute_percentage_error: 19.8024 - learning_rate: 0.0010
Epoch 15/100
[1m156/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 345606560.0000 - mean_absolute_error: 13422.2119 - mean_absolute_percentage_error: 9.7455



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 345505184.0000 - mean_absolute_error: 13420.8594 - mean_absolute_percentage_error: 9.7448 - val_loss: 1266539904.0000 - val_mean_absolute_error: 29883.0859 - val_mean_absolute_percentage_error: 17.8037 - learning_rate: 0.0010
Epoch 16/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 329014016.0000 - mean_absolute_error: 13304.3330 - mean_absolute_percentage_error: 9.7681



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 329018080.0000 - mean_absolute_error: 13305.3047 - mean_absolute_percentage_error: 9.7690 - val_loss: 1184466816.0000 - val_mean_absolute_error: 28609.7520 - val_mean_absolute_percentage_error: 17.0009 - learning_rate: 0.0010
Epoch 17/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 315696768.0000 - mean_absolute_error: 13134.4570 - mean_absolute_percentage_error: 9.6777



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 315504736.0000 - mean_absolute_error: 13127.8984 - mean_absolute_percentage_error: 9.6720 - val_loss: 1057573120.0000 - val_mean_absolute_error: 26660.6895 - val_mean_absolute_percentage_error: 15.7924 - learning_rate: 0.0010
Epoch 18/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 239859728.0000 - mean_absolute_error: 10876.4424 - mean_absolute_percentage_error: 7.8271



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 239762624.0000 - mean_absolute_error: 10875.1807 - mean_absolute_percentage_error: 7.8263 - val_loss: 843349312.0000 - val_mean_absolute_error: 22931.6133 - val_mean_absolute_percentage_error: 13.4667 - learning_rate: 0.0010
Epoch 19/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 202946400.0000 - mean_absolute_error: 10299.7549 - mean_absolute_percentage_error: 7.4828



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 202863648.0000 - mean_absolute_error: 10298.2500 - mean_absolute_percentage_error: 7.4818 - val_loss: 669332800.0000 - val_mean_absolute_error: 19246.9121 - val_mean_absolute_percentage_error: 11.1493 - learning_rate: 0.0010
Epoch 20/100
[1m155/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 23ms/step - loss: 175455264.0000 - mean_absolute_error: 9764.5176 - mean_absolute_percentage_error: 7.1514



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - loss: 175344448.0000 - mean_absolute_error: 9762.2432 - mean_absolute_percentage_error: 7.1500 - val_loss: 534215232.0000 - val_mean_absolute_error: 16465.4766 - val_mean_absolute_percentage_error: 9.4707 - learning_rate: 0.0010
Epoch 21/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 153697152.0000 - mean_absolute_error: 9290.4775 - mean_absolute_percentage_error: 6.8660



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 153656064.0000 - mean_absolute_error: 9289.8457 - mean_absolute_percentage_error: 6.8657 - val_loss: 428129568.0000 - val_mean_absolute_error: 13702.8906 - val_mean_absolute_percentage_error: 7.7680 - learning_rate: 0.0010
Epoch 22/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 134680112.0000 - mean_absolute_error: 8838.0518 - mean_absolute_percentage_error: 6.5915



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 134661616.0000 - mean_absolute_error: 8837.7402 - mean_absolute_percentage_error: 6.5914 - val_loss: 348846816.0000 - val_mean_absolute_error: 11795.4336 - val_mean_absolute_percentage_error: 6.6266 - learning_rate: 0.0010
Epoch 23/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 125873952.0000 - mean_absolute_error: 8641.4893 - mean_absolute_percentage_error: 6.4901



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 125853416.0000 - mean_absolute_error: 8640.9424 - mean_absolute_percentage_error: 6.4898 - val_loss: 292069504.0000 - val_mean_absolute_error: 10993.7275 - val_mean_absolute_percentage_error: 6.2311 - learning_rate: 0.0010
Epoch 24/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 116662720.0000 - mean_absolute_error: 8398.9873 - mean_absolute_percentage_error: 6.3442



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 116646184.0000 - mean_absolute_error: 8398.3516 - mean_absolute_percentage_error: 6.3438 - val_loss: 250937632.0000 - val_mean_absolute_error: 10205.2568 - val_mean_absolute_percentage_error: 5.8006 - learning_rate: 0.0010
Epoch 25/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 111464464.0000 - mean_absolute_error: 8242.2510 - mean_absolute_percentage_error: 6.2516



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 111459984.0000 - mean_absolute_error: 8242.3594 - mean_absolute_percentage_error: 6.2517 - val_loss: 220814288.0000 - val_mean_absolute_error: 9732.1768 - val_mean_absolute_percentage_error: 5.5642 - learning_rate: 0.0010
Epoch 26/100
[1m156/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 107817920.0000 - mean_absolute_error: 8137.0391 - mean_absolute_percentage_error: 6.1876



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 107820056.0000 - mean_absolute_error: 8137.1616 - mean_absolute_percentage_error: 6.1877 - val_loss: 200283680.0000 - val_mean_absolute_error: 9529.0713 - val_mean_absolute_percentage_error: 5.4922 - learning_rate: 0.0010
Epoch 27/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 105244672.0000 - mean_absolute_error: 8077.4375 - mean_absolute_percentage_error: 6.1648



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 105246376.0000 - mean_absolute_error: 8077.5493 - mean_absolute_percentage_error: 6.1649 - val_loss: 178457952.0000 - val_mean_absolute_error: 8306.5146 - val_mean_absolute_percentage_error: 4.6959 - learning_rate: 0.0010
Epoch 28/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 104869288.0000 - mean_absolute_error: 8065.5908 - mean_absolute_percentage_error: 6.1575



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 104875840.0000 - mean_absolute_error: 8065.8970 - mean_absolute_percentage_error: 6.1578 - val_loss: 160082960.0000 - val_mean_absolute_error: 7428.4600 - val_mean_absolute_percentage_error: 4.1516 - learning_rate: 0.0010
Epoch 29/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 102552448.0000 - mean_absolute_error: 8014.4043 - mean_absolute_percentage_error: 6.1236



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 102556776.0000 - mean_absolute_error: 8014.5718 - mean_absolute_percentage_error: 6.1237 - val_loss: 144172240.0000 - val_mean_absolute_error: 6934.9531 - val_mean_absolute_percentage_error: 3.8673 - learning_rate: 0.0010
Epoch 30/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 102667088.0000 - mean_absolute_error: 8019.7397 - mean_absolute_percentage_error: 6.1331



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 102679312.0000 - mean_absolute_error: 8020.2041 - mean_absolute_percentage_error: 6.1335 - val_loss: 139376080.0000 - val_mean_absolute_error: 7146.4072 - val_mean_absolute_percentage_error: 4.0446 - learning_rate: 0.0010
Epoch 31/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 102504320.0000 - mean_absolute_error: 8036.0488 - mean_absolute_percentage_error: 6.1513



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 102498792.0000 - mean_absolute_error: 8035.7686 - mean_absolute_percentage_error: 6.1511 - val_loss: 130388352.0000 - val_mean_absolute_error: 6677.8091 - val_mean_absolute_percentage_error: 3.7367 - learning_rate: 0.0010
Epoch 32/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 102857696.0000 - mean_absolute_error: 8051.3838 - mean_absolute_percentage_error: 6.1674 - val_loss: 134189544.0000 - val_mean_absolute_error: 7165.0840 - val_mean_absolute_percentage_error: 4.0565 - learning_rate: 0.0010
Epoch 33/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 101116208.0000 - mean_absolute_error: 7979.3481 - mean_absolute_percentage_error: 6.1058



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 101115672.0000 - mean_absolute_error: 7979.1328 - mean_absolute_percentage_error: 6.1057 - val_loss: 117763288.0000 - val_mean_absolute_error: 6174.6807 - val_mean_absolute_percentage_error: 3.4448 - learning_rate: 0.0010
Epoch 34/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 102806360.0000 - mean_absolute_error: 8028.0767 - mean_absolute_percentage_error: 6.1421



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 102792256.0000 - mean_absolute_error: 8027.5654 - mean_absolute_percentage_error: 6.1417 - val_loss: 112303400.0000 - val_mean_absolute_error: 6078.8354 - val_mean_absolute_percentage_error: 3.4184 - learning_rate: 0.0010
Epoch 35/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 99828728.0000 - mean_absolute_error: 7958.2002 - mean_absolute_percentage_error: 6.0969



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 99835416.0000 - mean_absolute_error: 7958.1963 - mean_absolute_percentage_error: 6.0968 - val_loss: 106324008.0000 - val_mean_absolute_error: 5652.4873 - val_mean_absolute_percentage_error: 3.1347 - learning_rate: 0.0010
Epoch 36/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 99719208.0000 - mean_absolute_error: 7905.2671 - mean_absolute_percentage_error: 6.0546



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 99733096.0000 - mean_absolute_error: 7905.9922 - mean_absolute_percentage_error: 6.0551 - val_loss: 102475240.0000 - val_mean_absolute_error: 5489.9321 - val_mean_absolute_percentage_error: 3.0380 - learning_rate: 0.0010
Epoch 37/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 101532440.0000 - mean_absolute_error: 7988.7695 - mean_absolute_percentage_error: 6.1244



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 101517752.0000 - mean_absolute_error: 7988.3081 - mean_absolute_percentage_error: 6.1241 - val_loss: 96724624.0000 - val_mean_absolute_error: 5309.5166 - val_mean_absolute_percentage_error: 2.9487 - learning_rate: 0.0010
Epoch 38/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 100146808.0000 - mean_absolute_error: 7953.7441 - mean_absolute_percentage_error: 6.1008



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 100142424.0000 - mean_absolute_error: 7953.3945 - mean_absolute_percentage_error: 6.1005 - val_loss: 96283144.0000 - val_mean_absolute_error: 5432.3110 - val_mean_absolute_percentage_error: 3.0283 - learning_rate: 0.0010
Epoch 39/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 97993608.0000 - mean_absolute_error: 7840.3877 - mean_absolute_percentage_error: 6.0150



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 97990392.0000 - mean_absolute_error: 7840.2871 - mean_absolute_percentage_error: 6.0149 - val_loss: 93703168.0000 - val_mean_absolute_error: 5168.1792 - val_mean_absolute_percentage_error: 2.8776 - learning_rate: 0.0010
Epoch 40/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 98571448.0000 - mean_absolute_error: 7869.8130 - mean_absolute_percentage_error: 6.0334



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 98580512.0000 - mean_absolute_error: 7870.1411 - mean_absolute_percentage_error: 6.0337 - val_loss: 89523920.0000 - val_mean_absolute_error: 5036.4722 - val_mean_absolute_percentage_error: 2.8098 - learning_rate: 0.0010
Epoch 41/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 98080024.0000 - mean_absolute_error: 7864.4478 - mean_absolute_percentage_error: 6.0197



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 98075872.0000 - mean_absolute_error: 7863.9937 - mean_absolute_percentage_error: 6.0194 - val_loss: 87332240.0000 - val_mean_absolute_error: 4918.9297 - val_mean_absolute_percentage_error: 2.7303 - learning_rate: 0.0010
Epoch 42/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 95987648.0000 - mean_absolute_error: 7781.3306 - mean_absolute_percentage_error: 5.9660



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 95988664.0000 - mean_absolute_error: 7781.3940 - mean_absolute_percentage_error: 5.9661 - val_loss: 83155616.0000 - val_mean_absolute_error: 4313.1997 - val_mean_absolute_percentage_error: 2.3383 - learning_rate: 0.0010
Epoch 43/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 96113112.0000 - mean_absolute_error: 7777.9824 - mean_absolute_percentage_error: 5.9608 - val_loss: 85409544.0000 - val_mean_absolute_error: 5338.5576 - val_mean_absolute_percentage_error: 3.0369 - learning_rate: 0.0010
Epoch 44/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 95806896.0000 - mean_absolute_error: 7756.7681 - mean_absolute_percentage_error: 5.9446



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 95811776.0000 - mean_absolute_error: 7756.8926 - mean_absolute_percentage_error: 5.9446 - val_loss: 82700440.0000 - val_mean_absolute_error: 4684.0781 - val_mean_absolute_percentage_error: 2.5671 - learning_rate: 0.0010
Epoch 45/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 22ms/step - loss: 96244184.0000 - mean_absolute_error: 7783.6699 - mean_absolute_percentage_error: 5.9679



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 25ms/step - loss: 96238600.0000 - mean_absolute_error: 7783.5547 - mean_absolute_percentage_error: 5.9678 - val_loss: 81749304.0000 - val_mean_absolute_error: 4992.1475 - val_mean_absolute_percentage_error: 2.8219 - learning_rate: 0.0010
Epoch 46/100
[1m156/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 95660704.0000 - mean_absolute_error: 7751.5664 - mean_absolute_percentage_error: 5.9395



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 95666408.0000 - mean_absolute_error: 7751.8994 - mean_absolute_percentage_error: 5.9397 - val_loss: 80429064.0000 - val_mean_absolute_error: 4966.5308 - val_mean_absolute_percentage_error: 2.7579 - learning_rate: 0.0010
Epoch 47/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 95479320.0000 - mean_absolute_error: 7778.8965 - mean_absolute_percentage_error: 5.9654



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 95479320.0000 - mean_absolute_error: 7778.6523 - mean_absolute_percentage_error: 5.9651 - val_loss: 74061848.0000 - val_mean_absolute_error: 4333.0640 - val_mean_absolute_percentage_error: 2.3817 - learning_rate: 0.0010
Epoch 48/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 93413768.0000 - mean_absolute_error: 7677.1997 - mean_absolute_percentage_error: 5.8836 - val_loss: 82816216.0000 - val_mean_absolute_error: 5298.7285 - val_mean_absolute_percentage_error: 3.0335 - learning_rate: 0.0010
Epoch 49/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 94774232.0000 - mean_absolute_error: 7712.2598 - mean_absolute_percentage_error: 5.9026



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 94764088.0000 - mean_absolute_error: 7712.0278 - mean_absolute_percentage_error: 5.9024 - val_loss: 68172584.0000 - val_mean_absolute_error: 3870.5510 - val_mean_absolute_percentage_error: 2.1018 - learning_rate: 0.0010
Epoch 50/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 93643392.0000 - mean_absolute_error: 7693.6064 - mean_absolute_percentage_error: 5.8958 - val_loss: 72456568.0000 - val_mean_absolute_error: 4622.4214 - val_mean_absolute_percentage_error: 2.6210 - learning_rate: 0.0010
Epoch 51/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 91685560.0000 - mean_absolute_error: 7590.8047 - mean_absolute_percentage_error: 5.8132



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 91680888.0000 - mean_absolute_error: 7590.5283 - mean_absolute_percentage_error: 5.8129 - val_loss: 67824696.0000 - val_mean_absolute_error: 4071.1260 - val_mean_absolute_percentage_error: 2.2577 - learning_rate: 0.0010
Epoch 52/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 89966224.0000 - mean_absolute_error: 7536.8706 - mean_absolute_percentage_error: 5.7757



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 89994168.0000 - mean_absolute_error: 7537.7690 - mean_absolute_percentage_error: 5.7763 - val_loss: 67473760.0000 - val_mean_absolute_error: 4061.5215 - val_mean_absolute_percentage_error: 2.2431 - learning_rate: 0.0010
Epoch 53/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 91380304.0000 - mean_absolute_error: 7568.3618 - mean_absolute_percentage_error: 5.7993



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 91390264.0000 - mean_absolute_error: 7568.7671 - mean_absolute_percentage_error: 5.7996 - val_loss: 64534588.0000 - val_mean_absolute_error: 3609.1189 - val_mean_absolute_percentage_error: 1.9554 - learning_rate: 0.0010
Epoch 54/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - loss: 93316448.0000 - mean_absolute_error: 7643.0303 - mean_absolute_percentage_error: 5.8550 - val_loss: 73270800.0000 - val_mean_absolute_error: 4982.7944 - val_mean_absolute_percentage_error: 2.7837 - learning_rate: 0.0010
Epoch 55/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 92004776.0000 - mean_absolute_error: 7599.4043 - mean_absolute_percentage_error: 5.8136 - val_loss: 65526584.0000 - val_mean_absolute_error: 409



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 92783312.0000 - mean_absolute_error: 7637.2163 - mean_absolute_percentage_error: 5.8396 - val_loss: 63553776.0000 - val_mean_absolute_error: 4247.3696 - val_mean_absolute_percentage_error: 2.3892 - learning_rate: 0.0010
Epoch 58/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 91038104.0000 - mean_absolute_error: 7564.2788 - mean_absolute_percentage_error: 5.7964



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 91043416.0000 - mean_absolute_error: 7564.3931 - mean_absolute_percentage_error: 5.7964 - val_loss: 61199108.0000 - val_mean_absolute_error: 3958.0588 - val_mean_absolute_percentage_error: 2.1729 - learning_rate: 0.0010
Epoch 59/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 92278808.0000 - mean_absolute_error: 7615.9634 - mean_absolute_percentage_error: 5.8273 - val_loss: 61402836.0000 - val_mean_absolute_error: 3958.1445 - val_mean_absolute_percentage_error: 2.1723 - learning_rate: 0.0010
Epoch 60/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 91894472.0000 - mean_absolute_error: 7603.4551 - mean_absolute_percentage_error: 5.8196 - val_loss: 61500928.0000 - val_mean_absolute_error: 397



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 92468328.0000 - mean_absolute_error: 7602.2134 - mean_absolute_percentage_error: 5.8206 - val_loss: 57818516.0000 - val_mean_absolute_error: 3639.0938 - val_mean_absolute_percentage_error: 1.9975 - learning_rate: 0.0010
Epoch 62/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 90127048.0000 - mean_absolute_error: 7536.9785 - mean_absolute_percentage_error: 5.7696 - val_loss: 58630036.0000 - val_mean_absolute_error: 4234.1050 - val_mean_absolute_percentage_error: 2.4130 - learning_rate: 0.0010
Epoch 63/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 92321648.0000 - mean_absolute_error: 7603.1875 - mean_absolute_percentage_error: 5.8148 - val_loss: 61760376.0000 - val_mean_absolute_error: 465



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 91029504.0000 - mean_absolute_error: 7562.3726 - mean_absolute_percentage_error: 5.7881 - val_loss: 57628848.0000 - val_mean_absolute_error: 3235.7964 - val_mean_absolute_percentage_error: 1.7362 - learning_rate: 0.0010
Epoch 65/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 90492408.0000 - mean_absolute_error: 7539.1333 - mean_absolute_percentage_error: 5.7752 - val_loss: 64100588.0000 - val_mean_absolute_error: 4784.0337 - val_mean_absolute_percentage_error: 2.6899 - learning_rate: 0.0010
Epoch 66/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 91960344.0000 - mean_absolute_error: 7604.1548 - mean_absolute_percentage_error: 5.8218



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 91962464.0000 - mean_absolute_error: 7604.0513 - mean_absolute_percentage_error: 5.8217 - val_loss: 57323724.0000 - val_mean_absolute_error: 3595.0359 - val_mean_absolute_percentage_error: 1.9582 - learning_rate: 0.0010
Epoch 67/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 90785912.0000 - mean_absolute_error: 7546.4233 - mean_absolute_percentage_error: 5.7842



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 90791056.0000 - mean_absolute_error: 7546.6323 - mean_absolute_percentage_error: 5.7843 - val_loss: 56018316.0000 - val_mean_absolute_error: 3521.5688 - val_mean_absolute_percentage_error: 1.9283 - learning_rate: 0.0010
Epoch 68/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 92563512.0000 - mean_absolute_error: 7617.4995 - mean_absolute_percentage_error: 5.8338 - val_loss: 57776076.0000 - val_mean_absolute_error: 3953.3955 - val_mean_absolute_percentage_error: 2.2194 - learning_rate: 0.0010
Epoch 69/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 92879840.0000 - mean_absolute_error: 7655.3013 - mean_absolute_percentage_error: 5.8587 - val_loss: 59093732.0000 - val_mean_absolute_error: 378



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 89943168.0000 - mean_absolute_error: 7521.6260 - mean_absolute_percentage_error: 5.7564 - val_loss: 55900524.0000 - val_mean_absolute_error: 3490.6255 - val_mean_absolute_percentage_error: 1.8999 - learning_rate: 0.0010
Epoch 72/100
[1m156/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 91975256.0000 - mean_absolute_error: 7588.5464 - mean_absolute_percentage_error: 5.8088



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 91948968.0000 - mean_absolute_error: 7587.8237 - mean_absolute_percentage_error: 5.8082 - val_loss: 55687516.0000 - val_mean_absolute_error: 3664.4817 - val_mean_absolute_percentage_error: 2.0182 - learning_rate: 0.0010
Epoch 73/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 90624136.0000 - mean_absolute_error: 7522.6963 - mean_absolute_percentage_error: 5.7561 - val_loss: 57752856.0000 - val_mean_absolute_error: 4022.4565 - val_mean_absolute_percentage_error: 2.2347 - learning_rate: 0.0010
Epoch 74/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 90321016.0000 - mean_absolute_error: 7534.5308 - mean_absolute_percentage_error: 5.7658



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 90330680.0000 - mean_absolute_error: 7534.7959 - mean_absolute_percentage_error: 5.7660 - val_loss: 55638252.0000 - val_mean_absolute_error: 3434.2737 - val_mean_absolute_percentage_error: 1.8732 - learning_rate: 0.0010
Epoch 75/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 90089848.0000 - mean_absolute_error: 7534.3452 - mean_absolute_percentage_error: 5.7684 - val_loss: 59152656.0000 - val_mean_absolute_error: 3653.9797 - val_mean_absolute_percentage_error: 2.0452 - learning_rate: 0.0010
Epoch 76/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - loss: 92230064.0000 - mean_absolute_error: 7608.0034 - mean_absolute_percentage_error: 5.8211



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 92227632.0000 - mean_absolute_error: 7608.0161 - mean_absolute_percentage_error: 5.8211 - val_loss: 55295884.0000 - val_mean_absolute_error: 3468.4556 - val_mean_absolute_percentage_error: 1.8972 - learning_rate: 0.0010
Epoch 77/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 90900152.0000 - mean_absolute_error: 7544.2471 - mean_absolute_percentage_error: 5.7776 - val_loss: 56126124.0000 - val_mean_absolute_error: 4063.3145 - val_mean_absolute_percentage_error: 2.2497 - learning_rate: 0.0010
Epoch 78/100
[1m157/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 90319120.0000 - mean_absolute_error: 7539.9370 - mean_absolute_percentage_error: 5.7732



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 90325184.0000 - mean_absolute_error: 7540.1689 - mean_absolute_percentage_error: 5.7734 - val_loss: 53718624.0000 - val_mean_absolute_error: 3579.3816 - val_mean_absolute_percentage_error: 1.9807 - learning_rate: 0.0010
Epoch 79/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 91453704.0000 - mean_absolute_error: 7603.6704 - mean_absolute_percentage_error: 5.8202 - val_loss: 55508668.0000 - val_mean_absolute_error: 3283.4502 - val_mean_absolute_percentage_error: 1.7788 - learning_rate: 0.0010
Epoch 80/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - loss: 92687792.0000 - mean_absolute_error: 7624.0532 - mean_absolute_percentage_error: 5.8361 - val_loss: 54333124.0000 - val_mean_absolute_error: 362



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - loss: 91084472.0000 - mean_absolute_error: 7578.8794 - mean_absolute_percentage_error: 5.8058 - val_loss: 52306532.0000 - val_mean_absolute_error: 3308.0713 - val_mean_absolute_percentage_error: 1.7878 - learning_rate: 0.0010
Epoch 84/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 92426272.0000 - mean_absolute_error: 7597.1562 - mean_absolute_percentage_error: 5.8160 - val_loss: 57221572.0000 - val_mean_absolute_error: 4537.0781 - val_mean_absolute_percentage_error: 2.5829 - learning_rate: 0.0010
Epoch 85/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 91936496.0000 - mean_absolute_error: 7604.4448 - mean_absolute_percentage_error: 5.8231 - val_loss: 52684304.0000 - val_mean_absolute_error: 344



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 92076304.0000 - mean_absolute_error: 7600.6699 - mean_absolute_percentage_error: 5.8142 - val_loss: 51993084.0000 - val_mean_absolute_error: 3264.1665 - val_mean_absolute_percentage_error: 1.7760 - learning_rate: 0.0010
Epoch 87/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 91129216.0000 - mean_absolute_error: 7562.4619 - mean_absolute_percentage_error: 5.7860 - val_loss: 61062292.0000 - val_mean_absolute_error: 4286.9004 - val_mean_absolute_percentage_error: 2.3756 - learning_rate: 0.0010
Epoch 88/100
[1m156/158[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 21ms/step - loss: 89450280.0000 - mean_absolute_error: 7483.6768 - mean_absolute_percentage_error: 5.7281



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - loss: 89459320.0000 - mean_absolute_error: 7484.2246 - mean_absolute_percentage_error: 5.7286 - val_loss: 51825876.0000 - val_mean_absolute_error: 3431.0818 - val_mean_absolute_percentage_error: 1.9140 - learning_rate: 0.0010
Epoch 89/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - loss: 90280264.0000 - mean_absolute_error: 7547.4116 - mean_absolute_percentage_error: 5.7821 - val_loss: 53632480.0000 - val_mean_absolute_error: 3405.6990 - val_mean_absolute_percentage_error: 1.8645 - learning_rate: 0.0010
Epoch 90/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - loss: 89831400.0000 - mean_absolute_error: 7520.6455 - mean_absolute_percentage_error: 5.7621 - val_loss: 52319796.0000 - val_mean_absolute_error: 351



[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - loss: 89610656.0000 - mean_absolute_error: 7484.5073 - mean_absolute_percentage_error: 5.7297 - val_loss: 49134716.0000 - val_mean_absolute_error: 3082.3240 - val_mean_absolute_percentage_error: 1.6690 - learning_rate: 5.0000e-04
Epoch 96/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 22ms/step - loss: 89841656.0000 - mean_absolute_error: 7508.1440 - mean_absolute_percentage_error: 5.7523 - val_loss: 50197948.0000 - val_mean_absolute_error: 3217.9595 - val_mean_absolute_percentage_error: 1.7443 - learning_rate: 5.0000e-04
Epoch 97/100
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 22ms/step - loss: 90879880.0000 - mean_absolute_error: 7577.5596 - mean_absolute_percentage_error: 5.8045 - val_loss: 51625140.0000 - val_mean_absolute_er

  next_close_pred = float(model.predict(latest_window))


In [None]:
import math
import numpy as np
import pandas as pd
from typing import List, Dict, Tuple
import tensorflow
from tensorflow.keras import layers, models, callbacks
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error

# -----------------------------
# Config
# -----------------------------
CSV_FILE = "/content/XAUUSDh1.csv"   # path to your CSV
DATE_COL = "Date"
SEQ_LEN = 128              # longer context generally helps
BATCH_SIZE = 256
EPOCHS = 50                # set lower for quick runs; increase after validation
LR = 1e-3
NUM_HEADS = 4
KEY_DIM = 64
FF_DIM = 128
NUM_ENCODER_BLOCKS = 2
DROPOUT = 0.1
EMBED_DIM = 32
TAU = 0.1           # temperature for InfoNCE
ALPHA = 0.2         # weight for contrastive loss vs forecast loss
SEED = 42

# Walk-forward split ratios (fractions of total samples per fold)
INITIAL_TRAIN_RATIO = 0.6
VAL_RATIO = 0.1
TEST_RATIO = 0.1
# The remaining ~0.2 will be covered by subsequent folds via walk-forward

tf.random.set_seed(SEED)
np.random.seed(SEED)

# -----------------------------
# Feature Engineering Utilities
# -----------------------------
def ema(series: pd.Series, span: int) -> pd.Series:
    return series.ewm(span=span, adjust=False).mean()

def rsi(series: pd.Series, period: int = 14) -> pd.Series:
    delta = series.diff()
    gain = np.where(delta > 0, delta, 0.0)
    loss = np.where(delta < 0, -delta, 0.0)
    gain = pd.Series(gain, index=series.index)
    loss = pd.Series(loss, index=series.index)
    avg_gain = gain.ewm(alpha=1/period, min_periods=period, adjust=False).mean()
    avg_loss = loss.ewm(alpha=1/period, min_periods=period, adjust=False).mean()
    rs = avg_gain / (avg_loss + 1e-12)
    rsi_val = 100 - (100 / (1 + rs))
    return rsi_val

def true_range(high: pd.Series, low: pd.Series, close: pd.Series) -> pd.Series:
    prev_close = close.shift(1)
    tr1 = (high - low).abs()
    tr2 = (high - prev_close).abs()
    tr3 = (low - prev_close).abs()
    return pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)

def atr(high: pd.Series, low: pd.Series, close: pd.Series, period: int = 14) -> pd.Series:
    tr = true_range(high, low, close)
    return tr.ewm(alpha=1/period, adjust=False).mean()

def macd(close: pd.Series, fast: int = 12, slow: int = 26, signal: int = 9):
    ema_fast = ema(close, fast)
    ema_slow = ema(close, slow)
    macd_line = ema_fast - ema_slow
    signal_line = ema(macd_line, signal)
    hist = macd_line - signal_line
    return macd_line, signal_line, hist

def bollinger(close: pd.Series, period: int = 20, num_std: float = 2.0):
    ma = close.rolling(window=period, min_periods=period).mean()
    sd = close.rolling(window=period, min_periods=period).std()
    upper = ma + num_std * sd
    lower = ma - num_std * sd
    width = upper - lower
    pctb = (close - lower) / (upper - lower + 1e-12)
    return ma, upper, lower, width, pctb

# -----------------------------
# Data Loading and Features
# -----------------------------
def load_and_engineer(csv_file: str, date_col: str) -> pd.DataFrame:
    df = pd.read_csv(csv_file)
    required_cols = {"open", "high", "low", "close", "tick_volume", date_col}
    missing = required_cols - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns in CSV: {missing}")

    df[date_col] = pd.to_datetime(df[date_col])
    df = df.sort_values(date_col).reset_index(drop=True)

    open_ = df["open"].astype(float)
    high = df["high"].astype(float)
    low = df["low"].astype(float)
    close = df["close"].astype(float)
    vol = df["tick_volume"].astype(float)

    feat = pd.DataFrame(index=df.index)
    feat["close"] = close
    feat["ret"] = close.pct_change().fillna(0.0)
    feat["log_ret"] = np.log1p(feat["ret"]).fillna(0.0)
    feat["price_change"] = close.diff().fillna(0.0)
    feat["hl_spread"] = (high - low)
    feat["oc_spread"] = (open_ - close)
    feat["log_vol"] = np.log1p(vol)
    feat["vol_chg"] = vol.diff().fillna(0.0)

    # Volatility
    feat["ret_vol_24"] = feat["ret"].rolling(window=24, min_periods=24).std().fillna(method="bfill")
    feat["ret_vol_72"] = feat["ret"].rolling(window=72, min_periods=72).std().fillna(method="bfill")

    # ATR
    feat["atr_14"] = atr(high, low, close, period=14).fillna(method="bfill")

    # RSI
    feat["rsi_14"] = rsi(close, period=14).fillna(method="bfill")

    # MACD
    macd_line, signal_line, hist = macd(close, fast=12, slow=26, signal=9)
    feat["macd"] = macd_line.fillna(method="bfill")
    feat["macd_signal"] = signal_line.fillna(method="bfill")
    feat["macd_hist"] = hist.fillna(method="bfill")

    # EMAs
    feat["ema_20"] = ema(close, 20).fillna(method="bfill")
    feat["ema_50"] = ema(close, 50).fillna(method="bfill")
    feat["ema_ratio_20_50"] = (feat["ema_20"] / (feat["ema_50"] + 1e-12))

    # Bollinger
    ma20, upper20, lower20, width20, pctb20 = bollinger(close, period=20, num_std=2.0)
    feat["bb_ma20"] = ma20.fillna(method="bfill")
    feat["bb_upper20"] = upper20.fillna(method="bfill")
    feat["bb_lower20"] = lower20.fillna(method="bfill")
    feat["bb_width20"] = width20.fillna(method="bfill")
    feat["bb_pctb20"] = pctb20.fillna(method="bfill")

    # Target: next-step return
    feat["target_next_ret"] = feat["ret"].shift(-1)
    feat["target_next_close"] = df["close"].shift(-1)

    # Drop last row with NaN target
    feat = feat.iloc[:-1].copy()
    feat[date_col] = df[date_col].iloc[:-1].values
    return feat

# -----------------------------
# Sequence/Pair Builders
# -----------------------------
def build_anchor_positive_pairs(X: np.ndarray, y: np.ndarray, seq_len: int) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    Xa, Xp, ya = [], [], []
    # Positive is the adjacent next window (shifted by 1)
    for i in range(len(X) - seq_len - 0):
        j = i + 1
        if j + seq_len - 1 >= len(X):
            break
        Xa.append(X[i:i+seq_len])
        Xp.append(X[j:j+seq_len])
        ya.append(y[i+seq_len-1])  # predict next ret at the end of anchor window
    return np.array(Xa, dtype=np.float32), np.array(Xp, dtype=np.float32), np.array(ya, dtype=np.float32)

def make_dataset(Xa, Xp, ya, batch_size=BATCH_SIZE, shuffle=True):
    ds = tf.data.Dataset.from_tensor_slices(((Xa, Xp), ya))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(ya), seed=SEED)
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

# -----------------------------
# Positional Encoding
# -----------------------------
def positional_encoding(seq_len, d_model):
    pos = np.arange(seq_len)[:, np.newaxis]
    i = np.arange(d_model)[np.newaxis, :]
    angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
    angles = pos * angle_rates
    pe = np.zeros((seq_len, d_model), dtype=np.float32)
    pe[:, 0::2] = np.sin(angles[:, 0::2])
    pe[:, 1::2] = np.cos(angles[:, 1::2])
    return tf.constant(pe)

class AddPositionalEncoding(layers.Layer):
    def __init__(self, seq_len, d_model, **kwargs):
        super().__init__(**kwargs)
        self.pe = positional_encoding(seq_len, d_model)

    def call(self, x):
        return x + self.pe

# -----------------------------
# Transformer Encoder Block
# -----------------------------
class TransformerEncoderBlock(layers.Layer):
    def __init__(self, num_heads, key_dim, ff_dim, dropout=0.1):
        super().__init__()
        self.mha = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim, dropout=dropout)
        self.norm1 = layers.LayerNormalization(epsilon=1e-6)
        self.norm2 = layers.LayerNormalization(epsilon=1e-6)
        self.ffn = models.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dropout(dropout),
            layers.Dense(key_dim)  # project back to key_dim
        ])
        self.dropout = layers.Dropout(dropout)

    def call(self, x, training=False):
        attn_out = self.mha(x, x, training=training)
        x = self.norm1(x + attn_out)
        ffn_out = self.ffn(x, training=training)
        x = self.norm2(x + ffn_out)
        return x

# -----------------------------
# Dual-head Model with custom train_step (InfoNCE + MSE)
# -----------------------------
class DualHeadModel(tf.keras.Model):
    def __init__(self, seq_len, key_dim, embed_dim,
                 num_heads=4, ff_dim=128, num_blocks=2, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        # Projection + positional encoding
        self.proj = layers.Dense(key_dim)
        self.posenc = AddPositionalEncoding(seq_len, key_dim)
        # Stack of encoder blocks
        self.encoders = [TransformerEncoderBlock(num_heads, key_dim, ff_dim, dropout)
                         for _ in range(num_blocks)]
        # Pooling + dropout
        self.pool = layers.GlobalAveragePooling1D()
        self.dropout = layers.Dropout(dropout)
        # Embedding head
        self.emb_dense = layers.Dense(embed_dim, name="embedding")
        # Regression head
        self.reg_dense1 = layers.Dense(64, activation="relu")
        self.reg_out = layers.Dense(1, name="forecast")
        # Metrics
        self.mae_metric = tf.keras.metrics.MeanAbsoluteError(name="forecast_mae")
        self.mape_metric = tf.keras.metrics.MeanAbsolutePercentageError(name="forecast_mape")

    def encode(self, x, training=False):
        x = self.proj(x)
        x = self.posenc(x)
        for enc in self.encoders:
            x = enc(x, training=training)
        x = self.pool(x)
        x = self.dropout(x, training=training)
        return x

    def call(self, inputs, training=False):
        xa, xp = inputs
        fa = self.encode(xa, training=training)
        fp = self.encode(xp, training=training)
        ea = tf.linalg.l2_normalize(self.emb_dense(fa), axis=1)
        ep = tf.linalg.l2_normalize(self.emb_dense(fp), axis=1)
        # Forecast from anchor path
        r = self.dropout(fa, training=training)
        r = self.reg_dense1(r)
        r = self.dropout(r, training=training)
        yhat = self.reg_out(r)
        return ea, ep, yhat

    def info_nce(self, ea, ep, tau=0.1):
        logits = tf.matmul(ea, ep, transpose_b=True) / tau
        batch_size = tf.shape(logits)[0]
        labels = tf.range(batch_size)
        loss = tf.reduce_mean(
            tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
        )
        return loss

    def train_step(self, data):
        (xa, xp), ya = data
        with tf.GradientTape() as tape:
            ea, ep, yhat = self((xa, xp), training=True)
            forecast_loss = tf.reduce_mean(tf.keras.losses.mse(ya, tf.squeeze(yhat, axis=-1)))
            contrastive_loss = self.info_nce(ea, ep)
            total_loss = forecast_loss + 0.2 * contrastive_loss
        grads = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
        # Metrics
        self.mae_metric.update_state(ya, tf.squeeze(yhat, axis=-1))
        self.mape_metric.update_state(ya, tf.squeeze(yhat, axis=-1))
        return {
            "loss": total_loss,
            "forecast_loss": forecast_loss,
            "contrastive_loss": contrastive_loss,
            "forecast_mae": self.mae_metric.result(),
            "forecast_mape": self.mape_metric.result(),
        }

    def test_step(self, data):
        (xa, xp), ya = data
        ea, ep, yhat = self((xa, xp), training=False)
        forecast_loss = tf.reduce_mean(tf.keras.losses.mse(ya, tf.squeeze(yhat, axis=-1)))
        contrastive_loss = self.info_nce(ea, ep)
        total_loss = forecast_loss + 0.2 * contrastive_loss
        self.mae_metric.update_state(ya, tf.squeeze(yhat, axis=-1))
        self.mape_metric.update_state(ya, tf.squeeze(yhat, axis=-1))
        return {
            "loss": total_loss,
            "forecast_loss": forecast_loss,
            "contrastive_loss": contrastive_loss,
            "forecast_mae": self.mae_metric.result(),
            "forecast_mape": self.mape_metric.result(),
        }

# -----------------------------
# Walk-forward Split Generator
# -----------------------------
def generate_walk_forward_slices(N: int,
                                 initial_train_ratio: float,
                                 val_ratio: float,
                                 test_ratio: float) -> List[Tuple[int, int, int, int, int, int]]:
    """
    Returns list of (train_start, train_end, val_start, val_end, test_start, test_end) indices.
    Expanding train window; fixed val/test windows; step forward by test window.
    """
    initial_train = int(N * initial_train_ratio)
    val_len = int(N * val_ratio)
    test_len = int(N * test_ratio)
    slices = []

    train_start = 0
    train_end = initial_train
    val_start = train_end
    val_end = val_start + val_len
    test_start = val_end
    test_end = test_start + test_len

    while test_end <= N:
        slices.append((train_start, train_end, val_start, val_end, test_start, test_end))
        # advance: expand train to include previous val+test, then new val/test slices
        train_end = test_end
        val_start = train_end
        val_end = val_start + val_len
        test_start = val_end
        test_end = test_start + test_len

    return slices

# -----------------------------
# Fold Runner
# -----------------------------
def run_fold(feat_df: pd.DataFrame,
             date_col: str,
             indices: Tuple[int, int, int, int, int, int]) -> Dict[str, float]:
    ts, te, vs, ve, ss, se = indices

    train_df = feat_df.iloc[ts:te]
    val_df   = feat_df.iloc[vs:ve]
    test_df  = feat_df.iloc[ss:se]

    feature_cols = [c for c in feat_df.columns if c not in ["target_next_ret", "target_next_close", date_col]]
    target_col = "target_next_ret"

    # Fit scalers on train
    X_scaler = StandardScaler()
    y_scaler = StandardScaler()

    X_train = train_df[feature_cols].values
    y_train = train_df[target_col].values.reshape(-1, 1)

    X_val = val_df[feature_cols].values
    y_val = val_df[target_col].values.reshape(-1, 1)

    X_test = test_df[feature_cols].values
    y_test = test_df[target_col].values.reshape(-1, 1)

    X_train_scaled = X_scaler.fit_transform(X_train)
    X_val_scaled   = X_scaler.transform(X_val)
    X_test_scaled  = X_scaler.transform(X_test)

    y_train_scaled = y_scaler.fit_transform(y_train).ravel()
    y_val_scaled   = y_scaler.transform(y_val).ravel()
    y_test_scaled  = y_scaler.transform(y_test).ravel()

    Xa_tr, Xp_tr, ya_tr = build_anchor_positive_pairs(X_train_scaled, y_train_scaled, SEQ_LEN)
    Xa_val, Xp_val, ya_val = build_anchor_positive_pairs(X_val_scaled, y_val_scaled, SEQ_LEN)
    Xa_te, Xp_te, ya_te = build_anchor_positive_pairs(X_test_scaled, y_test_scaled, SEQ_LEN)

    # If sequences are too few, skip this fold
    if len(ya_tr) == 0 or len(ya_val) == 0 or len(ya_te) == 0:
        return {"skip": 1}

    train_ds = make_dataset(Xa_tr, Xp_tr, ya_tr, BATCH_SIZE, shuffle=True)
    val_ds   = make_dataset(Xa_val, Xp_val, ya_val, BATCH_SIZE, shuffle=False)
    test_ds  = make_dataset(Xa_te, Xp_te, ya_te, BATCH_SIZE, shuffle=False)

    # Build and compile model
    model = DualHeadModel(seq_len=SEQ_LEN, key_dim=KEY_DIM, embed_dim=EMBED_DIM)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LR))

    # Callbacks
    es = callbacks.EarlyStopping(monitor="val_loss", patience=7, restore_best_weights=True)
    rlrp = callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=4, min_lr=1e-5)
    ckpt = callbacks.ModelCheckpoint(f"xau_dualhead_fold_{ts}_{se}.h5", monitor="val_loss", save_best_only=True)

    # Train
    history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS,
                        callbacks=[es, rlrp, ckpt], verbose=1)

    # Evaluate on test
    eval_out = model.evaluate(test_ds, verbose=0)

    # Collect regression predictions for metrics on price
    y_true_ret = []
    y_pred_ret = []
    last_close = []  # for reconstructing predicted next close

    # Align closes for windows: at each anchor window end, use the actual last close to reconstruct next close
    closes = test_df["close"].values
    # The close aligned with the end of each anchor window starts at index SEQ_LEN-1 within test_df
    anchor_end_close = closes[SEQ_LEN-1:len(test_df)-0]

    idx = 0
    for (xa_batch, xp_batch), ya_batch in test_ds:
        _, _, yhat_batch = model((xa_batch, xp_batch), training=False)
        y_true_ret.append(ya_batch.numpy())
        y_pred_ret.append(tf.squeeze(yhat_batch, axis=-1).numpy())
        # prepare last_close slice matching batch length
        batch_len = ya_batch.shape[0]
        last_close.extend(anchor_end_close[idx:idx+batch_len])
        idx += batch_len

    y_true_ret = np.concatenate(y_true_ret)
    y_pred_ret_scaled = np.concatenate(y_pred_ret)

    # Invert target scaling
    y_true_ret = y_true_ret.reshape(-1, 1)
    y_pred_ret_scaled = y_pred_ret_scaled.reshape(-1, 1)
    y_true_ret = y_scaler.inverse_transform(y_true_ret).ravel()
    y_pred_ret = y_scaler.inverse_transform(y_pred_ret_scaled).ravel()

    # Reconstruct next close from returns: next_close = last_close * (1 + ret)
    last_close = np.array(last_close)
    y_pred_close = last_close * (1.0 + y_pred_ret)
    # Ground-truth next close aligned to anchor window ends
    target_next_close = test_df["target_next_close"].values[SEQ_LEN-1:SEQ_LEN-1+len(y_pred_close)]

    # Metrics on price
    mae = mean_absolute_error(target_next_close, y_pred_close)
    mape = mean_absolute_percentage_error(target_next_close, y_pred_close)
    rmse = math.sqrt(np.mean((y_pred_close - target_next_close)**2))

    # Baselines:
    # - Persistence baseline: next close = last close (i.e., ret baseline = 0)
    baseline_close = last_close
    baseline_mae = mean_absolute_error(target_next_close, baseline_close)
    baseline_mape = mean_absolute_percentage_error(target_next_close, baseline_close)
    baseline_rmse = math.sqrt(np.mean((baseline_close - target_next_close)**2))

    return {
        "skip": 0,
        "fold_train_start": ts,
        "fold_test_end": se,
        "val_loss": float(eval_out["loss"]) if isinstance(eval_out, dict) and "loss" in eval_out else float(eval_out[0]),
        "mae": float(mae),
        "mape": float(mape),
        "rmse": float(rmse),
        "baseline_mae": float(baseline_mae),
        "baseline_mape": float(baseline_mape),
        "baseline_rmse": float(baseline_rmse),
    }

# -----------------------------
# Main: Run Walk-forward
# -----------------------------
def main():
    feat_df = load_and_engineer(CSV_FILE, DATE_COL)
    N = len(feat_df)
    slices = generate_walk_forward_slices(
        N,
        INITIAL_TRAIN_RATIO,
        VAL_RATIO,
        TEST_RATIO
    )

    results = []
    for idx, s in enumerate(slices, 1):
        print(f"\n=== Fold {idx}/{len(slices)} ===")
        ts, te, vs, ve, ss, se = s
        print(f"Train: {ts}-{te}, Val: {vs}-{ve}, Test: {ss}-{se}")
        res = run_fold(feat_df, DATE_COL, s)
        if res.get("skip", 0) == 1:
            print("Skipping fold (insufficient sequence length).")
            continue
        print(f"Fold {idx} Metrics: "
              f"MAE={res['mae']:.2f}, MAPE={res['mape']:.4f}, RMSE={res['rmse']:.2f} | "
              f"Baseline MAE={res['baseline_mae']:.2f}, Baseline MAPE={res['baseline_mape']:.4f}, "
              f"Baseline RMSE={res['baseline_rmse']:.2f}")
        results.append(res)

    if results:
        # Aggregate
        avg_mae = np.mean([r["mae"] for r in results])
        avg_mape = np.mean([r["mape"] for r in results])
        avg_rmse = np.mean([r["rmse"] for r in results])

        avg_b_mae = np.mean([r["baseline_mae"] for r in results])
        avg_b_mape = np.mean([r["baseline_mape"] for r in results])
        avg_b_rmse = np.mean([r["baseline_rmse"] for r in results])

        print("\n=== Aggregate Walk-forward Results ===")
        print(f"Model:   MAE={avg_mae:.2f}, MAPE={avg_mape:.4f}, RMSE={avg_rmse:.2f}")
        print(f"Baseline MAE={avg_b_mae:.2f}, MAPE={avg_b_mape:.4f}, RMSE={avg_b_rmse:.2f}")

    # Optional: final inference on the latest available window of the last fold
    # Build a latest window from the last test slice
    if slices:
        _, _, _, _, ss, se = slices[-1]
        last_test = feat_df.iloc[ss:se]
        feature_cols = [c for c in feat_df.columns if c not in ["target_next_ret", "target_next_close", DATE_COL]]

        X_scaler = StandardScaler()
        y_scaler = StandardScaler()

        # Fit on train+val of the last fold for inference
        ts, te, vs, ve, *_ = slices[-1]
        fit_df = feat_df.iloc[ts:ve]
        X_fit = fit_df[feature_cols].values
        y_fit = fit_df["target_next_ret"].values.reshape(-1, 1)
        X_scaler.fit(X_fit)
        y_scaler.fit(y_fit)

        X_test_scaled = X_scaler.transform(last_test[feature_cols].values)
        closes = last_test["close"].values

        # Create anchor/positive for latest window
        if len(X_test_scaled) >= SEQ_LEN + 1:
            anchor = X_test_scaled[-SEQ_LEN:]
            positive = X_test_scaled[-SEQ_LEN+1:]
            anchor = anchor[np.newaxis, ...]
            positive = positive[np.newaxis, ...]

            # Build and load a fresh model for inference
            model = DualHeadModel(seq_len=SEQ_LEN, key_dim=KEY_DIM, embed_dim=EMBED_DIM)
            # Not loading weights here (no file path management in this template); train your final model then load for production

            # Forward pass (untrained in this snippet)
            ea, ep, yhat = model((anchor, positive), training=False)
            pred_ret_scaled = float(tf.squeeze(yhat, axis=-1).numpy().ravel()[0])
            pred_ret = float(y_scaler.inverse_transform([[pred_ret_scaled]]).ravel()[0])

            latest_close = float(closes[-1])
            next_close_pred = latest_close * (1.0 + pred_ret)
            print(f"\nLatest inference (untrained): predicted next close = {next_close_pred:.2f}")

if __name__ == "__main__":
    main()


  feat["ret_vol_24"] = feat["ret"].rolling(window=24, min_periods=24).std().fillna(method="bfill")
  feat["ret_vol_72"] = feat["ret"].rolling(window=72, min_periods=72).std().fillna(method="bfill")
  feat["atr_14"] = atr(high, low, close, period=14).fillna(method="bfill")
  feat["rsi_14"] = rsi(close, period=14).fillna(method="bfill")
  feat["macd"] = macd_line.fillna(method="bfill")
  feat["macd_signal"] = signal_line.fillna(method="bfill")
  feat["macd_hist"] = hist.fillna(method="bfill")
  feat["ema_20"] = ema(close, 20).fillna(method="bfill")
  feat["ema_50"] = ema(close, 50).fillna(method="bfill")
  feat["bb_ma20"] = ma20.fillna(method="bfill")
  feat["bb_upper20"] = upper20.fillna(method="bfill")
  feat["bb_lower20"] = lower20.fillna(method="bfill")
  feat["bb_width20"] = width20.fillna(method="bfill")
  feat["bb_pctb20"] = pctb20.fillna(method="bfill")



=== Fold 1/2 ===
Train: 0-34559, Val: 34559-40318, Test: 40318-46077
Epoch 1/50
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step - contrastive_loss: 1.8609 - forecast_loss: 1.0414 - forecast_mae: 0.6679 - forecast_mape: 263.9066 - loss: 1.4136



[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 198ms/step - contrastive_loss: 1.8528 - forecast_loss: 1.0405 - forecast_mae: 0.6678 - forecast_mape: 263.6139 - loss: 1.4111 - val_contrastive_loss: 4.6416 - val_forecast_loss: 0.4689 - val_forecast_mae: 0.4411 - val_forecast_mape: 186.8220 - val_loss: 1.3972 - learning_rate: 0.0010
Epoch 2/50
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step - contrastive_loss: 0.8092 - forecast_loss: 1.0105 - forecast_mae: 0.6345 - forecast_mape: 179.2558 - loss: 1.1723



[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 88ms/step - contrastive_loss: 0.8055 - forecast_loss: 1.0089 - forecast_mae: 0.6345 - forecast_mape: 179.2266 - loss: 1.1700 - val_contrastive_loss: 3.4530 - val_forecast_loss: 0.4629 - val_forecast_mae: 0.4327 - val_forecast_mape: 146.1655 - val_loss: 1.1535 - learning_rate: 0.0010
Epoch 3/50
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - contrastive_loss: 0.4889 - forecast_loss: 1.0037 - forecast_mae: 0.6300 - forecast_mape: 160.4449 - loss: 1.1014



[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 90ms/step - contrastive_loss: 0.4868 - forecast_loss: 1.0000 - forecast_mae: 0.6300 - forecast_mape: 160.4582 - loss: 1.0973 - val_contrastive_loss: 3.3255 - val_forecast_loss: 0.4612 - val_forecast_mae: 0.4331 - val_forecast_mape: 134.6468 - val_loss: 1.1263 - learning_rate: 0.0010
Epoch 4/50
[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step - contrastive_loss: 0.3880 - forecast_loss: 1.0015 - forecast_mae: 0.6225 - forecast_mape: 158.0280 - loss: 1.0790



[1m135/135[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 90ms/step - contrastive_loss: 0.3868 - forecast_loss: 1.0000 - forecast_mae: 0.6225 - forecast_mape: 158.0040 - loss: 1.0774 - val_contrastive_loss: 2.9308 - val_forecast_loss: 0.4634 - val_forecast_mae: 0.4316 - val_forecast_mape: 123.5976 - val_loss: 1.0495 - learning_rate: 0.0010
Epoch 5/50
[1m 89/135[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m3s[0m 84ms/step - contrastive_loss: 0.3548 - forecast_loss: 1.0045 - forecast_mae: 0.6209 - forecast_mape: 146.0966 - loss: 1.0755

In [4]:
df

Unnamed: 0,Date,open,high,low,close,tick_volume
0,2012-05-17 08:00:00,154759.0,155384.0,154740.0,155295.0,4418
1,2012-05-17 09:00:00,155297.0,155297.0,154837.0,154966.0,5512
2,2012-05-17 10:00:00,154975.0,155055.0,154563.0,154801.0,6503
3,2012-05-17 11:00:00,154798.0,155050.0,154654.0,154952.0,5950
4,2012-05-17 12:00:00,154978.0,154978.0,154619.0,154805.0,6485
...,...,...,...,...,...,...
57595,2022-03-04 19:00:00,196265.0,196536.0,196003.0,196212.0,6657
57596,2022-03-04 20:00:00,196212.0,196647.0,196080.0,196512.0,5153
57597,2022-03-04 21:00:00,196524.0,196996.0,196454.0,196706.0,5393
57598,2022-03-04 22:00:00,196699.0,197019.0,196519.0,196733.0,6665
