In [None]:
keep 
atr_14  , OBV

drop

'ema_cross_up', 'macd_cross_up', 'oversold_reversal', 'overbought_reversal', 'trending_market'

In [None]:
drop_tcn = [
    'open', 'high', 'low', 'typical_price', 'EMA_21', 'SMA_20',
    'vwap_24h', 'close_4h', 'bollinger_upper', 'bollinger_lower',
    'resistance_level', 'support_level', 'high_low', 'high_close', 'low_close',
    'true_range', 'volume_mean_20', 'MACD_line', 'MACD_signal',
    'volatility_regime', 'trending_market', 'above_sma50', 'ema7_above_ema21',
    'vol_spike_1_5x', 'near_upper_band', 'near_lower_band',
    'break_upper_band', 'break_lower_band', 'rsi_oversold',
    'above_sma20', 'macd_positive', 'volume_breakout', 'volume_breakdown',
    'bullish_scenario_1', 'bullish_scenario_2', 'bullish_scenario_3',
    'bullish_scenario_4', 'bullish_scenario_5', 'bullish_scenario_6',
    'bearish_scenario_1', 'bearish_scenario_2', 'bearish_scenario_3',
    'bearish_scenario_4', 'bearish_scenario_6','ema_cross_up', 'macd_cross_up', 'oversold_reversal', 'overbought_reversal',
]

In [21]:
"""
tcn_optuna_search.py  —  Robust TCN hyper-parameter optimisation
─────────────────────────────────────────────────────────────────
• Objective  : weighted-F1 with α = 2  (precision has double weight)
• Output     : scaler, best-params (.json), all trials (.csv), history plot
• Tested on  : TensorFlow 2.16 · tcn 3.5 · Optuna 3.x  (CPU & single GPU)
"""

# ───────────────────────── imports ──────────────────────────
import os, json, gc, warnings, optuna
from datetime import datetime
from pathlib import Path
from typing import Dict, Tuple

import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score

import tensorflow as tf
from tensorflow import keras
from tcn import TCN                       #  pip install tcn==3.*
from optuna_integration.tfkeras import TFKerasPruningCallback

# ───────────────────── runtime hygiene ──────────────────────
warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

SEED, VAL_FRAC, ALPHA = 42, 0.20, 2.0
N_TRIALS, TIMEOUT = 40, 80 * 60  # seconds

np.random.seed(SEED)
tf.random.set_seed(SEED)
for g in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(g, True)

# ──────────────────────── paths, drops ─────────────────────
CSV_PATH = Path(r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction"
                r"\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv")
SCALER_PKL = "tcn_scaler.pkl"

DROP_COLS = [  # (shortened – add the rest of your leakage columns here)
    "close", "open", "high", "low", "typical_price", "high_low",
    "high_close", "low_close", "EMA_21", "SMA_20", "vwap_24h", "close_4h"
]

# ───────────────────────── data load ───────────────────────
print("📊 Loading data …")
df = (
    pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
      .loc["2018-01-01":]
      .drop(columns=[c for c in DROP_COLS if c in df.columns])
      .dropna(subset=["target"])
      .dropna()
)
print(f"Shape  : {df.shape}")
print("Classes:\n", df["target"].value_counts(), "\n")

X_raw, y_raw = df.drop(columns="target").values, df["target"].astype(int).values
n_features   = X_raw.shape[1]

split = int(len(df) * (1 - VAL_FRAC))
scaler = StandardScaler().fit(X_raw[:split])
joblib.dump(scaler, SCALER_PKL)

X_tr_raw = scaler.transform(X_raw[:split]).astype(np.float32)
X_va_raw = scaler.transform(X_raw[split:]).astype(np.float32)
y_tr_raw, y_va_raw = y_raw[:split], y_raw[split:]

pos_rate = y_tr_raw.mean()
CLASS_W0 = np.float32(1.0)
CLASS_W1 = np.float32((1 - pos_rate) / pos_rate if pos_rate else 1.0)
print(f"Class-weights 0 / 1 → {CLASS_W0:.2f} / {CLASS_W1:.2f}")

# ─────────────────── window cache helper ───────────────────
_CACHE: Dict[Tuple[int, int, int], Tuple[np.ndarray, np.ndarray]] = {}

def make_windows(arr: np.ndarray, labels: np.ndarray, win: int):
    k = (id(arr), len(labels), win)
    if k in _CACHE:
        return _CACHE[k]

    Xs, ys = [], []
    for i in range(win, len(arr)):
        Xs.append(arr[i - win:i])
        ys.append(labels[i])
    Xw, yw = np.asarray(Xs, np.float32), np.asarray(ys, np.int8)

    if Xw.nbytes + yw.nbytes < 1_000_000_000:
        _CACHE[k] = (Xw, yw)
    return Xw, yw

# ─────────────── metric: weighted-F1 (α=2) ────────────────
def f1_alpha2(y_true, y_prob) -> float:
    y_pred = (y_prob >= 0.5).astype(int)
    p, r   = precision_score(y_true, y_pred, zero_division=0), \
             recall_score   (y_true, y_pred, zero_division=0)
    return 0. if p+r == 0 else (1+ALPHA)*p*r / (ALPHA*p + r)

# ──────────────── TCN model factory ───────────────────────
def build_tcn(cfg: dict) -> keras.Model:
    dilations = [2 ** i for i in range(cfg["nb_stacks"] * cfg["blocks_per_stack"])]

    inp = keras.layers.Input(shape=(cfg["window"], n_features))
    x   = TCN(
            nb_filters       = cfg["filters"],
            kernel_size      = cfg["kernel"],
            nb_stacks        = cfg["nb_stacks"],
            dilations        = dilations,
            padding          = "causal",
            dropout_rate     = cfg["dropout"],
            activation       = cfg["act"],
            use_skip_connections = True,
            use_batch_norm   = cfg["norm"] == "batch",
            use_layer_norm   = cfg["norm"] == "layer",
            return_sequences = False
         )(inp)

    x   = keras.layers.Dense(cfg["dense"], activation=cfg["act"])(x)
    x   = keras.layers.Dropout(cfg["dropout"])(x)
    out = keras.layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(inp, out)

    # balanced BCE
    def weighted_bce(y_t, y_p):
        y_t = tf.cast(y_t, y_p.dtype)
        w   = tf.where(tf.equal(y_t, 1), CLASS_W1, CLASS_W0)
        w   = tf.cast(w, y_p.dtype)
        return tf.reduce_mean(w * keras.losses.binary_crossentropy(y_t, y_p))

    model.compile(keras.optimizers.Adam(cfg["lr"]), loss=weighted_bce)
    return model

# ──────────────────── Optuna objective ────────────────────
def objective(trial: optuna.trial.Trial):
    cfg = dict(
        window           = trial.suggest_int("window", 12, 60, step=6),
        filters          = trial.suggest_categorical("filters", [32, 48, 64, 96]),
        kernel           = trial.suggest_int("kernel", 2, 6),
        nb_stacks        = trial.suggest_int("nb_stacks", 1, 3),
        blocks_per_stack = trial.suggest_int("blocks_per_stack", 1, 2),
        dropout          = trial.suggest_float("dropout", 0.05, 0.35),
        dense            = trial.suggest_categorical("dense", [32, 64, 128]),
        act              = trial.suggest_categorical("act", ["relu", "elu", "selu", tf.nn.swish]),
        lr               = trial.suggest_float("lr", 5e-5, 3e-3, log=True),
        batch            = trial.suggest_categorical("batch", [32, 64]),
        norm             = trial.suggest_categorical("norm", ["none", "batch", "layer"]),
    )

    X_tr, y_tr = make_windows(X_tr_raw, y_tr_raw, cfg["window"])
    X_va, y_va = make_windows(X_va_raw, y_va_raw, cfg["window"])

    if len(X_tr) < cfg["batch"] * 10:      # too small → prune
        return float("inf")

    tf.keras.backend.clear_session(); gc.collect()
    model = build_tcn(cfg)

    cb = [
        keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True, verbose=0),
        keras.callbacks.ReduceLROnPlateau(patience=5, factor=.5, min_lr=1e-6, verbose=0),
        TFKerasPruningCallback(trial, "val_loss")
    ]

    hist = model.fit(X_tr, y_tr,
                     validation_data=(X_va, y_va),
                     epochs=100,
                     batch_size=cfg["batch"],
                     shuffle=False,
                     callbacks=cb,
                     verbose=0)

    y_prob = model.predict(X_va, batch_size=cfg["batch"], verbose=0).ravel()
    score  = f1_alpha2(y_va, y_prob)

    # store metrics to inspect later
    trial.set_user_attr("precision", precision_score(y_va, y_prob >= 0.5, zero_division=0))
    trial.set_user_attr("recall",    recall_score   (y_va, y_prob >= 0.5, zero_division=0))
    trial.set_user_attr("best_epoch", int(np.argmin(hist.history["val_loss"]) + 1))

    # print live metrics
    print(f"Trial {trial.number:2d}  "
          f"Fα2={score:.4f}  P={trial.user_attrs['precision']:.3f} "
          f"R={trial.user_attrs['recall']:.3f}  cfg={trial.params}")

    del model; tf.keras.backend.clear_session(); gc.collect()
    return -score   # minimise

# ─────────────────────────── main ──────────────────────────
def main():
    study = optuna.create_study(direction="minimize",
                                sampler=optuna.samplers.TPESampler(seed=SEED),
                                pruner =optuna.pruners.MedianPruner(n_startup_trials=5))

    print(f"\n🚀 Optimising  {N_TRIALS} trials  |  {TIMEOUT//60} min wall-time …")
    study.optimize(objective,
                   n_trials=N_TRIALS,
                   timeout=TIMEOUT,
                   show_progress_bar=True,
                   gc_after_trial=True)

    best, ts = study.best_trial, datetime.utcnow().strftime("%Y%m%d_%H%M%S")

    print("\n═══════════ BEST RESULT ═══════════")
    print(f"weighted-F1 (α=2) : {-best.value:.4f}")
    print(f"precision         : {best.user_attrs['precision']:.4f}")
    print(f"recall            : {best.user_attrs['recall']:.4f}")
    print(f"best epoch        : {best.user_attrs['best_epoch']}")
    print("hyper-parameters  :")
    for k, v in best.params.items():
        print(f"  {k:18s}: {v}")

    json.dump(best.params, open(f"best_params_tcn_{ts}.json", "w"), indent=2)
    study.trials_dataframe().to_csv(f"trials_tcn_{ts}.csv", index=False)

    try:
        import matplotlib.pyplot as plt
        optuna.visualization.matplotlib.plot_optimization_history(study)
        plt.tight_layout()
        plt.savefig(f"optuna_tcn_history_{ts}.png", dpi=300)
        plt.close()
    except Exception:
        print("⚠️ matplotlib unavailable – history plot skipped.")

    print(f"\n📝 Artefacts saved (timestamp {ts}).  "
          f"Scaler → {SCALER_PKL}")

# ───────────────────────────────────────────────────────────
if __name__ == "__main__":
    main()


📊 Loading data …
Shape  : (15855, 59)
Classes:
 target
1    8097
0    7758
Name: count, dtype: int64 

Class-weights 0 / 1 → 1.00 / 0.97


[I 2025-06-09 18:52:10,220] A new study created in memory with name: no-name-5d78d030-0093-439e-a2ba-35523e4ee0ab



🚀 Optimising  40 trials  |  80 min wall-time …


  0%|          | 0/40 [00:00<?, ?it/s]

Trial  0  Fα2=0.1605  P=0.509 R=0.120  cfg={'window': 30, 'filters': 32, 'kernel': 2, 'nb_stacks': 1, 'blocks_per_stack': 2, 'dropout': 0.23033450352296264, 'dense': 128, 'act': 'relu', 'lr': 0.00017376356936978755, 'batch': 32, 'norm': 'batch'}


  0%|          | 0/40 [01:10<?, ?it/s]

[I 2025-06-09 18:53:20,599] Trial 0 finished with value: -0.1605241605241605 and parameters: {'window': 30, 'filters': 32, 'kernel': 2, 'nb_stacks': 1, 'blocks_per_stack': 2, 'dropout': 0.23033450352296264, 'dense': 128, 'act': 'relu', 'lr': 0.00017376356936978755, 'batch': 32, 'norm': 'batch'}. Best is trial 0 with value: -0.1605241605241605.


Best trial: 0. Best value: -0.160524:   2%|▎         | 1/40 [01:11<46:34, 71.66s/it, 71.66/4800 seconds]

Trial  1  Fα2=0.0000  P=0.000 R=0.000  cfg={'window': 24, 'filters': 64, 'kernel': 4, 'nb_stacks': 2, 'blocks_per_stack': 1, 'dropout': 0.2322634555704315, 'dense': 128, 'act': 'relu', 'lr': 0.0008234548958371457, 'batch': 32, 'norm': 'layer'}


Best trial: 0. Best value: -0.160524:   2%|▎         | 1/40 [02:52<46:34, 71.66s/it, 71.66/4800 seconds]

[I 2025-06-09 18:55:02,851] Trial 1 finished with value: -0.0 and parameters: {'window': 24, 'filters': 64, 'kernel': 4, 'nb_stacks': 2, 'blocks_per_stack': 1, 'dropout': 0.2322634555704315, 'dense': 128, 'act': 'relu', 'lr': 0.0008234548958371457, 'batch': 32, 'norm': 'layer'}. Best is trial 0 with value: -0.1605241605241605.


Best trial: 0. Best value: -0.160524:   5%|▌         | 2/40 [02:53<56:47, 89.67s/it, 173.94/4800 seconds]

Trial  2  Fα2=0.4784  P=0.532 R=0.456  cfg={'window': 24, 'filters': 32, 'kernel': 2, 'nb_stacks': 3, 'blocks_per_stack': 2, 'dropout': 0.3318496824692567, 'dense': 128, 'act': <function swish at 0x0000025F5ECDAF80>, 'lr': 0.0002455257311459749, 'batch': 64, 'norm': 'layer'}


Best trial: 0. Best value: -0.160524:   5%|▌         | 2/40 [13:54<56:47, 89.67s/it, 173.94/4800 seconds]

[I 2025-06-09 19:06:04,741] Trial 2 finished with value: -0.4783904619970194 and parameters: {'window': 24, 'filters': 32, 'kernel': 2, 'nb_stacks': 3, 'blocks_per_stack': 2, 'dropout': 0.3318496824692567, 'dense': 128, 'act': <function swish at 0x0000025F5ECDAF80>, 'lr': 0.0002455257311459749, 'batch': 64, 'norm': 'layer'}. Best is trial 2 with value: -0.4783904619970194.


Best trial: 2. Best value: -0.47839:   8%|▊         | 3/40 [13:56<3:36:29, 351.07s/it, 836.07/4800 seconds]

Trial  3  Fα2=0.3502  P=0.527 R=0.300  cfg={'window': 18, 'filters': 64, 'kernel': 2, 'nb_stacks': 1, 'blocks_per_stack': 2, 'dropout': 0.26205720315428516, 'dense': 64, 'act': 'selu', 'lr': 0.00019380132456492117, 'batch': 64, 'norm': 'batch'}


Best trial: 2. Best value: -0.47839:   8%|▊         | 3/40 [14:42<3:36:29, 351.07s/it, 836.07/4800 seconds]

[I 2025-06-09 19:06:52,361] Trial 3 finished with value: -0.3501890359168242 and parameters: {'window': 18, 'filters': 64, 'kernel': 2, 'nb_stacks': 1, 'blocks_per_stack': 2, 'dropout': 0.26205720315428516, 'dense': 64, 'act': 'selu', 'lr': 0.00019380132456492117, 'batch': 64, 'norm': 'batch'}. Best is trial 2 with value: -0.4783904619970194.


Best trial: 2. Best value: -0.47839:  10%|█         | 4/40 [14:43<2:18:44, 231.23s/it, 883.60/4800 seconds]

Trial  4  Fα2=0.7657  P=0.521 R=1.000  cfg={'window': 54, 'filters': 96, 'kernel': 4, 'nb_stacks': 3, 'blocks_per_stack': 1, 'dropout': 0.2068198488145982, 'dense': 32, 'act': 'elu', 'lr': 0.0020547569815878254, 'batch': 64, 'norm': 'none'}


Best trial: 2. Best value: -0.47839:  10%|█         | 4/40 [33:13<2:18:44, 231.23s/it, 883.60/4800 seconds]

[I 2025-06-09 19:25:23,788] Trial 4 finished with value: -0.7656667190199467 and parameters: {'window': 54, 'filters': 96, 'kernel': 4, 'nb_stacks': 3, 'blocks_per_stack': 1, 'dropout': 0.2068198488145982, 'dense': 32, 'act': 'elu', 'lr': 0.0020547569815878254, 'batch': 64, 'norm': 'none'}. Best is trial 4 with value: -0.7656667190199467.


Best trial: 4. Best value: -0.765667:  12%|█▎        | 5/40 [33:35<5:20:04, 548.71s/it, 1995.21/4800 seconds]

[I 2025-06-09 19:25:45,368] Trial 5 pruned. Trial was pruned at epoch 1.


Best trial: 4. Best value: -0.765667:  15%|█▌        | 6/40 [33:50<3:29:22, 369.48s/it, 2016.78/4800 seconds]

[I 2025-06-09 19:26:01,051] Trial 6 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  18%|█▊        | 7/40 [34:14<2:19:36, 253.83s/it, 2032.52/4800 seconds]

[I 2025-06-09 19:26:24,480] Trial 7 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  20%|██        | 8/40 [34:16<1:36:15, 180.50s/it, 2056.00/4800 seconds]

Trial  8  Fα2=0.0000  P=0.000 R=0.000  cfg={'window': 48, 'filters': 48, 'kernel': 2, 'nb_stacks': 3, 'blocks_per_stack': 2, 'dropout': 0.12738248831454668, 'dense': 64, 'act': <function swish at 0x0000025F5ECDAF80>, 'lr': 0.001995489737195091, 'batch': 32, 'norm': 'layer'}


Best trial: 4. Best value: -0.765667:  20%|██        | 8/40 [38:46<1:36:15, 180.50s/it, 2056.00/4800 seconds]

[I 2025-06-09 19:30:56,774] Trial 8 finished with value: -0.0 and parameters: {'window': 48, 'filters': 48, 'kernel': 2, 'nb_stacks': 3, 'blocks_per_stack': 2, 'dropout': 0.12738248831454668, 'dense': 64, 'act': <function swish at 0x0000025F5ECDAF80>, 'lr': 0.001995489737195091, 'batch': 32, 'norm': 'layer'}. Best is trial 4 with value: -0.7656667190199467.


Best trial: 4. Best value: -0.765667:  22%|██▎       | 9/40 [38:58<1:48:05, 209.22s/it, 2328.39/4800 seconds]

[I 2025-06-09 19:31:08,443] Trial 9 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  25%|██▌       | 10/40 [47:12<1:14:07, 148.23s/it, 2340.05/4800 seconds]

[I 2025-06-09 19:39:22,883] Trial 10 pruned. Trial was pruned at epoch 22.


Best trial: 4. Best value: -0.765667:  28%|██▊       | 11/40 [47:47<2:02:50, 254.17s/it, 2834.41/4800 seconds]

[I 2025-06-09 19:39:57,582] Trial 11 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  30%|███       | 12/40 [48:21<1:27:29, 187.49s/it, 2869.41/4800 seconds]

[I 2025-06-09 19:40:31,286] Trial 12 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  32%|███▎      | 13/40 [48:51<1:03:22, 140.84s/it, 2902.88/4800 seconds]

[I 2025-06-09 19:41:01,988] Trial 13 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  35%|███▌      | 14/40 [49:38<46:37, 107.58s/it, 2933.62/4800 seconds]  

[I 2025-06-09 19:41:48,536] Trial 14 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  38%|███▊      | 15/40 [50:02<37:10, 89.21s/it, 2980.27/4800 seconds] 

[I 2025-06-09 19:42:12,653] Trial 15 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  40%|████      | 16/40 [50:22<27:51, 69.63s/it, 3004.41/4800 seconds]

[I 2025-06-09 19:42:33,114] Trial 16 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  42%|████▎     | 17/40 [50:54<21:01, 54.83s/it, 3024.84/4800 seconds]

[I 2025-06-09 19:43:04,270] Trial 17 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  45%|████▌     | 18/40 [51:31<17:29, 47.71s/it, 3055.98/4800 seconds]

[I 2025-06-09 19:43:41,819] Trial 18 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  48%|████▊     | 19/40 [51:58<15:38, 44.69s/it, 3093.63/4800 seconds]

[I 2025-06-09 19:44:08,636] Trial 19 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  50%|█████     | 20/40 [52:32<13:06, 39.33s/it, 3120.48/4800 seconds]

[I 2025-06-09 19:44:42,949] Trial 20 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  52%|█████▎    | 21/40 [52:44<11:58, 37.83s/it, 3154.80/4800 seconds]

[I 2025-06-09 19:44:54,758] Trial 21 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  55%|█████▌    | 22/40 [52:56<09:00, 30.02s/it, 3166.61/4800 seconds]

[I 2025-06-09 19:45:06,538] Trial 22 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  57%|█████▊    | 23/40 [53:08<06:57, 24.54s/it, 3178.37/4800 seconds]

[I 2025-06-09 19:45:18,786] Trial 23 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  60%|██████    | 24/40 [53:32<05:33, 20.86s/it, 3190.65/4800 seconds]

[I 2025-06-09 19:45:42,569] Trial 24 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  62%|██████▎   | 25/40 [54:32<05:26, 21.76s/it, 3214.49/4800 seconds]

[I 2025-06-09 19:46:43,083] Trial 25 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  65%|██████▌   | 26/40 [54:44<07:48, 33.48s/it, 3275.34/4800 seconds]

[I 2025-06-09 19:46:54,500] Trial 26 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  68%|██████▊   | 27/40 [55:07<05:49, 26.85s/it, 3286.70/4800 seconds]

[I 2025-06-09 19:47:17,998] Trial 27 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  70%|███████   | 28/40 [55:51<05:10, 25.85s/it, 3310.21/4800 seconds]

[I 2025-06-09 19:48:02,045] Trial 28 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  72%|███████▎  | 29/40 [56:05<05:44, 31.33s/it, 3354.33/4800 seconds]

[I 2025-06-09 19:48:16,155] Trial 29 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  75%|███████▌  | 30/40 [56:20<04:21, 26.16s/it, 3368.45/4800 seconds]

[I 2025-06-09 19:48:31,058] Trial 30 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  78%|███████▊  | 31/40 [56:34<03:24, 22.77s/it, 3383.30/4800 seconds]

[I 2025-06-09 19:48:44,848] Trial 31 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  80%|████████  | 32/40 [56:51<02:40, 20.08s/it, 3397.11/4800 seconds]

[I 2025-06-09 19:49:01,379] Trial 32 pruned. Trial was pruned at epoch 1.


Best trial: 4. Best value: -0.765667:  82%|████████▎ | 33/40 [57:06<02:12, 18.97s/it, 3413.50/4800 seconds]

[I 2025-06-09 19:49:16,742] Trial 33 pruned. Trial was pruned at epoch 1.


Best trial: 4. Best value: -0.765667:  85%|████████▌ | 34/40 [57:08<01:47, 17.89s/it, 3428.87/4800 seconds]

Trial 34  Fα2=0.3297  P=0.497 R=0.282  cfg={'window': 24, 'filters': 48, 'kernel': 2, 'nb_stacks': 2, 'blocks_per_stack': 2, 'dropout': 0.31901512444749536, 'dense': 128, 'act': 'relu', 'lr': 0.0006474141743156096, 'batch': 32, 'norm': 'layer'}


Best trial: 4. Best value: -0.765667:  85%|████████▌ | 34/40 [1:01:16<01:47, 17.89s/it, 3428.87/4800 seconds]

[I 2025-06-09 19:53:27,074] Trial 34 finished with value: -0.3297015632401705 and parameters: {'window': 24, 'filters': 48, 'kernel': 2, 'nb_stacks': 2, 'blocks_per_stack': 2, 'dropout': 0.31901512444749536, 'dense': 128, 'act': 'relu', 'lr': 0.0006474141743156096, 'batch': 32, 'norm': 'layer'}. Best is trial 4 with value: -0.7656667190199467.


Best trial: 4. Best value: -0.765667:  88%|████████▊ | 35/40 [1:01:48<07:18, 87.63s/it, 3679.21/4800 seconds]

[I 2025-06-09 19:53:59,012] Trial 35 pruned. Trial was pruned at epoch 0.


Best trial: 4. Best value: -0.765667:  90%|█████████ | 36/40 [1:01:51<04:43, 70.99s/it, 3711.39/4800 seconds]

Trial 36  Fα2=0.7666  P=0.523 R=1.000  cfg={'window': 12, 'filters': 48, 'kernel': 5, 'nb_stacks': 3, 'blocks_per_stack': 2, 'dropout': 0.3118914406542517, 'dense': 128, 'act': 'relu', 'lr': 0.0005947328436650964, 'batch': 32, 'norm': 'layer'}


Best trial: 4. Best value: -0.765667:  90%|█████████ | 36/40 [1:21:31<04:43, 70.99s/it, 3711.39/4800 seconds]

[I 2025-06-09 20:13:41,748] Trial 36 finished with value: -0.766599597585513 and parameters: {'window': 12, 'filters': 48, 'kernel': 5, 'nb_stacks': 3, 'blocks_per_stack': 2, 'dropout': 0.3118914406542517, 'dense': 128, 'act': 'relu', 'lr': 0.0005947328436650964, 'batch': 32, 'norm': 'layer'}. Best is trial 36 with value: -0.766599597585513.


Best trial: 36. Best value: -0.7666:  92%|█████████▎| 37/40 [1:21:34<06:36, 132.27s/it, 4894.06/4800 seconds]



═══════════ BEST RESULT ═══════════
weighted-F1 (α=2) : 0.7666
precision         : 0.5226
recall            : 1.0000
best epoch        : 35
hyper-parameters  :
  window            : 12
  filters           : 48
  kernel            : 5
  nb_stacks         : 3
  blocks_per_stack  : 2
  dropout           : 0.3118914406542517
  dense             : 128
  act               : relu
  lr                : 0.0005947328436650964
  batch             : 32
  norm              : layer

📝 Artefacts saved (timestamp 20250609_171344).  Scaler → tcn_scaler.pkl


In [None]:
[I 2025-06-09 18:05:11,711] A new study created in memory with name: no-name-0933098c-6339-4e53-ace1-0af2750e397a
📊 Loading data …
Shape  : (15855, 66)
Classes:
 target
1    8097
0    7758
Name: count, dtype: int64 

Class-weights 0 / 1 → 1.00 / 0.97

🚀 Optimising  40 trials  |  30 min wall-time …
  0%|          | 0/40 [00:00<?, ?it/s]Trial  0  Fα2=0.4618  P=0.529 R=0.434  cfg={'window': 30, 'filters': 32, 'kernel': 2, 'nb_stacks': 1, 'blocks_per_stack': 2, 'dropout': 0.23033450352296264, 'dense': 128, 'act': 'relu', 'lr': 0.00017376356936978755, 'batch': 32, 'norm': 'batch'}
  0%|          | 0/40 [00:44<?, ?it/s][I 2025-06-09 18:05:56,603] Trial 0 finished with value: -0.46183783783783783 and parameters: {'window': 30, 'filters': 32, 'kernel': 2, 'nb_stacks': 1, 'blocks_per_stack': 2, 'dropout': 0.23033450352296264, 'dense': 128, 'act': 'relu', 'lr': 0.00017376356936978755, 'batch': 32, 'norm': 'batch'}. Best is trial 0 with value: -0.46183783783783783.
Best trial: 0. Best value: -0.461838:   2%|▎         | 1/40 [00:45<29:26, 45.29s/it, 45.29/1800 seconds]Trial  1  Fα2=0.2813  P=0.515 R=0.229  cfg={'window': 24, 'filters': 64, 'kernel': 4, 'nb_stacks': 2, 'blocks_per_stack': 1, 'dropout': 0.2322634555704315, 'dense': 128, 'act': 'relu', 'lr': 0.0008234548958371457, 'batch': 32, 'norm': 'layer'}
Best trial: 0. Best value: -0.461838:   2%|▎         | 1/40 [02:14<29:26, 45.29s/it, 45.29/1800 seconds][I 2025-06-09 18:07:26,384] Trial 1 finished with value: -0.28134328358208954 and parameters: {'window': 24, 'filters': 64, 'kernel': 4, 'nb_stacks': 2, 'blocks_per_stack': 1, 'dropout': 0.2322634555704315, 'dense': 128, 'act': 'relu', 'lr': 0.0008234548958371457, 'batch': 32, 'norm': 'layer'}. Best is trial 0 with value: -0.46183783783783783.
Best trial: 0. Best value: -0.461838:   5%|▌         | 2/40 [02:15<45:16, 71.48s/it, 135.10/1800 seconds]Trial  2  Fα2=0.5694  P=0.519 R=0.599  cfg={'window': 24, 'filters': 32, 'kernel': 2, 'nb_stacks': 3, 'blocks_per_stack': 2, 'dropout': 0.3318496824692567, 'dense': 128, 'act': <function swish at 0x0000025F5ECDAF80>, 'lr': 0.0002455257311459749, 'batch': 64, 'norm': 'layer'}
Best trial: 0. Best value: -0.461838:   5%|▌         | 2/40 [13:03<45:16, 71.48s/it, 135.10/1800 seconds][I 2025-06-09 18:18:15,273] Trial 2 finished with value: -0.5694444444444443 and parameters: {'window': 24, 'filters': 32, 'kernel': 2, 'nb_stacks': 3, 'blocks_per_stack': 2, 'dropout': 0.3318496824692567, 'dense': 128, 'act': <function swish at 0x0000025F5ECDAF80>, 'lr': 0.0002455257311459749, 'batch': 64, 'norm': 'layer'}. Best is trial 2 with value: -0.5694444444444443.
Best trial: 2. Best value: -0.569444:   8%|▊         | 3/40 [13:04<3:26:43, 335.23s/it, 784.19/1800 seconds]Trial  3  Fα2=0.4582  P=0.528 R=0.430  cfg={'window': 18, 'filters': 64, 'kernel': 2, 'nb_stacks': 1, 'blocks_per_stack': 2, 'dropout': 0.26205720315428516, 'dense': 64, 'act': 'selu', 'lr': 0.00019380132456492117, 'batch': 64, 'norm': 'batch'}
Best trial: 2. Best value: -0.569444:   8%|▊         | 3/40 [13:50<3:26:43, 335.23s/it, 784.19/1800 seconds][I 2025-06-09 18:19:01,809] Trial 3 finished with value: -0.45815358067299394 and parameters: {'window': 18, 'filters': 64, 'kernel': 2, 'nb_stacks': 1, 'blocks_per_stack': 2, 'dropout': 0.26205720315428516, 'dense': 64, 'act': 'selu', 'lr': 0.00019380132456492117, 'batch': 64, 'norm': 'batch'}. Best is trial 2 with value: -0.5694444444444443.
Best trial: 2. Best value: -0.569444:  10%|█         | 4/40 [13:50<2:12:44, 221.23s/it, 830.66/1800 seconds]Trial  4  Fα2=0.0734  P=0.464 R=0.052  cfg={'window': 54, 'filters': 96, 'kernel': 4, 'nb_stacks': 3, 'blocks_per_stack': 1, 'dropout': 0.2068198488145982, 'dense': 32, 'act': 'elu', 'lr': 0.0020547569815878254, 'batch': 64, 'norm': 'none'}
Best trial: 2. Best value: -0.569444:  10%|█         | 4/40 [20:56<2:12:44, 221.23s/it, 830.66/1800 seconds][I 2025-06-09 18:26:07,757] Trial 4 finished with value: -0.07344797435150102 and parameters: {'window': 54, 'filters': 96, 'kernel': 4, 'nb_stacks': 3, 'blocks_per_stack': 1, 'dropout': 0.2068198488145982, 'dense': 32, 'act': 'elu', 'lr': 0.0020547569815878254, 'batch': 64, 'norm': 'none'}. Best is trial 2 with value: -0.5694444444444443.
Best trial: 2. Best value: -0.569444:  12%|█▎        | 5/40 [21:44<2:52:07, 295.06s/it, 1256.64/1800 seconds][I 2025-06-09 18:26:56,640] Trial 5 pruned. Trial was pruned at epoch 2.
Best trial: 2. Best value: -0.569444:  15%|█▌        | 6/40 [22:07<1:59:46, 211.37s/it, 1305.56/1800 seconds][I 2025-06-09 18:27:19,541] Trial 6 pruned. Trial was pruned at epoch 0.
Best trial: 2. Best value: -0.569444:  18%|█▊        | 7/40 [22:51<1:22:22, 149.77s/it, 1328.51/1800 seconds][I 2025-06-09 18:28:03,649] Trial 7 pruned. Trial was pruned at epoch 0.
Best trial: 2. Best value: -0.569444:  20%|██        | 8/40 [25:29<1:01:56, 116.16s/it, 1372.68/1800 seconds][I 2025-06-09 18:30:41,459] Trial 8 pruned. Trial was pruned at epoch 2.
Best trial: 2. Best value: -0.569444:  22%|██▎       | 9/40 [25:45<1:06:46, 129.23s/it, 1530.66/1800 seconds][I 2025-06-09 18:30:57,079] Trial 9 pruned. Trial was pruned at epoch 0.
Best trial: 2. Best value: -0.569444:  25%|██▌       | 10/40 [26:39<47:04, 94.15s/it, 1546.25/1800 seconds]  [I 2025-06-09 18:31:51,377] Trial 10 pruned. Trial was pruned at epoch 0.
Best trial: 2. Best value: -0.569444:  28%|██▊       | 11/40 [26:48<39:37, 82.00s/it, 1600.71/1800 seconds][I 2025-06-09 18:32:00,674] Trial 11 pruned. Trial was pruned at epoch 0.
Best trial: 2. Best value: -0.569444:  30%|███       | 12/40 [26:57<27:56, 59.86s/it, 1609.93/1800 seconds][I 2025-06-09 18:32:09,118] Trial 12 pruned. Trial was pruned at epoch 0.
Best trial: 2. Best value: -0.569444:  32%|███▎      | 13/40 [27:06<19:56, 44.30s/it, 1618.42/1800 seconds][I 2025-06-09 18:32:18,590] Trial 13 pruned. Trial was pruned at epoch 0.
Best trial: 2. Best value: -0.569444:  35%|███▌      | 14/40 [27:38<14:38, 33.78s/it, 1627.88/1800 seconds][I 2025-06-09 18:32:50,659] Trial 14 pruned. Trial was pruned at epoch 0.
Best trial: 2. Best value: -0.569444:  38%|███▊      | 15/40 [27:39<13:51, 33.27s/it, 1659.99/1800 seconds]Trial 15  Fα2=0.5717  P=0.532 R=0.594  cfg={'window': 30, 'filters': 32, 'kernel': 3, 'nb_stacks': 1, 'blocks_per_stack': 2, 'dropout': 0.19135781227800264, 'dense': 32, 'act': 'relu', 'lr': 5.35786547006468e-05, 'batch': 32, 'norm': 'layer'}
Best trial: 2. Best value: -0.569444:  38%|███▊      | 15/40 [28:35<13:51, 33.27s/it, 1659.99/1800 seconds][I 2025-06-09 18:33:47,413] Trial 15 finished with value: -0.5716803760282021 and parameters: {'window': 30, 'filters': 32, 'kernel': 3, 'nb_stacks': 1, 'blocks_per_stack': 2, 'dropout': 0.19135781227800264, 'dense': 32, 'act': 'relu', 'lr': 5.35786547006468e-05, 'batch': 32, 'norm': 'layer'}. Best is trial 15 with value: -0.5716803760282021.
Best trial: 15. Best value: -0.57168:  40%|████      | 16/40 [29:14<16:08, 40.33s/it, 1716.72/1800 seconds][I 2025-06-09 18:34:26,236] Trial 16 pruned. Trial was pruned at epoch 0.
Best trial: 15. Best value: -0.57168:  42%|████▎     | 17/40 [29:29<15:18, 39.93s/it, 1755.72/1800 seconds][I 2025-06-09 18:34:41,644] Trial 17 pruned. Trial was pruned at epoch 0.
Best trial: 15. Best value: -0.57168:  45%|████▌     | 18/40 [32:38<11:56, 32.56s/it, 1771.12/1800 seconds][I 2025-06-09 18:37:50,265] Trial 18 pruned. Trial was pruned at epoch 1.
Best trial: 15. Best value: -0.57168:  48%|████▊     | 19/40 [32:39<36:06, 103.15s/it, 1959.93/1800 seconds]

═══════════ BEST RESULT ═══════════
weighted-F1 (α=2) : 0.5717
precision         : 0.5323
recall            : 0.5937
best epoch        : 7
hyper-parameters  :
  window            : 30
  filters           : 32
  kernel            : 3
  nb_stacks         : 1
  blocks_per_stack  : 2
  dropout           : 0.19135781227800264
  dense             : 32
  act               : relu
  lr                : 5.35786547006468e-05
  batch             : 32
  norm              : layer

📝 Artefacts saved (timestamp 20250609_153751).  Scaler → tcn_scaler.pkl

In [3]:
"""
train_tcn_final.py  ·  2025-06-09
────────────────────────────────────────────────────
One-shot training of a Temporal Convolutional Network
for 4-hour BTC direction, using the fixed hyper-params
supplied by the user (see CFG below).

Outputs
• tcn_btc_direction.h5
• tcn_scaler.pkl
• tcn_training_summary.json
"""

# ─────────────────── imports & hygiene ───────────────────
import os, json, gc, warnings
from datetime import datetime
from pathlib import Path
from typing import Dict, Tuple

import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (precision_score, recall_score,
                             confusion_matrix, accuracy_score)

import tensorflow as tf
from tensorflow import keras
from tcn import TCN                     # pip install tcn==3.*

warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
for g in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(g, True)

# ───────────────────── paths & constants ──────────────────
CSV_PATH = Path(r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction"
                r"\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv")

SCALER_PKL = "tcn_scaler.pkl"
MODEL_H5   = "tcn_btc_direction.h5"
SUMMARY_JS = "tcn_training_summary.json"

VAL_FRAC = 0.20         # last 20 % for validation
ALPHA    = 2.0          # precision weight in Fβ (β=√α)

DROP_COLS = [           # minimal leakage list – extend as needed
    "open", "high", "low", "close", "typical_price",
    "high_low", "high_close", "low_close", "EMA_21", "SMA_20",
    "vwap_24h", "close_4h"
]

# ─────────────────── hyper-parameters (fixed) ─────────────
CFG: Dict = {
    'window'          : 24,
    'filters'         : 32,
    'kernel'          : 2,
    'nb_stacks'       : 3,
    'blocks_per_stack': 2,
    'dropout'         : 0.3318496824692567,
    'dense'           : 128,
    'act'             : 'swish',
    'lr'              : 0.0002455257311459749,
    'batch'           : 64,
    'norm'            : 'layer'            # "none" | "batch" | "layer"
}

# ───────────────────── helper functions ───────────────────
def make_windows(arr: np.ndarray,
                 labels: np.ndarray,
                 win: int) -> Tuple[np.ndarray, np.ndarray]:
    """Turn a 2-D feature matrix into sliding 3-D windows."""
    X, y = [], []
    for i in range(win, len(arr)):
        X.append(arr[i - win:i])
        y.append(labels[i])
    return np.asarray(X, np.float32), np.asarray(y, np.int8)

def f1_alpha2(y_true, y_prob, alpha: float) -> float:
    """Weighted F1 where precision gets α-times the weight of recall."""
    y_pred = (y_prob >= .5).astype(int)
    p = precision_score(y_true, y_pred, zero_division=0)
    r = recall_score   (y_true, y_pred, zero_division=0)
    return 0.0 if p + r == 0 else (1 + alpha) * p * r / (alpha * p + r)

def build_model(cfg: Dict,
                n_feat: int,
                class_w0: float,
                class_w1: float) -> keras.Model:
    """Create & compile a single-output TCN binary classifier."""
    # -- validation of activation string (fix #1) --
    act = cfg["act"].lower()
    assert act in {"relu", "elu", "selu", "swish", "tanh"}, \
        f"Unsupported activation: {cfg['act']}"

    # dilations: 1 → …, 2**k
    dilations = [2 ** i
                 for i in range(cfg["nb_stacks"] * cfg["blocks_per_stack"])]

    inputs = keras.layers.Input(shape=(cfg["window"], n_feat))

    x = TCN(
            nb_filters        = cfg["filters"],
            kernel_size       = cfg["kernel"],
            nb_stacks         = cfg["nb_stacks"],
            dilations         = dilations,
            padding           = "causal",
            dropout_rate      = cfg["dropout"],
            activation        = act,
            use_skip_connections=True,
            use_batch_norm    = cfg["norm"] == "batch",
            use_layer_norm    = cfg["norm"] == "layer",
            return_sequences  = False
        )(inputs)

    x = keras.layers.Dense(cfg["dense"], activation=act)(x)
    x = keras.layers.Dropout(cfg["dropout"])(x)
    outputs = keras.layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs, outputs)

    # custom weighted BCE (float32 safe)
    def weighted_bce(y_t, y_p):
        y_t = tf.cast(y_t, y_p.dtype)
        w   = tf.where(tf.equal(y_t, 1), class_w1, class_w0)
        w   = tf.cast(w, y_p.dtype)
        return tf.reduce_mean(w * keras.losses.binary_crossentropy(y_t, y_p))

    model.compile(optimizer=keras.optimizers.Adam(cfg["lr"]),
                  loss=weighted_bce)
    return model

# ─────────────────────── 1. load dataset ────────────────────
print("📊 Loading data …")
df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
df = df.loc["2018-01-01":]
df = df.drop(columns=[c for c in DROP_COLS if c in df.columns])
df = df.dropna(subset=["target"])
df = df.dropna()

print(f"Data after cleaning : {df.shape}")

X_raw = df.drop(columns="target").values
y_raw = df["target"].astype(int).values
n_features = X_raw.shape[1]

split = int(len(df) * (1 - VAL_FRAC))
scaler = StandardScaler().fit(X_raw[:split])
joblib.dump(scaler, SCALER_PKL)

X_tr_raw = scaler.transform(X_raw[:split]).astype(np.float32)
X_va_raw = scaler.transform(X_raw[split:]).astype(np.float32)
y_tr_raw, y_va_raw = y_raw[:split], y_raw[split:]

# class weights (fix #2 clarity)
pos_rate = y_tr_raw.mean()
CLASS_W0 = np.float32(1.0)
CLASS_W1 = (np.float32((1 - pos_rate) / pos_rate)
            if pos_rate != 0 else np.float32(1.0))
print(f"Class weights 0 / 1 → {CLASS_W0:.2f} / {CLASS_W1:.2f}")

# slide into windows
X_tr, y_tr = make_windows(X_tr_raw, y_tr_raw, CFG["window"])
X_va, y_va = make_windows(X_va_raw, y_va_raw, CFG["window"])
print(f"Train windows : {X_tr.shape} • Val windows : {X_va.shape}")

# ─────────────────────── 2. train model ─────────────────────
tf.keras.backend.clear_session(); gc.collect()

model = build_model(CFG, n_features, CLASS_W0, CLASS_W1)
model.summary(line_length=120)

callbacks = [
    keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True, verbose=1),
    keras.callbacks.ReduceLROnPlateau(patience=5, factor=.5, min_lr=1e-6, verbose=1)
]

print("\n🚀 Training …")
history = model.fit(X_tr, y_tr,
                    validation_data=(X_va, y_va),
                    epochs=100,
                    batch_size=CFG["batch"],
                    shuffle=False,
                    callbacks=callbacks,
                    verbose=2)

# ─────────────────────── 3. evaluation ─────────────────────
print("\n📈 Final evaluation …")
prob = model.predict(X_va, batch_size=CFG["batch"], verbose=0).ravel()
pred = (prob >= .5).astype(int)

precision = precision_score(y_va, pred, zero_division=0)
recall    = recall_score   (y_va, pred, zero_division=0)
f1a2      = f1_alpha2(y_va, prob, ALPHA)   # fix #3
acc       = accuracy_score(y_va, pred)
cm        = confusion_matrix(y_va, pred)

print("\n───── VALIDATION METRICS ─────")
print(f"Accuracy            : {acc:6.3f}")
print(f"Precision           : {precision:6.3f}")
print(f"Recall              : {recall:6.3f}")
print(f"Weighted-F1 (α=2)   : {f1a2:6.3f}")
print(f"Confusion matrix    : {cm.ravel().tolist()}")

# ─────────────────────── 4. save artefacts ─────────────────
ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
model.save(MODEL_H5)

summary = {
    "timestamp"      : ts + "Z",
    "hyperparameters": CFG,
    "alpha"          : ALPHA,
    "n_features"     : n_features,
    "train_windows"  : int(len(X_tr)),
    "val_windows"    : int(len(X_va)),
    "class_weights"  : [float(CLASS_W0), float(CLASS_W1)],
    "metrics"        : dict(accuracy=float(acc),
                            precision=float(precision),
                            recall=float(recall),
                            f1_alpha2=float(f1a2)),
    "confusion_matrix": cm.ravel().tolist()
}
with open(SUMMARY_JS, "w") as f:
    json.dump(summary, f, indent=2)

print(f"\n✅ Model   saved → {MODEL_H5}")
print(f"✅ Scaler  saved → {SCALER_PKL}")
print(f"✅ Summary saved → {SUMMARY_JS}")
print("🎉 Training complete.")


📊 Loading data …
Data after cleaning : (15855, 59)
Class weights 0 / 1 → 1.00 / 0.97
Train windows : (12660, 24, 58) • Val windows : (3147, 24, 58)




🚀 Training …
Epoch 1/100
198/198 - 38s - 192ms/step - loss: 1.0617 - val_loss: 0.7057 - learning_rate: 2.4553e-04
Epoch 2/100
198/198 - 18s - 92ms/step - loss: 0.8069 - val_loss: 0.7041 - learning_rate: 2.4553e-04
Epoch 3/100
198/198 - 18s - 91ms/step - loss: 0.7418 - val_loss: 0.7051 - learning_rate: 2.4553e-04
Epoch 4/100
198/198 - 17s - 85ms/step - loss: 0.7195 - val_loss: 0.6986 - learning_rate: 2.4553e-04
Epoch 5/100
198/198 - 17s - 85ms/step - loss: 0.7059 - val_loss: 0.7019 - learning_rate: 2.4553e-04
Epoch 6/100
198/198 - 17s - 84ms/step - loss: 0.7013 - val_loss: 0.6982 - learning_rate: 2.4553e-04
Epoch 7/100
198/198 - 17s - 83ms/step - loss: 0.6954 - val_loss: 0.7003 - learning_rate: 2.4553e-04
Epoch 8/100
198/198 - 17s - 84ms/step - loss: 0.6917 - val_loss: 0.6960 - learning_rate: 2.4553e-04
Epoch 9/100
198/198 - 12s - 61ms/step - loss: 0.6906 - val_loss: 0.6921 - learning_rate: 2.4553e-04
Epoch 10/100
198/198 - 12s - 61ms/step - loss: 0.6914 - val_loss: 0.6894 - learning_r




───── VALIDATION METRICS ─────
Accuracy            :  0.493
Precision           :  0.565
Recall              :  0.127
Weighted-F1 (α=2)   :  0.171
Confusion matrix    : [1343, 160, 1436, 208]


TypeError: Could not locate class 'ResidualBlock'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': 'tcn.tcn', 'class_name': 'ResidualBlock', 'config': {'name': 'residual_block_0', 'dilation_rate': 1, 'nb_filters': 32, 'kernel_size': 2, 'padding': 'causal', 'activation': 'swish', 'dropout_rate': 0.3318496824692567, 'use_batch_norm': False, 'use_layer_norm': True, 'kernel_initializer': 'he_normal', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}}, 'registered_name': 'ResidualBlock', 'build_config': {'input_shape': [None, 24, 58]}}

In [23]:
"""
train_tcn_final.py  ·  2025-06-09
────────────────────────────────────────────────────
One-shot training of a Temporal Convolutional Network
for 4-hour BTC direction, using the fixed hyper-params
supplied by the user (see CFG below).

Outputs
• tcn_btc_direction.h5
• tcn_scaler.pkl
• tcn_training_summary.json
"""

# ─────────────────── imports & hygiene ───────────────────
import os, json, gc, warnings
from datetime import datetime
from pathlib import Path
from typing import Dict, Tuple

import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (precision_score, recall_score,
                             confusion_matrix, accuracy_score)

import tensorflow as tf
from tensorflow import keras
from tcn import TCN                     # pip install tcn==3.*

warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
for g in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(g, True)

# ───────────────────── paths & constants ──────────────────
CSV_PATH = Path(r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction"
                r"\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv")

SCALER_PKL = "tcn_scaler.pkl"
MODEL_H5   = "tcn_btc_direction.h5"
SUMMARY_JS = "tcn_training_summary.json"

VAL_FRAC = 0.20         # last 20 % for validation
ALPHA    = 2.0          # precision weight in Fβ (β=√α)

DROP_COLS = [           # minimal leakage list – extend as needed
    "open", "high", "low", "close", "typical_price",
    "high_low", "high_close", "low_close", "EMA_21", "SMA_20",
    "vwap_24h", "close_4h"
]

# ─────────────────── hyper-parameters (fixed) ─────────────
CFG: Dict = {
    'window'          : 12,
    'filters'         : 48,
    'kernel'          : 5,
    'nb_stacks'       : 3,
    'blocks_per_stack': 2,
    'dropout'         : 0.3118914406542517,
    'dense'           : 128,
    'act'             : 'relu',
    'lr'              : 5.947328436650964e-4,
    'batch'           : 32,
    'norm'            : 'layer'            # "none" | "batch" | "layer"
}

# ───────────────────── helper functions ───────────────────
def make_windows(arr: np.ndarray,
                 labels: np.ndarray,
                 win: int) -> Tuple[np.ndarray, np.ndarray]:
    """Turn a 2-D feature matrix into sliding 3-D windows."""
    X, y = [], []
    for i in range(win, len(arr)):
        X.append(arr[i - win:i])
        y.append(labels[i])
    return np.asarray(X, np.float32), np.asarray(y, np.int8)

def f1_alpha2(y_true, y_prob, alpha: float) -> float:
    """Weighted F1 where precision gets α-times the weight of recall."""
    y_pred = (y_prob >= .5).astype(int)
    p = precision_score(y_true, y_pred, zero_division=0)
    r = recall_score   (y_true, y_pred, zero_division=0)
    return 0.0 if p + r == 0 else (1 + alpha) * p * r / (alpha * p + r)

def build_model(cfg: Dict,
                n_feat: int,
                class_w0: float,
                class_w1: float) -> keras.Model:
    """Create & compile a single-output TCN binary classifier."""
    # -- validation of activation string (fix #1) --
    act = cfg["act"].lower()
    assert act in {"relu", "elu", "selu", "swish", "tanh"}, \
        f"Unsupported activation: {cfg['act']}"

    # dilations: 1 → …, 2**k
    dilations = [2 ** i
                 for i in range(cfg["nb_stacks"] * cfg["blocks_per_stack"])]

    inputs = keras.layers.Input(shape=(cfg["window"], n_feat))

    x = TCN(
            nb_filters        = cfg["filters"],
            kernel_size       = cfg["kernel"],
            nb_stacks         = cfg["nb_stacks"],
            dilations         = dilations,
            padding           = "causal",
            dropout_rate      = cfg["dropout"],
            activation        = act,
            use_skip_connections=True,
            use_batch_norm    = cfg["norm"] == "batch",
            use_layer_norm    = cfg["norm"] == "layer",
            return_sequences  = False
        )(inputs)

    x = keras.layers.Dense(cfg["dense"], activation=act)(x)
    x = keras.layers.Dropout(cfg["dropout"])(x)
    outputs = keras.layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs, outputs)

    # custom weighted BCE (float32 safe)
    def weighted_bce(y_t, y_p):
        y_t = tf.cast(y_t, y_p.dtype)
        w   = tf.where(tf.equal(y_t, 1), class_w1, class_w0)
        w   = tf.cast(w, y_p.dtype)
        return tf.reduce_mean(w * keras.losses.binary_crossentropy(y_t, y_p))

    model.compile(optimizer=keras.optimizers.Adam(cfg["lr"]),
                  loss=weighted_bce)
    return model

# ─────────────────────── 1. load dataset ────────────────────
print("📊 Loading data …")
df = (pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
        .loc["2018-01-01":]
        .drop(columns=[c for c in DROP_COLS if c in df.columns])
        .dropna(subset=["target"])
        .dropna())
print(f"Data after cleaning : {df.shape}")

X_raw = df.drop(columns="target").values
y_raw = df["target"].astype(int).values
n_features = X_raw.shape[1]

split = int(len(df) * (1 - VAL_FRAC))
scaler = StandardScaler().fit(X_raw[:split])
joblib.dump(scaler, SCALER_PKL)

X_tr_raw = scaler.transform(X_raw[:split]).astype(np.float32)
X_va_raw = scaler.transform(X_raw[split:]).astype(np.float32)
y_tr_raw, y_va_raw = y_raw[:split], y_raw[split:]

# class weights (fix #2 clarity)
pos_rate = y_tr_raw.mean()
CLASS_W0 = np.float32(1.0)
CLASS_W1 = (np.float32((1 - pos_rate) / pos_rate)
            if pos_rate != 0 else np.float32(1.0))
print(f"Class weights 0 / 1 → {CLASS_W0:.2f} / {CLASS_W1:.2f}")

# slide into windows
X_tr, y_tr = make_windows(X_tr_raw, y_tr_raw, CFG["window"])
X_va, y_va = make_windows(X_va_raw, y_va_raw, CFG["window"])
print(f"Train windows : {X_tr.shape} • Val windows : {X_va.shape}")

# ─────────────────────── 2. train model ─────────────────────
tf.keras.backend.clear_session(); gc.collect()

model = build_model(CFG, n_features, CLASS_W0, CLASS_W1)
model.summary(line_length=120)

callbacks = [
    keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True, verbose=1),
    keras.callbacks.ReduceLROnPlateau(patience=5, factor=.5, min_lr=1e-6, verbose=1)
]

print("\n🚀 Training …")
history = model.fit(X_tr, y_tr,
                    validation_data=(X_va, y_va),
                    epochs=100,
                    batch_size=CFG["batch"],
                    shuffle=False,
                    callbacks=callbacks,
                    verbose=2)

# ─────────────────────── 3. evaluation ─────────────────────
print("\n📈 Final evaluation …")
prob = model.predict(X_va, batch_size=CFG["batch"], verbose=0).ravel()
pred = (prob >= .5).astype(int)

precision = precision_score(y_va, pred, zero_division=0)
recall    = recall_score   (y_va, pred, zero_division=0)
f1a2      = f1_alpha2(y_va, prob, ALPHA)   # fix #3
acc       = accuracy_score(y_va, pred)
cm        = confusion_matrix(y_va, pred)

print("\n───── VALIDATION METRICS ─────")
print(f"Accuracy            : {acc:6.3f}")
print(f"Precision           : {precision:6.3f}")
print(f"Recall              : {recall:6.3f}")
print(f"Weighted-F1 (α=2)   : {f1a2:6.3f}")
print(f"Confusion matrix    : {cm.ravel().tolist()}")

# ─────────────────────── 4. save artefacts ─────────────────
ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
model.save(MODEL_H5)

summary = {
    "timestamp"      : ts + "Z",
    "hyperparameters": CFG,
    "alpha"          : ALPHA,
    "n_features"     : n_features,
    "train_windows"  : int(len(X_tr)),
    "val_windows"    : int(len(X_va)),
    "class_weights"  : [float(CLASS_W0), float(CLASS_W1)],
    "metrics"        : dict(accuracy=float(acc),
                            precision=float(precision),
                            recall=float(recall),
                            f1_alpha2=float(f1a2)),
    "confusion_matrix": cm.ravel().tolist()
}
with open(SUMMARY_JS, "w") as f:
    json.dump(summary, f, indent=2)

print(f"\n✅ Model   saved → {MODEL_H5}")
print(f"✅ Scaler  saved → {SCALER_PKL}")
print(f"✅ Summary saved → {SUMMARY_JS}")
print("🎉 Training complete.")


📊 Loading data …
Data after cleaning : (15855, 59)
Class weights 0 / 1 → 1.00 / 0.97
Train windows : (12672, 12, 58) • Val windows : (3159, 12, 58)



🚀 Training …
Epoch 1/100
396/396 - 25s - 63ms/step - loss: 0.8768 - val_loss: 0.6852 - learning_rate: 5.9473e-04
Epoch 2/100
396/396 - 12s - 31ms/step - loss: 0.6861 - val_loss: 0.6837 - learning_rate: 5.9473e-04
Epoch 3/100
396/396 - 12s - 31ms/step - loss: 0.6846 - val_loss: 0.6823 - learning_rate: 5.9473e-04
Epoch 4/100
396/396 - 13s - 32ms/step - loss: 0.6830 - val_loss: 0.6821 - learning_rate: 5.9473e-04
Epoch 5/100
396/396 - 13s - 34ms/step - loss: 0.6823 - val_loss: 0.6829 - learning_rate: 5.9473e-04
Epoch 6/100
396/396 - 13s - 33ms/step - loss: 0.6825 - val_loss: 0.6817 - learning_rate: 5.9473e-04
Epoch 7/100
396/396 - 16s - 40ms/step - loss: 0.6827 - val_loss: 0.6816 - learning_rate: 5.9473e-04
Epoch 8/100
396/396 - 13s - 33ms/step - loss: 0.6832 - val_loss: 0.6816 - learning_rate: 5.9473e-04
Epoch 9/100
396/396 - 13s - 32ms/step - loss: 0.6822 - val_loss: 0.6815 - learning_rate: 5.9473e-04
Epoch 10/100
396/396 - 13s - 34ms/step - loss: 0.6824 - val_loss: 0.6816 - learning_ra




───── VALIDATION METRICS ─────
Accuracy            :  0.523
Precision           :  0.523
Recall              :  1.000
Weighted-F1 (α=2)   :  0.767
Confusion matrix    : [0, 1508, 0, 1651]


TypeError: Could not locate class 'ResidualBlock'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': 'tcn.tcn', 'class_name': 'ResidualBlock', 'config': {'name': 'residual_block_0', 'dilation_rate': 1, 'nb_filters': 48, 'kernel_size': 5, 'padding': 'causal', 'activation': 'relu', 'dropout_rate': 0.3118914406542517, 'use_batch_norm': False, 'use_layer_norm': True, 'kernel_initializer': 'he_normal', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}}, 'registered_name': 'ResidualBlock', 'build_config': {'input_shape': [None, 12, 58]}}

In [None]:
"""
train_tcn_final.py  ·  2025-06-09
────────────────────────────────────────────────────
One-shot training of a Temporal Convolutional Network
for 4-hour BTC direction, using the fixed hyper-params
supplied by the user (see CFG below).

Outputs
• tcn_btc_direction.h5
• tcn_scaler.pkl
• tcn_training_summary.json
"""

# ─────────────────── imports & hygiene ───────────────────
import os, json, gc, warnings
from datetime import datetime
from pathlib import Path
from typing import Dict, Tuple

import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (precision_score, recall_score,
                             confusion_matrix, accuracy_score)

import tensorflow as tf
from tensorflow import keras
from tcn import TCN                     # pip install tcn==3.*

warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
for g in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(g, True)

# ───────────────────── paths & constants ──────────────────
CSV_PATH = Path(r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction"
                r"\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv")

SCALER_PKL = "tcn_scaler.pkl"
MODEL_H5   = "tcn_btc_direction.h5"
SUMMARY_JS = "tcn_training_summary.json"

VAL_FRAC = 0.20         # last 20 % for validation
ALPHA    = 2.0          # precision weight in Fβ (β=√α)

DROP_COLS = [           # minimal leakage list – extend as needed
    "open", "high", "low", "close", "typical_price",
    "high_low", "high_close", "low_close", "EMA_21", "SMA_20",
    "vwap_24h", "close_4h"
]

# ─────────────────── hyper-parameters (fixed) ─────────────
CFG: Dict = {
  'window': 24,
  'filters': 32,
  'kernel': 2,
  'nb_stacks': 3,
  'blocks_per_stack': 2,
  'dropout': 0.3318496824692567,
  'dense': 128,
  'act': 'swish',
  'lr': 0.0002455257311459749,
  'batch': 64,
  'norm': 'layer'
}



# ───────────────────── helper functions ───────────────────
def make_windows(arr: np.ndarray,
                 labels: np.ndarray,
                 win: int) -> Tuple[np.ndarray, np.ndarray]:
    """Turn a 2-D feature matrix into sliding 3-D windows."""
    X, y = [], []
    for i in range(win, len(arr)):
        X.append(arr[i - win:i])
        y.append(labels[i])
    return np.asarray(X, np.float32), np.asarray(y, np.int8)

def f1_alpha2(y_true, y_prob, alpha: float) -> float:
    """Weighted F1 where precision gets α-times the weight of recall."""
    y_pred = (y_prob >= .5).astype(int)
    p = precision_score(y_true, y_pred, zero_division=0)
    r = recall_score   (y_true, y_pred, zero_division=0)
    return 0.0 if p + r == 0 else (1 + alpha) * p * r / (alpha * p + r)

def build_model(cfg: Dict,
                n_feat: int,
                class_w0: float,
                class_w1: float) -> keras.Model:
    """Create & compile a single-output TCN binary classifier."""
    # -- validation of activation string (fix #1) --
    act = cfg["act"].lower()
    assert act in {"relu", "elu", "selu", "swish", "tanh"}, \
        f"Unsupported activation: {cfg['act']}"

    # dilations: 1 → …, 2**k
    dilations = [2 ** i
                 for i in range(cfg["nb_stacks"] * cfg["blocks_per_stack"])]

    inputs = keras.layers.Input(shape=(cfg["window"], n_feat))

    x = TCN(
            nb_filters        = cfg["filters"],
            kernel_size       = cfg["kernel"],
            nb_stacks         = cfg["nb_stacks"],
            dilations         = dilations,
            padding           = "causal",
            dropout_rate      = cfg["dropout"],
            activation        = act,
            use_skip_connections=True,
            use_batch_norm    = cfg["norm"] == "batch",
            use_layer_norm    = cfg["norm"] == "layer",
            return_sequences  = False
        )(inputs)

    x = keras.layers.Dense(cfg["dense"], activation=act)(x)
    x = keras.layers.Dropout(cfg["dropout"])(x)
    outputs = keras.layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs, outputs)

    # custom weighted BCE (float32 safe)
    def weighted_bce(y_t, y_p):
        y_t = tf.cast(y_t, y_p.dtype)
        w   = tf.where(tf.equal(y_t, 1), class_w1, class_w0)
        w   = tf.cast(w, y_p.dtype)
        return tf.reduce_mean(w * keras.losses.binary_crossentropy(y_t, y_p))

    model.compile(optimizer=keras.optimizers.Adam(cfg["lr"]),
                  loss=weighted_bce)
    return model

# ─────────────────────── 1. load dataset ────────────────────
print("📊 Loading data …")
df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
df = df.loc["2018-01-01":]
df = df.drop(columns=[c for c in DROP_COLS if c in df.columns])
df = df.dropna(subset=["target"])
df = df.dropna()

print(f"Data after cleaning : {df.shape}")

X_raw = df.drop(columns="target").values
y_raw = df["target"].astype(int).values
n_features = X_raw.shape[1]

split = int(len(df) * (1 - VAL_FRAC))
scaler = StandardScaler().fit(X_raw[:split])
joblib.dump(scaler, SCALER_PKL)

X_tr_raw = scaler.transform(X_raw[:split]).astype(np.float32)
X_va_raw = scaler.transform(X_raw[split:]).astype(np.float32)
y_tr_raw, y_va_raw = y_raw[:split], y_raw[split:]

# class weights (fix #2 clarity)
pos_rate = y_tr_raw.mean()
CLASS_W0 = np.float32(1.0)
CLASS_W1 = (np.float32((1 - pos_rate) / pos_rate)
            if pos_rate != 0 else np.float32(1.0))
print(f"Class weights 0 / 1 → {CLASS_W0:.2f} / {CLASS_W1:.2f}")

# slide into windows
X_tr, y_tr = make_windows(X_tr_raw, y_tr_raw, CFG["window"])
X_va, y_va = make_windows(X_va_raw, y_va_raw, CFG["window"])
print(f"Train windows : {X_tr.shape} • Val windows : {X_va.shape}")

# ─────────────────────── 2. train model ─────────────────────
tf.keras.backend.clear_session(); gc.collect()

model = build_model(CFG, n_features, CLASS_W0, CLASS_W1)
model.summary(line_length=120)

callbacks = [
    keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True, verbose=1),
    keras.callbacks.ReduceLROnPlateau(patience=5, factor=.5, min_lr=1e-6, verbose=1)
]

print("\n🚀 Training …")
history = model.fit(X_tr, y_tr,
                    validation_data=(X_va, y_va),
                    epochs=100,
                    batch_size=CFG["batch"],
                    shuffle=False,
                    callbacks=callbacks,
                    verbose=2)

# ─────────────────────── 3. evaluation ─────────────────────
print("\n📈 Final evaluation …")
prob = model.predict(X_va, batch_size=CFG["batch"], verbose=0).ravel()
pred = (prob >= .5).astype(int)

precision = precision_score(y_va, pred, zero_division=0)
recall    = recall_score   (y_va, pred, zero_division=0)
f1a2      = f1_alpha2(y_va, prob, ALPHA)   # fix #3
acc       = accuracy_score(y_va, pred)
cm        = confusion_matrix(y_va, pred)

print("\n───── VALIDATION METRICS ─────")
print(f"Accuracy            : {acc:6.3f}")
print(f"Precision           : {precision:6.3f}")
print(f"Recall              : {recall:6.3f}")
print(f"Weighted-F1 (α=2)   : {f1a2:6.3f}")
print(f"Confusion matrix    : {cm.ravel().tolist()}")

# ─────────────────────── 4. save artefacts ─────────────────
ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
model.save(MODEL_H5)

summary = {
    "timestamp"      : ts + "Z",
    "hyperparameters": CFG,
    "alpha"          : ALPHA,
    "n_features"     : n_features,
    "train_windows"  : int(len(X_tr)),
    "val_windows"    : int(len(X_va)),
    "class_weights"  : [float(CLASS_W0), float(CLASS_W1)],
    "metrics"        : dict(accuracy=float(acc),
                            precision=float(precision),
                            recall=float(recall),
                            f1_alpha2=float(f1a2)),
    "confusion_matrix": cm.ravel().tolist()
}
with open(SUMMARY_JS, "w") as f:
    json.dump(summary, f, indent=2)

print(f"\n✅ Model   saved → {MODEL_H5}")
print(f"✅ Scaler  saved → {SCALER_PKL}")
print(f"✅ Summary saved → {SUMMARY_JS}")
print("🎉 Training complete.")


📊 Loading data …
Data after cleaning : (15855, 59)
Class weights 0 / 1 → 1.00 / 0.97
Train windows : (12660, 24, 58) • Val windows : (3147, 24, 58)



🚀 Training …
Epoch 1/100
198/198 - 44s - 220ms/step - loss: 0.9470 - val_loss: 0.6877 - learning_rate: 2.4553e-04
Epoch 2/100
198/198 - 19s - 94ms/step - loss: 0.7569 - val_loss: 0.6850 - learning_rate: 2.4553e-04
Epoch 3/100
198/198 - 19s - 95ms/step - loss: 0.7161 - val_loss: 0.6851 - learning_rate: 2.4553e-04
Epoch 4/100
198/198 - 19s - 95ms/step - loss: 0.7040 - val_loss: 0.6845 - learning_rate: 2.4553e-04
Epoch 5/100
198/198 - 19s - 95ms/step - loss: 0.6982 - val_loss: 0.6844 - learning_rate: 2.4553e-04
Epoch 6/100
198/198 - 19s - 95ms/step - loss: 0.6934 - val_loss: 0.6832 - learning_rate: 2.4553e-04
Epoch 7/100
198/198 - 19s - 94ms/step - loss: 0.6906 - val_loss: 0.6834 - learning_rate: 2.4553e-04
Epoch 8/100
198/198 - 19s - 95ms/step - loss: 0.6896 - val_loss: 0.6822 - learning_rate: 2.4553e-04
Epoch 9/100
198/198 - 19s - 95ms/step - loss: 0.6896 - val_loss: 0.6830 - learning_rate: 2.4553e-04
Epoch 10/100
198/198 - 18s - 93ms/step - loss: 0.6879 - val_loss: 0.6828 - learning_r

In [4]:
"""
train_tcn_final.py  ·  2025-06-09
────────────────────────────────────────────────────
One-shot training of a Temporal Convolutional Network
for 4-hour BTC direction, using the fixed hyper-params
supplied by the user (see CFG below).

Outputs
• tcn_btc_direction.h5
• tcn_scaler.pkl
• tcn_training_summary.json
"""

# ─────────────────── imports & hygiene ───────────────────
import os, json, gc, warnings
from datetime import datetime
from pathlib import Path
from typing import Dict, Tuple

import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (precision_score, recall_score,
                             confusion_matrix, accuracy_score)

import tensorflow as tf
from tensorflow import keras
from tcn import TCN                     # pip install tcn==3.*

warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
for g in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(g, True)

# ───────────────────── paths & constants ──────────────────
CSV_PATH = Path(r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction"
                r"\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv")

SCALER_PKL = "tcn_scaler.pkl"
MODEL_H5   = "tcn_btc_direction.h5"
SUMMARY_JS = "tcn_training_summary.json"

VAL_FRAC = 0.20         # last 20 % for validation
ALPHA    = 2.0          # precision weight in Fβ (β=√α)

DROP_COLS = [           # minimal leakage list – extend as needed
    "open", "high", "low", "close", "typical_price",
    "high_low", "high_close", "low_close", "EMA_21", "SMA_20",
    "vwap_24h", "close_4h"
]

# ─────────────────── hyper-parameters (fixed) ─────────────
CFG: Dict = {
  'window': 12,
  'filters': 48,
  'kernel': 5,
  'nb_stacks': 3,
  'blocks_per_stack': 2,
  'dropout': 0.3118914406542517,
  'dense': 128,
  'act': 'relu',
  'lr': 0.0005947328436650964,
  'batch': 32,
  'norm': 'layer'
}


# ───────────────────── helper functions ───────────────────
def make_windows(arr: np.ndarray,
                 labels: np.ndarray,
                 win: int) -> Tuple[np.ndarray, np.ndarray]:
    """Turn a 2-D feature matrix into sliding 3-D windows."""
    X, y = [], []
    for i in range(win, len(arr)):
        X.append(arr[i - win:i])
        y.append(labels[i])
    return np.asarray(X, np.float32), np.asarray(y, np.int8)

def f1_alpha2(y_true, y_prob, alpha: float) -> float:
    """Weighted F1 where precision gets α-times the weight of recall."""
    y_pred = (y_prob >= .5).astype(int)
    p = precision_score(y_true, y_pred, zero_division=0)
    r = recall_score   (y_true, y_pred, zero_division=0)
    return 0.0 if p + r == 0 else (1 + alpha) * p * r / (alpha * p + r)

def build_model(cfg: Dict,
                n_feat: int,
                class_w0: float,
                class_w1: float) -> keras.Model:
    """Create & compile a single-output TCN binary classifier."""
    # -- validation of activation string (fix #1) --
    act = cfg["act"].lower()
    assert act in {"relu", "elu", "selu", "swish", "tanh"}, \
        f"Unsupported activation: {cfg['act']}"

    # dilations: 1 → …, 2**k
    dilations = [2 ** i
                 for i in range(cfg["nb_stacks"] * cfg["blocks_per_stack"])]

    inputs = keras.layers.Input(shape=(cfg["window"], n_feat))

    x = TCN(
            nb_filters        = cfg["filters"],
            kernel_size       = cfg["kernel"],
            nb_stacks         = cfg["nb_stacks"],
            dilations         = dilations,
            padding           = "causal",
            dropout_rate      = cfg["dropout"],
            activation        = act,
            use_skip_connections=True,
            use_batch_norm    = cfg["norm"] == "batch",
            use_layer_norm    = cfg["norm"] == "layer",
            return_sequences  = False
        )(inputs)

    x = keras.layers.Dense(cfg["dense"], activation=act)(x)
    x = keras.layers.Dropout(cfg["dropout"])(x)
    outputs = keras.layers.Dense(1, activation="sigmoid")(x)

    model = keras.Model(inputs, outputs)

    # custom weighted BCE (float32 safe)
    def weighted_bce(y_t, y_p):
        y_t = tf.cast(y_t, y_p.dtype)
        w   = tf.where(tf.equal(y_t, 1), class_w1, class_w0)
        w   = tf.cast(w, y_p.dtype)
        return tf.reduce_mean(w * keras.losses.binary_crossentropy(y_t, y_p))

    model.compile(optimizer=keras.optimizers.Adam(cfg["lr"]),
                  loss=weighted_bce)
    return model

# ─────────────────────── 1. load dataset ────────────────────
print("📊 Loading data …")
df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
df = df.loc["2018-01-01":]
df = df.drop(columns=[c for c in DROP_COLS if c in df.columns])
df = df.dropna(subset=["target"])
df = df.dropna()

print(f"Data after cleaning : {df.shape}")

X_raw = df.drop(columns="target").values
y_raw = df["target"].astype(int).values
n_features = X_raw.shape[1]

split = int(len(df) * (1 - VAL_FRAC))
scaler = StandardScaler().fit(X_raw[:split])
joblib.dump(scaler, SCALER_PKL)

X_tr_raw = scaler.transform(X_raw[:split]).astype(np.float32)
X_va_raw = scaler.transform(X_raw[split:]).astype(np.float32)
y_tr_raw, y_va_raw = y_raw[:split], y_raw[split:]

# class weights (fix #2 clarity)
pos_rate = y_tr_raw.mean()
CLASS_W0 = np.float32(1.0)
CLASS_W1 = (np.float32((1 - pos_rate) / pos_rate)
            if pos_rate != 0 else np.float32(1.0))
print(f"Class weights 0 / 1 → {CLASS_W0:.2f} / {CLASS_W1:.2f}")

# slide into windows
X_tr, y_tr = make_windows(X_tr_raw, y_tr_raw, CFG["window"])
X_va, y_va = make_windows(X_va_raw, y_va_raw, CFG["window"])
print(f"Train windows : {X_tr.shape} • Val windows : {X_va.shape}")

# ─────────────────────── 2. train model ─────────────────────
tf.keras.backend.clear_session(); gc.collect()

model = build_model(CFG, n_features, CLASS_W0, CLASS_W1)
model.summary(line_length=120)

callbacks = [
    keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True, verbose=1),
    keras.callbacks.ReduceLROnPlateau(patience=5, factor=.5, min_lr=1e-6, verbose=1)
]

print("\n🚀 Training …")
history = model.fit(X_tr, y_tr,
                    validation_data=(X_va, y_va),
                    epochs=100,
                    batch_size=CFG["batch"],
                    shuffle=False,
                    callbacks=callbacks,
                    verbose=2)

# ─────────────────────── 3. evaluation ─────────────────────
print("\n📈 Final evaluation …")
prob = model.predict(X_va, batch_size=CFG["batch"], verbose=0).ravel()
pred = (prob >= .5).astype(int)

precision = precision_score(y_va, pred, zero_division=0)
recall    = recall_score   (y_va, pred, zero_division=0)
f1a2      = f1_alpha2(y_va, prob, ALPHA)   # fix #3
acc       = accuracy_score(y_va, pred)
cm        = confusion_matrix(y_va, pred)

print("\n───── VALIDATION METRICS ─────")
print(f"Accuracy            : {acc:6.3f}")
print(f"Precision           : {precision:6.3f}")
print(f"Recall              : {recall:6.3f}")
print(f"Weighted-F1 (α=2)   : {f1a2:6.3f}")
print(f"Confusion matrix    : {cm.ravel().tolist()}")

# ─────────────────────── 4. save artefacts ─────────────────
ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
model.save(MODEL_H5)

summary = {
    "timestamp"      : ts + "Z",
    "hyperparameters": CFG,
    "alpha"          : ALPHA,
    "n_features"     : n_features,
    "train_windows"  : int(len(X_tr)),
    "val_windows"    : int(len(X_va)),
    "class_weights"  : [float(CLASS_W0), float(CLASS_W1)],
    "metrics"        : dict(accuracy=float(acc),
                            precision=float(precision),
                            recall=float(recall),
                            f1_alpha2=float(f1a2)),
    "confusion_matrix": cm.ravel().tolist()
}
with open(SUMMARY_JS, "w") as f:
    json.dump(summary, f, indent=2)

print(f"\n✅ Model   saved → {MODEL_H5}")
print(f"✅ Scaler  saved → {SCALER_PKL}")
print(f"✅ Summary saved → {SUMMARY_JS}")
print("🎉 Training complete.")


📊 Loading data …
Data after cleaning : (15855, 59)
Class weights 0 / 1 → 1.00 / 0.97
Train windows : (12672, 12, 58) • Val windows : (3159, 12, 58)



🚀 Training …
Epoch 1/100
396/396 - 24s - 60ms/step - loss: 0.8478 - val_loss: 0.6884 - learning_rate: 5.9473e-04
Epoch 2/100
396/396 - 11s - 28ms/step - loss: 0.6874 - val_loss: 0.6842 - learning_rate: 5.9473e-04
Epoch 3/100
396/396 - 24s - 60ms/step - loss: 0.6846 - val_loss: 0.6839 - learning_rate: 5.9473e-04
Epoch 4/100
396/396 - 21s - 52ms/step - loss: 0.6832 - val_loss: 0.6831 - learning_rate: 5.9473e-04
Epoch 5/100
396/396 - 17s - 42ms/step - loss: 0.6826 - val_loss: 0.6830 - learning_rate: 5.9473e-04
Epoch 6/100
396/396 - 27s - 67ms/step - loss: 0.6829 - val_loss: 0.6839 - learning_rate: 5.9473e-04
Epoch 7/100
396/396 - 27s - 69ms/step - loss: 0.6823 - val_loss: 0.6822 - learning_rate: 5.9473e-04
Epoch 8/100
396/396 - 27s - 68ms/step - loss: 0.6820 - val_loss: 0.6874 - learning_rate: 5.9473e-04
Epoch 9/100
396/396 - 28s - 70ms/step - loss: 0.6824 - val_loss: 0.6822 - learning_rate: 5.9473e-04
Epoch 10/100
396/396 - 28s - 71ms/step - loss: 0.6821 - val_loss: 0.6872 - learning_ra




───── VALIDATION METRICS ─────
Accuracy            :  0.523
Precision           :  0.523
Recall              :  1.000
Weighted-F1 (α=2)   :  0.767
Confusion matrix    : [0, 1508, 0, 1651]


TypeError: Could not locate class 'ResidualBlock'. Make sure custom classes are decorated with `@keras.saving.register_keras_serializable()`. Full object config: {'module': 'tcn.tcn', 'class_name': 'ResidualBlock', 'config': {'name': 'residual_block_0', 'dilation_rate': 1, 'nb_filters': 48, 'kernel_size': 5, 'padding': 'causal', 'activation': 'relu', 'dropout_rate': 0.3118914406542517, 'use_batch_norm': False, 'use_layer_norm': True, 'kernel_initializer': 'he_normal', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}}, 'registered_name': 'ResidualBlock', 'build_config': {'input_shape': [None, 12, 58]}}