# In this notebook we would build the CNN model

In [None]:
keep 
atr_14  , obv

In [None]:
keep 
atr_14  , obv

drop

ema_cross_up, macd_cross_up, oversold_reversal, overbought_reversal, trending_market

In [None]:
drop_cnn = [
    'open', 'high', 'low', 'typical_price', 'EMA_7', 'EMA_21', 'SMA_20', 
    'vwap_24h', 'close_4h', 'bollinger_upper', 'bollinger_lower', 
    'resistance_level', 'support_level', 'high_low', 'high_close', 'low_close',
    'true_range', 'volume_mean_20', 'MACD_line', 'MACD_signal',
    'bollinger_width', 'volatility_regime',
    'vol_spike_1_5x', 'near_upper_band', 'near_lower_band',
    'break_upper_band', 'break_lower_band', 'rsi_oversold', 'rsi_overbought',
    'above_sma20', 'above_sma50', 'ema7_above_ema21', 'macd_positive',
    'volume_breakout', 'volume_breakdown', 'stoch_overbought', 'stoch_oversold',
    'cci_overbought', 'cci_oversold',
    'bullish_scenario_1', 'bullish_scenario_2', 'bullish_scenario_3',
    'bullish_scenario_4', 'bullish_scenario_5', 'bullish_scenario_6',
    'bearish_scenario_1', 'bearish_scenario_2', 'bearish_scenario_3',
    'bearish_scenario_4', 'bearish_scenario_6''ema_cross_up', 'macd_cross_up', 'oversold_reversal', 'overbought_reversal', 'trending_market'
]

In [7]:
import os, json, warnings, joblib, optuna
from pathlib import Path
from datetime import datetime
import numpy as np, pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers

# ─────── Setup ─────────────────────────────────────────────────────────
warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
for g in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(g, True)

# ─────── Config ────────────────────────────────────────────────────────
CSV_PATH   = Path(r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction"
                  r"\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv")
VAL_RATIO  = 0.20
WF_FOLDS   = 2         # speed-up: reduce from 3 to 2
ALPHA      = 2.0
N_TRIALS   = 30        # speed-up: reduce from 60
TIMEOUT    = 35 * 60
SCALER_OUT = "cnn_scaler.pkl"
PARAMS_OUT = "cnn_best_params.json"

DROP_COLS = ['open', 'high', 'low', 'typical_price', 'EMA_7', 'EMA_21', 'SMA_20', 'SMA_50',
    'vwap_24h', 'close_4h', 'bollinger_upper', 'bollinger_lower', 'resistance_level',
    'support_level', 'high_low', 'high_close', 'low_close', 'true_range', 'volume_mean_20',
    'MACD_line', 'MACD_signal', 'bollinger_width', 'volatility_regime', 'CCI', 'stoch_%D',
    'parkinson_vol', 'ema_cross_down', 'macd_cross_down', 'vol_spike_1_5x', 'near_upper_band',
    'near_lower_band', 'break_upper_band', 'break_lower_band', 'rsi_oversold', 'rsi_overbought',
    'above_sma20', 'above_sma50', 'ema7_above_ema21', 'macd_positive', 'volume_breakout',
    'volume_breakdown', 'stoch_overbought', 'stoch_oversold', 'cci_overbought', 'cci_oversold',
    'trending_market', 'bullish_scenario_1', 'bullish_scenario_2', 'bullish_scenario_3',
    'bullish_scenario_4', 'bullish_scenario_5', 'bullish_scenario_6', 'bearish_scenario_1',
    'bearish_scenario_2', 'bearish_scenario_3', 'bearish_scenario_4', 'bearish_scenario_6',
    'ema_cross_up', 'macd_cross_up', 'oversold_reversal', 'overbought_reversal', 'close'
]

# ─────── Helpers ───────────────────────────────────────────────────────
def weighted_f1(y_true, y_pred):
    p = precision_score(y_true, y_pred, zero_division=0)
    r = recall_score(y_true, y_pred, zero_division=0)
    return 0.0 if p + r == 0 else (1 + ALPHA) * p * r / (ALPHA * p + r)

def make_windows(arr, labels, win):
    X, y = [], []
    for i in range(win, len(arr)):
        X.append(arr[i-win:i])
        y.append(labels[i])
    return np.asarray(X, np.float32), np.asarray(y, np.int8)

def load_and_scale():
    df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
    df = df.loc["2018-01-01":]
    df = df.drop(columns=[c for c in DROP_COLS if c in df.columns])
    df = df.dropna(subset=["target"]).dropna()

    X_all = df.drop(columns="target").values
    y_all = df["target"].astype(int).values
    split_real = int(len(df) * (1 - VAL_RATIO))
    scaler = StandardScaler().fit(X_all[:split_real])
    X_scaled = scaler.transform(X_all)

    joblib.dump(scaler, SCALER_OUT)
    print(f"✔ Scaler fitted on {split_real} rows")
    return X_scaled, y_all, scaler, split_real, X_all.shape[1]

# ─────── Data Load ─────────────────────────────────────────────────────
X_SCALED, Y_ALL, SCALER, REAL_SPLIT_IDX, N_FEATS = load_and_scale()

# ─────── CNN Model ─────────────────────────────────────────────────────
def build_model(trial, win):
    conv_blocks = trial.suggest_int("conv_blocks", 1, 2)
    base_filters = trial.suggest_int("filters", 32, 96, step=32)
    kernel = trial.suggest_int("kernel", 2, 4)
    activation = trial.suggest_categorical("act", ["relu", "elu", "selu"])
    dropout = trial.suggest_float("dropout", 0.05, 0.3)
    dense_units = trial.suggest_int("dense", 64, 128, step=64)
    l2reg = trial.suggest_float("l2", 1e-6, 1e-3, log=True)

    inp = layers.Input(shape=(win, N_FEATS))
    x = inp
    for b in range(conv_blocks):
        f = base_filters * (2**b)
        y = layers.Conv1D(f, kernel, padding="causal", activation=activation,
                          kernel_regularizer=regularizers.l2(l2reg))(x)
        y = layers.BatchNormalization()(y)
        if x.shape[-1] != y.shape[-1]:
            x = layers.Conv1D(f, 1, padding="same")(x)
        x = layers.add([x, y])

    pool = trial.suggest_categorical("pool", ["gap", "gmp"])
    x = layers.GlobalMaxPooling1D()(x) if POOL_TYPE == "gmp" else layers.GlobalAveragePooling1D()(x)

    if trial.suggest_categorical("extra_dense", [True, False]):
        x = layers.Dense(dense_units, activation=activation,
                         kernel_regularizer=regularizers.l2(l2reg))(x)
        x = layers.Dropout(dropout)(x)

    out = layers.Dense(1, activation="sigmoid")(x)
    return keras.Model(inp, out)

# ─────── Optuna Objective ──────────────────────────────────────────────
def objective(trial):
    win = trial.suggest_int("window", 12, 48, step=6)
    batch = trial.suggest_categorical("batch", [32, 64])
    lr = trial.suggest_float("lr", 1e-4, 5e-3, log=True)
    epochs = trial.suggest_int("epochs", 20, 60, step=20)

    data_len = REAL_SPLIT_IDX - win
    val_size = data_len // (WF_FOLDS + 1)
    fold_scores = []

    for fold in range(WF_FOLDS):
        val_start = data_len - (WF_FOLDS - fold) * val_size
        val_end = val_start + val_size
        if val_start < win * 2:
            continue

        X_fold = X_SCALED[:val_end + win]
        y_fold = Y_ALL[:val_end + win]
        X_win, y_win = make_windows(X_fold, y_fold, win)
        train_end = val_start
        X_tr, y_tr = X_win[:train_end], y_win[:train_end]
        X_va, y_va = X_win[val_start:val_end], y_win[val_start:val_end]

        if len(X_tr) < 10 or len(X_va) < 5:
            continue

        tf.keras.backend.clear_session()
        model = build_model(trial, win)
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),
                      loss="binary_crossentropy")

        model.fit(X_tr, y_tr,
                  epochs=epochs,
                  batch_size=batch,
                  validation_data=(X_va, y_va),
                  callbacks=[keras.callbacks.EarlyStopping(patience=5,
                                                           restore_best_weights=True)],
                  verbose=0)

        preds = (model.predict(X_va, verbose=0).ravel() >= 0.5).astype(int)
        p = precision_score(y_va, preds, zero_division=0)
        r = recall_score(y_va, preds, zero_division=0)
        f1 = weighted_f1(y_va, preds)
        print(f"🔁 Fold {fold} | Precision={p:.3f} | Recall={r:.3f} | F1={f1:.3f}")
        fold_scores.append(f1)

        trial.report(np.mean(fold_scores), step=fold)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return 1.0 - np.mean(fold_scores) if fold_scores else 1.0

# ─────── Run Optuna ────────────────────────────────────────────────────
if __name__ == "__main__":
    study = optuna.create_study(direction="minimize",
                                sampler=optuna.samplers.TPESampler(seed=SEED),
                                pruner=optuna.pruners.MedianPruner())
    study.optimize(objective, n_trials=N_TRIALS,
                   timeout=TIMEOUT, show_progress_bar=True)

    best_f1 = 1.0 - study.best_value
    print(f"\n🏆 Best Weighted-F1 (α=2): {best_f1:.4f}")
    print("📜 Best Hyperparameters:")
    print(json.dumps(study.best_params, indent=2))

    with open(PARAMS_OUT, "w") as f:
        json.dump(study.best_params, f, indent=2)
    print(f"✅ Saved parameters → {PARAMS_OUT}")
    print(f"✅ Saved scaler     → {SCALER_OUT}")
    print("⏰ Finished at:", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))


[I 2025-06-07 23:32:15,056] A new study created in memory with name: no-name-20976309-b2e8-42fd-ad22-832de27308a7


✔ Scaler fitted on 12684 rows


  0%|          | 0/30 [00:00<?, ?it/s]

🔁 Fold 0 | Precision=0.515 | Recall=0.736 | F1=0.644


Best trial: 0. Best value: 0.445697:   3%|▎         | 1/30 [00:13<06:19, 13.07s/it, 13.07/2100 seconds]

🔁 Fold 1 | Precision=0.488 | Recall=0.453 | F1=0.464
[I 2025-06-07 23:32:28,130] Trial 0 finished with value: 0.44569688452624034 and parameters: {'window': 24, 'batch': 32, 'lr': 0.0010401663679887319, 'epochs': 20, 'conv_blocks': 1, 'filters': 32, 'kernel': 4, 'act': 'elu', 'dropout': 0.2924774630404986, 'dense': 128, 'l2': 4.335281794951567e-06, 'pool': 'gmp', 'extra_dense': False}. Best is trial 0 with value: 0.44569688452624034.
🔁 Fold 0 | Precision=0.521 | Recall=0.679 | F1=0.617


Best trial: 0. Best value: 0.445697:   7%|▋         | 2/30 [00:26<06:16, 13.46s/it, 26.80/2100 seconds]

🔁 Fold 1 | Precision=0.499 | Recall=0.240 | F1=0.290
[I 2025-06-07 23:32:41,855] Trial 1 finished with value: 0.5463172571239047 and parameters: {'window': 30, 'batch': 64, 'lr': 0.00017258215396625024, 'epochs': 20, 'conv_blocks': 1, 'filters': 64, 'kernel': 4, 'act': 'selu', 'dropout': 0.061612603179999434, 'dense': 128, 'l2': 3.247673570627449e-06, 'pool': 'gmp', 'extra_dense': True}. Best is trial 0 with value: 0.44569688452624034.
🔁 Fold 0 | Precision=0.520 | Recall=0.682 | F1=0.618


Exception ignored in: <function WeakKeyDictionary.__init__.<locals>.remove at 0x000001FAAE233910>
Traceback (most recent call last):
  File "C:\Users\ADMIN\AppData\Local\Programs\Python\Python310\lib\weakref.py", line 371, in remove
    self = selfref()
KeyboardInterrupt: 
Best trial: 0. Best value: 0.445697:  10%|█         | 3/30 [00:36<05:16, 11.71s/it, 36.43/2100 seconds]

🔁 Fold 1 | Precision=0.505 | Recall=0.336 | F1=0.378
[I 2025-06-07 23:32:51,486] Trial 2 finished with value: 0.5021802351884824 and parameters: {'window': 24, 'batch': 64, 'lr': 0.0005595074635794797, 'epochs': 20, 'conv_blocks': 1, 'filters': 32, 'kernel': 4, 'act': 'elu', 'dropout': 0.1800170052944527, 'dense': 128, 'l2': 3.5856126103453987e-06, 'pool': 'gap', 'extra_dense': True}. Best is trial 0 with value: 0.44569688452624034.
🔁 Fold 0 | Precision=0.526 | Recall=0.364 | F1=0.405


Best trial: 0. Best value: 0.445697:  13%|█▎        | 4/30 [00:52<05:53, 13.61s/it, 52.94/2100 seconds]

🔁 Fold 1 | Precision=0.489 | Recall=0.605 | F1=0.561
[I 2025-06-07 23:33:07,998] Trial 3 finished with value: 0.5169144614752496 and parameters: {'window': 36, 'batch': 32, 'lr': 0.000215262809722153, 'epochs': 20, 'conv_blocks': 1, 'filters': 64, 'kernel': 2, 'act': 'relu', 'dropout': 0.18567402078956213, 'dense': 64, 'l2': 0.0002550298070162893, 'pool': 'gmp', 'extra_dense': True}. Best is trial 0 with value: 0.44569688452624034.
🔁 Fold 0 | Precision=0.524 | Recall=0.755 | F1=0.658


Best trial: 0. Best value: 0.445697:  17%|█▋        | 5/30 [01:02<05:03, 12.15s/it, 62.50/2100 seconds]

🔁 Fold 1 | Precision=0.499 | Recall=0.422 | F1=0.445
[I 2025-06-07 23:33:17,561] Trial 4 finished with value: 0.4483698581832408 and parameters: {'window': 12, 'batch': 32, 'lr': 0.001732053535845956, 'epochs': 60, 'conv_blocks': 1, 'filters': 64, 'kernel': 2, 'act': 'relu', 'dropout': 0.06588958757150591, 'dense': 64, 'l2': 9.4525713910723e-06, 'pool': 'gap', 'extra_dense': True}. Best is trial 0 with value: 0.44569688452624034.
🔁 Fold 0 | Precision=0.532 | Recall=0.291 | F1=0.343


Best trial: 0. Best value: 0.445697:  20%|██        | 6/30 [01:10<04:19, 10.79s/it, 70.66/2100 seconds]

🔁 Fold 1 | Precision=0.495 | Recall=0.629 | F1=0.577
[I 2025-06-07 23:33:25,723] Trial 5 finished with value: 0.5402358459247456 and parameters: {'window': 12, 'batch': 64, 'lr': 0.0008986552644007198, 'epochs': 60, 'conv_blocks': 1, 'filters': 64, 'kernel': 3, 'act': 'elu', 'dropout': 0.20910260281594512, 'dense': 64, 'l2': 3.35515102272148e-05, 'pool': 'gap', 'extra_dense': False}. Best is trial 0 with value: 0.44569688452624034.
🔁 Fold 0 | Precision=0.535 | Recall=0.583 | F1=0.566


Best trial: 0. Best value: 0.445697:  23%|██▎       | 7/30 [01:23<04:24, 11.52s/it, 83.67/2100 seconds]

🔁 Fold 1 | Precision=0.496 | Recall=0.751 | F1=0.641
[I 2025-06-07 23:33:38,729] Trial 6 pruned. 


Best trial: 0. Best value: 0.445697:  27%|██▋       | 8/30 [01:38<04:34, 12.47s/it, 98.18/2100 seconds]

🔁 Fold 0 | Precision=0.516 | Recall=0.916 | F1=0.728
[I 2025-06-07 23:33:53,237] Trial 7 pruned. 
🔁 Fold 0 | Precision=0.528 | Recall=0.440 | F1=0.466


Best trial: 8. Best value: 0.428452:  30%|███       | 9/30 [01:47<04:00, 11.47s/it, 107.46/2100 seconds]

🔁 Fold 1 | Precision=0.491 | Recall=0.835 | F1=0.677
[I 2025-06-07 23:34:02,523] Trial 8 finished with value: 0.4284524824342044 and parameters: {'window': 18, 'batch': 64, 'lr': 0.0007145565133513971, 'epochs': 20, 'conv_blocks': 1, 'filters': 96, 'kernel': 2, 'act': 'selu', 'dropout': 0.1105138178778751, 'dense': 128, 'l2': 0.00019268985325226193, 'pool': 'gmp', 'extra_dense': False}. Best is trial 8 with value: 0.4284524824342044.


Best trial: 8. Best value: 0.428452:  33%|███▎      | 10/30 [01:51<03:04,  9.23s/it, 111.68/2100 seconds]

🔁 Fold 0 | Precision=0.516 | Recall=0.844 | F1=0.697
[I 2025-06-07 23:34:06,743] Trial 9 pruned. 
🔁 Fold 0 | Precision=0.531 | Recall=0.551 | F1=0.544


Best trial: 8. Best value: 0.428452:  37%|███▋      | 11/30 [03:17<10:18, 32.54s/it, 197.05/2100 seconds]

🔁 Fold 1 | Precision=0.510 | Recall=0.337 | F1=0.380
[I 2025-06-07 23:35:32,111] Trial 10 finished with value: 0.537817116157062 and parameters: {'window': 48, 'batch': 64, 'lr': 0.004438277669999281, 'epochs': 40, 'conv_blocks': 2, 'filters': 96, 'kernel': 2, 'act': 'selu', 'dropout': 0.12569671136254235, 'dense': 128, 'l2': 0.00010833315697328629, 'pool': 'gmp', 'extra_dense': False}. Best is trial 8 with value: 0.4284524824342044.
🔁 Fold 0 | Precision=0.514 | Recall=0.592 | F1=0.564


Best trial: 8. Best value: 0.428452:  40%|████      | 12/30 [03:43<09:12, 30.68s/it, 223.48/2100 seconds]

🔁 Fold 1 | Precision=0.498 | Recall=0.461 | F1=0.473
[I 2025-06-07 23:35:58,539] Trial 11 pruned. 
🔁 Fold 0 | Precision=0.515 | Recall=0.506 | F1=0.509


Best trial: 8. Best value: 0.428452:  43%|████▎     | 13/30 [03:59<07:24, 26.12s/it, 239.11/2100 seconds]

🔁 Fold 1 | Precision=0.489 | Recall=0.279 | F1=0.326
[I 2025-06-07 23:36:14,165] Trial 12 finished with value: 0.5828336054231582 and parameters: {'window': 24, 'batch': 32, 'lr': 0.0012766043558568817, 'epochs': 20, 'conv_blocks': 1, 'filters': 96, 'kernel': 2, 'act': 'selu', 'dropout': 0.27852232036578506, 'dense': 128, 'l2': 1.0950172157239298e-06, 'pool': 'gmp', 'extra_dense': False}. Best is trial 8 with value: 0.4284524824342044.


Best trial: 8. Best value: 0.428452:  47%|████▋     | 14/30 [04:02<05:06, 19.13s/it, 242.08/2100 seconds]

🔁 Fold 0 | Precision=0.516 | Recall=0.719 | F1=0.635
[I 2025-06-07 23:36:17,139] Trial 13 pruned. 
🔁 Fold 0 | Precision=0.522 | Recall=0.479 | F1=0.493


Best trial: 8. Best value: 0.428452:  50%|█████     | 15/30 [04:13<04:13, 16.88s/it, 253.77/2100 seconds]

🔁 Fold 1 | Precision=0.491 | Recall=0.701 | F1=0.614
[I 2025-06-07 23:36:28,824] Trial 14 pruned. 
🔁 Fold 0 | Precision=0.525 | Recall=0.420 | F1=0.450


Best trial: 8. Best value: 0.428452:  53%|█████▎    | 16/30 [04:23<03:24, 14.60s/it, 263.08/2100 seconds]

🔁 Fold 1 | Precision=0.493 | Recall=0.445 | F1=0.460
[I 2025-06-07 23:36:38,135] Trial 15 finished with value: 0.5451165802588686 and parameters: {'window': 30, 'batch': 32, 'lr': 0.0015451137873775404, 'epochs': 40, 'conv_blocks': 1, 'filters': 96, 'kernel': 4, 'act': 'elu', 'dropout': 0.14016979925138917, 'dense': 128, 'l2': 0.00016893806027344123, 'pool': 'gmp', 'extra_dense': False}. Best is trial 8 with value: 0.4284524824342044.
🔁 Fold 0 | Precision=0.508 | Recall=0.353 | F1=0.393


Best trial: 8. Best value: 0.428452:  57%|█████▋    | 17/30 [04:33<02:53, 13.37s/it, 273.56/2100 seconds]

🔁 Fold 1 | Precision=0.496 | Recall=0.473 | F1=0.480
[I 2025-06-07 23:36:48,619] Trial 16 finished with value: 0.5631972004019259 and parameters: {'window': 18, 'batch': 64, 'lr': 0.000476858612051083, 'epochs': 20, 'conv_blocks': 1, 'filters': 32, 'kernel': 2, 'act': 'elu', 'dropout': 0.1477743480555192, 'dense': 128, 'l2': 1.444317644730438e-05, 'pool': 'gmp', 'extra_dense': False}. Best is trial 8 with value: 0.4284524824342044.


Best trial: 8. Best value: 0.428452:  60%|██████    | 18/30 [04:40<02:18, 11.54s/it, 280.85/2100 seconds]

🔁 Fold 0 | Precision=0.523 | Recall=0.571 | F1=0.554
[I 2025-06-07 23:36:55,908] Trial 17 pruned. 


Best trial: 8. Best value: 0.428452:  63%|██████▎   | 19/30 [04:43<01:39,  9.01s/it, 283.96/2100 seconds]

🔁 Fold 0 | Precision=0.530 | Recall=0.749 | F1=0.658
[I 2025-06-07 23:36:59,015] Trial 18 pruned. 
🔁 Fold 0 | Precision=0.518 | Recall=0.466 | F1=0.482


Best trial: 8. Best value: 0.428452:  67%|██████▋   | 20/30 [05:08<02:15, 13.59s/it, 308.24/2100 seconds]

🔁 Fold 1 | Precision=0.490 | Recall=0.586 | F1=0.550
[I 2025-06-07 23:37:23,302] Trial 19 pruned. 


Best trial: 8. Best value: 0.428452:  70%|███████   | 21/30 [05:16<01:47, 11.98s/it, 316.44/2100 seconds]

🔁 Fold 0 | Precision=0.526 | Recall=0.766 | F1=0.665
[I 2025-06-07 23:37:31,502] Trial 20 pruned. 


Best trial: 8. Best value: 0.428452:  73%|███████▎  | 22/30 [05:20<01:15,  9.45s/it, 320.01/2100 seconds]

🔁 Fold 0 | Precision=0.535 | Recall=0.516 | F1=0.522
[I 2025-06-07 23:37:35,069] Trial 21 pruned. 
🔁 Fold 0 | Precision=0.528 | Recall=0.471 | F1=0.489


Best trial: 8. Best value: 0.428452:  77%|███████▋  | 23/30 [05:28<01:04,  9.24s/it, 328.77/2100 seconds]

🔁 Fold 1 | Precision=0.498 | Recall=0.589 | F1=0.556
[I 2025-06-07 23:37:43,830] Trial 22 pruned. 


Best trial: 8. Best value: 0.428452:  80%|████████  | 24/30 [05:32<00:45,  7.60s/it, 332.54/2100 seconds]

🔁 Fold 0 | Precision=0.521 | Recall=0.919 | F1=0.733
[I 2025-06-07 23:37:47,599] Trial 23 pruned. 


Best trial: 8. Best value: 0.428452:  83%|████████▎ | 25/30 [05:36<00:32,  6.55s/it, 336.64/2100 seconds]

🔁 Fold 0 | Precision=0.528 | Recall=0.591 | F1=0.568
[I 2025-06-07 23:37:51,700] Trial 24 pruned. 


Best trial: 8. Best value: 0.428452:  87%|████████▋ | 26/30 [05:40<00:22,  5.60s/it, 340.03/2100 seconds]

🔁 Fold 0 | Precision=0.527 | Recall=0.621 | F1=0.586
[I 2025-06-07 23:37:55,084] Trial 25 pruned. 
🔁 Fold 0 | Precision=0.524 | Recall=0.499 | F1=0.507


Best trial: 8. Best value: 0.428452:  90%|█████████ | 27/30 [05:49<00:20,  6.67s/it, 349.20/2100 seconds]

🔁 Fold 1 | Precision=0.488 | Recall=0.657 | F1=0.589
[I 2025-06-07 23:38:04,259] Trial 26 pruned. 
🔁 Fold 0 | Precision=0.521 | Recall=0.502 | F1=0.508


Best trial: 8. Best value: 0.428452:  93%|█████████▎| 28/30 [06:03<00:17,  8.93s/it, 363.41/2100 seconds]

🔁 Fold 1 | Precision=0.492 | Recall=0.846 | F1=0.682
[I 2025-06-07 23:38:18,465] Trial 27 pruned. 
🔁 Fold 0 | Precision=0.546 | Recall=0.484 | F1=0.503


Best trial: 8. Best value: 0.428452:  97%|█████████▋| 29/30 [06:12<00:09,  9.06s/it, 372.76/2100 seconds]

🔁 Fold 1 | Precision=0.499 | Recall=0.681 | F1=0.607
[I 2025-06-07 23:38:27,820] Trial 28 pruned. 
🔁 Fold 0 | Precision=0.524 | Recall=0.495 | F1=0.504


Best trial: 8. Best value: 0.428452: 100%|██████████| 30/30 [06:22<00:00, 12.75s/it, 382.58/2100 seconds]

🔁 Fold 1 | Precision=0.496 | Recall=0.569 | F1=0.542
[I 2025-06-07 23:38:37,637] Trial 29 pruned. 

🏆 Best Weighted-F1 (α=2): 0.5715
📜 Best Hyperparameters:
{
  "window": 18,
  "batch": 64,
  "lr": 0.0007145565133513971,
  "epochs": 20,
  "conv_blocks": 1,
  "filters": 96,
  "kernel": 2,
  "act": "selu",
  "dropout": 0.1105138178778751,
  "dense": 128,
  "l2": 0.00019268985325226193,
  "pool": "gmp",
  "extra_dense": false
}
✅ Saved parameters → cnn_best_params.json
✅ Saved scaler     → cnn_scaler.pkl
⏰ Finished at: 2025-06-07 23:38:37





In [4]:
# cnn_optuna_f0_56.py
import os, json, warnings, joblib, optuna
from pathlib import Path
from datetime import datetime
import numpy as np, pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score, recall_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers

# ─────── Setup ─────────────────────────────────────────────────────────
warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
for g in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(g, True)

# ─────── Config ────────────────────────────────────────────────────────
CSV_PATH   = Path(r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction"
                  r"\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv")
VAL_RATIO  = 0.20
WF_FOLDS   = 2
BETA       = 0.56
N_TRIALS   = 30
TIMEOUT    = 35 * 60
SCALER_OUT = "cnn_scaler.pkl"
PARAMS_OUT = "cnn_best_params.json"

DROP_COLS = ['open', 'high', 'low', 'typical_price', 'EMA_7', 'EMA_21', 'SMA_20', 'SMA_50',
    'vwap_24h', 'close_4h', 'bollinger_upper', 'bollinger_lower', 'resistance_level',
    'support_level', 'high_low', 'high_close', 'low_close', 'true_range', 'volume_mean_20',
    'MACD_line', 'MACD_signal', 'bollinger_width', 'volatility_regime', 'CCI', 'stoch_%D',
    'parkinson_vol', 'ema_cross_down', 'macd_cross_down', 'vol_spike_1_5x', 'near_upper_band',
    'near_lower_band', 'break_upper_band', 'break_lower_band', 'rsi_oversold', 'rsi_overbought',
    'above_sma20', 'above_sma50', 'ema7_above_ema21', 'macd_positive', 'volume_breakout',
    'volume_breakdown', 'stoch_overbought', 'stoch_oversold', 'cci_overbought', 'cci_oversold',
    'trending_market', 'bullish_scenario_1', 'bullish_scenario_2', 'bullish_scenario_3',
    'bullish_scenario_4', 'bullish_scenario_5', 'bullish_scenario_6', 'bearish_scenario_1',
    'bearish_scenario_2', 'bearish_scenario_3', 'bearish_scenario_4', 'bearish_scenario_6',
    'ema_cross_up', 'macd_cross_up', 'oversold_reversal', 'overbought_reversal', 'close'
]

# ─────── Helpers ───────────────────────────────────────────────────────
def f_beta_score(y_true, y_pred, beta=BETA):
    p = precision_score(y_true, y_pred, zero_division=0)
    r = recall_score(y_true, y_pred, zero_division=0)
    return 0.0 if p + r == 0 else (1 + beta**2) * p * r / (beta**2 * p + r)

def make_windows(arr, labels, win):
    X, y = [], []
    for i in range(win, len(arr)):
        X.append(arr[i-win:i])
        y.append(labels[i])
    return np.asarray(X, np.float32), np.asarray(y, np.int8)

def load_and_scale():
    df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
    df = df.loc["2018-01-01":]
    df = df.drop(columns=[c for c in DROP_COLS if c in df.columns])
    df = df.dropna(subset=["target"]).dropna()

    X_all = df.drop(columns="target").values
    y_all = df["target"].astype(int).values
    split_real = int(len(df) * (1 - VAL_RATIO))
    scaler = StandardScaler().fit(X_all[:split_real])
    X_scaled = scaler.transform(X_all)

    joblib.dump(scaler, SCALER_OUT)
    print(f"✔ Scaler fitted on {split_real} rows")
    return X_scaled, y_all, scaler, split_real, X_all.shape[1]

# ─────── Data Load ─────────────────────────────────────────────────────
X_SCALED, Y_ALL, SCALER, REAL_SPLIT_IDX, N_FEATS = load_and_scale()

# ─────── CNN Model ─────────────────────────────────────────────────────
def build_model(trial, win):
    conv_blocks = trial.suggest_int("conv_blocks", 1, 3)
    base_filters = trial.suggest_int("filters", 32, 96, step=32)
    kernel = trial.suggest_int("kernel", 2, 4)
    activation = trial.suggest_categorical("act", ["relu", "elu", "selu"])
    dropout = trial.suggest_float("dropout", 0.05, 0.3)
    dense_units = trial.suggest_int("dense", 64, 128, step=64)
    l2reg = trial.suggest_float("l2", 1e-6, 1e-3, log=True)

    inp = layers.Input(shape=(win, N_FEATS))
    x = inp
    for b in range(conv_blocks):
        f = base_filters * (2**b)
        y = layers.Conv1D(f, kernel, padding="causal", activation=activation,
                          kernel_regularizer=regularizers.l2(l2reg))(x)
        y = layers.BatchNormalization()(y)
        if x.shape[-1] != y.shape[-1]:
            x = layers.Conv1D(f, 1, padding="same")(x)
        x = layers.add([x, y])

    pool = trial.suggest_categorical("pool", ["gap", "gmp"])
    x = layers.GlobalAveragePooling1D()(x) if pool == "gap" else layers.GlobalMaxPooling1D()(x)

    if trial.suggest_categorical("extra_dense", [True, False]):
        x = layers.Dense(dense_units, activation=activation,
                         kernel_regularizer=regularizers.l2(l2reg))(x)
        x = layers.Dropout(dropout)(x)

    out = layers.Dense(1, activation="sigmoid")(x)
    return keras.Model(inp, out)

# ─────── Optuna Objective ──────────────────────────────────────────────
def objective(trial):
    win = trial.suggest_int("window", 12, 48, step=6)
    batch = trial.suggest_categorical("batch", [32, 64])
    lr = trial.suggest_float("lr", 1e-4, 5e-3, log=True)
    epochs = trial.suggest_int("epochs", 20, 60, step=20)

    data_len = REAL_SPLIT_IDX - win
    val_size = data_len // (WF_FOLDS + 1)
    fold_scores = []

    for fold in range(WF_FOLDS):
        val_start = data_len - (WF_FOLDS - fold) * val_size
        val_end = val_start + val_size
        if val_start < win * 2:
            continue

        X_fold = X_SCALED[:val_end + win]
        y_fold = Y_ALL[:val_end + win]
        X_win, y_win = make_windows(X_fold, y_fold, win)
        train_end = val_start
        X_tr, y_tr = X_win[:train_end], y_win[:train_end]
        X_va, y_va = X_win[val_start:val_end], y_win[val_start:val_end]

        if len(X_tr) < 10 or len(X_va) < 5:
            continue

        tf.keras.backend.clear_session()
        model = build_model(trial, win)
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),
                      loss="binary_crossentropy")

        model.fit(X_tr, y_tr,
                  epochs=epochs,
                  batch_size=batch,
                  validation_data=(X_va, y_va),
                  callbacks=[keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)],
                  verbose=0)

        preds = (model.predict(X_va, verbose=0).ravel() >= 0.5).astype(int)
        p = precision_score(y_va, preds, zero_division=0)
        r = recall_score(y_va, preds, zero_division=0)
        f = f_beta_score(y_va, preds, beta=BETA)
        print(f"🔁 Fold {fold} | Precision={p:.3f} | Recall={r:.3f} | Fβ={f:.3f}")
        fold_scores.append(f)

        trial.report(np.mean(fold_scores), step=fold)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return 1.0 - np.mean(fold_scores) if fold_scores else 1.0

# ─────── Run Optuna ────────────────────────────────────────────────────
if __name__ == "__main__":
    study = optuna.create_study(direction="minimize",
                                sampler=optuna.samplers.TPESampler(seed=SEED),
                                pruner=optuna.pruners.MedianPruner())
    study.optimize(objective, n_trials=N_TRIALS, timeout=TIMEOUT, show_progress_bar=True)

    best_fbeta = 1.0 - study.best_value
    print(f"\n🏆 Best Weighted-Fβ (β={BETA}): {best_fbeta:.4f}")
    print("📜 Best Hyperparameters:")
    print(json.dumps(study.best_params, indent=2))

    with open(PARAMS_OUT, "w") as f:
        json.dump(study.best_params, f, indent=2)
    print(f"✅ Saved parameters → {PARAMS_OUT}")
    print(f"✅ Saved scaler     → {SCALER_OUT}")
    print("⏰ Finished at:", datetime.now().strftime('%Y-%m-%d %H:%M:%S'))


[I 2025-06-07 18:39:30,875] A new study created in memory with name: no-name-8c826d58-ed73-47ce-849b-d6a3bc1d3886


✔ Scaler fitted on 12684 rows


  0%|          | 0/30 [00:00<?, ?it/s]

🔁 Fold 0 | Precision=0.525 | Recall=0.672 | Fβ=0.554


Best trial: 0. Best value: 0.48291:   3%|▎         | 1/30 [00:20<10:06, 20.93s/it, 20.93/2100 seconds]

🔁 Fold 1 | Precision=0.488 | Recall=0.457 | Fβ=0.480
[I 2025-06-07 18:39:51,804] Trial 0 finished with value: 0.4829096358918845 and parameters: {'window': 24, 'batch': 32, 'lr': 0.0010401663679887319, 'epochs': 20, 'conv_blocks': 1, 'filters': 32, 'kernel': 4, 'act': 'elu', 'dropout': 0.2924774630404986, 'dense': 128, 'l2': 4.335281794951567e-06, 'pool': 'gmp', 'extra_dense': False}. Best is trial 0 with value: 0.4829096358918845.
🔁 Fold 0 | Precision=0.527 | Recall=0.315 | Fβ=0.454


Best trial: 0. Best value: 0.48291:   7%|▋         | 2/30 [00:44<10:27, 22.41s/it, 44.38/2100 seconds]

🔁 Fold 1 | Precision=0.484 | Recall=0.570 | Fβ=0.502
[I 2025-06-07 18:40:15,253] Trial 1 finished with value: 0.5219072338479555 and parameters: {'window': 30, 'batch': 64, 'lr': 0.00017258215396625024, 'epochs': 20, 'conv_blocks': 2, 'filters': 64, 'kernel': 4, 'act': 'selu', 'dropout': 0.061612603179999434, 'dense': 128, 'l2': 3.247673570627449e-06, 'pool': 'gmp', 'extra_dense': True}. Best is trial 0 with value: 0.4829096358918845.
🔁 Fold 0 | Precision=0.516 | Recall=0.773 | Fβ=0.561


Best trial: 2. Best value: 0.462351:  10%|█         | 3/30 [01:02<09:10, 20.38s/it, 62.35/2100 seconds]

🔁 Fold 1 | Precision=0.490 | Recall=0.613 | Fβ=0.515
[I 2025-06-07 18:40:33,222] Trial 2 finished with value: 0.46235122560284614 and parameters: {'window': 24, 'batch': 64, 'lr': 0.0005595074635794797, 'epochs': 20, 'conv_blocks': 2, 'filters': 32, 'kernel': 4, 'act': 'elu', 'dropout': 0.1800170052944527, 'dense': 128, 'l2': 3.5856126103453987e-06, 'pool': 'gap', 'extra_dense': True}. Best is trial 2 with value: 0.46235122560284614.
🔁 Fold 0 | Precision=0.527 | Recall=0.676 | Fβ=0.556


Best trial: 2. Best value: 0.462351:  13%|█▎        | 4/30 [01:31<10:16, 23.72s/it, 91.18/2100 seconds]

🔁 Fold 1 | Precision=0.483 | Recall=0.674 | Fβ=0.518
[I 2025-06-07 18:41:02,057] Trial 3 finished with value: 0.46300977613833094 and parameters: {'window': 36, 'batch': 32, 'lr': 0.000215262809722153, 'epochs': 20, 'conv_blocks': 1, 'filters': 64, 'kernel': 2, 'act': 'relu', 'dropout': 0.18567402078956213, 'dense': 64, 'l2': 0.0002550298070162893, 'pool': 'gmp', 'extra_dense': True}. Best is trial 2 with value: 0.46235122560284614.
🔁 Fold 0 | Precision=0.518 | Recall=0.417 | Fβ=0.490


Best trial: 2. Best value: 0.462351:  17%|█▋        | 5/30 [01:47<08:47, 21.11s/it, 107.67/2100 seconds]

🔁 Fold 1 | Precision=0.510 | Recall=0.533 | Fβ=0.515
[I 2025-06-07 18:41:18,546] Trial 4 finished with value: 0.49748838219048075 and parameters: {'window': 12, 'batch': 32, 'lr': 0.001732053535845956, 'epochs': 60, 'conv_blocks': 1, 'filters': 64, 'kernel': 2, 'act': 'relu', 'dropout': 0.06588958757150591, 'dense': 64, 'l2': 9.4525713910723e-06, 'pool': 'gap', 'extra_dense': True}. Best is trial 2 with value: 0.46235122560284614.
🔁 Fold 0 | Precision=0.527 | Recall=0.528 | Fβ=0.528
🔁 Fold 1 | Precision=0.495 | Recall=0.586 | Fβ=0.514


Best trial: 2. Best value: 0.462351:  20%|██        | 6/30 [02:14<09:14, 23.10s/it, 134.64/2100 seconds]

[I 2025-06-07 18:41:45,511] Trial 5 pruned. 
🔁 Fold 0 | Precision=0.521 | Recall=0.570 | Fβ=0.532


Best trial: 2. Best value: 0.462351:  23%|██▎       | 7/30 [05:55<33:37, 87.72s/it, 355.40/2100 seconds]

🔁 Fold 1 | Precision=0.502 | Recall=0.764 | Fβ=0.546
[I 2025-06-07 18:45:26,274] Trial 6 pruned. 
🔁 Fold 0 | Precision=0.517 | Recall=0.395 | Fβ=0.481


Best trial: 2. Best value: 0.462351:  27%|██▋       | 8/30 [06:24<25:15, 68.90s/it, 384.01/2100 seconds]

🔁 Fold 1 | Precision=0.497 | Recall=0.407 | Fβ=0.472
[I 2025-06-07 18:45:54,883] Trial 7 finished with value: 0.5232915107380718 and parameters: {'window': 42, 'batch': 32, 'lr': 0.0007374699809816792, 'epochs': 40, 'conv_blocks': 1, 'filters': 32, 'kernel': 3, 'act': 'relu', 'dropout': 0.22575473972379445, 'dense': 64, 'l2': 0.0008228984573308163, 'pool': 'gap', 'extra_dense': True}. Best is trial 2 with value: 0.46235122560284614.
🔁 Fold 0 | Precision=0.527 | Recall=0.326 | Fβ=0.459


Best trial: 2. Best value: 0.462351:  30%|███       | 9/30 [06:37<18:04, 51.63s/it, 397.66/2100 seconds]

🔁 Fold 1 | Precision=0.491 | Recall=0.649 | Fβ=0.521
[I 2025-06-07 18:46:08,537] Trial 8 finished with value: 0.50995318656849 and parameters: {'window': 18, 'batch': 64, 'lr': 0.0007145565133513971, 'epochs': 20, 'conv_blocks': 1, 'filters': 96, 'kernel': 2, 'act': 'selu', 'dropout': 0.1105138178778751, 'dense': 128, 'l2': 0.00019268985325226193, 'pool': 'gmp', 'extra_dense': False}. Best is trial 2 with value: 0.46235122560284614.


Best trial: 2. Best value: 0.462351:  33%|███▎      | 10/30 [06:45<12:39, 38.00s/it, 405.13/2100 seconds]

🔁 Fold 0 | Precision=0.520 | Recall=0.619 | Fβ=0.541
[I 2025-06-07 18:46:16,003] Trial 9 pruned. 


Best trial: 2. Best value: 0.462351:  37%|███▋      | 11/30 [08:07<16:19, 51.54s/it, 487.37/2100 seconds]

🔁 Fold 0 | Precision=0.520 | Recall=0.845 | Fβ=0.572
[I 2025-06-07 18:47:38,245] Trial 10 pruned. 


Best trial: 2. Best value: 0.462351:  40%|████      | 12/30 [08:22<12:08, 40.48s/it, 502.56/2100 seconds]

🔁 Fold 0 | Precision=0.534 | Recall=0.456 | Fβ=0.513
[I 2025-06-07 18:47:53,439] Trial 11 pruned. 


Best trial: 2. Best value: 0.462351:  43%|████▎     | 13/30 [08:39<09:28, 33.45s/it, 519.82/2100 seconds]

🔁 Fold 0 | Precision=0.516 | Recall=0.485 | Fβ=0.508
[I 2025-06-07 18:48:10,694] Trial 12 pruned. 


Best trial: 2. Best value: 0.462351:  47%|████▋     | 14/30 [08:49<07:00, 26.30s/it, 529.62/2100 seconds]

🔁 Fold 0 | Precision=0.518 | Recall=0.663 | Fβ=0.546
[I 2025-06-07 18:48:20,496] Trial 13 pruned. 


Best trial: 2. Best value: 0.462351:  50%|█████     | 15/30 [09:07<05:56, 23.75s/it, 547.47/2100 seconds]

🔁 Fold 0 | Precision=0.522 | Recall=0.420 | Fβ=0.493
[I 2025-06-07 18:48:38,343] Trial 14 pruned. 


Best trial: 2. Best value: 0.462351:  53%|█████▎    | 16/30 [09:15<04:24, 18.91s/it, 555.14/2100 seconds]

🔁 Fold 0 | Precision=0.516 | Recall=0.732 | Fβ=0.555
[I 2025-06-07 18:48:46,013] Trial 15 pruned. 


Best trial: 2. Best value: 0.462351:  57%|█████▋    | 17/30 [09:25<03:30, 16.21s/it, 565.05/2100 seconds]

🔁 Fold 0 | Precision=0.518 | Recall=0.709 | Fβ=0.554
[I 2025-06-07 18:48:55,922] Trial 16 pruned. 


Best trial: 2. Best value: 0.462351:  60%|██████    | 18/30 [10:47<07:14, 36.24s/it, 647.94/2100 seconds]

🔁 Fold 0 | Precision=0.520 | Recall=1.000 | Fβ=0.587
[I 2025-06-07 18:50:18,816] Trial 17 pruned. 


Best trial: 2. Best value: 0.462351:  63%|██████▎   | 19/30 [10:54<05:01, 27.44s/it, 654.85/2100 seconds]

🔁 Fold 0 | Precision=0.519 | Recall=0.720 | Fβ=0.556
[I 2025-06-07 18:50:25,728] Trial 18 pruned. 


Best trial: 2. Best value: 0.462351:  67%|██████▋   | 20/30 [11:11<04:01, 24.18s/it, 671.44/2100 seconds]

🔁 Fold 0 | Precision=0.528 | Recall=0.474 | Fβ=0.514
[I 2025-06-07 18:50:42,320] Trial 19 pruned. 


Best trial: 2. Best value: 0.462351:  70%|███████   | 21/30 [11:24<03:07, 20.85s/it, 684.53/2100 seconds]

🔁 Fold 0 | Precision=0.519 | Recall=0.784 | Fβ=0.564
[I 2025-06-07 18:50:55,410] Trial 20 pruned. 


Best trial: 2. Best value: 0.462351:  73%|███████▎  | 22/30 [11:35<02:22, 17.75s/it, 695.06/2100 seconds]

🔁 Fold 0 | Precision=0.521 | Recall=0.716 | Fβ=0.557
[I 2025-06-07 18:51:05,933] Trial 21 pruned. 


Best trial: 2. Best value: 0.462351:  77%|███████▋  | 23/30 [11:50<01:58, 16.91s/it, 710.01/2100 seconds]

🔁 Fold 0 | Precision=0.521 | Recall=0.540 | Fβ=0.526
[I 2025-06-07 18:51:20,884] Trial 22 pruned. 


Best trial: 2. Best value: 0.462351:  80%|████████  | 24/30 [11:57<01:25, 14.20s/it, 717.87/2100 seconds]

🔁 Fold 0 | Precision=0.528 | Recall=0.399 | Fβ=0.490
[I 2025-06-07 18:51:28,749] Trial 23 pruned. 
🔁 Fold 0 | Precision=0.518 | Recall=0.380 | Fβ=0.477


Best trial: 2. Best value: 0.462351:  83%|████████▎ | 25/30 [12:23<01:28, 17.75s/it, 743.90/2100 seconds]

🔁 Fold 1 | Precision=0.493 | Recall=0.594 | Fβ=0.514
[I 2025-06-07 18:51:54,779] Trial 24 finished with value: 0.5049048718884204 and parameters: {'window': 18, 'batch': 32, 'lr': 0.0005543398004074266, 'epochs': 20, 'conv_blocks': 1, 'filters': 32, 'kernel': 4, 'act': 'elu', 'dropout': 0.09475936144251602, 'dense': 128, 'l2': 6.413009116721392e-06, 'pool': 'gmp', 'extra_dense': False}. Best is trial 2 with value: 0.46235122560284614.


Best trial: 2. Best value: 0.462351:  87%|████████▋ | 26/30 [12:39<01:08, 17.02s/it, 759.22/2100 seconds]

🔁 Fold 0 | Precision=0.522 | Recall=0.501 | Fβ=0.517
[I 2025-06-07 18:52:10,092] Trial 25 pruned. 


Best trial: 2. Best value: 0.462351:  90%|█████████ | 27/30 [12:48<00:43, 14.63s/it, 768.29/2100 seconds]

🔁 Fold 0 | Precision=0.525 | Recall=0.527 | Fβ=0.525
[I 2025-06-07 18:52:19,167] Trial 26 pruned. 
🔁 Fold 0 | Precision=0.520 | Recall=0.204 | Fβ=0.380


Best trial: 2. Best value: 0.462351:  93%|█████████▎| 28/30 [13:14<00:36, 18.14s/it, 794.61/2100 seconds]

🔁 Fold 1 | Precision=0.487 | Recall=0.615 | Fβ=0.513
[I 2025-06-07 18:52:45,491] Trial 27 finished with value: 0.5538786574581488 and parameters: {'window': 30, 'batch': 32, 'lr': 0.0020194297281664743, 'epochs': 40, 'conv_blocks': 2, 'filters': 64, 'kernel': 3, 'act': 'elu', 'dropout': 0.14723199156647648, 'dense': 128, 'l2': 4.984439020021232e-05, 'pool': 'gmp', 'extra_dense': True}. Best is trial 2 with value: 0.46235122560284614.


Best trial: 2. Best value: 0.462351:  97%|█████████▋| 29/30 [13:26<00:16, 16.13s/it, 806.05/2100 seconds]

🔁 Fold 0 | Precision=0.529 | Recall=0.625 | Fβ=0.549
[I 2025-06-07 18:52:56,925] Trial 28 pruned. 


Best trial: 2. Best value: 0.462351: 100%|██████████| 30/30 [14:08<00:00, 28.30s/it, 848.93/2100 seconds]

🔁 Fold 0 | Precision=0.516 | Recall=0.537 | Fβ=0.521
[I 2025-06-07 18:53:39,807] Trial 29 pruned. 

🏆 Best Weighted-Fβ (β=0.56): 0.5376
📜 Best Hyperparameters:
{
  "window": 24,
  "batch": 64,
  "lr": 0.0005595074635794797,
  "epochs": 20,
  "conv_blocks": 2,
  "filters": 32,
  "kernel": 4,
  "act": "elu",
  "dropout": 0.1800170052944527,
  "dense": 128,
  "l2": 3.5856126103453987e-06,
  "pool": "gap",
  "extra_dense": true
}
✅ Saved parameters → cnn_best_params.json
✅ Saved scaler     → cnn_scaler.pkl
⏰ Finished at: 2025-06-07 18:53:39





In [None]:
🏆 Top 5 Trials by F1-Score:
Rank	Trial	Precision	Recall	F1 Score	Notable Hyperparameters Summary
1️⃣	#3	0.527	0.676	0.593	window=36, batch=32, lr=0.00021, filters=64, kernel=2, act=relu, dropout=0.186, dense=64, conv_blocks=1, pool=gmp, extra_dense=True
2️⃣	#2	0.516	0.773	0.620	window=24, batch=64, lr=0.00056, filters=32, kernel=4, act=elu, dropout=0.180, dense=128, conv_blocks=2, pool=gap, extra_dense=True
3️⃣	#27	0.487	0.615	0.544	window=30, batch=32, lr=0.00202, filters=64, kernel=3, act=elu, dropout=0.147, dense=128, conv_blocks=2, pool=gmp, extra_dense=True
4️⃣	#4	0.527	0.528	0.527	window=12, batch=32, lr=0.00173, filters=64, kernel=2, act=relu, dropout=0.066, dense=64, conv_blocks=1, pool=gap, extra_dense=True
5️⃣	#24	0.528	0.399	0.454	window=18, batch=32, lr=0.00055, filters=32, kernel=4, act=elu, dropout=0.094, dense=128, conv_blocks=1, pool=gmp, extra_dense=False

In [10]:
# cnn_final_train_fixed.py
# ----------------------------------------------------------
# Trains a 1-D CNN with the optimal hyper-parameters you supplied.

import os, json, joblib, warnings
from pathlib import Path
from datetime import datetime

import numpy   as np
import pandas  as pd
import tensorflow as tf
from   tensorflow import keras
from   tensorflow.keras import layers, regularizers
from   sklearn.preprocessing import StandardScaler
from   sklearn.metrics       import (accuracy_score, precision_score,
                                     recall_score, roc_auc_score,
                                     confusion_matrix,
                                     classification_report)

# ═══════════════ Seeds / GPU set-up ══════════════════════
warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
for g in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(g, True)

# ═══════════════ Paths & constants ═══════════════════════
CSV_PATH      = Path(r"C:/Users/ADMIN/Desktop/Coding_projects/stock_market_prediction/"
                     r"Stock-Market-Prediction/data/processed/"
                     r"gemini_btc_with_features_4h.csv")
MODEL_OUT     = "cnn_optimal_val20.h5"
SCALER_OUT    = "cnn_scaler.pkl"
SUMMARY_JSON  = "cnn_training_summary.json"

# ---- hyper-parameters (fixed) ----
WIN,  BATCH   = 36, 32
LR            = 2.1e-4
FILTERS       = 64
KERNEL        = 2
ACT           = "relu"
DROPOUT       = 0.186
DENSE_UNITS   = 64
CONV_BLOCKS   = 1
POOL_TYPE     = "gmp"      # gmp | gap
EXTRA_DENSE   = True
L2_REG        = 1e-6
EPOCHS        = 100
EARLY_STOP    = 12
ALPHA         = 2.0        # precision weight in weighted-F
VAL_FRAC      = 0.20

# ---- columns we must drop to avoid leakage ----
DROP_COLS = [
    'open','high','low','typical_price','EMA_7','EMA_21','SMA_20','SMA_50',
    'vwap_24h','close_4h','bollinger_upper','bollinger_lower','resistance_level',
    'support_level','high_low','high_close','low_close','true_range',
    'volume_mean_20','MACD_line','MACD_signal','bollinger_width',
    'volatility_regime','CCI','stoch_%D','parkinson_vol','ema_cross_down',
    'macd_cross_down','vol_spike_1_5x','near_upper_band','near_lower_band',
    'break_upper_band','break_lower_band','rsi_oversold','rsi_overbought',
    'above_sma20','above_sma50','ema7_above_ema21','macd_positive',
    'volume_breakout','volume_breakdown','stoch_overbought','stoch_oversold',
    'cci_overbought','cci_oversold','trending_market','bullish_scenario_1',
    'bullish_scenario_2','bullish_scenario_3','bullish_scenario_4',
    'bullish_scenario_5','bullish_scenario_6','bearish_scenario_1',
    'bearish_scenario_2','bearish_scenario_3','bearish_scenario_4',
    'bearish_scenario_6','ema_cross_up','macd_cross_up',
    'oversold_reversal','overbought_reversal','close'
]

# ═══════════════ helpers ═════════════════════════════════
def make_windows(arr: np.ndarray, labels: np.ndarray, win: int):
    xs, ys = [], []
    for i in range(win, len(arr)):
        xs.append(arr[i-win:i])
        ys.append(labels[i])
    return np.asarray(xs, np.float32), np.asarray(ys, np.int8)

def weighted_f(alpha: float, p: float, r: float) -> float:
    if p + r == 0:
        return 0.0
    return (1 + alpha) * p * r / (alpha * p + r)

def build_model(win: int, n_features: int) -> keras.Model:
    """**Fixed**: pooling layers are *called* on tensors."""
    inp = layers.Input(shape=(win, n_features))
    x   = inp

    for b in range(CONV_BLOCKS):
        f = FILTERS * (2 ** b)
        y = layers.Conv1D(f, KERNEL, padding="causal",
                          activation=ACT,
                          kernel_regularizer=regularizers.l2(L2_REG))(x)
        y = layers.BatchNormalization()(y)

        if CONV_BLOCKS > 1:          # skip-connection only if >1 block
            if x.shape[-1] != y.shape[-1]:
                x = layers.Conv1D(f, 1, padding="same")(x)
            x = layers.Add()([x, y])
        else:
            x = y

    if POOL_TYPE == "gmp":
        x = layers.GlobalMaxPooling1D()(x)       # <- () executes the layer
    else:
        x = layers.GlobalAveragePooling1D()(x)

    x = layers.Dropout(DROPOUT)(x)

    if EXTRA_DENSE:
        x = layers.Dense(DENSE_UNITS, activation=ACT,
                         kernel_regularizer=regularizers.l2(L2_REG))(x)
        x = layers.Dropout(DROPOUT)(x)

    out = layers.Dense(1, activation="sigmoid")(x)
    return keras.Model(inp, out)

# ═══════════════ 1. DATA ═════════════════════════════════
print("📊 Loading & preprocessing …")
df = (pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
        .loc["2018-01-01":]
        .drop(columns=[c for c in DROP_COLS if c in df.columns])
        .dropna(subset=["target"])
        .dropna())

X_raw = df.drop(columns="target").values
y_raw = df["target"].astype(int).values
n_features = X_raw.shape[1]

split_idx = int(len(df) * (1 - VAL_FRAC))
scaler    = StandardScaler().fit(X_raw[:split_idx])

X_train_s = scaler.transform(X_raw[:split_idx]).astype(np.float32)
X_val_s   = scaler.transform(X_raw[ split_idx:]).astype(np.float32)
y_train   = y_raw[: split_idx]
y_val     = y_raw[ split_idx:]

X_train, y_train = make_windows(X_train_s, y_train, WIN)
X_val,   y_val   = make_windows(X_val_s,   y_val,   WIN)

print(f"   Train windows : {len(X_train):,}")
print(f"   Val   windows : {len(X_val):,}")
print(f"   Features      : {n_features}")

# ═══════════════ 2. MODEL ═══════════════════════════════
print("\n🏗️ Building model …")
tf.keras.backend.clear_session()
model = build_model(WIN, n_features)
model.compile(optimizer=keras.optimizers.Adam(LR),
              loss="binary_crossentropy",
              metrics=["accuracy"])

# ═══════════════ 3. TRAIN ═══════════════════════════════
callbacks = [
    keras.callbacks.EarlyStopping(patience=EARLY_STOP,
                                  restore_best_weights=True,
                                  verbose=1),
    keras.callbacks.ReduceLROnPlateau(factor=0.5,
                                      patience=5,
                                      verbose=1,
                                      min_lr=1e-7)
]

print("\n🚀 Training …")
history = model.fit(X_train, y_train,
                    epochs=EPOCHS,
                    batch_size=BATCH,
                    validation_data=(X_val, y_val),
                    callbacks=callbacks,
                    verbose=2)

# ═══════════════ 4. EVALUATION ══════════════════════════
prob = model.predict(X_val, verbose=0).ravel()
pred = (prob >= 0.5).astype(int)

precision = precision_score(y_val, pred, zero_division=0)
recall    = recall_score(y_val, pred, zero_division=0)
wf1       = weighted_f(ALPHA, precision, recall)

metrics = {
    "accuracy"      : accuracy_score(y_val, pred),
    "precision"     : precision,
    "recall"        : recall,
    "weighted_f1_a2": wf1,
    "auc"           : roc_auc_score(y_val, prob)
}

print("\n── Validation metrics ──")
for k, v in metrics.items():
    print(f"{k:20s}: {v:6.3f}")

print("\nClassification report:")
print(classification_report(y_val, pred, target_names=["Down", "Up"]))

cm = confusion_matrix(y_val, pred)
print("\nConfusion-matrix:")
print(cm)

# ═══════════════ 5. SAVE ═══════════════════════════════=
print("\n💾 Saving artefacts …")
keras.models.save_model(model, MODEL_OUT)
joblib.dump(scaler, SCALER_OUT)

with open(SUMMARY_JSON, "w") as fp:
    json.dump({
        "timestamp"      : datetime.utcnow().isoformat(timespec="seconds") + "Z",
        "window_size"    : WIN,
        "n_features"     : n_features,
        "train_windows"  : int(len(X_train)),
        "val_windows"    : int(len(X_val)),
        "metrics"        : {k: float(v) for k, v in metrics.items()},
        "hyperparameters": {
            "filters" : FILTERS, "kernel": KERNEL, "activation": ACT,
            "dropout" : DROPOUT, "dense_units": DENSE_UNITS,
            "conv_blocks": CONV_BLOCKS, "pool": POOL_TYPE,
            "extra_dense": EXTRA_DENSE, "lr": LR, "batch": BATCH,
            "l2_reg": L2_REG
        },
        "confusion_matrix": cm.tolist()
    }, fp, indent=2)

print(f"\n✅ Model   saved → {MODEL_OUT}")
print(f"✅ Scaler  saved → {SCALER_OUT}")
print(f"✅ Summary saved → {SUMMARY_JSON}")
print("🎉 Training complete.")


📊 Loading & preprocessing …
   Train windows : 12,648
   Val   windows : 3,135
   Features      : 19

🏗️ Building model …

🚀 Training …
Epoch 1/100
396/396 - 2s - 4ms/step - accuracy: 0.4934 - loss: 1.0171 - val_accuracy: 0.4995 - val_loss: 0.7132 - learning_rate: 2.1000e-04
Epoch 2/100
396/396 - 1s - 2ms/step - accuracy: 0.5015 - loss: 0.7806 - val_accuracy: 0.5100 - val_loss: 0.7036 - learning_rate: 2.1000e-04
Epoch 3/100
396/396 - 1s - 2ms/step - accuracy: 0.5137 - loss: 0.7283 - val_accuracy: 0.5196 - val_loss: 0.6983 - learning_rate: 2.1000e-04
Epoch 4/100
396/396 - 1s - 2ms/step - accuracy: 0.4959 - loss: 0.7119 - val_accuracy: 0.5132 - val_loss: 0.6948 - learning_rate: 2.1000e-04
Epoch 5/100
396/396 - 1s - 2ms/step - accuracy: 0.5074 - loss: 0.7025 - val_accuracy: 0.5072 - val_loss: 0.6936 - learning_rate: 2.1000e-04
Epoch 6/100
396/396 - 1s - 2ms/step - accuracy: 0.5050 - loss: 0.6987 - val_accuracy: 0.5033 - val_loss: 0.6932 - learning_rate: 2.1000e-04
Epoch 7/100
396/396 - 1s



              precision    recall  f1-score   support

        Down       0.48      0.35      0.40      1498
          Up       0.52      0.66      0.58      1637

    accuracy                           0.51      3135
   macro avg       0.50      0.50      0.49      3135
weighted avg       0.50      0.51      0.50      3135


Confusion-matrix:
[[ 517  981]
 [ 559 1078]]

💾 Saving artefacts …

✅ Model   saved → cnn_optimal_val20.h5
✅ Scaler  saved → cnn_scaler.pkl
✅ Summary saved → cnn_training_summary.json
🎉 Training complete.


In [11]:
"""
cnn_compare_param_sets.py
─────────────────────────
Trains - and compares - several 1-D CNN configurations on the 4-hour BTC data.
Each configuration is built exactly from the hyper-parameter dictionary you
supply in `PARAM_SETS`.
The script prints Precision, Recall, F1 for every run and a final leaderboard.
"""

# ═══════════════════════════════════════════════════════════════════════
# Imports & global set-up
# ═══════════════════════════════════════════════════════════════════════
import os, json, joblib, warnings
from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (precision_score, recall_score, f1_score,
                             accuracy_score, roc_auc_score)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers

warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
for g in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(g, True)

# ═══════════════════════════════════════════════════════════════════════
# Data paths & constants
# ═══════════════════════════════════════════════════════════════════════
CSV_PATH = Path(r"C:/Users/ADMIN/Desktop/Coding_projects/stock_market_prediction/"
                r"Stock-Market-Prediction/data/processed/"
                r"gemini_btc_with_features_4h.csv")

VAL_FRAC   = 0.20          # last 20 % is the validation window
EPOCHS     = 80            # same for every run
EARLY_STOP = 12
L2_REG_DEF = 1e-6          # fallback if a param dict misses 'l2'
CONV_BLOCKS = 1            # fixed depth – keep the test fast

# Columns to drop (same list you tuned on – prevents data leakage)
DROP_COLS = [  # … (list exactly as before – shortened for brevity here)
    'open','high','low','typical_price','EMA_7','EMA_21','SMA_20','SMA_50',
    'vwap_24h','close_4h','bollinger_upper','bollinger_lower','resistance_level',
    'support_level','high_low','high_close','low_close','true_range',
    #  … snip …
    'overbought_reversal','close'
]

# ═══════════════════════════════════════════════════════════════════════
# The six candidate hyper-parameter sets
# ═══════════════════════════════════════════════════════════════════════
PARAM_SETS = [
    {'window': 12, 'batch': 64, 'lr': 0.000898, 'filters': 64, 'kernel': 3,
     'act': 'elu',  'dropout': 0.209, 'dense': 64,  'l2': 3.35e-05,
     'pool': 'gap', 'extra_dense': False},

    {'window': 30, 'batch': 32, 'lr': 0.001545, 'filters': 96, 'kernel': 4,
     'act': 'elu',  'dropout': 0.14,  'dense': 128, 'l2': 0.000168,
     'pool': 'gmp', 'extra_dense': False},

    {'window': 36, 'batch': 32, 'lr': 0.000215, 'filters': 64, 'kernel': 2,
     'act': 'relu', 'dropout': 0.186, 'dense': 64,  'l2': 0.000255,
     'pool': 'gmp', 'extra_dense': True},

    {'window': 24, 'batch': 32, 'lr': 0.00104,  'filters': 32, 'kernel': 4,
     'act': 'elu',  'dropout': 0.292, 'dense': 128, 'l2': 4.33e-06,
     'pool': 'gmp', 'extra_dense': False},

    {'window': 30, 'batch': 64, 'lr': 0.000172, 'filters': 64, 'kernel': 4,
     'act': 'selu','dropout': 0.061, 'dense': 128, 'l2': 3.24e-06,
     'pool': 'gmp', 'extra_dense': True},

    # Duplicate of the first – still included for completeness
    {'window': 12, 'batch': 64, 'lr': 0.000898, 'filters': 64, 'kernel': 3,
     'act': 'elu',  'dropout': 0.209, 'dense': 64,  'l2': 3.35e-05,
     'pool': 'gap', 'extra_dense': False}
]

# ═══════════════════════════════════════════════════════════════════════
# Data prep helpers
# ═══════════════════════════════════════════════════════════════════════
def make_windows(arr, labels, win):
    xs, ys = [], []
    for i in range(win, len(arr)):
        xs.append(arr[i-win:i])
        ys.append(labels[i])
    return (np.asarray(xs, np.float32), np.asarray(ys, np.int8))

# ═══════════════════════════════════════════════════════════════════════
# Model factory
# ═══════════════════════════════════════════════════════════════════════
def build_model(cfg, n_features):
    """Construct a simple residual-style 1-D CNN from cfg dict."""
    l2 = cfg.get("l2", L2_REG_DEF)
    inp = layers.Input(shape=(cfg["window"], n_features))
    x   = inp

    for b in range(CONV_BLOCKS):
        f = cfg["filters"] * (2 ** b)
        y = layers.Conv1D(f, cfg["kernel"], padding="causal",
                          activation=cfg["act"],
                          kernel_regularizer=regularizers.l2(l2))(x)
        y = layers.BatchNormalization()(y)

        if CONV_BLOCKS > 1:
            if x.shape[-1] != y.shape[-1]:
                x = layers.Conv1D(f, 1, padding="same")(x)
            x = layers.Add()([x, y])
        else:
            x = y

    pool = layers.GlobalMaxPooling1D if cfg["pool"] == "gmp" else layers.GlobalAveragePooling1D
    x    = pool()(x)
    x    = layers.Dropout(cfg["dropout"])(x)

    if cfg["extra_dense"]:
        x = layers.Dense(cfg["dense"], activation=cfg["act"],
                         kernel_regularizer=regularizers.l2(l2))(x)
        x = layers.Dropout(cfg["dropout"])(x)

    out = layers.Dense(1, activation="sigmoid")(x)
    return keras.Model(inp, out)

# ═══════════════════════════════════════════════════════════════════════
# 1. Load / scale data once
# ═══════════════════════════════════════════════════════════════════════
print("📊 Loading & scaling data …")
df = (pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
        .loc["2018-01-01":]
        .drop(columns=[c for c in DROP_COLS if c in df.columns])
        .dropna(subset=["target"]).dropna())

X_raw = df.drop(columns="target").values
y_raw = df["target"].astype(int).values
n_features = X_raw.shape[1]

split = int(len(df) * (1 - VAL_FRAC))
scaler = StandardScaler().fit(X_raw[:split])
X_train_scaled = scaler.transform(X_raw[:split]).astype(np.float32)
X_val_scaled   = scaler.transform(X_raw[split:]).astype(np.float32)
y_train_raw    = y_raw[:split]
y_val_raw      = y_raw[split:]

# ═══════════════════════════════════════════════════════════════════════
# 2. Iterate over parameter sets
# ═══════════════════════════════════════════════════════════════════════
results = []

for idx, cfg in enumerate(PARAM_SETS, 1):
    tag = f"Run-{idx:02d}  (win={cfg['window']}, filt={cfg['filters']}, pool={cfg['pool']})"
    print(f"\n{tag}\n" + "─" * len(tag))

    # --- prepare windows for this window size
    X_tr, y_tr = make_windows(X_train_scaled, y_train_raw, cfg["window"])
    X_va, y_va = make_windows(X_val_scaled,   y_val_raw,   cfg["window"])

    # --- build & train
    tf.keras.backend.clear_session()
    model = build_model(cfg, n_features)
    model.compile(optimizer=keras.optimizers.Adam(cfg["lr"]),
                  loss="binary_crossentropy")

    cb = [keras.callbacks.EarlyStopping(patience=EARLY_STOP,
                                        restore_best_weights=True,
                                        verbose=0)]
    model.fit(X_tr, y_tr,
              epochs=EPOCHS,
              batch_size=cfg["batch"],
              validation_data=(X_va, y_va),
              callbacks=cb,
              verbose=0)

    # --- evaluate
    prob = model.predict(X_va, verbose=0).ravel()
    pred = (prob >= 0.5).astype(int)

    prec = precision_score(y_va, pred, zero_division=0)
    rec  = recall_score(y_va, pred,    zero_division=0)
    f1   = f1_score(y_va, pred,        zero_division=0)

    print(f"Precision : {prec:5.3f}   Recall : {rec:5.3f}   F1 : {f1:5.3f}")

    results.append({
        **cfg,
        "precision": prec,
        "recall"   : rec,
        "f1"       : f1,
        "auc"      : roc_auc_score(y_va, prob)
    })

# ═══════════════════════════════════════════════════════════════════════
# 3. Leaderboard
# ═══════════════════════════════════════════════════════════════════════
print("\n🏆  Leaderboard (sorted by F1)")
results_sorted = sorted(results, key=lambda d: d["f1"], reverse=True)

for rk, res in enumerate(results_sorted, 1):
    print(f"{rk:>2}. F1={res['f1']:.3f}  "
          f"P={res['precision']:.3f}  R={res['recall']:.3f}  "
          f"(win={res['window']}, filt={res['filters']}, pool={res['pool']})")

# ═══════════════════════════════════════════════════════════════════════
# 4. Optional – save summary JSON
# ═══════════════════════════════════════════════════════════════════════
summary_path = "cnn_param_comparison_summary.json"
with open(summary_path, "w") as fp:
    json.dump({
        "timestamp" : datetime.utcnow().isoformat(timespec="seconds") + "Z",
        "metrics"   : results_sorted
    }, fp, indent=2)

print(f"\n📑 Comparison summary saved → {summary_path}")


📊 Loading & scaling data …

Run-01  (win=12, filt=64, pool=gap)
───────────────────────────────────
Precision : 0.530   Recall : 0.492   F1 : 0.511

Run-02  (win=30, filt=96, pool=gmp)
───────────────────────────────────
Precision : 0.491   Recall : 0.144   F1 : 0.223

Run-03  (win=36, filt=64, pool=gmp)
───────────────────────────────────
Precision : 0.515   Recall : 0.535   F1 : 0.525

Run-04  (win=24, filt=32, pool=gmp)
───────────────────────────────────
Precision : 0.525   Recall : 0.659   F1 : 0.584

Run-05  (win=30, filt=64, pool=gmp)
───────────────────────────────────
Precision : 0.535   Recall : 0.138   F1 : 0.220

Run-06  (win=12, filt=64, pool=gap)
───────────────────────────────────
Precision : 0.530   Recall : 0.437   F1 : 0.479

🏆  Leaderboard (sorted by F1)
 1. F1=0.584  P=0.525  R=0.659  (win=24, filt=32, pool=gmp)
 2. F1=0.525  P=0.515  R=0.535  (win=36, filt=64, pool=gmp)
 3. F1=0.511  P=0.530  R=0.492  (win=12, filt=64, pool=gap)
 4. F1=0.479  P=0.530  R=0.437  (win

In [2]:
# cnn_final_train_fixed.py
# ----------------------------------------------------------
# Trains a 1-D CNN with the optimal hyper-parameters you supplied.

import os, json, joblib, warnings
from pathlib import Path
from datetime import datetime

import numpy   as np
import pandas  as pd
import tensorflow as tf
from   tensorflow import keras
from   tensorflow.keras import layers, regularizers
from   sklearn.preprocessing import StandardScaler
from   sklearn.metrics       import (accuracy_score, precision_score,
                                     recall_score, roc_auc_score,
                                     confusion_matrix,
                                     classification_report)

# ═══════════════ Seeds / GPU set-up ══════════════════════
warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)
for g in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(g, True)

# ═══════════════ Paths & constants ═══════════════════════
CSV_PATH      = Path(r"C:/Users/ADMIN/Desktop/Coding_projects/stock_market_prediction/"
                     r"Stock-Market-Prediction/data/processed/"
                     r"gemini_btc_with_features_4h.csv")
MODEL_OUT     = "cnn_optimal_val20.h5"
SCALER_OUT    = "cnn_scaler.pkl"
SUMMARY_JSON  = "cnn_training_summary.json"

# ---- hyper-parameters (fixed) ----
WIN,  BATCH   = 24, 32
LR            = 0.00104
FILTERS       = 32
KERNEL        = 4
ACT           = "elu"
DROPOUT       = 0.292
DENSE_UNITS   = 128
CONV_BLOCKS   = 1
POOL_TYPE     = "gmp"      # gmp | gap
EXTRA_DENSE   = False
L2_REG        = 4.33e-06
EPOCHS        = 100
EARLY_STOP    = 12
ALPHA         = 2.0        # precision weight in weighted-F
VAL_FRAC      = 0.20

DROP_COLS = [
    'open','high','low','typical_price','EMA_7','EMA_21','SMA_20','SMA_50',
    'vwap_24h','close_4h','bollinger_upper','bollinger_lower','resistance_level',
    'support_level','high_low','high_close','low_close','true_range',
    'volume_mean_20','MACD_line','MACD_signal','bollinger_width',
    'volatility_regime','CCI','stoch_%D','parkinson_vol','ema_cross_down',
    'macd_cross_down','vol_spike_1_5x','near_upper_band','near_lower_band',
    'break_upper_band','break_lower_band','rsi_oversold','rsi_overbought',
    'above_sma20','above_sma50','ema7_above_ema21','macd_positive',
    'volume_breakout','volume_breakdown','stoch_overbought','stoch_oversold',
    'cci_overbought','cci_oversold','trending_market','bullish_scenario_1',
    'bullish_scenario_2','bullish_scenario_3','bullish_scenario_4',
    'bullish_scenario_5','bullish_scenario_6','bearish_scenario_1',
    'bearish_scenario_2','bearish_scenario_3','bearish_scenario_4',
    'bearish_scenario_6','ema_cross_up','macd_cross_up',
    'oversold_reversal','overbought_reversal','close'
]

# ═══════════════ helpers ═════════════════════════════════
def make_windows(arr: np.ndarray, labels: np.ndarray, win: int):
    xs, ys = [], []
    for i in range(win, len(arr)):
        xs.append(arr[i-win:i])
        ys.append(labels[i])
    return np.asarray(xs, np.float32), np.asarray(ys, np.int8)

def weighted_f(alpha: float, p: float, r: float) -> float:
    if p + r == 0:
        return 0.0
    return (1 + alpha) * p * r / (alpha * p + r)

def build_model(win: int, n_features: int) -> keras.Model:
    """**Fixed**: pooling layers are *called* on tensors."""
    inp = layers.Input(shape=(win, n_features))
    x   = inp

    for b in range(CONV_BLOCKS):
        f = FILTERS * (2 ** b)
        y = layers.Conv1D(f, KERNEL, padding="causal",
                          activation=ACT,
                          kernel_regularizer=regularizers.l2(L2_REG))(x)
        y = layers.BatchNormalization()(y)

        if CONV_BLOCKS > 1:
            if x.shape[-1] != y.shape[-1]:
                x = layers.Conv1D(f, 1, padding="same")(x)
            x = layers.Add()([x, y])
        else:
            x = y

    if POOL_TYPE == "gmp":
        x = layers.GlobalMaxPooling1D()(x)
    else:
        x = layers.GlobalAveragePooling1D()(x)

    x = layers.Dropout(DROPOUT)(x)

    if EXTRA_DENSE:
        x = layers.Dense(DENSE_UNITS, activation=ACT,
                         kernel_regularizer=regularizers.l2(L2_REG))(x)
        x = layers.Dropout(DROPOUT)(x)

    out = layers.Dense(1, activation="sigmoid")(x)
    return keras.Model(inp, out)

# ═══════════════ 1. DATA ═════════════════════════════════
print("📊 Loading & preprocessing …")

df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
df = df.loc["2018-01-01":]
df = df.drop(columns=[c for c in DROP_COLS if c in df.columns])
df = df.dropna(subset=["target"]).dropna()

X_raw = df.drop(columns="target").values
y_raw = df["target"].astype(int).values
n_features = X_raw.shape[1]

split_idx = int(len(df) * (1 - VAL_FRAC))
scaler    = StandardScaler().fit(X_raw[:split_idx])

X_train_s = scaler.transform(X_raw[:split_idx]).astype(np.float32)
X_val_s   = scaler.transform(X_raw[ split_idx:]).astype(np.float32)
y_train   = y_raw[: split_idx]
y_val     = y_raw[ split_idx:]

X_train, y_train = make_windows(X_train_s, y_train, WIN)
X_val,   y_val   = make_windows(X_val_s,   y_val,   WIN)

print(f"   Train windows : {len(X_train):,}")
print(f"   Val   windows : {len(X_val):,}")
print(f"   Features      : {n_features}")

# ═══════════════ 2. MODEL ═══════════════════════════════
print("\n🏗️ Building model …")
tf.keras.backend.clear_session()
model = build_model(WIN, n_features)
model.compile(optimizer=keras.optimizers.Adam(LR),
              loss="binary_crossentropy",
              metrics=["accuracy"])

# ═══════════════ 3. TRAIN ═══════════════════════════════
callbacks = [
    keras.callbacks.EarlyStopping(patience=EARLY_STOP,
                                  restore_best_weights=True,
                                  verbose=1),
    keras.callbacks.ReduceLROnPlateau(factor=0.5,
                                      patience=5,
                                      verbose=1,
                                      min_lr=1e-7)
]

print("\n🚀 Training …")
history = model.fit(X_train, y_train,
                    epochs=EPOCHS,
                    batch_size=BATCH,
                    validation_data=(X_val, y_val),
                    callbacks=callbacks,
                    verbose=2)

# ═══════════════ 4. EVALUATION ══════════════════════════
prob = model.predict(X_val, verbose=0).ravel()
pred = (prob >= 0.5).astype(int)

precision = precision_score(y_val, pred, zero_division=0)
recall    = recall_score(y_val, pred, zero_division=0)
wf1       = weighted_f(ALPHA, precision, recall)

metrics = {
    "accuracy"      : accuracy_score(y_val, pred),
    "precision"     : precision,
    "recall"        : recall,
    "weighted_f1_a2": wf1,
    "auc"           : roc_auc_score(y_val, prob)
}

print("\n── Validation metrics ──")
for k, v in metrics.items():
    print(f"{k:20s}: {v:6.3f}")

print("\nClassification report:")
print(classification_report(y_val, pred, target_names=["Down", "Up"]))

cm = confusion_matrix(y_val, pred)
print("\nConfusion-matrix:")
print(cm)

# ═══════════════ 5. SAVE ═══════════════════════════════=
print("\n💾 Saving artefacts …")
keras.models.save_model(model, MODEL_OUT)
joblib.dump(scaler, SCALER_OUT)

with open(SUMMARY_JSON, "w") as fp:
    json.dump({
        "timestamp"      : datetime.utcnow().isoformat(timespec="seconds") + "Z",
        "window_size"    : WIN,
        "n_features"     : n_features,
        "train_windows"  : int(len(X_train)),
        "val_windows"    : int(len(X_val)),
        "metrics"        : {k: float(v) for k, v in metrics.items()},
        "hyperparameters": {
            "filters" : FILTERS, "kernel": KERNEL, "activation": ACT,
            "dropout" : DROPOUT, "dense_units": DENSE_UNITS,
            "conv_blocks": CONV_BLOCKS, "pool": POOL_TYPE,
            "extra_dense": EXTRA_DENSE, "lr": LR, "batch": BATCH,
            "l2_reg": L2_REG
        },
        "confusion_matrix": cm.tolist()
    }, fp, indent=2)

print(f"\n✅ Model   saved → {MODEL_OUT}")
print(f"✅ Scaler  saved → {SCALER_OUT}")
print(f"✅ Summary saved → {SUMMARY_JSON}")
print("🎉 Training complete.")


📊 Loading & preprocessing …
   Train windows : 12,660
   Val   windows : 3,147
   Features      : 19

🏗️ Building model …


🚀 Training …
Epoch 1/100
396/396 - 2s - 4ms/step - accuracy: 0.5092 - loss: 0.8559 - val_accuracy: 0.4986 - val_loss: 0.7030 - learning_rate: 0.0010
Epoch 2/100
396/396 - 1s - 1ms/step - accuracy: 0.5043 - loss: 0.7407 - val_accuracy: 0.5005 - val_loss: 0.6970 - learning_rate: 0.0010
Epoch 3/100
396/396 - 1s - 2ms/step - accuracy: 0.5069 - loss: 0.7090 - val_accuracy: 0.5011 - val_loss: 0.6947 - learning_rate: 0.0010
Epoch 4/100
396/396 - 1s - 1ms/step - accuracy: 0.5086 - loss: 0.6994 - val_accuracy: 0.5068 - val_loss: 0.6941 - learning_rate: 0.0010
Epoch 5/100
396/396 - 1s - 1ms/step - accuracy: 0.5164 - loss: 0.6947 - val_accuracy: 0.5059 - val_loss: 0.6937 - learning_rate: 0.0010
Epoch 6/100
396/396 - 1s - 1ms/step - accuracy: 0.5137 - loss: 0.6948 - val_accuracy: 0.5043 - val_loss: 0.6938 - learning_rate: 0.0010
Epoch 7/100
396/396 - 1s - 1ms/step - accuracy:



              precision    recall  f1-score   support

        Down       0.49      0.24      0.33      1503
          Up       0.53      0.76      0.62      1644

    accuracy                           0.52      3147
   macro avg       0.51      0.50      0.47      3147
weighted avg       0.51      0.52      0.48      3147


Confusion-matrix:
[[ 368 1135]
 [ 388 1256]]

💾 Saving artefacts …

✅ Model   saved → cnn_optimal_val20.h5
✅ Scaler  saved → cnn_scaler.pkl
✅ Summary saved → cnn_training_summary.json
🎉 Training complete.
