In [None]:
Keep:
atr_14, OBV

Drop:

'ema_cross_up', 'macd_cross_up', 'oversold_reversal', 'overbought_reversal', 'trending_market'

In [None]:
# Candidate list of feature columns to drop for the CNN/LSTM models.
# NOTE(review): 'bearish_scenario_5' is not in this list — confirm whether
# that omission is intentional.
drop_lstm_cnn = [
    "open", "high", "low", "typical_price",
    "EMA_21", "SMA_20", "vwap_24h", "close_4h",
    "bollinger_upper", "bollinger_lower",
    "resistance_level", "support_level",
    "high_low", "high_close", "low_close", "true_range",
    "volume_mean_20",
    "MACD_line", "MACD_signal",
    "volatility_regime", "trending_market",
    "above_sma50", "ema7_above_ema21",
    "rsi_overbought", "stoch_oversold", "cci_oversold",
    "vol_spike_1_5x",
    "near_upper_band", "near_lower_band",
    "break_upper_band", "break_lower_band",
    "rsi_oversold", "above_sma20", "macd_positive",
    "volume_breakout", "volume_breakdown",
    "bullish_scenario_1", "bullish_scenario_2", "bullish_scenario_3",
    "bullish_scenario_4", "bullish_scenario_5", "bullish_scenario_6",
    "bearish_scenario_1", "bearish_scenario_2", "bearish_scenario_3",
    "bearish_scenario_4", "bearish_scenario_6",
    "ema_cross_up", "macd_cross_up",
    "oversold_reversal", "overbought_reversal",
]

In [None]:
"""
cnn_lstm_optuna_search.py  · 2025-06-11
────────────────────────────────────────────────────────────────────────
Optuna hyper-parameter search for a CNN–LSTM classifier,
optimising a weighted F1 with α = 2: Fα = (1+α)·P·R / (α·P + R), which weights recall ×2.

Outputs
-------
• cnn_lstm_scaler.pkl
• best_params_cnn_lstm_<ts>.json
• trials_cnn_lstm_<ts>.csv
• history_cnn_lstm_<ts>.png
"""

# ───────── imports & runtime hygiene ─────────
import os

# TensorFlow's native logger reads TF_CPP_MIN_LOG_LEVEL when the library is
# first loaded, so the variable has to be set *before* `import tensorflow`;
# setting it afterwards (as was done previously) does not silence the C++ logs.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import json, gc, warnings, optuna
from datetime import datetime
from pathlib import Path
from typing import Dict, Tuple

import numpy as np
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (precision_score, recall_score,
                             f1_score, accuracy_score)

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from optuna_integration.tfkeras import TFKerasPruningCallback

# Suppress Python-level warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Reproducibility seed, validation fraction (taken from the end of the data)
# and the α used by the weighted-F1 score.
SEED = 42
VAL_FRAC = 0.20
ALPHA = 2.0

# Search budget: stop after 100 trials or one hour, whichever comes first.
N_TRIALS = 100
TIMEOUT = 60 * 60

CSV_PATH = Path(
    r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction"
    r"\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv"
)
SCALER_PKL = "cnn_lstm_scaler.pkl"

# Columns removed from the feature matrix before scaling (only those actually
# present in the CSV are dropped).
DROP_COLS = [
    "open", "high", "low", "typical_price",
    "EMA_21", "SMA_20", "close_4h",
    "bollinger_upper", "bollinger_lower",
    "resistance_level", "support_level",
    "high_low", "high_close", "low_close", "true_range",
    "volume_mean_20",
    "MACD_line", "MACD_signal",
    "volatility_regime", "trending_market",
    "above_sma50", "ema7_above_ema21",
    "rsi_overbought", "stoch_oversold", "cci_oversold",
    "near_upper_band", "near_lower_band",
    "break_upper_band", "break_lower_band",
    "rsi_oversold", "above_sma20", "macd_positive",
    "volume_breakout", "volume_breakdown",
    "bullish_scenario_1", "bullish_scenario_2", "bullish_scenario_3",
    "bullish_scenario_4", "bullish_scenario_5", "bullish_scenario_6",
    "bearish_scenario_1", "bearish_scenario_2", "bearish_scenario_3",
    "bearish_scenario_4", "bearish_scenario_6",
    "ema_cross_up", "macd_cross_up",
    "oversold_reversal", "overbought_reversal",
    "close",
]

# Seed NumPy and TensorFlow so repeated runs start from the same RNG state.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Turn on memory growth for every GPU that TensorFlow can see.
for gpu in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(gpu, True)

# ───────── data load & scale ─────────
# Load the feature CSV, keep rows from 2018 onwards, drop the unwanted columns
# and remove every row that still contains a NaN.
# `errors="ignore"` skips DROP_COLS entries that are missing from the CSV.  The
# previous filter `[c for c in DROP_COLS if c in df.columns]` referenced `df`
# inside the expression that first defines it, which raises NameError on a
# fresh kernel.
df = (pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
        .loc["2018-01-01":]
        .drop(columns=DROP_COLS, errors="ignore")
        .dropna(subset=["target"]).dropna())

X_raw = df.drop(columns="target").values
y_raw = df["target"].astype(int).values
n_feat = X_raw.shape[1]          # number of input features per time step

# Chronological split: the last VAL_FRAC of the rows is the validation set.
split = int(len(df) * (1 - VAL_FRAC))

# Fit the scaler on the training rows only, then persist it to disk.
scaler = StandardScaler().fit(X_raw[:split])
joblib.dump(scaler, SCALER_PKL)

X_tr_raw = scaler.transform(X_raw[:split]).astype(np.float32)
X_va_raw = scaler.transform(X_raw[split:]).astype(np.float32)
y_tr_raw, y_va_raw = y_raw[:split], y_raw[split:]

# Class weights used by the weighted BCE loss: negatives get 1, positives get
# (#negatives / #positives) measured on the training labels (1 if no positives).
pos = y_tr_raw.mean()
W0, W1 = np.float32(1.0), np.float32((1 - pos) / pos if pos else 1.0)

# ───────── window helper (cache) ─────────
# Cache of already-built window tensors.
# Key:   (id(arr), id(lab), win)
# Value: (arr, lab, Xw, yw) — the source arrays are stored alongside the result
#        so their ids cannot be recycled for other objects while the entry lives.
_CACHE: Dict[Tuple[int, int, int], tuple] = {}

def make_windows(arr, lab, win):
    """Slice a 2-D feature array into overlapping look-back windows.

    For every index ``i`` in ``range(win, len(arr))`` the sample is
    ``arr[i-win:i]`` and its label is ``lab[i]``.

    Parameters
    ----------
    arr : array with rows as time steps (indexed as ``arr[i-win:i]``).
    lab : label sequence indexed in step with ``arr``.
    win : window length in rows.

    Returns
    -------
    (Xw, yw) : ``np.float32`` windows and ``np.int8`` labels.

    Notes
    -----
    Results are memoised per (arr, lab, win) *object identity* when the result
    is smaller than 1e9 bytes.  The previous key ``(len(arr), win, n_features)``
    ignored the array contents, so two different arrays of equal length and
    width silently received each other's windows.  Arrays mutated in place
    after the first call will still return the cached (stale) windows.
    """
    key = (id(arr), id(lab), win)
    hit = _CACHE.get(key)
    # Identity check guards against an id being reused by a different object.
    if hit is not None and hit[0] is arr and hit[1] is lab:
        return hit[2], hit[3]

    X, y = [], []
    for i in range(win, len(arr)):
        X.append(arr[i-win:i]); y.append(lab[i])
    Xw, yw = np.asarray(X, np.float32), np.asarray(y, np.int8)

    # Only keep reasonably small results in memory.
    if Xw.nbytes + yw.nbytes < 1e9:
        _CACHE[key] = (arr, lab, Xw, yw)
    return Xw, yw

def wf1(y, p, alpha=ALPHA):
    """Weighted harmonic mean of precision and recall at a 0.5 threshold.

    Computes ``(1 + alpha) * P * R / (alpha * P + R)`` on the labels obtained
    from ``p >= 0.5``; returns 0 when precision and recall are both zero.
    This is F-beta with ``beta**2 == alpha``, so for ``alpha > 1`` recall
    carries more weight than precision.
    NOTE(review): if precision was meant to carry the extra weight, the
    denominator would have to be ``P + alpha * R`` — confirm the intent.
    """
    predicted = (p >= .5).astype(int)
    precision = precision_score(y, predicted, zero_division=0)
    recall = recall_score(y, predicted, zero_division=0)
    if precision + recall == 0:
        return 0
    return (1 + alpha) * precision * recall / (alpha * precision + recall)

# ───────── model factory ─────────
def build(cfg):
    """Build and compile the binary classifier described by ``cfg``.

    Parameters
    ----------
    cfg : dict
        Hyper-parameters.  Always read: "l2", "window", "arch", "units",
        "lstm_drop", "filters", "kernel", "act", "dense", "dropout", "optim",
        "lr".  Read only for ``arch == "conv_lstm"``: "conv_blocks",
        "conv_drop".  Read only for the other arch: "pool".

    Returns
    -------
    keras.Model
        Model with a single sigmoid output, compiled with the class-weighted
        binary cross-entropy defined below (weights ``W0`` / ``W1``).
    """
    l2 = regularizers.l2(cfg["l2"])
    inp = layers.Input(shape=(cfg["window"], n_feat))

    if cfg["arch"] == "conv_lstm":
        # Stack of causal Conv1D blocks followed by one LSTM.
        x = inp
        for _ in range(cfg["conv_blocks"]):
            x = layers.Conv1D(cfg["filters"], cfg["kernel"], padding="causal",
                              activation=cfg["act"], kernel_regularizer=l2)(x)
            x = layers.Dropout(cfg["conv_drop"])(x)
        x = layers.LSTM(cfg["units"], dropout=cfg["lstm_drop"])(x)
    else:  # lstm_conv
        # Sequence-returning LSTM, one Conv1D, then global pooling over time.
        x = layers.LSTM(cfg["units"], dropout=cfg["lstm_drop"],
                        return_sequences=True)(inp)
        x = layers.Conv1D(cfg["filters"], cfg["kernel"], padding="same",
                          activation=cfg["act"], kernel_regularizer=l2)(x)
        x = (layers.GlobalMaxPooling1D()(x) if cfg["pool"]=="gmp"
             else layers.GlobalAveragePooling1D()(x))

    x = layers.Dense(cfg["dense"], activation=cfg["act"], kernel_regularizer=l2)(x)
    x = layers.Dropout(cfg["dropout"])(x)
    out = layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(inp, out)

    def wbce(y_t, y_p):
        """Binary cross-entropy with per-class weights W0 (label 0) / W1 (label 1)."""
        y_t = tf.cast(y_t, y_p.dtype)
        # binary_crossentropy averages over the last axis → one value per sample.
        bce = keras.losses.binary_crossentropy(y_t, y_p)
        w   = tf.where(tf.equal(y_t,1), W1, W0); w = tf.cast(w, y_p.dtype)
        # Give the weights the same shape as `bce`.  When y_t arrives as
        # (batch, 1), multiplying a (batch, 1) weight by a (batch,) loss would
        # broadcast to (batch, batch) and the mean would degenerate to
        # mean(w) * mean(bce), i.e. no per-sample class weighting at all.
        w   = tf.reshape(w, tf.shape(bce))
        return tf.reduce_mean(w * bce)

    # choose optimiser
    if cfg["optim"] == "adamw":
        opt = keras.optimizers.AdamW(cfg["lr"], weight_decay=cfg["l2"])
    elif cfg["optim"] == "nadam":
        opt = keras.optimizers.Nadam(cfg["lr"])
    else:
        opt = keras.optimizers.Adam(cfg["lr"])

    model.compile(opt, loss=wbce)
    return model

# ───────── Optuna objective ─────────
def objective(trial):
    """Optuna objective: train one sampled configuration and score it.

    Samples a configuration, builds windowed train/validation tensors, fits
    the model with early stopping, LR reduction and Optuna pruning on
    ``val_loss``, then evaluates on the validation windows.

    Returns the *negated* weighted-F1 (the study minimises), or ``inf`` when
    the training set holds fewer than eight batches of windows.  Precision and
    recall are stored on the trial as user attributes.
    """
    # The suggest_* calls are kept in their original order.
    cfg = {
        "arch":        trial.suggest_categorical("arch", ["conv_lstm","lstm_conv"]),
        "window":      trial.suggest_int("window", 12, 72, step=6),
        "filters":     trial.suggest_categorical("filters",[32,48,64,96,128]),
        "kernel":      trial.suggest_int("kernel", 2, 7),
        "units":       trial.suggest_categorical("units",  [32,64,96,128,192]),
        "conv_drop":   trial.suggest_float("conv_drop", 0,.3),
        "lstm_drop":   trial.suggest_float("lstm_drop", 0,.3),
        "dense":       trial.suggest_categorical("dense",[32,64,128]),
        "dropout":     trial.suggest_float("dropout", 0,.4),
        "l2":          trial.suggest_float("l2", 1e-6,1e-3,log=True),
        "lr":          trial.suggest_float("lr", 5e-5,3e-3,log=True),
        "batch":       trial.suggest_categorical("batch",[16,32,64,128]),
        "act":         trial.suggest_categorical("act", ["relu","elu"]),
        "pool":        trial.suggest_categorical("pool",["gmp","gap"]),
        "conv_blocks": trial.suggest_int("conv_blocks",1,2),
        "optim":       trial.suggest_categorical("optim",["adam","adamw","nadam"]),
    }
    window, batch_size = cfg["window"], cfg["batch"]

    X_tr, y_tr = make_windows(X_tr_raw, y_tr_raw, window)
    X_va, y_va = make_windows(X_va_raw, y_va_raw, window)

    # Too little data for this batch size: report the worst possible value.
    if len(X_tr) < batch_size * 8:
        return float("inf")

    # Start every trial from a clean Keras graph / freed memory.
    tf.keras.backend.clear_session()
    gc.collect()
    model = build(cfg)

    callbacks = [
        keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True, verbose=0),
        keras.callbacks.ReduceLROnPlateau(patience=5, factor=.5, min_lr=1e-6, verbose=0),
        TFKerasPruningCallback(trial, "val_loss"),
    ]
    model.fit(
        X_tr, y_tr,
        epochs=100,
        batch_size=batch_size,
        validation_data=(X_va, y_va),
        callbacks=callbacks,
        shuffle=False,
        verbose=0,
    )

    # Validation metrics at a fixed 0.5 decision threshold.
    prob = model.predict(X_va, batch_size=batch_size, verbose=0).ravel()
    hat = (prob >= .5).astype(int)
    score = wf1(y_va, prob)
    pr = precision_score(y_va, hat, zero_division=0)
    rc = recall_score(y_va, hat, zero_division=0)
    f1 = f1_score(y_va, hat, zero_division=0)
    acc = accuracy_score(y_va, hat)

    trial.set_user_attr("precision", pr)
    trial.set_user_attr("recall", rc)

    print(f"Trial {trial.number:02d} | Fα2={score:.3f} P={pr:.3f} R={rc:.3f} "
          f"F1={f1:.3f} Acc={acc:.3f} {cfg['arch']} win={cfg['window']}")

    del model
    tf.keras.backend.clear_session()
    gc.collect()
    return -score

# ───────── run search ─────────
# The objective returns the negated score, hence direction="minimize".
study = optuna.create_study(direction="minimize",
                            sampler=optuna.samplers.TPESampler(seed=SEED,
                                                               multivariate=True),
                            pruner=optuna.pruners.MedianPruner(n_startup_trials=5))

print(f"\n🚀  Starting {N_TRIALS}-trial search …")
study.optimize(objective, n_trials=N_TRIALS,
               timeout=TIMEOUT, show_progress_bar=True, gc_after_trial=True)

best, ts = study.best_trial, datetime.utcnow().strftime("%Y%m%d_%H%M%S")
print("\n✅ BEST TRIAL")
print(json.dumps({**best.params,
                  "Fα2": -best.value,          # undo the sign flip of the objective
                  "P"  : best.user_attrs["precision"],
                  "R"  : best.user_attrs["recall"]}, indent=2))

# Use a context manager so the JSON file is flushed and closed deterministically
# (the bare `open(...)` used before left the handle open).
with open(f"best_params_cnn_lstm_{ts}.json", "w") as fh:
    json.dump(best.params, fh, indent=2)
study.trials_dataframe().to_csv(f"trials_cnn_lstm_{ts}.csv", index=False)

# The history plot is best-effort: a failure must not abort the run, but it is
# reported instead of being silently swallowed.
try:
    import matplotlib.pyplot as plt
    optuna.visualization.matplotlib.plot_optimization_history(study)
    plt.tight_layout(); plt.savefig(f"history_cnn_lstm_{ts}.png", dpi=300); plt.close()
except Exception as exc:
    print(f"⚠️  Optimisation-history plot not saved: {exc!r}")

print(f"\n📝 Artefacts saved with timestamp {ts}.  Scaler → {SCALER_PKL}")


[I 2025-06-11 12:51:22,414] A new study created in memory with name: no-name-4a13c6ca-8c08-4449-91d3-c10d60bb8970



🚀  Starting 100-trial search …


  0%|          | 0/100 [00:00<?, ?it/s]

Trial 00 | Fα2=0.554 P=0.524 R=0.570 F1=0.546 Acc=0.506 lstm_conv win=60


  0%|          | 0/100 [02:05<?, ?it/s]

[I 2025-06-11 12:53:27,827] Trial 0 finished with value: -0.5537355173791451 and parameters: {'arch': 'lstm_conv', 'window': 60, 'filters': 128, 'kernel': 5, 'units': 96, 'conv_drop': 0.05454749016213018, 'lstm_drop': 0.055021352956030146, 'dense': 64, 'dropout': 0.11649165607921677, 'l2': 6.847920095574779e-05, 'lr': 8.851384099881297e-05, 'batch': 128, 'act': 'elu', 'pool': 'gmp', 'conv_blocks': 2, 'optim': 'nadam'}. Best is trial 0 with value: -0.5537355173791451.


Best trial: 0. Best value: -0.553736:   1%|          | 1/100 [02:06<3:29:03, 126.71s/it, 126.70/3600 seconds]

Trial 01 | Fα2=0.280 P=0.547 R=0.225 F1=0.319 Acc=0.499 conv_lstm win=30


Best trial: 0. Best value: -0.553736:   1%|          | 1/100 [02:30<3:29:03, 126.71s/it, 126.70/3600 seconds]

[I 2025-06-11 12:53:53,177] Trial 1 finished with value: -0.2801113360323887 and parameters: {'arch': 'conv_lstm', 'window': 30, 'filters': 48, 'kernel': 2, 'units': 32, 'conv_drop': 0.16401308380298388, 'lstm_drop': 0.05545633665765811, 'dense': 32, 'dropout': 0.35793094017105953, 'l2': 6.218704727769077e-05, 'lr': 0.0021787220464104273, 'batch': 128, 'act': 'relu', 'pool': 'gmp', 'conv_blocks': 1, 'optim': 'nadam'}. Best is trial 0 with value: -0.5537355173791451.


Best trial: 0. Best value: -0.553736:   2%|▏         | 2/100 [02:32<1:49:39, 67.14s/it, 152.15/3600 seconds] 

Trial 02 | Fα2=0.090 P=0.554 R=0.064 F1=0.114 Acc=0.485 lstm_conv win=60


Best trial: 0. Best value: -0.553736:   2%|▏         | 2/100 [05:56<1:49:39, 67.14s/it, 152.15/3600 seconds]

[I 2025-06-11 12:57:18,623] Trial 2 finished with value: -0.09008746355685131 and parameters: {'arch': 'lstm_conv', 'window': 60, 'filters': 64, 'kernel': 6, 'units': 128, 'conv_drop': 0.09926940745579475, 'lstm_drop': 0.01906750508580709, 'dense': 128, 'dropout': 0.25502298854208527, 'l2': 0.0004588156549160974, 'lr': 0.000345652389857876, 'batch': 64, 'act': 'relu', 'pool': 'gmp', 'conv_blocks': 1, 'optim': 'nadam'}. Best is trial 0 with value: -0.5537355173791451.


Best trial: 0. Best value: -0.553736:   3%|▎         | 3/100 [05:57<3:30:39, 130.30s/it, 357.61/3600 seconds]

Trial 03 | Fα2=0.567 P=0.519 R=0.594 F1=0.554 Acc=0.501 lstm_conv win=66


Best trial: 0. Best value: -0.553736:   3%|▎         | 3/100 [08:27<3:30:39, 130.30s/it, 357.61/3600 seconds]

[I 2025-06-11 12:59:50,245] Trial 3 finished with value: -0.5669152276295134 and parameters: {'arch': 'lstm_conv', 'window': 66, 'filters': 64, 'kernel': 3, 'units': 64, 'conv_drop': 0.24110162306973432, 'lstm_drop': 0.05597101766581075, 'dense': 32, 'dropout': 0.3584365199693973, 'l2': 8.995191735587168e-06, 'lr': 7.846192726793281e-05, 'batch': 128, 'act': 'elu', 'pool': 'gmp', 'conv_blocks': 1, 'optim': 'adamw'}. Best is trial 3 with value: -0.5669152276295134.


Best trial: 3. Best value: -0.566915:   4%|▍         | 4/100 [08:29<3:41:57, 138.72s/it, 509.24/3600 seconds]

Trial 04 | Fα2=0.000 P=0.000 R=0.000 F1=0.000 Acc=0.478 lstm_conv win=30


Best trial: 3. Best value: -0.566915:   4%|▍         | 4/100 [09:25<3:41:57, 138.72s/it, 509.24/3600 seconds]

[I 2025-06-11 13:00:47,566] Trial 4 finished with value: 0.0 and parameters: {'arch': 'lstm_conv', 'window': 30, 'filters': 32, 'kernel': 3, 'units': 64, 'conv_drop': 0.2724797657899961, 'lstm_drop': 0.07186856720009173, 'dense': 128, 'dropout': 0.09682210860460017, 'l2': 0.00010385003379927417, 'lr': 0.0011304331263607352, 'batch': 32, 'act': 'relu', 'pool': 'gap', 'conv_blocks': 1, 'optim': 'nadam'}. Best is trial 3 with value: -0.5669152276295134.


Best trial: 3. Best value: -0.566915:   5%|▌         | 5/100 [09:26<2:53:11, 109.39s/it, 566.61/3600 seconds]

Trial 05 | Fα2=0.586 P=0.526 R=0.622 F1=0.570 Acc=0.510 conv_lstm win=42


Best trial: 3. Best value: -0.566915:   5%|▌         | 5/100 [12:03<2:53:11, 109.39s/it, 566.61/3600 seconds]

[I 2025-06-11 13:03:26,190] Trial 5 finished with value: -0.5861405197305101 and parameters: {'arch': 'conv_lstm', 'window': 42, 'filters': 96, 'kernel': 7, 'units': 128, 'conv_drop': 0.07738248831454668, 'lstm_drop': 0.1979952138102537, 'dense': 32, 'dropout': 0.09674091636018067, 'l2': 1.902428324748959e-06, 'lr': 0.001969497011745637, 'batch': 16, 'act': 'elu', 'pool': 'gmp', 'conv_blocks': 2, 'optim': 'nadam'}. Best is trial 5 with value: -0.5861405197305101.


Best trial: 5. Best value: -0.586141:   6%|▌         | 6/100 [12:05<3:17:34, 126.11s/it, 725.20/3600 seconds]

Trial 06 | Fα2=0.030 P=0.559 R=0.020 F1=0.039 Acc=0.480 conv_lstm win=18


Best trial: 5. Best value: -0.586141:   6%|▌         | 6/100 [14:33<3:17:34, 126.11s/it, 725.20/3600 seconds]

[I 2025-06-11 13:05:56,316] Trial 6 finished with value: -0.029525797793021172 and parameters: {'arch': 'conv_lstm', 'window': 18, 'filters': 128, 'kernel': 5, 'units': 192, 'conv_drop': 0.1948898697141644, 'lstm_drop': 0.2547670231482534, 'dense': 32, 'dropout': 0.1470863212237734, 'l2': 6.24607368131809e-06, 'lr': 0.00013577521331829283, 'batch': 16, 'act': 'relu', 'pool': 'gmp', 'conv_blocks': 1, 'optim': 'adam'}. Best is trial 5 with value: -0.5861405197305101.


Best trial: 5. Best value: -0.586141:   7%|▋         | 7/100 [16:16<3:27:36, 133.94s/it, 875.24/3600 seconds]

[I 2025-06-11 13:07:39,087] Trial 7 pruned. Trial was pruned at epoch 14.


Best trial: 5. Best value: -0.586141:   8%|▊         | 8/100 [16:35<3:10:11, 124.04s/it, 978.09/3600 seconds]

[I 2025-06-11 13:07:57,890] Trial 8 pruned. Trial was pruned at epoch 0.


Best trial: 5. Best value: -0.586141:   9%|▉         | 9/100 [16:53<2:18:12, 91.13s/it, 996.86/3600 seconds] 

[I 2025-06-11 13:08:15,914] Trial 9 pruned. Trial was pruned at epoch 0.


Best trial: 5. Best value: -0.586141:  10%|█         | 10/100 [16:54<1:42:50, 68.56s/it, 1014.89/3600 seconds]

Trial 10 | Fα2=0.000 P=0.000 R=0.000 F1=0.000 Acc=0.478 conv_lstm win=36


Best trial: 5. Best value: -0.586141:  10%|█         | 10/100 [19:48<1:42:50, 68.56s/it, 1014.89/3600 seconds]

[I 2025-06-11 13:11:11,403] Trial 10 finished with value: 0.0 and parameters: {'arch': 'conv_lstm', 'window': 36, 'filters': 32, 'kernel': 6, 'units': 128, 'conv_drop': 0.07202341203296447, 'lstm_drop': 0.18814920204905028, 'dense': 64, 'dropout': 0.03425733184657786, 'l2': 3.0695249234567865e-06, 'lr': 0.001418232376111356, 'batch': 16, 'act': 'relu', 'pool': 'gmp', 'conv_blocks': 1, 'optim': 'nadam'}. Best is trial 5 with value: -0.5861405197305101.


Best trial: 5. Best value: -0.586141:  11%|█         | 11/100 [19:50<2:30:14, 101.29s/it, 1190.37/3600 seconds]

Trial 11 | Fα2=0.508 P=0.519 R=0.502 F1=0.511 Acc=0.497 lstm_conv win=66


Best trial: 5. Best value: -0.586141:  11%|█         | 11/100 [26:21<2:30:14, 101.29s/it, 1190.37/3600 seconds]

[I 2025-06-11 13:17:44,047] Trial 11 finished with value: -0.5077978789769183 and parameters: {'arch': 'lstm_conv', 'window': 66, 'filters': 64, 'kernel': 3, 'units': 128, 'conv_drop': 0.2757420493320118, 'lstm_drop': 0.09027008622194727, 'dense': 32, 'dropout': 0.3870562117313163, 'l2': 1.1071142752856649e-05, 'lr': 0.00012710261905184188, 'batch': 128, 'act': 'relu', 'pool': 'gap', 'conv_blocks': 1, 'optim': 'adamw'}. Best is trial 5 with value: -0.5861405197305101.


Best trial: 5. Best value: -0.586141:  12%|█▏        | 12/100 [26:44<4:38:34, 189.93s/it, 1583.06/3600 seconds]

[I 2025-06-11 13:18:07,157] Trial 12 pruned. Trial was pruned at epoch 0.


Best trial: 5. Best value: -0.586141:  13%|█▎        | 13/100 [27:01<3:22:06, 139.39s/it, 1606.14/3600 seconds]

[I 2025-06-11 13:18:24,090] Trial 13 pruned. Trial was pruned at epoch 0.


Best trial: 5. Best value: -0.586141:  14%|█▍        | 14/100 [27:03<2:26:47, 102.41s/it, 1623.10/3600 seconds]

Trial 14 | Fα2=0.766 P=0.522 R=1.000 F1=0.686 Acc=0.522 conv_lstm win=54


Best trial: 5. Best value: -0.586141:  14%|█▍        | 14/100 [33:35<2:26:47, 102.41s/it, 1623.10/3600 seconds]

[I 2025-06-11 13:24:58,034] Trial 14 finished with value: -0.7657869934024506 and parameters: {'arch': 'conv_lstm', 'window': 54, 'filters': 96, 'kernel': 7, 'units': 192, 'conv_drop': 0.023002858013431764, 'lstm_drop': 0.20388968390933723, 'dense': 32, 'dropout': 0.15387612135152867, 'l2': 6.633730435337335e-06, 'lr': 0.0027802249303014543, 'batch': 16, 'act': 'elu', 'pool': 'gap', 'conv_blocks': 2, 'optim': 'nadam'}. Best is trial 14 with value: -0.7657869934024506.


Best trial: 14. Best value: -0.765787:  15%|█▌        | 15/100 [34:11<4:29:33, 190.28s/it, 2017.03/3600 seconds]

[I 2025-06-11 13:25:34,111] Trial 15 pruned. Trial was pruned at epoch 0.


Best trial: 14. Best value: -0.765787:  16%|█▌        | 16/100 [34:13<3:21:25, 143.87s/it, 2053.13/3600 seconds]

In [None]:


# ─── imports ───────────────────────────────────────────────────
import os

# TensorFlow's native logger reads TF_CPP_MIN_LOG_LEVEL when the library is
# first loaded, so the variable has to be set *before* `import tensorflow`;
# setting it afterwards (as was done previously) does not change the C++ logs.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

import json, gc, warnings, joblib, numpy as np, pandas as pd
from datetime import datetime
from pathlib import Path
from typing import Dict, Tuple

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (classification_report, confusion_matrix,
                             precision_score, recall_score, f1_score,
                             accuracy_score, roc_curve, auc,
                             precision_recall_curve)

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.callbacks import (ModelCheckpoint, EarlyStopping,
                                        ReduceLROnPlateau)

import matplotlib.pyplot as plt
import seaborn as sns

# Suppress Python-level warnings for the rest of the session.
warnings.filterwarnings("ignore")

# ─── fixed params from Optuna ───────────────────────────────────────────
# Hyper-parameters used for the final training run.
BEST_PARAMS = {
    "arch": "lstm_conv",
    "window": 24,
    "filters": 64,
    "kernel": 5,
    "units": 32,
    "conv_drop": 0.220,
    "lstm_drop": 0.285,
    "dense": 32,
    "dropout": 0.378,
    "l2": 1.449e-6,
    "lr": 0.00135,
    "batch": 32,
    "act": "relu",
    "pool": "gmp",
}

# ─── paths & config ────────────────────────────────────────────
CSV_PATH = Path(
    r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction"
    r"\Stock-Market-Prediction\data\processed\gemini_btc_with_features_4h.csv"
)

# Columns removed from the feature matrix before scaling.
DROP_COLS = [
    "open", "high", "low", "close", "typical_price",
    "EMA_21", "SMA_20", "vwap_24h", "close_4h",
]

# Seed, validation / test fractions (taken chronologically from the end of the
# data) and the α of the weighted-F1 score.
SEED = 42
VAL_FRAC = .15
TEST_FRAC = .15
ALPHA = 2.0

# Maximum number of epochs and early-stopping patience.
EPOCHS = 200
PATIENCE = 20

# All artefacts of a run go to OUT_DIR and share the same timestamp suffix.
OUT_DIR = Path("model_outputs")
OUT_DIR.mkdir(exist_ok=True)
STAMP = datetime.now().strftime("%Y%m%d_%H%M%S")

# Seed NumPy and TensorFlow so repeated runs start from the same RNG state.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# GPU setup with feedback: enable memory growth on each visible GPU and report
# how many were found.
gpus = tf.config.experimental.list_physical_devices("GPU")
if not gpus:
    print("⚠️ No GPU found, using CPU")
else:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    print(f"✅ Found {len(gpus)} GPU(s)")

# ─── data load & split ─────────────────────────────────────────────────
print("📊 Loading data...")
# Read the CSV, keep rows from 2018 on, drop the unwanted columns (missing ones
# are ignored) and remove every row that still contains a NaN.
df = (
    pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
      .loc["2018-01-01":]
      .drop(columns=DROP_COLS, errors='ignore')
      .dropna(subset=["target"])
      .dropna()
)

# Feature names, in column order, excluding the label column.
feat_names = df.columns[df.columns != "target"].tolist()

X_raw = df.drop(columns="target").to_numpy().astype(np.float32)
y_raw = df["target"].astype(np.int8).to_numpy()
n_feat = X_raw.shape[1]

# Chronological train / validation / test split.
n = len(df)
val_start = int(n * (1 - VAL_FRAC - TEST_FRAC))
test_start = int(n * (1 - TEST_FRAC))
train_idx, val_idx, test_idx = (
    np.arange(start, stop)
    for start, stop in ((0, val_start), (val_start, test_start), (test_start, n))
)

# The scaler is fitted on the training rows only and then applied to all splits.
scaler = StandardScaler().fit(X_raw[train_idx])
X_train, X_val, X_test = (scaler.transform(X_raw[idx])
                          for idx in (train_idx, val_idx, test_idx))
y_train, y_val, y_test = (y_raw[idx] for idx in (train_idx, val_idx, test_idx))

joblib.dump(scaler, OUT_DIR / f"scaler_{STAMP}.pkl")

# Class weights: negatives get 1, positives get (#negatives / #positives) as
# measured on the training labels (1 if there are no positives).
pos = y_train.mean()
W0 = 1.0
W1 = (1-pos)/pos if pos else 1.0

print(f"Data shape: {df.shape}")
print(f"Train: {len(train_idx)}, Val: {len(val_idx)}, Test: {len(test_idx)}")
print(f"Positive rate: {pos:.3f}, Weights: W0={W0:.2f}, W1={W1:.2f}")

# ─── window helper ─────────────────────────────────────────────────────
def windows(X, y, w):
    """Turn a row-wise series into look-back windows.

    For each index ``i`` in ``range(w, len(X))`` the sample is ``X[i-w:i]`` and
    the label is ``y[i]``.  Returns ``(float32 windows, int8 labels)``.
    """
    ends = range(w, len(X))
    samples = [X[end - w:end] for end in ends]
    labels = [y[end] for end in ends]
    return np.asarray(samples, np.float32), np.asarray(labels, np.int8)

# Build the windowed tensors for each split with the configured look-back.
win = BEST_PARAMS["window"]
(X_tr_w, y_tr_w), (X_va_w, y_va_w), (X_te_w, y_te_w) = (
    windows(feats, labels, win)
    for feats, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

print(f"Window size: {win}, Train windows: {len(X_tr_w)}")

# ─── weighted F1 score ─────────────────────────────────────────────────
def wf1(y, proba, alpha=ALPHA):
    """Weighted harmonic mean of precision and recall at a 0.5 threshold.

    Returns ``(1 + alpha) * P * R / (alpha * P + R)``, or 0 when precision and
    recall are both zero.  This is F-beta with ``beta**2 == alpha``, so for
    ``alpha > 1`` recall carries more weight than precision.
    """
    labels = (proba >= 0.5).astype(int)
    precision = precision_score(y, labels, zero_division=0)
    recall = recall_score(y, labels, zero_division=0)
    if precision + recall == 0:
        return 0
    weighted = (1 + alpha) * precision * recall / (alpha * precision + recall)
    return weighted

# ─── model builder ─────────────────────────────────────────────
def build(cfg):
    """Build and compile the LSTM → Conv1D binary classifier.

    Parameters
    ----------
    cfg : dict
        Hyper-parameters.  Keys read: "l2", "window", "units", "lstm_drop",
        "filters", "kernel", "act", "pool", "conv_drop", "dense", "dropout",
        "lr".  ("arch" is not consulted: only the LSTM → Conv1D layout is
        built here.)

    Returns
    -------
    keras.Model
        Model with a single sigmoid output, compiled with Adam, the
        class-weighted binary cross-entropy below and accuracy / precision /
        recall / AUC metrics.
    """
    l2 = regularizers.l2(cfg["l2"])
    inp = layers.Input(shape=(cfg["window"], n_feat))

    # LSTM → Conv1D branch
    x = layers.LSTM(cfg["units"], dropout=cfg["lstm_drop"],
                    return_sequences=True,
                    kernel_regularizer=l2)(inp)
    x = layers.Conv1D(cfg["filters"], cfg["kernel"], padding="same",
                      activation=cfg["act"], kernel_regularizer=l2)(x)
    x = (layers.GlobalMaxPooling1D()(x) if cfg["pool"]=="gmp"
         else layers.GlobalAveragePooling1D()(x))
    x = layers.Dropout(cfg["conv_drop"])(x)

    x = layers.Dense(cfg["dense"], activation=cfg["act"],
                     kernel_regularizer=l2)(x)
    x = layers.Dropout(cfg["dropout"])(x)
    out = layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(inp, out)

    w0 = tf.constant(W0, dtype=tf.float32)
    w1 = tf.constant(W1, dtype=tf.float32)

    def wbce(y_t, y_p):
        """Binary cross-entropy with per-class weights w0 (label 0) / w1 (label 1)."""
        y_t = tf.cast(y_t, tf.float32)
        # binary_crossentropy averages over the last axis → one value per sample.
        bce = keras.losses.binary_crossentropy(y_t, y_p)
        w = tf.where(tf.equal(y_t, 1), w1, w0)
        # Give the weights the same shape as `bce`.  When y_t arrives as
        # (batch, 1), multiplying a (batch, 1) weight by a (batch,) loss would
        # broadcast to (batch, batch) and the mean would degenerate to
        # mean(w) * mean(bce), i.e. no per-sample class weighting at all.
        w = tf.reshape(w, tf.shape(bce))
        return tf.reduce_mean(w * bce)

    model.compile(keras.optimizers.Adam(cfg["lr"]), loss=wbce,
                  metrics=['accuracy',
                           keras.metrics.Precision(name='precision'),
                           keras.metrics.Recall(name='recall'),
                           keras.metrics.AUC(name='auc')])
    return model

print("\n🏗️ Building model...")
model = build(BEST_PARAMS)
model.summary()

# ─── callbacks & training ──────────────────────────────────────────────
print("\n🚀 Starting training...")
ckpt_path = OUT_DIR / f"best_model_{STAMP}.keras"

# Class imbalance is handled inside the `wbce` loss (which, unlike
# `class_weight`, also applies to val_loss).  `class_weight` is therefore NOT
# passed to fit(): doing both would apply the positive-class weight twice.
history = model.fit(
    X_tr_w, y_tr_w,
    validation_data=(X_va_w, y_va_w),
    epochs=EPOCHS, batch_size=BEST_PARAMS["batch"],
    callbacks=[
        ModelCheckpoint(str(ckpt_path), save_best_only=True, monitor='val_loss', verbose=1),
        EarlyStopping(patience=PATIENCE, restore_best_weights=True, verbose=1),
        ReduceLROnPlateau(patience=10, factor=.5, min_lr=1e-6, verbose=1)
    ],
    verbose=1
)

model.save(OUT_DIR / f"final_model_{STAMP}.keras")

# ─── evaluation helper ─────────────────────────────────────────────────
def evaluate(X, y, name):
    """Score the trained ``model`` on one split and print a one-line summary.

    Predictions are thresholded at 0.5.  Returns a dict with the scalar
    metrics (``p``, ``r``, ``f1``, ``fα``, ``acc``) plus the raw ``proba``,
    the hard ``preds`` and the true labels ``y``.
    """
    proba = model.predict(X, verbose=0).ravel()
    preds = (proba >= 0.5).astype(int)

    res = {
        "p": precision_score(y, preds, zero_division=0),
        "r": recall_score(y, preds, zero_division=0),
        "f1": f1_score(y, preds, zero_division=0),
        "fα": wf1(y, proba),
        "acc": accuracy_score(y, preds),
    }
    print(f"{name:<11} P={res['p']:.3f} R={res['r']:.3f} F1={res['f1']:.3f} "
          f"Fα2={res['fα']:.3f} Acc={res['acc']:.3f}")

    # Arrays are appended last so the key order matches p, r, f1, fα, acc, proba, preds, y.
    res.update(proba=proba, preds=preds, y=y)
    return res

print("\n📊 Metrics (0.5 threshold)")
train_res, val_res, test_res = (
    evaluate(feats, labels, split_name)
    for feats, labels, split_name in ((X_tr_w, y_tr_w, "Train"),
                                      (X_va_w, y_va_w, "Val"),
                                      (X_te_w, y_te_w, "Test"))
)

# ─── visualizations ─────────────────────────────────────────────────
print("\n📈 Creating visualizations...")

# 1. Training history: one panel per tracked quantity, train vs. validation.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
panels = (
    ('loss', 'Loss'),
    ('accuracy', 'Accuracy'),
    ('precision', 'Precision'),
    ('recall', 'Recall'),
)
# axes.flat walks the grid row by row: [0,0], [0,1], [1,0], [1,1].
for ax, (key, title) in zip(axes.flat, panels):
    ax.plot(history.history[key], label='Train')
    ax.plot(history.history[f'val_{key}'], label='Val')
    ax.set_title(title)
    ax.legend()

plt.tight_layout()
plt.savefig(OUT_DIR / f"training_history_{STAMP}.png", dpi=150)
plt.close()

# 2. Confusion Matrix (Test Set)
cm = confusion_matrix(test_res['y'], test_res['preds'])
plt.figure(figsize=(6, 5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix - Test Set')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.savefig(OUT_DIR / f"confusion_matrix_{STAMP}.png", dpi=150)
plt.close()

# ─── save JSON summary ─────────────────────────────────────────────────
# Collect the hyper-parameters and, per split, every metric that is not a
# NumPy array (the arrays `proba`, `preds` and `y` are left out of the JSON).
summary = {"params": BEST_PARAMS}
for split_name, res in (("train", train_res), ("val", val_res), ("test", test_res)):
    summary[split_name] = {k: v for k, v in res.items()
                           if not isinstance(v, np.ndarray)}

# Use a context manager so the file is flushed and closed deterministically
# (the bare `open(...)` used before left the handle open).
with open(OUT_DIR / f"results_{STAMP}.json", "w") as fh:
    json.dump(summary, fh, indent=2)

print(f"\n✅ All artifacts saved in {OUT_DIR} ({STAMP})")
print(f"   - Model: final_model_{STAMP}.keras")
print(f"   - Scaler: scaler_{STAMP}.pkl")
print(f"   - Results: results_{STAMP}.json")
print(f"   - Plots: *_{STAMP}.png")


In [None]:
{
  "arch": "conv_lstm",
  "window": 66,
  "filters": 32,
  "kernel": 5,
  "units": 32,
  "conv_drop": 0.1990,
  "lstm_drop": 0.0056,
  "dense": 32,
  "dropout": 0.3674,
  "l2": 1.73e-06,
  "lr": 5.455e-05,
  "batch": 32,
  "act": "relu",
  "pool": "gap",
  "conv_blocks": 2,
  "optim": "nadam",
  "precision": 0.554,
  "recall": 0.501,
  "f_alpha": 0.518
}

In [None]:
{
  "arch": "conv_lstm",
  "window": 72,
  "filters": 96,
  "kernel": 4,
  "units": 32,
  "conv_drop": 0.2349,
  "lstm_drop": 0.0741,
  "dense": 128,
  "dropout": 0.3562,
  "l2": 1.93e-06,
  "lr": 8.557e-05,
  "batch": 128,
  "act": "relu",
  "pool": "gmp",
  "conv_blocks": 2,
  "optim": "nadam",
  "precision": 0.547,
  "recall": 0.371,
  "f_alpha": 0.415
}

In [None]:
{
  "arch": "lstm_conv",
  "window": 66,
  "filters": 64,
  "kernel": 3,
  "units": 64,
  "conv_drop": 0.2411,
  "lstm_drop": 0.0560,
  "dense": 32,
  "dropout": 0.3584,
  "l2": 8.995e-06,
  "lr": 7.846e-05,
  "batch": 128,
  "act": "elu",
  "pool": "gmp",
  "conv_blocks": 1,
  "optim": "adamw",
  "precision": 0.533,
  "recall": 0.722,
  "f_alpha": 0.646
}