# In this notebook we would train the GRU model

In [None]:
keep 
atr_14  , OBV

drop

'ema_cross_up', 'macd_cross_up', 'oversold_reversal', 'overbought_reversal', 'trending_market'

In [None]:
drop_gru = [
    'open', 'high', 'low', 'typical_price', 'EMA_7', 'EMA_21', 'SMA_20', 'SMA_50',
    'vwap_24h', 'close_4h', 'bollinger_upper', 'bollinger_lower',
    'resistance_level', 'support_level', 'high_low', 'high_close', 'low_close',
    'true_range', 'volume_mean_20', 'MACD_line', 'MACD_signal',
    'bollinger_width', 'volatility_regime', 'CCI', 'stoch_%D',
    'parkinson_vol', 'ema_cross_down', 'macd_cross_down',
    'vol_spike_1_5x', 'near_upper_band', 'near_lower_band',
    'break_upper_band', 'break_lower_band', 'rsi_oversold', 'rsi_overbought',
    'above_sma20', 'above_sma50', 'ema7_above_ema21', 'macd_positive',
    'volume_breakout', 'volume_breakdown', 'stoch_overbought', 'stoch_oversold',
    'cci_overbought', 'cci_oversold', 'trending_market',
    'bullish_scenario_1', 'bullish_scenario_2', 'bullish_scenario_3',
    'bullish_scenario_4', 'bullish_scenario_5', 'bullish_scenario_6',
    'bearish_scenario_1', 'bearish_scenario_2', 'bearish_scenario_3',
    'bearish_scenario_4', 'bearish_scenario_6''ema_cross_up', 'macd_cross_up', 'oversold_reversal', 'overbought_reversal'
]

In [None]:
import os, json, optuna, warnings
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import fbeta_score
import tensorflow as tf
from tensorflow import keras

warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
np.random.seed(42)
tf.random.set_seed(42)

# GPU Memory growth setup
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

# Config
CSV_PATH = Path(r"C:\\Users\\ADMIN\\Desktop\\Coding_projects\\stock_market_prediction\\Stock-Market-Prediction\\data\\processed\\gemini_btc_with_features_4h.csv")
VAL_FRAC = 0.2
BETA = 2.0
SEED = 42
MODEL_NAME = "gru_fast_val20_model.h5"
N_TRIALS = 25
TIMEOUT = 15 * 60

drop_cols = [
    'open', 'high', 'low', 'typical_price', 'EMA_7', 'EMA_21', 'SMA_20', 'SMA_50',
    'vwap_24h', 'close_4h', 'bollinger_upper', 'bollinger_lower',
    'resistance_level', 'support_level', 'high_low', 'high_close', 'low_close',
    'true_range', 'volume_mean_20', 'MACD_line', 'MACD_signal',
    'bollinger_width', 'volatility_regime', 'CCI', 'stoch_%D',
    'parkinson_vol', 'ema_cross_down', 'macd_cross_down',
    'vol_spike_1_5x', 'near_upper_band', 'near_lower_band',
    'break_upper_band', 'break_lower_band', 'rsi_oversold', 'rsi_overbought',
    'above_sma20', 'above_sma50', 'ema7_above_ema21', 'macd_positive',
    'volume_breakout', 'volume_breakdown', 'stoch_overbought', 'stoch_oversold',
    'cci_overbought', 'cci_oversold', 'trending_market',
    'bullish_scenario_1', 'bullish_scenario_2', 'bullish_scenario_3',
    'bullish_scenario_4', 'bullish_scenario_5', 'bullish_scenario_6',
    'bearish_scenario_1', 'bearish_scenario_2', 'bearish_scenario_3',
    'bearish_scenario_4', 'bearish_scenario_6''ema_cross_up', 'macd_cross_up', 'oversold_reversal', 'overbought_reversal'
]

df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
df = df[df.index >= "2018-01-01"]
df.drop(columns=[c for c in drop_cols if c in df.columns], inplace=True)
df = df[df["target"].notna()].dropna()

features = df.drop(columns="target")
target = df["target"].astype(int).values
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

split_idx = int(len(df) * (1 - VAL_FRAC))
X_train_raw, X_val_raw = features_scaled[:split_idx], features_scaled[split_idx:]
y_train, y_val = target[:split_idx], target[split_idx:]

def make_windows(data, labels, win):
    X, y = [], []
    for i in range(win, len(data)):
        X.append(data[i-win:i])
        y.append(labels[i])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.int8)

def objective(trial):
    win = trial.suggest_int("window", 12, 36, step=6)
    units = trial.suggest_int("units", 64, 192, step=64)
    layers = trial.suggest_int("layers", 1, 2)
    dropout = trial.suggest_float("dropout", 0.0, 0.3)
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    l2reg = trial.suggest_float("l2", 1e-6, 1e-3, log=True)
    batch = trial.suggest_categorical("batch", [32, 64])

    X_tr, y_tr = make_windows(X_train_raw, y_train, win)
    X_va, y_va = make_windows(X_val_raw, y_val, win)

    tf.keras.backend.clear_session()
    model = keras.Sequential()
    for i in range(layers):
        model.add(keras.layers.GRU(units, return_sequences=(i < layers-1),
                                   dropout=dropout,
                                   kernel_regularizer=keras.regularizers.l2(l2reg)))
    model.add(keras.layers.Dense(1, activation="sigmoid"))
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),
                  loss="binary_crossentropy")

    model.fit(X_tr, y_tr,
              epochs=50,
              batch_size=batch,
              validation_data=(X_va, y_va),
              callbacks=[
                  keras.callbacks.EarlyStopping(patience=8, restore_best_weights=True)
              ],
              verbose=0)

    prob = model.predict(X_va, verbose=0).ravel()
    pred = (prob >= 0.5).astype(int)
    return 1.0 - fbeta_score(y_va, pred, beta=BETA, zero_division=0)

study = optuna.create_study(direction="minimize", sampler=optuna.samplers.TPESampler(seed=SEED))
study.optimize(objective, n_trials=N_TRIALS, timeout=TIMEOUT, show_progress_bar=True)

print("Best F2:", 1 - study.best_value)
print("Params:", study.best_params)

with open("gru_fast_best_params.json", "w") as f:
    json.dump(study.best_params, f, indent=2)

# Final model
p = study.best_params
X_tr, y_tr = make_windows(X_train_raw, y_train, p["window"])
X_va, y_va = make_windows(X_val_raw, y_val, p["window"])

tf.keras.backend.clear_session()
final = keras.Sequential()
for i in range(p["layers"]):
    final.add(keras.layers.GRU(p["units"], return_sequences=(i < p["layers"]-1),
                               dropout=p["dropout"],
                               kernel_regularizer=keras.regularizers.l2(p["l2"])))
final.add(keras.layers.Dense(1, activation="sigmoid"))
final.compile(optimizer=keras.optimizers.Adam(learning_rate=p["lr"]),
              loss="binary_crossentropy")
final.fit(X_tr, y_tr, epochs=50, batch_size=p["batch"],
          validation_data=(X_va, y_va),
          callbacks=[keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)],
          verbose=1)

final.save(MODEL_NAME)
print("\\n✔ GRU model saved →", MODEL_NAME)

[I 2025-06-06 22:43:01,166] A new study created in memory with name: no-name-d8105f4e-9a60-45b8-a1ee-2354578f522f
Best trial: 0. Best value: 0.434793:   4%|▍         | 1/25 [00:47<18:57, 47.41s/it, 47.41/900 seconds]

[I 2025-06-06 22:43:48,577] Trial 0 finished with value: 0.43479295573536414 and parameters: {'window': 18, 'units': 192, 'layers': 2, 'dropout': 0.17959754525911098, 'lr': 0.0002051338263087451, 'l2': 2.9375384576328313e-06, 'batch': 64}. Best is trial 0 with value: 0.43479295573536414.


Best trial: 0. Best value: 0.434793:   8%|▊         | 2/25 [01:21<15:07, 39.47s/it, 81.32/900 seconds]

[I 2025-06-06 22:44:22,490] Trial 1 finished with value: 0.517478152309613 and parameters: {'window': 30, 'units': 192, 'layers': 1, 'dropout': 0.29097295564859826, 'lr': 0.004622589001020831, 'l2': 4.335281794951567e-06, 'batch': 64}. Best is trial 0 with value: 0.43479295573536414.


Best trial: 2. Best value: 0.327151:  12%|█▏        | 3/25 [01:34<10:02, 27.37s/it, 94.30/900 seconds]

[I 2025-06-06 22:44:35,468] Trial 2 finished with value: 0.3271514192016284 and parameters: {'window': 18, 'units': 128, 'layers': 1, 'dropout': 0.08736874205941257, 'lr': 0.0016738085788752138, 'l2': 2.62108787826544e-06, 'batch': 64}. Best is trial 2 with value: 0.3271514192016284.


Best trial: 2. Best value: 0.327151:  16%|█▌        | 4/25 [02:09<10:42, 30.60s/it, 129.86/900 seconds]

[I 2025-06-06 22:45:11,021] Trial 3 finished with value: 0.5615826419910657 and parameters: {'window': 24, 'units': 192, 'layers': 1, 'dropout': 0.15427033152408348, 'lr': 0.0015304852121831463, 'l2': 1.3783237455007196e-06, 'batch': 32}. Best is trial 2 with value: 0.3271514192016284.


Best trial: 2. Best value: 0.327151:  20%|██        | 5/25 [02:48<11:12, 33.63s/it, 168.84/900 seconds]

[I 2025-06-06 22:45:50,006] Trial 4 finished with value: 0.49406582650189645 and parameters: {'window': 12, 'units': 192, 'layers': 2, 'dropout': 0.24251920443493832, 'lr': 0.0004066563313514797, 'l2': 1.9634341572933354e-06, 'batch': 32}. Best is trial 2 with value: 0.3271514192016284.


Best trial: 2. Best value: 0.327151:  24%|██▍       | 6/25 [03:02<08:28, 26.77s/it, 182.30/900 seconds]

[I 2025-06-06 22:46:03,467] Trial 5 finished with value: 0.45776599590312084 and parameters: {'window': 12, 'units': 128, 'layers': 1, 'dropout': 0.2727961206236346, 'lr': 0.00032927591344236165, 'l2': 9.717775305059631e-05, 'batch': 64}. Best is trial 2 with value: 0.3271514192016284.


Best trial: 6. Best value: 0.212958:  28%|██▊       | 7/25 [03:34<08:35, 28.61s/it, 214.71/900 seconds]

[I 2025-06-06 22:46:35,877] Trial 6 finished with value: 0.21295804567180032 and parameters: {'window': 24, 'units': 64, 'layers': 2, 'dropout': 0.23253984700833435, 'lr': 0.007568292060167619, 'l2': 0.00048359527764659497, 'batch': 64}. Best is trial 6 with value: 0.21295804567180032.


Best trial: 6. Best value: 0.212958:  32%|███▏      | 8/25 [03:43<06:18, 22.29s/it, 223.45/900 seconds]

[I 2025-06-06 22:46:44,614] Trial 7 finished with value: 0.4251781472684085 and parameters: {'window': 12, 'units': 64, 'layers': 1, 'dropout': 0.0975990992289793, 'lr': 0.0005989003672254305, 'l2': 6.516990611177181e-06, 'batch': 32}. Best is trial 6 with value: 0.21295804567180032.


Best trial: 6. Best value: 0.212958:  36%|███▌      | 9/25 [04:56<10:10, 38.14s/it, 296.46/900 seconds]

[I 2025-06-06 22:47:57,625] Trial 8 finished with value: 0.46489104116222757 and parameters: {'window': 18, 'units': 128, 'layers': 1, 'dropout': 0.2406590942262119, 'lr': 0.00014096175149815865, 'l2': 0.0009133995846860973, 'batch': 32}. Best is trial 6 with value: 0.21295804567180032.


In [2]:
"""
final_gru_train.py
------------------
GRU model for BTC next-hour direction with fixed hyper-parameters:

    # ── layer 0 ──
    units       = 32
    dropout     = 0.40
    return_seq  = True
    # ── layer 1 ──
    units       = 64
    dropout     = 0.40
    return_seq  = False
    # ── optimiser ──
    learning rate = 0.0027215464940055565

Outputs per-class metrics and (optionally) prediction table.
"""

# ─────────────────────────────────────────────────────────────
import numpy as np, pandas as pd, tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# ───────── configuration ─────────
CSV_PATH  = r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\data\processed\gemini_btc_data_final_version_with_features_2016_final.csv"
DROP_COLS = ["vol_ratio_24h", "macd_diff", "macd_line", "upper_shadow", "lower_shadow"]

SEQ_LEN   = 60           # timesteps per sample
VAL_FRAC  = 0.20         # 80 % train · 20 % validation
PREC_W    = 2.0          # β=2  (precision has ×2 weight in F-score)
LR        = 0.0027215464940055565
EPOCHS    = 25
BATCH     = 64
SEED      = 42
SAVE_PROB = True         # set False if you don’t want CSV

# ───────── custom weighted-F1 metric ─────────
class WeightedF1(tf.keras.metrics.Metric):
    def __init__(self, beta=2.0, name="weighted_f1", threshold=0.5, **kw):
        super().__init__(name=name, **kw)
        self.b  = beta; self.th = threshold
        self.tp = self.add_weight(name="tp", initializer="zeros")
        self.fp = self.add_weight(name="fp", initializer="zeros")
        self.fn = self.add_weight(name="fn", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.cast(y_pred >= self.th, tf.float32)
        y_true = tf.cast(y_true, tf.float32)
        self.tp.assign_add(tf.reduce_sum(y_true * y_pred))
        self.fp.assign_add(tf.reduce_sum((1 - y_true) * y_pred))
        self.fn.assign_add(tf.reduce_sum(y_true * (1 - y_pred)))

    def result(self):
        p = self.tp / (self.tp + self.fp + 1e-7)
        r = self.tp / (self.tp + self.fn + 1e-7)
        return (1 + self.b**2) * p * r / (self.b**2 * p + r + 1e-7)

    def reset_states(self):
        for v in (self.tp, self.fp, self.fn):
            v.assign(0.)

# ───────── data loading & preprocessing ─────────
df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
df.drop(columns=[c for c in DROP_COLS if c in df.columns], inplace=True)
df["Volume BTC"] = np.log1p(df["Volume BTC"])
df["target"] = (df["close"].shift(-1) > df["close"]).astype(int)
df = df.dropna().select_dtypes(include=[np.number])

X_raw = df.drop(columns=["target"])
y_raw = df["target"].astype(int).values

split_raw = int(len(df) * (1 - VAL_FRAC))
X_train_raw, X_val_raw = X_raw.iloc[:split_raw], X_raw.iloc[split_raw:]
y_train, y_val         = y_raw[:split_raw], y_raw[split_raw:]

scaler = StandardScaler().fit(X_train_raw)
X_scaled = scaler.transform(X_raw)

def to_seq(matrix, labels, length):
    xs, ys = [], []
    for i in range(length, len(matrix)):
        xs.append(matrix[i-length:i])
        ys.append(labels[i])
    return np.array(xs, dtype=np.float32), np.array(ys, dtype=np.float32)

X_seq, y_seq = to_seq(X_scaled, y_raw, SEQ_LEN)
split_seq    = int(len(X_seq) * (1 - VAL_FRAC))
X_train, X_val = X_seq[:split_seq], X_seq[split_seq:]
y_train, y_val = y_seq[:split_seq], y_seq[split_seq:]
n_features = X_train.shape[2]

# ───────── build GRU model ─────────
tf.random.set_seed(SEED)

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(SEQ_LEN, n_features)),
    tf.keras.layers.GRU(32, dropout=0.4, return_sequences=True,
                        activation="tanh", recurrent_activation="sigmoid"),
    tf.keras.layers.GRU(64, dropout=0.4, return_sequences=False,
                        activation="tanh", recurrent_activation="sigmoid"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LR),
    loss="binary_crossentropy",
    metrics=[WeightedF1(beta=PREC_W)]   # β=2 → precision ×2
)

callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor="val_weighted_f1", mode="max",
        patience=8, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_weighted_f1", mode="max",
        factor=0.5, patience=4)
]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH,
    shuffle=False,
    callbacks=callbacks,
    verbose=2
)

# ───────── evaluation ─────────
y_prob = model.predict(X_val, batch_size=BATCH).flatten()
y_pred = (y_prob >= 0.5).astype(int)

acc  = accuracy_score(y_val, y_pred)
prec, rec, f1, _ = precision_recall_fscore_support(
    y_val, y_pred, labels=[0, 1], zero_division=0
)

print("\n──── Validation metrics (thr = 0.50) ────")
print(f"Accuracy          : {acc:6.3f}")
print(f"Class 0 (Down) →  Precision: {prec[0]:6.3f}  Recall: {rec[0]:6.3f}  F1: {f1[0]:6.3f}")
print(f"Class 1 (Up  ) →  Precision: {prec[1]:6.3f}  Recall: {rec[1]:6.3f}  F1: {f1[1]:6.3f}")
print(f"Macro-F1          : {f1.mean():6.3f}")

# ───────── optional: save predictions ─────────
if SAVE_PROB:
    pred_df = pd.DataFrame({
        "prob_up": y_prob,
        "pred": y_pred
    }, index=X_val_raw.index)              # original timestamps
    pred_df.to_csv("gru_val_predictions.csv")
    print("Predictions saved → gru_val_predictions.csv")


Epoch 1/50
1008/1008 - 21s - 21ms/step - loss: 0.6942 - val_loss: 0.6947 - val_weighted_f1: 0.2462 - weighted_f1: 0.5537 - learning_rate: 0.0027
Epoch 2/50
1008/1008 - 19s - 19ms/step - loss: 0.6922 - val_loss: 0.6932 - val_weighted_f1: 0.4798 - weighted_f1: 0.5613 - learning_rate: 0.0027
Epoch 3/50
1008/1008 - 30s - 29ms/step - loss: 0.6923 - val_loss: 0.6933 - val_weighted_f1: 0.3422 - weighted_f1: 0.5678 - learning_rate: 0.0027
Epoch 4/50
1008/1008 - 35s - 35ms/step - loss: 0.6922 - val_loss: 0.6926 - val_weighted_f1: 0.5837 - weighted_f1: 0.5724 - learning_rate: 0.0027
Epoch 5/50
1008/1008 - 35s - 35ms/step - loss: 0.6920 - val_loss: 0.6929 - val_weighted_f1: 0.5935 - weighted_f1: 0.5694 - learning_rate: 0.0027
Epoch 6/50
1008/1008 - 35s - 35ms/step - loss: 0.6924 - val_loss: 0.6929 - val_weighted_f1: 0.4824 - weighted_f1: 0.5637 - learning_rate: 0.0027
Epoch 7/50
1008/1008 - 35s - 35ms/step - loss: 0.6918 - val_loss: 0.6932 - val_weighted_f1: 0.6431 - weighted_f1: 0.5502 - learnin

ValueError: Length of values (16119) does not match length of index (16131)