# In this notebook we would train the GRU model

In [None]:
"""
gru_btc_direction.py
--------------------
High-quality GRU model for predicting whether BTC's next-hour close
will go Up (1) or Down (0).

Workflow:
1. Data prep (drop cols, log1p volume, chronological split, scaling).
2. Quick Keras-Tuner RandomSearch (10 trials × 3 epochs).
3. Print best hyper-params.
4. Re-build best GRU and train with early stopping + LR schedule.
5. Print accuracy + per-class precision / recall / F1.

Author: ChatGPT (OpenAI o3)
"""

# ───────────────────── imports ──────────────────────
import numpy as np, pandas as pd, tensorflow as tf, keras_tuner as kt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# ─────────────── paths & config ───────────────
CSV_PATH  = r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\data\processed\gemini_btc_data_final_version_with_features_2016_final.csv"
DROP_COLS = ["vol_ratio_24h", "macd_diff", "macd_line",
             "upper_shadow", "lower_shadow"]

SEQ_LEN   = 60        # past 60 hours → predict next hour
VAL_FRAC  = 0.20      # 80 % train · 20 % val
W_PREC    = 2.0       # precision weight in weighted-F1
TUNE_TRIALS  = 7     # quick search
TUNE_EPOCHS  = 3
FINAL_EPOCHS = 25
BATCH        = 64
SEED         = 42

# ─────────── weighted-F1 metric ────────────
class WeightedF1(tf.keras.metrics.Metric):
    def __init__(self, weight=2.0, name="weighted_f1", threshold=0.5, **kw):
        super().__init__(name=name, **kw)
        self.w  = weight; self.th = threshold
        self.tp = self.add_weight(name="tp", initializer="zeros")
        self.fp = self.add_weight(name="fp", initializer="zeros")
        self.fn = self.add_weight(name="fn", initializer="zeros")
    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.cast(y_pred >= self.th, tf.float32)
        y_true = tf.cast(y_true, tf.float32)
        self.tp.assign_add(tf.reduce_sum(y_true * y_pred))
        self.fp.assign_add(tf.reduce_sum((1 - y_true) * y_pred))
        self.fn.assign_add(tf.reduce_sum(y_true * (1 - y_pred)))
    def result(self):
        p = self.tp / (self.tp + self.fp + 1e-7)
        r = self.tp / (self.tp + self.fn + 1e-7)
        return (1 + self.w) * p * r / (self.w * p + r + 1e-7)
    def reset_states(self):
        for v in (self.tp, self.fp, self.fn): v.assign(0.)

# ─────────── data loading & preprocessing ───────────
df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
df.drop(columns=[c for c in DROP_COLS if c in df.columns], inplace=True)
df["Volume BTC"] = np.log1p(df["Volume BTC"])
df["target"] = (df["close"].shift(-1) > df["close"]).astype(int)
df = df.dropna().select_dtypes(include=[np.number])

X_raw = df.drop(columns=["target"])
y_raw = df["target"].astype(int).values

split_row   = int(len(df) * (1 - VAL_FRAC))
X_train_raw = X_raw.iloc[:split_row]
X_val_raw   = X_raw.iloc[split_row:]
y_train     = y_raw[:split_row]
y_val       = y_raw[split_row:]

scaler = StandardScaler().fit(X_train_raw)
X_scaled = scaler.transform(X_raw)

# build rolling sequences
def to_seq(mat, tgt, length):
    Xs, ys = [], []
    for i in range(length, len(mat)):
        Xs.append(mat[i-length:i])
        ys.append(tgt[i])
    return np.array(Xs, dtype=np.float32), np.array(ys, dtype=np.float32)

X_seq, y_seq = to_seq(X_scaled, y_raw, SEQ_LEN)
split_seq    = int(len(X_seq) * (1 - VAL_FRAC))
X_train, X_val = X_seq[:split_seq], X_seq[split_seq:]
y_train, y_val = y_seq[:split_seq], y_seq[split_seq:]
n_features = X_train.shape[2]

# ─────────── hyper-model builder ───────────
def build_gru(hp: kt.HyperParameters):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(SEQ_LEN, n_features)))

    # optional 1-2 GRU layers
    for i in range(hp.Int("gru_layers", 1, 2)):
        units = hp.Int(f"units_{i}", 32, 128, step=32)
        return_seq = (i < hp.get("gru_layers") - 1)
        model.add(tf.keras.layers.GRU(units,
                                      activation="tanh",
                                      recurrent_activation="sigmoid",
                                      dropout=hp.Float(f"dropout_{i}", 0.0, 0.4, step=0.1),
                                      return_sequences=return_seq))
    model.add(tf.keras.layers.Dense(1, activation="sigmoid"))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Float("lr", 1e-4, 1e-2, sampling="log")),
        loss="binary_crossentropy",
        metrics=[WeightedF1(weight=W_PREC)]
    )
    return model

# ─────────── quick RandomSearch ───────────
tuner = kt.RandomSearch(
    build_gru,
    objective=kt.Objective("val_weighted_f1", direction="max"),
    max_trials=TUNE_TRIALS,
    executions_per_trial=1,
    directory="tmp_gru_tune", project_name="run", overwrite=True
)

tuner.search(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=TUNE_EPOCHS,
    batch_size=BATCH,
    shuffle=False,
    verbose=0
)

best_hp = tuner.get_best_hyperparameters(1)[0]
print("\n──── Best hyper-parameters ────")
for k, v in best_hp.values.items():
    print(f"{k:<12}: {v}")

# ─────────── final GRU build & train ───────────
model = tuner.hypermodel.build(best_hp)

callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=8, restore_best_weights=True, monitor="val_weighted_f1", mode="max"),
    tf.keras.callbacks.ReduceLROnPlateau(patience=4, factor=0.5, monitor="val_weighted_f1", mode="max")
]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=FINAL_EPOCHS,
    batch_size=BATCH,
    shuffle=False,
    callbacks=callbacks,
    verbose=2
)

# ─────────── evaluation ───────────
y_prob = model.predict(X_val, batch_size=BATCH).flatten()
y_pred = (y_prob >= 0.5).astype(int)

acc  = accuracy_score(y_val, y_pred)
prec, rec, f1, _ = precision_recall_fscore_support(
    y_val, y_pred, labels=[0, 1], zero_division=0
)

print("\n──── Validation metrics (thr = 0.50) ────")
print(f"Accuracy          : {acc:6.3f}")
print(f"Class 0 (Down) →  Precision: {prec[0]:6.3f}  Recall: {rec[0]:6.3f}  F1: {f1[0]:6.3f}")
print(f"Class 1 (Up  ) →  Precision: {prec[1]:6.3f}  Recall: {rec[1]:6.3f}  F1: {f1[1]:6.3f}")
print(f"Macro-F1          : {f1.mean():6.3f}")




──── Best hyper-parameters ────
gru_layers  : 2
units_0     : 32
dropout_0   : 0.4
lr          : 0.0027215464940055565
units_1     : 64
dropout_1   : 0.4
Epoch 1/50
1008/1008 - 37s - 37ms/step - loss: 0.6944 - val_loss: 0.6941 - val_weighted_f1: 0.3110 - weighted_f1: 0.5504 - learning_rate: 0.0027
Epoch 2/50
1008/1008 - 34s - 34ms/step - loss: 0.6926 - val_loss: 0.6936 - val_weighted_f1: 0.3656 - weighted_f1: 0.5545 - learning_rate: 0.0027
Epoch 3/50
1008/1008 - 27s - 27ms/step - loss: 0.6923 - val_loss: 0.6932 - val_weighted_f1: 0.5021 - weighted_f1: 0.5455 - learning_rate: 0.0027
Epoch 4/50
1008/1008 - 19s - 18ms/step - loss: 0.6923 - val_loss: 0.6925 - val_weighted_f1: 0.5473 - weighted_f1: 0.5507 - learning_rate: 0.0027
Epoch 5/50
1008/1008 - 33s - 33ms/step - loss: 0.6922 - val_loss: 0.6934 - val_weighted_f1: 0.4332 - weighted_f1: 0.5554 - learning_rate: 0.0027
Epoch 6/50
1008/1008 - 35s - 34ms/step - loss: 0.6925 - val_loss: 0.6926 - val_weighted_f1: 0.6569 - weighted_f1: 0.545

In [2]:
"""
final_gru_train.py
------------------
GRU model for BTC next-hour direction with fixed hyper-parameters:

    # ── layer 0 ──
    units       = 32
    dropout     = 0.40
    return_seq  = True
    # ── layer 1 ──
    units       = 64
    dropout     = 0.40
    return_seq  = False
    # ── optimiser ──
    learning rate = 0.0027215464940055565

Outputs per-class metrics and (optionally) prediction table.
"""

# ─────────────────────────────────────────────────────────────
import numpy as np, pandas as pd, tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# ───────── configuration ─────────
CSV_PATH  = r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\data\processed\gemini_btc_data_final_version_with_features_2016_final.csv"
DROP_COLS = ["vol_ratio_24h", "macd_diff", "macd_line", "upper_shadow", "lower_shadow"]

SEQ_LEN   = 60           # timesteps per sample
VAL_FRAC  = 0.20         # 80 % train · 20 % validation
PREC_W    = 2.0          # β=2  (precision has ×2 weight in F-score)
LR        = 0.0027215464940055565
EPOCHS    = 25
BATCH     = 64
SEED      = 42
SAVE_PROB = True         # set False if you don’t want CSV

# ───────── custom weighted-F1 metric ─────────
class WeightedF1(tf.keras.metrics.Metric):
    def __init__(self, beta=2.0, name="weighted_f1", threshold=0.5, **kw):
        super().__init__(name=name, **kw)
        self.b  = beta; self.th = threshold
        self.tp = self.add_weight(name="tp", initializer="zeros")
        self.fp = self.add_weight(name="fp", initializer="zeros")
        self.fn = self.add_weight(name="fn", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.cast(y_pred >= self.th, tf.float32)
        y_true = tf.cast(y_true, tf.float32)
        self.tp.assign_add(tf.reduce_sum(y_true * y_pred))
        self.fp.assign_add(tf.reduce_sum((1 - y_true) * y_pred))
        self.fn.assign_add(tf.reduce_sum(y_true * (1 - y_pred)))

    def result(self):
        p = self.tp / (self.tp + self.fp + 1e-7)
        r = self.tp / (self.tp + self.fn + 1e-7)
        return (1 + self.b**2) * p * r / (self.b**2 * p + r + 1e-7)

    def reset_states(self):
        for v in (self.tp, self.fp, self.fn):
            v.assign(0.)

# ───────── data loading & preprocessing ─────────
df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
df.drop(columns=[c for c in DROP_COLS if c in df.columns], inplace=True)
df["Volume BTC"] = np.log1p(df["Volume BTC"])
df["target"] = (df["close"].shift(-1) > df["close"]).astype(int)
df = df.dropna().select_dtypes(include=[np.number])

X_raw = df.drop(columns=["target"])
y_raw = df["target"].astype(int).values

split_raw = int(len(df) * (1 - VAL_FRAC))
X_train_raw, X_val_raw = X_raw.iloc[:split_raw], X_raw.iloc[split_raw:]
y_train, y_val         = y_raw[:split_raw], y_raw[split_raw:]

scaler = StandardScaler().fit(X_train_raw)
X_scaled = scaler.transform(X_raw)

def to_seq(matrix, labels, length):
    xs, ys = [], []
    for i in range(length, len(matrix)):
        xs.append(matrix[i-length:i])
        ys.append(labels[i])
    return np.array(xs, dtype=np.float32), np.array(ys, dtype=np.float32)

X_seq, y_seq = to_seq(X_scaled, y_raw, SEQ_LEN)
split_seq    = int(len(X_seq) * (1 - VAL_FRAC))
X_train, X_val = X_seq[:split_seq], X_seq[split_seq:]
y_train, y_val = y_seq[:split_seq], y_seq[split_seq:]
n_features = X_train.shape[2]

# ───────── build GRU model ─────────
tf.random.set_seed(SEED)

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(SEQ_LEN, n_features)),
    tf.keras.layers.GRU(32, dropout=0.4, return_sequences=True,
                        activation="tanh", recurrent_activation="sigmoid"),
    tf.keras.layers.GRU(64, dropout=0.4, return_sequences=False,
                        activation="tanh", recurrent_activation="sigmoid"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LR),
    loss="binary_crossentropy",
    metrics=[WeightedF1(beta=PREC_W)]   # β=2 → precision ×2
)

callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor="val_weighted_f1", mode="max",
        patience=8, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_weighted_f1", mode="max",
        factor=0.5, patience=4)
]

model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH,
    shuffle=False,
    callbacks=callbacks,
    verbose=2
)

# ───────── evaluation ─────────
y_prob = model.predict(X_val, batch_size=BATCH).flatten()
y_pred = (y_prob >= 0.5).astype(int)

acc  = accuracy_score(y_val, y_pred)
prec, rec, f1, _ = precision_recall_fscore_support(
    y_val, y_pred, labels=[0, 1], zero_division=0
)

print("\n──── Validation metrics (thr = 0.50) ────")
print(f"Accuracy          : {acc:6.3f}")
print(f"Class 0 (Down) →  Precision: {prec[0]:6.3f}  Recall: {rec[0]:6.3f}  F1: {f1[0]:6.3f}")
print(f"Class 1 (Up  ) →  Precision: {prec[1]:6.3f}  Recall: {rec[1]:6.3f}  F1: {f1[1]:6.3f}")
print(f"Macro-F1          : {f1.mean():6.3f}")

# ───────── optional: save predictions ─────────
if SAVE_PROB:
    pred_df = pd.DataFrame({
        "prob_up": y_prob,
        "pred": y_pred
    }, index=X_val_raw.index)              # original timestamps
    pred_df.to_csv("gru_val_predictions.csv")
    print("Predictions saved → gru_val_predictions.csv")


Epoch 1/50
1008/1008 - 21s - 21ms/step - loss: 0.6942 - val_loss: 0.6947 - val_weighted_f1: 0.2462 - weighted_f1: 0.5537 - learning_rate: 0.0027
Epoch 2/50
1008/1008 - 19s - 19ms/step - loss: 0.6922 - val_loss: 0.6932 - val_weighted_f1: 0.4798 - weighted_f1: 0.5613 - learning_rate: 0.0027
Epoch 3/50
1008/1008 - 30s - 29ms/step - loss: 0.6923 - val_loss: 0.6933 - val_weighted_f1: 0.3422 - weighted_f1: 0.5678 - learning_rate: 0.0027
Epoch 4/50
1008/1008 - 35s - 35ms/step - loss: 0.6922 - val_loss: 0.6926 - val_weighted_f1: 0.5837 - weighted_f1: 0.5724 - learning_rate: 0.0027
Epoch 5/50
1008/1008 - 35s - 35ms/step - loss: 0.6920 - val_loss: 0.6929 - val_weighted_f1: 0.5935 - weighted_f1: 0.5694 - learning_rate: 0.0027
Epoch 6/50
1008/1008 - 35s - 35ms/step - loss: 0.6924 - val_loss: 0.6929 - val_weighted_f1: 0.4824 - weighted_f1: 0.5637 - learning_rate: 0.0027
Epoch 7/50
1008/1008 - 35s - 35ms/step - loss: 0.6918 - val_loss: 0.6932 - val_weighted_f1: 0.6431 - weighted_f1: 0.5502 - learnin

ValueError: Length of values (16119) does not match length of index (16131)