# In this notebook we will train the LSTM model

In [16]:
"""
5-minute hyper-parameter sweep for a BTC-direction LSTM
(predicts Up/Down for the NEXT close price).

Prereqs:
    pip install pandas numpy scikit-learn tensorflow keras-tuner
"""

import numpy as np, pandas as pd, tensorflow as tf, keras_tuner as kt
from sklearn.preprocessing import StandardScaler

# ───────────── Config ─────────────
CSV_PATH  = r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\data\processed\gemini_btc_data_final_version_with_features_2016_final.csv"
DROP_COLS = ["vol_ratio_24h", "macd_diff", "macd_line", "upper_shadow", "lower_shadow"]
SEQ_LEN   = 60          # past 60 hours → predict next hour
VAL_FRAC  = 0.20        # final 20 % of samples for validation
W_PREC    = 2.0         # precision weight in weighted-F1

# ───────────── Load & label ─────────────
df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
df = df.drop(columns=[c for c in DROP_COLS if c in df.columns])
df["Volume BTC"] = np.log1p(df["Volume BTC"])

# 1 = Up, 0 = Down (next close vs current close)
df["target"] = (df["close"].shift(-1) > df["close"]).astype(int)
df = df.dropna().select_dtypes(include=[np.number])   # drop last row (NaN label)

feature_cols = df.columns.drop("target")

# ───────────── Chronological train/val split ─────────────
split_raw = int(len(df) * (1 - VAL_FRAC))
train_raw, val_raw = df.iloc[:split_raw], df.iloc[split_raw:]

scaler = StandardScaler().fit(train_raw[feature_cols])

df_scaled = pd.DataFrame(
    scaler.transform(df[feature_cols]),
    columns=feature_cols,
    index=df.index
)
labels = df["target"].values.astype(np.float32)

# ───────────── Build sequences ─────────────
def make_sequences(mat, tgt, length):
    Xs, ys = [], []
    for i in range(length, len(mat)):
        Xs.append(mat[i-length:i])
        ys.append(tgt[i])
    return np.array(Xs, dtype=np.float32), np.array(ys, dtype=np.float32)

X_all, y_all = make_sequences(df_scaled.values, labels, SEQ_LEN)

split_seq = int(len(X_all) * (1 - VAL_FRAC))
X_train, X_val = X_all[:split_seq], X_all[split_seq:]
y_train, y_val = y_all[:split_seq], y_all[split_seq:]

# ───────────── Custom weighted-F1 metric ─────────────
class WeightedF1(tf.keras.metrics.Metric):
    def __init__(self, weight=2.0, name="weighted_f1", threshold=0.5, **kw):
        super().__init__(name=name, **kw)
        self.w = weight
        self.th = threshold
        self.tp = self.add_weight(name="tp", initializer="zeros")
        self.fp = self.add_weight(name="fp", initializer="zeros")
        self.fn = self.add_weight(name="fn", initializer="zeros")


    def update_state(self, y_true, y_pred, sample_weight=None):
        y_pred = tf.cast(y_pred >= self.th, tf.float32)
        y_true = tf.cast(y_true, tf.float32)
        self.tp.assign_add(tf.reduce_sum(y_true * y_pred))
        self.fp.assign_add(tf.reduce_sum((1 - y_true) * y_pred))
        self.fn.assign_add(tf.reduce_sum(y_true * (1 - y_pred)))

    def result(self):
        precision = self.tp / (self.tp + self.fp + 1e-7)
        recall    = self.tp / (self.tp + self.fn + 1e-7)
        return (1 + self.w) * precision * recall / (self.w * precision + recall + 1e-7)

    def reset_states(self):
        for var in (self.tp, self.fp, self.fn):
            var.assign(0.)

# ───────────── Hyper-model ─────────────
def build_model(hp):
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(SEQ_LEN, X_train.shape[2])),
        tf.keras.layers.LSTM(
            units=hp.Int("units", 32, 128, step=32),
            activation="tanh",
            recurrent_activation="sigmoid",
            dropout=hp.Float("dropout", 0.0, 0.4, step=0.1)
        ),
        tf.keras.layers.Dense(1, activation="sigmoid")
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hp.Float("lr", 1e-4, 1e-2, sampling="log")),
        loss="binary_crossentropy",
        metrics=[WeightedF1(weight=W_PREC)]
    )
    return model

tuner = kt.RandomSearch(
    build_model,
    objective=kt.Objective("val_weighted_f1", direction="max"),
    max_trials=7,
    executions_per_trial=1,
    directory="lstm_tune",
    project_name="btc_dir_quick"
)

early_stop = tf.keras.callbacks.EarlyStopping(patience=2, restore_best_weights=True)

tuner.search(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=3,            # tiny → ~5 min
    batch_size=64,
    shuffle=False,       # keep order!
    callbacks=[early_stop],
    verbose=1
)

best = tuner.get_best_hyperparameters(1)[0]
print("\nBest hyper-parameters (max val-weighted-F1):")
print(f"  units   : {best.get('units')}")
print(f"  dropout : {best.get('dropout')}")
print(f"  lr      : {best.get('lr')}")


Trial 7 Complete [00h 01m 39s]
val_weighted_f1: 0.22747260332107544

Best val_weighted_f1 So Far: 0.5337280035018921
Total elapsed time: 00h 06m 33s

Best hyper-parameters (max val-weighted-F1):
  units   : 32
  dropout : 0.4
  lr      : 0.0012789451776909946


In [19]:
from sklearn.metrics import precision_score, recall_score, accuracy_score

# Get predictions as probabilities
y_pred_proba = best_model.predict(X_val).flatten()

# Convert to class labels
y_pred = (y_pred_proba >= 0.5).astype(int)

# Compute metrics
precision_per_class = precision_score(y_val, y_pred, average=None)
recall_per_class    = recall_score(y_val, y_pred, average=None)
accuracy            = accuracy_score(y_val, y_pred)

# Print results clearly
print(f"🔍 Precision by class:")
print(f"  Class 0: {precision_per_class[0]:.4f}")
print(f"  Class 1: {precision_per_class[1]:.4f}")

print(f"\n🔍 Recall by class:")
print(f"  Class 0: {recall_per_class[0]:.4f}")
print(f"  Class 1: {recall_per_class[1]:.4f}")

print(f"\n✅ Overall Accuracy: {accuracy:.4f}")


[1m504/504[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
🔍 Precision by class:
  Class 0: 0.4977
  Class 1: 0.5173

🔍 Recall by class:
  Class 0: 0.4637
  Class 1: 0.5512

✅ Overall Accuracy: 0.5083


In [20]:
"""
Final training + evaluation for BTC-direction LSTM
==================================================
Label: 1 if next-hour close > current close, else 0.
Metrics printed: precision, recall, F1 for both classes and overall accuracy.
"""

# ───── Imports ──────────────────────────────────────────────────────
import numpy as np, pandas as pd, tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# ───── File & column config ────────────────────────────────────────
CSV_PATH  = r"C:\Users\ADMIN\Desktop\Coding_projects\stock_market_prediction\Stock-Market-Prediction\data\processed\gemini_btc_data_final_version_with_features_2016_final.csv"
DROP_COLS = ["vol_ratio_24h", "macd_diff", "macd_line", "upper_shadow", "lower_shadow"]

SEQ_LEN   = 60      # past 60 hours → predict next hour
VAL_FRAC  = 0.20    # last 20 % of sequences become validation

# ───── Hyper-parameters found in tuning ────────────────────────────
UNITS     = 32
DROPOUT   = 0.4
LR        = 0.0012789451776909946
EPOCHS    = 30      # upper bound (early stopping will usually finish sooner)
BATCH     = 64

# ───── Load, clean, engineer label ─────────────────────────────────
df = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
df = df.drop(columns=[c for c in DROP_COLS if c in df.columns])
df["Volume BTC"] = np.log1p(df["Volume BTC"])

df["target"] = (df["close"].shift(-1) > df["close"]).astype(int)
df = df.dropna().select_dtypes(include=[np.number])          # remove final NaN label row

feature_cols = df.columns.drop("target")

# ───── Chronological train / validation split (raw rows) ───────────
split_raw = int(len(df) * (1 - VAL_FRAC))
train_raw, val_raw = df.iloc[:split_raw], df.iloc[split_raw:]

# scale INPUT features with params from *train* only
scaler = StandardScaler().fit(train_raw[feature_cols])
df_scaled = pd.DataFrame(
    scaler.transform(df[feature_cols]),
    columns=feature_cols,
    index=df.index
)
labels = df["target"].values.astype(np.float32)

# ───── Convert to rolling sequences ────────────────────────────────
def make_sequences(mat, tgt, length):
    Xs, ys = [], []
    for i in range(length, len(mat)):
        Xs.append(mat[i-length:i])
        ys.append(tgt[i])
    return np.array(Xs, dtype=np.float32), np.array(ys, dtype=np.float32)

X_all, y_all = make_sequences(df_scaled.values, labels, SEQ_LEN)
split_seq = int(len(X_all) * (1 - VAL_FRAC))
X_train, X_val = X_all[:split_seq], X_all[split_seq:]
y_train, y_val = y_all[:split_seq], y_all[split_seq:]

# ───── Model definition ────────────────────────────────────────────
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(SEQ_LEN, X_train.shape[2])),
    tf.keras.layers.LSTM(UNITS, dropout=DROPOUT, activation="tanh",
                         recurrent_activation="sigmoid"),
    tf.keras.layers.Dense(1, activation="sigmoid")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LR),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

early_stop = tf.keras.callbacks.EarlyStopping(patience=5,
                                              restore_best_weights=True,
                                              monitor="val_loss")

# ───── Train ───────────────────────────────────────────────────────
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    batch_size=BATCH,
    shuffle=False,          # keep chronological order
    callbacks=[early_stop],
    verbose=2
)

# ───── Evaluate on validation set ──────────────────────────────────
y_prob = model.predict(X_val, batch_size=BATCH).flatten()
y_pred = (y_prob >= 0.5).astype(int)

acc = accuracy_score(y_val, y_pred)
prec, rec, f1, _ = precision_recall_fscore_support(
    y_val, y_pred, labels=[0, 1], zero_division=0
)

print("\n──────── Validation metrics (threshold = 0.50) ────────")
print(f"Accuracy          : {acc:6.3f}")
print(f"Class 0 (Down) →  Precision: {prec[0]:6.3f}   Recall: {rec[0]:6.3f}   F1: {f1[0]:6.3f}")
print(f"Class 1 (Up  ) →  Precision: {prec[1]:6.3f}   Recall: {rec[1]:6.3f}   F1: {f1[1]:6.3f}")
print(f"Macro-F1          : {f1.mean():6.3f}")


Epoch 1/30
1008/1008 - 8s - 8ms/step - accuracy: 0.5181 - loss: 0.6933 - val_accuracy: 0.5066 - val_loss: 0.6967
Epoch 2/30
1008/1008 - 7s - 7ms/step - accuracy: 0.5160 - loss: 0.6924 - val_accuracy: 0.5054 - val_loss: 0.6947
Epoch 3/30
1008/1008 - 7s - 7ms/step - accuracy: 0.5184 - loss: 0.6920 - val_accuracy: 0.5070 - val_loss: 0.6941
Epoch 4/30
1008/1008 - 7s - 7ms/step - accuracy: 0.5190 - loss: 0.6918 - val_accuracy: 0.5047 - val_loss: 0.6952
Epoch 5/30
1008/1008 - 7s - 7ms/step - accuracy: 0.5232 - loss: 0.6914 - val_accuracy: 0.5087 - val_loss: 0.6956
Epoch 6/30
1008/1008 - 7s - 7ms/step - accuracy: 0.5210 - loss: 0.6914 - val_accuracy: 0.5105 - val_loss: 0.6935
Epoch 7/30
1008/1008 - 7s - 7ms/step - accuracy: 0.5238 - loss: 0.6913 - val_accuracy: 0.5081 - val_loss: 0.6947
Epoch 8/30
1008/1008 - 7s - 7ms/step - accuracy: 0.5235 - loss: 0.6911 - val_accuracy: 0.5101 - val_loss: 0.6955
Epoch 9/30
1008/1008 - 7s - 7ms/step - accuracy: 0.5240 - loss: 0.6911 - val_accuracy: 0.5108 - 