In [11]:
!pip install tensorflow



Collecting tensorflow
  Downloading tensorflow-2.20.0-cp311-cp311-win_amd64.whl.metadata (4.6 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.12.19-py2.py3-none-any.whl.metadata (1.0 kB)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.7.0-py3-none-any.whl.metadata (1.5 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting protobuf>=5.28.0 (from tensorflow)
  Downloading protobuf-6.33.4

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
streamlit 1.16.0 requires protobuf<4,>=3.12, but you have protobuf 6.33.4 which is incompatible.
torch 2.5.1 requires sympy==1.13.1; python_version >= "3.9", but you have sympy 1.13.2 which is incompatible.


In [13]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [15]:

# =====================
# CONFIG
# =====================
DATA_PATH = Path("data/processed/nasdaq100_ml_dataset.csv")
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

LOOKBACK = 30
TEST_SIZE = 0.2
RANDOM_STATE = 42

tf.random.set_seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)

# =====================
# DATA LOADING
# =====================
def load_data():
    df = pd.read_csv(DATA_PATH)

    y = df["Target"].values
    X = df.drop(columns=["date", "Target"]).values

    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    return X, y, scaler

# =====================
# SEQUENCE CREATION
# =====================
def create_sequences(X, y, lookback):
    X_seq, y_seq = [], []

    for i in range(lookback, len(X)):
        X_seq.append(X[i - lookback:i])
        y_seq.append(y[i])

    return np.array(X_seq), np.array(y_seq)

# =====================
# MODEL
# =====================
def build_lstm(input_shape, num_classes):
    model = Sequential([
        LSTM(64, return_sequences=True, input_shape=input_shape),
        Dropout(0.3),
        LSTM(32),
        Dropout(0.3),
        Dense(num_classes, activation="softmax")
    ])

    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )

    return model

# =====================
# TRAINING
# =====================
def main():
    X, y, scaler = load_data()
    X_seq, y_seq = create_sequences(X, y, LOOKBACK)

    split_idx = int(len(X_seq) * (1 - TEST_SIZE))

    X_train, X_test = X_seq[:split_idx], X_seq[split_idx:]
    y_train, y_test = y_seq[:split_idx], y_seq[split_idx:]

    model = build_lstm(
        input_shape=(LOOKBACK, X_train.shape[2]),
        num_classes=len(np.unique(y))
    )

    callbacks = [
        EarlyStopping(patience=10, restore_best_weights=True),
        ModelCheckpoint(
            MODEL_PATH / "best_lstm_model.keras",
            save_best_only=True
        )
    ]

    history = model.fit(
        X_train,
        y_train,
        validation_split=0.1,
        epochs=50,
        batch_size=32,
        callbacks=callbacks,
        verbose=1
    )

    # =====================
    # EVALUATION
    # =====================
    y_pred_prob = model.predict(X_test)
    y_pred = np.argmax(y_pred_prob, axis=1)

    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="macro")

    print("\nLSTM Results")
    print(f"Accuracy : {acc:.4f}")
    print(f"F1-macro : {f1:.4f}")

    # Save scaler
    import joblib
    joblib.dump(scaler, MODEL_PATH / "scaler_lstm.pkl")

if __name__ == "__main__":
    main()


  super().__init__(**kwargs)


Epoch 1/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 35ms/step - accuracy: 0.4850 - loss: 1.0395 - val_accuracy: 0.3649 - val_loss: 1.1304
Epoch 2/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - accuracy: 0.4977 - loss: 1.0274 - val_accuracy: 0.3649 - val_loss: 1.1561
Epoch 3/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - accuracy: 0.4962 - loss: 1.0233 - val_accuracy: 0.3649 - val_loss: 1.2176
Epoch 4/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - accuracy: 0.4966 - loss: 1.0174 - val_accuracy: 0.3649 - val_loss: 1.2098
Epoch 5/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step - accuracy: 0.4944 - loss: 1.0183 - val_accuracy: 0.3649 - val_loss: 1.1735
Epoch 6/50
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - accuracy: 0.4966 - loss: 1.0188 - val_accuracy: 0.3649 - val_loss: 1.1910
Epoch 7/50
[1m84/84[0m [32m━━━━