# 07_deep_learning_improved.ipynb  
## Advanced MLP & LSTM on the Rich Feature Set

**Goal:** apply our improved features (lags, volatility, volume) to build a **well-tuned MLP** and **LSTM**, and compare their performance.

---

### 1. Setup & Imports  

In [14]:
import numpy as np
import pandas as pd
from pathlib           import Path
from sklearn.metrics   import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras import Sequential, layers
from keras_tuner import RandomSearch



### 2. Load Improved Features  

In [15]:
nb_dir = Path().resolve()             
root   = nb_dir.parent  
basic_csv    = root/"data"/"processed"/"features.csv"

# Create models folder if not exists
models_dir= root/"models"
models_dir.mkdir(exist_ok=True)

kt_dir = root/"notebooks"/"keras_tuner"
kt_dir.mkdir(exist_ok=True)

df = pd.read_csv(basic_csv, index_col="Date", parse_dates=True)

# Lagged returns 1–5
for lag in range(1,6):
    df[f"ret_lag_{lag}"] = df["return"].shift(lag)

# 10-day rolling volatility
df["vol_10"] = df["return"].rolling(10).std()

# Volume % change (from raw CSV)
raw_csv = root/"data"/"raw"/"sp500.csv"
vol = pd.read_csv(raw_csv, index_col="Date", parse_dates=True)["Volume"]
df["vol_pct"] = vol.pct_change()

# Drop NaNs
df = df.dropna()
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,return,rsi,macd,ret_lag_1,ret_lag_2,ret_lag_3,ret_lag_4,ret_lag_5,vol_10,vol_pct
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-07-24,3218.58,3227.26,3200.05,3215.63,2460505000.0,-0.00621,56.181238,3.240325,-0.012396,0.005731,0.001678,0.008372,0.002845,0.008538,-0.102679
2020-07-27,3219.84,3241.43,3214.25,3239.41,2355871000.0,0.007368,58.949535,1.761863,-0.00621,-0.012396,0.005731,0.001678,0.008372,0.0079,-0.042525
2020-07-28,3234.27,3243.72,3216.17,3218.44,2244363000.0,-0.006494,55.612967,-0.938636,0.007368,-0.00621,-0.012396,0.005731,0.001678,0.007391,-0.047332
2020-07-29,3227.22,3264.74,3227.22,3258.44,2471114000.0,0.012352,60.236275,-0.370417,-0.006494,0.007368,-0.00621,-0.012396,0.005731,0.007867,0.101031
2020-07-30,3231.76,3250.92,3204.13,3246.22,2436861000.0,-0.003757,58.240473,-1.11493,0.012352,-0.006494,0.007368,-0.00621,-0.012396,0.00789,-0.013861


## 3. Train/Test Split  
80% train / 20% test, **chronologically**.


In [28]:
feature_cols = [
    "rsi", "macd",
    "ret_lag_1","ret_lag_2","ret_lag_3","ret_lag_4","ret_lag_5",
    "vol_10", "vol_pct"
]

# X = df.drop(columns=["return","Open","High","Low","Close","Volume"])
X = df[feature_cols].to_numpy()
# y = df["return"].values
y = df["return"].to_numpy()
# split = int(len(X)*0.8)
# X_tr, X_te = X.iloc[:split].values, X.iloc[split:].values
# y_tr, y_te = y[:split], y[split:]

# X_num and y_num defined earlier as NumPy arrays
split = int(len(X) * 0.8)

# NumPy slicing (no .iloc)
X_tr, X_te = X[:split], X[split:]
y_tr, y_te = y[:split], y[split:]
print("Shapes:", X_tr.shape, X_te.shape)


Shapes: (969, 9) (243, 9)


## 4. Scale for Neural Nets  
We scale now so both MLP & LSTM get standardized inputs.


In [17]:
scaler     = StandardScaler().fit(X_tr)
X_tr_s, X_te_s = scaler.transform(X_tr), scaler.transform(X_te)

# Save scaler for later deployment
import joblib
joblib.dump(scaler, models_dir/"scaler.pkl")

['C:\\Users\\Antho\\OneDrive\\Documentos\\Santiago\\Finance project\\sp500_dl\\models\\scaler.pkl']

## 5. Hyper-tune MLP with RandomSearch

In [18]:
def build_mlp(hp):
    m = Sequential()
    for i in range(hp.Int("layers", 1, 3)):
        units = hp.Int(f"units_{i}", 32, 256, step=32)
        m.add(layers.Dense(units, activation="relu"))
        m.add(layers.Dropout(hp.Float(f"dropout_{i}", 0.0, 0.5, step=0.1)))
    m.add(layers.Dense(1))
    m.compile(
      optimizer=tf.keras.optimizers.Adam(
        learning_rate=hp.Float("lr", 1e-4, 1e-2, sampling="log")
      ),
      loss="mse",
      metrics=["mae"]
    )
    return m

# Set up the tuner
tuner = RandomSearch(
    build_mlp,
    objective="val_mae",
    max_trials=10,
    executions_per_trial=1,
    directory=str(kt_dir),       # point to the real folder
    project_name="mlp_improved",
    overwrite=True               # start fresh each run
)

# Run the search
tuner.search(
    X_tr_s, y_tr,
    validation_data=(X_te_s, y_te),
    epochs=30,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=5)]
)

# Grab the best model
best_mlp = tuner.get_best_models(1)[0]

# Build it explicitly so summary() works
best_mlp.build(input_shape=(None, X_tr_s.shape[1]))

print("Best MLP summary:")
best_mlp.summary()


Trial 10 Complete [00h 00m 11s]
val_mae: 0.009557699784636497

Best val_mae So Far: 0.006955979857593775
Total elapsed time: 00h 03m 56s
Best MLP summary:


  saveable.load_own_variables(weights_store.get(inner_path))


## 6. Evaluate the Tuned MLP  


In [42]:
y_pred_mlp = best_mlp.predict(X_te_s).flatten()
mae_mlp   = mean_absolute_error(y_te, y_pred_mlp)
r2_mlp     = r2_score(y_te, y_pred_mlp)
print(f"MLP improved → MAE: {mae_mlp:.5f},  R²: {r2_mlp:.3f}")

# Save MLP as HDF5
best_mlp.save(models_dir/"mlp_improved_model.h5", save_format="h5")

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 




MLP improved → MAE: 0.00696,  R²: 0.249


## 7. Build & Train a Stacked LSTM  
We use a **10-day window** and two LSTM layers.


In [None]:
# 1) Sequence prep on the SCALED numeric arrays
t = 10
def make_seq(X, y, t):
    xs, ys = [], []
    for i in range(len(X) - t):
        xs.append(X[i : i + t])
        ys.append(y[i + t])
    return np.array(xs), np.array(ys)

X_seq, y_seq       = make_seq(X_tr_s, y_tr, t)   # train sequences
X_te_seq, y_te_seq = make_seq(X_te_s, y_te, t)   # test sequences

print("Train seq:", X_seq.shape, y_seq.shape)
print("Test  seq:", X_te_seq.shape, y_te_seq.shape)

# 2) Build the LSTM model
from tensorflow.keras import Sequential, layers

lstm = Sequential([
    layers.Input(shape=(t, X_tr_s.shape[1])),
    layers.LSTM(64, return_sequences=True),
    layers.LSTM(32),
    layers.Dense(16, activation="relu"),
    layers.Dense(1)
])
lstm.compile(optimizer="adam", loss="mse", metrics=["mae"])

# 3) Train
hist = lstm.fit(
    X_seq, y_seq,
    validation_data=(X_te_seq, y_te_seq),
    epochs=50,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=7)]
)

# 4) Evaluate & save
from sklearn.metrics import mean_absolute_error, r2_score

y_pred_lstm = lstm.predict(X_te_seq).flatten()
mae_lstm    = mean_absolute_error(y_te_seq, y_pred_lstm)
r2_lstm     = r2_score(y_te_seq, y_pred_lstm)
print(f"LSTM stacked → MAE: {mae_lstm:.5f},  R²: {r2_lstm:.3f}")

# Save as HDF5 for deployment
lstm.save(models_dir/"lstm_improved_model.h5", save_format="h5")


Train seq: (959, 10, 9) (959,)
Test  seq: (233, 10, 9) (233,)
Epoch 1/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step - loss: 9.8705e-04 - mae: 0.0210 - val_loss: 1.7591e-04 - val_mae: 0.0098
Epoch 2/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 1.8127e-04 - mae: 0.0101 - val_loss: 1.7785e-04 - val_mae: 0.0095
Epoch 3/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 1.3132e-04 - mae: 0.0086 - val_loss: 2.2693e-04 - val_mae: 0.0107
Epoch 4/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 1.2456e-04 - mae: 0.0083 - val_loss: 2.0392e-04 - val_mae: 0.0102
Epoch 5/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 1.3442e-04 - mae: 0.0086 - val_loss: 2.1719e-04 - val_mae: 0.0107
Epoch 6/50
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - loss: 1.3165e-04 - mae: 0.0087 - val_loss: 1.9399e-04 - 



LSTM stacked → MAE: 0.00970,  R²: -0.239


## 8. Evaluate the Stacked LSTM  


In [39]:
y_pred_lstm = lstm.predict(X_te2).flatten()
mae_lstm    = mean_absolute_error(y_te2, y_pred_lstm)
r2_lstm     = r2_score(y_te2, y_pred_lstm)
print(f"LSTM improved → MAE: {mae_lstm:.5f},  R²: {r2_lstm:.3f}")

# Save LSTM as HDF5
lstm.save(models_dir/"lstm_improved_model.h5", save_format="h5")

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 




LSTM improved → MAE: 0.02823,  R²: -7.113


## 9. Summary & Comparison  


In [43]:
import pandas as pd
summary3 = pd.DataFrame({
    "Model": ["MLP (basic)", "MLP (tuned)", "LSTM (basic)", "LSTM (stacked)"],
    "MAE":   [0.11887,         mae_mlp,       0.01439,         mae_lstm],
    "R²":    [-234.074,        r2_mlp,        -4.157,          r2_lstm]
})
display(summary3.style.format({"MAE":"{:.5f}", "R²":"{:.3f}"}))


Unnamed: 0,Model,MAE,R²
0,MLP (basic),0.11887,-234.074
1,MLP (tuned),0.00696,0.249
2,LSTM (basic),0.01439,-4.157
3,LSTM (stacked),0.02823,-7.113


## 10. Next Steps  
1. **Ensemble** final RF+XGB+MLP+LSTM.  
2. **Walk‐forward** validation on the ensemble.  
3. **Deploy** via Streamlit with live data.  
4. **Document** your journey in a final PDF or blog post.