In [6]:
# ============================================
# Phase 2: LSTM Model Setup
# ============================================

# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from tensorflow.keras import layers

# 1️ Load the feature data you created in Phase 1
data = pd.read_csv("../data/processed/AAPL_features.csv", index_col=0, parse_dates=True)
print(data.head())

# 2 Select the feature(s) and target
# We'll predict the next closing price based on previous ones
features = ["Close", "MA7", "MA30", "Volatility", "Volume"]
target = "Close"

dataset = data[features].dropna()

# 3️ Scale data for neural networks
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(dataset)

# 4️ Create sequences for LSTM (e.g. last 60 days -> next day)
def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(seq_length, len(data)):
        X.append(data[i-seq_length:i])
        y.append(data[i, 0])  # first column = "Close"
    return np.array(X), np.array(y)

SEQ_LEN = 60
X, y = create_sequences(scaled_data, SEQ_LEN)

print(f"X shape: {X.shape}, y shape: {y.shape}")

# 5️ Train/Test split
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

print(f"Train size: {len(X_train)}, Test size: {len(X_test)}")

# 6️ Define LSTM model
model = keras.Sequential([
    layers.LSTM(64, return_sequences=True, input_shape=(SEQ_LEN, X.shape[2])),
    layers.Dropout(0.2),
    layers.LSTM(64),
    layers.Dropout(0.2),
    layers.Dense(1)
])

model.compile(optimizer="adam", loss="mean_squared_error")

model.summary()

# 7️ (Optional) Save model structure for tomorrow
model.save("models/lstm_structure.keras")
print("✅ LSTM model structure saved — ready for training tomorrow!")

# Tomorrow:
# We'll do:
# model.fit(...)
# evaluation
# predictions & visualization


                        Close               High                Low  \
Price                                                                 
Ticker                   AAPL               AAPL               AAPL   
Date                      NaN                NaN                NaN   
2020-01-02  72.53850555419922  72.59888386623527  71.29229630932706   
2020-01-03  71.83329010009766  72.59405542623057  71.60868452797844   
2020-01-06  72.40567016601562   72.4443131708714  70.70300468370534   

                         Open     Volume  
Price                                     
Ticker                   AAPL       AAPL  
Date                      NaN        NaN  
2020-01-02  71.54588227171874  135480400  
2020-01-03  71.76566667933918  146322800  
2020-01-06  70.95418053006813  118387200  


  data = pd.read_csv("../data/processed/AAPL_features.csv", index_col=0, parse_dates=True)


KeyError: "['MA7', 'MA30', 'Volatility'] not in index"