# 03 LSTM Modeling
- LSTM model for TSLA
- Data scaling, sequence generation
- Model architecture, training
- Model evaluation (MAE, RMSE, MAPE)

# 03 LSTM Modeling
This notebook demonstrates how to build, train, and evaluate an LSTM model for TSLA stock price forecasting.

In [2]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

ModuleNotFoundError: No module named 'tensorflow.python'

In [3]:
# Load processed TSLA data
file_path = '../data/processed/TSLA_processed.csv'
df = pd.read_csv(file_path)
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date').sort_index()

# Use only the 'Adj Close' column for forecasting
series = df['Adj Close'].values.reshape(-1, 1)
series.shape

Split date 2024-01-01 not in data. Using closest available date.


TypeError: DatetimeIndex.get_loc() got an unexpected keyword argument 'method'

In [5]:
# Split data into train and test sets (train: 2015-2023, test: 2024-2025)
split_idx = df.index.get_loc('2024-01-01')
train, test = series[:split_idx], series[split_idx:]

# Scale data to [0, 1] for LSTM
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train)
test_scaled = scaler.transform(test)

# Function to create sequences for LSTM
def create_sequences(data, seq_length):
    xs, ys = [], []
    for i in range(len(data) - seq_length):
        xs.append(data[i:i+seq_length])
        ys.append(data[i+seq_length])
    return np.array(xs), np.array(ys)

seq_length = 30  # Use 30 days for each input sequence
X_train, y_train = create_sequences(train_scaled, seq_length)
X_test, y_test = create_sequences(np.concatenate([train_scaled[-seq_length:], test_scaled]), seq_length)

print(f'X_train shape: {X_train.shape}, y_train shape: {y_train.shape}')
print(f'X_test shape: {X_test.shape}, y_test shape: {y_test.shape}')

KeyError: '2024-01-01'

In [None]:
# Build and train the LSTM model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(1)
])
model.compile(optimizer='adam', loss='mse')

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

In [None]:
# Plot training and validation loss curves
plt.figure(figsize=(8,4))
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('LSTM Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Make predictions with the LSTM model
# Predict on test set
pred_scaled = model.predict(X_test)
pred = scaler.inverse_transform(pred_scaled)
y_test_inv = scaler.inverse_transform(y_test)

# Plot actual vs predicted
plt.figure(figsize=(12,6))
plt.plot(df.index[-len(y_test_inv):], y_test_inv, label='Actual')
plt.plot(df.index[-len(pred):], pred, label='LSTM Forecast')
plt.title('LSTM Forecast vs Actual')
plt.xlabel('Date')
plt.ylabel('Adj Close')
plt.legend()
plt.show()

In [None]:
# Evaluate LSTM forecast performance
mae = mean_absolute_error(y_test_inv, pred)
rmse = np.sqrt(mean_squared_error(y_test_inv, pred))
mape = np.mean(np.abs((y_test_inv - pred) / y_test_inv)) * 100
print(f'LSTM MAE: {mae:.4f}')
print(f'LSTM RMSE: {rmse:.4f}')
print(f'LSTM MAPE: {mape:.2f}%')

In [None]:
# Plot residuals to check for patterns
residuals = y_test_inv.flatten() - pred.flatten()
plt.figure(figsize=(12,4))
plt.plot(residuals)
plt.title('LSTM Forecast Residuals (Actual - Forecast)')
plt.xlabel('Time Step')
plt.ylabel('Residual')
plt.show()

plt.figure(figsize=(6,4))
plt.hist(residuals, bins=30, edgecolor='k')
plt.title('Distribution of LSTM Residuals')
plt.xlabel('Residual')
plt.ylabel('Frequency')
plt.show()

## Discussion
- Summarize the LSTM model's performance and discuss any patterns or issues observed in the residuals.
- Consider next steps: further tuning, more features, or comparison with ARIMA.