In [None]:
# Import required libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
from tensorflow.keras import Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

# Set random seeds for reproducibility.
# Seeding NumPy alone leaves Keras weight initialisation and dropout masks
# unseeded; set_random_seed seeds Python's `random`, NumPy and TensorFlow at once.
SEED = 42
set_random_seed(SEED)

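# Preprocessed data from Day 1
# Note: this notebook does not load any data itself. The cells below expect
# X_train, y_train, X_val, y_val, X_test, y_test and `scaler` to already exist
# in the kernel: run the Day 1 notebook first to have these variables available,
# or add code here that loads them from saved files.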

# Build the LSTM model with multiple layers
def create_lstm_model(sequence_length, n_features, units=50, dropout_rate=0.2):
    """Build and compile a two-layer stacked LSTM that outputs one value per window.

    Parameters
    ----------
    sequence_length : int
        Number of time steps in each input window.
    n_features : int
        Number of features per time step.
    units : int, default 50
        Hidden units used in each of the two LSTM layers.
    dropout_rate : float, default 0.2
        Dropout fraction applied after each LSTM layer.

    Returns
    -------
    Sequential
        Model compiled with the Adam optimizer and mean-squared-error loss.
    """
    model = Sequential([
        # Declare the input shape with an explicit Input layer; passing
        # `input_shape=` to the first layer triggers a warning on Keras 3
        Input(shape=(sequence_length, n_features)),
        # First LSTM layer returns the full sequence so the next LSTM can consume it
        LSTM(units=units, return_sequences=True),
        Dropout(dropout_rate),
        # Second LSTM layer returns only its final output
        LSTM(units=units),
        Dropout(dropout_rate),
        # Output layer: a single regression value
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Instantiate the network
sequence_length = 60  # window length; same value as in Day 1
n_features = 5  # Open, High, Low, Close, Volume
model = create_lstm_model(
    sequence_length=sequence_length,
    n_features=n_features,
)

# Print the layer-by-layer summary
model.summary()


In [None]:
# Training hyperparameters
epochs = 50
batch_size = 32

# Stop when val_loss has not improved for 5 epochs and restore the best weights seen
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Fit on the training data, evaluating on the validation data each epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[early_stopping],
    verbose=1,
)

# Draw the training and validation loss curves on one axes
fig, ax = plt.subplots(figsize=(12, 6))
for key, label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[key], label=label)
ax.set_title('Model Loss During Training')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
plt.show()

# Save the model and training history
# Create the output directory first: np.save raises FileNotFoundError when the
# target directory does not exist, so a fresh checkout would fail here
os.makedirs('models', exist_ok=True)
model.save('models/tesla_lstm_model.keras')

# Save training history to a file
# history.history is a dict, so np.save stores it as a pickled object array
history_dict = history.history
np.save('models/training_history.npy', history_dict)

# You can load the history later with:
# history_dict = np.load('models/training_history.npy', allow_pickle=True).item()
# (allow_pickle=True unpickles the file; only load files you created yourself)


In [None]:
# Generate model predictions for X_test
y_pred = model.predict(x=X_test)

# Inverse transform predictions and actual values to original scale
# The scaler works on full feature rows, so each 1-D series is placed into a
# zero-filled matrix of the scaler's width before calling inverse_transform
def inverse_transform_predictions(scaler, y_pred, y_test, n_features=None, target_col=3):
    """Map scaled predictions and targets back to the scaler's original units.

    Parameters
    ----------
    scaler : object
        Anything exposing ``inverse_transform(X)`` for a 2-D ``X``.
        NOTE(review): zero-filling the other columns is only valid if the scaler
        transforms each column independently (e.g. MinMaxScaler); confirm against Day 1.
    y_pred : array-like
        Scaled model predictions; flattened to 1-D.
    y_test : array-like
        Scaled ground-truth values; flattened to 1-D. Does not need to have the
        same length as ``y_pred``.
    n_features : int, optional
        Row width the scaler expects. When omitted, ``scaler.n_features_in_`` is
        used if present, otherwise 5 (the previously hard-coded width).
    target_col : int, default 3
        Column of the scaler's feature matrix holding the target (the Close
        price column in this notebook).

    Returns
    -------
    tuple of numpy.ndarray
        ``(y_pred_transformed, y_test_transformed)``, both 1-D.

    Raises
    ------
    ValueError
        If ``target_col`` is not a valid column index for ``n_features``.
    """
    if n_features is None:
        n_features = getattr(scaler, 'n_features_in_', 5)
    if not 0 <= target_col < n_features:
        raise ValueError(
            f'target_col={target_col} is out of range for n_features={n_features}'
        )

    def _restore(values):
        # A fresh buffer per series, so the two inputs may differ in length
        flat = np.asarray(values, dtype=float).ravel()
        padded = np.zeros((flat.shape[0], n_features))
        padded[:, target_col] = flat
        return scaler.inverse_transform(padded)[:, target_col]

    return _restore(y_pred), _restore(y_test)

# Bring predictions and targets back to the original scale
y_pred_orig, y_test_orig = inverse_transform_predictions(scaler, y_pred, y_test)

# Error metrics computed on the original scale
mae = mean_absolute_error(y_test_orig, y_pred_orig)
squared_error = mean_squared_error(y_test_orig, y_pred_orig)
rmse = np.sqrt(squared_error)
mape = mean_absolute_percentage_error(y_test_orig, y_pred_orig)

# mape is a fraction, so it is scaled by 100 for display
print(
    f'Mean Absolute Error: ${mae:.2f}',
    f'Root Mean Squared Error: ${rmse:.2f}',
    f'Mean Absolute Percentage Error: {mape*100:.2f}%',
    sep='\n',
)

# Overlay predicted and actual prices
fig, ax = plt.subplots(figsize=(15, 7))
ax.plot(y_test_orig, label='Actual Prices', alpha=0.8)
ax.plot(y_pred_orig, label='Predicted Prices', alpha=0.8)
ax.set_title('Tesla Stock Price Prediction vs Actual')
ax.set_xlabel('Time')
ax.set_ylabel('Price (USD)')
ax.legend()
ax.grid(True)
plt.show()

# Prediction errors (actual minus predicted), shown as a histogram and a box plot
errors = y_test_orig - y_pred_orig
fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(15, 7))

# Left panel: histogram of the errors
sns.histplot(errors, bins=50, ax=ax_hist)
ax_hist.set_title('Distribution of Prediction Errors')
ax_hist.set_xlabel('Error (USD)')
ax_hist.set_ylabel('Frequency')

# Right panel: box plot of the same errors
sns.boxplot(y=errors, ax=ax_box)
ax_box.set_title('Box Plot of Prediction Errors')
ax_box.set_ylabel('Error (USD)')
plt.show()
