# Model Training and Evaluation for Stock Price Prediction

This notebook demonstrates the process of training and evaluating LSTM models for stock price prediction.

In [None]:
# Import libraries
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Add the project root (the parent of the working directory) to the import
# path so the project's `src` package can be imported. Notebooks have no
# `__file__`, so the root is derived from the current working directory
# (assumes the kernel was started in the notebook's own folder).
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:  # keeps the cell idempotent across re-runs
    sys.path.append(project_root)

# Seed the NumPy and PyTorch random number generators so that weight
# initialisation and any shuffling start from a fixed state on every run
# (GPU kernels may still introduce some non-determinism).
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Set plot style
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 12

# Set pandas display options
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

# Set device: use the GPU when one is available, otherwise fall back to CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

## 1. Load and Prepare Data

First, let's load the stock price dataset and prepare it for training.

In [None]:
# Define paths relative to the project root. Notebooks have no `__file__`, so
# the root is taken to be the parent of the current working directory
# (assumes the kernel was started in the notebook's own folder).
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
data_dir = os.path.join(project_root, 'data')
raw_dir = os.path.join(data_dir, 'raw')
processed_dir = os.path.join(data_dir, 'processed')
models_dir = os.path.join(project_root, 'models')
results_dir = os.path.join(project_root, 'results')

# Ensure the output directories exist (raw_dir is only read from in this
# notebook, so it is not created here)
os.makedirs(processed_dir, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)
os.makedirs(results_dir, exist_ok=True)

# Load stock data
from src.data_acquisition import load_stock_data

stock_data_path = os.path.join(raw_dir, 'stock_data.csv')
stock_data = load_stock_data(stock_data_path)

# Display the first few rows
stock_data.head()

## 2. Feature Engineering

Let's prepare features for our model.

In [None]:
from src.feature_engineering import prepare_features

# The first column of the raw frame is used as the series to predict
target_col = stock_data.columns[0]

# Feature-generation switches, gathered in one place so they are easy to tune
feature_options = {
    'include_technical': False,  # disabled since we don't have OHLCV data
    'include_statistical': True,
    'include_lags': True,
    'normalize': True,
    'reduce_dim': False,
    'forecast_horizon': 5,
}
processed_data, transformers = prepare_features(
    stock_data, target_col=target_col, **feature_options
)

# Persist the engineered feature table in the processed-data directory
processed_csv_path = os.path.join(processed_dir, 'processed_stock_data.csv')
processed_data.to_csv(processed_csv_path)

# Preview the engineered feature table
processed_data.head()

## 3. Data Preparation for Training

Let's prepare the data for training our LSTM model.

In [None]:
from src.data_preparation import prepare_data_for_training

# Windowing and batching hyper-parameters
seq_length = 20
forecast_horizon = 1  # Single-step forecasting
batch_size = 32

# NOTE(review): 'Target_5' is presumably the column produced by
# prepare_features(forecast_horizon=5) -- confirm that combining it with
# forecast_horizon=1 here is intended.
loader_options = dict(
    target_col='Target_5',
    seq_length=seq_length,
    forecast_horizon=forecast_horizon,
    val_size=0.15,
    test_size=0.15,
    batch_size=batch_size,
)
train_loader, val_loader, test_loader, feature_dim = prepare_data_for_training(
    processed_data, **loader_options
)

print(f"Feature dimension: {feature_dim}")

# Peek at the first training batch (if there is one) to sanity-check shapes
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    X_batch, y_batch = first_batch
    print(f"X_batch shape: {X_batch.shape}")
    print(f"y_batch shape: {y_batch.shape}")

## 4. Model Definition

Let's define our LSTM models.

In [None]:
from src.model import create_model

# Architecture hyper-parameters shared by both model variants
hidden_dim = 64
num_layers = 2
output_dim = 1  # Single-step forecasting
dropout_prob = 0.2

# Both variants are built from the same positional configuration; only the
# model-type key passed to create_model differs.
shared_model_args = (feature_dim, hidden_dim, num_layers, output_dim, dropout_prob)
lstm_model = create_model('lstm', *shared_model_args)
lstm_attention_model = create_model('lstm_attention', *shared_model_args)

# Show each architecture beneath its heading
print("LSTM Model:", lstm_model, sep="\n")
print("\nLSTM with Attention Model:", lstm_attention_model, sep="\n")

## 5. Model Training

Let's train our LSTM models.

In [None]:
from src.train import train_model

# Define training parameters
num_epochs = 50
learning_rate = 0.001
weight_decay = 1e-5

# Define loss function (the evaluation cell passes the same criterion)
criterion = nn.MSELoss()

# Train LSTM model
print("Training LSTM model...")
lstm_model.to(device)
optimizer = optim.Adam(lstm_model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Halve the learning rate (factor=0.5) once the monitored quantity has stopped
# decreasing for more than `patience` epochs; which quantity is monitored is
# decided by the scheduler.step(...) call inside train_model.
# `verbose=True` is intentionally not passed: the argument is deprecated since
# PyTorch 2.2 and recent releases no longer accept it. The current rate can be
# read from optimizer.param_groups[0]['lr'].
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

lstm_model_path = os.path.join(models_dir, 'lstm_model.pth')
lstm_history = train_model(
    lstm_model, train_loader, val_loader, criterion, optimizer, scheduler,
    num_epochs, device, early_stopping_patience=10, model_save_path=lstm_model_path
)

In [None]:
# Train LSTM with Attention model, using the same hyper-parameters and loss
# as the plain LSTM but a fresh optimizer and scheduler
print("Training LSTM with Attention model...")
lstm_attention_model.to(device)
optimizer = optim.Adam(lstm_attention_model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# `verbose=True` is intentionally not passed: the argument is deprecated since
# PyTorch 2.2 and recent releases no longer accept it. The current rate can be
# read from optimizer.param_groups[0]['lr'].
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

lstm_attention_model_path = os.path.join(models_dir, 'lstm_attention_model.pth')
lstm_attention_history = train_model(
    lstm_attention_model, train_loader, val_loader, criterion, optimizer, scheduler,
    num_epochs, device, early_stopping_patience=10, model_save_path=lstm_attention_model_path
)

## 6. Plot Training History

Let's visualize the training history.

In [None]:
from src.train import plot_training_history

# Output image paths for the two training-history figures
lstm_history_png = os.path.join(results_dir, 'lstm_training_history.png')
lstm_attention_history_png = os.path.join(results_dir, 'lstm_attention_training_history.png')

# Plain LSTM first, then the attention variant
plot_training_history(lstm_history, save_path=lstm_history_png)
plot_training_history(lstm_attention_history, save_path=lstm_attention_history_png)

## 7. Model Evaluation

Let's evaluate our trained models.

In [None]:
from src.train import evaluate_model, calculate_metrics

# Reload the weights stored at the paths that were handed to train_model as
# model_save_path, mapping tensors onto the active device
lstm_state = torch.load(lstm_model_path, map_location=device)
lstm_model.load_state_dict(lstm_state)
lstm_attention_state = torch.load(lstm_attention_model_path, map_location=device)
lstm_attention_model.load_state_dict(lstm_attention_state)

# Plain LSTM: run on the test loader, then derive the metrics
print("Evaluating LSTM model...")
lstm_eval = evaluate_model(lstm_model, test_loader, criterion, device)
lstm_test_loss, lstm_predictions, lstm_targets = lstm_eval
lstm_metrics = calculate_metrics(lstm_predictions, lstm_targets)
print(f"LSTM Test Metrics: {lstm_metrics}")

# Attention variant: same procedure
print("\nEvaluating LSTM with Attention model...")
lstm_attention_eval = evaluate_model(lstm_attention_model, test_loader, criterion, device)
lstm_attention_test_loss, lstm_attention_predictions, lstm_attention_targets = lstm_attention_eval
lstm_attention_metrics = calculate_metrics(lstm_attention_predictions, lstm_attention_targets)
print(f"LSTM with Attention Test Metrics: {lstm_attention_metrics}")

## 8. Visualize Predictions

Let's visualize the predictions of our models.

In [None]:
from src.evaluate import plot_predictions, plot_residuals, plot_scatter

# Output image paths for the actual-vs-predicted figures
lstm_predictions_png = os.path.join(results_dir, 'lstm_predictions.png')
lstm_attention_predictions_png = os.path.join(results_dir, 'lstm_attention_predictions.png')

# Plain LSTM
plot_predictions(
    lstm_targets,
    lstm_predictions,
    title='LSTM: Actual vs Predicted Values',
    save_path=lstm_predictions_png,
)

# Attention variant
plot_predictions(
    lstm_attention_targets,
    lstm_attention_predictions,
    title='LSTM with Attention: Actual vs Predicted Values',
    save_path=lstm_attention_predictions_png,
)

In [None]:
# Output image paths for the residual-analysis figures
lstm_residuals_png = os.path.join(results_dir, 'lstm_residuals.png')
lstm_attention_residuals_png = os.path.join(results_dir, 'lstm_attention_residuals.png')

# Plain LSTM
plot_residuals(
    lstm_targets,
    lstm_predictions,
    title='LSTM: Residuals Analysis',
    save_path=lstm_residuals_png,
)

# Attention variant
plot_residuals(
    lstm_attention_targets,
    lstm_attention_predictions,
    title='LSTM with Attention: Residuals Analysis',
    save_path=lstm_attention_residuals_png,
)

In [None]:
# Output image paths for the actual-vs-predicted scatter figures
lstm_scatter_png = os.path.join(results_dir, 'lstm_scatter.png')
lstm_attention_scatter_png = os.path.join(results_dir, 'lstm_attention_scatter.png')

# Plain LSTM
plot_scatter(
    lstm_targets,
    lstm_predictions,
    title='LSTM: Actual vs Predicted Scatter Plot',
    save_path=lstm_scatter_png,
)

# Attention variant
plot_scatter(
    lstm_attention_targets,
    lstm_attention_predictions,
    title='LSTM with Attention: Actual vs Predicted Scatter Plot',
    save_path=lstm_attention_scatter_png,
)

## 9. Uncertainty Quantification

Let's quantify the uncertainty in our predictions using Monte Carlo dropout.

In [None]:
from src.evaluate import monte_carlo_dropout_prediction, plot_prediction_intervals

# Take the first batch from the test loader; fail with an explicit message
# instead of a NameError further down when the loader yields nothing
first_test_batch = next(iter(test_loader), None)
if first_test_batch is None:
    raise ValueError("test_loader yielded no batches; cannot draw a sample for uncertainty estimation")
X_test_sample, y_test_sample = first_test_batch

# Inputs go to the compute device; targets become a NumPy array for plotting
X_test_sample = X_test_sample.to(device)
y_test_sample = y_test_sample.cpu().numpy()

# Generate predictions with uncertainty for LSTM model
lstm_mean_pred, lstm_std_pred = monte_carlo_dropout_prediction(lstm_model, X_test_sample, n_samples=100, device=device)

# Generate predictions with uncertainty for LSTM with Attention model
lstm_attention_mean_pred, lstm_attention_std_pred = monte_carlo_dropout_prediction(lstm_attention_model, X_test_sample, n_samples=100, device=device)

# NOTE(review): Monte Carlo dropout requires dropout to be active while
# sampling, so the helper presumably puts the model in train mode; its source
# is not visible here. Switch both models back to eval mode defensively so the
# later inference cells do not run with dropout enabled. This is a no-op if
# the helper already restores the mode.
lstm_model.eval()
lstm_attention_model.eval()

# Plot prediction intervals
plot_prediction_intervals(
    y_test_sample, lstm_mean_pred, lstm_std_pred, confidence=0.95,
    title='LSTM: Predictions with 95% Confidence Intervals',
    save_path=os.path.join(results_dir, 'lstm_prediction_intervals.png')
)

plot_prediction_intervals(
    y_test_sample, lstm_attention_mean_pred, lstm_attention_std_pred, confidence=0.95,
    title='LSTM with Attention: Predictions with 95% Confidence Intervals',
    save_path=os.path.join(results_dir, 'lstm_attention_prediction_intervals.png')
)

## 10. Attention Visualization

Let's visualize the attention weights to understand which time steps are most important for predictions.

In [None]:
from src.evaluate import visualize_attention

# Slice (rather than index) so the leading dimension of the sample is retained
single_sample = X_test_sample[:1]
attention_png = os.path.join(results_dir, 'attention_weights.png')

# Render the attention weights of the attention model for that one sample
visualize_attention(lstm_attention_model, single_sample, save_path=attention_png)

## 11. Comprehensive Evaluation

Let's perform a comprehensive evaluation of our models.

In [None]:
from src.evaluate import evaluate_model_comprehensive

# Each model gets its own sub-folder of results_dir, passed on as output_dir
lstm_output_dir = os.path.join(results_dir, 'lstm')
lstm_attention_output_dir = os.path.join(results_dir, 'lstm_attention')

# Plain LSTM. Note that this rebinds lstm_metrics, replacing the value that
# the evaluation cell obtained from calculate_metrics.
print("Performing comprehensive evaluation of LSTM model...")
lstm_metrics = evaluate_model_comprehensive(
    lstm_model, test_loader, device, output_dir=lstm_output_dir
)
print(f"LSTM Metrics: {lstm_metrics}")

# Attention variant. lstm_attention_metrics is rebound in the same way.
print("\nPerforming comprehensive evaluation of LSTM with Attention model...")
lstm_attention_metrics = evaluate_model_comprehensive(
    lstm_attention_model, test_loader, device, output_dir=lstm_attention_output_dir
)
print(f"LSTM with Attention Metrics: {lstm_attention_metrics}")

## 12. Model Comparison

Let's compare the performance of our models.

In [None]:
# Create a comparison table. Values are looked up by metric name so that every
# row pairs the same metric for both models, regardless of the key order (or
# key set) of the two metric dicts. A metric missing from one model is NaN.
metric_names = list(lstm_metrics)
metric_names += [name for name in lstm_attention_metrics if name not in lstm_metrics]

comparison = pd.DataFrame({
    'Metric': metric_names,
    'LSTM': [lstm_metrics.get(name, float('nan')) for name in metric_names],
    'LSTM with Attention': [lstm_attention_metrics.get(name, float('nan')) for name in metric_names],
})

# Display the comparison table
comparison

In [None]:
# Grouped bar chart: one pair of bars (LSTM / LSTM with Attention) per metric
metrics = ['MSE', 'RMSE', 'MAE', 'MAPE']
lstm_values = [lstm_metrics[metric] for metric in metrics]
lstm_attention_values = [lstm_attention_metrics[metric] for metric in metrics]

x = np.arange(len(metrics))
width = 0.35  # bar width; each pair is centred on its tick

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(x - width/2, lstm_values, width, label='LSTM')
ax.bar(x + width/2, lstm_attention_values, width, label='LSTM with Attention')

ax.set_xlabel('Metric')
ax.set_ylabel('Value')
ax.set_title('Model Comparison')
ax.set_xticks(x)
ax.set_xticklabels(metrics)
ax.legend()
ax.grid(True, alpha=0.3)

# Save the figure before showing it
fig.tight_layout()
fig.savefig(os.path.join(results_dir, 'model_comparison.png'))
plt.show()

## 13. Save Model Metadata

Let's save metadata for our best model.

In [None]:
import json
from datetime import datetime


def _json_default(value):
    """Fallback encoder for objects the json module cannot serialise natively.

    Objects exposing ``tolist()`` (for example NumPy scalars and arrays or
    PyTorch tensors) are converted to plain Python values. Anything else
    raises TypeError, mirroring json's default behaviour.
    """
    if hasattr(value, 'tolist'):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


# Determine the best model based on RMSE (lower is better; on a tie the
# attention model is selected)
if lstm_metrics['RMSE'] < lstm_attention_metrics['RMSE']:
    best_model_type = 'lstm'
    best_model_path = lstm_model_path
    best_metrics = lstm_metrics
else:
    best_model_type = 'lstm_attention'
    best_model_path = lstm_attention_model_path
    best_metrics = lstm_attention_metrics

# Create metadata describing the selected model's architecture and inputs
metadata = {
    'model_type': best_model_type,
    'input_dim': feature_dim,
    'hidden_dim': hidden_dim,
    'num_layers': num_layers,
    'output_dim': output_dim,
    'dropout_prob': dropout_prob,
    'feature_cols': processed_data.drop(columns=['Target_5']).columns.tolist(),
    'seq_length': seq_length,
    'training_date': datetime.now().strftime('%Y%m%d_%H%M%S'),
    'test_metrics': best_metrics
}

# Serialise to a string BEFORE opening the file, so that a serialisation
# error cannot leave a truncated model_metadata.json behind. The default hook
# covers metric values that are NumPy scalars (e.g. float32), which json
# cannot encode on its own.
metadata_json = json.dumps(metadata, indent=4, default=_json_default)

# Save metadata
metadata_path = os.path.join(models_dir, 'model_metadata.json')
with open(metadata_path, 'w', encoding='utf-8') as f:
    f.write(metadata_json)

print(f"Metadata saved to {metadata_path}")
print(f"Best model: {best_model_type} with RMSE: {best_metrics['RMSE']:.4f}")

## 14. Conclusion

In this notebook, we've trained and evaluated LSTM models for stock price prediction, and compared the performance of a basic LSTM model with that of an LSTM model with an attention mechanism.

Key findings:
- We've prepared features using statistical features and lag features (technical indicators were disabled because the dataset has no OHLCV columns)
- We've trained LSTM models with and without an attention mechanism
- We've evaluated the models using various metrics (RMSE, MAPE, etc.)
- We've visualized predictions, residuals, and attention weights
- We've quantified prediction uncertainty using Monte Carlo dropout

Next steps:
- Deploy the best model as an API or dashboard
- Implement a retraining pipeline for continuous model updates
- Explore multi-step forecasting for longer-term predictions