# LSTM Model Training
## Stage 08: Long Short-Term Memory Network for Time Series Prediction

This notebook explores training an LSTM model for cryptocurrency price prediction using sequential time series data.

In [None]:
import os
# Step one directory up so the relative 'artifacts/...' paths used later in
# this notebook resolve (presumably the notebook lives one level below the
# project root - confirm). NOTE: the path is relative, so re-running this cell
# moves up one more level each time.
os.chdir('../')
# IPython magic: display the working directory that is now in effect.
%pwd

## 1. Configuration Entity

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class LSTMModelTrainerConfig:
    """Immutable settings record for the LSTM training stage.

    The field names mirror the config keys that ``LSTMModelTrainer`` reads
    (``root_dir``, ``train_data_path``, ...). ``frozen=True`` makes instances
    read-only after construction.
    """
    # Directory that receives the scaler, model weights and JSON artifacts.
    root_dir: Path
    # CSV files loaded with pandas for fitting and for evaluation.
    train_data_path: Path
    test_data_path: Path
    # File name (inside root_dir) for the best model's state dict.
    model_name: str
    # Architecture hyper-parameters forwarded to CryptoLSTM.
    hidden_size: int
    num_layers: int
    dropout_rate: float
    # Optimisation hyper-parameters: Adam learning rate, DataLoader batch
    # size, and the maximum number of epochs.
    learning_rate: float
    batch_size: int
    epochs: int
    # Number of consecutive non-improving epochs tolerated before stopping.
    early_stopping_patience: int
    # Window length in time steps.
    # NOTE(review): the trainer in this notebook takes the window length as a
    # train() argument and does not read this field - confirm intended use.
    sequence_length: int
    # Name of the CSV column used as the regression target.
    target_column: str

## 2. LSTM Architecture

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
import joblib
import json
from mlProject import logger

In [None]:
class CryptoLSTM(nn.Module):
    """Stacked LSTM encoder with a small fully connected regression head.

    The network consumes a batch of sequences shaped
    ``(batch, seq_len, input_size)`` and emits one value per sequence,
    shaped ``(batch, 1)``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout_rate: Dropout probability used between LSTM layers (only
            when more than one layer is stacked) and inside the head.
    """

    def __init__(self, input_size, hidden_size=128, num_layers=2, dropout_rate=0.2):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # nn.LSTM applies dropout *between* layers, so it is switched off for
        # a single-layer network where it would have no effect.
        inter_layer_dropout = dropout_rate if num_layers > 1 else 0

        # Recurrent encoder
        self.lstm = nn.LSTM(
            batch_first=True,
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=inter_layer_dropout,
        )

        # Regression head: hidden_size -> 64 -> 32 -> 1
        self.fc1 = nn.Linear(hidden_size, 64)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        """Map ``(batch, seq_len, input_size)`` sequences to ``(batch, 1)`` outputs."""
        # Only the per-step outputs are needed; the final (h, c) pair is discarded.
        per_step_output, _ = self.lstm(x)

        # Keep the top layer's output at the final time step: (batch, hidden_size)
        summary = per_step_output[:, -1, :]

        # Both hidden layers share the same Linear -> ReLU -> Dropout pattern.
        for hidden_layer in (self.fc1, self.fc2):
            summary = self.dropout(self.relu(hidden_layer(summary)))

        return self.fc3(summary)

## 3. Time Series Dataset

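LSTM models require sequential input. Each sample is a sliding window of `sequence_length` consecutive rows of features, and its label is the target value of the row immediately after the window, so the model learns to predict future prices from recent history.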

In [None]:
class TimeSeriesDataset(Dataset):
    """Sliding-window view over row-aligned feature and target sequences.

    Sample ``i`` pairs the feature rows ``i .. i + sequence_length - 1`` with
    the target at row ``i + sequence_length`` (the step right after the
    window).

    Args:
        features: Indexable, sliceable sequence of per-step feature rows
            (e.g. a 2-D NumPy array).
        targets: Indexable sequence of per-step target values, aligned
            row-for-row with ``features``.
        sequence_length: Number of consecutive rows in each window.
    """

    def __init__(self, features, targets, sequence_length=10):
        self.features = features
        self.targets = targets
        self.sequence_length = sequence_length

    def __len__(self):
        """Return the number of complete windows that have a following target."""
        # Clamp at zero: with fewer rows than ``sequence_length`` the raw
        # difference is negative, which makes ``len()`` raise ValueError.
        return max(0, len(self.features) - self.sequence_length)

    def __getitem__(self, idx):
        """Return ``(window, target)`` as float32 tensors.

        ``window`` holds ``sequence_length`` feature rows and ``target`` has
        shape ``(1,)``. Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            # Without normalisation a negative index yields an empty or
            # misaligned slice instead of the expected sample.
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError("TimeSeriesDataset index out of range")

        window_end = idx + self.sequence_length
        feature_sequence = self.features[idx:window_end]

        # The label is the step immediately after the window.
        target = self.targets[window_end]

        return (
            torch.tensor(feature_sequence, dtype=torch.float32),
            torch.tensor([target], dtype=torch.float32),
        )

## 4. LSTM Trainer Component

In [None]:
class LSTMModelTrainer:
    """Train a ``CryptoLSTM`` regressor on sliding windows of tabular data.

    ``config`` may be a mapping (``config['key']``) or an attribute-style
    object such as ``LSTMModelTrainerConfig``. The keys read are
    ``root_dir``, ``train_data_path``, ``test_data_path``, ``model_name``,
    ``hidden_size``, ``num_layers``, ``dropout_rate``, ``learning_rate``,
    ``batch_size``, ``epochs``, ``early_stopping_patience`` and
    ``target_column``.
    """

    def __init__(self, config):
        self.config = config
        # Prefer the GPU when one is visible to PyTorch.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        logger.info(f"Using device: {self.device}")

    def _cfg(self, key):
        """Return the setting ``key`` from a mapping or attribute-style config."""
        try:
            return self.config[key]
        except TypeError:
            # Non-subscriptable configs (e.g. dataclass instances) expose
            # their settings as attributes instead.
            return getattr(self.config, key)

    def _train_one_epoch(self, model, loader, criterion, optimizer):
        """Run one optimisation pass over ``loader``; return the mean batch loss."""
        model.train()
        running_loss = 0.0

        for batch_features, batch_targets in loader:
            batch_features = batch_features.to(self.device)
            batch_targets = batch_targets.to(self.device)

            # Forward pass
            optimizer.zero_grad()
            outputs = model(batch_features)
            loss = criterion(outputs, batch_targets)

            # Backward pass
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        return running_loss / len(loader)

    def _evaluate(self, model, loader, criterion):
        """Return the mean batch loss over ``loader`` without updating weights."""
        model.eval()
        running_loss = 0.0

        with torch.no_grad():
            for batch_features, batch_targets in loader:
                batch_features = batch_features.to(self.device)
                batch_targets = batch_targets.to(self.device)

                outputs = model(batch_features)
                running_loss += criterion(outputs, batch_targets).item()

        return running_loss / len(loader)

    def train(self, sequence_length=10):
        """Train LSTM model with sequential data.

        Fits a ``StandardScaler`` on the training features, trains with Adam
        and MSE loss, halves the learning rate when the test loss plateaus,
        and stops early after ``early_stopping_patience`` epochs without
        improvement. Written to ``root_dir`` (created if missing):
        ``scaler.joblib``, the best state dict under ``model_name``,
        ``model_config.json`` and ``training_history.json``.

        NOTE: the test split also drives the LR schedule, checkpointing and
        early stopping, so the reported best loss is not an unbiased
        estimate of generalisation error.

        Args:
            sequence_length: Number of consecutive rows in each input window.

        Raises:
            ValueError: If either split has too few rows to form one window.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        try:
            root_dir = self._cfg('root_dir')
            # None of the save calls below create missing directories.
            os.makedirs(root_dir, exist_ok=True)

            # Load data
            logger.info("Loading training and test data...")
            train_data = pd.read_csv(self._cfg('train_data_path'))
            test_data = pd.read_csv(self._cfg('test_data_path'))

            # Prepare features and target
            target_column = self._cfg('target_column')
            train_x = train_data.drop([target_column], axis=1)
            test_x = test_data.drop([target_column], axis=1)
            train_y = train_data[target_column].values
            test_y = test_data[target_column].values

            input_size = train_x.shape[1]
            logger.info(f"Input features: {input_size}")
            logger.info(f"Training samples: {len(train_x)}, Test samples: {len(test_x)}")
            logger.info(f"Sequence length: {sequence_length}")

            # Each sample needs ``sequence_length`` rows plus one target row;
            # fail early with a clear message instead of deep inside the
            # DataLoader or on a division by zero.
            for split_name, n_rows in (('training', len(train_x)), ('test', len(test_x))):
                if n_rows <= sequence_length:
                    raise ValueError(
                        f"{split_name} data has {n_rows} rows; more than "
                        f"sequence_length={sequence_length} rows are required "
                        f"to build at least one sequence"
                    )

            # Scale features (statistics come from the training split only)
            logger.info("Scaling features...")
            scaler = StandardScaler()
            train_x_scaled = scaler.fit_transform(train_x)
            test_x_scaled = scaler.transform(test_x)

            # Save scaler
            scaler_path = os.path.join(root_dir, 'scaler.joblib')
            joblib.dump(scaler, scaler_path)
            logger.info(f"Scaler saved to {scaler_path}")

            # Create sequences for LSTM
            logger.info(f"Creating sequences with length {sequence_length}...")

            train_dataset = TimeSeriesDataset(
                train_x_scaled, train_y, sequence_length
            )
            test_dataset = TimeSeriesDataset(
                test_x_scaled, test_y, sequence_length
            )

            # Data loaders
            batch_size = self._cfg('batch_size')
            train_loader = DataLoader(
                train_dataset,
                batch_size=batch_size,
                shuffle=True
            )
            test_loader = DataLoader(
                test_dataset,
                batch_size=batch_size,
                shuffle=False
            )

            # Initialize LSTM model
            model = CryptoLSTM(
                input_size=input_size,
                hidden_size=self._cfg('hidden_size'),
                num_layers=self._cfg('num_layers'),
                dropout_rate=self._cfg('dropout_rate')
            ).to(self.device)

            logger.info(f"LSTM Model Architecture:\n{model}")

            # Loss and optimizer
            criterion = nn.MSELoss()
            optimizer = optim.Adam(
                model.parameters(),
                lr=self._cfg('learning_rate'),
                weight_decay=1e-5
            )
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='min', factor=0.5, patience=5
            )

            # Training loop
            epochs = self._cfg('epochs')
            patience = self._cfg('early_stopping_patience')
            model_path = os.path.join(root_dir, self._cfg('model_name'))
            best_loss = float('inf')
            patience_counter = 0
            train_losses = []
            test_losses = []

            logger.info("Starting LSTM training...")
            for epoch in range(epochs):
                train_loss = self._train_one_epoch(
                    model, train_loader, criterion, optimizer
                )
                train_losses.append(train_loss)

                test_loss = self._evaluate(model, test_loader, criterion)
                test_losses.append(test_loss)

                # Learning rate scheduling
                scheduler.step(test_loss)

                # Log progress
                if (epoch + 1) % 10 == 0:
                    logger.info(
                        f"Epoch [{epoch+1}/{epochs}] "
                        f"Train Loss: {train_loss:.6f}, Test Loss: {test_loss:.6f}"
                    )

                # Early stopping
                if test_loss < best_loss:
                    best_loss = test_loss
                    patience_counter = 0

                    # Checkpoint only the best-so-far weights
                    torch.save(model.state_dict(), model_path)
                    logger.info(f"Best LSTM model saved with test loss: {best_loss:.6f}")
                else:
                    patience_counter += 1

                if patience_counter >= patience:
                    logger.info(f"Early stopping at epoch {epoch+1}")
                    break

            # Save model configuration (what is needed to rebuild the network)
            model_config = {
                'model_type': 'LSTM',
                'input_size': input_size,
                'hidden_size': self._cfg('hidden_size'),
                'num_layers': self._cfg('num_layers'),
                'dropout_rate': self._cfg('dropout_rate'),
                'sequence_length': sequence_length,
                'feature_names': list(train_x.columns)
            }

            config_path = os.path.join(root_dir, 'model_config.json')
            with open(config_path, 'w') as f:
                json.dump(model_config, f, indent=4)
            logger.info(f"Model config saved to {config_path}")

            # Save training history
            history = {
                'train_losses': train_losses,
                'test_losses': test_losses,
                'best_loss': best_loss,
                'epochs_trained': len(train_losses)
            }

            history_path = os.path.join(root_dir, 'training_history.json')
            with open(history_path, 'w') as f:
                json.dump(history, f, indent=4)
            logger.info(f"Training history saved to {history_path}")

            logger.info("LSTM training completed successfully!")
            logger.info(f"Best test loss: {best_loss:.6f}")

        except Exception as e:
            logger.exception(f"Error during LSTM training: {str(e)}")
            # Bare ``raise`` re-raises the active exception with its
            # original traceback intact.
            raise

## 5. Execute LSTM Training

In [None]:
# Configuration: artifact locations plus model and optimisation hyper-parameters
# consumed by LSTMModelTrainer.
lstm_config = {
    'root_dir': 'artifacts/deep_model_trainer',
    'train_data_path': 'artifacts/data_transformation/train.csv',
    'test_data_path': 'artifacts/data_transformation/test.csv',
    'model_name': 'best_deep_model.pth',
    'hidden_size': 128,
    'num_layers': 2,
    'dropout_rate': 0.2,
    'learning_rate': 0.001,
    'batch_size': 64,
    'epochs': 100,
    'early_stopping_patience': 15,
    'target_column': 'target_price_1h'
}

# No try/except wrapper here: LSTMModelTrainer.train already logs any failure
# and re-raises it, so catching just to re-raise would add nothing.
lstm_trainer = LSTMModelTrainer(config=lstm_config)
lstm_trainer.train(sequence_length=10)

## 6. Visualize Training Progress

In [None]:
import matplotlib.pyplot as plt

# Read back the per-epoch losses that the training stage wrote to disk
with open('artifacts/deep_model_trainer/training_history.json', 'r') as f:
    history = json.load(f)

# One figure, two side-by-side panels
fig, (ax_both, ax_test) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: train and test curves together
ax_both.plot(history['train_losses'], label='Train Loss')
ax_both.plot(history['test_losses'], label='Test Loss')
ax_both.set_title('LSTM Training Progress')

# Right panel: test curve on its own scale
ax_test.plot(history['test_losses'], label='Test Loss', color='orange')
ax_test.set_title('LSTM Test Loss')

# Axis labels, legend and grid are identical for both panels
for ax in (ax_both, ax_test):
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss (MSE)')
    ax.legend()
    ax.grid(True)

fig.tight_layout()
plt.show()

# Text summary of the run
print(f"\nLSTM Training Results:")
print(f"Best Loss: {history['best_loss']:.6f}")
print(f"Epochs Trained: {history['epochs_trained']}")

## 7. Understanding LSTM Architecture

### Why LSTM for Time Series?

- **Memory Cells**: LSTMs can remember information for long periods
- **Sequential Dependencies**: Captures temporal patterns in crypto prices
- **Gates Mechanism**: Input, forget, and output gates control information flow

### Our Architecture:
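1. **Input**: Sequence of 10 time steps (`sequence_length`) with 29 features each (`input_size` is taken from the number of non-target columns in the training CSV)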
2. **LSTM Layers**: 2 layers with 128 hidden units
3. **Fully Connected**: 64 → 32 → 1 (price prediction)
4. **Regularization**: Dropout (0.2) to prevent overfitting