# LSTM Model Evaluation
## Stage 09: Evaluating LSTM Performance & MLflow Integration

This notebook evaluates the trained LSTM model and compares it with the standard DNN.

In [None]:
import os
# Step up one directory so the relative 'artifacts/...' paths used later resolve
# (presumably to the project root — confirm against where this notebook lives).
# NOTE: the path is relative, so re-running this cell moves up one more level each time.
os.chdir('../')
# IPython magic: show the working directory after the change.
%pwd

## 1. Load Required Libraries

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import json
import joblib
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from mlProject import logger
from mlProject.utils.common import save_json
from pathlib import Path
import mlflow
from urllib.parse import urlparse
import matplotlib.pyplot as plt

## 2. LSTM Architecture Definition

In [None]:
class CryptoLSTM(nn.Module):
    """Stacked LSTM encoder followed by a small fully connected regression head.

    The head maps the LSTM output at the final time step through
    ``hidden_size -> 64 -> 32 -> 1`` with ReLU and dropout after the first
    two linear layers.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout_rate: Dropout probability for the head; also passed to the
            LSTM when ``num_layers > 1``.
    """

    def __init__(self, input_size, hidden_size=128, num_layers=2, dropout_rate=0.2):
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Recurrent encoder; dropout is switched off for a single-layer LSTM.
        inter_layer_dropout = dropout_rate if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=inter_layer_dropout,
            batch_first=True,
        )

        # Regression head (attribute names are part of the saved state_dict keys).
        self.fc1 = nn.Linear(hidden_size, 64)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        """Return one prediction per sequence from a batch-first input."""
        sequence_out, _ = self.lstm(x)
        # Only the output at the last time step feeds the head.
        hidden = sequence_out[:, -1, :]
        hidden = self.dropout(self.relu(self.fc1(hidden)))
        hidden = self.dropout(self.relu(self.fc2(hidden)))
        return self.fc3(hidden)

In [None]:
from torch.utils.data import Dataset

class TimeSeriesDataset(Dataset):
    """Sliding-window dataset for sequence models.

    Item ``i`` pairs the ``sequence_length`` consecutive feature rows starting
    at row ``i`` with the target at row ``i + sequence_length`` (the row right
    after the window).

    Args:
        features: Indexable, sliceable collection of feature rows.
        targets: Indexable collection of target values aligned with ``features``.
        sequence_length: Number of rows in each input window.
    """

    def __init__(self, features, targets, sequence_length=10):
        self.features = features
        self.targets = targets
        self.sequence_length = sequence_length

    def __len__(self):
        # Clamp at zero: with fewer rows than one window the raw difference is
        # negative, and len() raises ValueError for a negative __len__ result.
        return max(0, len(self.features) - self.sequence_length)

    def __getitem__(self, idx):
        """Return ``(window, target)`` as float32 tensors for window ``idx``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        window_count = len(self)
        if idx < 0:
            idx += window_count
        # Reject out-of-range indices explicitly; an unchecked negative index
        # would otherwise slice an empty or misaligned window without error.
        if not 0 <= idx < window_count:
            raise IndexError(
                f"index {idx} out of range for dataset with {window_count} sequences"
            )

        window_end = idx + self.sequence_length
        feature_sequence = self.features[idx:window_end]
        target = self.targets[window_end]
        return (
            torch.FloatTensor(feature_sequence),
            torch.FloatTensor([target])
        )

## 3. LSTM Evaluation Component

In [None]:
class LSTMModelEvaluation:
    """Evaluate a trained ``CryptoLSTM`` on the test split and record the run in MLflow.

    Args:
        config: Mapping providing the keys read by this class:
            ``test_data_path``, ``model_config_path``, ``scaler_path``,
            ``target_column``, ``model_path``, ``metric_file_name`` and
            ``mlflow_uri``.
    """

    def __init__(self, config):
        self.config = config
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def eval_metrics(self, actual, pred):
        """Return ``(rmse, mae, r2)`` for the given actual and predicted values."""
        rmse = np.sqrt(mean_squared_error(actual, pred))
        mae = mean_absolute_error(actual, pred)
        r2 = r2_score(actual, pred)
        return rmse, mae, r2

    def _build_test_dataset(self, sequence_length):
        """Load the test CSV, scale its features and wrap them as sliding windows."""
        test_data = pd.read_csv(self.config['test_data_path'])
        scaler = joblib.load(self.config['scaler_path'])

        target_column = self.config['target_column']
        test_x = test_data.drop([target_column], axis=1)
        test_y = test_data[target_column].values
        test_x_scaled = scaler.transform(test_x)

        # Each sample needs `sequence_length` input rows plus one target row;
        # fail with a clear message instead of an obscure len()/metric error.
        if len(test_x_scaled) <= sequence_length:
            raise ValueError(
                f"Test set has {len(test_x_scaled)} rows; more than "
                f"sequence_length={sequence_length} are required to form a sequence"
            )

        return TimeSeriesDataset(test_x_scaled, test_y, sequence_length)

    def _load_model(self, model_config):
        """Rebuild the LSTM from its saved hyperparameters and load the trained weights."""
        model = CryptoLSTM(
            input_size=model_config['input_size'],
            hidden_size=model_config['hidden_size'],
            num_layers=model_config['num_layers'],
            dropout_rate=model_config['dropout_rate']
        ).to(self.device)

        model.load_state_dict(torch.load(self.config['model_path'], map_location=self.device))
        # eval() disables dropout so predictions are deterministic.
        model.eval()
        return model

    def _predict(self, model, dataset):
        """Run the model over every window; return ``(predictions, actuals)`` arrays."""
        predictions = []
        actuals = []

        with torch.no_grad():
            for i in range(len(dataset)):
                features, target = dataset[i]
                # Add the batch dimension expected by the batch-first LSTM.
                features = features.unsqueeze(0).to(self.device)
                pred = model(features).cpu().numpy().flatten()[0]
                predictions.append(pred)
                actuals.append(target.item())

        return np.array(predictions), np.array(actuals)

    def _log_run(self, model, model_config, sequence_length, scores):
        """Log hyperparameters, metrics and the model itself to an MLflow run."""
        # NOTE(review): only the *registry* URI is set here, while the branch
        # below inspects the *tracking* URI, which this code never sets (it is
        # whatever MLflow resolves on its own, presumably from the environment).
        # Confirm whether mlflow.set_tracking_uri was also intended.
        mlflow.set_registry_uri(self.config['mlflow_uri'])

        # set_experiment creates the experiment when it does not exist yet, so
        # no separate create_experiment call (and swallowed exception) is needed.
        mlflow.set_experiment("LSTM_CryptoPredict")

        # Timestamp suffix keeps run names unique across evaluations.
        import time
        run_name = f"lstm_eval_{int(time.time())}"

        with mlflow.start_run(run_name=run_name):
            params_to_log = {
                "model_type": "LSTM",
                "hidden_size": model_config['hidden_size'],
                "num_layers": model_config['num_layers'],
                "sequence_length": sequence_length,
                "input_size": model_config['input_size'],
                "dropout_rate": model_config['dropout_rate'],
                "device": str(self.device)
            }

            # Parameters are best-effort: a failure on one must not abort the run.
            for key, value in params_to_log.items():
                try:
                    mlflow.log_param(key, value)
                except mlflow.exceptions.MlflowException as param_e:
                    logger.warning(f"Could not log parameter {key}: {param_e}")

            for metric_name, metric_value in scores.items():
                mlflow.log_metric(metric_name, metric_value)

            # Register the model only when the tracking store is not a local file store.
            tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
            if tracking_url_type_store != "file":
                mlflow.pytorch.log_model(model, "model", registered_model_name="LSTMCryptoPriceModel")
            else:
                mlflow.pytorch.log_model(model, "model")

    def log_into_mlflow(self):
        """Evaluate the model, save the metrics as JSON and log the run to MLflow.

        Returns:
            Tuple ``(scores, predictions, actuals)`` where ``scores`` is a dict
            with keys ``rmse``, ``mae`` and ``r2`` and the other two are NumPy
            arrays with one entry per evaluated sequence.

        Raises:
            Exception: Any failure is logged with its traceback and re-raised.
        """
        try:
            with open(self.config['model_config_path'], 'r') as f:
                model_config = json.load(f)
            sequence_length = model_config.get('sequence_length', 10)

            test_dataset = self._build_test_dataset(sequence_length)
            model = self._load_model(model_config)
            predictions, actuals = self._predict(model, test_dataset)

            # Plain floats keep the scores JSON-serialisable and MLflow-friendly.
            rmse, mae, r2 = (float(value) for value in self.eval_metrics(actuals, predictions))
            scores = {"rmse": rmse, "mae": mae, "r2": r2}

            # Make sure the output directory exists before writing the metrics file.
            metric_path = Path(self.config['metric_file_name'])
            metric_path.parent.mkdir(parents=True, exist_ok=True)
            save_json(path=metric_path, data=scores)

            self._log_run(model, model_config, sequence_length, scores)

            logger.info(f"LSTM evaluation completed. RMSE: {rmse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}")
            return scores, predictions, actuals

        except Exception as e:
            logger.exception(f"Error during LSTM evaluation: {str(e)}")
            # Bare raise re-raises the active exception with its original traceback.
            raise

## 4. Execute LSTM Evaluation

In [None]:
# Paths and settings consumed by LSTMModelEvaluation (paths are relative to the
# current working directory).
# NOTE(review): 'metric_file_name' is the same file that section 6 reads back as
# the "Standard DNN" metrics, so this run overwrites whatever was stored there —
# confirm whether the LSTM scores should go to a separate file.
evaluation_config = {
    'test_data_path': 'artifacts/data_transformation/test.csv',
    'model_path': 'artifacts/deep_model_trainer/best_deep_model.pth',
    'scaler_path': 'artifacts/deep_model_trainer/scaler.joblib',
    'model_config_path': 'artifacts/deep_model_trainer/model_config.json',
    'metric_file_name': 'artifacts/deep_model_evaluation/metrics.json',
    'target_column': 'target_price_1h',
    'mlflow_uri': 'https://dagshub.com/Loza-Tadesse/SolPredict-AI.mlflow'
}

# Any failure propagates unchanged (log_into_mlflow already logs it), so no
# wrapping try/except is needed here.
lstm_evaluation = LSTMModelEvaluation(config=evaluation_config)
scores, predictions, actuals = lstm_evaluation.log_into_mlflow()

print("\n" + "="*50)
print("LSTM Model Performance")
print("="*50)
print(f"RMSE: {scores['rmse']:.4f}")
print(f"MAE:  {scores['mae']:.4f}")
print(f"R²:   {scores['r2']:.4f}")
print("="*50)

## 5. Visualize LSTM Predictions

In [None]:
# Four diagnostic views of the LSTM predictions on a single 2x2 figure.
residuals = actuals - predictions
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
(ax_scatter, ax_series), (ax_hist, ax_resid) = axes

# Top-left: predicted vs actual, with the y = x reference line.
price_bounds = [actuals.min(), actuals.max()]
ax_scatter.scatter(actuals, predictions, alpha=0.5)
ax_scatter.plot(price_bounds, price_bounds, 'r--', lw=2)
ax_scatter.set_xlabel('Actual Price')
ax_scatter.set_ylabel('Predicted Price')
ax_scatter.set_title('LSTM: Actual vs Predicted Prices')
ax_scatter.grid(True)

# Top-right: the first 200 samples as overlaid series.
ax_series.plot(actuals[:200], label='Actual', alpha=0.7)
ax_series.plot(predictions[:200], label='Predicted', alpha=0.7)
ax_series.set_xlabel('Sample Index')
ax_series.set_ylabel('Price')
ax_series.set_title('LSTM: Time Series Prediction (First 200 samples)')
ax_series.legend()
ax_series.grid(True)

# Bottom-left: distribution of residuals.
ax_hist.hist(residuals, bins=50, edgecolor='black')
ax_hist.set_xlabel('Residuals (Actual - Predicted)')
ax_hist.set_ylabel('Frequency')
ax_hist.set_title('LSTM: Residuals Distribution')
ax_hist.grid(True)

# Bottom-right: residuals in sample order, with a zero reference line.
ax_resid.scatter(range(len(residuals)), residuals, alpha=0.5)
ax_resid.axhline(y=0, color='r', linestyle='--')
ax_resid.set_xlabel('Sample Index')
ax_resid.set_ylabel('Residuals')
ax_resid.set_title('LSTM: Residuals Over Time')
ax_resid.grid(True)

fig.tight_layout()
plt.show()

# Numeric summary of the residuals.
print("\nResiduals Statistics:")
print(f"Mean: {residuals.mean():.4f}")
print(f"Std:  {residuals.std():.4f}")
print(f"Min:  {residuals.min():.4f}")
print(f"Max:  {residuals.max():.4f}")

## 6. Compare LSTM vs DNN Performance

In [None]:
# File expected to hold the standard DNN's evaluation metrics.
dnn_metrics_path = Path('artifacts/deep_model_evaluation/metrics.json')

# The LSTM evaluation writes its scores to evaluation_config['metric_file_name'].
# When that is this same file, the "Standard DNN" row below is only the LSTM
# scores read back from disk, so say so instead of presenting a false comparison.
if dnn_metrics_path == Path(evaluation_config['metric_file_name']):
    print(
        f"WARNING: {dnn_metrics_path} is the file the LSTM evaluation just wrote; "
        "the 'Standard DNN' row repeats the LSTM scores. "
        "Point dnn_metrics_path at the DNN's own metrics file for a real comparison."
    )

# A missing file falls back to 'N/A' entries rather than aborting the cell.
try:
    with open(dnn_metrics_path, 'r') as f:
        metrics = json.load(f)
except FileNotFoundError:
    print(f"WARNING: {dnn_metrics_path} not found; DNN metrics shown as N/A.")
    metrics = {}

# Column label -> key used in both score dictionaries.
metric_keys = {'RMSE': 'rmse', 'MAE': 'mae', 'R²': 'r2'}

# Create comparison
comparison_data = {'Model': ['LSTM', 'Standard DNN']}
for label, key in metric_keys.items():
    comparison_data[label] = [scores[key], metrics.get(key, 'N/A')]

comparison_df = pd.DataFrame(comparison_data)
print("\n" + "="*60)
print("Model Performance Comparison")
print("="*60)
print(comparison_df.to_string(index=False))
print("="*60)

# Plot only when every DNN metric is numeric; checking RMSE alone would let a
# missing MAE or R² ('N/A') reach bar() and fail.
dnn_values = [metrics.get(key) for key in metric_keys.values()]
dnn_is_numeric = all(
    isinstance(value, (int, float)) and not isinstance(value, bool)
    for value in dnn_values
)

if dnn_is_numeric:
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    metrics_to_plot = list(metric_keys)
    colors = ['#FF6B6B', '#4ECDC4']

    for ax, metric in zip(axes, metrics_to_plot):
        ax.bar(comparison_df['Model'], comparison_df[metric], color=colors)
        ax.set_ylabel(metric)
        ax.set_title(f'{metric} Comparison')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## 7. Key Insights

### LSTM Advantages:
- **Temporal Memory**: Captures long-term dependencies in price movements
- **Sequential Learning**: Learns from historical patterns
- **Better for volatile markets**: Handles sudden price changes

### When to use LSTM vs DNN:
- **LSTM**: When temporal patterns are critical (hourly/daily predictions)
- **DNN**: When feature relationships matter more than sequence
- **Ensemble**: Combine both for robust predictions

### Model Selection Criteria:
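1. Lower RMSE = smaller typical prediction error, with large misses penalized more heavily
2. Higher R² = a larger share of the price variance explained by the model
3. Lower MAE = smaller average absolute error, in the same units as the price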