# Deep Learning Model Evaluation
## Stage 07: PyTorch DNN Model Performance Evaluation & MLflow Logging

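This notebook evaluates the trained PyTorch deep neural network on the held-out test set (RMSE, MAE, R²), saves the metrics locally, and logs the parameters, metrics, and model to MLflow.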

In [None]:
import os
# Step one directory up so the relative 'artifacts/...' paths used in later
# cells resolve (presumably to the project root — confirm against the repo
# layout). NOTE: this is not idempotent; re-running the cell moves up again.
os.chdir('../')
# IPython magic: show the working directory after the change.
%pwd

## 1. Configuration Entity

In [None]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DeepModelEvaluationConfig:
    """Immutable settings consumed by the deep-model evaluation stage.

    Instances are built by
    ``ConfigurationManager.get_deep_model_evaluation_config`` and read by
    ``DeepModelEvaluation``.
    """

    # Output directory of this stage; created when the config is built.
    root_dir: Path
    # CSV file holding the test split (features plus the target column).
    test_data_path: Path
    # File with the trained weights; loaded via torch.load as a state dict.
    model_path: Path
    # Fitted feature scaler; loaded via joblib.
    scaler_path: Path
    # JSON file providing 'input_size', 'hidden_layers' and 'dropout_rate'.
    model_config_path: Path
    # The ``DeepModel`` section of the params file.
    all_params: dict
    # Destination of the JSON file holding rmse / mae / r2.
    metric_file_name: Path
    # Name of the column in the test CSV that holds the prediction target.
    target_column: str
    # URI handed to ``mlflow.set_registry_uri``.
    mlflow_uri: str

## 2. Configuration Manager

In [None]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories

In [None]:
class ConfigurationManager:
    """Read the project's YAML files and build per-stage config objects."""

    def __init__(
            self,
            config_filepath=CONFIG_FILE_PATH,
            params_filepath=PARAMS_FILE_PATH,
            schema_filepath=SCHEMA_FILE_PATH):
        """Load the config, params and schema files and create the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the hyper-parameter YAML.
            schema_filepath: Location of the data-schema YAML.
        """
        # Files are parsed in the order config -> params -> schema.
        self.config, self.params, self.schema = (
            read_yaml(yaml_path)
            for yaml_path in (config_filepath, params_filepath, schema_filepath)
        )

        create_directories([self.config.artifacts_root])

    def get_deep_model_evaluation_config(self) -> DeepModelEvaluationConfig:
        """Assemble the settings for the deep-model evaluation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            A populated ``DeepModelEvaluationConfig``.
        """
        stage_cfg = self.config.deep_model_evaluation
        model_params = self.params.DeepModel
        target_spec = self.schema.TARGET_COLUMN

        create_directories([stage_cfg.root_dir])

        # These entries are copied verbatim from the stage section of the
        # configuration file.
        copied_fields = (
            "root_dir",
            "test_data_path",
            "model_path",
            "scaler_path",
            "model_config_path",
            "metric_file_name",
        )
        stage_values = {field: getattr(stage_cfg, field) for field in copied_fields}

        return DeepModelEvaluationConfig(
            all_params=model_params,
            target_column=target_spec.name,
            mlflow_uri="https://dagshub.com/Loza-Tadesse/VinoPredict.mlflow",
            **stage_values,
        )

## 3. Model Architecture & Evaluation Component

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import json
import joblib
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from mlProject import logger
from mlProject.utils.common import save_json
import mlflow
from urllib.parse import urlparse

In [None]:
class CryptoPriceNet(nn.Module):
    """Feed-forward regressor for cryptocurrency price prediction.

    Each entry of ``hidden_layers`` contributes a Linear -> ReLU -> Dropout
    group; a final Linear layer maps to a single output value.

    Args:
        input_size: Number of input features.
        hidden_layers: Widths of the hidden layers, in order.
        dropout_rate: Dropout probability applied after every hidden layer.
    """

    def __init__(self, input_size, hidden_layers, dropout_rate=0.2):
        super().__init__()

        # widths[i] -> widths[i + 1] describes the i-th hidden Linear layer.
        widths = [input_size, *hidden_layers]

        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack += [
                nn.Linear(fan_in, fan_out),
                nn.ReLU(),
                nn.Dropout(dropout_rate),
            ]

        # Regression head: one output unit fed by the last width
        # (the input size itself when there are no hidden layers).
        stack.append(nn.Linear(widths[-1], 1))

        # The attribute name and module order determine the state_dict keys,
        # so they must stay as they are for saved weights to load.
        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        return self.network(x)

In [None]:
class DeepModelEvaluation:
    """Evaluate the trained network on the test split and record the results.

    The public entry point is ``log_into_mlflow``; the underscore-prefixed
    methods are internal steps of that pipeline.
    """

    def __init__(self, config: DeepModelEvaluationConfig):
        """Store the stage configuration and pick the inference device.

        Args:
            config: Paths, target column name and MLflow URI for this stage.
        """
        self.config = config
        # Use CUDA when PyTorch reports it as available, otherwise the CPU.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def eval_metrics(self, actual, pred):
        """Compute regression metrics.

        Args:
            actual: Ground-truth target values.
            pred: Predicted values, aligned with ``actual``.

        Returns:
            Tuple ``(rmse, mae, r2)``.
        """
        rmse = np.sqrt(mean_squared_error(actual, pred))
        mae = mean_absolute_error(actual, pred)
        r2 = r2_score(actual, pred)
        return rmse, mae, r2

    def _load_model(self):
        """Rebuild the network from its saved hyper-parameters and load the weights.

        Returns:
            Tuple ``(model, model_config)`` with the model on ``self.device``
            in eval mode and the parsed JSON configuration dict.
        """
        with open(self.config.model_config_path, 'r') as f:
            model_config = json.load(f)

        model = CryptoPriceNet(
            input_size=model_config['input_size'],
            hidden_layers=model_config['hidden_layers'],
            dropout_rate=model_config['dropout_rate']
        ).to(self.device)

        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source (consider weights_only=True where the
        # installed torch version supports it).
        state_dict = torch.load(self.config.model_path, map_location=self.device)
        model.load_state_dict(state_dict)
        # Eval mode disables dropout so predictions are deterministic.
        model.eval()
        return model, model_config

    def _predict(self, model, features):
        """Scale ``features`` with the saved scaler and return 1-D predictions."""
        scaler = joblib.load(self.config.scaler_path)
        scaled = scaler.transform(features)
        feature_tensor = torch.FloatTensor(scaled).to(self.device)

        # Inference only: no autograd bookkeeping is needed.
        with torch.no_grad():
            return model(feature_tensor).cpu().numpy().flatten()

    def _log_run(self, model, model_config, rmse, mae, r2):
        """Log parameters, metrics and the model to MLflow in a fresh run."""
        import time

        # NOTE(review): only the registry URI is taken from the config; the
        # tracking URI is whatever MLflow resolves on its own (confirm that
        # this split is intended).
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        # set_experiment activates the experiment and creates it when it does
        # not exist yet, so no separate create_experiment call is required.
        mlflow.set_experiment("Deep_Learning_CryptoPredict")

        # The timestamp suffix keeps run names unique across executions.
        run_name = f"deep_model_eval_{int(time.time())}"

        with mlflow.start_run(run_name=run_name):
            params_to_log = {
                "model_type": "PyTorch_Neural_Network",
                "architecture": str(model_config['hidden_layers']),
                "device": str(self.device),
                "input_size": model_config['input_size'],
                "dropout_rate": model_config['dropout_rate']
            }

            # Parameter logging is best-effort: one rejected parameter must
            # not abort the whole run.
            for key, value in params_to_log.items():
                try:
                    mlflow.log_param(key, value)
                except mlflow.exceptions.MlflowException as param_e:
                    logger.warning(f"Could not log parameter {key}: {param_e}")

            mlflow.log_metric("rmse", rmse)
            mlflow.log_metric("mae", mae)
            mlflow.log_metric("r2", r2)

            # Register the model only when tracking is not file-based;
            # registered_model_name=None just logs the artifact.
            registered_name = (
                "DeepCryptoPriceModel" if tracking_url_type_store != "file" else None
            )
            mlflow.pytorch.log_model(model, "model", registered_model_name=registered_name)

    def log_into_mlflow(self):
        """Run the evaluation end to end.

        Loads the test CSV and the trained model, predicts on the scaled
        features, writes rmse / mae / r2 to ``config.metric_file_name`` and
        logs parameters, metrics and the model to MLflow.

        Raises:
            Exception: Any failure is logged with its traceback and re-raised
                unchanged.
        """
        try:
            test_data = pd.read_csv(self.config.test_data_path)
            model, model_config = self._load_model()

            # Split the frame into features and target.
            test_x = test_data.drop([self.config.target_column], axis=1)
            test_y = test_data[self.config.target_column]

            predictions = self._predict(model, test_x)
            (rmse, mae, r2) = self.eval_metrics(test_y, predictions)

            # Persist the metrics locally before talking to MLflow so they
            # survive a tracking-server failure.
            scores = {"rmse": rmse, "mae": mae, "r2": r2}
            save_json(path=Path(self.config.metric_file_name), data=scores)

            self._log_run(model, model_config, rmse, mae, r2)

            logger.info(f"Deep model evaluation completed. RMSE: {rmse:.4f}, MAE: {mae:.4f}, R2: {r2:.4f}")

        except Exception as e:
            logger.exception(f"Error during deep model evaluation: {str(e)}")
            # Bare raise keeps the original traceback intact.
            raise

## 4. Execute Evaluation Pipeline

In [None]:
# Build the stage configuration and run the evaluation. No try/except wrapper
# is used: catching an exception only to re-raise it adds nothing, and
# log_into_mlflow already logs a failure before propagating it.
config = ConfigurationManager()
deep_model_evaluation_config = config.get_deep_model_evaluation_config()
deep_model_evaluation = DeepModelEvaluation(config=deep_model_evaluation_config)
deep_model_evaluation.log_into_mlflow()

## 5. Visualize Results

In [None]:
import matplotlib.pyplot as plt

# Read back the metrics written by the evaluation stage and print them
with open('artifacts/deep_model_evaluation/metrics.json', 'r') as f:
    metrics = json.load(f)

print("Deep Learning Model Performance:")
for label, key in (("RMSE", "rmse"), ("MAE", "mae"), ("R²", "r2")):
    print(f"{label}: {metrics[key]:.4f}")

# Split the test set into target and features
test_data = pd.read_csv('artifacts/data_transformation/test.csv')
test_y = test_data['target_price_1h']
test_x = test_data.drop(columns=['target_price_1h'])

# Restore the scaler, the architecture settings and the trained weights
scaler = joblib.load('artifacts/deep_model_trainer/scaler.joblib')
with open('artifacts/deep_model_trainer/model_config.json', 'r') as f:
    model_config = json.load(f)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
architecture = {
    key: model_config[key]
    for key in ('input_size', 'hidden_layers', 'dropout_rate')
}
model = CryptoPriceNet(**architecture).to(device)

state_dict = torch.load('artifacts/deep_model_trainer/best_deep_model.pth', map_location=device)
model.load_state_dict(state_dict)
model.eval()

# Predict on the scaled features without tracking gradients
test_x_scaled = scaler.transform(test_x)
test_x_tensor = torch.FloatTensor(test_x_scaled).to(device)

with torch.no_grad():
    predictions = model(test_x_tensor).cpu().numpy().flatten()

residuals = test_y - predictions

# Left panel: actual vs predicted; right panel: residual histogram
fig, (ax_scatter, ax_hist) = plt.subplots(1, 2, figsize=(15, 5))

ax_scatter.scatter(test_y, predictions, alpha=0.5)
# Dashed diagonal marks where predicted equals actual
lo, hi = test_y.min(), test_y.max()
ax_scatter.plot([lo, hi], [lo, hi], 'r--', lw=2)
ax_scatter.set_xlabel('Actual Price')
ax_scatter.set_ylabel('Predicted Price')
ax_scatter.set_title('Actual vs Predicted Prices')
ax_scatter.grid(True)

ax_hist.hist(residuals, bins=50, edgecolor='black')
ax_hist.set_xlabel('Residuals (Actual - Predicted)')
ax_hist.set_ylabel('Frequency')
ax_hist.set_title('Residuals Distribution')
ax_hist.grid(True)

fig.tight_layout()
plt.show()