In [1]:
import os
# Step one directory up from the notebook's folder; the project-relative paths used
# later (e.g. the YAML config files and the artifacts directory) presumably expect the
# repository root as the working directory - TODO confirm.
# NOTE: this is not idempotent - every re-run of this cell without a kernel restart
# climbs one more level, so run it exactly once per session.
os.chdir("../")
# Echo the resulting working directory so the reader can verify it.
%pwd

'd:\\Final-Year-Project\\Credit-Card-Fraud-Detection-Using-GNN'

In [2]:
from pathlib import Path
from dataclasses import dataclass
import torch
import torch.nn.functional as F
from torch_geometric.nn import HeteroConv, SAGEConv
import os

# Importing constants and utility functions
from Credit_Card_Fraud_Detection.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH, SCHEMA_FILE_PATH
from Credit_Card_Fraud_Detection.utils.common import read_yaml, create_directories
from Credit_Card_Fraud_Detection import logger

In [3]:
# ====================================================
# ENTITY: ModelTrainerConfig
# ====================================================

@dataclass(frozen=True)
class ModelTrainerConfig:
    """
    Immutable bundle of settings consumed by the model-training stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.

    Attributes:
        root_dir: Directory that receives the model-training artifacts.
        data_path: Location of the dataset that is loaded for training.
        model_name: Name under which the trained model is stored.
        hidden_channels: Width of the model's hidden layers.
        learning_rate: Step size handed to the optimizer.
        epochs: How many training epochs to run.
    """
    # --- filesystem locations ---
    root_dir: Path
    data_path: Path
    # --- model identity ---
    model_name: str
    # --- hyper-parameters ---
    hidden_channels: int
    learning_rate: float
    epochs: int



In [4]:
# ====================================================
# CONFIGURATION MANAGER
# ====================================================

class ConfigurationManager:
    """
    Central access point for the project's YAML-based settings.

    On construction it parses the config, params and schema files and makes sure
    the top-level artifacts directory exists. Stage-specific configuration
    objects are then produced on demand by the ``get_*`` methods.
    """
    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH,
                 schema_filepath=SCHEMA_FILE_PATH):
        """
        Load the three YAML documents and create the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the hyper-parameter YAML.
            schema_filepath: Path of the schema YAML.
        """
        # Parsed in a fixed order (config -> params -> schema); each result is
        # stored on the instance under the matching attribute name.
        yaml_sources = (
            ("config", config_filepath),
            ("params", params_filepath),
            ("schema", schema_filepath),
        )
        for attribute, yaml_path in yaml_sources:
            setattr(self, attribute, read_yaml(yaml_path))

        # The artifacts root must exist before any stage writes into it
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """
        Build the settings object for the model-training stage.

        Reads the ``model_trainer`` section of the config and the ``GCN`` section
        of the params, creates the stage's root directory, and returns the
        combined values as a ``ModelTrainerConfig``.
        """
        trainer_section = self.config.model_trainer
        hyper_params = self.params.GCN

        # Ensure the stage's output directory exists before handing out the config
        create_directories([trainer_section.root_dir])

        settings = {
            "root_dir": Path(trainer_section.root_dir),
            "data_path": Path(trainer_section.data_path),
            "model_name": trainer_section.model_name,
            "hidden_channels": hyper_params.hidden_channels,
            "learning_rate": hyper_params.learning_rate,
            "epochs": hyper_params.epochs,
        }
        return ModelTrainerConfig(**settings)

In [6]:
# ====================================================
# COMPONENT: Model Construction
# ====================================================

class GNN(torch.nn.Module):
    """
    Heterogeneous graph neural network for fraud detection.

    Three stacked ``HeteroConv`` layers (one ``SAGEConv`` per edge type, results
    combined with ``aggr='mean'``) compute node embeddings; a linear head then
    maps each "transaction" node embedding to a single score.
    """
    def __init__(self, metadata, hidden_dim):
        """
        Create the convolution stack, the prediction head and the dropout layer.

        Args:
            metadata (tuple): Graph metadata; element ``1`` is iterated to obtain
                the edge types for which a ``SAGEConv`` is created.
            hidden_dim (int): Output width of every convolution layer and input
                width of the linear head.
        """
        super().__init__()
        edge_types = metadata[1]

        def hetero_layer():
            # One SAGEConv per relation; (-1, -1) defers the input sizes to the
            # first forward pass.
            per_relation = {}
            for relation in edge_types:
                per_relation[relation] = SAGEConv((-1, -1), hidden_dim)
            return HeteroConv(per_relation, aggr='mean')

        # Attribute names and creation order are kept stable because they
        # determine the keys of the saved state_dict.
        self.conv1 = hetero_layer()
        self.conv2 = hetero_layer()
        self.conv3 = hetero_layer()
        # Prediction head: one output value per node embedding
        self.lin = torch.nn.Linear(hidden_dim, 1)
        # Regularization applied after the first two convolutions
        self.dropout = torch.nn.Dropout(0.5)

    def forward(self, x_dict, edge_index_dict):
        """
        Run the network on a heterogeneous graph.

        Args:
            x_dict (dict): Node features keyed by node type.
            edge_index_dict (dict): Edge indices keyed by edge type.

        Returns:
            torch.Tensor: The linear head's output for the "transaction" nodes
            with the trailing dimension squeezed away. No sigmoid is applied
            here, so the values are raw scores (logits), not probabilities.
        """
        hidden = x_dict
        # First two layers: convolution -> ReLU -> dropout for every node type
        for conv in (self.conv1, self.conv2):
            hidden = conv(hidden, edge_index_dict)
            hidden = {
                node_type: self.dropout(F.relu(features))
                for node_type, features in hidden.items()
            }
        # Last layer: convolution only, no activation or dropout
        hidden = self.conv3(hidden, edge_index_dict)
        scores = self.lin(hidden["transaction"])
        return scores.squeeze(-1)

class GNNModelTrainer:
    """
    Handles the training and saving of the GNN model.

    The trainer loads a graph object from ``config.data_path``, builds a ``GNN``
    sized by ``config.hidden_channels``, optimizes it with Adam on a
    class-weighted binary cross-entropy loss over the "transaction" nodes, and
    finally writes the model's ``state_dict`` below ``config.root_dir``.
    """

    # Damping factor applied to the (non-fraud / fraud) ratio when weighting the
    # positive class, so the minority class is up-weighted by half the raw ratio.
    POS_WEIGHT_SCALE = 0.5

    def __init__(self, config):
        """
        Initializes the GNNModelTrainer.

        Args:
            config: Configuration object providing ``data_path``, ``root_dir``,
                ``model_name``, ``hidden_channels``, ``learning_rate`` and ``epochs``.

        Raises:
            ValueError: If the loaded labels contain no fraud (label == 1) samples.
        """
        self.config = config
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # Use GPU if available

        # The file holds a full pickled graph object (presumably a torch_geometric
        # HeteroData - TODO confirm), not a plain tensor/state_dict, so it cannot be
        # read with weights_only=True. Passing weights_only=False explicitly keeps the
        # load working on PyTorch versions where the default is True and silences the
        # FutureWarning on versions where it is not.
        # NOTE(security): this unpickles arbitrary Python objects - only load
        # artifacts produced by this project's own pipeline.
        # map_location lets a file saved on a GPU be opened on a CPU-only machine.
        self.data = torch.load(
            self.config.data_path,
            map_location=self.device,
            weights_only=False,
        ).to(self.device)

        self.model = self._build_model().to(self.device)  # Build model and move to device
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.config.learning_rate)

        # Weight the positive (fraud) class to counter the class imbalance
        fraud_weight = self._compute_pos_weight(self.data["transaction"].y, self.POS_WEIGHT_SCALE)
        self.criterion = torch.nn.BCEWithLogitsLoss(
            pos_weight=torch.tensor([fraud_weight], device=self.device)
        )

    @staticmethod
    def _compute_pos_weight(labels, scale=0.5):
        """
        Compute the positive-class weight ``(num_non_fraud / num_fraud) * scale``.

        Args:
            labels (torch.Tensor): Label tensor in which 0 marks non-fraud and 1 marks fraud.
            scale (float): Multiplier applied to the raw class ratio.

        Returns:
            float: The weight to use as ``pos_weight`` of the loss.

        Raises:
            ValueError: If ``labels`` contains no entry equal to 1, because the ratio
                would otherwise be a division by zero.
        """
        num_non_fraud = (labels == 0).sum().item()
        num_fraud = (labels == 1).sum().item()
        if num_fraud == 0:
            raise ValueError(
                "Cannot compute class weights: the training labels contain no fraud (label == 1) samples."
            )
        return (num_non_fraud / num_fraud) * scale

    def _build_model(self):
        """
        Builds the GNN model based on the configuration.

        Returns:
            GNN: A model whose edge types are taken from the loaded data and whose
            hidden width is ``config.hidden_channels``.
        """
        # Metadata = (node types that carry features, edge types that carry an edge index)
        metadata = (list(self.data.x_dict.keys()), list(self.data.edge_index_dict.keys()))
        return GNN(metadata, hidden_dim=self.config.hidden_channels)

    def train(self):
        """
        Performs a single full-batch training step (one epoch over the whole graph).

        Returns:
            float: The training loss of this step.
        """
        self.model.train()  # Enable dropout
        self.optimizer.zero_grad()

        out = self.model(self.data.x_dict, self.data.edge_index_dict)  # One logit per transaction

        # Flatten the labels so both (N, 1) and (N,) layouts line up with the (N,)
        # logits, and cast them to the logits' dtype because BCEWithLogitsLoss
        # requires floating-point targets.
        target = self.data["transaction"].y.reshape(-1).to(out.dtype)

        loss = self.criterion(out, target)
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()  # Drop the gradients right away instead of keeping them until the next step
        torch.cuda.empty_cache()  # Release cached GPU blocks

        return loss.item()

    def run_training(self):
        """
        Runs the full training loop for ``config.epochs`` epochs and saves the model.

        The loss is printed every 10 epochs and after the final epoch.
        """
        num_rows = self.data['transaction'].y.shape[0]  # Number of labelled transaction rows
        logger.info(f"Training on {num_rows} rows.")

        last_epoch = self.config.epochs - 1
        for epoch in range(self.config.epochs):
            loss = self.train()
            if epoch % 10 == 0 or epoch == last_epoch:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")

        self.save_model()

    def save_model(self):
        """
        Saves the trained model's ``state_dict`` to ``config.root_dir / config.model_name``.

        The target directory is created if it does not exist yet.
        """
        model_save_dir = self.config.root_dir
        os.makedirs(model_save_dir, exist_ok=True)
        model_save_path = os.path.join(model_save_dir, self.config.model_name)
        torch.save(self.model.state_dict(), model_save_path)
        logger.info(f"Trained model saved to: {model_save_path}")

In [7]:
# Driver cell: load the configuration, build the trainer, train, and save the model.
try:
    # Instantiate the ConfigurationManager to load configurations
    config_manager = ConfigurationManager()
    logger.info("ConfigurationManager instantiated.")

    # Retrieve the model trainer configuration from the ConfigurationManager
    model_trainer_config = config_manager.get_model_trainer_config()
    logger.info("Model trainer configuration retrieved.")

    # Instantiate the GNNModelTrainer with the retrieved configuration
    # (this loads the dataset and builds the model, optimizer and loss)
    trainer = GNNModelTrainer(model_trainer_config)
    logger.info("GNNModelTrainer instantiated.")

    # Run the training process (also saves the trained model at the end)
    trainer.run_training()
    logger.info("Model training completed successfully.")

except Exception:
    # Log the exception with detailed traceback
    logger.exception("An error occurred during model training.")

    # Re-raise the active exception unchanged so the cell still fails visibly;
    # a bare `raise` keeps the original traceback without an extra frame.
    raise

[2025-03-26 11:25:42,821: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-03-26 11:25:42,836: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-26 11:25:42,838: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-03-26 11:25:42,838: INFO: common: created directory at: artifacts]
[2025-03-26 11:25:42,840: INFO: 3858935648: ConfigurationManager instantiated.]
[2025-03-26 11:25:42,841: INFO: common: created directory at: artifacts/model_trainer]
[2025-03-26 11:25:42,842: INFO: 3858935648: Model trainer configuration retrieved.]


  self.data = torch.load(self.config.data_path).to(self.device)


[2025-03-26 11:25:43,654: INFO: 3858935648: GNNModelTrainer instantiated.]
[2025-03-26 11:25:43,657: INFO: 155872622: Training on 1295934 rows.]
Epoch 0, Loss: 1.0755
Epoch 10, Loss: 0.6155
Epoch 20, Loss: 0.5457
Epoch 30, Loss: 0.5129
Epoch 40, Loss: 0.4955
Epoch 50, Loss: 0.4616
Epoch 60, Loss: 0.4436
Epoch 70, Loss: 0.4075
Epoch 80, Loss: 0.3664
Epoch 90, Loss: 0.3444
Epoch 99, Loss: 0.3202
[2025-03-26 11:25:57,993: INFO: 155872622: Trained model saved to: artifacts\model_trainer\fraud_detection_gcn.pth]
[2025-03-26 11:25:57,995: INFO: 3858935648: Model training completed successfully.]
