In [1]:
import os
# Move the working directory one level up so that relative paths used by the
# later cells resolve from there (presumably the project root — confirm against
# the %pwd output below).
# NOTE(review): this is not idempotent — re-running the cell without restarting
# the kernel climbs one more directory level each time.
os.chdir("../")
# Display the resulting working directory as the cell output.
%pwd

'd:\\Final-Year-Project\\Credit-Card-Fraud-Detection-Using-GNN'

In [2]:
from dataclasses import dataclass
from pathlib import Path
import os
import pandas as pd
from torch_geometric.data import HeteroData
from Credit_Card_Fraud_Detection import logger
import torch

In [3]:
@dataclass(frozen=True)
class TestGraphConstructionConfig:
    """Immutable settings for the test-graph construction stage.

    Note that dataclasses do not enforce annotations at runtime, so the fields
    hold whatever the caller passes in (``Path`` objects or plain strings).
    """

    # Directory of this stage's artifacts; the fraud labels file is written here.
    root_dir: Path
    # CSV file with the transformed test data that the graph is built from.
    transformed_test_data_path: Path
    # Destination file for the serialized test graph.
    test_graph_data_path: Path

In [4]:
from Credit_Card_Fraud_Detection.constants import *
from Credit_Card_Fraud_Detection.utils.common import read_yaml, create_directories

In [5]:

class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage configuration objects.

    On construction the config, params and schema files are read and the
    top-level artifacts directory is created.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read the three YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_test_graph_construction_config(self) -> TestGraphConstructionConfig:
        """Build the configuration for the test-graph construction stage.

        Creates the stage's root directory as a side effect.

        Returns:
            TestGraphConstructionConfig: stage settings with every path coerced
            to ``pathlib.Path`` so they match the dataclass annotations and use
            consistent separators on every platform.
        """
        config = self.config.test_graph_construction
        create_directories([config.root_dir])

        # The YAML loader yields plain strings; convert them so downstream code
        # really receives the Path objects the dataclass advertises.
        test_graph_construction_config = TestGraphConstructionConfig(
            root_dir=Path(config.root_dir),
            transformed_test_data_path=Path(config.transformed_test_data_path),
            test_graph_data_path=Path(config.test_graph_data_path),
        )
        return test_graph_construction_config

In [6]:
import torch
from torch_geometric.data import HeteroData

# --- Graph Construction Class (For Test Data) ---
class TestGraphConstructor:
    """Builds a heterogeneous customer / transaction / merchant graph from test data.

    Every node type uses *local* indices, i.e. the position of the node's row in
    that type's feature matrix:

    * customers and merchants are numbered in order of first appearance, which
      is the same order ``Series.unique()`` yields and the order their feature
      rows are stored in;
    * transactions are numbered by row position in the DataFrame, which is the
      order of the transaction feature matrix and of the labels.

    The original identifiers are kept in each node type's ``n_id`` attribute.
    """

    # Per-row columns that are averaged into one feature vector per node.
    CUSTOMER_FEATURES = ("customer_avg_amt", "customer_min_amt", "customer_amt_std")
    MERCHANT_FEATURES = ("merchant_avg_amt", "merchant_min_amt", "merchant_amt_std")
    # Columns used as-is, one feature vector per transaction row.
    TRANSACTION_FEATURES = (
        "high_amt", "amt_ratio_merchant", "sqrt_amt", "amt", "amt_diff_customer_avg",
        "hour_cos", "amt_per_city_pop",
    )

    def __init__(self, config):
        """Store the stage configuration (not read by the graph-building methods)."""
        self.config = config

    def create_node_ids(self, df):
        """Return a copy of ``df`` with integer node-id columns.

        ``transaction_unique``, ``customer_id`` and ``merchant_id`` are cast to
        ``int`` and exposed as ``transaction_node``, ``customer_node`` and
        ``merchant_node``; the three source columns are removed from the result.

        The input DataFrame is left untouched, so the same frame can be passed
        again (for example when the calling cell is re-run).
        """
        return df.assign(
            transaction_node=df["transaction_unique"].astype(int),
            customer_node=df["customer_id"].astype(int),
            merchant_node=df["merchant_id"].astype(int),
        ).drop(columns=["customer_id", "merchant_id", "transaction_unique"])

    def create_edge_indices(self, df):
        """Build the two forward edge-index tensors using local node indices.

        Args:
            df: DataFrame with ``customer_node`` and ``merchant_node`` columns,
                one row per transaction.

        Returns:
            tuple: ``(customer_to_transaction_edges, transaction_to_merchant_edges)``,
            each a ``torch.long`` tensor of shape ``(2, len(df))``.
        """
        # factorize() numbers values in order of first appearance, matching the
        # unique() order in which the customer / merchant feature rows are built.
        customer_codes, _ = pd.factorize(df["customer_node"])
        merchant_codes, _ = pd.factorize(df["merchant_node"])

        customer_index = torch.tensor(customer_codes, dtype=torch.long)
        merchant_index = torch.tensor(merchant_codes, dtype=torch.long)
        # Transaction features and labels are stored in row order, so the row
        # position is the transaction's local index.
        transaction_index = torch.arange(len(df), dtype=torch.long)

        customer_to_transaction_edges = torch.stack([customer_index, transaction_index])
        transaction_to_merchant_edges = torch.stack([transaction_index, merchant_index])
        return customer_to_transaction_edges, transaction_to_merchant_edges

    @staticmethod
    def _mean_features_per_node(df, node_column, feature_columns):
        """Average ``feature_columns`` per node, rows ordered by first appearance."""
        node_order = df[node_column].unique()
        per_node_means = (
            df.groupby(node_column, sort=False)[list(feature_columns)]
            .mean()
            # Make the row order explicit rather than relying on groupby order.
            .reindex(node_order)
        )
        return torch.tensor(per_node_means.to_numpy(), dtype=torch.float32)

    def create_node_features(self, df):
        """Build the float32 feature matrices for all three node types.

        Returns:
            tuple: ``(customer_features, merchant_features, transaction_features)``.
            Customer and merchant rows hold the mean of their feature columns
            over all rows belonging to that node, ordered by first appearance.
            Transaction rows are the transaction feature columns in row order.
        """
        customer_features = self._mean_features_per_node(
            df, "customer_node", self.CUSTOMER_FEATURES
        )
        merchant_features = self._mean_features_per_node(
            df, "merchant_node", self.MERCHANT_FEATURES
        )
        transaction_features = torch.tensor(
            df[list(self.TRANSACTION_FEATURES)].values, dtype=torch.float32
        )
        return customer_features, merchant_features, transaction_features

    def construct_test_graph(self, df):
        """Assemble the ``HeteroData`` test graph and the fraud labels.

        Args:
            df: Transformed test data with the id columns, the feature columns
                and ``is_fraud``. It is not modified.

        Returns:
            tuple: ``(data, y_labels)`` where ``data`` is the ``HeteroData``
            graph and ``y_labels`` is a list of
            ``(transaction_node, is_fraud)`` pairs in row order.
        """
        df = self.create_node_ids(df)
        customer_to_transaction_edges, transaction_to_merchant_edges = self.create_edge_indices(df)
        customer_features, merchant_features, transaction_features = self.create_node_features(df)

        fraud_flags = df["is_fraud"].tolist()
        y_labels = list(zip(df["transaction_node"].tolist(), fraud_flags))

        data = HeteroData()
        data["customer"].x = customer_features
        data["merchant"].x = merchant_features
        data["transaction"].x = transaction_features

        # Forward relations plus their reverses (flip swaps source/target rows).
        data["customer", "transacts", "transaction"].edge_index = customer_to_transaction_edges
        data["transaction", "occurs_at", "merchant"].edge_index = transaction_to_merchant_edges
        data["transaction", "transacted_by", "customer"].edge_index = customer_to_transaction_edges.flip(0)
        data["merchant", "related_to", "transaction"].edge_index = transaction_to_merchant_edges.flip(0)

        data["transaction"].y = torch.tensor(fraud_flags, dtype=torch.float32).view(-1, 1)

        # Original identifiers, aligned with the rows of each feature matrix.
        data["customer"].n_id = torch.tensor(df["customer_node"].unique())
        data["merchant"].n_id = torch.tensor(df["merchant_node"].unique())
        data["transaction"].n_id = torch.tensor(df["transaction_node"].to_numpy(copy=True))

        return data, y_labels  # Return data and is_fraud labels with transaction_node

In [7]:
# Build the test graph from the transformed test data and persist it together
# with its fraud labels. Errors are left to propagate unchanged: the previous
# `except Exception as e: raise e` wrapper only added a frame to the traceback.
config = ConfigurationManager()
test_graph_construction_config = config.get_test_graph_construction_config()
test_graph_constructor = TestGraphConstructor(config=test_graph_construction_config)

test_df = pd.read_csv(test_graph_construction_config.transformed_test_data_path)
test_data, is_fraud_labels = test_graph_constructor.construct_test_graph(test_df)

torch.save(test_data, test_graph_construction_config.test_graph_data_path)
print(f"Test graph data saved to: {test_graph_construction_config.test_graph_data_path}")

# Save the is_fraud labels, a list of (transaction_node, is_fraud) pairs, next
# to the graph. Building the path with Path keeps the separators consistent on
# every platform (os.path.join produced "artifacts/Testing\\..." on Windows).
labels_save_path = Path(test_graph_construction_config.root_dir) / "is_fraud_labels.pt"
torch.save(torch.tensor(is_fraud_labels), labels_save_path)
print(f"is_fraud labels saved to: {labels_save_path}")

[2025-03-24 16:54:05,006: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-03-24 16:54:05,008: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-24 16:54:05,008: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-03-24 16:54:05,008: INFO: common: created directory at: artifacts]
[2025-03-24 16:54:05,011: INFO: common: created directory at: artifacts/Testing]
Test graph data saved to: artifacts/Testing/test_graph_data.pt
is_fraud labels saved to: artifacts/Testing\is_fraud_labels.pt
