In [1]:
import os
# Move the working directory up one level; presumably this is so the relative
# paths used by later cells resolve from the project root (confirm against the
# repository layout).
# NOTE(review): the path is relative to the current directory, so running this
# cell a second time in the same kernel climbs one more level. Restart the
# kernel before re-running.
os.chdir("../")
# Echo the resulting working directory as the cell output.
%pwd

'd:\\Final-Year-Project\\Credit-Card-Fraud-Detection-Using-GNN'

In [2]:
# Entity

from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class DataTransformationConfig:
    """Immutable settings for the data-transformation stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Directory in which the stage writes its output file.
    root_dir: Path
    # Location of the CSV file the stage reads as its input.
    data_path: Path

In [3]:
from Credit_Card_Fraud_Detection.constants import *
from Credit_Card_Fraud_Detection.utils.common import read_yaml, create_directories

In [4]:
# Configuration

class ConfigurationManager:
    """Loads the project's YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):
        """Read the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
            schema_filepath: Path of the schema YAML.
        """
        # Files are read in the same order as the parameters are declared.
        self.config, self.params, self.schema = (
            read_yaml(yaml_path)
            for yaml_path in (config_filepath, params_filepath, schema_filepath)
        )

        # The loaded config is accessed by attribute, e.g. ``artifacts_root``.
        create_directories([self.config.artifacts_root])

    def get_data_transformation_config(self) -> DataTransformationConfig:
        """Build the data-transformation settings from the loaded config.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            A ``DataTransformationConfig`` carrying ``root_dir`` and
            ``data_path`` from the ``data_transformation`` config section.
        """
        section = self.config.data_transformation
        create_directories([section.root_dir])

        return DataTransformationConfig(
            root_dir=section.root_dir,
            data_path=section.data_path,
        )

In [5]:
import os
import numpy as np
import pandas as pd
from Credit_Card_Fraud_Detection import logger
from sklearn.preprocessing import LabelEncoder
from datetime import datetime, timedelta

In [6]:
# Components

class DataTransformation:
    """Turns the raw transactions CSV into a cleaned table indexed by card.

    Each helper takes a DataFrame, modifies or replaces columns, and returns
    the frame so the steps can be applied one after another in ``transform``.
    """

    def __init__(self, config: "DataTransformationConfig"):
        """Store the stage configuration and start with no fitted encoders.

        Args:
            config: Settings providing ``data_path`` (input CSV) and
                ``root_dir`` (output directory).
        """
        self.config = config
        # Maps column name -> fitted LabelEncoder, filled lazily on first use.
        self.label_encoders = {}

    def create_card_id(self, df):
        """Add a ``card_id`` column formed as ``"<User>_<Card>"``."""
        df["card_id"] = df["User"].astype(str) + "_" + df["Card"].astype(str)
        return df

    def clean_amount_column(self, df):
        """Strip the ``$`` symbol from ``Amount`` and convert it to numbers.

        Values that still cannot be parsed after stripping become 0.
        """
        # regex=False is essential: as a regular expression "$" is the
        # end-of-string anchor, so on pandas >= 2.0 the symbol would be left in
        # place, every value would fail to parse and the column would be all 0.
        df["Amount"] = df["Amount"].str.replace("$", "", regex=False)
        df["Amount"] = pd.to_numeric(df["Amount"], errors="coerce").fillna(0)
        return df

    def extract_time_features(self, df):
        """Add integer ``Hour`` and ``Minute`` columns sliced from ``Time``.

        Characters 0-1 are read as the hour and characters 3-4 as the minute,
        i.e. the column is expected to look like ``"HH:MM"``.
        """
        df["Hour"] = df["Time"].str[:2].astype(int)
        df["Minute"] = df["Time"].str[3:5].astype(int)
        return df

    def drop_unnecessary_columns(self, df):
        """Return a copy of ``df`` without the columns the pipeline discards."""
        df = df.drop(["Time", "User", "Card", "Merchant State", "Zip"], axis=1)
        return df

    def fill_missing_errors(self, df):
        """Replace missing ``Errors?`` entries with the label ``"No error"``."""
        df["Errors?"] = df["Errors?"].fillna("No error")
        return df

    def encode_categorical_columns(self, df, columns):
        """Label-encode each listed column that is present in ``df``.

        The first call for a column fits a new encoder and remembers it in
        ``self.label_encoders``; later calls reuse the fitted encoder.
        Columns absent from ``df`` are skipped.
        """
        for col in columns:
            if col not in df.columns:
                continue
            encoder = self.label_encoders.get(col)
            if encoder is None:
                encoder = self.label_encoders[col] = LabelEncoder()
                df[col] = encoder.fit_transform(df[col])
            else:
                df[col] = encoder.transform(df[col])
        return df

    def convert_target_column(self, df):
        """Map ``Is Fraud?`` from ``"Yes"``/``"No"`` to 1/0.

        Any other value becomes NaN, as with ``Series.map`` on a dict.
        """
        df["Is Fraud?"] = df["Is Fraud?"].map({"Yes": 1, "No": 0})
        return df

    def transform(self):
        """Run the full cleaning sequence and write the result to disk.

        Reads ``config.data_path``, applies every step, indexes the frame by
        ``card_id`` and saves it as ``transformed_dataset.csv`` under
        ``config.root_dir``.

        Returns:
            The transformed DataFrame, so callers can keep working with it
            without re-reading the saved file.
        """
        dataset = pd.read_csv(self.config.data_path)
        logger.info("Data read successfully")

        dataset = self.create_card_id(dataset)
        dataset = self.clean_amount_column(dataset)
        dataset = self.extract_time_features(dataset)
        dataset = self.drop_unnecessary_columns(dataset)
        dataset = self.fill_missing_errors(dataset)

        categorical_columns = ["Merchant City", "Use Chip", "Errors?"]
        dataset = self.encode_categorical_columns(dataset, categorical_columns)
        dataset = self.convert_target_column(dataset)

        # Retain `card_id` as an index for graph construction
        dataset = dataset.set_index("card_id").sort_index()

        # Save transformed dataset
        transformed_path = os.path.join(self.config.root_dir, "transformed_dataset.csv")
        dataset.to_csv(transformed_path)
        logger.info(f"Transformed dataset saved at {transformed_path}")

        return dataset

        

In [7]:
# Pipeline

# Build the configuration, run the transformation stage and keep whatever
# transform() returns.
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    transformed_data = data_transformation.transform()
except Exception as e:
    # Record the failure with its traceback in the project log, then let the
    # original exception propagate unchanged (bare `raise` keeps the traceback).
    logger.exception(e)
    raise

[2025-03-17 12:38:37,419: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-03-17 12:38:37,421: INFO: common: yaml file: params.yaml loaded successfully]
[2025-03-17 12:38:37,423: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-03-17 12:38:37,424: INFO: common: created directory at: artifacts]
[2025-03-17 12:38:37,425: INFO: common: created directory at: artifacts/data_transformation]


[2025-03-17 12:39:00,490: INFO: 3007030047: Data read successfully]
[2025-03-17 12:40:56,274: INFO: 3007030047: Transformed dataset saved at artifacts/data_transformation\transformed_dataset.csv]
