In [1]:
import os

In [2]:
%pwd

'c:\\Users\\ainao\\OneDrive\\Project\\Fraud detection system\\Fraud--Detection---System\\research'

In [3]:
# Move from the notebook folder to the project root so relative paths
# (config/, artifacts/, src/) resolve. Guarded on the folder name so that
# re-running this cell does not climb one directory higher each time.
# NOTE(review): assumes the notebook lives in a folder named "research"
# directly under the project root — confirm if the layout changes.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\ainao\\OneDrive\\Project\\Fraud detection system\\Fraud--Detection---System'

In [5]:
from dataclasses import dataclass
from pathlib import Path
@dataclass
class ModelTrainerConfig:
    """Settings consumed by the model-training stage."""

    # Directory that holds the model-trainer stage outputs.
    root_dir: Path
    # Full path of the file the trained model is written to.
    model_save_path: Path


In [6]:
import sys
import os

sys.path.append(os.path.abspath("src"))
from fraud_detection.utils.common import read_yaml, create_directories
from fraud_detection.constants import *
from fraud_detection.entity import DataTransformationConfig

In [7]:
class ConfigurationManager:
    """Load the project YAML configuration and build per-stage config objects.

    Constructing the manager reads the YAML file and creates the top-level
    artifacts directory; each ``get_*`` method creates its stage directory
    before returning the stage's config object.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH):
        """Read the YAML at ``config_filepath`` and create ``artifacts_root``."""
        self.config = read_yaml(config_filepath)
        create_directories([self.config.artifacts_root])

    def get_data_transformation(self) -> DataTransformationConfig:
        """Return the data-transformation settings, creating its root directory.

        Values are passed through exactly as they appear in the loaded
        configuration (no ``Path`` conversion is applied here).
        """
        section = self.config.data_transformation
        create_directories([section.root_dir])

        return DataTransformationConfig(
            root_dir=section.root_dir,
            train_path=section.train_path,
            test_path=section.test_path,
            train_data=section.train_data,
            test_data=section.test_data,
            preprocessor=section.preprocessor,
        )

    def get_model_trainer(self) -> ModelTrainerConfig:
        """Return the model-trainer settings, creating its root directory.

        Both configured locations are wrapped in ``Path`` objects.
        """
        section = self.config.model_trainer
        create_directories([section.root_dir])

        return ModelTrainerConfig(
            root_dir=Path(section.root_dir),
            model_save_path=Path(section.model_save_path),
        )


In [None]:
from pathlib import Path
import logging
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier


In [None]:


# Fixed CatBoost hyperparameters, unpacked into CatBoostClassifier(**BEST_PARAMS).
# NOTE(review): presumably the result of an earlier hyperparameter search —
# record the provenance (search run / notebook) here once confirmed.
BEST_PARAMS = dict(
    border_count=102,
    depth=7,
    iterations=648,
    l2_leaf_reg=0.20495107742294383,
    learning_rate=0.1204262037029521,
    scale_pos_weight=9.442828734775043,
    subsample=0.7468055921327309,
    bootstrap_type="Bernoulli",
)

class ModelTrainer:
    """Fit a CatBoost classifier on the transformed data and save it to disk.

    Args:
        config: Object exposing ``model_save_path`` (a ``ModelTrainerConfig``
            in this notebook).
        data_transformer: Object exposing
            ``initiate_data_transformation_and_split()``, which must return a
            ``(train_arr, test_arr)`` pair of 2-D arrays whose last column is
            the target and whose remaining columns are the features.
    """

    def __init__(self, config, data_transformer):
        self.config = config
        self.data_transformer = data_transformer

    @staticmethod
    def _split_features_target(arr, name):
        """Validate a 2-D array and split it into (features, last-column target)."""
        # Check ndim first so a 1-D input is rejected before shape[1] is read.
        if arr.ndim != 2 or arr.shape[0] == 0 or arr.shape[1] < 2:
            raise ValueError(
                f"{name} must be a non-empty 2-D array with at least one "
                f"feature column plus the target column; got shape {arr.shape}"
            )
        return arr[:, :-1], arr[:, -1]

    def train(self):
        """Train the classifier, score it on the test split, and save it.

        Returns:
            tuple: ``(model_path, score)`` where ``model_path`` is the
            ``Path`` the model was written to and ``score`` is the float
            returned by ``model.score`` on the test split.

        Raises:
            ValueError: If either array is not a non-empty 2-D array with at
                least two columns, or if the train and test arrays have a
                different number of feature columns.
        """
        train_arr, test_arr = self.data_transformer.initiate_data_transformation_and_split()

        # The former `X.shape[0] == y.shape[0]` asserts compared two slices of
        # the same array and could never fail; validate the real invariants.
        X_train, y_train = self._split_features_target(train_arr, "train_arr")
        X_test, y_test = self._split_features_target(test_arr, "test_arr")

        if X_train.shape[1] != X_test.shape[1]:
            raise ValueError(
                "train_arr and test_arr must have the same number of feature "
                f"columns; got {X_train.shape[1]} and {X_test.shape[1]}"
            )

        logging.info("Initializing CatBoostClassifier with BEST_PARAMS.")
        model = CatBoostClassifier(
            loss_function="Logloss",
            eval_metric="AUC",
            random_seed=42,
            verbose=False,
            allow_writing_files=False,
            **BEST_PARAMS,
        )

        logging.info("Fitting CatBoost model on training data.")
        model.fit(X_train, y_train)

        # NOTE(review): per the CatBoost docs, CatBoostClassifier.score reports
        # accuracy, not the AUC named in eval_metric — confirm this is the
        # metric wanted for an imbalanced fraud dataset.
        score = float(model.score(X_test, y_test))
        logging.info("Test-set score (model.score): %.6f", score)

        # Coerce to Path so a plain string in the config also works.
        model_path = Path(self.config.model_save_path)
        model_path.parent.mkdir(parents=True, exist_ok=True)
        # NOTE(review): save_model is called without a `format` argument; per
        # the CatBoost docs the default is the native "cbm" format, so a
        # ".pkl" path would not hold a pickle — load it back with
        # CatBoostClassifier.load_model. TODO confirm the configured extension.
        model.save_model(str(model_path))
        logging.info("Model saved to %s", str(model_path))

        return model_path, score

In [10]:
# Run the pipeline end to end: load config -> transform data -> train and save.
# The former `try/except Exception as e: raise e` wrapper only re-raised the
# same exception, so it is dropped; errors propagate unchanged.
# NOTE(review): `DataTransformation` is not imported or defined in any cell
# visible here, so this cell relies on leftover kernel state. Import it from
# its project module before this cell so Restart & Run All works — TODO
# confirm the module path.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation()
data_transformer = DataTransformation(config=data_transformation_config)
model_trainer_config = config.get_model_trainer()
model_trainer = ModelTrainer(config=model_trainer_config, data_transformer=data_transformer)

# Keep the result instead of discarding it, and display it as the cell output.
model_path, score = model_trainer.train()
model_path, score

[2025-10-26 10:43:04,847: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-10-26 10:43:04,861: INFO: common: created directory at: artifacts]
[2025-10-26 10:43:04,867: INFO: common: created directory at: artifacts/data_transformation]
[2025-10-26 10:43:04,870: INFO: common: created directory at: artifacts/model_trainer]
[2025-10-26 10:43:19,030: INFO: data_transformation: Building preprocessing pipeline.]
[2025-10-26 10:43:19,322: INFO: data_transformation: Applying preprocessing pipeline.]
[2025-10-26 10:43:20,093: INFO: 2526092707: Initializing CatBoostClassifier with BEST_PARAMS.]
[2025-10-26 10:43:20,093: INFO: 2526092707: Fitting CatBoost model on training data.]
[2025-10-26 10:45:07,697: INFO: 2526092707: Model saved to artifacts\model_trainer\model.pkl]
