In [1]:
import os

In [2]:
# Show the kernel's current working directory.
%pwd

'c:\\Users\\ainao\\Downloads\\Projects\\Fraud Detection\\research'

In [3]:
# Step up from the notebook folder to its parent so project-relative paths
# resolve from there. The guard keeps the cell idempotent: re-running it will
# not climb a further level once the kernel has already left "research".
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
# Show the kernel's current working directory.
%pwd

'c:\\Users\\ainao\\Downloads\\Projects\\Fraud Detection'

In [5]:
from dataclasses import dataclass
from pathlib import Path
@dataclass
class ModelTunerConfig:
    """Settings for the hyperparameter-tuning stage.

    Instances are built by ``ConfigurationManager.get_model_tuner`` from the
    ``model_tuner`` section of the project configuration.
    """

    # Directory for this stage; created when the config object is built.
    root_dir: Path
    # Destination handed to ``save_object`` for the best tuned estimator.
    tuner_save_path: str
    # Search spaces keyed by model name (looked up with ``model_name``).
    param_dist: dict
    # Number of stratified cross-validation splits.
    cv_folds: int
    # Name of the scoring metric configured for the search.
    scoring: str
    # Path configured for the saved model.
    model_save_path: Path
    # Key selecting both the estimator type and its entry in ``param_dist``.
    model_name: str

In [6]:
from fraud_detection.utils.common import read_yaml
from fraud_detection.constants import *
from fraud_detection.utils.common import create_directories, save_object
from fraud_detection.entity import DataTransformationConfig

In [7]:
class ConfigurationManager:
    """Load the project configuration and hand out per-stage config objects.

    The configuration file is read once in the constructor; each ``get_*``
    method creates the stage's ``root_dir`` and returns a populated config
    object for that stage.
    """

    def __init__(self, config_filepath=CONFIG_FILE_PATH):
        """Read the YAML configuration and create the artifacts root directory.

        Args:
            config_filepath: Location of the YAML configuration file.
        """
        self.config = read_yaml(config_filepath)
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation(self) -> DataTransformationConfig:
        """Build the data-transformation settings from the ``data_transformation`` section.

        Returns:
            DataTransformationConfig populated from the loaded configuration.
        """
        section = self.config.data_transformation
        create_directories([section.root_dir])

        return DataTransformationConfig(
            root_dir=section.root_dir,
            train_path=section.train_path,
            test_path=section.test_path,
            train_data=section.train_data,
            test_data=section.test_data,
            preprocessor=section.preprocessor,
        )

    def get_model_tuner(self) -> ModelTunerConfig:
        """Build the tuner settings from the ``model_tuner`` section.

        Returns:
            ModelTunerConfig populated from the loaded configuration.
        """
        section = self.config.model_tuner
        create_directories([section.root_dir])

        return ModelTunerConfig(
            root_dir=section.root_dir,
            tuner_save_path=section.tuner_save_path,
            param_dist=section.param_dist,
            cv_folds=section.cv_folds,
            scoring=section.scoring,
            model_save_path=section.model_save_path,
            model_name=section.model_name,
        )

In [8]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import roc_auc_score, make_scorer
from sklearn.model_selection import StratifiedKFold
from scipy.stats import uniform, randint
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier

In [9]:
class ModelTuner:
    """Run a randomized hyperparameter search for the classifier named in the config.

    Attributes:
        config: Tuner settings. This class reads ``model_name``, ``param_dist``,
            ``cv_folds``, ``scoring`` and ``tuner_save_path`` from it.
        data_transformer: Object exposing
            ``initiate_data_transformation_and_split()``, which supplies the
            train/validation/test splits.
    """

    def __init__(self, config, data_transformer):
        self.config = config
        self.data_transformer = data_transformer

    def _roc_auc(self, y_true, y_pred_proba):
        """Compute ROC AUC from class probabilities.

        Args:
            y_true: Ground-truth labels.
            y_pred_proba: 2-D array of class probabilities; column 1 is used
                as the score (presumably the positive class).

        Returns:
            The value returned by ``roc_auc_score``.
        """
        return roc_auc_score(y_true, y_pred_proba[:, 1])

    def _resolve_scoring(self):
        """Return the scorer name for the search, defaulting to ``"roc_auc"``.

        The configured ``scoring`` value is honoured when it is set; a missing
        or empty value falls back to ROC AUC.
        """
        return getattr(self.config, "scoring", None) or "roc_auc"

    def _build_estimator(self, model_name):
        """Instantiate the untuned estimator that corresponds to ``model_name``.

        Raises:
            ValueError: If ``model_name`` is not one of the supported models.
        """
        # Imports stay local so that an optional backend (LightGBM) is only
        # required when that model is actually selected.
        if model_name == "Random Forest":
            from sklearn.ensemble import RandomForestClassifier
            return RandomForestClassifier(random_state=42, n_jobs=-1)
        if model_name == "AdaBoost":
            from sklearn.ensemble import AdaBoostClassifier
            return AdaBoostClassifier(random_state=42)
        if model_name == "Gradient Boosting":
            from sklearn.ensemble import GradientBoostingClassifier
            return GradientBoostingClassifier(
                random_state=42, n_iter_no_change=5, validation_fraction=0.1
            )
        if model_name == "LightGBM":
            from lightgbm import LGBMClassifier
            return LGBMClassifier(random_state=42, n_jobs=-1)
        raise ValueError(f"[ModelTuner] Unsupported model: {model_name}")

    def tune(self, n_iter=10):
        """Search the configured parameter space and return the best estimator.

        Args:
            n_iter: Number of parameter settings sampled by the randomized
                search. Defaults to 10.

        Returns:
            Tuple ``(best_model, best_params)`` taken from the fitted search.

        Raises:
            ValueError: If the config has no ``param_dist`` entry for
                ``model_name`` or the model name is unsupported.
        """
        model_name = self.config.model_name
        param_dist = self.config.param_dist.get(model_name, None)

        # Validate the configuration before the data transformation runs, so
        # a bad config fails fast instead of after the preprocessing work.
        if param_dist is None:
            raise ValueError(f"[ModelTuner] No param_dist found for {model_name} in config.")

        model = self._build_estimator(model_name)

        # Only the training split is used by the search; the remaining values
        # are unpacked to keep the strict 7-item contract with the transformer.
        X_train, _X_val, _X_test, y_train, _y_val, _y_test, _preprocessor_path = (
            self.data_transformer.initiate_data_transformation_and_split()
        )

        cv = StratifiedKFold(n_splits=self.config.cv_folds, shuffle=True, random_state=42)

        # A scorer *name* is passed rather than a make_scorer(...) object:
        # make_scorer's `needs_proba` keyword was removed in scikit-learn 1.6.
        random_search = RandomizedSearchCV(
            estimator=model,
            param_distributions=param_dist,
            scoring=self._resolve_scoring(),
            n_iter=n_iter,
            cv=cv,
            n_jobs=-1,
            verbose=2,
            random_state=42,
        )

        print(f"[ModelTuner] Starting hyperparameter tuning for {model_name}...")

        random_search.fit(X_train, y_train)

        best_model = random_search.best_estimator_
        best_params = random_search.best_params_

        print(f"[ModelTuner] Best parameters for {model_name}: {best_params}")

        # Persisting is optional: an empty/None path skips the save.
        if self.config.tuner_save_path:
            save_object(self.config.tuner_save_path, best_model)
            print(f"[ModelTuner] Best tuned model saved to: {self.config.tuner_save_path}")

        return best_model, best_params


In [10]:
from fraud_detection.conponents.model_trainer import ModelTrainer
from fraud_detection.config.configuration import DataTransformationConfig
from fraud_detection.conponents.data_transformation import DataTransformation

In [11]:
# Wire the stages together: configuration -> data transformation -> tuner,
# then run the search. Exceptions are left to propagate unmodified so the
# notebook shows the full traceback from the point of failure.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation()
data_transformer = DataTransformation(config=data_transformation_config)
model_tuner_config = config.get_model_tuner()
model_tuner = ModelTuner(config=model_tuner_config, data_transformer=data_transformer)

# Keep the results so later cells can inspect or evaluate the tuned model.
best_model, best_params = model_tuner.tune()

[2025-06-30 22:36:41,272: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-06-30 22:36:41,276: INFO: common: created directory at: artifacts]
[2025-06-30 22:36:41,279: INFO: common: created directory at: artifacts/data_transformation]
[2025-06-30 22:36:41,280: INFO: common: created directory at: artifacts/model_tuner]
Transaction Date column after conversion:
0   2024-02-20 05:58:41
1   2024-02-25 08:09:45
2   2024-03-18 03:42:55
3   2024-03-16 20:41:31
4   2024-01-15 05:08:17
Name: Transaction Date, dtype: datetime64[ns]
Data type: datetime64[ns]
Transaction Date column after conversion:
0   2024-03-24 23:42:43
1   2024-01-22 00:53:31
2   2024-01-22 08:06:03
3   2024-01-16 20:34:53
4   2024-01-16 15:47:23
Name: Transaction Date, dtype: datetime64[ns]
Data type: datetime64[ns]
[2025-06-30 22:36:41,423: INFO: data_transformation: Building preprocessing pipeline.]
[2025-06-30 22:36:41,423: INFO: data_transformation: Applying preprocessing pipeline.]
[ModelTuner] Sta