In [None]:
import os

In [2]:
%pwd

'c:\\Users\\ainao\\Downloads\\Projects\\Bulldozer-Prediction\\research'

In [3]:
# Step up from the notebook folder to the project root so that relative
# paths used further down resolve from there. The guard keeps the cell
# idempotent: without it, every re-run would climb one more directory.
# NOTE(review): assumes the notebook lives in a folder named "research",
# as the recorded %pwd output shows — adjust if the notebook is moved.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\ainao\\Downloads\\Projects\\Bulldozer-Prediction'

In [5]:
from dataclasses import dataclass
from pathlib import Path
@dataclass
class ModelTunerConfig:
    """Settings consumed by the hyperparameter-tuning stage.

    The declaration order of the fields is kept as-is because it defines
    the positional order of the generated ``__init__``.
    """

    # Directory that is created for this stage before tuning starts.
    root_dir: Path
    # Destination passed to ``save_object`` for the tuned estimator.
    tuner_save_path: str
    # Search spaces; the tuner looks its own entry up by model name.
    param_dist: dict
    # Number of splits handed to the cross-validation splitter.
    cv_folds: int
    # Scoring entry copied from the config file.
    scoring: str
    # Model output location copied from the config file.
    model_save_path: Path

In [6]:
from bullprediction.utils.common import read_yaml
from bullprediction.constants import *
from bullprediction.utils.common import create_directories, save_object
from bullprediction.entity import DataTransformationConfig

In [7]:
class ConfigurationManager:
    """Loads the project YAML config and builds per-stage config objects."""

    def __init__(self, config_filepath=CONFIG_FILE_PATH):
        """Read the YAML file and create the artifacts root directory.

        Args:
            config_filepath: Location of the YAML config file; defaults to
                ``CONFIG_FILE_PATH``.
        """
        self.config = read_yaml(config_filepath)
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_transformation(self) -> DataTransformationConfig:
        """Build the data-transformation config from its YAML section.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            DataTransformationConfig populated from ``data_transformation``.
        """
        section = self.config.data_transformation
        create_directories([section.root_dir])

        # Each dataclass field is read from the attribute of the same name.
        field_names = (
            "root_dir",
            "train_path",
            "test_path",
            "train_data",
            "test_data",
            "preprocessor",
        )
        kwargs = {name: getattr(section, name) for name in field_names}
        return DataTransformationConfig(**kwargs)

    def get_model_tuner(self) -> ModelTunerConfig:
        """Build the model-tuner config from its YAML section.

        Creates the stage's ``root_dir`` as a side effect.

        Returns:
            ModelTunerConfig populated from ``model_tuner``.
        """
        section = self.config.model_tuner
        create_directories([section.root_dir])

        # Each dataclass field is read from the attribute of the same name.
        field_names = (
            "root_dir",
            "tuner_save_path",
            "param_dist",
            "cv_folds",
            "scoring",
            "model_save_path",
        )
        kwargs = {name: getattr(section, name) for name in field_names}
        return ModelTunerConfig(**kwargs)

        

    

In [8]:
from bullprediction.utils.common import save_object
from sklearn.model_selection import TimeSeriesSplit, RandomizedSearchCV
from sklearn.metrics import mean_squared_log_error, make_scorer
from catboost import CatBoostRegressor
import numpy as np
from bullprediction.entity import DataTransformationConfig
from bullprediction.conponents.data_transformation import DataTransformation

In [9]:

class ModelTuner:
    """Randomized hyperparameter search for a ``CatBoostRegressor``.

    The search is scored with RMSLE and cross-validated with
    ``TimeSeriesSplit``; the best estimator is optionally persisted with
    ``save_object``.
    """

    # Annotations are quoted so the class can be defined even when these
    # names are not yet in scope (e.g. notebook cells run in isolation).
    def __init__(self, config: "ModelTunerConfig", data_transformer: "DataTransformation"):
        """Store the tuning settings and the object that supplies the data.

        Args:
            config: Tuning settings; ``param_dist``, ``cv_folds`` and
                ``tuner_save_path`` are read by :meth:`tune`.
            data_transformer: Object exposing
                ``initiate_data_transformation_and_split()``.
        """
        self.config = config
        self.data_transformer = data_transformer

    def _rmsle(self, y_true, y_pred):
        """Return the root mean squared logarithmic error.

        Both arrays are clipped at zero first, because
        ``mean_squared_log_error`` rejects negative values and a regressor's
        predictions are not guaranteed to be non-negative.
        """
        y_true = np.clip(y_true, a_min=0, a_max=None)
        y_pred = np.clip(y_pred, a_min=0, a_max=None)
        return np.sqrt(mean_squared_log_error(y_true, y_pred))

    def tune(self, n_iter: int = 30, random_state: int = 42):
        """Run the randomized search and return the best model.

        Args:
            n_iter: Number of parameter settings sampled by the search.
                Defaults to 30, the value that used to be hard-coded.
            random_state: Seed used for both the CatBoost estimator and the
                randomized search. Defaults to 42, as before.

        Returns:
            tuple: ``(best_estimator_, best_params_)`` from the fitted search.

        Raises:
            ValueError: If ``config.param_dist`` has no (or an empty) entry
                under ``"CatBoosting Regressor"``.
        """
        # Check the search space before any data work so that a config
        # mistake fails immediately instead of after the transformation.
        param_dist = self.config.param_dist.get("CatBoosting Regressor", None)
        if not param_dist:
            raise ValueError(
                "[ModelTuner] No param dist "
                "found for CatBoosting Regressor in config."
            )

        # The transformer returns six values:
        # (X_train, X_val, X_test, y_train, y_val, preprocessor_path).
        # Only the training split is used by the search.
        X_train, _, _, y_train, _, _ = (
            self.data_transformer.initiate_data_transformation_and_split()
        )

        print("[ModelTuner] Starting tuning for CatBoostRegressor")

        catboost_model = CatBoostRegressor(verbose=False, random_state=random_state)

        # RMSLE is an error, so the scorer is negated (lower is better).
        # NOTE(review): config.scoring is not consulted here; RMSLE is always
        # used — confirm whether the config value should drive this.
        scoring = make_scorer(self._rmsle, greater_is_better=False)
        tscv = TimeSeriesSplit(n_splits=self.config.cv_folds)

        random_search = RandomizedSearchCV(
            estimator=catboost_model,
            param_distributions=param_dist,
            scoring=scoring,
            n_iter=n_iter,
            cv=tscv,
            n_jobs=-1,
            verbose=1,
            random_state=random_state,
        )
        random_search.fit(X_train, y_train)

        best_model = random_search.best_estimator_
        best_params = random_search.best_params_

        print(f"[ModelTuner] Best parameters: {best_params}")

        # Persisting is optional: an empty/None path skips the save.
        if self.config.tuner_save_path:
            save_object(self.config.tuner_save_path, best_model)
            print(f"[ModelTuner] Tuned CatBoost model saved to: {self.config.tuner_save_path}")

        return best_model, best_params




In [10]:

from bullprediction.conponents.model_trainer import ModelTrainer
from bullprediction.config.configuration import DataTransformationConfig
from bullprediction.conponents.data_transformation import DataTransformation

In [11]:
# Wire the stages together and run the search. Any exception propagates
# on its own, so no try/except wrapper is needed (the previous handler
# only re-raised). The tuned model and its parameters are kept so they
# can be inspected afterwards.
config = ConfigurationManager()
data_transformation_config = config.get_data_transformation()
data_transformer = DataTransformation(config=data_transformation_config)
model_tuner_config = config.get_model_tuner()
model_tuner = ModelTuner(config=model_tuner_config, data_transformer=data_transformer)
best_model, best_params = model_tuner.tune()

[2025-06-28 15:13:17,053: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-06-28 15:13:17,053: INFO: common: created directory at: artifacts]
[2025-06-28 15:13:17,053: INFO: common: created directory at: artifacts/data_transformation]
[2025-06-28 15:13:17,053: INFO: common: created directory at: artifacts/model_tuner]
[2025-06-28 15:13:19,617: INFO: data_transformation: Applying preprocessing pipeline to train and test data.]
[ModelTuner] Starting tuning for CatBoostRegressor




Fitting 3 folds for each of 30 candidates, totalling 90 fits
[ModelTuner] Best parameters: {'learning_rate': 0.01, 'l2_leaf_reg': 3, 'iterations': 1000, 'grow_policy': 'SymmetricTree', 'depth': 6}
[ModelTuner] Tuned CatBoost model saved to: artifacts/model_tuner/best_model.pkl
