In [1]:
import os

In [2]:
%pwd

'c:\\Users\\ainao\\Downloads\\Projects\\Bulldozer-Prediction\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\ainao\\Downloads\\Projects\\Bulldozer-Prediction'

In [5]:
from dataclasses import dataclass
from pathlib import Path
@dataclass
class ModelTrainerConfig:
    root_dir : Path
    model_save_path : Path



In [6]:
from bullprediction.utils.common import create_directories, read_yaml
from bullprediction.constants import *
from bullprediction.entity import DataTransformationConfig

In [7]:
class ConfigurationManager:
    def __init__(self, config_filepath=CONFIG_FILE_PATH):
        self.config = read_yaml(config_filepath)
        create_directories([self.config.artifacts_root])

    def get_data_transformation(self) -> DataTransformationConfig:
        config = self.config.data_transformation

        create_directories([config.root_dir])

        data_transformation_config = DataTransformationConfig(
            root_dir=config.root_dir,
            train_path=config.train_path,
            test_path=config.test_path,
            train_data=config.train_data,
            test_data=config.test_data,
            preprocessor=config.preprocessor
        )

        return data_transformation_config

    def get_model_trainer(self) -> ModelTrainerConfig:
        config = self.config.model_trainer

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            model_save_path=config.model_save_path
        )

        return model_trainer_config


In [8]:
from bullprediction.entity import DataTransformationConfig
from bullprediction.conponents.data_transformation import DataTransformation
import os
import joblib
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import (
    RandomForestRegressor,
    AdaBoostRegressor,
    GradientBoostingRegressor
)
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_log_error
import numpy as np
from bullprediction.utils.common import save_object

In [9]:
class ModelTrainer:
    def __init__(self,config: ModelTrainerConfig, data_transformer:DataTransformation):
        self.config = config
        self.data_transformer = data_transformer

    def train(self):
        (
                sample_train_processed,
                sample_valid_processed,
                input_feature_test_processed,
                sample_target_train,
                sample_target_valid,
                preprocessor_path
        ) = self.data_transformer.initiate_data_transformation_and_split()

        models = {
        "Linear Regression": LinearRegression(),
        "Lasso": Lasso(random_state=42),
        "Ridge": Ridge(random_state=42),
        "K-Neighbors Regressor": KNeighborsRegressor(),
        "Decision Tree": DecisionTreeRegressor(random_state=42),
        "Random Forest Regressor": RandomForestRegressor(random_state=42),
        "XGBRegressor": XGBRegressor(random_state=42), 
        "CatBoosting Regressor": CatBoostRegressor(verbose=False,random_state=42),
        "AdaBoost Regressor": AdaBoostRegressor(random_state=42),
        "Gradient Boosting Regressor" : GradientBoostingRegressor(random_state=42),
         }
    
        best_model = None
        best_model_name = ""
        best_rmsle = float("inf")
        scores = {}

        for name, model in models.items():
            model.fit(sample_train_processed, sample_target_train)
            y_pred = model.predict(sample_valid_processed)

        # Clip predictions and true values to avoid negative values or zeros (log issue)
            y_pred_clip = np.clip(y_pred, a_min=0, a_max=None)
            y_val_clip = np.clip(sample_target_valid, a_min=0, a_max=None)

            rmsle = np.sqrt(mean_squared_log_error(y_val_clip, y_pred_clip))
            scores[name] = rmsle

            if rmsle < best_rmsle:
                best_rmsle = rmsle
                best_model = model
                best_model_name = name

            print(f"[ModelTrainer] Best Model: {best_model_name} | RMSLE: {best_rmsle:.6f}")

        if self.config.model_save_path:
                save_object(self.config.model_save_path, best_model)
                print(f"Best model saved to: {self.config.model_save_path}")
        
        return {
            "best_model": best_model,
            "best_model_name": best_model_name,
            "best_rmsle": best_rmsle,
            "all_rmsle_scores": scores,
            "X_train": sample_train_processed,
            "y_train": sample_target_train,
            "X_val": sample_valid_processed,
            "y_val": sample_target_valid,
            "X_test": input_feature_test_processed,
            "preprocessor_path": preprocessor_path
           }

         
         


        


In [10]:
try:
    config = ConfigurationManager()
    data_transformation_config = config.get_data_transformation()
    data_transformer = DataTransformation(config=data_transformation_config)
    model_trainer_config = config.get_model_trainer()
    model_trainer = ModelTrainer(config=model_trainer_config, data_transformer=data_transformer)
    model_trainer.train()
except Exception as e:
    raise e

[2025-06-28 14:30:40,645: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-06-28 14:30:40,650: INFO: common: created directory at: artifacts]
[2025-06-28 14:30:40,652: INFO: common: created directory at: artifacts/data_transformation]


[2025-06-28 14:30:40,653: INFO: common: created directory at: artifacts/model_trainer]
[2025-06-28 14:30:44,295: INFO: data_transformation: Applying preprocessing pipeline to train and test data.]




[ModelTrainer] Best Model: Linear Regression | RMSLE: 6.124255
[ModelTrainer] Best Model: Lasso | RMSLE: 6.121916
[ModelTrainer] Best Model: Ridge | RMSLE: 6.101537


  model = cd_fast.enet_coordinate_descent(


[ModelTrainer] Best Model: K-Neighbors Regressor | RMSLE: 0.854792
[ModelTrainer] Best Model: Decision Tree | RMSLE: 0.748166
[ModelTrainer] Best Model: Decision Tree | RMSLE: 0.748166
[ModelTrainer] Best Model: XGBRegressor | RMSLE: 0.685127
[ModelTrainer] Best Model: CatBoosting Regressor | RMSLE: 0.614887
[ModelTrainer] Best Model: CatBoosting Regressor | RMSLE: 0.614887
[ModelTrainer] Best Model: CatBoosting Regressor | RMSLE: 0.614887
Best model saved to: artifacts/model_trainer/model.pkl
