In [1]:
import os

In [2]:
%pwd

'c:\\Users\\PASCAL\\Car_Price_Predictions\\research'

In [3]:
# Run from the project root so the `src` package imports and the relative
# config paths used below resolve. Guarded on the directory name so that
# re-running this cell does not climb one more level each time.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\PASCAL\\Car_Price_Predictions'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings for the model-training stage.

    Instances are frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Directory created for this stage before training starts.
    root_dir: Path
    # Presumably the location of the training data -- confirm against config.yaml.
    data_path: Path
    # Presumably where the serialized model belongs -- confirm against config.yaml.
    model_path: Path

In [6]:
from src.CarPrice.constants import *
from src.CarPrice.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    """Reads the project's YAML files and hands out per-stage config objects."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Every stage writes beneath this directory, so create it up front.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the model-trainer settings from the ``model_trainer`` section.

        The section's ``root_dir`` is created as a side effect.

        Returns:
            A ``ModelTrainerConfig`` populated with the section's ``root_dir``,
            ``data_path`` and ``model_path`` values.
        """
        trainer_section = self.config.model_trainer

        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            data_path=trainer_section.data_path,
            model_path=trainer_section.model_path,
        )

In [8]:
import os
import sys
from dataclasses import dataclass

from catboost import CatBoostRegressor
from sklearn.ensemble import (
    AdaBoostRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from src.CarPrice.logger import logging
from src.CarPrice.utils.common import evaluate_models
from src.CarPrice.components.data_transformation import DataTransformation
import pickle

In [26]:
import os
import pickle
import logging
import numpy as np
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from catboost import CatBoostRegressor

class ModelTrainer:
    """Fits several candidate regressors, keeps the best one and pickles it."""

    # Minimum test-set R^2 the winning model must reach to be accepted.
    MIN_R2_SCORE = 0.6

    def __init__(self, config):
        """Keep the stage configuration.

        Args:
            config: Object exposing ``root_dir``; it is read when the model is saved.
        """
        self.config = config

    @staticmethod
    def _split_params(model_params):
        """Separate fixed hyperparameter values from multi-candidate grids.

        A ``list`` / ``numpy.ndarray`` value is treated as a set of candidates:
        one candidate becomes a fixed value, anything else goes to the search
        grid. Every other value is a fixed setting.

        Returns:
            ``(fixed, grid)`` -- two dicts keyed by parameter name.
        """
        fixed, grid = {}, {}
        for name, value in model_params.items():
            if isinstance(value, (list, np.ndarray)):
                candidates = list(value)
                if len(candidates) == 1:
                    fixed[name] = candidates[0]
                else:
                    # An empty candidate list is passed through on purpose so
                    # that the grid search reports it instead of ignoring it.
                    grid[name] = candidates
            else:
                fixed[name] = value
        return fixed, grid

    @staticmethod
    def _search_best_params(model, grid, X_train, y_train, cv):
        """Cross-validate every combination in ``grid`` and return the best one."""
        # Imported lazily: only needed when a model really has a grid to search.
        from sklearn.model_selection import GridSearchCV

        search = GridSearchCV(model, grid, cv=cv)
        search.fit(X_train, y_train)
        return search.best_params_

    def evaluate_models(self, X_train, y_train, X_test, y_test, models, params, cv=3):
        """Tune, fit and score each model.

        Hyperparameter grids (lists of candidate values) are searched with
        cross-validation on the training data; passing such lists directly to
        ``set_params`` would hand the estimator a list where it expects a
        single value. The estimators in ``models`` are configured and fitted
        in place, so the caller can reuse them afterwards.

        Args:
            X_train, y_train: Training features and targets.
            X_test, y_test: Hold-out features and targets used for scoring.
            models: Mapping of model name to an unfitted estimator.
            params: Mapping of model name to a dict of hyperparameters; models
                without an entry keep their current settings.
            cv: Number of cross-validation folds used when a grid is searched.

        Returns:
            Dict mapping each model name to its R^2 score on the test data.
        """
        model_report = {}
        for model_name, model in models.items():
            fixed_params, search_grid = self._split_params(params.get(model_name) or {})

            if fixed_params:
                model.set_params(**fixed_params)
            if search_grid:
                best_params = self._search_best_params(
                    model, search_grid, X_train, y_train, cv
                )
                model.set_params(**best_params)

            # Final fit on the full training set with the chosen settings.
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            model_report[model_name] = r2_score(y_test, y_pred)

        return model_report

    def initiate_model_trainer(self, train_arr, test_arr):
        """Train all candidates, persist the best one and return its test R^2.

        Args:
            train_arr: 2-D array whose last column is the target.
            test_arr: 2-D array laid out like ``train_arr``.

        Returns:
            R^2 of the best model on the test data.

        Raises:
            Exception: If the best model scores below ``MIN_R2_SCORE``. Any
                other error raised during training is logged and re-raised
                rather than being turned into a ``None`` return value.
        """
        try:
            logging.info("Splitting training and test input data")
            X_train, y_train = train_arr[:, :-1], train_arr[:, -1]
            X_test, y_test = test_arr[:, :-1], test_arr[:, -1]

            models = {
                "Random Forest": RandomForestRegressor(),
                "Decision Tree": DecisionTreeRegressor(),
                "Gradient Boosting": GradientBoostingRegressor(),
                "Linear Regression": LinearRegression(),
                "XGBRegressor": XGBRegressor(),
                "CatBoosting Regressor": CatBoostRegressor(verbose=False),
                "AdaBoost Regressor": AdaBoostRegressor(),
            }

            params = {
                "Decision Tree": {
                    'criterion': ['poisson', 'absolute_error', 'squared_error', 'friedman_mse'],
                },
                "Random Forest": {
                    'n_estimators': [8, 16, 32, 64, 128, 256]
                },
                "Gradient Boosting": {
                    'learning_rate': [0.1, 0.01, 0.05, 0.001],
                    'subsample': [0.6, 0.7, 0.75, 0.8, 0.85, 0.9],
                    'n_estimators': [8, 16, 32, 64, 128, 256]
                },
                "Linear Regression": {},
                "XGBRegressor": {
                    'learning_rate': [0.1, 0.01, 0.05, 0.001],
                    'n_estimators': [8, 16, 32, 64, 128, 256]
                },
                "CatBoosting Regressor": {
                    'depth': [6, 8, 10],
                    'learning_rate': [0.01, 0.05, 0.1],
                    'iterations': [30, 50, 100]
                },
                "AdaBoost Regressor": {
                    'learning_rate': [0.1, 0.01, 0.5, 0.001],
                    'n_estimators': [8, 16, 32, 64, 128, 256]
                }
            }

            model_report = self.evaluate_models(X_train, y_train, X_test, y_test, models, params)

            # On ties the first model in insertion order wins.
            best_model_name = max(model_report, key=model_report.get)
            best_model_score = model_report[best_model_name]
            best_model = models[best_model_name]

            if best_model_score < self.MIN_R2_SCORE:
                raise Exception("No best model found")

            logging.info("Best found model on both training and testing dataset")
            logging.info("Best model: %s (R2=%.4f)", best_model_name, best_model_score)

            # NOTE(review): this nests "artifacts/model_trainer" under root_dir and
            # ignores config.model_path -- confirm the intended location before
            # changing it, since other stages may load the model from this path.
            output_dir = os.path.join(self.config.root_dir, "artifacts/model_trainer")
            os.makedirs(output_dir, exist_ok=True)

            output_file = os.path.join(output_dir, "model.pkl")
            with open(output_file, "wb") as file:
                pickle.dump(best_model, file)

            predicted = best_model.predict(X_test)
            r2_square = r2_score(y_test, predicted)
            logging.info("Model Trainer completed and saved as a pickle file.")
            return r2_square

        except Exception as e:
            # Log with traceback, then propagate so callers never mistake a
            # failed training run for a successful one that returned None.
            logging.exception(f"Error in initiate_model_trainer: {str(e)}")
            raise


In [27]:
# Build the stage configuration. Any exception raised below propagates
# unchanged to the notebook.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()

# NOTE(review): DataTransformation receives the model-trainer config here --
# confirm that is the config object it expects.
data_transformation = DataTransformation(model_trainer_config)
train_arr, test_arr, _ = data_transformation.initiate_data_transformation()

# Train the candidate models on the transformed arrays.
model_trainer = ModelTrainer(model_trainer_config)
model_trainer.initiate_model_trainer(train_arr, test_arr)


[2023-07-23 20:45:43,896: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-07-23 20:45:43,899: INFO: common: yaml file: params.yaml loaded successfully]
[2023-07-23 20:45:43,900: INFO: common: created directory at: artifacts]
[2023-07-23 20:45:43,901: INFO: common: created directory at: artifacts/model_trainer]
[2023-07-23 20:45:43,902: INFO: data_transformation: Read train and test data completed]
[2023-07-23 20:45:43,903: INFO: data_transformation: Obtaining preprocessing object]
[2023-07-23 20:45:43,904: INFO: data_transformation: Categorical columns: ['Fuel_Type', 'Seller_Type', 'Transmission']]
[2023-07-23 20:45:43,904: INFO: data_transformation: Numerical columns: ['Selling_Price', 'Present_Price', 'Kms_Driven', 'Owner', 'car_age']]
[2023-07-23 20:45:43,926: INFO: data_transformation: Applying preprocessing object on training dataframe and testing dataframe.]
[2023-07-23 20:45:43,927: ERROR: data_transformation: Error in initiate_data_transformation: A given

TypeError: cannot unpack non-iterable NoneType object