In [1]:
import os

In [2]:
%pwd

'd:\\Scaler Course\\OLACaseStudy\\Ola-driver-churn\\research'

In [3]:
# The notebook is stored in research/ while the config files and the `src`
# package are addressed relative to the project root. Step up one level only
# while we are still inside research/, so re-running this cell in a live
# kernel does not keep climbing the directory tree.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [19]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelTrainerConfig:
    """Settings consumed by the model-training stage.

    The path-like fields locate inputs and outputs of the stage, the list
    fields hold the candidate values for each hyper-parameter, and
    ``target_column`` names the label column in the CSV files.
    """

    # --- locations -------------------------------------------------------
    root_dir: Path          # directory the stage writes into
    train_data_path: Path   # CSV with the training split
    test_data_path: Path    # CSV with the test split
    model_name: str         # file name of the saved model inside root_dir
    model_score: Path       # file that receives the best score / parameters

    # --- hyper-parameter candidates --------------------------------------
    learning_rate: list
    max_depth: list
    max_features: list
    min_samples_leaf: list
    min_samples_split: list
    n_estimators: list

    # --- data schema -----------------------------------------------------
    target_column: str      # name of the label column
    


In [24]:
from src.OLAChurnPred.constants import *
from src.OLAChurnPred.utils.common import read_yaml, create_directories
class ConfigurationManager:
    """Reads the project's YAML files and hands out per-stage config objects.

    On construction the config, params and schema files are loaded via
    ``read_yaml`` and the artifacts root listed in the config file is passed
    to ``create_directories``.
    """

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH,
                 schema_filepath=SCHEMA_FILE_PATH):
        """Load the three YAML files and prepare the artifacts root.

        Args:
            config_filepath: location of the main config file.
            params_filepath: location of the hyper-parameter file.
            schema_filepath: location of the data-schema file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the ``ModelTrainerConfig`` for the training stage.

        Paths and the model file name come from the ``model_trainer`` section
        of the config file, the hyper-parameter candidates from the
        ``Model_Params`` section of the params file, and the target column
        name from ``TARGET_COLUMN.name`` in the schema file. The stage's
        ``root_dir`` is passed to ``create_directories`` before the config
        object is built.

        Returns:
            ModelTrainerConfig: the populated settings object.
        """
        trainer_section = self.config.model_trainer
        search_space = self.params.Model_Params
        target_section = self.schema.TARGET_COLUMN

        create_directories([trainer_section.root_dir])

        location_fields = (
            "root_dir",
            "train_data_path",
            "test_data_path",
            "model_name",
            "model_score",
        )
        hyperparameter_fields = (
            "learning_rate",
            "max_depth",
            "max_features",
            "min_samples_leaf",
            "min_samples_split",
            "n_estimators",
        )

        # Collect the keyword arguments section by section; every field is
        # copied over under the same name it has in the YAML file.
        kwargs = {name: getattr(trainer_section, name) for name in location_fields}
        kwargs.update(
            (name, getattr(search_space, name)) for name in hyperparameter_fields
        )
        kwargs["target_column"] = target_section.name

        return ModelTrainerConfig(**kwargs)

In [34]:
import os
from src.OLAChurnPred import logger
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from src.OLAChurnPred.utils.common import save_bin
import joblib


class ModelTrainer:
    """Tunes and persists a gradient-boosting classifier.

    A randomized hyper-parameter search is run on the training CSV; the best
    score and parameters are written to ``config.model_score`` and the best
    estimator is saved with joblib under ``config.root_dir``.
    """

    def __init__(self, config: ModelTrainerConfig):
        """Store the training-stage settings.

        Args:
            config: paths, hyper-parameter candidates and target column name.
        """
        self.config = config

    def train(self):
        """Run the randomized search, then save the report and the best model.

        Reads ``config.train_data_path``, splits off ``config.target_column``
        as the label, and fits a ``RandomizedSearchCV`` (100 sampled
        candidates, 4-fold CV) over a ``GradientBoostingClassifier``.

        Side effects:
            * prints the best CV score and parameters,
            * overwrites ``config.model_score`` with the same two values,
            * dumps the best estimator to ``root_dir / model_name``.

        The test split is not used by this method, so it is not loaded here.
        Exceptions from pandas, scikit-learn (``error_score='raise'``) or the
        file system propagate to the caller.
        """
        train_data = pd.read_csv(self.config.train_data_path)

        X_train = train_data.drop(columns=[self.config.target_column])
        y_train = train_data[self.config.target_column]

        random_grid = {
            'n_estimators': self.config.n_estimators,
            # NOTE(review): config.max_features is loaded from params but the
            # candidates are hard-coded here - confirm which one is intended.
            'max_features': [None, 'sqrt'],
            'max_depth': self.config.max_depth,
            'min_samples_split': self.config.min_samples_split,
            'min_samples_leaf': self.config.min_samples_leaf,
            'learning_rate': self.config.learning_rate,
        }

        # Seed the estimator as well as the search: with max_features='sqrt'
        # the trees draw random feature subsets, so an unseeded estimator
        # gives different scores on every run.
        gbc = GradientBoostingClassifier(random_state=41)
        gbc_randomcv = RandomizedSearchCV(
            estimator=gbc,
            param_distributions=random_grid,
            n_iter=100,
            cv=4,
            random_state=41,
            n_jobs=-1,
            verbose=3,
            error_score='raise',  # fail fast instead of scoring broken fits as NaN
        )

        gbc_randomcv.fit(X_train, y_train)
        print(gbc_randomcv.best_score_)
        print(gbc_randomcv.best_params_)

        # f-strings are required so the values (not the placeholder text) are
        # written; one line per entry keeps the report readable.
        with open(self.config.model_score, 'w') as f:
            f.write(f'Best model score is: {gbc_randomcv.best_score_}\n')
            f.write(f'Best model parameters are: {gbc_randomcv.best_params_}\n')

        joblib.dump(
            gbc_randomcv.best_estimator_,
            os.path.join(self.config.root_dir, self.config.model_name),
        )


        





    

In [35]:
# Build the training-stage settings from the YAML files and run the search.
# Errors are left to propagate as-is: wrapping the calls in
# `try/except Exception as e: raise e` re-raises the same exception and only
# adds a frame to the traceback.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

[2024-12-12 17:15:30,885: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-12-12 17:15:30,889: INFO: common: yaml file: params.yaml loaded successfully]
[2024-12-12 17:15:30,891: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-12-12 17:15:30,892: INFO: common: created directory at: artifacts]
[2024-12-12 17:15:30,893: INFO: common: created directory at: artifacts/model_trainer]
Fitting 4 folds for each of 100 candidates, totalling 400 fits
0.5640756302521008
{'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 4, 'max_features': 'sqrt', 'max_depth': 2, 'learning_rate': 0.01}
