In [21]:
import os
# Show the working directory before the change.
print(os.getcwd())
# Move the kernel into the project checkout so relative paths used by later
# cells resolve from there (presumably the repository root -- confirm).
# NOTE(review): this is an absolute, machine-specific Windows path; the cell
# fails on any other machine or checkout location.
os.chdir('d:\\vscode_machineLearning\\internship\\Customer-Churn-Prediction')
# Show the working directory after the change to confirm it took effect.
print(os.getcwd())

d:\vscode_machineLearning\internship\Customer-Churn-Prediction
d:\vscode_machineLearning\internship\Customer-Churn-Prediction


In [22]:
import pandas as pd

## Entity

In [23]:
from dataclasses import dataclass
from pathlib import Path

In [24]:
@dataclass(frozen=True)
class ModelTrainerConfig:
    """Read-only bundle of settings handed to the model-training step.

    Instances are frozen, so attributes cannot be reassigned after creation.
    """

    # Feature files, read with ``pd.read_csv`` by the trainer.
    train_data: Path
    test_data: Path
    # Directory created for the trained model artifacts.
    model_dir: Path
    # Target files, read with ``pd.read_csv`` by the trainer.
    y_train_path: Path
    y_test_path: Path
    # Hyperparameters looked up by model name during training.
    model_params_dir: dict

## Configuration

In [25]:
from churnPredictor.constants import *
from churnPredictor.utils import *

In [26]:
class ConfigurationManager:
    """Loads the project YAML files and builds stage-specific config objects."""

    def __init__(
        self,
        config_file_path = CONFIG_FILE_PATH,
        schema_file_path = SCHEMA_FILE_PATH,
        params_file_path=PARAMS_FILE_PATH):
        """Read the config, schema and params YAML files.

        Args:
            config_file_path: Path of the main configuration YAML.
            schema_file_path: Path of the schema YAML.
            params_file_path: Path of the model-parameters YAML.

        After loading, ``create_dirs`` is called with the configured
        ``artifacts_root``.
        """
        # Files are read in the order config -> schema -> params.
        yaml_paths = (config_file_path, schema_file_path, params_file_path)
        self.config, self.schema, self.params = (
            read_yaml(yaml_path) for yaml_path in yaml_paths
        )

        create_dirs([self.config.artifacts_root])

    def get_modelTrainer_config(self):
        """Build the ``ModelTrainerConfig`` for the training stage.

        Reads the ``model_trainer`` section of the config file and the
        ``models`` section of the params file, and calls ``create_dirs`` with
        the configured model directory before returning.

        Returns:
            ModelTrainerConfig: Settings for ``ModelTrainer``.
        """
        trainer_section = self.config.model_trainer
        model_params = self.params.models

        create_dirs([trainer_section.model_dir])

        settings = dict(
            train_data=trainer_section.train_data,
            test_data=trainer_section.test_data,
            model_dir=trainer_section.model_dir,
            y_train_path=trainer_section.y_train_path,
            y_test_path=trainer_section.y_test_path,
            model_params_dir=model_params,
        )
        return ModelTrainerConfig(**settings)


## Component

In [27]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, recall_score, accuracy_score, precision_score, classification_report
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [28]:
# Inspect the `randomforest` section of the params file as a plain dict.
dict(read_yaml(PARAMS_FILE_PATH).randomforest)

[2023-09-17 04:50:03,054: INFO: utils: yaml file: params.yaml loaded successfully]


{'n_estimators': 256,
 'min_samples_split': 10,
 'min_samples_leaf': 2,
 'max_features': 'sqrt',
 'max_depth': 40,
 'criterion': 'entropy',
 'oob_score': True}

In [29]:
class ModelTrainer:
    """Fits a fixed set of classifiers and persists each one with joblib."""

    def __init__(self,config:ModelTrainerConfig):
        """Keep the configuration used by the training methods.

        Args:
            config: Paths of the train/test CSV files, the model output
                directory and the per-model hyperparameters.
        """
        self.config = config

    def initiate_model_training(self):
        """Train every configured classifier and save it to ``config.model_dir``.

        Loads the feature and target CSV files, applies the hyperparameters
        stored under each model's name in ``config.model_params_dir``, fits
        the model on the training data and writes it to
        ``<model_dir>/<model name>.joblib``.

        Returns:
            tuple: ``(trained_models, X_test, y_test)`` where
            ``trained_models`` maps model name to the fitted estimator and
            ``X_test`` / ``y_test`` are the DataFrames read from the test CSVs.

        Raises:
            KeyError: If ``config.model_params_dir`` has no entry for one of
                the model names.
        """
        config = self.config

        X_train = pd.read_csv(config.train_data)
        y_train = pd.read_csv(config.y_train_path)
        X_test = pd.read_csv(config.test_data)
        y_test = pd.read_csv(config.y_test_path)
        print("X_train shape:", X_train.shape)
        print("y_train shape:", y_train.shape)
        print("X_test shape:", X_test.shape)
        print("y_test shape:", y_test.shape)

        self.models = {
            "GradientBoostingClassifier": GradientBoostingClassifier(),
            "XGBoostClassifier": XGBClassifier(),
            "CatBoostClassifier": CatBoostClassifier(),
            "AdaBoostClassifier": AdaBoostClassifier(),
            "RandomForestClassifier": RandomForestClassifier()
        }

        trained_models = {}
        # Save into the configured directory rather than a hard-coded
        # 'artifacts\model' literal, whose backslash is not a path separator
        # outside Windows.
        model_dir = config.model_dir
        os.makedirs(model_dir, exist_ok=True)

        for model_name, model in self.models.items():
            model.set_params(**dict(config.model_params_dir[model_name]))
            # The target is read as a one-column frame; flatten it to 1-D.
            model.fit(X_train,y_train.values.ravel())
            logger.info(f'the {model} model trained successfully!')

            model_path = os.path.join(model_dir, f'{model_name}.joblib')
            # The context manager guarantees the file handle is closed.
            with open(model_path, mode='wb') as model_file:
                joblib.dump(model, model_file)

            trained_models[model_name] = model

        return trained_models , X_test , y_test

    def evaluate(self,true,pred):
        """Compute classification metrics for one set of predictions.

        Args:
            true: Ground-truth labels.
            pred: Predicted labels.

        Returns:
            dict: Keys ``confusion_matrix``, ``accuracy``, ``recall``,
            ``precision`` and ``classification_report``, each holding the
            result of the matching scikit-learn function called with its
            default arguments.
        """
        cm = confusion_matrix(true, pred)
        accuracy = accuracy_score(true, pred)
        recall = recall_score(true, pred)
        precision = precision_score(true, pred)
        report = classification_report(true, pred)

        evaluation_report = {
            'confusion_matrix': cm,
            'accuracy': accuracy,
            'recall': recall,
            'precision': precision,
            'classification_report': report
        }
        logger.info(f'evaluation_report -> {evaluation_report}')

        return evaluation_report

    def train_model(self):
        """Run the training step; fitted models are saved as a side effect.

        NOTE(review): evaluation is not wired up here. The training step
        returns a dict of models, so ``evaluate`` would have to be called once
        per model on its predictions for ``X_test``.
        """
        trained_models, X_test, y_test = self.initiate_model_training()
        

## Pipeline

In [30]:
# Run the model-training stage end to end: load configuration, build the
# trainer config, then train and persist every configured model.
try:
    config = ConfigurationManager()
    trainer_config = config.get_modelTrainer_config()
    model_trainer = ModelTrainer(config=trainer_config)
    model_trainer.train_model()
except Exception as e:
    # Chain explicitly so the original error is reported as the direct cause
    # of the project-specific exception.
    raise CustomException(e) from e


[2023-09-17 04:50:03,086: INFO: utils: yaml file: config\config.yaml loaded successfully]
[2023-09-17 04:50:03,089: INFO: utils: yaml file: schema.yaml loaded successfully]
[2023-09-17 04:50:03,092: INFO: utils: yaml file: params.yaml loaded successfully]
[2023-09-17 04:50:03,094: INFO: utils: Created artifacts]
[2023-09-17 04:50:03,095: INFO: utils: Created artifacts\model]
X_train shape: (80000, 9)
y_train shape: (80000, 1)
X_test shape: (20000, 9)
y_test shape: (20000, 1)
[2023-09-17 04:50:05,482: INFO: 2561447599: the GradientBoostingClassifier(loss='exponential', max_features='log2',
                           n_estimators=64, subsample=0.7) model trained successfully!]


  if is_sparse(dtype):
  is_categorical_dtype(dtype) or is_pa_ext_categorical_dtype(dtype)
  if is_categorical_dtype(dtype):
  return is_int or is_bool or is_float or is_categorical_dtype(dtype)


[2023-09-17 04:50:05,905: INFO: 2561447599: the XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=7, max_leaves=None,
              min_child_weight=1, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=64, n_jobs=None,
              num_parallel_tree=None, random_state=None, ...) model trained successfully!]
0:	learn: 0.6930206	total: 17.3ms	remaining: 1.71s
1:	learn: 0.6928926	total: 33.1ms	remaining: 1.62s
2:	learn: 0.6927271	total: 46.6ms	remaining: 1.51s
3:	learn: 0.6925552	t