In [10]:
import os
# Print the working directory before and after the switch so the cell output
# shows where the relative paths used by later cells will be resolved from.
print(os.getcwd())
# NOTE(review): hard-coded absolute Windows path that only exists on the
# author's machine; presumably this is the project root -- consider deriving
# it (e.g. relative to the notebook location) so the notebook runs elsewhere.
os.chdir('d:\\vscode_machineLearning\\internship\\Customer-Churn-Prediction')
print(os.getcwd())

d:\vscode_machineLearning\internship\Customer-Churn-Prediction
d:\vscode_machineLearning\internship\Customer-Churn-Prediction


In [11]:
import pandas as pd

## Entity

In [12]:
from dataclasses import dataclass
from pathlib import Path

In [13]:
@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable settings bundle consumed by the model-training stage.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    # CSV files loaded with ``pd.read_csv`` as the training / test features.
    train_data: Path
    test_data: Path
    # Directory that is created before training starts.
    model_dir: Path
    # Destination handed to ``joblib.dump`` for the fitted model.
    # The spelling "ojb" (sic) is kept because callers use this field name.
    model_ojb: str
    # Hyper-parameters forwarded to ``RandomForestClassifier``.
    n_estimators: int
    oob_score: bool
    # CSV files loaded with ``pd.read_csv`` as the training / test targets.
    y_train_path: Path
    y_test_path: Path

## Configuration

In [14]:
from churnPredictor.constants import *
from churnPredictor.utils import *

In [15]:
class ConfigurationManager:
    """Reads the project's YAML files and builds per-stage config objects."""

    def __init__(
        self,
        config_file_path=CONFIG_FILE_PATH,
        schema_file_path=SCHEMA_FILE_PATH,
        params_file_path=PARAMS_FILE_PATH,
    ):
        """Load the config, schema and params YAML files.

        Also creates the directory named by ``artifacts_root`` in the loaded
        config.

        Args:
            config_file_path: Location of the main configuration YAML.
            schema_file_path: Location of the schema YAML.
            params_file_path: Location of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_file_path)
        self.schema = read_yaml(schema_file_path)
        self.params = read_yaml(params_file_path)

        artifacts_root = self.config.artifacts_root
        create_dirs([artifacts_root])

    def get_modelTrainer_config(self):
        """Build the ``ModelTrainerConfig`` for the training stage.

        Paths come from the ``model_trainer`` section of the config file and
        hyper-parameters from the ``RandomForest`` section of the params file.
        The model directory is created as a side effect.

        Returns:
            ModelTrainerConfig: settings for the model-training stage.
        """
        trainer_section = self.config.model_trainer
        forest_params = self.params.RandomForest

        create_dirs([trainer_section.model_dir])

        settings = dict(
            train_data=trainer_section.train_data,
            test_data=trainer_section.test_data,
            model_dir=trainer_section.model_dir,
            # Config key is ``model_obj``; the dataclass field is ``model_ojb``.
            model_ojb=trainer_section.model_obj,
            n_estimators=forest_params.n_estimators,
            oob_score=forest_params.oob_score,
            y_train_path=trainer_section.y_train_path,
            y_test_path=trainer_section.y_test_path,
        )
        return ModelTrainerConfig(**settings)


## Component

In [16]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, recall_score, accuracy_score, precision_score, classification_report

In [17]:
class ModelTrainer:
    """Trains a random-forest classifier from CSV splits and saves it to disk."""

    def __init__(self,config:ModelTrainerConfig):
        """Store the training configuration.

        Args:
            config: Paths and hyper-parameters for the training stage.
        """
        self.config = config

    def initiate_model_training(self):
        """Load the data splits, fit the classifier and persist it.

        Reads the feature and target CSVs named in the config, prints their
        shapes, fits a ``RandomForestClassifier`` on the training split and
        writes the fitted model to ``config.model_ojb`` with ``joblib.dump``.

        Returns:
            tuple: ``(fitted_model, X_test, y_test)`` where the test objects
            are the DataFrames exactly as read from CSV.
        """
        config = self.config

        X_train = pd.read_csv(config.train_data)
        y_train = pd.read_csv(config.y_train_path)
        X_test = pd.read_csv(config.test_data)
        y_test = pd.read_csv(config.y_test_path)
        print("X_train shape:", X_train.shape)
        print("y_train shape:", y_train.shape)
        print("X_test shape:", X_test.shape)
        print("y_test shape:", y_test.shape)

        # The target CSV loads as a single-column DataFrame of shape (n, 1).
        # scikit-learn expects a 1-D target and emits a DataConversionWarning
        # for a column vector, so pass that one column as a Series instead.
        # Multi-column targets are left untouched.
        if y_train.shape[1] == 1:
            y_fit = y_train.iloc[:, 0]
        else:
            y_fit = y_train

        rfc = RandomForestClassifier(
            n_estimators=config.n_estimators,
            oob_score=config.oob_score,
        )

        rfc.fit(X_train, y_fit)
        logger.info(f'the {rfc} model trained successfully')
        joblib.dump(rfc,config.model_ojb)

        return rfc , X_test , y_test

    def evaluate(self,true,pred):
        """Compute classification metrics and log them.

        Recall and precision are computed with scikit-learn's default
        arguments.

        Args:
            true: Ground-truth labels.
            pred: Predicted labels.

        Returns:
            dict: Entries ``confusion_matrix``, ``accuracy``, ``recall``,
            ``precision`` and ``classification_report``.
        """
        cm = confusion_matrix(true, pred)
        accuracy = accuracy_score(true, pred)
        recall = recall_score(true, pred)
        precision = precision_score(true, pred)
        report = classification_report(true, pred)

        evaluation_report = {
            'confusion_matrix': cm,
            'accuracy': accuracy,
            'recall': recall,
            'precision': precision,
            'classification_report': report
        }
        logger.info(f'evaluation_report -> {evaluation_report}')

        return evaluation_report

    def train_model(self):
        """Run the training step.

        Only training and persistence happen here; no evaluation is run. To
        score the model, call ``evaluate`` with the test targets and the
        predictions of the model returned by ``initiate_model_training``.
        """
        # The returned model and test split are not needed by this step.
        self.initiate_model_training()
        

## Pipeline

In [18]:
# Run the model-training stage end to end: load the configuration, build the
# trainer settings, then train and persist the model.
try:
    config = ConfigurationManager()
    trainer_config = config.get_modelTrainer_config()
    model_trainer = ModelTrainer(config=trainer_config)
    model_trainer.train_model()
except Exception as e:
    # Chain explicitly so the traceback reports the original error as the
    # direct cause of the wrapped exception.
    raise CustomException(e) from e


[2023-09-16 13:47:52,451: INFO: utils: yaml file: config\config.yaml loaded successfully]
[2023-09-16 13:47:52,454: INFO: utils: yaml file: schema.yaml loaded successfully]
[2023-09-16 13:47:52,455: INFO: utils: yaml file: params.yaml loaded successfully]
[2023-09-16 13:47:52,457: INFO: utils: Created artifacts]
[2023-09-16 13:47:52,458: INFO: utils: Created artifacts\model]
X_train shape: (80000, 9)
y_train shape: (80000, 1)
X_test shape: (20000, 9)
y_test shape: (20000, 1)


  return fit_method(estimator, *args, **kwargs)


[2023-09-16 13:48:40,471: INFO: 1776608302: the RandomForestClassifier(n_estimators=132, oob_score=True) model trained successfully]
