In [32]:
import os

# Later cells use paths relative to the working directory
# (e.g. 'artifacts/transformed_data/train_transformed.csv'), so the notebook
# switches to the project root before doing anything else.
# The default below is the original hard-coded checkout location; set the
# CHURN_PROJECT_ROOT environment variable to run against a different checkout
# without editing this cell.
PROJECT_ROOT = os.environ.get(
    'CHURN_PROJECT_ROOT',
    'd:\\vscode_machineLearning\\internship\\Customer-Churn-Prediction',
)

print(os.getcwd())      # working directory before the switch
os.chdir(PROJECT_ROOT)
print(os.getcwd())      # working directory after the switch

d:\vscode_machineLearning\internship\Customer-Churn-Prediction
d:\vscode_machineLearning\internship\Customer-Churn-Prediction


In [33]:
import pandas as pd

In [34]:
# Load the transformed train/test CSVs for inspection; both paths are
# relative to the current working directory.
train_df, test_df = map(
    pd.read_csv,
    (
        r'artifacts/transformed_data/train_transformed.csv',
        r'artifacts/transformed_data/test_transformed.csv',
    ),
)

## Entity

In [35]:
from dataclasses import dataclass
from pathlib import Path

In [36]:
@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training stage."""

    # CSV files read by the trainer for fitting and for the held-out split.
    train_data: Path
    test_data: Path

    # Directory that is created to hold model artifacts.
    model_dir: Path

    # Destination the fitted model is dumped to.
    # NOTE(review): "ojb" is a typo for "obj", kept because other code
    # refers to the field by this exact name.
    model_ojb: str

    # Hyper-parameters forwarded to RandomForestClassifier.
    n_estimators: int
    oob_score: bool

## Configuration

In [37]:
from churnPredictor.constants import *
from churnPredictor.utils import *

In [38]:
class ConfigurationManager:
    """Read the project's YAML files and build per-stage config objects.

    On construction the config, schema and params YAML files are loaded and
    the artifacts root directory named in the config file is created.
    """

    def __init__(
        self,
        config_file_path = CONFIG_FILE_PATH,
        schema_file_path = SCHEMA_FILE_PATH,
        params_file_path=PARAMS_FILE_PATH):
        """Load the three YAML files and create the artifacts root.

        Args:
            config_file_path: location of the main config YAML.
            schema_file_path: location of the schema YAML.
            params_file_path: location of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_file_path)
        self.schema = read_yaml(schema_file_path)
        self.params = read_yaml(params_file_path)

        create_dirs([self.config.artifacts_root])

    def get_modelTrainer_config(self) -> "ModelTrainerConfig":
        """Build the settings object for the model-training stage.

        Reads the ``model_trainer`` section of the config file and the
        ``RandomForest`` section of the params file, and creates the model
        directory as a side effect.

        Returns:
            A ``ModelTrainerConfig`` whose path fields are ``pathlib.Path``
            instances, matching the annotations on that dataclass.
        """
        config = self.config.model_trainer
        params = self.params.RandomForest

        create_dirs([config.model_dir])

        return ModelTrainerConfig(
            # The dataclass declares these three fields as Path, so the raw
            # YAML values are wrapped here instead of being passed through.
            train_data=Path(config.train_data),
            test_data=Path(config.test_data),
            model_dir=Path(config.model_dir),
            # Declared as str on the dataclass, so it is passed unchanged.
            # The keyword is spelled "model_ojb" because that is the field's
            # name; the YAML key is "model_obj".
            model_ojb=config.model_obj,
            n_estimators=params.n_estimators,
            oob_score=params.oob_score)


## Component

In [39]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, recall_score, accuracy_score, precision_score, classification_report

In [40]:
class ModelTrainer:
    """Fit a random-forest classifier from CSV data and score predictions.

    All inputs come from the ``ModelTrainerConfig`` passed to the
    constructor: the train/test CSV locations, the forest's ``n_estimators``
    and ``oob_score`` settings, and the destination of the saved model.
    """

    # Label column that is split off from the features in both CSVs.
    TARGET_COLUMN = 'Churn'

    def __init__(self,config:ModelTrainerConfig):
        self.config = config

    def initiate_model_training(self):
        """Read the data, fit the forest and save it to disk.

        Returns:
            A ``(model, X_test, y_test)`` tuple: the fitted
            ``RandomForestClassifier`` plus the held-out features and labels.

        Raises:
            KeyError: if either CSV has no ``TARGET_COLUMN`` column.
        """
        config = self.config

        train_df = pd.read_csv(config.train_data)
        test_df = pd.read_csv(config.test_data)

        X_train = train_df.drop(columns=self.TARGET_COLUMN)
        y_train = train_df[self.TARGET_COLUMN]
        X_test = test_df.drop(columns=self.TARGET_COLUMN)
        y_test = test_df[self.TARGET_COLUMN]

        # NOTE(review): no random_state is passed, so the fitted forest (and
        # any score computed from it) differs from run to run.
        rfc = RandomForestClassifier(
            n_estimators=config.n_estimators,
            oob_score=config.oob_score,
        )

        rfc.fit(X_train,y_train)
        logger.info(f'the {rfc} model trained successfully')
        # "model_ojb" is the config field's actual (misspelled) name.
        joblib.dump(rfc,config.model_ojb)

        return rfc , X_test , y_test

    def evaluate(self,true,pred):
        """Score predictions against the true labels.

        Args:
            true: ground-truth labels.
            pred: predicted labels, aligned with ``true``.

        Returns:
            A dict with the keys ``confusion_matrix``, ``accuracy``,
            ``recall``, ``precision`` and ``classification_report``. Recall
            and precision are computed with scikit-learn's default settings.
            The same dict is also written to the log.
        """
        evaluation_report = {
            'confusion_matrix': confusion_matrix(true, pred),
            'accuracy': accuracy_score(true, pred),
            'recall': recall_score(true, pred),
            'precision': precision_score(true, pred),
            'classification_report': classification_report(true, pred)
        }
        logger.info(f'evaluation_report -> {evaluation_report}')

        return evaluation_report

    def train_model(self):
        """Run the training step and hand its results back to the caller.

        Evaluation is not run here. The returned split can be scored with
        ``self.evaluate(y_test, model.predict(X_test))``.

        Returns:
            The ``(model, X_test, y_test)`` tuple produced by
            ``initiate_model_training``.
        """
        return self.initiate_model_training()
        

## Pipeline

In [41]:
# Run the model-training stage: load the YAML settings, build the trainer's
# config object, then train the model.
try:
    config = ConfigurationManager()
    trainer_config = config.get_modelTrainer_config()
    model_trainer = ModelTrainer(config=trainer_config)
    model_trainer.train_model()
except Exception as e:
    # Chain explicitly so the traceback reports the original error as the
    # direct cause of the CustomException.
    raise CustomException(e) from e


[2023-09-15 19:24:11,560: INFO: utils: yaml file: config\config.yaml loaded successfully]
[2023-09-15 19:24:11,563: INFO: utils: yaml file: schema.yaml loaded successfully]
[2023-09-15 19:24:11,565: INFO: utils: yaml file: params.yaml loaded successfully]
[2023-09-15 19:24:11,566: INFO: utils: Created artifacts]
[2023-09-15 19:24:11,568: INFO: utils: Created artifacts\model]
[2023-09-15 19:24:39,109: INFO: 1110022211: the RandomForestClassifier(n_estimators=132, oob_score=True) model trained successfully]
[2023-09-15 19:24:39,978: INFO: 1110022211: evaluation_report -> {'confusion_matrix': array([[5315, 4797],
       [5169, 4719]], dtype=int64), 'accuracy': 0.5017, 'recall': 0.47724514563106796, 'precision': 0.4959016393442623, 'classification_report': '              precision    recall  f1-score   support\n\n         0.0       0.51      0.53      0.52     10112\n         1.0       0.50      0.48      0.49      9888\n\n    accuracy                           0.50     20000\n   macro avg