In [1]:
import os

In [2]:
# Show the current working directory (explicit call instead of the `pwd` automagic).
os.getcwd()

'd:\\PW_DS\\Machine_Learning\\End-To-End-ML-Project-Implementation\\research'

In [3]:
# Move from the notebook's folder (research/) up to the project root so that
# project-relative paths resolve. Guarded on the folder name so that re-running
# this cell does not climb one directory further each time.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
# Confirm the working directory after the change (explicit call instead of the `pwd` automagic).
os.getcwd()

'd:\\PW_DS\\Machine_Learning\\End-To-End-ML-Project-Implementation'

## Entity

In [5]:
from dataclasses import  dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable settings consumed by the model-training stage.

    Field order is part of the constructor signature and must not change.
    Annotations are informational only (dataclasses do not enforce them);
    the config manager in this notebook passes the YAML values through
    unchanged, so path fields may arrive as plain strings.
    """
    # Working directory of the stage; created before the config is built.
    root_dir: Path
    # CSV file read as the training split.
    train_data: Path
    # CSV file read as the evaluation split.
    test_data: Path
    # Destination of the JSON file holding the per-model scores.
    metrics: Path
    # Pickled feature transformer: loaded for training and written back afterwards.
    trans_obj: Path
    # Target section of the schema: a mapping whose first key is the label
    # column name (training calls `.keys()` on it), so it is not a path.
    target_col: dict
    # Destination of the pickled best-scoring model.
    best_model: Path



## Config Manager

In [6]:
from heart_disease_pred.utils.commom import  read_yaml, create_directories
from heart_disease_pred.constants import *

In [7]:
class ConfigManager:
    """Reads the project config and schema files and builds per-stage settings."""

    def __init__(self, config_file_path  = CONFIG_FILE_PATH, schema_file_path = SCHEMA_FILE_PATH):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_file_path: Location of the main configuration YAML.
            schema_file_path: Location of the schema YAML.
        """
        self.config = read_yaml(config_file_path)
        self.schema = read_yaml(schema_file_path)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_training_config(self) -> ModelTrainingConfig:
        """Build the model-training settings from the loaded config and schema.

        Creates the stage's root directory as a side effect.

        Returns:
            A ModelTrainingConfig populated from the `model_training` section of
            the config, with the schema's `target` entry as `target_col`.
        """
        section = self.config.model_training
        create_directories([section.root_dir])

        # Every field except the target is copied verbatim from the config section.
        copied_fields = ("root_dir", "train_data", "test_data", "metrics", "trans_obj", "best_model")
        settings = {field: getattr(section, field) for field in copied_fields}

        return ModelTrainingConfig(target_col=self.schema.target, **settings)

## Components

In [8]:
!pip install xgboost



In [9]:
import pandas  as pd
from box import  ConfigBox

from heart_disease_pred.utils.commom import load_pickle, save_json, save_pickle

from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report


In [10]:
class ModelTraining:
    """Fits the candidate classifiers, scores them, and persists the best one."""

    def __init__(self, config: ModelTrainingConfig):
        """Keep the stage settings for later use by `train`.

        Args:
            config: Input/output locations and the target-column schema.
        """
        self.config = config

    def train(self):
        """Train every candidate model and save the best model, metrics and transformer.

        Steps:
            1. Read the train and test CSV files and split off the target column
               (the first key of `config.target_col`).
            2. Load the pickled transformer, fit it on the training features and
               apply it to both splits.
            3. Encode the target labels ('No' -> 0, 'Yes' -> 1).
            4. Fit each candidate, score it on the test split and remember the
               model with the highest accuracy (the first one wins on ties).
            5. Write the metrics JSON, the best model and the fitted transformer.

        Raises:
            ValueError: If the target column holds a value other than 'No'/'Yes'.
        """
        train_data = pd.read_csv(self.config.train_data)
        test_data = pd.read_csv(self.config.test_data)
        # target_col is the schema's target mapping; its first key is the label column.
        target = list(self.config.target_col.keys())[0]

        y_train = train_data[target]
        X_train = train_data.drop([target], axis=1)

        y_test = test_data[target]
        X_test = test_data.drop([target], axis=1)

        # Transform the data. The loaded transformer is (re)fitted on the training
        # features only and then applied unchanged to the test features.
        transform_obj = load_pickle(self.config.trans_obj)

        train_features = transform_obj.fit_transform(X_train)
        feature_names = transform_obj.get_feature_names_out()
        X_train_trans = pd.DataFrame(train_features, columns=feature_names)
        X_test_trans = pd.DataFrame(transform_obj.transform(X_test), columns=feature_names)

        # Transform the target data. Series.map yields NaN for labels missing from
        # the mapping, so fail early with a clear message instead of inside fit().
        target_map = {'No': 0, 'Yes': 1}
        y_train_trans = y_train.map(target_map)
        y_test_trans = y_test.map(target_map)
        if y_train_trans.isna().any() or y_test_trans.isna().any():
            raise ValueError(
                f"Target column {target!r} contains values outside "
                f"{list(target_map)}; cannot encode labels."
            )

        models = {
            'rf': RandomForestClassifier(),
            'xg': XGBClassifier()
        }

        metrics = {}
        # Start below any possible score so that a model is always selected.
        best_accuracy = float('-inf')
        best_model_obj = None

        for model_name, model_obj in models.items():
            model_obj.fit(X_train_trans, y_train_trans)
            y_pred = model_obj.predict(X_test_trans)

            # Plain floats keep the printed and JSON output free of numpy scalar types.
            accuracy = float(accuracy_score(y_test_trans, y_pred))

            # Each value stays wrapped in a one-element list to preserve the
            # layout of the metrics file written by earlier runs.
            metrics[model_name] = {
                'name': [type(model_obj).__name__],
                'accuracy': [accuracy],
                'precision': [float(precision_score(y_test_trans, y_pred))],
                'recall_score': [float(recall_score(y_test_trans, y_pred))],
                'f1_score': [float(f1_score(y_test_trans, y_pred))],
            }

            # Compare and store the scalar score. Storing the list here made the
            # next comparison `float > list`, which only worked by accident with
            # numpy scalars and raises TypeError with plain floats.
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model_obj = model_obj

        print(best_model_obj)
        print(best_accuracy)
        print(metrics)

        save_json(Path(self.config.metrics), metrics)
        save_pickle(Path(self.config.best_model), best_model_obj)
        # Persist the transformer as fitted above so later stages use the same fit.
        save_pickle(Path(self.config.trans_obj), transform_obj)
        



## Pipeline

In [11]:
# Wire the stage together: load the configuration, derive the training
# settings, then run the training step.
config = ConfigManager()
model_training_config = config.get_model_training_config()
model_trainer = ModelTraining(config=model_training_config)
model_trainer.train()


RandomForestClassifier()
[0.9297602874797318]
{'rf': {'name': ['RandomForestClassifier'], 'accuracy': [0.9297602874797318], 'precision': [0.9288716794865086], 'recall_score': [0.9311779825497019], 'f1_score': [0.9300234012084804]}, 'xg': {'name': ['XGBClassifier'], 'accuracy': [0.8922126298260222], 'precision': [0.9175859952002678], 'recall_score': [0.8624259061740484], 'f1_score': [0.8891512835304298]}}


In [12]:
# Candidate estimators keyed by a short identifier.
models = dict(rf=RandomForestClassifier(), xg=XGBClassifier())

# One scaffold per model: its display name plus an independent empty list per field.
metrics = {
    key: {
        'name': label,
        **{field: [] for field in ('model_obj', 'accuracy', 'precision', 'recall_score', 'f1_score')},
    }
    for key, label in (('rf', 'RandomForestClassifier'), ('xg', 'XGBClassifier'))
}

In [13]:
# Display the metrics scaffold built in the previous cell.
metrics

{'rf': {'name': 'RandomForestClassifier',
  'model_obj': [],
  'accuracy': [],
  'precision': [],
  'recall_score': [],
  'f1_score': []},
 'xg': {'name': 'XGBClassifier',
  'model_obj': [],
  'accuracy': [],
  'precision': [],
  'recall_score': [],
  'f1_score': []}}

In [14]:
# Walk the model keys in insertion order and print each model's metrics entry.
for model in models:
    temp = metrics[model]
    print(temp)

{'name': 'RandomForestClassifier', 'model_obj': [], 'accuracy': [], 'precision': [], 'recall_score': [], 'f1_score': []}
{'name': 'XGBClassifier', 'model_obj': [], 'accuracy': [], 'precision': [], 'recall_score': [], 'f1_score': []}
