In [1]:
import os

In [2]:
pwd

'd:\\PW_DS\\Machine_Learning\\End-To-End-ML-Project-Implementation\\research'

In [3]:
# The notebook is launched from <project>/research. Step up to the project root
# only while still inside that folder, so re-running this cell is a no-op
# instead of climbing one directory higher each time.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
pwd

'd:\\PW_DS\\Machine_Learning\\End-To-End-ML-Project-Implementation'

## Entity

In [16]:
from dataclasses import  dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelTrainingConfig:
    """Immutable settings for the model-training stage.

    Instances are built by ``ConfigManager.get_model_training_config`` from the
    ``model_training`` section of the config file plus the schema's ``target``
    entry.
    """
    root_dir: Path    # stage directory; passed to create_directories by the config manager
    train_data: Path  # CSV read as the training split
    test_data: Path   # CSV read as the test split
    metrics: Path     # destination handed to save_json for the evaluation metrics
    trans_obj: Path   # pickled transformer loaded with load_pickle
    # Mapping taken from the schema's ``target`` entry (not a path): training
    # uses its first key as the name of the label column.
    target_col: dict



## Config Manager

In [6]:
from heart_disease_pred.utils.commom import  read_yaml, create_directories
from heart_disease_pred.constants import *

In [26]:
class ConfigManager:
    """Read the project's config and schema files and build per-stage configs."""

    def __init__(self, config_file_path=CONFIG_FILE_PATH, schema_file_path=SCHEMA_FILE_PATH):
        """Load both YAML files and register the artifacts root directory.

        Args:
            config_file_path: Config file handed to ``read_yaml``.
            schema_file_path: Schema file handed to ``read_yaml``.
        """
        self.config = read_yaml(config_file_path)
        self.schema = read_yaml(schema_file_path)

        # create_directories is a project helper; presumably it creates any
        # listed directory that does not exist yet.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_training_config(self) -> ModelTrainingConfig:
        """Assemble the ``ModelTrainingConfig`` for the training stage.

        Paths come from the ``model_training`` section of the config file; the
        target description comes from the schema's ``target`` entry.

        Returns:
            A populated ``ModelTrainingConfig``.
        """
        training_section = self.config.model_training
        create_directories([training_section.root_dir])

        return ModelTrainingConfig(
            root_dir=training_section.root_dir,
            train_data=training_section.train_data,
            test_data=training_section.test_data,
            metrics=training_section.metrics,
            trans_obj=training_section.trans_obj,
            target_col=self.schema.target,
        )

## Components

In [9]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-2.0.3-py3-none-win_amd64.whl.metadata (2.0 kB)
Downloading xgboost-2.0.3-py3-none-win_amd64.whl (99.8 MB)
   ---------------------------------------- 0.0/99.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/99.8 MB 1.4 MB/s eta 0:01:14
   ---------------------------------------- 0.1/99.8 MB 1.3 MB/s eta 0:01:16
   ---------------------------------------- 0.2/99.8 MB 1.1 MB/s eta 0:01:27
   ---------------------------------------- 0.2/99.8 MB 1.3 MB/s eta 0:01:16
   ---------------------------------------- 0.3/99.8 MB 1.4 MB/s eta 0:01:10
   ---------------------------------------- 0.4/99.8 MB 1.4 MB/s eta 0:01:10
   ---------------------------------------- 0.5/99.8 MB 1.6 MB/s eta 0:01:03
   ---------------------------------------- 0.6/99.8 MB 1.7 MB/s eta 0:00:59
   ---------------------------------------- 0.7/99.8 MB 1.8 MB/s eta 0:00:57
   ---------------------------------------- 0.8/99.8 MB 1.7 MB/s eta 0:00:59
   ----------

In [18]:
import pandas  as pd
from box import  ConfigBox

from heart_disease_pred.utils.commom import load_pickle, save_json

from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report


In [58]:
class ModelTraining:
    """Fit the candidate classifiers and write their test-set metrics to JSON."""

    # Integer encoding applied to the raw target labels before fitting.
    TARGET_MAP = {'No': 0, 'Yes': 1}

    def __init__(self, config: ModelTrainingConfig):
        """Store the stage configuration used by ``train``."""
        self.config = config

    @classmethod
    def _encode_target(cls, labels, split_name):
        """Map raw labels to integers, rejecting labels missing from ``TARGET_MAP``."""
        encoded = labels.map(cls.TARGET_MAP)
        unmapped = encoded.isna()
        if unmapped.any():
            # Series.map leaves NaN for unknown labels; fail here with a clear
            # message instead of a later, less specific error from the estimator.
            unknown = labels[unmapped].unique().tolist()
            raise ValueError(
                f"Unexpected target labels in {split_name} data: {unknown!r}; "
                f"expected one of {list(cls.TARGET_MAP)!r}"
            )
        return encoded

    def train(self, random_state: int = 42):
        """Train every candidate model and save its evaluation metrics.

        Reads the train/test CSVs, applies the pickled transformer to the
        features, encodes the target with ``TARGET_MAP``, fits a
        ``RandomForestClassifier`` and an ``XGBClassifier``, and stores
        accuracy, precision, recall and F1 on the test split via ``save_json``
        at ``config.metrics``.

        Args:
            random_state: Seed passed to both estimators so repeated runs
                produce the same metrics.

        Raises:
            ValueError: If a target column contains a label that is not a key
                of ``TARGET_MAP``.
        """
        train_data = pd.read_csv(self.config.train_data)
        test_data = pd.read_csv(self.config.test_data)
        # target_col is a mapping; its first key is the label column name.
        target = list(self.config.target_col.keys())[0]

        y_train = train_data[target]
        X_train = train_data.drop(columns=[target])

        y_test = test_data[target]
        X_test = test_data.drop(columns=[target])

        # Transform the data
        # NOTE(review): load_pickle presumably unpickles the file; only load
        # transformer artifacts produced by this project.
        transform_obj = load_pickle(self.config.trans_obj)

        # NOTE(review): the loaded transformer is re-fitted on the training
        # split here; confirm this is intended rather than reusing a stored fit.
        X_train_values = transform_obj.fit_transform(X_train)
        feature_names = transform_obj.get_feature_names_out()
        X_train_trans = pd.DataFrame(X_train_values, columns=feature_names)
        X_test_trans = pd.DataFrame(transform_obj.transform(X_test), columns=feature_names)

        # Transform the target data
        y_train_trans = self._encode_target(y_train, 'train')
        y_test_trans = self._encode_target(y_test, 'test')

        models = {
            'rf': RandomForestClassifier(random_state=random_state),
            'xg': XGBClassifier(random_state=random_state),
        }

        metrics = {}
        for model_name, model_obj in models.items():
            model_obj.fit(X_train_trans, y_train_trans)
            y_pred = model_obj.predict(X_test_trans)

            # Values stay wrapped in single-element lists to keep the layout of
            # the metrics JSON unchanged.
            metrics[model_name] = {
                'name': [type(model_obj).__name__],
                'accuracy': [accuracy_score(y_test_trans, y_pred)],
                'precision': [precision_score(y_test_trans, y_pred)],
                'recall_score': [recall_score(y_test_trans, y_pred)],
                'f1_score': [f1_score(y_test_trans, y_pred)],
            }

        save_json(Path(self.config.metrics), metrics)




## Pipeline

In [59]:
# Load the config/schema YAML files; the constructor also passes the artifacts
# root to create_directories.
config = ConfigManager()
# Build the training-stage settings (paths plus the schema's target entry).
model_training_config = config.get_model_training_config()
model_trainer = ModelTraining(model_training_config)

# Fit both classifiers and save their test-set metrics to the configured JSON path.
model_trainer.train()


In [51]:
# Scratch copy of the estimator registry, keyed by a short model id.
models = dict(
    rf=RandomForestClassifier(),
    xg=XGBClassifier(),
)

# One empty record per model id; every record gets its own fresh lists.
metrics = {
    key: {
        'name': label,
        'model_obj': [],
        'accuracy': [],
        'precision': [],
        'recall_score': [],
        'f1_score': [],
    }
    for key, label in (
        ('rf', 'RandomForestClassifier'),
        ('xg', 'XGBClassifier'),
    )
}

In [52]:
metrics

{'rf': {'name': 'RandomForestClassifier',
  'model_obj': [],
  'accuracy': [],
  'precision': [],
  'recall_score': [],
  'f1_score': []},
 'xg': {'name': 'XGBClassifier',
  'model_obj': [],
  'accuracy': [],
  'precision': [],
  'recall_score': [],
  'f1_score': []}}

In [53]:
# Print each model's metrics record, in the order the models were registered.
for model in models:
    temp = metrics[model]
    print(temp)

{'name': 'RandomForestClassifier', 'model_obj': [], 'accuracy': [], 'precision': [], 'recall_score': [], 'f1_score': []}
{'name': 'XGBClassifier', 'model_obj': [], 'accuracy': [], 'precision': [], 'recall_score': [], 'f1_score': []}
