In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
df = pd.read_csv('heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [4]:
import os
os.chdir('../')

In [5]:
from src.Project4.utils.common import logger
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

In [6]:
%pwd

'c:\\Users\\arbas\\Documents\\Projects\\Machine-Learning\\EndToEnd_Deployed\\Project 4'

In [15]:
from dataclasses import dataclass
from pathlib import Path
@dataclass
class Model_Trainer_Config:
    root_dir: Path
    train_path: Path
    test_path: Path
    params: dict
    target_column: str

In [16]:
from src.Project4.constants import *
from src.Project4.utils.common import read_yaml,create_directories
class Configuration_Manager:
    def __init__(self,config_path=CONFIG_FILE_PATH,schema_path = SCHEMA_FILE_PATH, params_path=PARAMS_FILE_PATH):
        self.config = read_yaml(config_path)
        self.schema = read_yaml(schema_path)
        self.params = read_yaml(params_path)
        create_directories([self.config.artifacts_root])
    
    def get_model_trainer_config(self)->Model_Trainer_Config:
        config = self.config.model_trainer
        schema = self.schema.TARGET_COLUMN
        params = self.params
        model_trainer_config = Model_Trainer_Config(root_dir=config.root_dir,train_path=config.train_path,test_path=config.test_path,target_column=schema,params=params)
        return model_trainer_config

In [17]:
from src.Project4.utils.common import save_bin
import joblib
from sklearn.model_selection import GridSearchCV
class Model_Trainer:
    def __init__(self,config):
        self.config = config
        self.models = {
                        'LogisticRegression': LogisticRegression(),
                        'KNeighborsClassifier': KNeighborsClassifier(),
                        'GaussianNB': GaussianNB(),
                        'DecisionTreeClassifier': DecisionTreeClassifier(),
                        'RandomForestClassifier': RandomForestClassifier(),
                        'SVC': SVC(),
                        'GradientBoostingClassifier': GradientBoostingClassifier()
                      }
    
    
    def finetune_best_model(self,best_model,model_name,X_train,y_train):
        searcher = GridSearchCV(best_model,param_grid=self.config.params.model_name) #! Error: params.model_name won't work
        searcher.fit(X_train,y_train)
        best_params = searcher.best_params_
        print("best params are:", best_params)
        finetuned_model = best_model.set_params(**best_params)
        return finetuned_model
        
       
    def train_and_evaluate_models(self):
        train_data_path = self.config.train_path
        test_data_path = self.config.test_path
        
        df_train = pd.read_csv(train_data_path)
        X_train = df_train.iloc[:,:-1]
        y_train = df_train.iloc[:,-1]
        
        df_test = pd.read_csv(test_data_path)
        X_test = df_test.iloc[:,:-1]
        y_test = df_test.iloc[:,-1]
        
        
        scaler = StandardScaler()

        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        report = {}
        for i in self.models:
            logger.info(f'Training on Model: {i}')
            self.models[i].fit(X_train_scaled,y_train)
            logger.info(f'Finished Training Model: {i}')
            preds = self.models[i].predict(X_test_scaled)
            accuracy = accuracy_score(y_test,preds)
            report[i] = accuracy
            logger.info(f'Successfully Finised Evaluating Model: {i}')
            logger.info(f'Model: {i}, Accuracy: {accuracy}')
            logger.info('--------------------------------------------')

        best_model_score = max(sorted(report.values()))
        best_model_name = list(self.models.keys())[list(report.values()).index(best_model_score)]
        best_model = self.models[best_model_name]
        logger.info(f'Finetuning Best Model')
        fine_tuned_model = self.finetune_best_model(best_model,best_model_name,X_train=X_train,y_train=y_train)
        joblib.dump(fine_tuned_model)
        print(f'Best Model: {best_model_name}, Accuracy: {best_model_score}')

In [18]:
try:
    config = Configuration_Manager()
    model_trainer_config = config.get_model_trainer_config()
    model_trainer = Model_Trainer(config=model_trainer_config)
    model_trainer.train_and_evaluate_models()
except Exception as e:
    raise e

2024-02-25 14:03:09,253 - src_logger - INFO - Yaml file:config\config.yaml loaded successfully
2024-02-25 14:03:09,257 - src_logger - INFO - Yaml file:schema.yaml loaded successfully
2024-02-25 14:03:09,266 - src_logger - INFO - Yaml file:params.yaml loaded successfully
2024-02-25 14:03:09,268 - src_logger - INFO - Created directory at: artifacts
2024-02-25 14:03:09,286 - src_logger - INFO - Training on Model: LogisticRegression
2024-02-25 14:03:09,291 - src_logger - INFO - Finished Training Model: LogisticRegression
2024-02-25 14:03:09,293 - src_logger - INFO - Successfully Finised Evaluating Model: LogisticRegression
2024-02-25 14:03:09,294 - src_logger - INFO - Model: LogisticRegression, Accuracy: 0.881578947368421
2024-02-25 14:03:09,295 - src_logger - INFO - --------------------------------------------
2024-02-25 14:03:09,296 - src_logger - INFO - Training on Model: KNeighborsClassifier
2024-02-25 14:03:09,298 - src_logger - INFO - Finished Training Model: KNeighborsClassifier
202

BoxKeyError: "'ConfigBox' object has no attribute 'model_name'"