In [3]:
import os

In [4]:
%pwd

'd:\\Projects\\mushroom_classification\\mushroom_classification\\notebooks'

In [5]:
# Move from notebooks/ up to the project root so that `src.` imports and the
# relative `artifacts/` paths used below resolve. The guard keeps the cell
# idempotent: re-running it will not climb one directory further each time.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('../')

In [6]:
%pwd

'd:\\Projects\\mushroom_classification\\mushroom_classification'

In [7]:
from sklearn.metrics import accuracy_score

In [8]:
def evaluate_model(X_train, y_train, X_test, y_test, models):
    """Fit each candidate model and score it with accuracy on both splits.

    Args:
        X_train: Training features, passed to ``fit`` and ``predict``.
        y_train: Training labels.
        X_test: Held-out features, passed to ``predict``.
        y_test: Held-out labels.
        models: Mapping of display name -> estimator exposing ``fit`` and
            ``predict``. Every estimator is fitted in place, so the caller's
            objects are trained once this function returns.

    Returns:
        dict: ``{name: {'train_score': ..., 'test_score': ...}}`` holding the
        values returned by ``accuracy_score`` for each model, in the same
        order as ``models``.

    Raises:
        customexception: Wraps any exception raised while fitting or scoring.
    """
    try:
        scores_by_model = {}
        for name, estimator in models.items():
            # A single fit on the full training split is enough for both scores.
            estimator.fit(X_train, y_train)

            test_predictions = estimator.predict(X_test)
            train_predictions = estimator.predict(X_train)

            # Accuracy (fraction of correct labels) on each split.
            scores_by_model[name] = dict(
                train_score=accuracy_score(y_train, train_predictions),
                test_score=accuracy_score(y_test, test_predictions),
            )

        return scores_by_model

    except Exception as e:
        logging.info('Exception occurred during model training')

        raise customexception(e, sys)

In [9]:
import os
import sys
import pickle
import numpy as np
import pandas as pd
from src.mushroom_classification.logger import logging
from src.mushroom_classification.exception import customexception
from dataclasses import dataclass
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

In [10]:
@dataclass
class ModelTrainerConfig:
    """Configuration for the model-training step."""

    # Destination of the pickled best model. The type annotation is required:
    # @dataclass only turns *annotated* class attributes into fields, so
    # without it the path could not be overridden via the constructor.
    trained_model_file_path: str = os.path.join('artifacts', 'model.pkl')

In [11]:
class ModelTrainer:
    """Trains a fixed set of candidate classifiers and pickles the best one."""

    def __init__(self):
        self.model_trainer_config = ModelTrainerConfig()

    def initate_model_training(self,train_array,test_array):
        """Fit all candidate classifiers, pick the best by test accuracy, save it.

        The method name keeps its original spelling ("initate") because
        callers reference it by that name.

        Args:
            train_array: 2-D array; every column but the last is a feature and
                the last column is the target.
            test_array: 2-D array with the same column layout as
                ``train_array``.

        Returns:
            None. The selected model is written to
            ``self.model_trainer_config.trained_model_file_path``.

        Raises:
            customexception: Wraps any exception raised during splitting,
                evaluation, model selection or saving.
        """
        try:
            logging.info('Splitting Dependent and Independent variables from train and test data')
            X_train, y_train, X_test, y_test = (
                train_array[:,:-1],
                train_array[:,-1],
                test_array[:,:-1],
                test_array[:,-1]
            )

            # Log only the first 5 rows of each split to keep the log readable.
            logging.info(f'X_train (first 5 rows):\n{pd.DataFrame(X_train).head()}')
            logging.info(f'y_train (first 5 rows):\n{pd.DataFrame(y_train).head()}')
            logging.info(f'X_test (first 5 rows):\n{pd.DataFrame(X_test).head()}')
            logging.info(f'y_test (first 5 rows):\n{pd.DataFrame(y_test).head()}')

            # Keys are the labels shown in the report and logs; they now name
            # the classifier actually used (two were mislabeled "...Regressor").
            models = {
                'LogisticRegression': LogisticRegression(),
                'DecisionTreeClassifier': DecisionTreeClassifier(),
                'RandomForestClassifier': RandomForestClassifier(),
                'KNN': KNeighborsClassifier(),
                'SVC': SVC(),
                'GaussianNB': GaussianNB(),
            }

            logging.info('Evaluating models...')
            # evaluate_model fits the estimators in `models` in place, so the
            # object looked up below is already trained.
            model_report:dict=evaluate_model(X_train,y_train,X_test,y_test,models)
            print(model_report)
            print('\n====================================================================================\n')
            logging.info(f'Model Report : {model_report}')

            # Select the model with the highest test accuracy. max() returns
            # the first maximal key, so ties resolve to the earliest entry.
            best_model_name = max(
                model_report,
                key=lambda name: model_report[name]['test_score'],
            )
            # Report the test accuracy itself rather than the whole score dict.
            best_model_score = model_report[best_model_name]['test_score']

            best_model = models[best_model_name]

            print(f'Best Model Found , Model Name : {best_model_name} , Accuracy Score : {best_model_score}')
            print('\n====================================================================================\n')
            logging.info(f'Best Model Found , Model Name : {best_model_name} , Accuracy Score : {best_model_score}')

            save_object(
                 file_path=self.model_trainer_config.trained_model_file_path,
                 obj=best_model
            )

        except Exception as e:
            logging.info('Exception occured at Model Training')
            raise customexception(e,sys)
            

In [12]:
def save_object(file_path, obj):
    """Pickle ``obj`` to ``file_path``, creating the parent directory if needed.

    Args:
        file_path: Destination path of the pickle file. May be a bare file
            name with no directory component.
        obj: Any picklable object.

    Raises:
        customexception: Wraps any error raised while creating the directory,
            opening the file or pickling the object.
    """
    try:
        dir_path = os.path.dirname(file_path)

        # os.makedirs('') raises FileNotFoundError, so only create the parent
        # directory when file_path actually has one.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        with open(file_path, "wb") as file_obj:
            pickle.dump(obj, file_obj)

    except Exception as e:
        raise customexception(e, sys)

In [13]:
from src.mushroom_classification.pipelines.training_pipeline import train_data_path,test_data_path,train_arr,test_arr

[2024-01-26 14:36:02,482: INFO: training_pipeline: Training Pipeline has started]
[2024-01-26 14:36:02,487: INFO: data_ingestion: Data ingestion has started]
[2024-01-26 14:36:03,363: INFO: data_ingestion: files saved to artifacts]
[2024-01-26 14:36:03,368: INFO: data_transformation: Reading train and test data from CSV files]
[2024-01-26 14:36:03,616: INFO: data_transformation: Read train and test data complete]
[2024-01-26 14:36:03,693: INFO: data_transformation: Train Dataframe Head:
  class cap-shape cap-surface cap-color bruises odor gill-attachment gill-spacing gill-size gill-color stalk-shape stalk-root stalk-surface-above-ring stalk-surface-below-ring stalk-color-above-ring stalk-color-below-ring veil-type veil-color ring-number ring-type spore-print-color population habitat
0     p         k           s         e       f    s               f            c         n          b           t          ?                        s                        k                      p        

In [14]:
# Build the trainer and run model selection on the arrays imported from the
# training pipeline; the best-scoring model is pickled by the trainer.
model_trainer_obj = ModelTrainer()
model_trainer_obj.initate_model_training(train_arr, test_arr)

[2024-01-26 14:36:25,028: INFO: 485326021: Splitting Dependent and Independent variables from train and test data]
[2024-01-26 14:36:25,074: INFO: 485326021: X_train (first 5 rows):
        0        1         2         3        4         5         6    \
0 -0.239658 -0.02149  -0.78349  2.906238 -0.06459 -0.913412 -0.629868   
1 -0.239658 -0.02149  -0.78349 -0.344087 -0.06459  1.094796 -0.629868   
2 -0.239658 -0.02149  1.276341 -0.344087 -0.06459 -0.913412 -0.629868   
3 -0.239658 -0.02149  1.276341 -0.344087 -0.06459 -0.913412  1.587635   
4  4.172621 -0.02149  -0.78349 -0.344087 -0.06459 -0.913412 -0.629868   

       7         8         9    ...       107       108       109       110  \
0 -0.02149  1.474225 -0.817125  ... -0.423917  1.009741 -0.517781  1.259425   
1 -0.02149  1.474225 -0.817125  ... -0.423917  1.009741 -0.517781 -0.794013   
2 -0.02149 -0.678322  1.223803  ... -0.423917  1.009741 -0.517781 -0.794013   
3 -0.02149 -0.678322 -0.817125  ... -0.423917  1.009741 -0.5177