In [1]:
import os
os.chdir('../')
%pwd

'c:\\Users\\EI13136\\Documents\\mlops'

In [2]:
import pandas as pd
from sklearn.metrics import precision_score, recall_score, accuracy_score, roc_auc_score

In [12]:
test_df = pd.read_csv("artifacts/data_transformation/test.csv")
valid_df = pd.read_csv("artifacts/data_transformation/valid.csv")

In [13]:
x_test = test_df.drop(columns='Satisfaction')
y_test = test_df['Satisfaction']

x_valid = valid_df.drop(columns='Satisfaction')
y_valid = valid_df['Satisfaction']

In [6]:
def model_evaluation(y_true, y_pred):
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    acc_score = accuracy_score(y_true, y_pred)
    roc_auc = roc_auc_score(y_true, y_pred)
    #conf_matrix = confusion_matrix(y_true, y_pred)
    #f1 = f1_score(y_true, y_pred)
    print("  {:.2%}\t  {:.2%}\t  {:.2%}\t\t{:.2%}".format(precision, recall, roc_auc, acc_score))
    # print("Confusion Matrix:")
    # print(conf_matrix)
    return acc_score

In [12]:
import joblib
from airline_passenger_satisfaction.logger import logger

model_directory = 'artifacts/model_trainer'
model_list = []
accuracy_score_list = []

for model_file in os.listdir(model_directory):
    if model_file.endswith('.joblib'):
        model_path = os.path.join(model_directory, model_file)
        model = joblib.load(model_path)

        y_test_pred = model.predict(x_test)
        y_valid_pred = model.predict(x_valid)

        print(f"{'>'*5} {model_file.replace('.joblib','')} {'<'*5}".center(64, ' '), '\n')
        print("*** Test Dataset ***".center(64, ' '))
        print(" Precision ", " Recall ", " ROC-AUC Score ", "  Accuracy Score ", sep='\t')
        print('-' * 64)
        acc_score = model_evaluation(y_test, y_test_pred)

        print("*** Validation Dataset ***".center(64, ' '))
        print(" Precision ", " Recall ", " ROC-AUC Score ", "  Accuracy Score ", sep='\t')
        print('-' * 64)
        model_evaluation(y_test, y_test_pred)

        model_list.append(model_file.replace('.joblib',''))
        accuracy_score_list.append(acc_score)


best_model_index = accuracy_score_list.index(max(accuracy_score_list))
best_model_name = model_list[best_model_index].replace('.joblib','')
best_model_score = accuracy_score_list[best_model_index]

print("Model :", best_model_name, "Accuracy Score :", "{0:.2%}".format(best_model_score))

if best_model_score > 0.6:
    joblib.dump(best_model_name, 'best_model.joblib')
    print(f"Best model : {best_model_name} saved as 'best_model'.joblib")
else:
    print("No model with accuracy score > 60% found. Skipping saving.")


                 >>>>> Adaboost Classifier <<<<<                  

                      *** Test Dataset ***                      
 Precision 	 Recall 	 ROC-AUC Score 	  Accuracy Score 
----------------------------------------------------------------
  91.97%	  90.98%	  92.46%		92.66%
                   *** Validation Dataset ***                   
 Precision 	 Recall 	 ROC-AUC Score 	  Accuracy Score 
----------------------------------------------------------------
  91.97%	  90.98%	  92.46%		92.66%

                      >>>>> best_model <<<<<                      

                      *** Test Dataset ***                      
 Precision 	 Recall 	 ROC-AUC Score 	  Accuracy Score 
----------------------------------------------------------------
  97.53%	  94.17%	  96.18%		96.45%
                   *** Validation Dataset ***                   
 Precision 	 Recall 	 ROC-AUC Score 	  Accuracy Score 
----------------------------------------------------------------
  97.53%	  94.17%

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    root_dir: Path
    test_data_path: Path
    valid_data_path: Path
    models_path: Path
    best_model_path: Path
    metrics_file_name: Path
    target_column: str

In [4]:
from airline_passenger_satisfaction.constants import *
from airline_passenger_satisfaction.utils.common import read_yaml, create_directories, save_json

In [5]:
class ConfigurationManager:
    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH,
                 schema_filepath = SCHEMA_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_evaluation(self)-> ModelEvaluationConfig:
        config = self.config.model_evaluation
        params = self.params
        schema = self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        model_evaluation_config = ModelEvaluationConfig(
            root_dir= config.root_dir,
            test_data_path= config.test_data_path,
            valid_data_path= config.valid_data_path,
            models_path= config.models_path,
            best_model_path= config.best_model_path,
            target_column= schema.name,
            metrics_file_name= config.metric_file_name,
        ) 

        return model_evaluation_config

In [18]:
from urllib.parse import urlparse
from pathlib import Path
import joblib
from airline_passenger_satisfaction.logger import logger
from airline_passenger_satisfaction.utils.common import model_evaluation

class ModelEvaluation:
    def __init__(self, config: ModelEvaluationConfig) -> None:
        self.config = config
    
    def identify_best_model(self):
        test_df = pd.read_csv(self.config.test_data_path)
        valid_df = pd.read_csv(self.config.valid_data_path)

        x_test = test_df.drop(columns= self.config.target_column)
        y_test = test_df[self.config.target_column]

        x_valid = valid_df.drop(columns=self.config.target_column)
        y_valid = valid_df[self.config.target_column]


        model_list = []
        accuracy_score_list = []

        for model_file in os.listdir(self.config.models_path):
            if model_file.endswith('.joblib'):
                model_path = Path(os.path.join(self.config.models_path, model_file))
                model = joblib.load(model_path)

                y_test_pred = model.predict(x_test)
                y_valid_pred = model.predict(x_valid)

                logger.info(f"{'>'*5} {model_file.replace('.joblib','')} {'<'*5}".center(64))
                logger.info("*** Test Dataset ***".center(64))
                logger.info(" Precision \t Recall \tROC-AUC Score \t  Accuracy Score ")
                logger.info('-' * 64)
                test_precision, test_recall, test_roc_auc, test_acc_score = model_evaluation(y_test, y_test_pred)
                logger.info(f" {test_precision:.2%}\t  {test_recall:.2%}\t  {test_roc_auc:.2%}\t\t {test_acc_score:.2%}")

                logger.info("*** Validation Dataset ***".center(64))
                logger.info(" Precision \t Recall \tROC-AUC Score \t  Accuracy Score ")
                logger.info('-' * 64)
                precision, recall, roc_auc, acc_score = model_evaluation(y_valid, y_valid_pred)
                logger.info(f" {precision:.2%}\t  {recall:.2%}\t  {roc_auc:.2%}\t\t {acc_score:.2%}"+'\n')
        
                model_list.append(model_file)
                avg_acc_score = (acc_score+test_acc_score)/2
                accuracy_score_list.append(avg_acc_score)

        best_model_index = accuracy_score_list.index(max(accuracy_score_list))
        best_model_name = model_list[best_model_index]
        best_model_score = accuracy_score_list[best_model_index]


        if best_model_score > 0.6:
            logger.info(f"Model: {best_model_name} with a best Accuracy Score : {best_model_score:.2%}")
            save_json(path=Path(self.config.metrics_file_name), data={"accuracy_score" :best_model_score})

            with open(f'{self.config.models_path}/best_model.txt', 'w') as file:
                file.write(f"Model : {best_model_name.replace('.joblib','')} with Accuracy Score: {best_model_score:.2%}") 
                logger.info(f"Best model: {best_model_name} saved as Best_model.joblib file format")
                model = joblib.load(f'{self.config.models_path}/{best_model_name}')
                joblib.dump(model, f'{self.config.models_path}/Best_model.joblib')
    

        else:
            logger.info("No model with accuracy score > 60% found. Skipping saving.")
            

In [19]:
import sys
from airline_passenger_satisfaction.exception import CustomException

try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation()
    model_evaluation_config = ModelEvaluation(config=model_evaluation_config)
    model_evaluation_config.identify_best_model()
except Exception as e:
    raise CustomException(e,sys)

[2024-02-15 14:41:53,756] [INFO] [Airline Passenger Reviews Logger] [common] : yaml file config\config.yaml loaded successfully
[2024-02-15 14:41:53,758] [INFO] [Airline Passenger Reviews Logger] [common] : yaml file params.yaml loaded successfully
[2024-02-15 14:41:53,762] [INFO] [Airline Passenger Reviews Logger] [common] : yaml file schema.yaml loaded successfully
[2024-02-15 14:41:53,763] [INFO] [Airline Passenger Reviews Logger] [common] : Created directory at : artifacts
[2024-02-15 14:41:53,764] [INFO] [Airline Passenger Reviews Logger] [common] : Created directory at : artifacts/model_evaluation
[2024-02-15 14:41:54,270] [INFO] [Airline Passenger Reviews Logger] [370800297] :                 >>>>> Adaboost Classifier <<<<<                 
[2024-02-15 14:41:54,271] [INFO] [Airline Passenger Reviews Logger] [370800297] :                       *** Test Dataset ***                      
[2024-02-15 14:41:54,271] [INFO] [Airline Passenger Reviews Logger] [370800297] :  Precision 	 