In [1]:
import os
# Step the working directory up one level; presumably so that the relative
# config/artifact paths used by later cells resolve from the parent folder.
# NOTE(review): this is not idempotent — every re-run of this cell climbs one
# more level, so restart the kernel before running the notebook again.
os.chdir("../")

In [2]:
%pwd

'c:\\Users\\agost\\Data_Career\\End_to_end_projects\\Abalone_Age_Prediction'

In [3]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class ModelEvaluationConfig:
    """Immutable settings for the model-evaluation stage.

    Instances are built by ``ConfigurationManager.get_model_evaluation_config``
    from the ``model_evaluation`` section of the config YAML, plus the
    ``TARGET`` entry of the params YAML.
    """
    root_dir: Path  # directory created for this stage before evaluation runs
    local_data_file: Path  # CSV file read as the test set
    trained_model_path: Path  # NOTE(review): not read anywhere in this notebook — confirm it is still needed
    best_model_path: Path  # destination handed to save_object for the winning model
    param_target_col: str  # name of the target column inside the test CSV
    elastic_pickle: Path  # serialized model evaluated under the "elastic" key
    lasso_pickle: Path  # serialized model evaluated under the "lasso" key
    lr_pickle: Path  # serialized model evaluated under the "lr" key
    rfr_pickle: Path  # serialized model evaluated under the "rfr" key
    ridge_pickle: Path  # serialized model evaluated under the "ridge" key
    svr_pickle: Path  # serialized model evaluated under the "svr" key

In [4]:
from Abalone_Age_Prediction.utils.common import create_directories, read_yaml, load_object, save_object
from Abalone_Age_Prediction.constants import *


In [5]:
class ConfigurationManager:
    """Read the project's YAML files and build per-stage configuration objects."""

    def __init__(self,
                 config_filepath = CONFIG_FILE_PATH,
                 params_filepath = PARAMS_FILE_PATH):
        """
        Load both YAML files and make sure the artifacts root directory exists.

        Args:
            config_filepath: location of the config YAML (defaults to CONFIG_FILE_PATH).
            params_filepath: location of the params YAML (defaults to PARAMS_FILE_PATH).
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # `artifacts_root` is a top-level key of the config YAML. Attribute access
        # (instead of ["artifacts_root"]) works because, per the project's
        # common.py, read_yaml wraps the parsed file in a ConfigBox.
        create_directories([self.config.artifacts_root])

    def get_model_evaluation_config(self) -> ModelEvaluationConfig:
        """
        Build the settings object for the model-evaluation stage.

        Creates the stage's root directory as a side effect.

        Returns:
            ModelEvaluationConfig: populated from the `model_evaluation` section
            of the config YAML and the `TARGET` entry of the params YAML.
        """
        # `model_evaluation` is another top-level key of the config YAML.
        config = self.config.model_evaluation

        create_directories([config.root_dir])

        # The dataclass declares these fields as Path, so coerce them here rather
        # than handing the raw config values through; Path() accepts both str and
        # existing Path objects, so already-converted values are unaffected.
        return ModelEvaluationConfig(
            root_dir=Path(config.root_dir),
            local_data_file=Path(config.local_data_file),
            trained_model_path=Path(config.trained_model_path),
            best_model_path=Path(config.best_model_path),
            param_target_col=self.params.TARGET,
            elastic_pickle=Path(config.elastic_pickle),
            lasso_pickle=Path(config.lasso_pickle),
            lr_pickle=Path(config.lr_pickle),
            rfr_pickle=Path(config.rfr_pickle),
            ridge_pickle=Path(config.ridge_pickle),
            svr_pickle=Path(config.svr_pickle),
        )

In [27]:
import pandas as pd
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
from Abalone_Age_Prediction import logger

In [34]:
class ModelEvaluation:
    """Score every trained model on the test set and persist the best one."""

    def __init__(self, config: ModelEvaluationConfig):
        """Keep a reference to the stage configuration."""
        self.config = config

    def read_file(self):
        """
        Read the test-set CSV named by ``config.local_data_file``.

        Returns:
            pandas.DataFrame: the parsed file.

        Raises:
            Exception: whatever ``pd.read_csv`` raises (missing file, parse
            error, ...). The error is deliberately not caught here: swallowing
            it would hand ``None`` to the caller and fail later with an
            unrelated AttributeError.
        """
        return pd.read_csv(self.config.local_data_file)

    def evaluation_metrics(self,X_test,y_test, models: dict):
        """
        Evaluate all the models on the same test data.

        Args:
            X_test: feature columns passed to each model's ``predict``.
            y_test: true target values.
            models: mapping of model name -> fitted estimator exposing ``predict``.

        Returns:
            tuple[dict, dict, dict]: R2, MSE and MAE scores, each keyed by model name.
        """
        r2_dic = {}
        mse_dic = {}
        mae_dic = {}

        for key, model in models.items():
            # Predict once per model and reuse the result for all three metrics.
            y_pred = model.predict(X_test)
            r2_dic[key] = r2_score(y_test, y_pred)
            mse_dic[key] = mean_squared_error(y_test, y_pred)
            mae_dic[key] = mean_absolute_error(y_test, y_pred)

        return r2_dic, mse_dic, mae_dic

    def model_evaluation(self,testing_data):
        """
        Load the serialized models, score them, log the results and save the winner.

        The model with the highest R2 score is passed to ``save_object`` together
        with ``config.best_model_path`` and the file name "best_model.pkl".

        Args:
            testing_data: DataFrame containing the feature columns and the target
                column named by ``config.param_target_col``.

        Returns:
            None
        """
        X_test = testing_data.drop(labels = self.config.param_target_col, axis = 1)
        y_test = testing_data[self.config.param_target_col]

        # NOTE(review): load_object presumably unpickles these files — only point
        # the config at artifacts produced by this project's own training stage.
        models = {
            "elastic" : load_object(Path(self.config.elastic_pickle)),
            "lasso" : load_object(Path(self.config.lasso_pickle)),
            "lr" : load_object(Path(self.config.lr_pickle)),
            "rfr" : load_object(Path(self.config.rfr_pickle)),
            "ridge" : load_object(Path(self.config.ridge_pickle)),
            "svr" : load_object(Path(self.config.svr_pickle))
        }

        r2_dic, mse_dic, mae_dic = self.evaluation_metrics(X_test,y_test,models)

        # max() with a key returns the first maximal entry, so ties are still
        # resolved in favour of the earliest model in the dictionary.
        best_model = max(r2_dic, key=r2_dic.get)
        best_model_score = r2_dic[best_model]

        logger.info(f"R2 Score dictionary : {r2_dic} \n")
        logger.info(f"MSE dictionary : {mse_dic} \n")
        logger.info(f"MAE Score dictionary : {mae_dic} \n")
        logger.info(f"The best model is {best_model} with {round(best_model_score,3)} R2 score")

        save_object(Path(self.config.best_model_path), models[best_model],"best_model.pkl")

        return None
        

            
            



In [35]:
# Run the model-evaluation stage end to end: load the configuration, read the
# test set, score every model and persist the best one.
try:
    config = ConfigurationManager()
    model_evaluation_config = config.get_model_evaluation_config()
    model_evaluation = ModelEvaluation(config=model_evaluation_config)
    testing_data = model_evaluation.read_file()
    model_evaluation.model_evaluation(testing_data)
except Exception:
    # Record the failure in the project log before propagating it; a bare
    # `raise` re-raises the active exception with its original traceback.
    logger.exception("Model evaluation stage failed")
    raise

[2024-06-15 17:06:52,002: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-06-15 17:06:52,003: INFO: common: yaml file: params.yaml loaded successfully]
[2024-06-15 17:06:52,004: INFO: common: The directory artifacts already exists]
[2024-06-15 17:06:52,005: INFO: common: The directory artifacts/training already exists]
[2024-06-15 17:06:52,441: INFO: 1508143048: R2 Score dictionary : {'elastic': -0.0006478694463609713, 'lasso': -0.0006478694463609713, 'lr': 0.5387176640137978, 'rfr': 0.5589292807597441, 'ridge': 0.5273528291133511, 'svr': 0.4908606722019585} 
]
[2024-06-15 17:06:52,442: INFO: 1508143048: MSE dictionary : {'elastic': 9.90927254962901, 'lasso': 9.90927254962901, 'lr': 4.568012913619503, 'rfr': 4.367860167464115, 'ridge': 4.680557246095193, 'svr': 5.041933849993716} 
]
[2024-06-15 17:06:52,442: INFO: 1508143048: MAE Score dictionary : {'elastic': 2.3395858186458227, 'lasso': 2.3395858186458227, 'lr': 1.5813770314261746, 'rfr': 1.487188995215311, 'ri

In [26]:
# Scratch check: collect every key whose value equals the dictionary's maximum.
dic = dict(key1=1, key2=3, key3=2)
value = [name for name, score in dic.items() if score == max(dic.values())]


#max(dic[dic.keys() == max(dic.values())])