### Model Evaluation and MLflow Integration

In [2]:
import os

In [3]:
%pwd

'd:\\Silent_Night\\mlops\\AML_Classification\\research'

In [4]:
os.chdir("../")

In [5]:
%pwd

'd:\\Silent_Night\\mlops\\AML_Classification'

In [6]:
import dagshub

In [7]:
dagshub.init(repo_owner='adirajjohn2000', repo_name='AML_Classification', mlflow=True)

In [1]:
import sklearn
import pickle

In [8]:
with open("artifacts/prepare_base_model/base_model.plk","rb") as file:
    model=pickle.load(file)

In [9]:
#Entity
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class EvaluationConfig:
    path_of_model: Path
    training_data: Path
    all_params: dict
    mlflow_url: str

In [23]:
#ConfigrationManger
from AML_Classifier.constants.__init__ import CONFIG_FILE_PATH,PARAMS_FILE_PATH
from AML_Classifier.utils.common import read_yaml, create_directories, save_json
from pathlib import Path

class ConfigurationManager:
    def __init__(self,config_filepath = CONFIG_FILE_PATH,params_filepath = PARAMS_FILE_PATH):
        self.config= read_yaml(config_filepath)
        self.params=read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])


    def get_evaluation_config(self) -> EvaluationConfig:
        config = self.config.eval_model


        eval_config = EvaluationConfig(
            path_of_model=config.path_of_model,
            training_data=config.training_data,
            mlflow_url=config.mlflow_url,
            all_params=self.params

        )
        return eval_config

ImportError: cannot import name 'save_json' from 'AML_Classifier.utils.common' (d:\silent_night\mlops\aml_classification\src\AML_Classifier\utils\common.py)

In [35]:
#Compontents
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score,log_loss
import os
import pickle
import sklearn
import dagshub
import mlflow.sklearn
import numpy as np
import pandas as pd
from urllib.parse import urlparse

In [36]:
class Evaluation:
    def __init__(self,config: EvaluationConfig):
        self.config = config

    def eval(self):
        data=pd.read_csv(Path(self.config.training_data))

        #Create Dependent and Independent Variable
        y=data[["is_laundering"]]
        x=data.drop(["is_laundering"],axis=1)
        y=np.array(y)
        y=y.ravel()

        # Split data into training and test sets
        X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)


        #Get the trained model
        with open(self.config.path_of_model,"rb") as file:
            model=pickle.load(file)

        #Generate y_pred
        y_pred=model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)

        #Score
        save_score={
            "Accuracy": accuracy_score(y_test,y_pred),
            "F1_Score": f1_score(y_test,y_pred, average="weighted"),
            "loss": log_loss(y_test,y_pred_proba)
        }

        #Save the score JSON file
        #save_json(data=save_score,path=Path("scores.json"))


        #MLflow Integration
        dagshub.init(repo_owner='adirajjohn2000', repo_name='AML_Classification', mlflow=True)
        mlflow.set_registry_uri(self.config.mlflow_url)
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics(save_score)
            if tracking_url_type_store != "file":
                mlflow.sklearn.log_model(model,"model",registered_model_name="RandomForest")
            else:
                mlflow.sklearn.log_model(model,"model")


In [39]:
#pipeline
try:
    config=ConfigurationManager()
    eval_config=config.get_evaluation_config()
    evaluation = Evaluation(eval_config)
    evaluation.eval()

except Exception as e:
    raise e

[2024-06-29 15:48:36,230: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-06-29 15:48:36,236: INFO: common: yaml file: params.yaml loaded successfully]
[2024-06-29 15:48:36,239: INFO: common: created directory at: artifacts]


[2024-06-29 15:48:38,487: INFO: helpers: Initialized MLflow to track repo "adirajjohn2000/AML_Classification"]


[2024-06-29 15:48:38,509: INFO: helpers: Repository adirajjohn2000/AML_Classification initialized!]


Registered model 'RandomForest' already exists. Creating a new version of this model...
2024/06/29 15:49:25 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: RandomForest, version 2
Created version '2' of model 'RandomForest'.
