In [1]:
import os

# Step up one directory so the `src` package and the relative `artifacts/`
# paths used later in this notebook resolve (presumably the notebook sits one
# level below the project root -- confirm against the repo layout).
# Guarded so that re-running this cell does not climb a further directory.
if not os.path.isdir('src'):
    os.chdir('../')

In [2]:
import pickle

In [3]:
# Load the trained model artifact for a quick interactive look.
# NOTE(review): pickle can execute arbitrary code while loading; only open
# artifacts produced by this project's own training stage.
with open('artifacts/training/model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [4]:
# Bare expression: the notebook renders the repr of the unpickled object.
model

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class EvaluationConfig:
    """Immutable bundle of settings read by the evaluation stage."""
    # Location of the pickled model that the evaluation stage loads.
    model_path: Path
    # CSV file that the evaluation stage reads with pandas.
    training_data: Path
    # Parameters logged to MLflow via `mlflow.log_params`.
    all_params: dict
    # URI passed to `mlflow.set_registry_uri`.
    mlflow_uri: str
    # Seed handed to `train_test_split` for the train/validation split.
    random_state: int
    # Column names selected from the CSV as model inputs.
    feature_columns: list
    # Column name selected from the CSV as the label.
    target_column: str

In [6]:
from src.MatchAnalysis.constants import *
from src.MatchAnalysis.utils.common import read_yaml, create_directories


In [8]:
# Parse the secrets YAML; its MLFLOW_TRACKING_* entries are exported to the
# environment in the next cell.
secrets = read_yaml(SECRETS_FILE_PATH)

[2024-01-31 20:36:37,823: INFO: common] yaml file: secrets.yaml loaded successfully


In [9]:
# Export the MLflow connection settings held in `secrets` as environment
# variables in one call.
os.environ.update({
    "MLFLOW_TRACKING_URI": secrets.MLFLOW_TRACKING_URI,
    "MLFLOW_TRACKING_USERNAME": secrets.MLFLOW_TRACKING_USERNAME,
    "MLFLOW_TRACKING_PASSWORD": secrets.MLFLOW_TRACKING_PASSWORD,
})

In [8]:
class ConfigurationManager:
    """Loads the config and params YAML files and builds stage configurations."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH
    ):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: YAML file holding paths and column names.
            params_filepath: YAML file holding the run parameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_evaluation_config(self) -> EvaluationConfig:
        """Assemble the `EvaluationConfig` from the loaded config and params.

        Returns:
            EvaluationConfig populated from the `training` and `prepare_data`
            config sections, the full params object and a fixed MLflow URI.
        """
        training = self.config.training
        prepare_data = self.config.prepare_data

        return EvaluationConfig(
            model_path=training.trained_model_path,
            training_data=prepare_data.prepared_data_path,
            all_params=self.params,
            mlflow_uri="https://dagshub.com/GuidoMainardi/match-analysis.mlflow",
            random_state=self.params.VAL_SPLIT_RANDOM_STATE,
            feature_columns=prepare_data.feature_columns,
            target_column=prepare_data.target_column,
        )
    

In [9]:
# `import pathlib as Path` used to rebind `Path` to the pathlib *module*,
# shadowing the `Path` class imported earlier; import the class instead.
from pathlib import Path
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
import sklearn



In [10]:
from urllib.parse import urlparse
import pandas as pd
import json

from src.MatchAnalysis import logger
from src.MatchAnalysis.utils.common import save_json

class Evaluation:
    """Scores the trained model on a validation split and logs the run to MLflow.

    Intended call order: ``get_data()`` -> ``evaluate()`` -> ``log_into_mlflow()``.
    """

    # Annotations are written as strings so that defining the class does not
    # depend on which names happen to be bound in the notebook at that moment.
    def __init__(self, config: "EvaluationConfig"):
        """Keep the evaluation settings; nothing is loaded yet."""
        self.config = config

    def get_data(self):
        """Read the CSV at ``config.training_data`` into ``self.training_data``."""
        self.training_data = pd.read_csv(self.config.training_data)

    def train_valid_data_split(self):
        """Select feature/target columns and split them into train/validation parts.

        Populates ``X``, ``y``, ``train_X``, ``valid_X``, ``train_y`` and
        ``valid_y`` on the instance.
        """
        # Load the data on demand instead of failing with AttributeError when
        # get_data() has not been called yet.
        if not hasattr(self, "training_data"):
            self.get_data()

        self.X = self.training_data[self.config.feature_columns]
        self.y = self.training_data[self.config.target_column]

        # 20% of the rows are held out; the seed comes from the config.
        self.train_X, self.valid_X, self.train_y, self.valid_y = train_test_split(
            self.X, self.y, test_size=0.2, random_state=self.config.random_state
        )

    @staticmethod
    def load_model(path: "Path") -> "sklearn.linear_model.LogisticRegression":
        """Unpickle and return the object stored at ``path``.

        NOTE(review): pickle can execute arbitrary code while loading; only
        point this at artifacts produced by a trusted training run.
        """
        # Context manager so the file handle is closed even if unpickling fails.
        with open(path, 'rb') as model_file:
            return pickle.load(model_file)

    def evaluate(self):
        """Load the model, score it on the validation split and save the score.

        Sets ``self.model`` and ``self.score``, then calls ``save_score()``.
        """
        self.model = self.load_model(self.config.model_path)
        self.train_valid_data_split()
        # Score the model loaded above, not a notebook-level global `model`.
        self.score = self.model.score(self.valid_X, self.valid_y)

        self.save_score()

    def save_score(self):
        """Write ``self.score`` to ``scores.json`` under the key ``accuracy``."""
        scores = {
            "accuracy": self.score
        }

        save_json(path='scores.json', data=scores)

    def log_into_mlflow(self):
        """Log params, the accuracy metric and the model to MLflow.

        Requires ``evaluate()`` to have run, since it reads ``self.score`` and
        ``self.model``.
        """
        mlflow.set_registry_uri(self.config.mlflow_uri)
        tracking_url_type_store = urlparse(mlflow.get_registry_uri()).scheme

        with mlflow.start_run():
            mlflow.log_params(self.config.all_params)

            mlflow.log_metrics({
                "accuracy": self.score
            })

            # The model is registered by name only when the registry URI is
            # not a local "file" store.
            if tracking_url_type_store != "file":
                mlflow.sklearn.log_model(self.model, "model", registered_model_name="LogisticRegression")
            else:
                mlflow.sklearn.log_model(self.model, "model")




In [11]:
# Run the evaluation stage end to end. The previous `except Exception as e:
# raise e` wrapper only re-raised, so exceptions are simply left to propagate.
config = ConfigurationManager()
eval_config = config.get_evaluation_config()
evaluation = Evaluation(eval_config)
evaluation.get_data()
evaluation.evaluate()
evaluation.log_into_mlflow()

[2024-01-31 19:40:48,949: INFO: common] yaml file: config/config.yaml loaded successfully
[2024-01-31 19:40:48,950: INFO: common] yaml file: params.yaml loaded successfully
[2024-01-31 19:40:48,951: INFO: common] Creating directory: artifacts
[2024-01-31 19:40:49,014: INFO: common] json file saved at: scores.json


Registered model 'LogisticRegression' already exists. Creating a new version of this model...
2024/01/31 19:40:55 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: LogisticRegression, version 3
Created version '3' of model 'LogisticRegression'.
