In [15]:
import os
os.chdir("x:\DL\Projects\MLOPs")
%pwd

'x:\\DL\\Projects\\MLOPs'

In [16]:
import os

os.environ["MLFLOW_TRACKING_URI"] = "http://127.0.0.1:5000"

In [17]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    root_dir: Path
    source_URL: str
    local_data: Path
    unzip_dir: Path

@dataclass(frozen=True)
class DataValidationConfig:
    current_dset: Path
    root_dir: Path
    status_file_dir: Path
    req_files: list
    
@dataclass(frozen=True)
class TrainLogConfig:
    model: str
    save_path: Path
    mlflow_uri: str
    experiment_name: str
    model_name: str

@dataclass(frozen=True)
class Params:
    optimizer: str
    lr0: float
    save_period: int
    batch: int
    epochs: int
    resume: bool
    seed: int
    imgsz: int 

In [18]:
from scripts.MLOPs.constants import *
from scripts.MLOPs.utils.common import read_yaml, create_directories
#from scripts.MLOPs.entity.config_entity import *

class ConfigurationManager:
    def __init__(self,config_filepath = CONFIG_FILE_PATH, params_filepath = PARAMS_FILE_PATH):
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])

    def get_dataingestion_config(self)-> DataIngestionConfig:
        config = self.config.data_ingestion
        create_directories([config.root_dir])
        data_ingestion_config = DataIngestionConfig(
            root_dir = config.root_dir,
            source_URL= config.source_URL,
            local_data= config.local_data,
            unzip_dir= config.unzip_dir
            )
        return data_ingestion_config
    
    def get_datavalidation_config(self)->DataValidationConfig:
        config = self.config.data_validation
        create_directories([config.data_val_dir])
        data_validation_config = DataValidationConfig(
            current_dset= config.current_dset,
            root_dir=config.data_val_dir,
            status_file_dir= config.data_val_status,
            req_files= config.data_val_req
            )
        return data_validation_config
    
    def get_train_log_config(self)-> TrainLogConfig:
        config = self.config.train_log_config
        trainlogconfig = TrainLogConfig(
            model= config.model,
            save_path= config.save_path,
            mlflow_uri= config.mlflow_uri,
            experiment_name= config.experiment_name,
            model_name= config.model_name
        )
        return trainlogconfig
    
    def get_params(self)-> Params:
        param = self.config.param
        params = Params(
            optimizer = param.optimizer,
            lr0 = param.lr0,
            save_period = param.save_period,
            batch = param.batch,
            epochs = param.epochs,
            resume = param.resume,
            seed = param.seed,
            imgsz = param.imgsz
        )
        return params
    
    

In [19]:
import logging
import numpy as np
import pandas as pd
import mlflow
from ultralytics import YOLO
from scripts.MLOPs.utils.common import get_highest_train_folder


class YoloWrapper(mlflow.pyfunc.PythonModel):
    def __init__(self):
        self.results = None
        self.results_df = None
        self.data = None

    def load_context(self, context: object):
        """Load Yolo model from context path

        Args:
            context (object): An MLFlow object that is used to define the path to the model.
        """
        runs_f = get_highest_train_folder("runs/detect")
        #logging.info(f"artifacts[path]: runs/detect/{runs_f}/weights/best.pt")
        self.model = YOLO(f"runs/detect/{runs_f}/weights/best.pt")

    def reformat_data(self):
        """Reformat given dictionary object that was coerced into numpy arrays into str,int,flow"""
        # For each key-value pair, convert value to string.
        for key, value in self.data.items():
            if isinstance(value, np.ndarray):
                self.data[key] = ",".join(map(str, [value]))

        # For each key-value pair, convert value to appropriate type.
        for key, value in self.data.items():
            if value.isnumeric():  # Check if the value is an integer
                self.data[key] = int(value)
            elif value.replace(".", "", 1).isdigit():  # Check if the value is a float
                self.data[key] = float(value)
            elif value.lower() in ["true", "false"]:  # Check if the value is a boolean
                self.data[key] = value.lower() == "true"

    def yolo_results_to_df(self):
        """Create Yolo results as a df"""
        # Retrieve bounding boxes
        boxes = self.results[0].boxes
        # Map class to string names
        names = []
        for object_class in boxes.cls.numpy():
            names.append(self.model.names[object_class])
        # Create return df
        self.results_df = pd.DataFrame(
            np.c_[boxes.xyxy.numpy(), boxes.conf, boxes.cls.numpy(), np.array(names)],
            columns=["X1", "Y1", "X2", "Y2", "conf", "cls", "names"],
        )

    def predict(self, context: object, data: dict):
        """Wrapper function around Yolo's predict function. Results are returned as a pandas dataframe.

        Args:
            context (object): An MLFlow object that is used to define the path to the model.
            data (dict): dictionary with source for inference and override parameters for the model.

        Returns:
            _type_: _description_
        """
        self.data = data
        logging.info(f"Data input: f{self.data}")
        # Reformat data
        self.reformat_data()
        logging.info(f"Data after reformat: f{self.data}")

        # Pass inputs to predict
        self.results = self.model.predict(**self.data)
        # Transform results to pandas df
        self.yolo_results_to_df()

        return self.results_df

In [22]:
"Registers model in MLFlow"
import os
from pathlib import Path
import csv
import logging
import yaml
import cloudpickle
import pandas as pd
import mlflow
# from scripts.mlflow_utils import model_wrapper
# from scripts.mlflow_utils.model_wrapper import YoloWrapper


def get_experiment_id(name: str):
    """Retrieve experiment if registered name, else create experiment.

    Args:
        name (str): Mlflow experiment name

    Returns:
        str: Mlfow experiment id
    """
    exp = mlflow.get_experiment_by_name(name)
    if exp is None:
        exp_id = mlflow.create_experiment(name)
        return exp_id
    return exp.experiment_id


def read_lines(path: str):
    """Given a path to a file, this function reads file, seperates the lines and returns a list of those separated lines.

    Args:
        path (str): Path to file

    Returns:
        list: List made of of file lines
    """
    with open(path) as f:
        return f.read().splitlines()


def log_metrics(save_dir: str, log_results: bool = True):
    """Log metrics to Mlflow from the Yolo model outputs.

    Args:
        save_dir (str): Path to Yolo save directory, i.e - runs/train
        log_results (bool): If True, the results are logged to MLflow server
    """
    save_dir = Path(save_dir)
    try:
        with open(save_dir / "results.csv", "r") as csv_file:
            metrics_reader = csv.DictReader(csv_file)
            metrics_list = []
            for metrics in metrics_reader:
                # Create an empty dictionary to store the updated key-value pairs for this row
                updated_metrics = {}
                # Iterate through the key-value pairs in this row's dictionary
                for key, value in metrics.items():
                    # Remove whitespace from the key
                    key = key.strip()
                    value = value.strip()
                    # Remove extra strings in keys
                    patterns = ["(B)", "metrics/"]
                    for pattern in patterns:
                        key = key.replace(pattern, "")
                    # Add the updated key-value pair to the updated row dictionary
                    try:
                        # Add the updated key-value pair to the updated row dictionary
                        updated_metrics[key] = float(value)
                    except ValueError:
                        logging.error(f"ValueError: Could not convert {value} to float.")
                    metrics_list.append(updated_metrics)
                    if log_results:
                        mlflow.log_metrics(updated_metrics)
        return metrics_list
    except FileNotFoundError:
        print(f"FileNotFoundError: Could not find {save_dir / 'results.csv'}.")
    except IOError:
        print(f"IOError: Could not read {save_dir / 'results.csv'}.")


def get_path_w_extension(
    path: str, extension: str, limit: int, ignore_files: list = []
):
    """Finds files that match extensions and returns a list of those files that match up to thhe limit while ignoring files in the ignore_files list.

    Args:
        path (str): Directory to search for files in.
        extension (str): Type of extension to look for.
        ignore_files (list, optional): Specify list of files that will be ignored. Defaults to [].

    Returns:
        list: List of paths to files with extensions.
    """
    logging.debug(f"Path: {path}")
    logging.debug(f"Extension: {extension}")
    if isinstance(path, str):
        abs_path = os.path.abspath(path)
    elif isinstance(path, Path):
        abs_path = path.absolute()
    else:
        raise ValueError(f"Error: Path {path} is not valid.")

    if not os.path.exists(abs_path):
        raise ValueError(f"Error: Path {abs_path} does not exist.")

    if os.path.isdir(abs_path):
        pt_files = []
        for root, dirs, files in os.walk(abs_path):
            for file in files:
                if (
                    file.endswith(extension)
                    and os.path.basename(file) not in ignore_files
                ):
                    pt_files.append(os.path.join(root, file))
        if len(pt_files) <= limit and len(pt_files) > 0:
            return pt_files
        if len(pt_files) > limit:
            raise ValueError(
                f"Error: Given limit: {limit} while number of files found with {extension} extension in directory {abs_path} is {len(pt_files)}"
            )
        else:
            raise FileNotFoundError(
                f"Error: No {extension} files found in directory {abs_path}."
            )
    elif os.path.isfile(abs_path) and abs_path.endswith(extension):
        return [abs_path]
    else:
        raise ValueError(
            f"Error: Path {abs_path} is not a valid directory or {extension} file."
        )


def register_model(experiment_name: str, model_name: str, save_dir: Path):
    """Registers a model with mlflow

    Args:
        experiment_name (str): Name of Mlfow experiment
        model_name (str): Name that will be registered with Mlflow
        save_dir (Path): Path object where the results of the Yolo model are saved. I.e 'runs' directory
    """
    save_dir = Path(save_dir)
    logging.debug(f"Save Directory: {save_dir}")

    '''model_path = get_path_w_extension(
        path=save_dir, extension=".pt", limit=1, ignore_files=["last.pt"]
    )[0]'''
    model_path = f"{save_dir}/weights/best.pt"
    artifacts = {"path": model_path}

    model = YoloWrapper()

    exp_id = get_experiment_id(experiment_name)

    #cloudpickle.register_pickle_by_value(model_wrapper)

    with mlflow.start_run(experiment_id=exp_id) as run:
        # Log some params
        with open(save_dir / "args.yaml", "r") as param_file:
            params = yaml.safe_load(param_file)
        mlflow.log_params(params)

        log_metrics(save_dir, True)
        mlflow.log_artifact(f"{save_dir}/weights/best.pt")
        pip_reqs = read_lines("requirements.txt")
        mlflow.pyfunc.log_model(
            "model",
            python_model=model,
            pip_requirements=pip_reqs,
            artifacts=artifacts,
            registered_model_name=model_name,
        )
        run_id = run.info.run_uuid
        experiment_id = run.info.experiment_id
        mlflow.end_run()
        logging.info(f"artifact_uri = {mlflow.get_artifact_uri()}")
        logging.info(f"runID: {run_id}")
        logging.info(f"experiment_id: {experiment_id}")




In [23]:
try:
    x = ConfigurationManager()
    trainlog = x.get_train_log_config()
    runs_f = get_highest_train_folder("runs/detect")
    dirr = Path(f"runs/detect/{runs_f}")


    register_model(experiment_name=trainlog.experiment_name,model_name=trainlog.model_name,save_dir=dirr)
except Exception as e:
    raise e

[2024-08-05 11:43:12,931: INFO: common: yaml file: config\config.yaml loaded sucessfully]
[2024-08-05 11:43:12,931: INFO: common: yaml file: params.yaml loaded sucessfully]
[2024-08-05 11:43:12,936: INFO: common: created directory at artifacts]


2024/08/05 11:43:13 INFO mlflow.types.utils: Unsupported type hint: <class 'dict'>, skipping schema inference
Downloading artifacts: 100%|██████████| 1/1 [00:00<00:00, 157.72it/s]
Registered model 'yolov8ndet' already exists. Creating a new version of this model...
2024/08/05 11:43:14 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: yolov8ndet, version 10
Created version '10' of model 'yolov8ndet'.
2024/08/05 11:43:14 INFO mlflow.tracking._tracking_service.client: 🏃 View run placid-mouse-301 at: http://127.0.0.1:5000/#/experiments/275689030631216605/runs/6c14248dc6b24815a9929b85262980e4.
2024/08/05 11:43:14 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/275689030631216605.


[2024-08-05 11:43:14,521: INFO: 741786398: artifact_uri = mlflow-artifacts:/0/25b7111d874d4038a98a45bf411f2336/artifacts]
[2024-08-05 11:43:14,521: INFO: 741786398: runID: 6c14248dc6b24815a9929b85262980e4]
[2024-08-05 11:43:14,521: INFO: 741786398: experiment_id: 275689030631216605]


2024/08/05 11:43:14 INFO mlflow.tracking._tracking_service.client: 🏃 View run bouncy-bear-760 at: http://127.0.0.1:5000/#/experiments/0/runs/25b7111d874d4038a98a45bf411f2336.
2024/08/05 11:43:14 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0.
