## MLflow Data Tracking

In [None]:
import mlflow
import os
from pathlib import Path
from datetime import datetime
import shutil

# === PATHS ===
# Project root; the other locations below are derived from it.
BASE_DIR = Path("/Users/ajayyy/Desktop/Deep_Learning/Smart-Quality-Inspection-System")
# Folder holding the processed dataset.
DATA_PATH = BASE_DIR.joinpath("data", "processed")
# Folder used for the MLflow run data.
MLFLOW_DIR = BASE_DIR.joinpath("deployment", "mlflow_tracking", "mlruns")

# Fail fast: nothing below is meaningful without the processed data.
if not DATA_PATH.exists():
    raise FileNotFoundError(
        f"Processed data folder not found at: {DATA_PATH}"
    )

# === CLEAN CORRUPTED EXPERIMENTS ===
def clean_corrupted_experiments(mlruns_path):
    """Delete experiment folders whose ``meta.yaml`` is missing or empty.

    Only sub-directories of ``mlruns_path`` are considered. Plain files
    (e.g. a macOS ``.DS_Store``), hidden folders such as ``.trash`` and the
    ``models`` folder are left untouched: they are not experiments, so the
    absence of ``meta.yaml`` in them does not indicate corruption.

    Args:
        mlruns_path: Path (``str`` or ``pathlib.Path``) of the MLflow
            file-store root. If it does not exist yet, nothing is done.

    Returns:
        None. Each removed experiment is reported on stdout.
    """
    store_root = Path(mlruns_path)
    if not store_root.is_dir():
        # First run: the store has not been created yet, nothing to clean.
        return

    # Folder kept next to the experiments by MLflow's file store (model
    # registry); it legitimately has no meta.yaml of its own.
    reserved_names = {"models"}

    # Materialise the listing first so deletions cannot disturb iteration.
    for exp_path in sorted(store_root.iterdir()):
        exp_id = exp_path.name
        if not exp_path.is_dir():
            continue  # stray file; shutil.rmtree would raise on it
        if exp_id.startswith(".") or exp_id in reserved_names:
            continue  # .trash and other bookkeeping folders

        meta_file = exp_path / "meta.yaml"
        if not meta_file.is_file() or meta_file.stat().st_size == 0:
            print(f"⚠ Removing corrupted experiment: {exp_id}")
            shutil.rmtree(exp_path)

# Create the tracking store on first use so the cleanup step always has a
# directory to scan instead of failing on a missing path.
MLFLOW_DIR.mkdir(parents=True, exist_ok=True)
clean_corrupted_experiments(MLFLOW_DIR)

# === MLflow CONFIG ===
EXPERIMENT_NAME = "data_versioning"
mlflow.set_tracking_uri(f"file:{MLFLOW_DIR}")
mlflow.set_experiment(EXPERIMENT_NAME)  # Creates experiment if missing

# Version tagging: a fixed prefix plus a local timestamp, so every execution
# of this cell yields a distinct version string.
dataset_version = f"v1.0_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

# === ZIP DATASET ===
# make_archive appends ".zip" to the base name and returns the final file
# path; use that return value rather than rebuilding the name by hand.
zip_path = BASE_DIR / f"processed_dataset_{dataset_version}"
archive_file = shutil.make_archive(str(zip_path), 'zip', DATA_PATH)

# === LOG TO MLflow ===
with mlflow.start_run(run_name="dataset_logging") as run:
    mlflow.log_param("dataset_version", dataset_version)
    mlflow.log_artifact(archive_file)

    print(" Dataset zipped and logged successfully!")
    print(f"Run ID: {run.info.run_id}")
    # Report the location MLflow itself recorded for this run. Artifacts live
    # under <store>/<experiment_id>/<run_id>/artifacts, so a path built from
    # the run ID alone points at a directory that does not exist.
    print(f"Artifacts path: {run.info.artifact_uri}")