# MLflow data tracking

In [1]:
import mlflow
import os
from pathlib import Path
from datetime import datetime

BASE_DIR = Path("/Users/ajayyy/Desktop/Deep_Learning/Smart-Quality-Inspection-System")

# Path to preprocessed dataset
DATA_PATH = BASE_DIR / "data" / "processed"

# Check if the folder exists
if not DATA_PATH.exists():
    raise FileNotFoundError(f"Processed data folder not found at: {DATA_PATH}")

# Configure MLflow tracking
mlflow.set_tracking_uri("file:./mlruns")
mlflow.set_experiment("data_versioning")

# Version tagging based on timestamp (optional)
dataset_version = f"v1.0_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

# Start MLflow run
with mlflow.start_run(run_name="dataset_logging") as run:
    mlflow.log_param("dataset_version", dataset_version)
    mlflow.log_artifact(str(DATA_PATH))  # Log the entire processed folder

    print(f"Dataset logged successfully!")
    print(f"Run ID: {run.info.run_id}")
    print(f"Artifacts saved at: ./mlruns/{run.info.run_id}/artifacts/")


Dataset logged successfully!
Run ID: 94cb37f3e8954bdf92a61537635dc96c
Artifacts saved at: ./mlruns/94cb37f3e8954bdf92a61537635dc96c/artifacts/
