In this notebook we see how to get a summary of the experiments, as well as how to retrieve, load, and use the registered models from DAGsHub.

## Libraries

In [1]:
from pathlib import Path

import dagshub
import mlflow
import pandas as pd
from mlflow.tracking import MlflowClient

## DAGsHub Connection

In [2]:
# Initialize DagsHub and MLflow.
# dagshub.init(..., mlflow=True) is asked to configure MLflow for the remote
# repository. If that fails for any reason, fall back to a local file store so
# the rest of the notebook can still run.
try:
    dagshub.init(repo_owner='Febas-MCD', repo_name='waveled', mlflow=True)
except Exception as e:
    print(f"⚠️ Failed to connect to remote tracking: {e}")
    # "file://./mlruns" is not a valid file URI: the "." is parsed as the
    # authority (host) part and the path becomes "/mlruns", i.e. a directory at
    # the filesystem root. Build an absolute, well-formed file URI for
    # ./mlruns relative to the current working directory instead.
    mlflow.set_tracking_uri(Path("mlruns").resolve().as_uri())
    print("⚠️ Falling back to local MLflow tracking")

## Experiments

In [13]:
# Build a summary table with one row per run that logged artifacts, across all
# experiments visible on the tracking server.

# Create the client in this cell: it is needed by list_artifacts below, and
# relying on a client created in a later cell breaks Restart Kernel -> Run All.
client = MlflowClient()

# Get all experiments
experiments = mlflow.search_experiments()

model_summaries = []

for exp in experiments:
    # search_runs takes a list of experiment IDs and returns one row per run.
    runs = mlflow.search_runs(experiment_ids=[exp.experiment_id])

    for _, run in runs.iterrows():
        run_id = run.run_id
        artifacts = client.list_artifacts(run_id)

        # Runs without any artifact are skipped. Per the original author's
        # note, every run with artifacts in this project is assumed to be a
        # model run, so no further filtering is applied.
        if not artifacts:
            continue

        run_summary = {
            "run_id": run_id,
            "experiment_id": exp.experiment_id,
            "experiment_name": exp.name,
            "run_name": run.get('tags.mlflow.runName', ''),
            # Columns are prefixed by kind ("metrics.", "params."), so the
            # prefix is enough to split the row into the two groups.
            "metrics": {k: v for k, v in run.items() if k.startswith('metrics.')},
            "params": {k: v for k, v in run.items() if k.startswith('params.')},
            "artifacts": [artifact.path for artifact in artifacts],
            # Assumes each run has exactly one top-level model artifact, whose
            # path doubles as the model type name.
            "model_type": artifacts[0].path,
        }
        model_summaries.append(run_summary)

# Convert to DataFrame for easy viewing
summary_df = pd.DataFrame(model_summaries)
summary_df

Unnamed: 0,run_id,experiment_id,experiment_name,run_name,metrics,params,artifacts,model_type
0,b5606b62df26453cbe54635618cc9a42,0,Default,XGBoost_20250517_214912,"{'metrics.accuracy': 0.8756793478260869, 'metr...","{'params.max_depth': '6', 'params.n_estimators...",[XGBoost],XGBoost
1,ebad1b80510242e889ec5b36b3d053a1,0,Default,RandomForest_20250517_214701,"{'metrics.accuracy': 0.8620923913043478, 'metr...","{'params.max_depth': '10', 'params.n_estimator...",[RandomForest],RandomForest
2,38c2ad4632f14dcaa08cce4deab2b7d2,0,Default,LinearSVC_PCA_20250517_214640,"{'metrics.accuracy': 0.858016304347826, 'metri...","{'params.max_depth': None, 'params.n_estimator...",[LinearSVC_PCA],LinearSVC_PCA
3,2bd01ee707044c6fa3c424f263b3aea4,0,Default,LogisticRegression_PCA_20250517_214553,"{'metrics.accuracy': 0.8582427536231884, 'metr...","{'params.max_depth': None, 'params.n_estimator...",[LogisticRegression_PCA],LogisticRegression_PCA
4,03eff35c96664ccaa43b5a4b26aa2568,0,Default,XGBoost_20250517_201607,"{'metrics.accuracy': 0.8747735507246377, 'metr...","{'params.max_depth': '6', 'params.n_estimators...",[XGBoost],XGBoost
5,b94c6c7a55c742af85fff6f1c85ca3d7,0,Default,RandomForest_20250517_201323,"{'metrics.accuracy': 0.8643568840579711, 'metr...","{'params.max_depth': '10', 'params.n_estimator...",[RandomForest],RandomForest
6,87bbf827ef734d83b34f20d2a117e147,0,Default,LinearSVC_PCA_20250517_201302,"{'metrics.accuracy': 0.8605072463768116, 'metr...","{'params.max_depth': None, 'params.n_estimator...",[LinearSVC_PCA],LinearSVC_PCA
7,a6338385570b420fa687c5a80b65e98f,0,Default,LogisticRegression_PCA_20250517_201221,"{'metrics.accuracy': 0.859375, 'metrics.f1_sco...","{'params.max_depth': None, 'params.n_estimator...",[LogisticRegression_PCA],LogisticRegression_PCA


## Models

In [5]:
# List every registered model with its newest version and description.
client = MlflowClient()
registered_models = client.search_registered_models()

for model in registered_models:
    # latest_versions may be empty (model registered without any version) and
    # its first element is not guaranteed to be the highest version number, so
    # pick the maximum explicitly. Version numbers are compared as integers.
    newest = max(model.latest_versions, key=lambda mv: int(mv.version), default=None)

    print(f"Model Name: {model.name}")
    print(f"Latest Version: {newest.version if newest is not None else 'n/a'}")
    print(f"Description: {model.description}")
    print("-----")

Model Name: LinearSVC_PCA
Latest Version: 2
Description: 
-----
Model Name: LogisticRegression_PCA
Latest Version: 2
Description: 
-----
Model Name: RandomForest
Latest Version: 2
Description: 
-----
Model Name: XGBoost
Latest Version: 2
Description: 
-----


In [6]:
# Load the first registered model (newest version) together with its metadata.
# Uses `registered_models` from the registry listing cell.
if not registered_models:
    # Fail here with a clear message rather than with a NameError on
    # `model_info` in the cells that follow.
    raise RuntimeError("No registered models found in the MLflow registry")

first_registered = registered_models[0]
model_name = first_registered.name

# Pick the highest version number explicitly; the first entry of
# latest_versions is not guaranteed to be the newest one.
newest = max(first_registered.latest_versions, key=lambda mv: int(mv.version), default=None)
if newest is None:
    raise RuntimeError(f"Registered model '{model_name}' has no versions")
version = newest.version

model_uri = f"models:/{model_name}/{version}"

# For a registered model
model = mlflow.pyfunc.load_model(model_uri)

# Get the model info
model_info = mlflow.models.get_model_info(model_uri)

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
# Inspect the metadata held in `model_info`: its flavors and its signature.
flavors = model_info.flavors

# 1. Every flavor recorded for the model
print("Available flavors:", flavors.keys())

# 2. The generic pyfunc flavor (prints None if it is absent)
print("\nPyfunc flavor details:", flavors.get("python_function"))

# 3. Framework-specific flavors, reported only when present
framework_headings = (
    ("sklearn", "\nSklearn model details:"),
    ("tensorflow", "\nTensorFlow model details:"),
    ("pytorch", "\nPyTorch model details:"),
)
for flavor_key, heading in framework_headings:
    if flavor_key in flavors:
        print(heading, flavors[flavor_key])

# 4. Input/output schema, when a signature is available
signature = model_info.signature
if not signature:
    print("\nNo signature available - model was saved without schema")
else:
    print("\nInput schema:", signature.inputs)
    print("Output schema:", signature.outputs)

Available flavors: dict_keys(['python_function', 'sklearn'])

Pyfunc flavor details: {'env': {'conda': 'conda.yaml', 'virtualenv': 'python_env.yaml'}, 'loader_module': 'mlflow.sklearn', 'model_path': 'model.pkl', 'predict_fn': 'predict', 'python_version': '3.11.0'}

Sklearn model details: {'code': None, 'pickled_model': 'model.pkl', 'serialization_format': 'cloudpickle', 'sklearn_version': '1.6.1'}

Input schema: [Tensor('float32', (-1, 965))]
Output schema: [Tensor('float32', (-1,))]


In [8]:
# Point the "Champion" alias of every registered model at its newest version.
# MlflowClient is already imported in the Libraries cell at the top of the
# notebook, so it is not re-imported here.
client = MlflowClient()
registered_models = client.search_registered_models()

for model in registered_models:
    # Pick the highest version number explicitly: latest_versions can be empty
    # and its first element is not guaranteed to be the newest version.
    newest = max(model.latest_versions, key=lambda mv: int(mv.version), default=None)
    if newest is None:
        print(f"Skipping {model.name}: no versions to alias")
        continue

    client.set_registered_model_alias(
        name=model.name,
        alias="Champion",  # That is the current form of "Production"
        version=newest.version,
    )