This notebook shows how to get a summary of the experiments, as well as how to retrieve, load, and use the registered models from DagsHub.

## Libraries

In [1]:
import mlflow
import dagshub
import pandas as pd
from mlflow.tracking import MlflowClient

## DagsHub Connection

In [2]:
# Initialize the DagsHub connection.
# The repository coordinates are defined once so that the dagshub.init() call
# and the MLflow tracking URI below cannot drift apart.
REPO_OWNER = 'felytz'
REPO_NAME = 'waveled'

dagshub.init(repo_owner=REPO_OWNER, repo_name=REPO_NAME, mlflow=True)
mlflow.set_tracking_uri(f'https://dagshub.com/{REPO_OWNER}/{REPO_NAME}.mlflow')

## Experiments

In [3]:
client = MlflowClient()

# Get all experiments
experiments = mlflow.search_experiments()

# One summary record per run that passes the "has a model" check below
model_summaries = []

for exp in experiments:
    # `experiment_ids` is documented as a list of IDs, so pass a one-element
    # list explicitly instead of relying on a bare string being accepted.
    runs = mlflow.search_runs(experiment_ids=[exp.experiment_id])

    # `runs` is a DataFrame; each row is one run with flattened
    # `metrics.*`, `params.*` and `tags.*` columns.
    for _, run in runs.iterrows():
        run_id = run.run_id
        artifacts = client.list_artifacts(run_id)
        artifact_paths = [artifact.path for artifact in artifacts]

        # Check if this run has a model
        # NOTE(review): this is a plain substring test on the artifact paths,
        # so any file whose name contains "model" also qualifies - confirm
        # that this heuristic matches how the models were logged.
        if any("model" in path for path in artifact_paths):
            model_info = {
                "run_id": run_id,
                "experiment_id": exp.experiment_id,
                "experiment_name": exp.name,
                "run_name": run.get('tags.mlflow.runName', ''),
                # Every metrics.* / params.* column of the frame is copied, so
                # keys a run did not log show up with NaN / None values.
                "metrics": {k: v for k, v in run.items() if k.startswith('metrics.')},
                "params": {k: v for k, v in run.items() if k.startswith('params.')},
                "artifacts": artifact_paths
            }
            model_summaries.append(model_info)

# Convert to DataFrame for easy viewing
summary_df = pd.DataFrame(model_summaries)
summary_df

Unnamed: 0,run_id,experiment_id,experiment_name,run_name,metrics,params,artifacts
0,33f4b4c845cc4591a9b194d9cea9e00d,0,waveled-models,RandomForest_20250514_004206,"{'metrics.f1_score': 0.8598249953613295, 'metr...","{'params.n_estimators': '100', 'params.max_dep...","[RandomForest, classification_report.json, sca..."
1,6851562d7ea04c16ab55c3ccc4af2436,0,waveled-models,XGBoost_20250514_001513,"{'metrics.f1_score': 0.875244277712905, 'metri...","{'params.n_estimators': '100', 'params.max_dep...","[XGBoost, classification_report.json, scaler_m..."
2,d2e3ecf646994825977e6271a44f2a81,0,waveled-models,RandomForest_20250514_001200,"{'metrics.f1_score': 0.8598249953613295, 'metr...","{'params.n_estimators': '100', 'params.max_dep...","[RandomForest, classification_report.json, sca..."
3,6551a1efc94948a090bb70c6ef95d29a,0,waveled-models,LogisticRegression_PCA,"{'metrics.f1_score': 0.8570018456360606, 'metr...","{'params.n_estimators': None, 'params.max_dept...","[LogisticRegression_PCA, classification_report..."
4,75a4c1c90102464e8f085bc40ebd1357,0,waveled-models,LogisticRegression_PCA,"{'metrics.f1_score': 0.8570018456360606, 'metr...","{'params.n_estimators': None, 'params.max_dept...","[LogisticRegression_PCA, classification_report..."
5,0fdbcba24aef4aa8999c8b0658def247,0,waveled-models,LogisticRegression_PCA,"{'metrics.f1_score': 0.8625711828013963, 'metr...","{'params.n_estimators': None, 'params.max_dept...","[LogisticRegression_PCA, classification_report..."
6,58ced13ac0804341ad7c863f19b63ca3,0,waveled-models,LogisticRegression_PCA,"{'metrics.f1_score': 0.8625711828013963, 'metr...","{'params.n_estimators': None, 'params.max_dept...","[LogisticRegression_PCA, class_distribution.js..."
7,8a475a1b868847a6a05429b9362fc9b4,0,waveled-models,LogisticRegression_PCA,"{'metrics.f1_score': 0.8625711828013963, 'metr...","{'params.n_estimators': None, 'params.max_dept...","[LogisticRegression_PCA, classification_report..."
8,95a82933d456494984644985dfa5d82b,0,waveled-models,XGBoost,"{'metrics.f1_score': 0.8737171503593492, 'metr...","{'params.n_estimators': '100', 'params.max_dep...","[XGBoost, classification_report.json, scaler_m..."
9,639a4d6c86144fbcab73884145ff9006,0,waveled-models,RandomForest,"{'metrics.f1_score': 0.8625508814572417, 'metr...","{'params.n_estimators': '50', 'params.max_dept...","[RandomForest, classification_report.json, sca..."


## Models

In [7]:
client = MlflowClient()
registered_models = client.search_registered_models()

for registered_model in registered_models:
    # `latest_versions` can be empty and may hold several entries (MLflow
    # documents it as the latest version per stage), so take the highest
    # version number instead of blindly indexing element 0.
    latest = max(
        registered_model.latest_versions or [],
        key=lambda mv: int(mv.version),
        default=None,
    )
    print(f"Model Name: {registered_model.name}")
    # Prints "None" when the model has no versions yet.
    print(f"Latest Version: {latest.version if latest else None}")
    print(f"Description: {registered_model.description}")
    print("-----")

Model Name: LinearSVC_PCA
Latest Version: 6
Description: 
-----
Model Name: LogisticRegression_PCA
Latest Version: 6
Description: 
-----
Model Name: RandomForest
Latest Version: 6
Description: 
-----
Model Name: XGBoost
Latest Version: 5
Description: 
-----


In [14]:
# Load the first registered model (at its latest version) together with its
# metadata. Only one model is inspected, so it is selected explicitly rather
# than with a `for ... break` loop that shadowed its own loop variable.
if not registered_models:
    raise RuntimeError("No registered models found; nothing to load.")

first_registered = registered_models[0]
model_name = first_registered.name

# `latest_versions` can be empty and may hold several entries (MLflow
# documents it as the latest version per stage), so pick the highest version
# number instead of indexing element 0.
latest = max(
    first_registered.latest_versions or [],
    key=lambda mv: int(mv.version),
    default=None,
)
if latest is None:
    raise RuntimeError(f"Registered model {model_name!r} has no versions to load.")
version = latest.version

# Single source of truth for the registry URI used by both calls below
model_uri = f"models:/{model_name}/{version}"

# For a registered model
model = mlflow.pyfunc.load_model(model_uri)

# Get the model info
model_info = mlflow.models.get_model_info(model_uri)

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [15]:
# Report the metadata held in `model_info`: flavors first, then the signature.
flavors = model_info.flavors

# 1. Every flavor the model was saved with
print("Available flavors:", flavors.keys())

# 2. The generic python_function (pyfunc) flavor; prints None when absent
print("\nPyfunc flavor details:", flavors.get("python_function"))

# 3. Framework-specific flavors, reported only when present
framework_headings = {
    "sklearn": "\nSklearn model details:",
    "tensorflow": "\nTensorFlow model details:",
    "pytorch": "\nPyTorch model details:",
}
for flavor_name, heading in framework_headings.items():
    if flavor_name in flavors:
        print(heading, flavors[flavor_name])

# 4. Input/output schema, when a signature is present
signature = model_info.signature
if not signature:
    print("\nNo signature available - model was saved without schema")
else:
    print("\nInput schema:", signature.inputs)
    print("Output schema:", signature.outputs)

Available flavors: dict_keys(['python_function', 'sklearn'])

Pyfunc flavor details: {'env': {'conda': 'conda.yaml', 'virtualenv': 'python_env.yaml'}, 'loader_module': 'mlflow.sklearn', 'model_path': 'model.pkl', 'predict_fn': 'predict', 'python_version': '3.11.0'}

Sklearn model details: {'code': None, 'pickled_model': 'model.pkl', 'serialization_format': 'cloudpickle', 'sklearn_version': '1.6.1'}

Input schema: [Tensor('float32', (-1, 965))]
Output schema: [Tensor('float32', (-1,))]


In [16]:
# Point the "Champion" alias of every registered model at its latest version.
# This writes to the remote model registry.
# (MlflowClient is already imported in the Libraries cell at the top.)
client = MlflowClient()
registered_models = client.search_registered_models()

for registered_model in registered_models:
    # `latest_versions` can be empty and may hold several entries (MLflow
    # documents it as the latest version per stage), so pick the highest
    # version number instead of indexing element 0.
    latest = max(
        registered_model.latest_versions or [],
        key=lambda mv: int(mv.version),
        default=None,
    )
    if latest is None:
        # A model without versions has nothing the alias could point to.
        continue

    client.set_registered_model_alias(
        name=registered_model.name,
        alias="Champion",  # Aliases are the current replacement for the "Production" stage
        version=latest.version,
    )