In [1]:
import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd
import pprint



In [2]:
def aggregate_results(experiment_suffix="50-5-NEW", run_name_prefix="fold", client=None):
    """Collect metrics of finished MLflow runs, grouped by dataset and model type.

    Every experiment whose name ends with ``experiment_suffix`` is scanned. A run
    contributes only if its ``mlflow.runName`` tag starts with ``run_name_prefix``
    and its status is ``"FINISHED"``.

    Args:
        experiment_suffix: Suffix an experiment name must end with to be scanned.
        run_name_prefix: Prefix a run's ``mlflow.runName`` tag must start with.
        client: Object exposing ``search_experiments()`` and
            ``search_runs(experiment_ids)``. A new ``MlflowClient()`` is created
            when omitted.

    Returns:
        dict: ``{dataset: {model_type: [metrics_dict, ...]}}`` with one metrics
        dict per accepted run, in the order the client returned the runs. The
        ``total_time`` metric is left out of every metrics dict.

    Raises:
        KeyError: If an accepted run lacks the ``dataset`` or ``model_type`` param.
    """
    if client is None:
        client = MlflowClient()

    results = {}
    for exp in client.search_experiments():
        if not exp.name.endswith(experiment_suffix):
            continue
        for run in client.search_runs([exp.experiment_id]):
            # Serialize once per run instead of once per field access.
            run_dict = run.to_dictionary()
            data = run_dict["data"]

            # A run without a name tag cannot match the prefix; skip it
            # instead of aborting the whole aggregation with a KeyError.
            run_name = data["tags"].get("mlflow.runName", "")
            if not run_name.startswith(run_name_prefix):
                continue
            if run_dict["info"]["status"] != "FINISHED":
                continue

            params = data["params"]
            dataset_name = params["dataset"]
            alg_name = params["model_type"]

            # Build a filtered copy so the dict handed out by the run is not mutated.
            metrics = {
                key: value
                for key, value in data["metrics"].items()
                if key != "total_time"
            }

            results.setdefault(dataset_name, {}).setdefault(alg_name, []).append(metrics)

    return results

In [3]:
# Query MLflow and gather the metrics of every matching finished run.
results = aggregate_results()

In [4]:
# Display the nested {dataset: {model_type: [metrics per run]}} mapping for inspection.
results

{'INNHotelsGroup': {'amformer': [{'Avg_Recall': 0.8293794433075307,
    'val_loss': 0.29517762313850804,
    'LogLoss': 0.295177623138508,
    'Avg_Precision': 0.8672451111500388,
    'inference_time': 1.342641830444336,
    'AUC': 0.9384192696593822,
    'Accuracy': 0.8691657478868063,
    'train_time': 866.2993321418762,
    'F1': 0.9066351953842119},
   {'Avg_Recall': 0.8432272424404854,
    'val_loss': 0.26937581276876227,
    'LogLoss': 0.26937581276876227,
    'Avg_Precision': 0.8684894541264497,
    'inference_time': 1.3443169593811035,
    'AUC': 0.9460936294115403,
    'Accuracy': 0.8754134509371555,
    'train_time': 865.0146727561951,
    'F1': 0.909984067976633},
   {'Avg_Recall': 0.8386964310375986,
    'val_loss': 0.2801528592587462,
    'LogLoss': 0.2801528592587462,
    'Avg_Precision': 0.8603800042958794,
    'inference_time': 1.3465819358825684,
    'AUC': 0.9413536670847641,
    'Accuracy': 0.8699007717750827,
    'train_time': 864.7250964641571,
    'F1': 0.90570058

In [5]:
def nested_dict_to_df(results_dict):
    """Flatten ``{dataset: {method: [fold_metrics, ...]}}`` into a DataFrame.

    Each fold's metrics dict becomes one row carrying ``dataset``, ``method`` and
    a zero-based ``fold`` position, followed by the metric entries themselves.
    A metric key equal to one of those three names overrides the identifying value.
    """
    rows = [
        {"dataset": ds, "method": method, "fold": position, **fold_metrics}
        for ds, per_method in results_dict.items()
        for method, fold_list in per_method.items()
        for position, fold_metrics in enumerate(fold_list)
    ]
    return pd.DataFrame(rows)

In [6]:
# Flatten the nested results into one row per (dataset, method, fold).
out = nested_dict_to_df(results)

In [7]:
# Persist the flattened table. With pandas' default index=True the row index
# is written as the first, unnamed CSV column.
out.to_csv('out.csv')