In [1]:
import mlflow
import pandas as pd
from datetime import datetime

# 1. Configuration (unchanged)
# Point the MLflow client at the SQLite tracking store. The URI holds a
# relative path — presumably resolved against the kernel's working directory;
# verify before running from another location.
db_path = "sqlite:///../results/mlflow.db" # Make sure this path is correct
mlflow.set_tracking_uri(db_path)

def get_runs_df(experiment_names, start_after=None, child_only=True):
    """
    Fetch finished MLflow runs as a pandas DataFrame.

    Args:
        experiment_names: Either a single experiment name (``str``) or a
            list of experiment names to search. A single string is wrapped
            in a one-element list before being handed to MLflow.
        start_after: Optional ``datetime``. When given, only runs whose
            ``start_time`` is greater than or equal to this instant are
            returned (the bound is inclusive). The value is converted with
            ``datetime.timestamp()``, so a naive datetime is interpreted in
            the local timezone of the machine running the notebook.
        child_only: When True (the default), only runs whose
            ``mlflow.parentRunId`` tag differs from the empty string are
            returned.

    Returns:
        Whatever ``mlflow.search_runs`` returns for ``output_format="pandas"``.
    """
    # Treat a bare string as ONE experiment name instead of forwarding it
    # as if it were a sequence of names.
    if isinstance(experiment_names, str):
        experiment_names = [experiment_names]

    # Build the MLflow filter expression (SQL-like syntax); every clause is
    # AND-ed together below.
    filter_clauses = ["attributes.status = 'FINISHED'"]

    if start_after:
        # MLflow stores run times as epoch milliseconds.
        ts = int(start_after.timestamp() * 1000)
        filter_clauses.append(f"attributes.start_time >= {ts}")

    if child_only:
        # Child runs (trials) carry a parentRunId tag. The clause is an
        # inequality against the empty string, not a NULL test.
        # NOTE(review): runs that lack the tag are expected to be dropped by
        # this comparison — confirm on the tracking backend in use.
        filter_clauses.append("tags.mlflow.parentRunId != ''")

    filter_string = " AND ".join(filter_clauses)

    # output_format="pandas" makes MLflow build the DataFrame itself, so no
    # hand-written run-to-row conversion is needed.
    df = mlflow.search_runs(
        experiment_names=experiment_names,
        filter_string=filter_string,
        output_format="pandas",
    )

    return df

# 2. Load the data
start_date = datetime(2025, 11, 23)

# Unsupervised experiment. The extra column labels each row with its origin
# so the two sources stay distinguishable after concatenation (it overwrites
# any column of the same name already present).
unsupervised_df = get_runs_df(
    experiment_names=["tsseg-experiment-unsupervised-6-12"],
    start_after=start_date,
).assign(supervision_context="unsupervised")

# Supervised experiment, labelled "semi_supervised".
supervised_df = get_runs_df(
    experiment_names=["tsseg-experiment-supervised-6-12"],
    start_after=start_date,
).assign(supervision_context="semi_supervised")

# 3. Concatenate both result sets under a fresh 0..n-1 index.
runs_df = pd.concat((unsupervised_df, supervised_df), ignore_index=True)

# Optional clean-up: MLflow prefixes columns with 'params.', 'metrics.' and
# 'tags.'. To work with short names instead, uncomment:
# runs_df.columns = [c.replace("params.", "").replace("metrics.", "").replace("tags.", "") for c in runs_df.columns]

print(f"Total runs: {len(runs_df)}")
display(runs_df.head())

2025/12/11 01:31:08 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2025/12/11 01:31:08 INFO mlflow.store.db.utils: Updating database tables
2025-12-11 01:31:08 INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
2025-12-11 01:31:08 INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
2025-12-11 01:31:08 INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
2025-12-11 01:31:08 INFO  [alembic.runtime.migration] Will assume non-transactional DDL.


Total runs: 414


Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.f1_score_recall,metrics.f1_score_score,metrics.gaussian_f1_matched_weight,metrics.bidirectional_covering_prediction_covering,...,params.trial_supervision_override_max_cps,params.trial_supervision_change_points,params.trial_supervision_override_n_states,params.trial_supervision_override_n_changepoints,params.trial_supervision_override_n_segments,params.trial_supervision_override_n_cps,params.trial_supervision_override_n_max_states,params.trial_supervision_override_k_max,params.trial_supervision_override_alphabet_size,params.trial_supervision_override_n_change_points
0,0fa05df0d83f4d8081b3b213424027cc,6,FINISHED,/scratch/fchavell/tsseg-exp/mlartifacts/6/0fa0...,2025-12-08 08:41:19.696000+00:00,2025-12-08 09:21:37.586000+00:00,0.0,0.0,8.844237e-12,0.219179,...,,,,,,,,,,
1,b15d1c6b0b204092b0f1d2215441950b,6,FINISHED,/scratch/fchavell/tsseg-exp/mlartifacts/6/b15d...,2025-12-08 08:06:31.979000+00:00,2025-12-08 08:41:19.647000+00:00,0.142857,0.25,0.9813833,0.394783,...,,,,,,,,,,
2,bce2c51f3d974eb19c244d968c5b8c23,6,FINISHED,/scratch/fchavell/tsseg-exp/mlartifacts/6/bce2...,2025-12-08 06:59:38.953000+00:00,2025-12-08 08:06:31.933000+00:00,0.0,0.0,0.0002349243,0.395358,...,,,,,,,,,,
3,8b55307d62684319bee2cbd10696d717,6,FINISHED,/scratch/fchavell/tsseg-exp/mlartifacts/6/8b55...,2025-12-08 06:35:24.115000+00:00,2025-12-08 06:59:38.904000+00:00,0.0,0.0,1.9923010000000002e-39,0.252969,...,,,,,,,,,,
4,237abfcad23940c5931c44ffafb1af5d,6,FINISHED,/scratch/fchavell/tsseg-exp/mlartifacts/6/237a...,2025-12-08 04:53:39.313000+00:00,2025-12-08 06:35:24.053000+00:00,0.0,0.0,1.053995e-20,0.11918,...,,,,,,,,,,


In [4]:
# Show the column labels of the combined runs frame.
runs_df.columns

Index(['run_id', 'experiment_id', 'status', 'artifact_uri', 'start_time',
       'end_time', 'metrics.f1_score_recall', 'metrics.f1_score_score',
       'metrics.gaussian_f1_matched_weight',
       'metrics.bidirectional_covering_prediction_covering',
       'metrics.execution_time_seconds', 'metrics.gaussian_f1_score',
       'metrics.bidirectional_covering_ground_truth_covering',
       'metrics.gaussian_f1_recall', 'metrics.bidirectional_covering_score',
       'metrics.f1_score_precision', 'metrics.covering_score',
       'metrics.gaussian_f1_precision', 'metrics.state_matching_score_score',
       'metrics.adjusted_mutual_info_score',
       'metrics.adjusted_rand_index_score',
       'metrics.weighted_adjusted_rand_index_score',
       'metrics.normalized_mutual_info_score', 'params.modality',
       'params.algorithm_semi_supervised', 'params.supervision_mode',
       'params.algorithm_name', 'tags.dataset_name', 'tags.algorithm_task',
       'tags.mlflow.user', 'tags.dataset_tr