In [36]:
import wandb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import concurrent.futures
from tqdm.notebook import tqdm


# Authenticate with W&B
wandb.login(timeout=128)

# Configure project and other parameters if necessary
project_name = "contrastive_exploration"
entity = "pletctj6"

# Retrieve the runs from the project
api = wandb.Api()
runs = api.runs(f"{entity}/{project_name}")

# Initialize a list to store coverage and shannon entropy data
experiments_data = {}

### Check runs

In [35]:
run_0 = runs[0]
# for run in runs:
#     if run.state == "finished":
#         if run_0 is None:
#             run_0 = run
#             break
print('status:', run_0.state)
system_metrics = run_0.history(stream='systemMetrics')
for k in system_metrics.keys():
    if 'system.cpu.' in k and 'cpu_percent' in k:
        print(k)
    #     # check all columns of panda series
    #     print(system_metrics[k].mean())

status: killed


In [37]:
def process_run(run):
    # Vérification de l'état du run
    if run.state != "finished":
        # print(f"Skipping run {run.name} because it is not finished.")
        return None
    # Récupération de la configuration du run
    config = run.config
    exp_name = config.get('exp_name', 'unknown_exp')
    env_name = config.get('env_id', 'unknown_env')
    seed = config.get('seed', 'unknown_seed')
    system_metrics = run.history(stream='systemMetrics')
    cpu_usage = {}
    # per cpu usage 
    for k in system_metrics.keys():
        if 'system.cpu.' in k and 'cpu_percent' in k:
            cpu_usage[k] = system_metrics[k].sum()

    # Retour des données structurées
    return {
        'exp_name': exp_name,
        'env_name': env_name,
        'seed': seed,
        'data': {
            "cpu_usage": cpu_usage,
            'config': config
        }
    }

experiments_data = {}
max_workers = 8
# Utilisation de ThreadPoolExecutor pour paralléliser les exécutions de runs
# Spécifiez le nombre de threads avec max_workers, par exemple 4 threads
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
    futures = {executor.submit(process_run, run): run for run in runs}
    for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures), desc="Processing runs"):
        result = future.result()
        if result is not None:
            exp_name = result['exp_name']
            env_name = result['env_name']
            seed = result['seed']
            data = result['data']

            if exp_name not in experiments_data:
                experiments_data[exp_name] = {}
            if env_name not in experiments_data[exp_name]:
                experiments_data[exp_name][env_name] = {}
            if seed not in experiments_data[exp_name][env_name]:
                experiments_data[exp_name][env_name][seed] = data

Processing runs:   0%|          | 0/1527 [00:00<?, ?it/s]

## Check experiments data 

In [38]:
print(experiments_data.keys())

dict_keys(['smm_ppo', 'metra_ppo', 'lsd_ppo', 'ngu_ppo', 'icm_ppo', 'diayn_ppo', 'csd_ppo', 'v2_ppo_lipshitz_adaptive_sampling', 'rnd_ppo', 'ppo', 'v2_ppo_kl_adaptive_sampling', 'v1_ppo_lipshitz_adaptive_sampling', 'aux_ppo', 'apt_ppo', 'v1_ppo_kl_adaptive_sampling'])


In [3]:
def dict_to_dataframe(experiments_data):
    records = []
    for exp_name, envs in experiments_data.items():
        for env_name, seeds in envs.items():
            for seed, metrics in seeds.items():
                cpu_usage = metrics['cpu_usage']
                memory_usage = metrics['memory_usage']
                global_step = metrics['global_step']
                if is_not_empty(cpu_usage) and is_not_empty(global_step):
                    for cpu, mem, step in zip(cpu_usage, memory_usage, global_step):
                        records.append({
                            'exp_name': exp_name,
                            'env_name': env_name,
                            'seed': seed,
                            'cpu_usage': cpu,
                            'memory_usage': mem,
                            'global_step': step
                        })
    return pd.DataFrame(records)

# Fonction pour vérifier si une liste ou une série est vide
def is_not_empty(obj):
    if isinstance(obj, pd.Series):
        return not obj.empty
    return bool(obj)

# Créer le DataFrame
df = dict_to_dataframe(experiments_data)

# Visualiser l'utilisation des ressources
sns.set(style="whitegrid")
g = sns.FacetGrid(df, col="env_name", hue="exp_name", sharey=False, height=5, aspect=1.5)
g.map(plt.plot, "global_step", "cpu_usage")
g.add_legend()
plt.show()

g = sns.FacetGrid(df, col="env_name", hue="exp_name", sharey=False, height=5, aspect=1.5)
g.map(plt.plot, "global_step", "memory_usage")
g.add_legend()
plt.show()

KeyError: 'exp_name'