In [1]:
import wandb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import concurrent.futures
from tqdm.notebook import tqdm


# Location of the W&B project to analyse
project_name = "run_away_sac_explore"
entity = "pletctj6"

# Log in to W&B before any API call is made
wandb.login(timeout=128)

# Fetch the collection of runs that belong to the project
api = wandb.Api()
runs = api.runs("/".join((entity, project_name)))

# Nested dict that will hold the per-run results (populated in a later cell)
experiments_data = {}

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mpletctj6[0m. Use [1m`wandb login --relogin`[0m to force relogin


### Check runs

In [2]:
# Sanity check on the first run returned by the API
run_0 = runs[0]
print('status:', run_0.state)

# List the per-CPU utilisation columns present in the run's system metrics
system_metrics = run_0.history(stream='systemMetrics')
cpu_percent_keys = [
    k for k in system_metrics.keys()
    if 'system.cpu.' in k and 'cpu_percent' in k
]
for k in cpu_percent_keys:
    print(k)

status: finished
system.cpu.60.cpu_percent
system.cpu.52.cpu_percent
system.cpu.42.cpu_percent
system.cpu.22.cpu_percent
system.cpu.9.cpu_percent
system.cpu.37.cpu_percent
system.cpu.1.cpu_percent
system.cpu.49.cpu_percent
system.cpu.67.cpu_percent
system.cpu.7.cpu_percent
system.cpu.47.cpu_percent
system.cpu.21.cpu_percent
system.cpu.53.cpu_percent
system.cpu.32.cpu_percent
system.cpu.23.cpu_percent
system.cpu.35.cpu_percent
system.cpu.16.cpu_percent
system.cpu.27.cpu_percent
system.cpu.51.cpu_percent
system.cpu.29.cpu_percent
system.cpu.33.cpu_percent
system.cpu.70.cpu_percent
system.cpu.34.cpu_percent
system.cpu.25.cpu_percent
system.cpu.26.cpu_percent
system.cpu.6.cpu_percent
system.cpu.61.cpu_percent
system.cpu.54.cpu_percent
system.cpu.62.cpu_percent
system.cpu.36.cpu_percent
system.cpu.46.cpu_percent
system.cpu.24.cpu_percent
system.cpu.48.cpu_percent
system.cpu.56.cpu_percent
system.cpu.11.cpu_percent
system.cpu.3.cpu_percent
system.cpu.15.cpu_percent
system.cpu.4.cpu_percent
s

In [3]:
def process_run(run):
    """Summarise the CPU usage recorded in the system metrics of one run.

    Args:
        run: Object exposing ``state``, ``config`` (supports ``.get``) and
            ``history(stream=...)`` returning a table indexed by metric name.

    Returns:
        ``None`` if the run is not finished, or if its system metrics contain
        no ``system.cpu.*.cpu_percent`` column (averaging an empty list would
        otherwise produce NaN and corrupt the downstream normalisation).
        Otherwise a dict with keys ``exp_name``, ``env_name``, ``seed`` and
        ``data``; ``data`` holds ``cpu_usage`` and the run ``config``.
    """
    # Only finished runs are analysed
    if run.state != "finished":
        return None

    # Identify the run from its configuration, with explicit fallbacks
    config = run.config
    exp_name = config.get('exp_name', 'unknown_exp')
    env_name = config.get('env_id', 'unknown_env')
    seed = config.get('seed', 'unknown_seed')

    system_metrics = run.history(stream='systemMetrics')

    # One value per CPU column: the column total, converted from percent to ratio.
    # NOTE(review): this is a sum over all logged samples, so it grows with the
    # number of samples in the run -- confirm a total (not .mean()) is intended.
    cpu_usage_per_cpu = [
        system_metrics[k].sum() / 100.0
        for k in system_metrics.keys()
        if 'system.cpu.' in k and 'cpu_percent' in k
    ]
    if not cpu_usage_per_cpu:
        # No per-CPU metric logged: skip the run instead of returning NaN
        return None
    mean_cpu_usage = np.mean(cpu_usage_per_cpu)

    return {
        'exp_name': exp_name,
        'env_name': env_name,
        'seed': seed,
        'data': {
            "cpu_usage": mean_cpu_usage,
            'config': config
        }
    }

experiments_data = {}
max_workers = 8
# Process the runs concurrently on a pool of `max_workers` threads
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
    futures = {executor.submit(process_run, run): run for run in runs}
    completed = concurrent.futures.as_completed(futures)
    for future in tqdm(completed, total=len(futures), desc="Processing runs"):
        result = future.result()
        if result is None:
            continue
        # Index by experiment -> environment -> seed; the first result seen
        # for a given (experiment, environment, seed) triple is the one kept.
        per_env = experiments_data.setdefault(result['exp_name'], {})
        per_seed = per_env.setdefault(result['env_name'], {})
        per_seed.setdefault(result['seed'], result['data'])

Processing runs:   0%|          | 0/1294 [00:00<?, ?it/s]

## Check experiments data 

In [5]:
# Show the experiment names for which at least one run was collected
print(experiments_data.keys())
# Disabled example of drilling down to the value stored for a single run:
# print(experiments_data['metra_ppo']['HalfCheetah-v3'][1]['cpu_usage'])

dict_keys(['sac_vanilla', 'rnd_sac', 'ngu_sac', 'icm_sac', 'v2wsac', 'diayn_sac', 'smm_sac', 'lsd_sac', 'aux_sac', 'metra_sac', 'v2klsac', 'apt_sac', 'v1klsac', 'csd_sac', 'v1wsac'])


## Process

In [6]:
def process_data(experiments_data):
    """Aggregate CPU usage per experiment and min-max normalise it to 0-100.

    For each experiment, the ``cpu_usage`` values are first averaged over the
    seeds of each environment; the mean and standard deviation of those
    per-environment averages are then taken across environments.

    Args:
        experiments_data: Nested mapping
            ``{exp_name: {env_name: {seed: {'cpu_usage': value, ...}}}}``.

    Returns:
        ``(mean_cpu_usage_per_exp, std_cpu_usage_per_exp)``, two dicts keyed by
        experiment name. Means are rescaled so the smallest maps to 0 and the
        largest to 100; standard deviations are multiplied by the same factor
        so both are expressed on the same scale. If all means are equal (e.g.
        a single experiment) every mean is 0.0 and the standard deviations are
        left unscaled, since no scale can be derived. Empty input yields
        ``({}, {})``.
    """
    if not experiments_data:
        # max()/min() below would raise on an empty collection
        return {}, {}

    mean_cpu_usage_per_exp = {}
    std_cpu_usage_per_exp = {}
    for exp_name, envs in experiments_data.items():
        # Average over seeds first so every environment carries equal weight
        cpu_usage_per_env = [
            np.mean([run_data['cpu_usage'] for run_data in seeds.values()])
            for seeds in envs.values()
        ]
        mean_cpu_usage_per_exp[exp_name] = np.mean(cpu_usage_per_env)
        std_cpu_usage_per_exp[exp_name] = np.std(cpu_usage_per_env)

    # Min-max normalisation across experiments
    max_cpu = max(mean_cpu_usage_per_exp.values())
    min_cpu = min(mean_cpu_usage_per_exp.values())
    cpu_range = max_cpu - min_cpu
    if cpu_range == 0:
        # Degenerate case: avoid 0/0 and report every experiment at the minimum
        for exp_name in mean_cpu_usage_per_exp:
            mean_cpu_usage_per_exp[exp_name] = 0.0
    else:
        for exp_name in mean_cpu_usage_per_exp:
            mean_cpu_usage_per_exp[exp_name] = (mean_cpu_usage_per_exp[exp_name] - min_cpu) / cpu_range * 100.0
            # The spread must follow the same linear rescaling as the mean
            std_cpu_usage_per_exp[exp_name] = std_cpu_usage_per_exp[exp_name] / cpu_range * 100.0
    return mean_cpu_usage_per_exp, std_cpu_usage_per_exp
                



# Compute the min-max normalised mean and the standard deviation of CPU usage per experiment
mean_cpu_usage_per_exp, std_cpu_usage_per_exp = process_data(experiments_data)




In [None]:
# Remove the 'ppo' entry if present. The default value avoids a KeyError when
# no 'ppo' experiment was collected and keeps the cell safe to re-run.
mean_cpu_usage_per_exp.pop('ppo', None)
std_cpu_usage_per_exp.pop('ppo', None)

## Plot 

In [None]:
# Horizontal bar chart: one bar per experiment, bar length = mean CPU usage,
# black markers with whiskers = +/- standard deviation.
df = pd.DataFrame.from_dict(mean_cpu_usage_per_exp, orient='index', columns=['mean_cpu_usage'])
df['std_cpu_usage'] = df.index.map(std_cpu_usage_per_exp)
df = df.sort_values(by='mean_cpu_usage', ascending=False)
print('df:', df)

# Assign one colour per experiment, cycling through the palette if needed
unique_exp_names = df.index
palette = plt.get_cmap("tab20").colors
color_map = {exp_name: palette[i % len(palette)] for i, exp_name in enumerate(unique_exp_names)}

# Plotting
fig, ax = plt.subplots(figsize=(14, 8))
barplot = sns.barplot(x='mean_cpu_usage', y=df.index, data=df, palette=color_map, ax=ax)
# Colour the bars through `patches` (the bar rectangles) rather than by
# position in get_children(), which also contains non-bar artists.
for bar, exp_name in zip(barplot.patches, unique_exp_names):
    bar.set_color(color_map[exp_name])

# Error bars are drawn once, at the numeric bar positions 0..n-1, with caps
ax.errorbar(
    x=df['mean_cpu_usage'],
    y=np.arange(len(df)),
    xerr=df['std_cpu_usage'],
    fmt='o',
    color='black',
    capsize=5
)
ax.set_xlabel('Mean CPU Usage (%)')
ax.set_ylabel('Experiment')
ax.set_title('Mean CPU Usage per Experiment')
plt.show()
