In [16]:
import comet_ml

import os
os.environ["COMET_API_KEY"] = "I5AiXfuD0TVuSz5UOtujrUM9i"

PROJECT_NAME="robust-training"
WORKSPACE_NAME="maxheuillet"

# Get all Experiemnts in workspace
api = comet_ml.API()

# all_experiments_in_workspace = api.get_experiments(WORKSPACE_NAME)
all_experiments_in_project = api.get_experiments(WORKSPACE_NAME, PROJECT_NAME)


In [17]:
# Convert the experiment summary to an APIExperiment using the experiment key
from tqdm.notebook import tqdm

data = {}
for i in tqdm( range(len(all_experiments_in_project)) ):
    summary = all_experiments_in_project[i].get_parameters_summary()

    d = {}
    for e in summary:
        if e['name'] in ['run_id', 'arch','aug','sched', 'batch_size','init_lr','sched','pruning_strategy','pruning_ratio','loss_function','iterations']:
            if e['name'] == 'run_id':
                id = e['valueMax']
            else:
                d[e['name']] = e['valueMax']
        
    if id not in data.keys():
        try:
            adv_acc  = all_experiments_in_project[i].get_metrics('final_robust_accuracy')[0]
            d[ 'adv_acc' ] = adv_acc['metricValue']
            nat_acc = all_experiments_in_project[i].get_metrics('final_clean_accuracy')[0]
            d[ 'nat_acc' ] = nat_acc['metricValue']
        except:
            d[ 'adv_acc' ] = None
            d[ 'nat_acc' ] = None

        data[id] = d

data

  0%|          | 0/1763 [00:00<?, ?it/s]

{'51344299': {'arch': 'resnet50',
  'batch_size': '1042',
  'init_lr': '0.2',
  'iterations': '60',
  'loss_function': 'TRADES_v2',
  'pruning_ratio': '0.7',
  'pruning_strategy': 'decay_based_v2',
  'sched': 'sched',
  'adv_acc': '0.2617',
  'nat_acc': '0.7769'},
 '51344302': {'arch': 'resnet50',
  'batch_size': '1042',
  'init_lr': '0.2',
  'iterations': '60',
  'loss_function': 'TRADES_v2',
  'pruning_ratio': '0.7',
  'pruning_strategy': 'uncertainty',
  'sched': 'sched',
  'adv_acc': '0.2311',
  'nat_acc': '0.7605'},
 '51344301': {'arch': 'resnet50',
  'batch_size': '1042',
  'init_lr': '0.2',
  'iterations': '60',
  'loss_function': 'TRADES_v2',
  'pruning_ratio': '0.7',
  'pruning_strategy': 'random',
  'sched': 'sched',
  'adv_acc': '0.2757',
  'nat_acc': '0.7996'},
 '51344298': {'arch': 'resnet50',
  'batch_size': '1042',
  'init_lr': '0.2',
  'iterations': '60',
  'loss_function': 'TRADES_v2',
  'pruning_ratio': '0.7',
  'pruning_strategy': 'decay_based_v3',
  'sched': 'sched'

In [20]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Assuming 'data' is defined
df = pd.DataFrame(data).T
df['adv_acc'] = pd.to_numeric(df['adv_acc'], errors='coerce')
df['nat_acc'] = pd.to_numeric(df['nat_acc'], errors='coerce')
df['aug'] = df['aug'].fillna('aug')

pruning_ratio = '0.3'
batch_size = '128'
aug = 'aug'
sched = 'sched'
init_lr = '0.01'
name = pruning_ratio+'_'+batch_size+'_'+aug+'_'+sched+'_'+init_lr

# Apply filtering
df = df[(df['pruning_ratio'] == pruning_ratio) & 
        (df['batch_size'] == batch_size) &
        (df['aug'] == aug) &
        (df['sched'] == sched) &
        (df['init_lr'] == init_lr)   ]

# Sort the dataframe by natural and adversarial accuracy
sorted_df = df.sort_values(by=['nat_acc', 'adv_acc'], ascending=[False, False])

# Identify the Pareto frontier
pareto_front = []
current_best = -np.inf

for index, row in sorted_df.iterrows():
    if row['adv_acc'] > current_best:
        pareto_front.append((row['nat_acc'], row['adv_acc'], row['pruning_strategy']))
        current_best = row['adv_acc']

pareto_front = np.array(pareto_front)

# Color map for different pruning strategies
color_map = {
    'random': 'blue',
    'TS_pruning': 'green', 
    'score_v1': 'orange',
    'score_v2': 'purple',
    'decay_based_v2': 'brown',
    'decay_based_v3': 'magenta',
    'uncertainty': 'red'
}

# Step 3: Plot using plotly
fig = go.Figure()

# Add all methods as separate scatter points, one for each pruning_strategy
for strategy, color in color_map.items():
    df_subset = df[df['pruning_strategy'] == strategy]
    fig.add_trace(go.Scatter(
        x=df_subset['nat_acc'],
        y=df_subset['adv_acc'],
        mode='markers',
        marker=dict(color=color, size=10),
        name=strategy,
        text=df_subset['pruning_strategy'],  # Hover text for each point
        showlegend=True
    ))

# Add Pareto frontier as a red line but retain original marker colors
fig.add_trace(go.Scatter(
    x=pareto_front[:, 0].astype(float),
    y=pareto_front[:, 1].astype(float),
    mode='lines+markers',
    line=dict(color='black', width=2),
    marker=dict(
        color=[color_map[strategy] for strategy in pareto_front[:, 2]],
        size=10
    ),
    name='Pareto Frontier',
    text=pareto_front[:, 2],  # Hover text for Pareto points
    showlegend=False
))

# Update layout with legend inside the figure and square aspect ratio
fig.update_layout(
    title='Frontier Analysis: ' + name,
    xaxis_title='Clean Accuracy',
    yaxis_title='Robust Accuracy',
    xaxis=dict(range=[0.7, 0.9]),
    yaxis=dict(range=[0.25, 0.5]),
    margin=dict(l=0, r=0, t=35, b=0),  # Margins set to 0
    width=400,  # Set figure width to 3 inches (300px at 96dpi)
    height=400,  # Set figure height to 3 inches (300px at 96dpi)
    showlegend=True,
    legend=dict(
        x=0.02,  # Position the legend inside the figure
        y=0.98,
        traceorder="normal",
        bgcolor="rgba(255, 255, 255, 1)",  # Semi-transparent background
        bordercolor="Black",
        borderwidth=1
    )
)

# Display the plot
fig.show()

# Save the figure
fig.write_image("./figures/pareto_front_{}.png".format(name))


In [6]:
df

Unnamed: 0,arch,batch_size,init_lr,iterations,loss_function,pruning_ratio,pruning_strategy,sched,adv_acc,nat_acc,aug
51357313,resnet50,1042,0.01,60,TRADES_v2,0.3,decay_based_v3,nosched,,,noaug
51357316,resnet50,1042,0.01,60,TRADES_v2,0.3,decay_based_v2,nosched,,,noaug
51357331,resnet50,1042,0.01,60,TRADES_v2,0.3,score_v2,nosched,,,noaug
51357322,resnet50,1042,0.01,60,TRADES_v2,0.3,random,nosched,,,noaug
51357328,resnet50,1042,0.01,60,TRADES_v2,0.3,score_v1,nosched,,,noaug
51357325,resnet50,1042,0.01,60,TRADES_v2,0.3,uncertainty,nosched,,,noaug
51357319,resnet50,1042,0.01,60,TRADES_v2,0.3,TS_pruning,nosched,,,noaug
51357553,resnet50,1042,0.01,60,TRADES_v2,0.3,decay_based_v3,nosched,,,noaug
51357560,resnet50,1042,0.01,60,TRADES_v2,0.3,TS_pruning,nosched,0.4239,0.8349,noaug
51357557,resnet50,1042,0.01,60,TRADES_v2,0.3,decay_based_v2,nosched,0.4072,0.8389,noaug


In [4]:
import plotly.graph_objects as go

metric = 'final_PGD_accuracy_mean' #'final_clean_accuracy_mean'
# Convert the learning rate column to string to match keys in the lr_styles dictionary
data['learning_rate'] = data['learning_rate'].astype(str)

# Update the strategy_colors dictionary to include all unique strategies from the data
strategy_colors = {'random': 'red', 'uncertainty': 'blue'}

# Define line styles for learning rates
lr_styles = {'0.01': 'dot', '0.001': 'solid',  '0.0001': 'dashdot', '1e-05': 'longdash'}

# Reinitialize the figure with increased marker size and improved legend
fig = go.Figure()

# Add traces for each combination of sched and active_strategy
for sched in data['scheduler'].unique():
    for active_strategy in data['active_strategy'].unique():
        subset = data[(data['scheduler'] == sched) & (data['active_strategy'] == active_strategy)]
        for lr in subset['learning_rate'].unique():
            subsubset = subset[subset['learning_rate'] == lr]
            fig.add_trace(go.Scatter(
                x=subsubset['size'],
                y=subsubset[metric], 
                mode='lines+markers',
                name=f'{active_strategy}, {sched}, {lr}',
                line=dict(color=strategy_colors[active_strategy], dash=lr_styles[lr]),
                marker=dict(symbol='circle' if sched == 'nosched' else 'square', size=10)
            ))

# Update layout for better legend visibility
fig.update_layout(
    margin=dict(l=0, r=0, t=50, b=0),
    title='{} by Size and Strategy'.format(metric),
    xaxis_title='Size',
    yaxis_title=metric,
    legend_title='Strategy Details',
    legend=dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="right",
        x=1.3,
        title_font=dict(size=15),
        font=dict(size=15)
    )
)

# fig.write_image("./figures/results_{}_{}_{}_{}_{}.pdf".format(metric, data_name, model, n_rounds, nb_epochs) )

In [20]:
import plotly.graph_objects as go
import pandas as pd

# Load and prepare the data
new_data = pd.read_csv('results/means.csv', skiprows=2)
new_data.columns = ['active_strategy', 'size', 'final_clean_accuracy',
                    'final_clean_accuracy_std', 'final_PGD_accuracy', 'final_PGD_accuracy_std']
new_data['size'] = new_data['size'].astype(int)

for performance in ['PGD_accuracy', 'clean_accuracy' ]:
# performance = 'final_PGD_accuracy' 
# # performance = 'final_clean_accuracy'
    fig = go.Figure()

    # fig.add_trace(go.Scatter(x=[0, 50000], y=[ init_means_stds['init_'+performance]['mean'][0], init_means_stds['init_'+performance]['mean'][0] ],
    #         mode='lines', line=dict(color='rgba({},{},{},1)'.format(0,0,204), width=2, dash='dot'),
    #                     name='Init Accuracy', showlegend=True))


    performance = 'final_'+performance
    performance_std = performance + '_std'  # Corresponding standard deviation column

    strategies = {
        # 'attack_uncertainty': {'color': [204, 0, 204], 'label': 'attack-uncertainty'},  # Red
        # 'full': {'color': [0, 0, 0], 'label': 'full'},  # Red
        'uncertainty': {'color': [255, 161, 90], 'label':'uncertainty'},                    # Yellow
        'random': {'color': [25, 211, 243], 'label':'random'},                    # Cyan
        # 'entropy': {'color': [255, 102, 146], 'label':'entropy'},                  # Magenta
        # 'attack': {'color': [182, 232, 128], 'label':'attack'},                   # Orange
        # 'margin': {'color': [254, 203, 82], 'label':'margin'},
    }


    z_value = 2.576
    n = 5

    # Add a line for each strategy
    for strategy, v in strategies.items():
        r,g,b = v['color']
        c = 'rgba({},{},{},1)'.format(r,g,b)

        # Add a horizontal line for the 'full' strategy
        if strategy == 'full':
            pass
        #     full_data = new_data[new_data['active_strategy'] == 'full']
        #     # Calculate the mean PGD accuracy for the 'full' strategy
        #     full_mean_accuracy = full_data[performance].mean()
        #     fig.add_trace(go.Scatter(x=[min(new_data['size']), max(new_data['size'])], y=[full_mean_accuracy, full_mean_accuracy],
        #                             mode='lines', line=dict(color=c, width=2, dash='dash'),
        #                             name='Full Dataset Average', showlegend=True))
            
        #     strategy_data = new_data[new_data['active_strategy'] == strategy]
        #     fig.add_trace(go.Scatter( x=strategy_data['size'],  y=strategy_data[performance],  mode='lines+markers',
        #                     name=strategy, line=dict(color=c  ) ,
        #                     error_y=dict(
        #                     type='data',  symmetric=False,  
        #                     array=z_value * strategy_data[performance_std] / n,  
        #                     arrayminus=z_value * strategy_data[performance_std] / n   ), showlegend=False
        #                         ))

            
        else:
            strategy_data = new_data[new_data['active_strategy'] == strategy]
            fig.add_trace(go.Scatter(x=strategy_data['size'], y=strategy_data[performance], line=dict(color=c),
                                    error_y=dict(
                                        type='data',  symmetric=False,  
                                        array=z_value * strategy_data[performance_std] / n,  
                                        arrayminus=z_value * strategy_data[performance_std] / n   ),
                                    mode='lines+markers', name=strategy))


    title_str = 'Robust accuracy' if performance=='final_PGD_accuracy' else 'Clean Accuracy'

    # Update layout and display the plot
    fig.update_layout(
        width=600,  # Adjusted for two subplots
        height=400,
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin=dict(l=0, r=0, t=50, b=0),
        # title="Box Plot of Datasets by Cases and Columns",
        title=title_str,
        xaxis_title='Size (% of training observations)',
        yaxis_title=title_str,
        legend_title='Data Selection Strategy',
        # yaxis_range=[min(new_data[performance]) - 2, max(new_data[performance]) + 2],
        showlegend=True,
        xaxis=dict(
            # type='log',
            showgrid=True,        # Enable the grid
            gridcolor='lightgray' # Set grid line color
        ),
        yaxis=dict(
            showgrid=True,        # Enable the grid
            gridcolor='lightgray', # Set grid line color
            #range=[75,94] if 'clean' in performance else [45,65]
        )
        )

    # Display the figure
    fig.show()
    # fig.write_image("./figures/results_{}_{}_{}_{}_{}.pdf".format(performance, data, model, n_rounds, nb_epochs) )


ValueError: Length mismatch: Expected axis has 7 elements, new values have 6 elements