In [2]:
import gzip
import pickle as pkl


# Function to load all objects from a .pkl.gz file
def load_all(file_path):
    objects = []
    with gzip.open(file_path, 'rb') as f:
        while True:
            try:
                # Load the next pickle object
                objects.append(pkl.load(f))
            except EOFError:
                # End of file reached
                break
    return objects

data_name = 'CIFAR10' #'CIFAR10'
model = 'resnet50'
n_rounds = 1
nb_epochs = 100
seeds = 1
loss = 'TRADES'

results_list = []
for active_strategy in [  'random',]: #'uncertainty', 'attack_uncertainty', 'attack', 'margin', 'entropy',
    for size in [5.0, 10.0, 25.0]: 
        for lr in [0.01, 0.001, 0.0001, ]: #0.00001 
            for sched in ['sched', 'nosched']:
                for seed in range(seeds):
                    try:
                        file_name = './results/{}_{}_{}_{}_{}_{}_{}_{}_{}_{}.pkl.gz'.format(loss, lr, sched, data_name, model, active_strategy, n_rounds, size, nb_epochs, seed)
                        with gzip.open(file_name, 'rb') as f:
                            result = load_all(file_name)[-1]
                        result['active_strategy'] = active_strategy
                        result['lr'] = lr
                        print(result)
                        results_list.append(result)
                    except:
                        print('issue')
                        pass


{'loss': 'TRADES', 'learning_rate': 0.01, 'scheduler': 'sched', 'rounds': 1, 'size': 5.0, 'epochs': 100, 'seed': 0, 'active_strategy': 'random', 'dataset': 'CIFAR10', 'model': 'resnet50', 'init_clean_accuracy': 0.9365000128746033, 'card': 2500, 'final_clean_accuracy': 0.7699999809265137, 'final_PGD_accuracy': 0.6068999767303467, 'lr': 0.01}
{'loss': 'TRADES', 'learning_rate': 0.01, 'scheduler': 'nosched', 'rounds': 1, 'size': 5.0, 'epochs': 100, 'seed': 0, 'active_strategy': 'random', 'dataset': 'CIFAR10', 'model': 'resnet50', 'init_clean_accuracy': 0.9365000128746033, 'card': 2500, 'final_clean_accuracy': 0.767300009727478, 'final_PGD_accuracy': 0.6101999878883362, 'lr': 0.01}
{'loss': 'TRADES', 'learning_rate': 0.001, 'scheduler': 'sched', 'rounds': 1, 'size': 5.0, 'epochs': 100, 'seed': 0, 'active_strategy': 'random', 'dataset': 'CIFAR10', 'model': 'resnet50', 'init_clean_accuracy': 0.9365000128746033, 'card': 2500, 'final_clean_accuracy': 0.7337999939918518, 'final_PGD_accuracy': 0

In [3]:
import pandas as pd
# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(results_list)

data = df.groupby(['active_strategy', 'size', 'scheduler', 'learning_rate' ]).agg({
    'final_clean_accuracy': ['mean',  ], #std
    'final_PGD_accuracy': ['mean', ] #std
})

# Flattening the multi-level column index
data.columns = ['_'.join(col).strip() for col in data.columns.values]

# Resetting the index to move the index values into columns
data.reset_index(inplace=True)

data.to_csv('./results/means.csv', index=True)

In [7]:
data

Unnamed: 0,active_strategy,size,scheduler,learning_rate,final_clean_accuracy_mean,final_PGD_accuracy_mean
0,random,5.0,nosched,0.0001,0.8962,0.4498
1,random,5.0,nosched,0.001,0.7785,0.4322
2,random,5.0,nosched,0.01,0.5862,0.2546
3,random,5.0,sched,0.0001,0.8994,0.4399
4,random,5.0,sched,0.001,0.8641,0.4707
5,random,5.0,sched,0.01,0.5795,0.2641
6,random,10.0,nosched,0.0001,0.896,0.503
7,random,10.0,nosched,0.001,0.6948,0.3798
8,random,10.0,nosched,0.01,0.5555,0.2352
9,random,10.0,sched,0.0001,0.8993,0.4816


In [4]:
import plotly.graph_objects as go

metric = 'final_PGD_accuracy_mean' #'final_clean_accuracy_mean'
# Convert the learning rate column to string to match keys in the lr_styles dictionary
data['learning_rate'] = data['learning_rate'].astype(str)

# Update the strategy_colors dictionary to include all unique strategies from the data
strategy_colors = {'random': 'red', 'uncertainty': 'blue'}

# Define line styles for learning rates
lr_styles = {'0.01': 'dot', '0.001': 'solid',  '0.0001': 'dashdot', '1e-05': 'longdash'}

# Reinitialize the figure with increased marker size and improved legend
fig = go.Figure()

# Add traces for each combination of sched and active_strategy
for sched in data['scheduler'].unique():
    for active_strategy in data['active_strategy'].unique():
        subset = data[(data['scheduler'] == sched) & (data['active_strategy'] == active_strategy)]
        for lr in subset['learning_rate'].unique():
            subsubset = subset[subset['learning_rate'] == lr]
            fig.add_trace(go.Scatter(
                x=subsubset['size'],
                y=subsubset[metric], 
                mode='lines+markers',
                name=f'{active_strategy}, {sched}, {lr}',
                line=dict(color=strategy_colors[active_strategy], dash=lr_styles[lr]),
                marker=dict(symbol='circle' if sched == 'nosched' else 'square', size=10)
            ))

# Update layout for better legend visibility
fig.update_layout(
    margin=dict(l=0, r=0, t=50, b=0),
    title='{} by Size and Strategy'.format(metric),
    xaxis_title='Size',
    yaxis_title=metric,
    legend_title='Strategy Details',
    legend=dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="right",
        x=1.3,
        title_font=dict(size=15),
        font=dict(size=15)
    )
)

# fig.write_image("./figures/results_{}_{}_{}_{}_{}.pdf".format(metric, data_name, model, n_rounds, nb_epochs) )

In [20]:
import plotly.graph_objects as go
import pandas as pd

# Load and prepare the data
new_data = pd.read_csv('results/means.csv', skiprows=2)
new_data.columns = ['active_strategy', 'size', 'final_clean_accuracy',
                    'final_clean_accuracy_std', 'final_PGD_accuracy', 'final_PGD_accuracy_std']
new_data['size'] = new_data['size'].astype(int)

for performance in ['PGD_accuracy', 'clean_accuracy' ]:
# performance = 'final_PGD_accuracy' 
# # performance = 'final_clean_accuracy'
    fig = go.Figure()

    # fig.add_trace(go.Scatter(x=[0, 50000], y=[ init_means_stds['init_'+performance]['mean'][0], init_means_stds['init_'+performance]['mean'][0] ],
    #         mode='lines', line=dict(color='rgba({},{},{},1)'.format(0,0,204), width=2, dash='dot'),
    #                     name='Init Accuracy', showlegend=True))


    performance = 'final_'+performance
    performance_std = performance + '_std'  # Corresponding standard deviation column

    strategies = {
        # 'attack_uncertainty': {'color': [204, 0, 204], 'label': 'attack-uncertainty'},  # Red
        # 'full': {'color': [0, 0, 0], 'label': 'full'},  # Red
        'uncertainty': {'color': [255, 161, 90], 'label':'uncertainty'},                    # Yellow
        'random': {'color': [25, 211, 243], 'label':'random'},                    # Cyan
        # 'entropy': {'color': [255, 102, 146], 'label':'entropy'},                  # Magenta
        # 'attack': {'color': [182, 232, 128], 'label':'attack'},                   # Orange
        # 'margin': {'color': [254, 203, 82], 'label':'margin'},
    }


    z_value = 2.576
    n = 5

    # Add a line for each strategy
    for strategy, v in strategies.items():
        r,g,b = v['color']
        c = 'rgba({},{},{},1)'.format(r,g,b)

        # Add a horizontal line for the 'full' strategy
        if strategy == 'full':
            pass
        #     full_data = new_data[new_data['active_strategy'] == 'full']
        #     # Calculate the mean PGD accuracy for the 'full' strategy
        #     full_mean_accuracy = full_data[performance].mean()
        #     fig.add_trace(go.Scatter(x=[min(new_data['size']), max(new_data['size'])], y=[full_mean_accuracy, full_mean_accuracy],
        #                             mode='lines', line=dict(color=c, width=2, dash='dash'),
        #                             name='Full Dataset Average', showlegend=True))
            
        #     strategy_data = new_data[new_data['active_strategy'] == strategy]
        #     fig.add_trace(go.Scatter( x=strategy_data['size'],  y=strategy_data[performance],  mode='lines+markers',
        #                     name=strategy, line=dict(color=c  ) ,
        #                     error_y=dict(
        #                     type='data',  symmetric=False,  
        #                     array=z_value * strategy_data[performance_std] / n,  
        #                     arrayminus=z_value * strategy_data[performance_std] / n   ), showlegend=False
        #                         ))

            
        else:
            strategy_data = new_data[new_data['active_strategy'] == strategy]
            fig.add_trace(go.Scatter(x=strategy_data['size'], y=strategy_data[performance], line=dict(color=c),
                                    error_y=dict(
                                        type='data',  symmetric=False,  
                                        array=z_value * strategy_data[performance_std] / n,  
                                        arrayminus=z_value * strategy_data[performance_std] / n   ),
                                    mode='lines+markers', name=strategy))


    title_str = 'Robust accuracy' if performance=='final_PGD_accuracy' else 'Clean Accuracy'

    # Update layout and display the plot
    fig.update_layout(
        width=600,  # Adjusted for two subplots
        height=400,
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin=dict(l=0, r=0, t=50, b=0),
        # title="Box Plot of Datasets by Cases and Columns",
        title=title_str,
        xaxis_title='Size (% of training observations)',
        yaxis_title=title_str,
        legend_title='Data Selection Strategy',
        # yaxis_range=[min(new_data[performance]) - 2, max(new_data[performance]) + 2],
        showlegend=True,
        xaxis=dict(
            # type='log',
            showgrid=True,        # Enable the grid
            gridcolor='lightgray' # Set grid line color
        ),
        yaxis=dict(
            showgrid=True,        # Enable the grid
            gridcolor='lightgray', # Set grid line color
            #range=[75,94] if 'clean' in performance else [45,65]
        )
        )

    # Display the figure
    fig.show()
    # fig.write_image("./figures/results_{}_{}_{}_{}_{}.pdf".format(performance, data, model, n_rounds, nb_epochs) )


ValueError: Length mismatch: Expected axis has 7 elements, new values have 6 elements