In [6]:
import gzip
import pickle as pkl

# Experiment configuration: these values are substituted into the result
# file names below, so they select which runs get loaded.
data = 'CIFAR10'
model = 'resnet50'
n_rounds = 6
nb_epochs = 10
seeds = 5  # seed ids 0 .. seeds-1 are tried for every configuration

# Filled with: data, model, active_strategy, n_rounds, size, nb_epochs, seed.
RESULT_PATH_TEMPLATE = './results/{}_{}_{}_{}_{}_{}_{}.pkl.gz'


def load_result(path):
    """Load one gzip-compressed pickle from ``path``.

    Returns the unpickled object, or ``None`` when the file does not exist
    or cannot be read. Missing files are skipped silently (not every
    configuration is expected to be on disk); unreadable files are reported
    on stdout so that corrupt results do not vanish unnoticed.
    """
    try:
        with gzip.open(path, 'rb') as f:
            # NOTE(review): pickle.load can execute arbitrary code; only
            # load result files produced by trusted runs.
            return pkl.load(f)
    except FileNotFoundError:
        return None
    except (OSError, EOFError, pkl.UnpicklingError) as exc:
        print('Skipping unreadable result file {}: {!r}'.format(path, exc))
        return None


results_list = []

# Runs trained on an actively selected subset of the given size.
for active_strategy in ['attack', 'uncertainty', 'margin', 'entropy', 'random']:
    for size in [500, 1500, 2500, 5000, 7500, 10000, 12500, 25000, 37500]:
        for seed in range(seeds):
            loaded = load_result(RESULT_PATH_TEMPLATE.format(
                data, model, active_strategy, n_rounds, size, nb_epochs, seed))
            if loaded is None:
                continue
            loaded['active_strategy'] = active_strategy
            print(loaded)
            results_list.append(loaded)

# Baseline runs: stored on disk with size 0, recorded here with size 50000
# so they appear at the right-hand end of the size axis.
active_strategy = 'full'
size = 0

for seed in range(seeds):
    loaded = load_result(RESULT_PATH_TEMPLATE.format(
        data, model, active_strategy, n_rounds, size, nb_epochs, seed))
    if loaded is None:
        continue
    loaded['active_strategy'] = active_strategy
    loaded['size'] = 50000
    print(loaded)
    results_list.append(loaded)

# Initial accuracies are read from the last run that was actually loaded.
if results_list:
    result = results_list[-1]
    clean_accuracy_init = result['init_clean_accuracy']
    PGD_accuracy_init = result['init_PGD_accuracy']
else:
    # Nothing was loaded: say so explicitly instead of failing with a
    # NameError on an undefined `result`.
    print('No result files could be loaded from ./results/')
    clean_accuracy_init = None
    PGD_accuracy_init = None

{'active_strategy': 'attack', 'n_rounds': 6, 'size': 500, 'nb_epochs': 10, 'seed': 0, 'data': 'CIFAR10', 'model': 'resnet50', 'init_clean_accuracy': 93.66, 'init_PGD_accuracy': 46.85, 'final_clean_accuracy': 81.94, 'final_PGD_accuracy': 56.2}
{'active_strategy': 'attack', 'n_rounds': 6, 'size': 500, 'nb_epochs': 10, 'seed': 1, 'data': 'CIFAR10', 'model': 'resnet50', 'init_clean_accuracy': 93.66, 'init_PGD_accuracy': 46.91, 'final_clean_accuracy': 80.64, 'final_PGD_accuracy': 52.0}
{'active_strategy': 'attack', 'n_rounds': 6, 'size': 500, 'nb_epochs': 10, 'seed': 2, 'data': 'CIFAR10', 'model': 'resnet50', 'init_clean_accuracy': 93.66, 'init_PGD_accuracy': 46.77, 'final_clean_accuracy': 82.34, 'final_PGD_accuracy': 54.69}
{'active_strategy': 'attack', 'n_rounds': 6, 'size': 500, 'nb_epochs': 10, 'seed': 3, 'data': 'CIFAR10', 'model': 'resnet50', 'init_clean_accuracy': 93.66, 'init_PGD_accuracy': 46.91, 'final_clean_accuracy': 82.15, 'final_PGD_accuracy': 54.27}
{'active_strategy': 'attac

In [7]:
import pandas as pd
# One DataFrame row per loaded run.
df = pd.DataFrame(results_list)

# Mean and standard deviation of the final accuracies for every
# (strategy, size) pair. The dict form of .agg yields two-level columns:
# (metric, 'mean') and (metric, 'std').
final_agg_spec = {
    column: ['mean', 'std']
    for column in ('final_clean_accuracy', 'final_PGD_accuracy')
}
means_stds = df.groupby(['active_strategy', 'size']).agg(final_agg_spec)

# Same statistics for the initial accuracies, grouped by strategy only.
init_agg_spec = {
    column: ['mean', 'std']
    for column in ('init_clean_accuracy', 'init_PGD_accuracy')
}
init_means_stds = df.groupby(['active_strategy']).agg(init_agg_spec)

# Persist the per-(strategy, size) summary, keeping the group keys as
# leading columns of the CSV.
means_stds.to_csv('./results/means.csv', index=True)

In [8]:
import plotly.graph_objects as go
import pandas as pd

# Reload the exported summary. The first two lines of the file are skipped;
# the next line is consumed as a header and then replaced with flat,
# explicit column names.
summary_columns = [
    'active_strategy',
    'size',
    'final_clean_accuracy',
    'final_clean_accuracy_std',
    'final_PGD_accuracy',
    'final_PGD_accuracy_std',
]
new_data = pd.read_csv('results/means.csv', skiprows=2)
new_data.columns = summary_columns
new_data = new_data.astype({'size': int})




# Trace colour (RGB) per strategy. Insertion order is the order in which
# the traces are added to each figure.
strategies = {
    'full': {'color': [0, 0, 0], 'label': 'full'},                        # black
    'uncertainty': {'color': [255, 161, 90], 'label': 'uncertainty'},     # orange
    'random': {'color': [25, 211, 243], 'label': 'random'},               # cyan
    'entropy': {'color': [255, 102, 146], 'label': 'entropy'},            # pink
    'attack': {'color': [182, 232, 128], 'label': 'attack'},              # light green
    'margin': {'color': [254, 203, 82], 'label': 'margin'},               # yellow
}

# Error bars are mean +/- z_value * std / sqrt(n).
z_value = 2.576  # two-sided 99% quantile of the standard normal
n = 5  # NOTE(review): presumably the number of seeds per configuration - keep in sync with `seeds`

# Both reference lines span the plotted size range. The x axis is
# logarithmic, so x=0 has no valid position and must not be an endpoint.
size_span = [min(new_data['size']), max(new_data['size'])]

# One figure per metric: PGD accuracy first, then clean accuracy.
for metric in ['PGD_accuracy', 'clean_accuracy']:
    performance = 'final_' + metric           # column holding the mean
    performance_std = performance + '_std'    # column holding the std

    fig = go.Figure()

    # Dotted reference line: mean initial accuracy, taken from the first
    # row of init_means_stds. `.iloc[0]` is explicit positional access;
    # plain `[0]` on a label-indexed Series is deprecated.
    init_mean = init_means_stds['init_' + metric]['mean'].iloc[0]
    fig.add_trace(go.Scatter(
        x=size_span, y=[init_mean, init_mean],
        mode='lines',
        line=dict(color='rgba({},{},{},1)'.format(0, 0, 204), width=2, dash='dot'),
        name='Init Accuracy', showlegend=True))

    for strategy, v in strategies.items():
        r, g, b = v['color']
        c = 'rgba({},{},{},1)'.format(r, g, b)
        strategy_data = new_data[new_data['active_strategy'] == strategy]

        if strategy == 'full':
            # Dashed reference line at the mean accuracy of the 'full' rows.
            full_mean_accuracy = strategy_data[performance].mean()
            fig.add_trace(go.Scatter(
                x=size_span, y=[full_mean_accuracy, full_mean_accuracy],
                mode='lines', line=dict(color=c, width=2, dash='dash'),
                name='Full Dataset Average', showlegend=True))

        # Half-width of the interval: z * standard error, where the
        # standard error of a mean over n runs is std / sqrt(n).
        ci_half_width = z_value * strategy_data[performance_std] / (n ** 0.5)

        fig.add_trace(go.Scatter(
            x=strategy_data['size'], y=strategy_data[performance],
            mode='lines+markers', name=strategy, line=dict(color=c),
            error_y=dict(
                type='data', symmetric=False,
                array=ci_half_width,
                arrayminus=ci_half_width),
            # The 'full' points are already represented in the legend by
            # the dashed 'Full Dataset Average' line.
            showlegend=(strategy != 'full')))

    # Layout: white background, light grid, logarithmic size axis and a
    # fixed y range chosen per metric.
    fig.update_layout(
        width=600,
        height=400,
        plot_bgcolor='white',
        paper_bgcolor='white',
        margin=dict(l=0, r=0, t=50, b=0),
        title='{} by Strategy and Size'.format(performance),
        xaxis_title='Size',
        yaxis_title=performance,
        legend_title='Active Strategy',
        showlegend=True,
        xaxis=dict(
            type='log',
            showgrid=True,
            gridcolor='lightgray'
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor='lightgray',
            range=[75, 94] if 'clean' in performance else [45, 65]
        )
    )

    # Show inline, then export a PDF named after the metric and the
    # experiment configuration.
    fig.show()
    fig.write_image("./figures/results_{}_{}_{}_{}_{}.pdf".format(performance, data, model, n_rounds, nb_epochs))
