In [2]:
import pandas as pd

# 分析均值，不再增加方差。


In [10]:
# Load your CSV file (replace with actual file path if needed)
#dataset = 'cifar10'
log_file = f'logs/log.csv'
analyzed_log_file = f'analyzed_logs/analyzed_log.csv'
# Load your CSV file (replace with actual file path if needed)
df = pd.read_csv(log_file, header=None, sep='\t')
df.columns = ['dataset', 'strategy', 'model_idx', 'repeat', 'timestamp', 
              'std_acc', 'rob_acc', 'mts_acc', 'duration']

# Only keep the strategies of interest
strategies = ['Standard', 'random', 'W10', 'w10kl', 'sac']
df = df[df['strategy'].isin(strategies)]

# Calculate means per model_idx, strategy
summary = df.groupby(['dataset', 'model_idx', 'strategy'])[['std_acc', 'rob_acc', 'mts_acc', 'duration']].mean().reset_index()
# reset_index is used to turn model_idx and strategy back to normal column.

# Add mean of the three metrics
summary['mean'] = summary[['std_acc', 'rob_acc', 'mts_acc']].mean(axis=1)

# Melt to long format for transposing
melted = pd.melt(summary, id_vars=['dataset', 'model_idx', 'strategy'], 
                 value_vars=['std_acc', 'rob_acc', 'mts_acc', 'mean','duration'],
                 var_name='metric', value_name='value')

# Pivot to get desired format
transposed = melted.pivot_table(index=['dataset', 'model_idx', 'metric'], 
                                columns='strategy', values='value')
# reorder the index

metric_desired_order = ['std_acc', 'rob_acc', 'mts_acc', 'mean','duration']
transposed = transposed.reindex(metric_desired_order, level='metric')
dataset_desired_order = ['svhn', 'cifar10', 'gtsrb','fashion_mnist']
transposed = transposed.reindex(dataset_desired_order, level='dataset')

# Reset index for clean display
transposed = transposed.reset_index()

# Optional: sort for clarity
#transposed = transposed.sort_values(['model_idx', 'metric'])
transposed = transposed[['dataset', 'model_idx', 'metric']+strategies]


transposed = transposed.round(2)
# Display
print(transposed.to_string(index=False))
transposed.to_csv(analyzed_log_file)


      dataset  model_idx   metric  Standard  random    W10  w10kl   sac
         svhn          1  std_acc     90.76   94.25  92.89  94.77 95.12
         svhn          1  rob_acc     13.42   69.40  76.11  80.76 88.46
         svhn          1  mts_acc     13.58   69.68  76.14  81.61 88.81
         svhn          1     mean     39.26   77.78  81.71  85.71 90.80
         svhn          1 duration      6.14   12.18  65.03  69.59 34.49
         svhn          2  std_acc     94.18   89.89  90.16  94.39 92.65
         svhn          2  rob_acc     21.17   57.90  66.83  61.74 78.00
         svhn          2  mts_acc     21.41   60.79  66.98  62.46 79.25
         svhn          2     mean     45.58   69.53  74.65  72.86 83.30
         svhn          2 duration      7.84   14.83  77.05  80.03 39.89
      cifar10          1  std_acc     87.28   85.36  81.77  86.77 84.08
      cifar10          1  rob_acc     37.22   61.88  67.60  67.71 72.83
      cifar10          1  mts_acc     37.79   63.73  69.15  70.6

# 尝试增加一下p value的分析

In [None]:
from scipy.stats import mannwhitneyu
# Load your CSV file (replace with actual file path if needed)
#dataset = 'cifar10'
log_file = f'logs/log.csv'
analyzed_log_file = f'analyzed_logs/analyzed_log.csv'
# Load your CSV file (replace with actual file path if needed)
df = pd.read_csv(log_file, header=None, sep='\t')
df.columns = ['dataset', 'strategy', 'model_idx', 'repeat', 'timestamp', 
              'std_acc', 'rob_acc', 'mts_acc', 'duration']

# Only keep the strategies of interest
strategies = ['Standard', 'random', 'W10', 'w10kl', 'sac']
df_subset = df[df['strategy'].isin(strategies)]

# Step 1: extract samples.
#sac_values = df[(df['dataset'] == 'svhn') & (df['strategy'] == 'sac')]['rob_acc']
#w10kl_values = df[(df['dataset'] == 'svhn') & (df['strategy'] == 'w10kl')]['rob_acc']
sac_values = df[df['strategy'] == 'sac']['rob_acc']
w10kl_values = df[df['strategy'] == 'w10kl']['rob_acc']

# Step 2: Perform Mann-Whitney U test (alternative='greater' means we test if sac > w10kl)
stat, p = mannwhitneyu(sac_values, w10kl_values, alternative='greater')

print("Mann-Whitney U test (sac > w10kl):")
print("Statistic:", stat)
print("p-value:", p)

strategies_to_compare =strategies[:-1]
target_strategy = strategies[-1]
metrics = ['std_acc', 'rob_acc', 'mts_acc']

results = []
# Loop over each strategy and each metric
for strat in strategies_to_compare:
    for metric in metrics:
        group1 = df_subset[df_subset['strategy'] == target_strategy][metric]
        group2 = df_subset[df_subset['strategy'] == strat][metric]

        # Mann–Whitney U test: test if sac > other
        stat, p = mannwhitneyu(group1, group2, alternative='greater')

        results.append({
            'Metric': metric,
            'Compare_To': strat,
            'U_stat': stat,
            'p_value': round(p, 4)
        })

result_df = pd.DataFrame(results)
pivot_table = result_df.pivot(index='Metric', columns='Compare_To', values='p_value')
print("Mann–Whitney U Test p-values (sac > other strategies):")
print(pivot_table)



Mann-Whitney U test (sac > w10kl):
Statistic: 1164.0
p-value: 0.00023455561699662448
Mann–Whitney U Test p-values (sac > other strategies):
Compare_To  Standard     W10  random   w10kl
Metric                                      
mts_acc       0.0000  0.0000  0.0000  0.0001
rob_acc       0.0000  0.0001  0.0000  0.0002
std_acc       0.0356  0.0012  0.0546  0.5612


In [4]:
#分析AugMix and AutoAugment

# Load your CSV file (replace with actual file path if needed)
#dataset = 'cifar10'
log_file = f'logs/log.csv'
analyzed_log_file = f'analyzed_logs/table_3_analyzed_log.csv'
# Load your CSV file (replace with actual file path if needed)
df = pd.read_csv(log_file, header=None, sep='\t')
df.columns = ['dataset', 'strategy', 'model_idx', 'repeat', 'timestamp', 
              'std_acc', 'rob_acc', 'mts_acc', 'duration']

# Only keep the strategies of interest
strategies = ['Augmix', 'autoaugment', 'sac']
datasets = ['svhn', 'cifar10']
df = df[df['strategy'].isin(strategies) & df['dataset'].isin(datasets)]

# Calculate means per model_idx, strategy
summary = df.groupby(['dataset', 'model_idx', 'strategy'])[['std_acc', 'rob_acc', 'mts_acc','duration']].mean().reset_index()
# reset_index is used to turn model_idx and strategy back to normal column.

# Add mean of the three metrics
summary['mean'] = summary[['std_acc', 'rob_acc', 'mts_acc']].mean(axis=1)

# Melt to long format for transposing
melted = pd.melt(summary, id_vars=['dataset', 'model_idx', 'strategy'], 
                 value_vars=['std_acc', 'rob_acc', 'mts_acc', 'mean','duration'],
                 var_name='metric', value_name='value')

# Pivot to get desired format
transposed = melted.pivot_table(index=['dataset', 'model_idx', 'metric'], 
                                columns='strategy', values='value')
# reorder the index

metric_desired_order = ['std_acc', 'rob_acc', 'mts_acc', 'mean','duration']
transposed = transposed.reindex(metric_desired_order, level='metric')
dataset_desired_order = ['svhn', 'cifar10']
transposed = transposed.reindex(dataset_desired_order, level='dataset')

# Reset index for clean display
transposed = transposed.reset_index()

# Optional: sort for clarity
#transposed = transposed.sort_values(['model_idx', 'metric'])
transposed = transposed[['dataset', 'model_idx', 'metric']+strategies]


transposed = transposed.round(2)
# Display
print(transposed.to_string(index=False))
transposed.to_csv(analyzed_log_file)


dataset  model_idx   metric  Augmix  autoaugment   sac
   svhn          1  std_acc   92.25        95.49 95.12
   svhn          1  rob_acc   13.18        27.68 88.46
   svhn          1  mts_acc   13.22        27.87 88.81
   svhn          1     mean   39.55        50.35 90.80
   svhn          1 duration  152.28        19.29 34.49
   svhn          2  std_acc   95.05        96.45 92.65
   svhn          2  rob_acc   20.79        43.24 78.00
   svhn          2  mts_acc   20.92        43.41 79.25
   svhn          2     mean   45.59        61.03 83.30
   svhn          2 duration  178.85        20.70 39.89
cifar10          1  std_acc   87.34        87.44 84.08
cifar10          1  rob_acc   37.60        38.30 72.83
cifar10          1  mts_acc   38.26        39.14 75.51
cifar10          1     mean   54.40        54.96 77.47
cifar10          1 duration   16.68        16.65 70.60
cifar10          2  std_acc   90.10        91.76 89.91
cifar10          2  rob_acc   23.21        25.53 68.94
cifar10   

In [7]:
# RL horizontally comparison RQ2.1
# 分析四种不同的RL的影响

# Load your CSV file (replace with actual file path if needed)
#dataset = 'cifar10'
log_file = f'logs/log.csv'
analyzed_log_file = f'analyzed_logs/table_5_analyzed_log.csv'
# Load your CSV file (replace with actual file path if needed)
df = pd.read_csv(log_file, header=None, sep='\t')
df.columns = ['dataset', 'strategy', 'model_idx', 'repeat', 'timestamp', 
              'std_acc', 'rob_acc', 'mts_acc', 'duration']

# Only keep the strategies of interest
strategies = ['td3','ddpg','ppo','sac']
datasets = ['svhn', 'cifar10', 'gtsrb','fashion_mnist']
df = df[df['strategy'].isin(strategies) & df['dataset'].isin(datasets)]# 第一次筛选

conditions = (
    ((df['dataset']=='svhn') & (df['model_idx']==1)) |
    ((df['dataset']=='cifar10') & (df['model_idx']==2)) |
    ((df['dataset']=='gtsrb') & (df['model_idx']==2)) |
    ((df['dataset']=='fashion_mnist') & (df['model_idx']==1)) 
)
df = df[conditions]

# Calculate means per model_idx, strategy
summary = df.groupby(['dataset', 'strategy'])[['std_acc', 'rob_acc', 'mts_acc','duration']].mean().reset_index()
# reset_index is used to turn model_idx and strategy back to normal column.

# Add mean of the three metrics
summary['mean'] = summary[['std_acc', 'rob_acc', 'mts_acc']].mean(axis=1)

# Melt to long format for transposing
melted = pd.melt(summary, id_vars=['dataset', 'strategy'], 
                 value_vars=['std_acc', 'rob_acc', 'mts_acc', 'mean','duration'],
                 var_name='metric', value_name='value')

# Pivot to get desired format
transposed = melted.pivot_table(index=['dataset', 'metric'], 
                                columns='strategy', values='value')
# reorder the index

metric_desired_order = ['std_acc', 'rob_acc', 'mts_acc', 'mean','duration']
transposed = transposed.reindex(metric_desired_order, level='metric')
dataset_desired_order = datasets
transposed = transposed.reindex(dataset_desired_order, level='dataset')

# Reset index for clean display
transposed = transposed.reset_index()

# Optional: sort for clarity
#transposed = transposed.sort_values(['model_idx', 'metric'])
transposed = transposed[['dataset', 'metric']+strategies]


transposed = transposed.round(2)
# Display
print(transposed.to_string(index=False))
transposed.to_csv(analyzed_log_file)

      dataset   metric   td3  ddpg   ppo   sac
         svhn  std_acc 94.81 94.62 95.14 95.12
         svhn  rob_acc 87.53 86.56 88.65 88.46
         svhn  mts_acc 87.79 87.13 89.25 88.81
         svhn     mean 90.04 89.44 91.01 90.80
         svhn duration 32.87 53.43 33.83 34.49
      cifar10  std_acc 90.20 89.69 90.22 89.91
      cifar10  rob_acc 68.85 73.50 61.87 68.94
      cifar10  mts_acc 71.38 75.93 64.26 71.59
      cifar10     mean 76.81 79.71 72.12 76.81
      cifar10 duration 70.13 89.74 70.97 72.24
        gtsrb  std_acc 96.76 96.52 96.69 97.01
        gtsrb  rob_acc 90.28 88.27 89.93 90.22
        gtsrb  mts_acc 91.07 89.25 90.85 91.02
        gtsrb     mean 92.70 91.35 92.49 92.75
        gtsrb duration 44.75 63.77 46.50 45.82
fashion_mnist  std_acc 93.32 93.16 93.48 93.39
fashion_mnist  rob_acc 84.40 82.93 84.95 85.27
fashion_mnist  mts_acc 86.27 84.89 86.96 87.33
fashion_mnist     mean 88.00 86.99 88.46 88.66
fashion_mnist duration 41.72 61.24 42.40 43.09


In [None]:
# RL horizontally comparison RQ2.1
# 分析KL项的影响

# Load your CSV file (replace with actual file path if needed)
#dataset = 'cifar10'
log_file = f'logs/log.csv'
analyzed_log_file = f'analyzed_logs/table_6_analyzed_log.csv'
# Load your CSV file (replace with actual file path if needed)
df = pd.read_csv(log_file, header=None, sep='\t')
df.columns = ['dataset', 'strategy', 'model_idx', 'repeat', 'timestamp', 
              'std_acc', 'rob_acc', 'mts_acc', 'duration']

# Only keep the strategies of interest
strategies = ['td3_ablation', 'td3','ddpg_ablation','ddpg','ppo_ablation','ppo','sac_ablation','sac']
datasets = ['svhn', 'cifar10', 'gtsrb','fashion_mnist']
df = df[df['strategy'].isin(strategies) & df['dataset'].isin(datasets)]

conditions = (
    ((df['dataset']=='svhn') & (df['model_idx']==1)) |
    ((df['dataset']=='cifar10') & (df['model_idx']==2)) |
    ((df['dataset']=='gtsrb') & (df['model_idx']==2)) |
    ((df['dataset']=='fashion_mnist') & (df['model_idx']==1)) 
)
df = df[conditions]

# Calculate means per model_idx, strategy
summary = df.groupby(['dataset', 'strategy'])[['std_acc', 'rob_acc', 'mts_acc','duration']].mean().reset_index()
# reset_index is used to turn model_idx and strategy back to normal column.

# Add mean of the three metrics
summary['mean'] = summary[['std_acc', 'rob_acc', 'mts_acc']].mean(axis=1)

# Melt to long format for transposing
melted = pd.melt(summary, id_vars=['dataset', 'strategy'], 
                 value_vars=['std_acc', 'rob_acc', 'mts_acc', 'mean','duration'],
                 var_name='metric', value_name='value')

# Pivot to get desired format
transposed = melted.pivot_table(index=['dataset', 'metric'], 
                                columns='strategy', values='value')
# reorder the index

metric_desired_order = ['std_acc', 'rob_acc', 'mts_acc', 'mean','duration']
transposed = transposed.reindex(metric_desired_order, level='metric')
dataset_desired_order = datasets
transposed = transposed.reindex(dataset_desired_order, level='dataset')

# Reset index for clean display
transposed = transposed.reset_index()

# Optional: sort for clarity
#transposed = transposed.sort_values(['model_idx', 'metric'])
transposed = transposed[['dataset', 'metric']+strategies]


transposed = transposed.round(2)
# Display
print(transposed.to_string(index=False))
transposed.to_csv(analyzed_log_file)

      dataset   metric  td3_ablation   td3  ddpg_ablation  ddpg  ppo_ablation   ppo  sac_ablation   sac
         svhn  std_acc         94.35 94.81          93.96 94.62         94.67 95.14         94.42 95.12
         svhn  rob_acc         87.15 87.53          83.91 86.56         86.99 88.65         87.21 88.46
         svhn  mts_acc         86.98 87.79          84.39 87.13         87.16 89.25         87.21 88.81
         svhn     mean         89.49 90.04          87.42 89.44         89.61 91.01         89.61 90.80
         svhn duration         32.22 32.87          53.74 53.43         32.71 33.83         32.95 34.49
      cifar10  std_acc         90.18 90.20          89.55 89.69         89.94 90.22         89.99 89.91
      cifar10  rob_acc         67.58 68.85          61.06 73.50         64.61 61.87         68.73 68.94
      cifar10  mts_acc         69.50 71.38          62.92 75.93         66.45 64.26         70.70 71.59
      cifar10     mean         75.75 76.81          71.17 79.71 

In [11]:
# RQ3.1 分析D sub size的影响
# 
# Load your CSV file (replace with actual file path if needed)
#dataset = 'cifar10'
log_file = f'logs/log.csv'
analyzed_log_file = f'analyzed_logs/table_7_analyzed_log.csv'
# Load your CSV file (replace with actual file path if needed)
df = pd.read_csv(log_file, header=None, sep='\t')
df.columns = ['dataset', 'strategy', 'model_idx', 'repeat', 'timestamp', 
              'std_acc', 'rob_acc', 'mts_acc', 'duration']

# Only keep the strategies of interest
strategies = ['sac_sub_0.005', 'sac','sac_sub_0.015']
datasets = ['svhn', 'cifar10', 'gtsrb','fashion_mnist']
df = df[df['strategy'].isin(strategies) & df['dataset'].isin(datasets)]

conditions = (
    ((df['dataset']=='svhn') & (df['model_idx']==1)) |
    ((df['dataset']=='cifar10') & (df['model_idx']==2)) |
    ((df['dataset']=='gtsrb') & (df['model_idx']==2)) |
    ((df['dataset']=='fashion_mnist') & (df['model_idx']==1)) 
)
df = df[conditions]

# Calculate means per model_idx, strategy
summary = df.groupby(['dataset', 'strategy'])[['std_acc', 'rob_acc', 'mts_acc','duration']].mean().reset_index()
# reset_index is used to turn model_idx and strategy back to normal column.

# Add mean of the three metrics
summary['mean'] = summary[['std_acc', 'rob_acc', 'mts_acc']].mean(axis=1)

# Melt to long format for transposing
melted = pd.melt(summary, id_vars=['dataset', 'strategy'], 
                 value_vars=['std_acc', 'rob_acc', 'mts_acc', 'mean','duration'],
                 var_name='metric', value_name='value')

# Pivot to get desired format
transposed = melted.pivot_table(index=['dataset', 'metric'], 
                                columns='strategy', values='value')
# reorder the index

metric_desired_order = ['std_acc', 'rob_acc', 'mts_acc', 'mean','duration']
transposed = transposed.reindex(metric_desired_order, level='metric')
dataset_desired_order = datasets
transposed = transposed.reindex(dataset_desired_order, level='dataset')

# Reset index for clean display
transposed = transposed.reset_index()

# Optional: sort for clarity
#transposed = transposed.sort_values(['model_idx', 'metric'])
transposed = transposed[['dataset', 'metric']+strategies]


transposed = transposed.round(2)
# Display
print(transposed.to_string(index=False))
transposed.to_csv(analyzed_log_file)

      dataset   metric  sac_sub_0.005   sac  sac_sub_0.015
         svhn  std_acc          95.06 95.12          95.03
         svhn  rob_acc          88.48 88.46          88.52
         svhn  mts_acc          88.83 88.81          89.03
         svhn     mean          90.79 90.80          90.86
         svhn duration          21.42 34.49          44.66
      cifar10  std_acc          90.59 89.91          90.49
      cifar10  rob_acc          68.63 68.94          69.03
      cifar10  mts_acc          71.00 71.59          71.49
      cifar10     mean          76.74 76.81          77.00
      cifar10 duration          56.83 72.24          83.44
        gtsrb  std_acc          96.94 97.01          96.68
        gtsrb  rob_acc          90.69 90.22          89.89
        gtsrb  mts_acc          91.25 91.02          90.77
        gtsrb     mean          92.96 92.75          92.45
        gtsrb duration          31.28 45.82          59.47
fashion_mnist  std_acc          93.34 93.39          93.

In [8]:
# RQ3.2 分析D alpha beta的影响
# 
# Load your CSV file (replace with actual file path if needed)
log_file = f'logs/log.csv'
analyzed_log_file = f'analyzed_logs/table_8_analyzed_log.csv'
# Load your CSV file (replace with actual file path if needed)
df = pd.read_csv(log_file, header=None, sep='\t')
df.columns = ['dataset', 'strategy', 'model_idx', 'repeat', 'timestamp', 
              'std_acc', 'rob_acc', 'mts_acc', 'duration']

# Only keep the strategies of interest
strategies = ['sac','sac_alpha_2.0_beta_1.0', 'sac_alpha_4.0_beta_1.0', 'sac_alpha_1.0_beta_2.0']
datasets = ['svhn', 'cifar10', 'gtsrb','fashion_mnist']
df = df[df['strategy'].isin(strategies) & df['dataset'].isin(datasets)]

conditions = (
    ((df['dataset']=='svhn') & (df['model_idx']==1)) |
    ((df['dataset']=='cifar10') & (df['model_idx']==2)) |
    ((df['dataset']=='gtsrb') & (df['model_idx']==1)) |
    ((df['dataset']=='fashion_mnist') & (df['model_idx']==1)) 
)
df = df[conditions]      #有所不同

# Calculate means per model_idx, strategy
summary = df.groupby(['dataset', 'strategy'])[['std_acc', 'rob_acc', 'mts_acc','duration']].mean().reset_index()
# reset_index is used to turn model_idx and strategy back to normal column.

# Add mean of the three metrics
summary['mean'] = summary[['std_acc', 'rob_acc', 'mts_acc']].mean(axis=1)

# Melt to long format for transposing
melted = pd.melt(summary, id_vars=['dataset', 'strategy'], 
                 value_vars=['std_acc', 'rob_acc', 'mts_acc', 'mean','duration'],
                 var_name='metric', value_name='value')

# Pivot to get desired format
transposed = melted.pivot_table(index=['dataset', 'metric'], 
                                columns='strategy', values='value')
# reorder the index

metric_desired_order = ['std_acc', 'rob_acc', 'mts_acc', 'mean','duration']
transposed = transposed.reindex(metric_desired_order, level='metric')
dataset_desired_order = datasets
transposed = transposed.reindex(dataset_desired_order, level='dataset')

# Reset index for clean display
transposed = transposed.reset_index()

# Optional: sort for clarity
#transposed = transposed.sort_values(['model_idx', 'metric'])
transposed = transposed[['dataset', 'metric']+strategies]


transposed = transposed.round(2)
# Display
print(transposed.to_string(index=False))
transposed.to_csv(analyzed_log_file)

      dataset   metric   sac  sac_alpha_2.0_beta_1.0  sac_alpha_4.0_beta_1.0  sac_alpha_1.0_beta_2.0
         svhn  std_acc 95.12                   95.05                   94.81                   95.13
         svhn  rob_acc 88.46                   88.84                   87.68                   88.47
         svhn  mts_acc 88.81                   89.21                   88.40                   89.38
         svhn     mean 90.80                   91.03                   90.30                   90.99
         svhn duration 34.49                   33.37                   33.40                   33.32
      cifar10  std_acc 89.91                   90.30                   89.61                   89.93
      cifar10  rob_acc 68.94                   74.31                   76.81                   71.54
      cifar10  mts_acc 71.59                   76.81                   79.24                   74.65
      cifar10     mean 76.81                   80.48                   81.89               