# Benchmarking Watermark Attack Removal

## Imports

In [1]:
import os 
import sys
from glob import glob

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import metrics

## Helper functions

In [2]:
def load_experiment(experiment_name):
    """Load and combine every ``*_metrics.csv`` file of one experiment.

    Parameters
    ----------
    experiment_name : str
        Directory that holds the experiment's ``*_metrics.csv`` files. The
        same value is written to the ``experiment_name`` column.

    Returns
    -------
    pandas.DataFrame
        Rows of all matching files, stacked in sorted file-name order, with
        an added ``experiment_name`` column. The row index of each file is
        kept as read (it is not reset).

    Raises
    ------
    FileNotFoundError
        If no ``*_metrics.csv`` file matches ``experiment_name``.
    """
    ### collect the metrics files in a deterministic order
    experiment_files = sorted(glob(f"{experiment_name}/*_metrics.csv"))

    ### fail with a clear message instead of crashing later on ``None``
    if not experiment_files:
        raise FileNotFoundError(
            f"No *_metrics.csv found for experiment_name={experiment_name}"
        )

    ### read every file, then concatenate once: growing a frame inside the
    ### loop re-copies all previously loaded rows on each iteration
    frames = [pd.read_csv(experiment_file) for experiment_file in experiment_files]
    df = pd.concat(frames, axis=0)

    ### assign experiment name
    return df.assign(experiment_name=experiment_name)


def load_experiments(experiment_names):
    """Load several experiments and stack them into one data frame.

    Parameters
    ----------
    experiment_names : iterable of str
        Experiment names, each passed to ``load_experiment``.

    Returns
    -------
    pandas.DataFrame or None
        All experiments concatenated row-wise in the given order with a
        fresh 0..n-1 index, or ``None`` when ``experiment_names`` is empty.
    """
    frames = [load_experiment(experiment_name) for experiment_name in experiment_names]

    # nothing to combine: keep the historical ``None`` result
    if not frames:
        return None

    # One concat for all frames; the index is reset regardless of how many
    # experiments were requested, so the result always has unique row labels.
    return pd.concat(frames, axis=0, ignore_index=True)
    

## Load result files

In [3]:
# define which experiments to load: every watermark type crossed with every
# attack, grouped by watermark type (TreeRing, StegaStamp, StableSig, Invisible)
experiment_names = [
    f"{watermark}_{attack}"
    for watermark in ("TreeRing", "StegaStamp", "StableSig", "Invisible")
    for attack in (
        "ImageDistortion_gaussian_blur_r_8",
        "DiffWMAttacker",
        "VAEWMAttacker_bmshj2018-factorized",
        "Rinse4x",
        "InPaint_ReplaceBG",
    )
]

In [4]:
# read and combine the metrics of every listed experiment
df_ = load_experiments(experiment_names)

# per experiment, count the rows that have a non-null prompt_index
df_.groupby("experiment_name")[["prompt_index"]].count()

Unnamed: 0_level_0,prompt_index
experiment_name,Unnamed: 1_level_1
Invisible_DiffWMAttacker,1000
Invisible_ImageDistortion_gaussian_blur_r_8,1000
Invisible_InPaint_ReplaceBG,1000
Invisible_Rinse4x,1000
Invisible_VAEWMAttacker_bmshj2018-factorized,1000
StableSig_DiffWMAttacker,1000
StableSig_ImageDistortion_gaussian_blur_r_8,1000
StableSig_InPaint_ReplaceBG,1000
StableSig_Rinse4x,1000
StableSig_VAEWMAttacker_bmshj2018-factorized,1000


## Compute additional metrics

In [5]:
# Add three derived columns in one pass:
#   wm_type           - prefix of experiment_name before the first underscore
#   w_bit_acc_success - True where w_bit_acc <= 24/32 (bound inclusive)
#   w_p_success       - True where w_p >= 0.05 (bound inclusive)
df_ = df_.assign(
    wm_type=[name.split('_')[0] for name in df_.experiment_name],
    w_bit_acc_success=df_.w_bit_acc <= 24/32,
    w_p_success=df_.w_p >= 0.05,
)

## Compute Average Watermark Removal Metrics (Table 1)

In [6]:
# metric columns to average, plus the grouping key
use_columns = ['w_p', 'w_bit_acc', 'w_pct_mask', 'experiment_name']

# per-experiment means, with rows ordered as in experiment_names
df = (
    df_.loc[:, use_columns]
    .groupby("experiment_name")
    .mean()
    .loc[experiment_names]
)

# write the table to disk and report the file name
save_as = "Table_01-average_watermark_removal_metrics.csv"
df.to_csv(save_as)
print(f"[+] {save_as}")

# show dataframe
df

[+] Table_01-average_watermark_removal_metrics.csv


Unnamed: 0_level_0,w_p,w_bit_acc,w_pct_mask
experiment_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
TreeRing_ImageDistortion_gaussian_blur_r_8,3.118529e-05,,0.612084
TreeRing_DiffWMAttacker,0.001311314,,0.612084
TreeRing_VAEWMAttacker_bmshj2018-factorized,0.002182535,,0.612084
TreeRing_Rinse4x,0.01607795,,0.612084
TreeRing_InPaint_ReplaceBG,0.09984804,,0.612084
StegaStamp_ImageDistortion_gaussian_blur_r_8,0.06205975,0.68245,0.65624
StegaStamp_DiffWMAttacker,1.595313e-05,0.90508,0.65624
StegaStamp_VAEWMAttacker_bmshj2018-factorized,8.737198e-07,0.99215,0.65624
StegaStamp_Rinse4x,0.02176594,0.69197,0.65624
StegaStamp_InPaint_ReplaceBG,0.1639418,0.70104,0.65624


## Compute Average Watermark Removal Metrics for Prompts Filtered by Mask Percentage (Table 2)

In [7]:
# watermark types to tabulate, one output table each
wm_types = ["TreeRing", "StegaStamp", "StableSig", "Invisible"]

# metric columns to average, plus the grouping key
use_columns = ['w_p', 'w_bit_acc', 'w_pct_mask', 'experiment_name']

# inclusive bounds on w_pct_mask used to select prompts
min_pct_mask = 0.50
max_pct_mask = 1.00

# resulting tables, keyed by watermark type
dfs = {}

for wm_type in wm_types:

    # rows of this watermark's InPaint_ReplaceBG experiment
    df_experiment = df_[df_.experiment_name == f"{wm_type}_InPaint_ReplaceBG"]

    # prompts whose mask percentage lies in [min_pct_mask, max_pct_mask]
    df_experiment_filter = df_experiment[
        df_experiment.w_pct_mask.between(min_pct_mask, max_pct_mask)
    ]
    good_prompts = list(df_experiment_filter.prompt_index.unique())

    # every experiment of this watermark type
    df_wm = df_[df_.wm_type == wm_type]

    # average the metrics over the selected prompts only, keeping the
    # experiment order given by experiment_names
    df_wm_good = (
        df_wm[df_wm.prompt_index.isin(good_prompts)][use_columns]
        .groupby("experiment_name")
        .mean()
    )
    df_wm_good = df_wm_good.loc[
        [name for name in experiment_names if name.startswith(wm_type)]
    ]

    # save scores
    # NOTE(review): the file name tags the lower bound with "le" and the upper
    # bound with "ge", although the filter is >= min and <= max; the name is
    # kept unchanged so previously written outputs still match.
    save_as = f"Table_02-average_watermark_removal_metrics_{wm_type}_w_pct_mask_le_{min_pct_mask:0.2f}_ge_{max_pct_mask:0.2f}.csv"
    df_wm_good.to_csv(save_as)
    print(f"[+] {save_as}")

    # store df
    dfs[wm_type] = df_wm_good

[+] Table_02-average_watermark_removal_metrics_TreeRing_w_pct_mask_le_0.50_ge_1.00.csv
[+] Table_02-average_watermark_removal_metrics_StegaStamp_w_pct_mask_le_0.50_ge_1.00.csv
[+] Table_02-average_watermark_removal_metrics_StableSig_w_pct_mask_le_0.50_ge_1.00.csv
[+] Table_02-average_watermark_removal_metrics_Invisible_w_pct_mask_le_0.50_ge_1.00.csv


In [8]:
# show the Table 2 averages stored for TreeRing (prompts selected by the
# w_pct_mask filter in the loop above)
dfs["TreeRing"]

Unnamed: 0_level_0,w_p,w_bit_acc,w_pct_mask
experiment_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
TreeRing_ImageDistortion_gaussian_blur_r_8,1.7e-05,,0.738309
TreeRing_DiffWMAttacker,0.001906,,0.738309
TreeRing_VAEWMAttacker_bmshj2018-factorized,0.002146,,0.738309
TreeRing_Rinse4x,0.018876,,0.738309
TreeRing_InPaint_ReplaceBG,0.145132,,0.738309


In [9]:
# show the Table 2 averages stored for StegaStamp (prompts selected by the
# w_pct_mask filter in the loop above)
dfs["StegaStamp"]

Unnamed: 0_level_0,w_p,w_bit_acc,w_pct_mask
experiment_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
StegaStamp_ImageDistortion_gaussian_blur_r_8,0.060412,0.685728,0.773829
StegaStamp_DiffWMAttacker,1.3e-05,0.903235,0.773829
StegaStamp_VAEWMAttacker_bmshj2018-factorized,1e-06,0.992451,0.773829
StegaStamp_Rinse4x,0.023477,0.69014,0.773829
StegaStamp_InPaint_ReplaceBG,0.229512,0.629916,0.773829


In [10]:
# show the Table 2 averages stored for StableSig (prompts selected by the
# w_pct_mask filter in the loop above)
dfs["StableSig"]

Unnamed: 0_level_0,w_p,w_bit_acc,w_pct_mask
experiment_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
StableSig_ImageDistortion_gaussian_blur_r_8,0.230838,0.399029,0.783674
StableSig_DiffWMAttacker,0.608066,0.496925,0.783674
StableSig_VAEWMAttacker_bmshj2018-factorized,0.589682,0.471599,0.783674
StableSig_Rinse4x,0.565186,0.465238,0.783674
StableSig_InPaint_ReplaceBG,0.59836,0.487518,0.783674


In [11]:
# show the Table 2 averages stored for Invisible (prompts selected by the
# w_pct_mask filter in the loop above)
dfs["Invisible"]

Unnamed: 0_level_0,w_p,w_bit_acc,w_pct_mask
experiment_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Invisible_ImageDistortion_gaussian_blur_r_8,,0.503008,0.739225
Invisible_DiffWMAttacker,,0.50136,0.739225
Invisible_VAEWMAttacker_bmshj2018-factorized,,0.503376,0.739225
Invisible_Rinse4x,,0.5024,0.739225
Invisible_InPaint_ReplaceBG,,0.506224,0.739225
