# Benchmarking Image Quality (CLIP Scores)

## Imports

In [3]:
import os 
import sys
from glob import glob

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import metrics

## Helper functions

In [4]:
def load_experiment(experiment_name):
    """Load and combine every ``*_metrics.csv`` file of one experiment.

    Parameters
    ----------
    experiment_name : str
        Directory that holds the experiment's ``*_metrics.csv`` files. The
        value is also stored verbatim in the ``experiment_name`` column.

    Returns
    -------
    pandas.DataFrame
        Rows of all matching files stacked in sorted file-name order (each
        file keeps its own row index), plus an ``experiment_name`` column.

    Raises
    ------
    FileNotFoundError
        If no file matches ``{experiment_name}/*_metrics.csv``.
    """
    ### process (combined) data frame
    experiment_files = sorted(glob(f"{experiment_name}/*_metrics.csv"))

    if not experiment_files:
        # Fail with a clear error here: merely reporting the problem and
        # carrying on would end in an opaque AttributeError on ``None``.
        raise FileNotFoundError(
            f"No *_metrics.csv found for experiment_name={experiment_name}"
        )

    # One concat over all files avoids re-copying the accumulated frame
    # once per file, which pairwise concatenation would do.
    df = pd.concat(
        [pd.read_csv(experiment_file) for experiment_file in experiment_files],
        axis=0,
    )

    ### assign experiment name
    return df.assign(experiment_name=experiment_name)


def load_experiments(experiment_names):
    """Load several experiments and stack them into one data frame.

    Parameters
    ----------
    experiment_names : iterable of str
        Names handed one by one to ``load_experiment``.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` when ``experiment_names`` is empty. For a single experiment,
        a copy of its frame with the index left untouched. For two or more,
        the frames concatenated row-wise with a fresh 0..n-1 index.
    """
    combined = None
    for name in experiment_names:
        frame = load_experiment(name)
        combined = (
            frame.copy()
            if combined is None
            else pd.concat((combined, frame), axis=0, ignore_index=True)
        )
    return combined
    

## Load result files

In [14]:
# Define which experiments to load.
#
# Each entry is passed to load_experiments(), which looks for
# "<entry>/*_metrics.csv", so an entry is a directory path relative to the
# working directory. The text before the first underscore is later used as
# the watermark type, and the order of this list is the row order of the
# averaged table. Comment an entry in or out to change the selection.
experiment_names = [
    
    # TreeRing
    # 'TreeRing_ImageDistortion_gaussian_blur_r_8',
    # 'TreeRing_DiffWMAttacker',
    # 'TreeRing_VAEWMAttacker_bmshj2018-factorized',
    # 'TreeRing_Rinse4x',
    'TreeRing_InPaint_ReplaceBG',
    
    # StegaStamp
    # 'StegaStamp_ImageDistortion_gaussian_blur_r_8',
    # 'StegaStamp_DiffWMAttacker',
    # 'StegaStamp_VAEWMAttacker_bmshj2018-factorized',
    # 'StegaStamp_Rinse4x',
    'StegaStamp_InPaint_ReplaceBG',
    
    # StableSig
    # 'StableSig_ImageDistortion_gaussian_blur_r_8',
    # 'StableSig_DiffWMAttacker',
    # 'StableSig_VAEWMAttacker_bmshj2018-factorized',
    # 'StableSig_Rinse4x',
    'StableSig_InPaint_ReplaceBG',
    
    # Invisible
    # 'Invisible_ImageDistortion_gaussian_blur_r_8',
    # 'Invisible_DiffWMAttacker',
    # 'Invisible_VAEWMAttacker_bmshj2018-factorized',
    # 'Invisible_Rinse4x',
    'Invisible_InPaint_ReplaceBG',
    
]

In [15]:
# read every selected experiment into one combined frame
df_ = load_experiments(experiment_names)

# per experiment, count the rows that have a non-null prompt_index
df_.groupby("experiment_name")[['prompt_index']].count()

Unnamed: 0_level_0,prompt_index
experiment_name,Unnamed: 1_level_1
Invisible_InPaint_ReplaceBG,1000
StableSig_InPaint_ReplaceBG,1000
StegaStamp_InPaint_ReplaceBG,1000
TreeRing_InPaint_ReplaceBG,1000


## Compute additional metrics

In [16]:
# Watermark type = the part of the experiment name before the first underscore
df_ = df_.assign(
    wm_type=[name.partition('_')[0] for name in df_["experiment_name"]]
)

## Compute Average Image Quality Metrics: CLIP Scores (Table 4)

In [18]:
# columns that go into the per-experiment averages
use_columns = [
    ### clip scores
    'no_w_no_attack_sim',
    'no_w_sim',
    'w_no_attack_sim',
    'w_sim',
    ### image quality scores (currently disabled)
    # 'no_w_mse',  'no_w_no_bg_mse',
    #   'w_mse',     'w_no_bg_mse',
    # 'no_w_ssim', 'no_w_no_bg_ssim',
    #   'w_ssim',    'w_no_bg_ssim',
    # 'no_w_psnr', 'no_w_no_bg_psnr',
    #   'w_psnr',    'w_no_bg_psnr',
    ### other
    'w_pct_mask',
    'experiment_name',
]

# mean of each selected column per experiment, with the rows put back into
# the order in which the experiments were listed
df = (
    df_.loc[:, use_columns]
    .groupby("experiment_name")
    .mean()
    .loc[experiment_names]
)

# write the averaged table to disk and report the file name
save_as = "Table_04-average_image_quality_metrics_clip_scores.csv"
df.to_csv(save_as)
print(f"[+] {save_as}")

# display the averaged table as the cell output
df

[+] Table_04-average_image_quality_metrics_clip_scores.csv


Unnamed: 0_level_0,no_w_no_attack_sim,no_w_sim,w_no_attack_sim,w_sim,w_pct_mask
experiment_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
TreeRing_InPaint_ReplaceBG,0.363154,0.288779,0.36352,0.291023,0.612084
StegaStamp_InPaint_ReplaceBG,0.363154,0.288779,0.354229,0.266372,0.65624
StableSig_InPaint_ReplaceBG,0.363154,0.288779,0.364144,0.270532,0.64945
Invisible_InPaint_ReplaceBG,0.363154,0.288779,0.361916,0.287877,0.604205
