In [1]:
import subprocess
import numpy as np
import os
import pickle as pkl
import re
import ast
import json
import math
from tqdm.notebook import tqdm

def parse_metrics(command):
    """Run ``command`` and pull the QB-Norm metrics dictionaries out of its stdout.

    The captured stdout is scanned line by line. The line that follows a line
    containing ``'Metrics before applying QB-Norm'`` is parsed as the "before"
    metrics, and the line that follows one containing ``'Metrics after QB-Norm'``
    is parsed as the "after" metrics. Parsing uses ``ast.literal_eval``, so the
    metrics line must be a Python literal (e.g. a printed ``dict``).

    Args:
        command: Argument list handed to ``subprocess.run``.

    Returns:
        ``{'before': ..., 'after': ...}``. A value is ``None`` when its marker
        was never found, was the last line of the output, or the following
        line could not be parsed. If a marker appears several times, the last
        successfully parsed occurrence wins.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status. Its stderr is printed before the exception propagates.
    """
    try:
        result = subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print("Error occurred:\n", e.stderr)
        # Without a result there is nothing to parse; surface the real failure
        # instead of tripping over an unbound ``result`` further down.
        raise

    output_lines = result.stdout.strip().split('\n')

    # Checked in this order for every line, so a line matching the "before"
    # marker is never also treated as an "after" marker.
    markers = (
        ('before', 'Metrics before applying QB-Norm'),
        ('after', 'Metrics after QB-Norm'),
    )
    metrics = {'before': None, 'after': None}

    for i, line in enumerate(output_lines):
        for key, marker in markers:
            if marker not in line:
                continue
            # The metrics dictionary is printed on the line after the marker.
            if i + 1 < len(output_lines):
                metrics_line = output_lines[i + 1].strip()
                try:
                    metrics[key] = ast.literal_eval(metrics_line)
                except (SyntaxError, ValueError):
                    print(f"Could not parse metrics {key} at line {i + 1}: {metrics_line}")
            break

    return metrics

def execute_dynamic_inverted_softmax(test_file_path, train_file_path, adv_test_similarity_path, adv_train_similarity_path, gt_idx_path, out_dir, max_adv_runs=100):
    """Evaluate ``dynamic_inverted_softmax.py`` once per adversarial candidate.

    For each adversarial entry, one extra column is appended to both the test
    and the train similarity matrices, the augmented matrices are pickled to
    ``<out_dir>/output_cache``, and ``dynamic_inverted_softmax.py`` is run
    twice through ``parse_metrics``: once scored against the (shifted)
    ground-truth indices and once scored against the indices of the appended
    column.

    Args:
        test_file_path: File with the test similarity matrix (read via ``np.load``).
        train_file_path: File with the train similarity matrix.
        adv_test_similarity_path: File with the adversarial test similarities;
            iterated along its first axis, one run per item.
        adv_train_similarity_path: File with the adversarial train
            similarities; item ``i`` is paired with run ``i``.
        gt_idx_path: File with the ground-truth flat indices.
        out_dir: Base output directory; everything is written to
            ``<out_dir>/output_cache``.
        max_adv_runs: Maximum number of adversarial entries to evaluate
            (default 100, the previously hard-coded limit). ``None`` evaluates
            all of them.

    Returns:
        ``(gt_metrics, adv_metrics)``: two dicts keyed by run index, each value
        being the ``{'before': ..., 'after': ...}`` dict from ``parse_metrics``.

    Side effects:
        ``gt_metrics.pkl`` and ``adv_metrics.pkl`` are rewritten after every
        run (acts as a checkpoint). ``adv_idx.pkl`` and
        ``modified_gt_idx-<i>.pkl`` are left in the cache directory. The
        augmented test/train matrices are deleted after each run, even when
        the run fails.
    """
    out_dir = out_dir + "/output_cache"

    # NOTE: allow_pickle=True unpickles arbitrary objects; only point these
    # paths at files you trust.
    data = np.load(test_file_path, allow_pickle=True)
    train_data = np.load(train_file_path, allow_pickle=True)
    adv_test_similarity_matrices = np.load(adv_test_similarity_path, allow_pickle=True)
    adv_train_similarity_matrices = np.load(adv_train_similarity_path, allow_pickle=True)
    gt_idx = np.load(gt_idx_path, allow_pickle=True)

    os.makedirs(out_dir, exist_ok=True)

    # Loop-invariant: the k-th ground-truth index is shifted by k.
    # Presumably this compensates for the one extra column added to every row
    # of the test matrix (flat index q*N+v becomes q*(N+1)+v) and assumes one
    # ground-truth entry per query, in query order -- TODO confirm.
    modified_gt_index = (gt_idx.flatten() + np.arange(gt_idx.size)).reshape(gt_idx.shape)

    adv_idx_path = f'{out_dir}/adv_idx.pkl'

    gt_metrics = {}
    adv_metrics = {}

    # NOTE(review): each item yielded here is flattened and appended as a
    # column, so it must hold exactly one value per row of the test matrix.
    # Confirm the first axis of the adversarial test file enumerates candidates.
    for i, adv in enumerate(tqdm(adv_test_similarity_matrices[:max_adv_runs])):
        modified_file_path = f'{out_dir}/modified-test-images-texts-seed0-{i}.pkl'
        modified_train_file_path = f'{out_dir}/modified-train-images-texts-seed0-{i}.pkl'
        modified_gt_index_path = f'{out_dir}/modified_gt_idx-{i}.pkl'

        try:
            # column_stack already returns a new array, so the source
            # matrices are never modified and need no defensive copy.
            test_data_temp = np.column_stack((data, adv.flatten()))
            with open(modified_file_path, 'wb') as f:
                pkl.dump(test_data_temp, f)

            train_data_temp = np.column_stack(
                (train_data, adv_train_similarity_matrices[i].flatten())
            )
            with open(modified_train_file_path, 'wb') as f:
                pkl.dump(train_data_temp, f)

            # Flat index of the last (appended) column in each query row,
            # grouped into rows of ``queries_per_video`` consecutive queries.
            num_queries, num_vids = test_data_temp.shape
            queries_per_video = math.ceil(num_queries / num_vids)
            adv_idx = np.array([
                [num_vids * (ii + jj * queries_per_video) + num_vids - 1 for ii in range(queries_per_video)]
                for jj in range(num_vids - 1)
            ])
            with open(adv_idx_path, 'wb') as f:
                pkl.dump(adv_idx, f)

            with open(modified_gt_index_path, 'wb') as f:
                pkl.dump(modified_gt_index, f)

            # Same matrices for both runs; only the target indices differ.
            gt_command = [
                'python', 'dynamic_inverted_softmax.py',
                '--sims_train_test_path', modified_train_file_path,
                '--sims_test_path', modified_file_path,
                '--gt_idx_path', modified_gt_index_path,
                '--defense', 'True'
            ]
            adv_command = [
                'python', 'dynamic_inverted_softmax.py',
                '--sims_train_test_path', modified_train_file_path,
                '--sims_test_path', modified_file_path,
                '--gt_idx_path', adv_idx_path,
                '--defense', 'True'
            ]

            gt_metrics[i] = parse_metrics(gt_command)
            print(gt_metrics[i])

            adv_metrics[i] = parse_metrics(adv_command)
            print(adv_metrics[i])

            # Checkpoint after every run so partial results survive a crash.
            with open(f'{out_dir}/gt_metrics.pkl', 'wb') as f:
                pkl.dump(gt_metrics, f)
            with open(f'{out_dir}/adv_metrics.pkl', 'wb') as f:
                pkl.dump(adv_metrics, f)
        finally:
            # Both augmented matrices are full-size copies; remove them
            # whether or not the run succeeded so the cache does not fill up.
            for temp_path in (modified_file_path, modified_train_file_path):
                if os.path.exists(temp_path):
                    os.remove(temp_path)

    return gt_metrics, adv_metrics


In [5]:
# Input locations for the MSCOCO / OpenCLIP similarity matrices.
out_dir = '../outputs/mscoco/openclip'
test_file_path = f'{out_dir}/test_similarity_matrix.pkl'
train_file_path = f'{out_dir}/train_similarity_matrix.pkl'
adv_test_similarity_path = f'{out_dir}/adv_test_similarity_matrix.pkl'
adv_train_similarity_path = f'{out_dir}/adv_train_similarity_matrix.pkl'
gt_idx_path = '../outputs/mscoco/gt_idx.pkl'

# Load every matrix once and report its shape as a quick sanity check.
test_data = np.load(test_file_path, allow_pickle=True)
train_data = np.load(train_file_path, allow_pickle=True)
adv_test_similarity_matrices = np.load(adv_test_similarity_path, allow_pickle=True)
adv_train_similarity_matrices = np.load(adv_train_similarity_path, allow_pickle=True)
for _label, _matrix in (
    ("Test data", test_data),
    ("Train data", train_data),
    ("Adv test similarity matrices", adv_test_similarity_matrices),
    ("Adv train similarity matrices", adv_train_similarity_matrices),
):
    print(f"{_label} shape:", _matrix.shape)

os.makedirs(out_dir, exist_ok=True)

# Baseline run on the unmodified matrices.
command = ['python', 'dynamic_inverted_softmax.py']
command += ['--sims_train_test_path', train_file_path]
command += ['--sims_test_path', test_file_path]
command += ['--gt_idx_path', gt_idx_path]
command += ['--defense', 'True']
print(command)

# Run the script and report the parsed before/after metrics.
metrics = parse_metrics(command)
print("Mertrics for original data:")
print(metrics)

Test data shape: (25000, 5000)
Train data shape: (5000, 5000)
Adv test similarity matrices shape: (25000, 100)
Adv train similarity matrices shape: (100, 5000)
['python', 'dynamic_inverted_softmax.py', '--sims_train_test_path', '../outputs/mscoco/openclip/train_similarity_matrix.pkl', '--sims_test_path', '../outputs/mscoco/openclip/test_similarity_matrix.pkl', '--gt_idx_path', '../outputs/mscoco/gt_idx.pkl', '--defense', 'True']
Mertrics for original data:
{'before': {'R1': 48.5, 'R3': 65.8, 'R5': 72.8, 'R10': 81.1, 'R50': 95.3, 'MedR': 2.0, 'MeanR': 13.8, 'geometric_mean_R1-R5-R10': 65.9, 'MeanA': 0.311}, 'after': {'R1': 50.0, 'R3': 66.8, 'R5': 73.9, 'R10': 82.3, 'R50': 95.6, 'MedR': 2.0, 'MeanR': 13.2, 'geometric_mean_R1-R5-R10': 67.2, 'MeanA': 0.131}}


In [15]:
# Evaluate every adversarial candidate; returns per-run metrics keyed by index.
gt_metrics, adv_metrics = execute_dynamic_inverted_softmax(
    test_file_path=test_file_path,
    train_file_path=train_file_path,
    adv_test_similarity_path=adv_test_similarity_path,
    adv_train_similarity_path=adv_train_similarity_path,
    gt_idx_path=gt_idx_path,
    out_dir=out_dir,
)

  0%|          | 0/100 [00:00<?, ?it/s]

{'before': {'R1': 9.5, 'R3': 28.0, 'R5': 36.7, 'R10': 49.1, 'R50': 79.8, 'MedR': 11.0, 'MeanR': 48.7, 'geometric_mean_R1-R5-R10': 25.8, 'MeanA': 0.236}, 'after': {'R1': 26.7, 'R3': 42.3, 'R5': 50.2, 'R10': 62.0, 'R50': 86.9, 'MedR': 5.0, 'MeanR': 32.9, 'geometric_mean_R1-R5-R10': 43.6, 'MeanA': 0.032}}
{'before': {'R1': 63.7, 'R3': 81.1, 'R5': 86.2, 'R10': 92.0, 'R50': 98.8, 'MedR': 1.0, 'MeanR': 4.3, 'geometric_mean_R1-R5-R10': 79.7, 'MeanA': 0.285}, 'after': {'R1': 0.0, 'R3': 6.1, 'R5': 7.7, 'R10': 9.3, 'R50': 11.3, 'MedR': 1421.5, 'MeanR': 1816.5, 'geometric_mean_R1-R5-R10': 0.0, 'MeanA': 0.028}}
{'before': {'R1': 3.2, 'R3': 27.0, 'R5': 36.3, 'R10': 49.0, 'R50': 79.8, 'MedR': 11.0, 'MeanR': 48.9, 'geometric_mean_R1-R5-R10': 17.8, 'MeanA': 0.236}, 'after': {'R1': 26.7, 'R3': 42.7, 'R5': 50.6, 'R10': 62.3, 'R50': 87.0, 'MedR': 5.0, 'MeanR': 32.7, 'geometric_mean_R1-R5-R10': 43.9, 'MeanA': 0.016}}
{'before': {'R1': 90.4, 'R3': 97.0, 'R5': 98.3, 'R10': 99.2, 'R50': 99.9, 'MedR': 1.0, 'M

In [19]:
# Persist both metric dictionaries next to the similarity matrices.
for _stem, _payload in (('gt_metrics', gt_metrics), ('adv_metrics', adv_metrics)):
    with open(f'{out_dir}/{_stem}.pkl', 'wb') as f:
        pkl.dump(_payload, f)

In [20]:
# Read the saved metric dictionaries back from disk (ground-truth first).
gt_metrics_file = f'{out_dir}/gt_metrics.pkl'
with open(gt_metrics_file, 'rb') as f:
    temp_gt_metrics = pkl.load(f)

adv_metrics_file = f'{out_dir}/adv_metrics.pkl'
with open(adv_metrics_file, 'rb') as f:
    temp_adv_metrics = pkl.load(f)

def post_analysis(data):
    # Initialize dictionaries to hold lists of metric values across entries
    metrics_before = {key: [] for key in data[0]['before']}
    metrics_after = {key: [] for key in data[0]['after']}

    # Collect all metric values for each metric type across all entries
    for entry in data.values():
        for metric, value in entry['before'].items():
            metrics_before[metric].append(value)
        for metric, value in entry['after'].items():
            metrics_after[metric].append(value)

    # Function to calculate mean and standard deviation
    def calculate_stats(metrics_dict):
        stats = {}
        for metric, values in metrics_dict.items():
            avg = round(np.mean(values), 1)
            std_dev = round(np.std(values), 1)
            stats[metric] = {'average': avg, 'std_dev': std_dev}
        return stats

    # Calculate stats for 'before' and 'after'
    before_stats = calculate_stats(metrics_before)
    after_stats = calculate_stats(metrics_after)

    return before_stats, after_stats

# Summaries are (before_stats, after_stats) tuples; ground-truth first, then adversarial.
gt_stats = post_analysis(temp_gt_metrics)
print("gt stats:", gt_stats)
adv_stats = post_analysis(temp_adv_metrics)
print("adv stats:", adv_stats)


gt stats: ({'R1': {'average': 8.2, 'std_dev': 3.6}, 'R3': {'average': 28.1, 'std_dev': 0.9}, 'R5': {'average': 36.8, 'std_dev': 0.4}, 'R10': {'average': 49.2, 'std_dev': 0.2}, 'R50': {'average': 79.8, 'std_dev': 0.0}, 'MedR': {'average': 11.0, 'std_dev': 0.1}, 'MeanR': {'average': 48.7, 'std_dev': 0.1}, 'geometric_mean_R1-R5-R10': {'average': 23.9, 'std_dev': 4.1}, 'MeanA': {'average': 0.2, 'std_dev': 0.0}}, {'R1': {'average': 26.7, 'std_dev': 0.1}, 'R3': {'average': 42.4, 'std_dev': 0.2}, 'R5': {'average': 50.3, 'std_dev': 0.2}, 'R10': {'average': 62.0, 'std_dev': 0.3}, 'R50': {'average': 86.9, 'std_dev': 0.1}, 'MedR': {'average': 5.1, 'std_dev': 0.2}, 'MeanR': {'average': 32.9, 'std_dev': 0.2}, 'geometric_mean_R1-R5-R10': {'average': 43.7, 'std_dev': 0.2}, 'MeanA': {'average': 0.0, 'std_dev': 0.0}})
adv stats: ({'R1': {'average': 65.5, 'std_dev': 19.1}, 'R3': {'average': 79.2, 'std_dev': 15.2}, 'R5': {'average': 83.9, 'std_dev': 13.2}, 'R10': {'average': 89.3, 'std_dev': 10.3}, 'R50'