In [1]:
import subprocess
import numpy as np
import os
import pickle as pkl
import re
import ast
import json
import math
from tqdm.notebook import tqdm

def parse_metrics(command):
    """Run ``command`` and extract the metric dictionaries it prints.

    The captured stdout is scanned for the header lines
    ``Metrics before applying QB-Norm`` and ``Metrics after QB-Norm``; the
    line that follows each header is parsed as a Python literal.

    Args:
        command: Argument list handed to ``subprocess.run``.

    Returns:
        dict: ``{'before': <parsed value or None>, 'after': <parsed value or None>}``.
        A value stays ``None`` when its header is absent, has no following
        line, or that line cannot be parsed.

    Notes:
        If the command exits with a non-zero status, its stderr is printed and
        whatever stdout it produced before failing is still parsed, instead of
        crashing on an unbound ``result`` variable.
    """
    try:
        result = subprocess.run(command, check=True, capture_output=True, text=True)
        stdout = result.stdout
    except subprocess.CalledProcessError as e:
        print("Error occurred:\n", e.stderr)
        # The exception carries the captured stdout; it may be None or partial.
        stdout = e.stdout or ""

    output_lines = stdout.strip().split('\n')

    # Header text -> key in the returned dictionary. Order matters: the first
    # matching header wins for a given line, mirroring an if/elif chain.
    markers = (
        ('Metrics before applying QB-Norm', 'before'),
        ('Metrics after QB-Norm', 'after'),
    )

    metrics = {'before': None, 'after': None}
    for i, line in enumerate(output_lines):
        label = next((name for marker, name in markers if marker in line), None)
        # Skip lines without a header, or a header that is the very last line.
        if label is None or i + 1 >= len(output_lines):
            continue
        metrics_line = output_lines[i + 1].strip()
        try:
            metrics[label] = ast.literal_eval(metrics_line)
        except (SyntaxError, ValueError):
            # Keep any previously parsed value; just report the bad line.
            print(f"Could not parse metrics {label} at line {i + 1}: {metrics_line}")

    return metrics

def execute_dynamic_inverted_softmax(test_file_path, adv_test_similarity_path, gt_idx_path, out_dir, max_adv=100):
    """Evaluate each adversarial similarity vector by appending it as an extra column.

    For every adversarial vector, the test similarity matrix is extended with
    that vector as a new last column, written to ``<out_dir>/output_cache`` and
    scored twice with ``dynamic_inverted_softmax.py``: once against the
    ground-truth indices (shifted to account for the extra column) and once
    against the indices of the appended column itself.

    Args:
        test_file_path: Path to the pickled test similarity matrix.
        adv_test_similarity_path: Path to the pickled adversarial similarities;
            the loaded array is transposed so that each row is one vector.
        gt_idx_path: Path to the pickled ground-truth indices. The shifting
            logic adds the row position to each entry, i.e. it treats them as
            flat indices into the row-major test matrix, one per query
            (assumption to confirm against the scoring script).
        out_dir: Directory receiving ``gt_metrics.pkl`` / ``adv_metrics.pkl``
            and the ``output_cache`` sub-directory.
        max_adv: Maximum number of adversarial vectors to evaluate
            (default 100, the previously hard-coded limit). ``None`` evaluates all.

    Returns:
        tuple: ``(gt_metrics, adv_metrics)``, two dicts keyed by the adversarial
        vector index, each value being the result of ``parse_metrics``.
    """
    temp_out_dir = out_dir + "/output_cache"

    # NOTE: np.load(..., allow_pickle=True) unpickles data; only use trusted files.
    data = np.load(test_file_path, allow_pickle=True)
    adv_test_similarity_matrices = np.load(adv_test_similarity_path, allow_pickle=True).T
    gt_idx = np.load(gt_idx_path, allow_pickle=True)

    os.makedirs(temp_out_dir, exist_ok=True)

    # Everything below depends only on array shapes, so compute it once
    # instead of on every loop iteration.
    data_shape = np.shape(data)
    num_queries = data_shape[0]
    # column_stack treats 1-D input as a single column; +1 for the appended column.
    num_vids = (data_shape[1] if len(data_shape) > 1 else 1) + 1

    # Flat index of the last (adversarial) column in each row of the widened matrix.
    adv_idx = np.arange(1, num_queries + 1) * num_vids - 1
    adv_idx_path = f'{temp_out_dir}/adv_idx.pkl'
    with open(adv_idx_path, 'wb') as f:
        pkl.dump(adv_idx, f)

    # Each row gained one column, so the k-th entry is shifted by k positions.
    modified_gt_index = (gt_idx.flatten() + np.arange(gt_idx.size)).reshape(gt_idx.shape)

    base_command = ['python', 'dynamic_inverted_softmax.py']

    gt_metrics = {}
    adv_metrics = {}
    for i, adv in enumerate(tqdm(adv_test_similarity_matrices[:max_adv])):
        # column_stack allocates a new array, so `data` itself is never modified.
        test_data_temp = np.column_stack((data, adv.flatten()))

        modified_file_path = f'{temp_out_dir}/modified-test-images-texts-seed0-{i}.pkl'
        with open(modified_file_path, 'wb') as f:
            pkl.dump(test_data_temp, f)

        # Kept as one file per iteration to preserve the existing cache layout.
        modified_gt_index_path = f'{temp_out_dir}/modified_gt_idx-{i}.pkl'
        with open(modified_gt_index_path, 'wb') as f:
            pkl.dump(modified_gt_index, f)

        gt_command = base_command + [
            '--sims_test_path', modified_file_path,
            '--gt_idx_path', modified_gt_index_path
        ]
        adv_command = base_command + [
            '--sims_test_path', modified_file_path,
            '--gt_idx_path', adv_idx_path
        ]

        gt_metrics[i] = parse_metrics(gt_command)
        adv_metrics[i] = parse_metrics(adv_command)

        # Checkpoint after every iteration so partial results survive an interruption.
        with open(f'{out_dir}/gt_metrics.pkl', 'wb') as f:
            pkl.dump(gt_metrics, f)
        with open(f'{out_dir}/adv_metrics.pkl', 'wb') as f:
            pkl.dump(adv_metrics, f)

    return gt_metrics, adv_metrics


In [2]:
# Input/output locations used by this cell and by later cells.
out_dir = '../outputs/audiocap/audioclip'
gt_idx_path = '../outputs/audiocap/gt_idx.pkl'
test_file_path = f'{out_dir}/test_similarity_matrix_fixed.pkl'
adv_test_similarity_path = f'{out_dir}/adv_test_similarity_matrix.pkl'

os.makedirs(out_dir, exist_ok=True)

# Load the three inputs once, only to report their shapes as a sanity check.
test_data = np.load(test_file_path, allow_pickle=True)
adv_test_similarity_matrices = np.load(adv_test_similarity_path, allow_pickle=True)
gt_idx = np.load(gt_idx_path, allow_pickle=True)

print("Test data shape:", test_data.shape)
print("Adv test similarity matrices shape:", adv_test_similarity_matrices.shape)
print("gt_idx shape:", gt_idx.shape)

# Baseline: score the unmodified similarity matrix against the ground truth.
command = ['python', 'dynamic_inverted_softmax.py']
command += ['--sims_test_path', test_file_path]
command += ['--gt_idx_path', gt_idx_path]

metrics = parse_metrics(command)
print("Mertrics for original data:", metrics, sep="\n")

# Adversarial sweep: one evaluation pair per adversarial vector.
gt_metrics, adv_metrics = execute_dynamic_inverted_softmax(
    test_file_path,
    adv_test_similarity_path,
    gt_idx_path,
    out_dir,
)

Test data shape: (743, 743)
Adv test similarity matrices shape: (743, 100)
gt_idx shape: (743,)
Mertrics for original data:
{'before': {'R1': 6.2, 'R3': 14.9, 'R5': 20.7, 'R10': 31.8, 'R50': 61.5, 'MedR': 27.0, 'MeanR': 95.4, 'geometric_mean_R1-R5-R10': 16.0, 'MeanA': 0.085}, 'after': None}


  0%|          | 0/100 [00:00<?, ?it/s]

In [3]:

# Reload the metric dictionaries that the evaluation loop checkpointed to disk.
# NOTE: unpickling executes arbitrary code; only load files you produced yourself.
with open(f'{out_dir}/gt_metrics.pkl', 'rb') as gt_metrics_file:
    temp_gt_metrics = pkl.load(gt_metrics_file)

with open(f'{out_dir}/adv_metrics.pkl', 'rb') as adv_metrics_file:
    temp_adv_metrics = pkl.load(adv_metrics_file)

def post_analysis(data):
    """Aggregate the 'before' metrics of every run into mean / std-dev pairs.

    Args:
        data: Mapping of run identifier -> ``{'before': dict or None, ...}``.
            Runs whose ``'before'`` entry is missing or ``None`` (for example
            because the run failed or its output could not be parsed) are
            skipped rather than raising.

    Returns:
        dict: ``{metric: {'average': float, 'std_dev': float}}``, with both
        statistics rounded to one decimal place. Only the 'before' metrics are
        summarised; 'after' metrics are ignored. Empty when no run has usable
        'before' metrics.
    """
    # Collect the values of each metric across all usable runs. The metric
    # names are discovered from the data, so no particular key (such as 0) is
    # required to exist.
    metrics_before = {}
    for entry in data.values():
        before = entry.get('before') if entry else None
        if not before:
            continue
        for metric, value in before.items():
            metrics_before.setdefault(metric, []).append(value)

    def calculate_stats(metrics_dict):
        """Return the rounded mean and population std-dev for each metric."""
        stats = {}
        for metric, values in metrics_dict.items():
            avg = round(np.mean(values), 1)
            std_dev = round(np.std(values), 1)
            stats[metric] = {'average': avg, 'std_dev': std_dev}
        return stats

    return calculate_stats(metrics_before)

# Summarise the ground-truth run first, then the adversarial run.
for stats_label, collected_metrics in (
    ("gt stats:", temp_gt_metrics),
    ("adv stats:", temp_adv_metrics),
):
    print(stats_label, post_analysis(collected_metrics))


gt stats: {'R1': {'average': 2.4, 'std_dev': 0.8}, 'R3': {'average': 11.7, 'std_dev': 0.3}, 'R5': {'average': 18.8, 'std_dev': 0.3}, 'R10': {'average': 30.6, 'std_dev': 0.2}, 'R50': {'average': 61.2, 'std_dev': 0.0}, 'MedR': {'average': 28.0, 'std_dev': 0.0}, 'MeanR': {'average': 96.3, 'std_dev': 0.0}, 'geometric_mean_R1-R5-R10': {'average': 11.0, 'std_dev': 1.3}, 'MeanA': {'average': 0.1, 'std_dev': 0.0}}
adv stats: {'R1': {'average': 59.0, 'std_dev': 14.3}, 'R3': {'average': 72.3, 'std_dev': 12.1}, 'R5': {'average': 78.4, 'std_dev': 10.5}, 'R10': {'average': 86.5, 'std_dev': 8.0}, 'R50': {'average': 96.9, 'std_dev': 2.4}, 'MedR': {'average': 1.5, 'std_dev': 0.9}, 'MeanR': {'average': 8.0, 'std_dev': 4.3}, 'geometric_mean_R1-R5-R10': {'average': 73.5, 'std_dev': 11.5}, 'MeanA': {'average': 0.1, 'std_dev': 0.0}}


In [8]:
import pickle
import numpy as np

# NOTE(review): this cell overwrites its own input file, so it is not
# idempotent: every execution flips the stored orientation, and running it an
# even number of times leaves the orientation unchanged. Confirm the intended
# orientation before re-running.

# Read the pickled matrix from disk.
with open(test_file_path, 'rb') as source_file:
    matrix = pickle.load(source_file)

# Swap the axes.
transposed_matrix = np.transpose(matrix)

# Write the transposed matrix over the original file.
with open(test_file_path, 'wb') as target_file:
    pickle.dump(transposed_matrix, target_file)

In [None]:
import pickle
import numpy as np

# NOTE(review): this cell repeats the in-place transposition performed by the
# previous cell. Because the file is overwritten each time, executing both
# cells returns the stored matrix to the orientation it started with. Confirm
# whether this cell should be run at all.

# Read the pickled matrix from disk.
with open(test_file_path, 'rb') as source_file:
    matrix = pickle.load(source_file)

# Swap the axes.
transposed_matrix = np.transpose(matrix)

# Write the transposed matrix over the original file.
with open(test_file_path, 'wb') as target_file:
    pickle.dump(transposed_matrix, target_file)