### This script is for processing predictions from attacks to further analyze the results.

In [15]:
import os
import numpy as np
from PIL import Image
from tqdm import tqdm
import os
import matplotlib.pyplot as plt
import pickle
import csv
from tqdm import tqdm
import torch


# add ../.. to the path (MIAE)
import sys
sys.path.append('../../')
from miae.utils.dataset_utils import dataset_split
from experiment.models import get_model



ModuleNotFoundError: No module named 'numpy'

In [None]:
# Select which task to perform (later cells check this value, e.g. `task == 1`).
task = 2

### Task 1: Show training and testing accuracy for all target models we have

In [None]:
import csv

# Base directory under which the per-run experiment folders are looked up.
data_path = '/data/public/comp_mia_data/repeat_exp_set'

# Indices of the repeated runs whose results are aggregated.
runs = list(range(4))

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def accuracy(model, data, device):
    """Compute top-1 and top-3 accuracy of ``model`` over all of ``data``.

    Args:
        model: A ``torch.nn.Module`` whose forward pass returns one score per
            class for every input in the batch (scores are ranked along
            dimension 1).
        data: A dataset accepted by ``torch.utils.data.DataLoader`` that
            yields ``(images, labels)`` pairs.
        device: The ``torch.device`` on which the model is evaluated.

    Returns:
        A tuple ``(top1_accuracy, top3_accuracy)``; each value is the fraction
        of samples (between 0 and 1) whose label is, respectively, the
        highest-scoring class or among the three highest-scoring classes.

    Raises:
        ValueError: If ``data`` yields no samples, since accuracy is undefined.
    """
    model.eval()
    model.to(device)
    data_loader = torch.utils.data.DataLoader(data, batch_size=128, shuffle=False)

    top1_correct = 0
    top3_correct = 0
    total = 0
    with torch.inference_mode():
        for images, labels in tqdm(data_loader):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            total += labels.size(0)

            _, predicted = torch.max(outputs, 1)
            top1_correct += (predicted == labels).sum().item()

            # Top-3 hits must be accumulated per batch: counting them only
            # once after the loop would cover just the final batch while
            # still dividing by the size of the whole dataset.
            _, top3_predicted = torch.topk(outputs, 3, dim=1)
            top3_correct += (top3_predicted == labels.unsqueeze(1)).sum().item()

    if total == 0:
        raise ValueError("Cannot compute accuracy on an empty dataset.")

    top1_accuracy = top1_correct / total
    top3_accuracy = top3_correct / total
    return top1_accuracy, top3_accuracy


def process_accuracy(arch, dataset, runs):
    """Aggregate train/test accuracy of one target model over repeated runs.

    For every run index in ``runs`` the saved target model weights and the
    pickled target train/test sets are loaded from below the module-level
    ``data_path``, and the model is evaluated with ``accuracy`` on the
    module-level ``device``.

    Args:
        arch: Architecture name passed to ``get_model``; one of
            ``"resnet56"``, ``"vgg16"``, ``"mobilenet"`` or ``"wrn32_4"``.
        dataset: Dataset name used in the file paths. ``"cifar10"`` selects
            10 output classes; any other value selects 100.
        runs: Iterable of run indices to evaluate.

    Returns:
        A 10-tuple, in this order: mean and std of top-1 train accuracy, mean
        and std of top-3 train accuracy, mean and std of top-1 test accuracy,
        mean and std of top-3 test accuracy, the generalization gap (mean
        train minus mean test top-1 accuracy) and the std of the per-run
        generalization gaps.

    Raises:
        ValueError: If ``arch`` is not one of the supported architectures.
    """
    supported_archs = ("resnet56", "vgg16", "mobilenet", "wrn32_4")
    if arch not in supported_archs:
        # Fail early with a clear message instead of hitting an unbound
        # ``target_model`` later inside the loop.
        raise ValueError(
            f"Unsupported architecture {arch!r}; expected one of {supported_archs}"
        )

    num_classes = 10 if dataset == "cifar10" else 100
    input_size = 32
    print (f"Number of classes: {num_classes}")
    target_model = get_model(arch, num_classes=num_classes, input_size=input_size)

    train_accuracies = []
    train_accuracies_top3 = []
    test_accuracies = []
    test_accuracies_top3 = []

    for run in runs:
        target_path = os.path.join(data_path, f"miae_experiment_aug_more_target_data_{run}/target")
        target_model_path = f"{target_path}/target_models/{dataset}/{arch}/target_model_{arch}{dataset}.pkl"
        target_train_data_path = f"{target_path}/{dataset}/target_trainset.pkl"
        target_test_data_path = f"{target_path}/{dataset}/target_testset.pkl"

        # map_location lets weights saved on a GPU load on a CPU-only host.
        target_model.load_state_dict(torch.load(target_model_path, map_location=device))

        # NOTE: pickle executes arbitrary code on load; these files must be
        # trusted experiment artifacts.
        with open(target_train_data_path, 'rb') as f:
            target_train_data = pickle.load(f)
        with open(target_test_data_path, 'rb') as f:
            target_test_data = pickle.load(f)

        train_top1, train_top3 = accuracy(target_model, target_train_data, device)
        test_top1, test_top3 = accuracy(target_model, target_test_data, device)

        train_accuracies.append(train_top1)
        train_accuracies_top3.append(train_top3)
        test_accuracies.append(test_top1)
        test_accuracies_top3.append(test_top3)

    avg_train_accuracy = np.mean(train_accuracies)
    std_train_accuracy = np.std(train_accuracies)
    avg_train_accuracy_top3 = np.mean(train_accuracies_top3)
    std_train_accuracy_top3 = np.std(train_accuracies_top3)
    avg_test_accuracy = np.mean(test_accuracies)
    std_test_accuracy = np.std(test_accuracies)
    avg_test_accuracy_top3 = np.mean(test_accuracies_top3)
    std_test_accuracy_top3 = np.std(test_accuracies_top3)

    generalization_gap = avg_train_accuracy - avg_test_accuracy
    # The spread of the gap is the std of the per-run gaps. Subtracting the
    # two stds is not a standard deviation and can even be negative.
    per_run_gaps = np.asarray(train_accuracies) - np.asarray(test_accuracies)
    generalization_gap_std = np.std(per_run_gaps)

    # average accuracy, std
    print(f"Average train accuracy: {avg_train_accuracy*100:.4f}% ± {std_train_accuracy*100:.4f}%")
    print(f"Average test accuracy: {avg_test_accuracy*100:.4f}% ± {std_test_accuracy*100:.4f}%")
    print(f"Generalization gap: {generalization_gap*100:.4f}% ± {generalization_gap_std*100:.4f}%")

    return (
        avg_train_accuracy,
        std_train_accuracy,
        avg_train_accuracy_top3,
        std_train_accuracy_top3,
        avg_test_accuracy,
        std_test_accuracy,
        avg_test_accuracy_top3,
        std_test_accuracy_top3,
        generalization_gap,
        generalization_gap_std,
    )

arch_list = ["resnet56", "vgg16", "mobilenet", "wrn32_4"]
dataset_list = ["cifar10", "cifar100"]

# Columns of the summary CSV. The top-3 statistics returned by
# process_accuracy are not exported.
header = ['Architecture', 'Dataset', 'Avg Train Accuracy', 'Std Train Accuracy', 'Avg Test Accuracy', 'Std Test Accuracy', 'Generalization Gap', 'Generalization Gap Std']
if task == 1:
    # The output path has to exist before the header is written; assigning it
    # only inside the loop below would raise NameError on the first open().
    save_path = f"{data_path}/target_training_statsd.csv"

    # The file is opened in append mode, so only emit the header when the file
    # is new or empty; otherwise every re-run would insert another header row.
    if not os.path.exists(save_path) or os.path.getsize(save_path) == 0:
        with open(save_path, mode='a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(header)

    for arch in arch_list:
        for dataset in dataset_list:
            print(f"Processing {arch} on {dataset}")
            (
                avg_train_accuracy,
                std_train_accuracy,
                avg_train_accuracy_top3,
                std_train_accuracy_top3,
                avg_test_accuracy,
                std_test_accuracy,
                avg_test_accuracy_top3,
                std_test_accuracy_top3,
                generalization_gap,
                generalization_gap_std,
            ) = process_accuracy(arch, dataset, runs)

            # Append one row per (arch, dataset) right away so finished
            # results are on disk even if a later combination fails.
            # newline='' is what the csv module expects for files it writes.
            with open(save_path, mode='a', newline='') as f:
                writer = csv.writer(f)
                writer.writerow([arch, dataset, avg_train_accuracy, std_train_accuracy, avg_test_accuracy, std_test_accuracy, generalization_gap, generalization_gap_std])
    # Report the path that was actually written.
    print(f"csv saved to {save_path}")

### Loss distribution

In [None]:
target_path = '/data/public/comp_mia_data/miae_experiment_aug_more_target_data'
target_data_path = ''