In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy
import time
import pickle

In [2]:
def comparison(threshold,train,test,information_type):
    if information_type =="prediction_var":
        return ((train >= threshold).sum() + (test < threshold).sum()) / (len(train)+len(test)) 
    else:
        return ((train <= threshold).sum() + (test > threshold).sum()) / (len(train)+len(test))

def read_model(path,model_number, information_type,cifar=False):
    if not cifar:
        df = pd.read_csv("{}/{}.csv".format(path,model_number),index_col=0)
        train = df["train_{}".format(information_type)]
        test = df["test_{}".format(information_type)]
    else:
        df = pd.read_csv("{}/{}_train.csv".format(path,model_number),index_col=0)
        train = df["train_{}".format(information_type)]
        train = train.head(10000)
        df = pd.read_csv("{}/{}_test.csv".format(path,model_number),index_col=0)
        test = df["test_{}".format(information_type)]
    return train, test

def evaluate_optimal_attack(path,model_number,information_type,cifar=False):
    train, test = read_model(path,model_number, information_type,cifar)
    optimal_acc = 0
    optimal_threshold = np.nan
    sort_train = np.sort(np.unique(train))
    #We're not looking at every possible thrshold but only every 10th, 
    #that speeds up computation by 10 times and doesn't seem to have
    #effect on the performance
    for threshold_ind in range(0,len(sort_train),10):
        threshold = sort_train[threshold_ind]
        acc = comparison(threshold,train,test,information_type)
        if acc>optimal_acc:
            optimal_acc = acc
            optimal_threshold = threshold
    return optimal_acc, optimal_threshold

def evaluate_k_shadow_attack(path,model_number,information_type,k,cifar=False,max_model_number=4):
    if cifar: return np.nan,np.nan
    thresholds = np.zeros(k)
    for current_k in range(k):
        _, thresholds[current_k] = evaluate_optimal_attack(path,(model_number+1+k)% max_model_number,information_type)
    threshold = np.median(thresholds)
    train, test = read_model(path,model_number, information_type,cifar=cifar)
    acc = comparison(threshold,train,test,information_type)
    return acc, threshold     

def evaluate_attack(path,model_number,information_type,attack_type,cifar):
    if attack_type == "optimal":
        acc, threshold = evaluate_optimal_attack(path,model_number,information_type,cifar=cifar)
    elif attack_type == "shadow_1":
        acc, threshold = evaluate_k_shadow_attack(path,model_number,information_type,k=1,cifar=cifar,max_model_number=2)
    elif attack_type == "shadow_3":
        acc, threshold = evaluate_k_shadow_attack(path,model_number,information_type,k=3,cifar=cifar)
    return acc, threshold

In [8]:
def create_attack_dic(attack_types):
    result = {}
    for attack in attack_types:
        result[attack] = []
    return result

In [13]:
cifars={"purchase":False, "texas":False, "cifar-10":False, "cifar-100":True, "adult":False, "hospital":False}
experiment_numbers = {"purchase":np.arange(1),
                      "texas":np.arange(1),
                      "cifar-10":np.arange(1),
                      "adult":np.arange(1),
                      "hospital":np.arange(1),
                     }
#experiment_numbers = {"purchase":np.arange(25), "texas":np.arange(25), "cifar-10":np.arange(50), "adult":np.arange(25)}
max_model_number = {"purchase":4, "texas":4, "cifar-10":2, "cifar-100":1, "adult":4, "hospital":4}
experiment_names = ["hospital"] #"purchase", "texas", "cifar-10", "cifar-100", "adult", "hospital"]
information_types = ["loss", "prediction_var", "analysis_var"]
attack_types = {"purchase":['optimal',"shadow_1","shadow_3"],
                "texas":['optimal',"shadow_1","shadow_3"],
                "cifar-10":['optimal',"shadow_1"],
                "cifar-100":['optimal'],
                "adult":['optimal',"shadow_1","shadow_3"],
                "adult":['optimal']
               }

In [14]:
%%time
for ex in experiment_names:
    model_numbers = np.arange(max_model_number[ex])
    result_accs, result_thresholds = {},{}
    for information_type in information_types:
        result_accs[information_type] = create_attack_dic(attack_types[ex])
        result_thresholds[information_type] = create_attack_dic(attack_types[ex])
    print(ex, end=" ")
    counter = 0
    start = time.time()
    for experiment_number in experiment_numbers[ex]:
        path="ThresholdExperiments/{}/{}/".format(ex, experiment_number)
        for model_number in model_numbers:
            #print(counter, end= " ")
            counter += 1
            for information_type in information_types:
                for attack_type in attack_types[ex]:
                    acc, threshold = evaluate_attack(path,model_number,information_type,attack_type,cifar=cifars[ex])
                    result_accs[information_type][attack_type].append(acc)
                    result_thresholds[information_type][attack_type].append(threshold)
                #print(information_type, end=" ")
    print(time.time() - start) 
    pickle.dump(result_accs,open(ex+"_accs","bw"))
    pickle.dump(result_thresholds,open(ex+"_thrs","bw"))
    for information_type in information_types: 
        print(information_type)
        for attack_type in attack_types[ex]:
            print("{}\t".format(attack_type),end=" ")
        print("")
        for attack_type in attack_types[ex]:
            print("{:.2f}\t".format(np.mean(result_accs[information_type][attack_type])),end="")
        print("")
    print("")

adult 20.523303270339966
loss
optimal	 shadow_1	 shadow_3	 
0.51	0.50	0.51	
prediction_var
optimal	 shadow_1	 shadow_3	 
0.51	0.50	0.51	
analysis_var
optimal	 shadow_1	 shadow_3	 
0.51	0.50	0.51	

CPU times: user 20.4 s, sys: 140 ms, total: 20.5 s
Wall time: 20.5 s
