In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import copy
import time

In [2]:
random_state = 0
experiment_name = "purchase"
cifar=False
spilt=0.5
experiment_numbers = np.arange(1)
max_model_number = 4
path="ThresholdExperiments/{}/{}".format(experiment_name,random_state)

In [3]:
def comparison(threshold,train,test,information_type):
    if information_type =="prediction_var":
        return ((train >= threshold).sum() + (test < threshold).sum()) / (len(train)+len(test)) 
    else:
        return ((train <= threshold).sum() + (test > threshold).sum()) / (len(train)+len(test))

def read_model(path,model_number, information_type,cifar=False):
    if not cifar:
        df = pd.read_csv("{}/{}.csv".format(path,model_number),index_col=0)
        train = df["train_{}".format(information_type)]
        test = df["test_{}".format(information_type)]
    else:
        df = pd.read_csv("{}/{}_train.csv".format(path,model_number),index_col=0)
        train = df["train_{}".format(information_type)]
        train = train.head(10000)
        df = pd.read_csv("{}/{}_test.csv".format(path,model_number),index_col=0)
        test = df["test_{}".format(information_type)]
    return train, test

def evaluate_optimal_attack(path,model_number,information_type,cifar=False):
    train, test = read_model(path,model_number, information_type,cifar)
    optimal_acc = 0
    optimal_threshold = np.nan
    sort_train = np.sort(np.unique(train))
    #We're not looking at every possible thrshold but only every 10th, 
    #that speeds up computation by 10 times and doesn't seem to have
    #effect on the performance
    for threshold_ind in range(0,len(sort_train),10):
        threshold = sort_train[threshold_ind]
        acc = comparison(threshold,train,test,information_type)
        if acc>optimal_acc:
            optimal_acc = acc
            optimal_threshold = threshold
    return optimal_acc, optimal_threshold

def evaluate_k_shadow_attack(path,model_number,information_type,k,cifar=False,max_model_number=4):
    if cifar: return np.nan,np.nan
    thresholds = np.zeros(k)
    for current_k in range(k):
        _, thresholds[current_k] = evaluate_optimal_attack(path,(model_number+1+k)% max_model_number,information_type)
    threshold = np.median(thresholds)
    train, test = read_model(path,model_number, information_type,cifar=cifar)
    acc = comparison(threshold,train,test,information_type)
    return acc, threshold     

In [112]:
information_types = ["loss", "prediction_var", "analysis_var"]
model_numbers = np.arange(max_model_number)
result_accs, result_thresholds = {},{}
attack_types = {'optimal':[],"shadow_1":[],"shadow_3":[]}
for information_type in information_types:
    result_accs[information_type] = copy.deepcopy(attack_types)
    result_thresholds[information_type] = copy.deepcopy(attack_types)

In [113]:
model_number

1

In [210]:
%%time
max_model_number = 1
cifar=True
for experiment_name in ["purchase-smoothgrad"]:
    information_types = ["loss", "prediction_var", "analysis_var", "analysis_1"]
    model_numbers = np.arange(max_model_number)
    result_accs, result_thresholds = {},{}
    attack_types = {'optimal':[],"shadow_1":[],"shadow_3":[],"split":[]}
    for information_type in information_types:
        result_accs[information_type] = copy.deepcopy(attack_types)
        result_thresholds[information_type] = copy.deepcopy(attack_types)
    print(experiment_name, end=" ")
    counter = 0
    start = time.time()
    for experiment_numer in experiment_numbers:
        path="ThresholdExperiments/{}/{}/".format(experiment_name,experiment_numer)
        for model_number in model_numbers:
            #print(counter, end= " ")
            counter += 1
            for information_type in information_types: 
                #print(information_type, end=" ")
                acc, threshold = evaluate_optimal_attack(path,model_number,information_type,cifar=cifar)
                result_accs[information_type]["optimal"].append(acc)
                result_thresholds[information_type]["optimal"].append(threshold)
                acc, threshold = evaluate_k_shadow_attack(path,model_number,information_type,k=1,cifar=cifar,max_model_number=2)
                result_accs[information_type]["shadow_1"].append(acc)
                result_thresholds[information_type]["shadow_1"].append(threshold)
                acc, threshold = evaluate_k_shadow_attack(path,model_number,information_type,k=3,cifar=cifar)
                result_accs[information_type]["shadow_3"].append(acc)
                result_thresholds[information_type]["shadow_3"].append(threshold)
                acc, threshold = evaluate_split_attack(path,model_number,information_type,split=spilt,cifar=cifar)
                result_accs[information_type]["split"].append(acc)
                result_thresholds[information_type]["split"].append(threshold)
    print(time.time() - start) 
    pickle.dump(result_accs,open(experiment_name+"_accs","bw"))
    pickle.dump(result_thresholds,open(experiment_name+"_thrs","bw"))
    for information_type in information_types: 
        print(information_type)
        for attack_type in ['optimal\t',"shadow_1","shadow_3","split"]:
            print("{}\t".format(attack_type),end=" ")
        print("")
        for attack_type in ['optimal',"shadow_1","shadow_3","split"]:
            print(" {:.2f}\t\t".format(np.mean(result_accs[information_type][attack_type])),end="")
    print("")

purchase-smoothgrad 

FileNotFoundError: [Errno 2] File b'ThresholdExperiments/purchase-smoothgrad/0//0_train.csv' does not exist: b'ThresholdExperiments/purchase-smoothgrad/0//0_train.csv'

In [111]:
for information_type in information_types: 
    print(information_type)
    for attack_type in ['optimal\t',"shadow_1","shadow_3","split"]:
        print("{}\t".format(attack_type),end=" ")
    print("")
    for attack_type in ['optimal',"shadow_1","shadow_3","split"]:
        print(" {:.2f}\t\t".format(np.mean(result_accs[information_type][attack_type])),end="")
    print("")

loss
optimal		 shadow_1	 shadow_3	 split	 
 0.62		 0.62		 0.51		 0.60		
prediction_var
optimal		 shadow_1	 shadow_3	 split	 
 0.57		 0.57		 0.51		 0.57		
analysis_var
optimal		 shadow_1	 shadow_3	 split	 
 0.51		 0.51		 0.51		 0.51		
analysis_1
optimal		 shadow_1	 shadow_3	 split	 
 0.51		 0.51		 0.51		 0.51		


In [116]:
import pickle

In [None]:
#pickle.dump(result_accs,open(experiment_name+"_accs","bw"))
#pickle.dump(result_thresholds,open(experiment_name+"_thrs","bw"))

In [None]:
result_accs = pickle.load(open(experiment_name+"_accs","br"))
result_thresholds = pickle.load(open(experiment_name+"_thrs","br"))

In [154]:
cifar=False
spilt=0.5
experiment_numbers = np.arange(10)
max_model_number = 1
path="ThresholdExperiments/{}/{}".format(experiment_name,random_state)
purchase_other_experiments = ["purchase-shiftgrad", "purchase","purchase-smoothgrad", "purchase-guided_backprop","purchase-integrated_gradients", "purchase-lrp"]
texas_other_experiments = ["texas","texas-smoothgrad", "texas-guided_backprop","texas-integrated_gradients", "texas-lrp"]
cifar_10_other_experiments = ["cifar-10","cifar-10-smoothgrad", "cifar-10-guided_backprop","cifar-10-integrated_gradients", "cifar-10-lrp"]

In [155]:
%%time
for experiment_name in cifar_10_other_experiments:
    information_types = ["loss", "prediction_var", "analysis_var"]
    model_numbers = np.arange(max_model_number)
    result_accs, result_thresholds = {},{}
    attack_types = {'optimal':[],"shadow_1":[],"shadow_3":[],"split":[]}
    for information_type in information_types:
        result_accs[information_type] = copy.deepcopy(attack_types)
        result_thresholds[information_type] = copy.deepcopy(attack_types)
    print(experiment_name, end=" ")
    for attack_type in ['optimal\t',"shadow_1","shadow_3","split"]:
            print("{}\t".format(attack_type),end=" ")
    counter = 0
    start = time.time()
    for experiment_numer in experiment_numbers:
        path="ThresholdExperiments/{}/{}/".format(experiment_name,experiment_numer)
        for model_number in model_numbers:
            #print(counter, end= " ")
            counter += 1
            for information_type in ["analysis_var"]: 
                #print(information_type, end=" ")
                acc, threshold = evaluate_optimal_attack(path,model_number,information_type,cifar=cifar)
                result_accs[information_type]["optimal"].append(acc)
                result_thresholds[information_type]["optimal"].append(threshold)
                acc, threshold = evaluate_k_shadow_attack(path,model_number,information_type,k=1,cifar=cifar,max_model_number=2)
                result_accs[information_type]["shadow_1"].append(acc)
                result_thresholds[information_type]["shadow_1"].append(threshold)
                acc, threshold = evaluate_k_shadow_attack(path,model_number,information_type,k=3,cifar=cifar)
                result_accs[information_type]["shadow_3"].append(acc)
                result_thresholds[information_type]["shadow_3"].append(threshold)
                acc, threshold = evaluate_split_attack(path,model_number,information_type,split=spilt,cifar=cifar)
                result_accs[information_type]["split"].append(acc)
                result_thresholds[information_type]["split"].append(threshold)
    print(time.time() - start) 
    pickle.dump(result_accs,open(experiment_name+"_accs","bw"))
    pickle.dump(result_thresholds,open(experiment_name+"_thrs","bw"))
    for information_type in ["analysis_var"]: 
        print(information_type, end="\t")
        if information_type == "loss":
            print("\t",end="")
        for attack_type in ['optimal',"shadow_1","shadow_3","split"]:
            print("{:.2f}\t\t".format(np.mean(result_accs[information_type][attack_type])),end=" ")
        print("")

cifar-10 optimal		 shadow_1	 shadow_3	 split	 34.55970621109009
analysis_var	0.52		 0.52		 0.52		 0.52		 
cifar-10-smoothgrad optimal		 shadow_1	 shadow_3	 split	 34.85753011703491
analysis_var	0.53		 0.53		 0.53		 0.52		 
cifar-10-guided_backprop optimal		 shadow_1	 shadow_3	 split	 34.84765005111694
analysis_var	0.50		 0.50		 0.50		 0.46		 
cifar-10-integrated_gradients optimal		 shadow_1	 shadow_3	 split	 34.87642288208008
analysis_var	0.52		 0.52		 0.52		 0.52		 
cifar-10-lrp optimal		 shadow_1	 shadow_3	 split	 34.83699679374695
analysis_var	0.52		 0.52		 0.52		 0.52		 
CPU times: user 2min 53s, sys: 763 ms, total: 2min 53s
Wall time: 2min 53s


## Evaluation for increasing number of epochs

In [174]:
cifar=False
experiment_numbers = np.arange(5)
max_model_number = 1
experiment_name = "texas-epochs"

In [175]:
%%time
for epochs in range(5,50,5):
    information_types = ["analysis_var"]
    model_numbers = np.arange(max_model_number)
    result_accs, result_thresholds = {},{}
    attack_types = {'optimal':[]}
    for information_type in information_types:
        result_accs[information_type] = copy.deepcopy(attack_types)
        result_thresholds[information_type] = copy.deepcopy(attack_types)
    print(epochs, end=" ")
    #for attack_type in ['optimal\t']:
            #print("{}\t".format(attack_type),end=" ")
    counter = 0
    start = time.time()
    for experiment_number in experiment_numbers:
        path="ThresholdExperiments/{}/{}_{}/".format(experiment_name,experiment_number,epochs)
        for model_number in model_numbers:
            #print(counter, end= " ")
            counter += 1
            for information_type in ["analysis_var"]: 
                #print(information_type, end=" ")
                acc, threshold = evaluate_optimal_attack(path,model_number,information_type,cifar=cifar)
                result_accs[information_type]["optimal"].append(acc)
                result_thresholds[information_type]["optimal"].append(threshold)
    #print(time.time() - start) 
    pickle.dump(result_accs,open(experiment_name+"_accs","bw"))
    pickle.dump(result_thresholds,open(experiment_name+"_thrs","bw"))
    for information_type in ["analysis_var"]: 
        #print(information_type, end="\t")
        if information_type == "loss":
            print("\t",end="")
        for attack_type in ['optimal']:
            print("{:.2f}\t\t".format(np.mean(result_accs[information_type][attack_type])),end=" ")
        print("")

5 0.55		 
10 0.59		 
15 0.61		 
20 0.64		 
25 0.64		 
30 0.64		 
35 0.66		 
40 0.66		 
45 0.66		 
CPU times: user 18.7 s, sys: 88 ms, total: 18.7 s
Wall time: 18.7 s
