In [None]:
import numpy as np 
import time
import copy
import os
import random
import sklearn
from sklearn import datasets
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression

# Load data

We'll use the MNIST dataset.

In [None]:
# Fetch version 1 of the "mnist_784" dataset through scikit-learn's OpenML
# loader, using the current directory as data_home. With return_X_y=True the
# result is a (data, target) pair.
mnist = datasets.fetch_openml(
    'mnist_784',
    version=1,
    data_home=".",
    return_X_y=True,
)
imagedata, labeldata = mnist

# The two messages read "image data count:" and "label data count:".
print("画像データ数:", imagedata.shape, sep="")
print("ラベルデータ数:", labeldata.shape, sep="")

In [None]:
# Convert the fetched (data, target) pair to plain NumPy arrays.
# np.asarray works whether fetch_openml returned pandas objects or ndarrays,
# whereas `.values` exists only on pandas objects (and, for a categorical
# column, yields a pandas Categorical rather than an ndarray).
# Dividing by 255 rescales the features; presumably they are 0-255 pixel
# intensities, which would map them into [0, 1] -- confirm against the dataset.
mnist_data = np.asarray(mnist[0], dtype=float) / 255
mnist_label = np.asarray(mnist[1])

print(mnist_data.shape)
print(mnist_label.shape)

We divide our data into three parts: train data, shadow data, and evaluation data.

In [None]:
# Seed the stdlib RNG so the permutation below is repeatable.
random.seed(42)

# Shuffle all row indices once, then take three consecutive, non-overlapping
# slices of 500 indices each: target-train, shadow and evaluation.
idxs = list(range(mnist_data.shape[0]))
random.shuffle(idxs)

train_idx, shadow_idx, eval_idx = idxs[:500], idxs[500:1000], idxs[1000:1500]

# Features and labels for each split, selected with the same index list.
train_data, train_label = mnist_data[train_idx], mnist_label[train_idx]
shadow_data, shadow_label = mnist_data[shadow_idx], mnist_label[shadow_idx]
eval_data, eval_label = mnist_data[eval_idx], mnist_label[eval_idx]

for caption, split in (
    ("train_data shape is ", train_data),
    ("shadow_data shape is ", shadow_data),
    ("eval_data shape is ", eval_data),
):
    print(caption, split.shape)

# Target model

We assume that the target model is an SVM.

In [None]:
# Target (victim) model: an SVM with probability estimates enabled, since the
# per-class probability vectors are what the attack later consumes.
# random_state is fixed because SVC's probability calibration shuffles the
# data internally; without it predict_proba is not reproducible across runs.
target_model = SVC(probability=True, random_state=42)
target_model.fit(train_data, train_label)

# Outputs on the data the target was trained on ("in" samples).
target_pred = target_model.predict(train_data)
target_prob = target_model.predict_proba(train_data)

# Outputs on data the target has never seen ("out" samples).
eval_pred = target_model.predict(eval_data)
eval_prob = target_model.predict_proba(eval_data)

# scikit-learn metrics take (y_true, y_pred) in that order.
# First value printed: accuracy on the training split; second: on the
# evaluation split.
ac_score = metrics.accuracy_score(train_label, target_pred)
print(ac_score)
ac_score = metrics.accuracy_score(eval_label, eval_pred)
print(ac_score)

# Shadow model

We also use an SVM as the shadow model and create 5 shadow models with k-fold splitting.

In [None]:
# 5-fold split of the shadow data: for each fold, a shadow model is trained on
# the "in" part and the held-out part serves as its "out" samples.
kf = KFold(n_splits=5,
           random_state=42,
           shuffle=True)

# Per-fold collections, concatenated in the next cell.
in_probs = []           # predict_proba on samples the shadow model trained on
out_probs = []          # predict_proba on samples it did not train on
shadow_in_labels = []   # original class labels of the "in" samples
shadow_out_labels = []  # original class labels of the "out" samples

for trn_idx, val_idx in kf.split(shadow_data):
    in_data = shadow_data[trn_idx]
    out_data = shadow_data[val_idx]
    in_label = shadow_label[trn_idx]
    out_label = shadow_label[val_idx]

    # random_state is fixed because SVC's probability calibration shuffles
    # the data internally; without it the probabilities vary between runs.
    shadow_model = SVC(probability=True, random_state=42)
    shadow_model.fit(in_data, in_label)

    in_prob = shadow_model.predict_proba(in_data)
    out_prob = shadow_model.predict_proba(out_data)

    in_probs.append(in_prob)
    out_probs.append(out_prob)

    shadow_in_labels.append(in_label)
    shadow_out_labels.append(out_label)

Create the membership labels used to train the attack model.

In [None]:
# Collapse the per-fold lists into single arrays.
# NOTE: this rebinds in_probs / out_probs / shadow_in_labels /
# shadow_out_labels from lists to arrays, so the cell expects those names to
# still hold the per-fold lists when it runs.
in_probs, out_probs = np.concatenate(in_probs), np.concatenate(out_probs)
shadow_in_labels = np.concatenate(shadow_in_labels)
shadow_out_labels = np.concatenate(shadow_out_labels)

# Membership targets for the attack model: 1 = "in", 0 = "out".
in_labels = np.ones(len(in_probs))
out_labels = np.zeros(len(out_probs))

# Stack "in" rows first, then "out" rows, for features, membership targets
# and the original class labels alike so that rows stay aligned.
attack_data = np.concatenate((in_probs, out_probs))
attack_label = np.concatenate((in_labels, out_labels))
shadow_original_label = np.concatenate((shadow_in_labels, shadow_out_labels))

# Apply one shared random permutation to all three aligned arrays.
attack_data_idx = list(range(len(attack_data)))
random.shuffle(attack_data_idx)

attack_data, attack_label, shadow_original_label = (
    arr[attack_data_idx]
    for arr in (attack_data, attack_label, shadow_original_label)
)

# Attack model

We train one SVM classifier per class label as the attack model.

In [None]:
# Train one attack classifier per original class label. Each attack model
# takes a probability vector and predicts membership (1 = in, 0 = out).
unique_labels = np.unique(shadow_original_label)

# Integer buffers sized to the attack set. Built with np.zeros(..., dtype=int)
# rather than zeros_like on the label array, which would inherit the label
# dtype (e.g. strings) and depend on an implicit cast back to int.
n_attack_samples = len(shadow_original_label)
all_attack_true_label = np.zeros(n_attack_samples, dtype=int)
all_attack_preds = np.zeros(n_attack_samples, dtype=int)

# Maps class label -> fitted attack model, in np.unique order.
attack_model_dict = {}

for label in unique_labels:

    # Rows of the attack set whose original class is `label`.
    label_idx = np.where(shadow_original_label == label)[0]

    attack_label_data = attack_data[label_idx]
    attack_label_label = attack_label[label_idx]

    # random_state is fixed for reproducibility of the probability calibration.
    attack_model = SVC(probability=True, random_state=42)
    attack_model.fit(attack_label_data, attack_label_label)
    # Predictions on the same rows the model was just fitted on, so the scores
    # derived from them describe the training fit, not held-out performance.
    attack_pred = attack_model.predict(attack_label_data)

    all_attack_true_label[label_idx] = attack_label_label
    all_attack_preds[label_idx] = attack_pred

    attack_model_dict[label] = attack_model

In [None]:
# classification_report expects (y_true, y_pred); passing them the other way
# round swaps precision with recall and reports the wrong support.
# These predictions were made on the attack models' own training rows.
print(metrics.classification_report(all_attack_true_label, all_attack_preds))

# Evaluation

In [None]:
# Ground-truth membership for the attack evaluation: samples the target model
# was trained on are 1 ("in"), evaluation samples are 0 ("out").
# The arrays are built directly as ints; ones_like/zeros_like on the label
# arrays would inherit the label dtype and rely on a cast back to int.
target_label_in = np.ones(len(train_label), dtype=int)
target_label_out = np.zeros(len(eval_label), dtype=int)

# Target-model outputs, membership flags and original class labels, all with
# the "in" samples first and the "out" samples second so rows stay aligned.
probs = np.concatenate([target_prob, eval_prob])
label_in_out = np.concatenate([target_label_in, target_label_out])
true_label = np.concatenate([train_label, eval_label])

In [None]:
# Membership predictions for every evaluation row; rows whose class has no
# attack model (or no samples) keep the default 0 ("out").
in_out_label_pred = np.zeros(len(label_in_out), dtype=int)

for label, label_model in attack_model_dict.items():
    # Rows whose original class is `label`.
    label_idx = np.where(true_label == label)[0]

    # Skip classes with no rows here: predict() on an empty batch raises.
    if label_idx.size == 0:
        continue

    # Route these rows to the attack model trained for their class.
    predict_in_out_label = label_model.predict(probs[label_idx])

    in_out_label_pred[label_idx] = predict_in_out_label

In [None]:
# scikit-learn metrics take (y_true, y_pred). With the arguments reversed the
# binary F1 value is unchanged, but classification_report would show precision
# and recall swapped and the wrong support counts.
print("overall f1 score is ", metrics.f1_score(label_in_out, in_out_label_pred))
print(metrics.classification_report(label_in_out, in_out_label_pred))