In [1]:
from src.splitnn import Client, Server, SplitNN
from src.defense.noised_grad import max_norm
from src.measure import label_leak_auc
from src.utils.utils import DataSet

In [3]:
import attack_splitnn
from attack_splitnn import attack
from attack_splitnn import defense
from attack_splitnn import measure
from attack_splitnn import splitnn
from attack_splitnn import utils

In [2]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score

In [3]:
# Hyper-parameters shared by the cells below.
# batch_size: number of samples per mini-batch fed to the data loaders.
config = dict(batch_size=128)

In [4]:
# Prefer the first CUDA GPU when one is visible; otherwise fall back to the CPU.
device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(device)

cpu


In [5]:
# Location of creditcard.csv in TensorFlow's public download bucket.
# The file is fetched over the network every time this cell runs.
CREDITCARD_CSV_URL = 'https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv'
raw_df = pd.read_csv(CREDITCARD_CSV_URL)

In [6]:
# Partition the rows by the binary `Class` column (0 = negative, 1 = positive).
is_negative = raw_df["Class"] == 0
is_positive = raw_df["Class"] == 1
raw_df_neg = raw_df[is_negative]
raw_df_pos = raw_df[is_positive]

# Down-sampling of the negative class is currently switched off: every
# negative row is kept (the disabled variant was `.sample(40000)`).
down_df_neg = raw_df_neg
# Negatives first, positives appended after them.
down_df = pd.concat([down_df_neg, raw_df_pos])

In [7]:
# Count the rows of each class and report how rare the positive class is.
class_counts = np.bincount(down_df['Class'])
neg, pos = class_counts  # index 0 -> negatives, index 1 -> positives
total = neg + pos

summary_template = 'Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'
print(summary_template.format(total, pos, 100 * pos / total))

Examples:
    Total: 284807
    Positive: 492 (0.17% of total)



In [8]:
# Work on a copy so `down_df` stays untouched if this cell is re-run.
cleaned_df = down_df.copy()

# The `Time` column is not used as a feature.
del cleaned_df['Time']

# `Amount` spans a huge range, so replace it with its logarithm.
# eps keeps log() finite for zero amounts: 0 is treated as 0.001 (0.1¢).
eps = 0.001
amount = cleaned_df.pop('Amount')
cleaned_df['Log Ammount'] = np.log(amount + eps)

In [9]:
# Use a utility from sklearn to split and shuffle our dataset.
# A fixed seed makes the shuffled partition identical on every run, so
# results survive Restart Kernel -> Run All.
SPLIT_SEED = 42

# 80/20 split into (train + validation) and test, then 80/20 again into
# train and validation.
train_df, test_df = train_test_split(cleaned_df, test_size=0.2,
                                     random_state=SPLIT_SEED)
train_df, val_df = train_test_split(train_df, test_size=0.2,
                                    random_state=SPLIT_SEED)

# Form np arrays of labels and features.
# `pop` removes the `Class` column, so the frames hold only features afterwards.
train_labels = np.array(train_df.pop('Class'))
bool_train_labels = train_labels != 0  # boolean mask of the positive training rows
val_labels = np.array(val_df.pop('Class'))
test_labels = np.array(test_df.pop('Class'))

train_features = np.array(train_df)
val_features = np.array(val_df)
test_features = np.array(test_df)

In [10]:
# Standardise every feature using statistics estimated on the training split
# only, then apply that same transformation to all three splits.
scaler = StandardScaler()
scaler.fit(train_features)

# After scaling, clamp each value to [-5, 5].
train_features = np.clip(scaler.transform(train_features), -5, 5)
val_features = np.clip(scaler.transform(val_features), -5, 5)
test_features = np.clip(scaler.transform(test_features), -5, 5)


# Report the shape of every label and feature array as a sanity check.
shape_report = (
    ('Training labels shape:', train_labels),
    ('Validation labels shape:', val_labels),
    ('Test labels shape:', test_labels),
    None,  # blank separator between the label and feature groups
    ('Training features shape:', train_features),
    ('Validation features shape:', val_features),
    ('Test features shape:', test_features),
)
for entry in shape_report:
    if entry is None:
        continue
    caption, array = entry
    print(caption, array.shape)

Training labels shape: (182276,)
Validation labels shape: (45569,)
Test labels shape: (56962,)
Training features shape: (182276, 29)
Validation features shape: (45569, 29)
Test features shape: (56962, 29)


In [11]:
# Labels are converted to float64 column vectors of shape (N, 1) before being
# wrapped, together with the features, in the project's `DataSet`.
train_targets = train_labels.astype(np.float64).reshape(-1, 1)
train_dataset = DataSet(train_features, train_targets)
train_loader = DataLoader(train_dataset,
                          batch_size=config["batch_size"],
                          shuffle=True)

test_targets = test_labels.astype(np.float64).reshape(-1, 1)
test_dataset = DataSet(test_features, test_targets)
test_loader = DataLoader(test_dataset,
                         batch_size=config["batch_size"],
                         shuffle=True)

In [12]:
# Width of the intermediate representation passed from FirstNet to SecondNet.
hidden_dim = 16

class FirstNet(nn.Module):
    """First half of the split network: one linear layer followed by ReLU.

    In this notebook an instance is handed to ``Client``.

    Args:
        in_features: Number of input features. When ``None`` (the default) the
            width of the notebook-level ``train_features`` array is used.
        out_features: Size of the produced representation. When ``None`` (the
            default) the notebook-level ``hidden_dim`` is used.
    """

    def __init__(self, in_features=None, out_features=None):
        super().__init__()
        # Fall back to the notebook globals only when no explicit size is
        # given, so the class can also be used outside this notebook.
        if in_features is None:
            in_features = train_features.shape[-1]
        if out_features is None:
            out_features = hidden_dim
        self.L1 = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return ``relu(L1(x))``."""
        return nn.functional.relu(self.L1(x))
    
class SecondNet(nn.Module):
    """Second half of the split network: one linear layer followed by sigmoid.

    In this notebook an instance is handed to ``Server``. It maps the
    intermediate representation to a single value in (0, 1).

    Args:
        in_features: Size of the incoming representation. When ``None`` (the
            default) the notebook-level ``hidden_dim`` is used.
    """

    def __init__(self, in_features=None):
        super().__init__()
        # Fall back to the notebook global only when no explicit size is given.
        if in_features is None:
            in_features = hidden_dim
        self.L2 = nn.Linear(in_features, 1)

    def forward(self, x):
        """Return ``sigmoid(L2(x))``."""
        return torch.sigmoid(self.L2(x))
    
def torch_auc(label, pred):
    """Compute ROC AUC from torch tensors.

    Args:
        label: Tensor of ground-truth labels.
        pred: Tensor of predicted scores; may require grad.

    Returns:
        The value returned by ``sklearn.metrics.roc_auc_score``.
    """
    # `.numpy()` only works on CPU tensors, so move both tensors to the host
    # first. `.cpu()` is a no-op for tensors that already live on the CPU,
    # which keeps this metric usable when training runs on a GPU.
    return roc_auc_score(label.detach().cpu().numpy(),
                         pred.detach().cpu().numpy())

In [13]:
# Baseline: train the split network without any defense.
# Both halves are moved to `device` and cast to float64.
model_1 = FirstNet().to(device).double()
model_2 = SecondNet().to(device).double()

# Each half has its own Adam optimizer.
learning_rate = 1e-3
opt_1 = optim.Adam(model_1.parameters(), lr=learning_rate)
opt_2 = optim.Adam(model_2.parameters(), lr=learning_rate)

# Binary cross-entropy on the sigmoid output of the second half.
criterion = nn.BCELoss()

client = Client(model_1, opt_1)
server = Server(model_2, opt_2, criterion)

# Train for 3 epochs, reporting ROC AUC as the metric.
sn = SplitNN(client, server, device=device)
sn.fit(train_loader, 3, metric=torch_auc)

epoch 1, loss 0.00067803, metric 0.8212399688135726
epoch 2, loss 3.1159e-05, metric 0.9683496092080551
epoch 3, loss 2.2184e-05, metric 0.982390608629873


In [14]:
# Measure label leakage for the split network trained above (plain `Server`)
# on the training loader; the returned value is shown as the cell output.
# NOTE(review): presumably an AUC for inferring labels from what the server
# sends back to the client — confirm against src.measure.
label_leak_auc(sn, train_loader)

0.999728385632264

In [15]:
class Server_with_max_norm(Server):
    """``Server`` variant that passes the gradient sent to the client through ``max_norm``.

    NOTE(review): ``max_norm`` comes from ``src.defense.noised_grad``; its exact
    transformation is not visible here — confirm there.
    """

    def __init__(self, server_model, server_optimizer, criterion):
        """Forward all arguments unchanged to ``Server.__init__``."""
        super().__init__(server_model, server_optimizer, criterion)

    def _fit_server(self, intermidiate_to_server, labels):
        """Run the server-side forward/backward pass for one batch.

        Args:
            intermidiate_to_server: Tensor received from the client; its
                ``.grad`` must be populated by ``loss.backward()``.
            labels: Targets passed to ``self.criterion``.

        Returns:
            Tuple ``(outputs, loss, grad_to_client)`` where ``grad_to_client``
            is the ``max_norm``-processed copy of the input's gradient.
        """
        outputs = self.server_model(intermidiate_to_server)
        loss = self.criterion(outputs, labels)
        loss.backward()

        # Clone first so the defense works on a copy rather than on the
        # tensor's own `.grad` buffer.
        raw_grad = intermidiate_to_server.grad.clone()
        grad_to_client = max_norm(raw_grad)
        return outputs, loss, grad_to_client

In [16]:
# Defended run: same architecture and optimizers as the baseline, but the
# server is `Server_with_max_norm`. Fresh models are created so nothing is
# carried over from the previous training run.
model_1 = FirstNet().to(device).double()
model_2 = SecondNet().to(device).double()

# Each half has its own Adam optimizer.
learning_rate = 1e-3
opt_1 = optim.Adam(model_1.parameters(), lr=learning_rate)
opt_2 = optim.Adam(model_2.parameters(), lr=learning_rate)

# Binary cross-entropy on the sigmoid output of the second half.
criterion = nn.BCELoss()

client = Client(model_1, opt_1)
server = Server_with_max_norm(model_2, opt_2, criterion)

# Train for 10 epochs, reporting ROC AUC as the metric.
sn = SplitNN(client, server, device=device)
sn.fit(train_loader, 10, metric=torch_auc)

epoch 1, loss 0.001561, metric 0.35758412146527374
epoch 2, loss 0.00048577, metric 0.52705867511411
epoch 3, loss 0.00031461, metric 0.5283707178953191
epoch 4, loss 0.00022605, metric 0.5468446732904245
epoch 5, loss 0.00018179, metric 0.5861497869670282
epoch 6, loss 0.00015149, metric 0.6391923753977673
epoch 7, loss 0.00012853, metric 0.7088472370278383
epoch 8, loss 0.00010685, metric 0.7812909825556263
epoch 9, loss 9.0646e-05, metric 0.8148555429784137
epoch 10, loss 7.9543e-05, metric 0.8392497128168402


In [17]:
# Same leakage measurement, now for the split network trained with
# `Server_with_max_norm`; the returned value is shown as the cell output.
# NOTE(review): an AUC far below 0.5 (as in the recorded output) still ranks
# the two classes almost perfectly once the score is negated — confirm how
# label_leak_auc orients its score before reading this as "no leakage".
label_leak_auc(sn, train_loader)

0.024231222205127072