# Label Leakage 

Label leakage (Li et al.) is one of the weaknesses of SplitNN: the intermediate gradient that the server sends back to the client may allow the client to extract the private ground-truth labels held by the server. We currently support measuring `leak_auc`, which quantifies how well the L2 norm of the communicated gradient predicts y, reported as the area under the ROC curve (AUC). You can also mitigate this leakage with the defense method called *max norm*.

reference   
https://arxiv.org/abs/2102.08504

In [1]:
from attacksplitnn.splitnn import Client, Server, SplitNN
from attacksplitnn.defense import max_norm
from attacksplitnn.attack import NormAttack
from attacksplitnn.utils import DataSet

In [2]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score

## Set parameters

In [3]:
# Notebook-wide settings; `batch_size` is the mini-batch size handed to the
# data loaders.
config = dict(batch_size=128)

In [4]:
# Use the first CUDA GPU when PyTorch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

cpu


## Load and preprocess data

We use the ["Credit Card Fraud Detection"](https://www.kaggle.com/mlg-ulb/creditcardfraud) dataset.

reference  
https://www.tensorflow.org/tutorials/structured_data/imbalanced_data

In [5]:
# Load the credit-card fraud table directly from its hosted CSV copy
# (requires network access).
csv_url = 'https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv'
raw_df = pd.read_csv(csv_url)

In [6]:
# Separate the rows by the value of the `Class` column
# (0 -> negative frame, 1 -> positive frame).
class_column = raw_df["Class"]
raw_df_neg = raw_df[class_column == 0]
raw_df_pos = raw_df[class_column == 1]

# All negatives are kept; append `.sample(40000)` to `raw_df_neg` here to
# downsample them instead.
down_df_neg = raw_df_neg
down_df = pd.concat([down_df_neg, raw_df_pos])

In [7]:
# Count the rows of each class and report the share of positives.
# The two-way unpacking requires `Class` to contain exactly the values 0 and 1.
neg, pos = np.bincount(down_df['Class'])
total = neg + pos
positive_pct = 100 * pos / total
print('Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, positive_pct))

Examples:
    Total: 284807
    Positive: 492 (0.17% of total)



In [8]:
# Work on a copy so that `down_df` itself is left unchanged.
cleaned_df = down_df.copy()

# The `Time` column is not used as a feature.
del cleaned_df['Time']

# `Amount` spans a huge range, so it is replaced by its logarithm.
eps = 0.001  # offset that keeps log() finite for zero amounts: 0 => 0.1¢
amount = cleaned_df.pop('Amount')
cleaned_df['Log Ammount'] = np.log(amount + eps)

In [9]:
# Shuffle and split with sklearn: 20% is held out for testing, then 20% of the
# remainder becomes the validation set. No random_state is given, so the
# partition differs from run to run.
train_df, test_df = train_test_split(cleaned_df, test_size=0.2)
train_df, val_df = train_test_split(train_df, test_size=0.2)

# Remove the `Class` column from each frame and keep it as the label vector.
train_labels = np.array(train_df.pop('Class'))
val_labels = np.array(val_df.pop('Class'))
test_labels = np.array(test_df.pop('Class'))
bool_train_labels = train_labels != 0

# Whatever is left in the frames are the feature columns.
train_features, val_features, test_features = (
    np.array(frame) for frame in (train_df, val_df, test_df)
)

In [10]:
# Fit the standardizer on the training split only, then apply the same
# transformation to the validation and test splits.
scaler = StandardScaler()
train_features = scaler.fit_transform(train_features)
val_features = scaler.transform(val_features)
test_features = scaler.transform(test_features)

# Bound every standardized value to the interval [-5, 5].
train_features = train_features.clip(-5, 5)
val_features = val_features.clip(-5, 5)
test_features = test_features.clip(-5, 5)

# Print the shape of each label and feature array, labels first.
shape_report = (
    ('Training labels shape:', train_labels),
    ('Validation labels shape:', val_labels),
    ('Test labels shape:', test_labels),
    ('Training features shape:', train_features),
    ('Validation features shape:', val_features),
    ('Test features shape:', test_features),
)
for caption, values in shape_report:
    print(caption, values.shape)

Training labels shape: (182276,)
Validation labels shape: (45569,)
Test labels shape: (56962,)
Training features shape: (182276, 29)
Validation features shape: (45569, 29)
Test features shape: (56962, 29)


In [11]:
# Labels are cast to float64 and reshaped into a single column before being
# wrapped, together with the features, in the library's DataSet.
batch_size = config["batch_size"]

train_targets = train_labels.astype(np.float64).reshape(-1, 1)
train_dataset = DataSet(train_features, train_targets)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_targets = test_labels.astype(np.float64).reshape(-1, 1)
test_dataset = DataSet(test_features, test_targets)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

## Train SplitNN

In [12]:
# Width of the cut layer: output size of FirstNet.L1 and input size of
# SecondNet.L2.
hidden_dim: int = 16

class FirstNet(nn.Module):
    """First half of the split model: one linear layer followed by ReLU.

    The input width is read from the module-level ``train_features`` array
    and the output width from ``hidden_dim``; both must exist before the
    class is instantiated.
    """

    def __init__(self):
        super().__init__()
        n_inputs = train_features.shape[-1]
        self.L1 = nn.Linear(n_inputs, hidden_dim)

    def forward(self, x):
        """Return ``relu(L1(x))``."""
        return F.relu(self.L1(x))
    
class SecondNet(nn.Module):
    """Second half of the split model: one linear unit followed by a sigmoid.

    The input width is read from the module-level ``hidden_dim``; the output
    is a single value per row in the open interval (0, 1).
    """

    def __init__(self):
        super().__init__()
        self.L2 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return ``sigmoid(L2(x))``."""
        logits = self.L2(x)
        return torch.sigmoid(logits)
    
def torch_auc(label, pred):
    """Compute the ROC AUC of ``pred`` against ``label`` for torch tensors.

    Both tensors are detached from the autograd graph and copied to host
    memory before being converted to NumPy arrays. ``Tensor.numpy()`` only
    works on CPU tensors, and this notebook selects a CUDA device whenever
    one is available, so the explicit ``.cpu()`` keeps the metric usable on
    either device (it is a no-op for tensors already on the CPU).

    Args:
        label: Tensor of ground-truth targets.
        pred: Tensor of predicted scores aligned with ``label``.

    Returns:
        The value returned by ``sklearn.metrics.roc_auc_score``.
    """
    return roc_auc_score(label.detach().cpu().numpy(),
                         pred.detach().cpu().numpy())

### Vanilla SplitNN

The leak AUC is high, which indicates that the client can correctly predict the label of each example by analyzing the intermediate gradient.

In [13]:
# Build both halves of the model, move them to `device` and switch their
# parameters to float64.
model_1 = FirstNet().to(device).double()
model_2 = SecondNet().to(device).double()

# Each party gets its own Adam optimizer with the same learning rate.
learning_rate = 1e-3
opt_1 = optim.Adam(model_1.parameters(), lr=learning_rate)
opt_2 = optim.Adam(model_2.parameters(), lr=learning_rate)

# Binary cross-entropy on the sigmoid output of SecondNet.
criterion = nn.BCELoss()

# model_1 is wrapped by the client; model_2 and the loss by the plain Server.
client = Client(model_1, opt_1)
server = Server(model_2, opt_2, criterion)

# Train for 3 epochs, reporting torch_auc as the metric.
n_epochs = 3
sn = SplitNN(client, server, device=device)
sn.fit(train_loader, n_epochs, metric=torch_auc)

epoch 1, loss 0.00055886, metric 0.8634519236388023
epoch 2, loss 3.1889e-05, metric 0.9515884176797793
epoch 3, loss 2.4471e-05, metric 0.9681720109580352


In [14]:
# Run the norm attack against the trained vanilla SplitNN on the training data.
# The trailing bare expression keeps the notebook displaying the result.
nall = NormAttack(sn)
vanilla_leak_auc = nall.attack(train_loader)
vanilla_leak_auc

0.998089961349135

### SplitNN with max_norm

You can mitigate this problem with max_norm. To use this defense method, you should create a custom Server class.

In [15]:
class Server_with_max_norm(Server):
    """Server variant that passes the gradient for the client through ``max_norm``.

    Construction is delegated unchanged to ``Server``; only ``_fit_server``
    is overridden.
    """

    def __init__(self, server_model, server_optimizer, criterion):
        super().__init__(server_model, server_optimizer, criterion)

    def _fit_server(self, intermidiate_to_server, labels):
        """Run the server-side forward and backward pass for one batch.

        Args:
            intermidiate_to_server: Tensor fed to ``self.server_model``. Its
                ``.grad`` is read after ``backward()``, so it presumably must
                be a tensor that retains its gradient (verify against the
                caller in ``Server`` / ``SplitNN``).
            labels: Targets passed to ``self.criterion`` with the outputs.

        Returns:
            Tuple ``(outputs, loss, grad_to_client)`` where ``grad_to_client``
            is ``max_norm`` applied to a clone of
            ``intermidiate_to_server.grad``.
        """
        predictions = self.server_model(intermidiate_to_server)
        batch_loss = self.criterion(predictions, labels)
        batch_loss.backward()

        # Clone the gradient w.r.t. the received tensor and transform it with
        # max_norm before it is handed back, to avoid label leakage.
        protected_grad = max_norm(intermidiate_to_server.grad.clone())
        return predictions, batch_loss, protected_grad

In [17]:
# Fresh, untrained halves of the model, moved to `device` and switched to
# float64.
model_1 = FirstNet().to(device).double()
model_2 = SecondNet().to(device).double()

# Each party gets its own Adam optimizer with the same learning rate.
learning_rate = 1e-3
opt_1 = optim.Adam(model_1.parameters(), lr=learning_rate)
opt_2 = optim.Adam(model_2.parameters(), lr=learning_rate)

# Binary cross-entropy on the sigmoid output of SecondNet.
criterion = nn.BCELoss()

# Same client as before, but the server is the max_norm-protected subclass.
client = Client(model_1, opt_1)
server = Server_with_max_norm(model_2, opt_2, criterion)

# Train for 10 epochs, reporting torch_auc as the metric.
n_epochs = 10
sn = SplitNN(client, server, device=device)
sn.fit(train_loader, n_epochs, metric=torch_auc)

epoch 1, loss 0.0014747, metric 0.29030519644044733
epoch 2, loss 0.00040535, metric 0.07854910037259545
epoch 3, loss 0.00028008, metric 0.08278177357104496
epoch 4, loss 0.00021472, metric 0.11759513497193708
epoch 5, loss 0.00015548, metric 0.3369709854161855
epoch 6, loss 0.00010439, metric 0.7423982458239535
epoch 7, loss 7.2232e-05, metric 0.8374789562875135
epoch 8, loss 5.7063e-05, metric 0.8526745688995495
epoch 9, loss 4.9987e-05, metric 0.864257167384124
epoch 10, loss 4.6148e-05, metric 0.8720784196554535


In [18]:
# Repeat the norm attack against the max_norm-protected SplitNN and print the
# resulting leak AUC.
nall = NormAttack(sn)
leak_auc = nall.attack(train_loader)
print("leak auc: ", leak_auc)

leak auc:  0.08853783763854066
