# Membership inference attack

We use the MNIST data to implement this attack

In [2]:
import numpy as np

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, TensorDataset, ConcatDataset, Dataset

from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

import matplotlib as mpl
import matplotlib.pyplot as plt

## Load and process MNIST data for image classification

For the membership inference attack introduced in class, the first step is to synthesize a dataset for a shadow model (surrogate model). Here, we sample this dataset directly from the loaded data and focus on the second step that constructs a set of shadow models to obtain member and non-member confidence scores for attack model training.

`dataset_for_shadow` is the data used for shadow model training

`dataset_for_target` is the data used for target model training

`dataset_for_shadow` and `dataset_for_target` should be disjoint 

In [3]:
#Normalize images and convert the dataset into Tensor used by PyTorch
transform=transforms.Compose([
         transforms.ToTensor(),
        ])

#Download the MNIST data directly from PyTorch
dataset_for_shadow = datasets.MNIST('./data', train=True, download=True, transform=transform)
dataset_for_target = datasets.MNIST('./data', train=False, download=True, transform=transform)

class_names = ['0', '1', '2', '3', '4',
               '5', '6', '7', '8', '9']

## Prepare Datasets for target model and shadow models (surrogate models)

In [7]:
np.random.seed(42)
torch.manual_seed(42)

#The number of shadow models
num_shadow_models = 10

#The size of samples for each shadow model
shadow_size = 2500

#List of training datasets: each training set is used to train one shadow model
list_shadow_loader = []
#List of Unseen datasets: one for each shadow model
list_unseen_loader = []

for _ in range(num_shadow_models):
    #training data and unseen data should be disjoint
    #Obtain the indices for training data and unseen data
    total_indices = np.arange(len(dataset_for_shadow))
    
    train_indices = np.random.choice(total_indices, shadow_size, replace=False)
    remaining_indices = np.setdiff1d(total_indices, train_indices)
    unseen_indices = np.random.choice(remaining_indices, shadow_size, replace=False)
    
    subset_train = Subset(dataset_for_shadow, train_indices)
    subset_unseen = Subset(dataset_for_shadow, unseen_indices)
    
    subset_train_loader = DataLoader(subset_train, batch_size=32, shuffle=True)
    subset_unseen_loader = DataLoader(subset_unseen, batch_size=32, shuffle=False)
    
    list_shadow_loader.append(subset_train_loader)
    list_unseen_loader.append(subset_unseen_loader)

#Data preparation for target model
target_train_loader = DataLoader(dataset_for_target, batch_size=32, shuffle=True)

## Train a target model

We need to a target model, such that the attack can perform membership inference attack to determine if a record is used to train this target model or not

#### Set up model definition, hyperparameters, and  the train and test functions

In [5]:
np.random.seed(42)
torch.manual_seed(42)

epochs = 5
learning_rate = 0.01
weight_decay = 5e-4
lossfunction = nn.CrossEntropyLoss()

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.fc1 = nn.Linear(320, 50) 
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x)) 
        x = F.max_pool2d(x, 2)   
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

target_model = CNN()
target_optimizer = optim.Adam(target_model.parameters(), lr=learning_rate, weight_decay=weight_decay)

#Pre-define the training function
def train(epoch, model, train_dataloader, optimizer, lossfunction):
    model.train()
    
    train_loss = 0.0
    train_total, train_correct = 0.0, 0.0 
    
    for X_batch, y_batch in train_dataloader:
        optimizer.zero_grad()

        #Get the predicted output
        predictions = model(X_batch)

        #Calculate the loss
        loss = lossfunction(predictions, y_batch)
        
        #Update the weights usning gradient descent with Adam optimizer
        loss.backward()
        optimizer.step()
        
        #Convert probabilities to multi-class predictions (reutrn the class with the maximal proability)
        _, train_predicted = torch.max(predictions.data, 1)
        
        #Calculate the training statistics
        train_loss += loss.item()
        train_total += y_batch.size(0)
        train_correct += (train_predicted == y_batch).sum().item()

    print("epoch (%d): Train accuracy: %.4f, loss: %.3f" % (epoch, train_correct/train_total, train_loss/train_total))
 

#Pre-define the test function
def test(model, test_dataloader):
    model.eval()
    
    test_correct, test_total = 0.0, 0.0
    y_test, y_pred = [], []
    
    with torch.no_grad():
        for X_batch, y_batch in test_dataloader:
            predictions = model(X_batch)
            
            _, test_predicted = torch.max(predictions.data, 1)
            test_total += y_batch.size(0)
            test_correct += (test_predicted == y_batch).sum().item()
            
            y_test += y_batch.tolist()
            y_pred += test_predicted.tolist()

    macro_f1 = f1_score(y_test, y_pred, average='macro')
    print('Test accuracy: %.4f, macro f1_score: %.4f' % (test_correct / test_total, macro_f1))

#### Train the target model

In [6]:
#Train the target model
for epoch in range(1, epochs + 1):
    train(epoch, target_model, target_train_loader, target_optimizer, lossfunction)

epoch (1): Train accuracy: 0.8971, loss: 0.010
epoch (2): Train accuracy: 0.9640, loss: 0.004
epoch (3): Train accuracy: 0.9736, loss: 0.003
epoch (4): Train accuracy: 0.9745, loss: 0.002
epoch (5): Train accuracy: 0.9780, loss: 0.002


## Train a set of shadow models

####  Use `shadow_loader` to train a shadow model and create member dataset vs non-member dataset

In [9]:
#Pre-define a function to obtain member_set and non_member_set for a given shadow model
def make_member_nonmember(shadow_model, shadow_loader, unseen_loader):   
    shadow_model.eval()
    
    member_set, non_member_set = [], []
    
    with torch.no_grad():
        for features, labels in shadow_loader:
            outputs = shadow_model(features)
            probs = F.softmax(outputs, dim=1).type(torch.FloatTensor)
            member_set.append(probs.detach())
    
    with torch.no_grad():
        for features, labels in unseen_loader:
            outputs = shadow_model(features)
            probs = F.softmax(outputs, dim=1).type(torch.FloatTensor)
            non_member_set.append(probs.detach())
    
    member_set = torch.cat(member_set)
    non_member_set = torch.cat(non_member_set)
    
    return member_set, non_member_set

    
total_members = []
total_non_members = []

#Train a set of shadow models one by one
for shadow_number, shadow_loader in enumerate(list_shadow_loader):
    print("-----------Shadow model: {}-----------".format(shadow_number + 1))
    unseen_loader = list_unseen_loader[shadow_number]
    
    #Shadow model setting
    shadow_model = CNN()
    shadow_optimizer = optim.Adam(shadow_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    
    #Train the target model
    for epoch in range(1, epochs + 1):
        train(epoch, shadow_model, shadow_loader, shadow_optimizer, lossfunction)
    
    #Create member dataset vs non-member dataset based on shadow_model
    member_set, non_member_set = make_member_nonmember(shadow_model, shadow_loader, unseen_loader)
    
    total_members.append(member_set)
    total_non_members.append(non_member_set)

total_members = torch.cat(total_members)
total_non_members = torch.cat(total_non_members)

print(total_members.shape)
print(total_non_members.shape)

-----------Shadow model: 1-----------
epoch (1): Train accuracy: 0.5224, loss: 0.041
epoch (2): Train accuracy: 0.9048, loss: 0.009
epoch (3): Train accuracy: 0.9312, loss: 0.007
epoch (4): Train accuracy: 0.9564, loss: 0.005
epoch (5): Train accuracy: 0.9488, loss: 0.005
-----------Shadow model: 2-----------
epoch (1): Train accuracy: 0.7088, loss: 0.028
epoch (2): Train accuracy: 0.8996, loss: 0.009
epoch (3): Train accuracy: 0.9356, loss: 0.006
epoch (4): Train accuracy: 0.9484, loss: 0.005
epoch (5): Train accuracy: 0.9500, loss: 0.005
-----------Shadow model: 3-----------
epoch (1): Train accuracy: 0.7168, loss: 0.028
epoch (2): Train accuracy: 0.9196, loss: 0.008
epoch (3): Train accuracy: 0.9372, loss: 0.006
epoch (4): Train accuracy: 0.9520, loss: 0.005
epoch (5): Train accuracy: 0.9644, loss: 0.003
-----------Shadow model: 4-----------
epoch (1): Train accuracy: 0.7232, loss: 0.025
epoch (2): Train accuracy: 0.9296, loss: 0.007
epoch (3): Train accuracy: 0.9500, loss: 0.006
ep

####  Construct the final dataset for membership inference attack training

In [10]:
#Concatenate total_members and total_non_members
total_members_size = total_members.shape[0]
total_non_members_size = total_non_members.shape[0]

#Generate labels: member - 1 and non-member - 0
total_members_labels = torch.Tensor([1]).repeat(total_members_size)
total_non_members_labels = torch.Tensor([0]).repeat(total_non_members_size)

#Final data for attack training
X = torch.cat((total_members, total_non_members)).numpy()
y = torch.cat((total_members_labels, total_non_members_labels)).numpy()

print(X.shape)
print(y.shape)

(50000, 10)
(50000,)


## Train a binary classification model for membership inference attack

In [11]:
#Data splits
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

#Attack model training using Logistic Regression
log_reg = LogisticRegression(solver="lbfgs", random_state=42)
log_reg.fit(X_train, y_train)

y_pred = log_reg.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')

print("Accuracy: %.4f, F1_score: %.4f" % (accuracy, f1))

Accuracy: 0.5105, F1_score: 0.5104


## Use the trained attack model to perform membership inference attack

In [24]:
np.random.seed(2)
test_size = 1000

member_indices = np.random.choice(len(dataset_for_target), test_size, replace=False)
non_member_indices = np.random.choice(len(dataset_for_shadow), test_size, replace=False)

member_test = Subset(dataset_for_target, member_indices)
non_member_test = Subset(dataset_for_shadow, non_member_indices)

member_test_loader = DataLoader(member_test, batch_size=32, shuffle=False)
non_member_test_loader = DataLoader(non_member_test, batch_size=32, shuffle=False)

#Pass the data to the target model and obtain confidence scores
member_test, non_member_test = make_member_nonmember(target_model, member_test_loader, non_member_test_loader)

#Generate labels: member - 1 and non-member - 0
member_test_labels = torch.Tensor([1]).repeat(test_size)
non_member_test_labels = torch.Tensor([0]).repeat(test_size)

#Final data for attack testing
X_membership = torch.cat((member_test, non_member_test)).numpy()
y_membership = torch.cat((member_test_labels, non_member_test_labels)).numpy()

#Test the membership inference attack model 
y_pred_membership = log_reg.predict(X_membership)

accuracy = accuracy_score(y_membership, y_pred_membership)
f1 = f1_score(y_membership, y_pred_membership, average='macro')

print("Accuracy: %.4f, F1_score: %.4f" % (accuracy, f1))

Accuracy: 0.5280, F1_score: 0.5279
