In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F

import pickle
import matplotlib.pyplot as plt
import numpy as np

In [2]:
from google.colab import drive
drive.mount("/content/drive", force_remount=True)
%cd '/content/drive/My Drive'

Mounted at /content/drive
/content/drive/My Drive


# Code to load MobileNet TARGET MODEL trained on Cifar-10 dataset

In [3]:
# Change the MODEL_PATH to your local model path
MODEL_PATH = 'Colab_Notebooks/mobilenetv2_cifar10.pth'

target_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Change num_classes to 200 when you use the Tiny ImageNet dataset
target_model = models.mobilenet_v2(num_classes=10).to(target_device)

state_dict = torch.load(MODEL_PATH, map_location=target_device)
target_model.load_state_dict(state_dict['net'])

# Test accuracy
acc = state_dict['acc']

# Training epoch (start from 0)
epoch = state_dict['epoch']

#Accuracy = 72.24666666666667

# 2. Import Cifar-10 dataset to train on shadow model (shadow.p)

In [4]:
# Change the DATA_PATH to your local pickle file path
DATA_PATH = 'Colab_Notebooks/shadow.p'

data_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

with open(DATA_PATH, "rb") as f:
    dataset = pickle.load(f)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=False, num_workers=2)


In [6]:
print(type(dataset), len(dataset))
print(type(dataset[0]), len(dataset[0]))

print(type(dataset[0][0]), dataset[0][0].shape)
print(type(dataset[0][1]), dataset[0][1])

<class 'list'> 30000
<class 'list'> 2
<class 'torch.Tensor'> torch.Size([3, 32, 32])
<class 'int'> 1


## 2.1 Split shadow.p between train and test;

In [5]:
generator1 = torch.Generator().manual_seed(42)
train_set_size = int(len(dataset) * 0.8)
valid_set_size = len(dataset) - train_set_size
data = torch.utils.data.random_split(dataset, [train_set_size, valid_set_size], generator=generator1)


#With the trainloader variable, you can iterate over the training set in your training loop,
#performing operations on each batch of data.
trainloader = torch.utils.data.DataLoader(data[0], batch_size=128, shuffle=False, num_workers=2)
testloader = torch.utils.data.DataLoader(data[1], batch_size=128, shuffle=False, num_workers=2)

# 3. Make a Shadow model and Train it

In [6]:
shadow_model_train_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

shadow_model = models.mobilenet_v2(num_classes=10).to(shadow_model_train_device)

# Define the loss function
criterion = nn.CrossEntropyLoss()

# Define the optimizer
#optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
optimizer = optim.Adam(shadow_model.parameters(), lr=0.001)

shadow_model.to(shadow_model_train_device)

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

In [8]:
num_epochs = 100

total_correct = 0
total_samples = 0

shadow_outputs_train = []

for epoch in range(num_epochs):

    total_correct = 0
    total_samples = 0
    running_loss = 0.0
    for i, (images, labels) in enumerate(trainloader):
        images = images.to(shadow_model_train_device)
        labels = labels.to(shadow_model_train_device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = shadow_model(images)

        # Append the outputs to the shadow_outputs list
        shadow_outputs_train.append(outputs.detach().cpu())

        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total_samples += labels.size(0)
        total_correct += (predicted == labels).sum().item()

        # Calculate accuracy after each epoch
        accuracy = 100 * total_correct / total_samples

        if (i + 1) % 100 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(trainloader)}], Accuracy: {accuracy:.2f}% ,Loss: {running_loss / 100:.4f}")
            running_loss = 0.0

Epoch [1/100], Step [100/188], Accuracy: 19.05% ,Loss: 2.1686
Epoch [2/100], Step [100/188], Accuracy: 34.39% ,Loss: 1.7535
Epoch [3/100], Step [100/188], Accuracy: 43.01% ,Loss: 1.5592
Epoch [4/100], Step [100/188], Accuracy: 47.55% ,Loss: 1.4338
Epoch [5/100], Step [100/188], Accuracy: 51.62% ,Loss: 1.3347
Epoch [6/100], Step [100/188], Accuracy: 54.41% ,Loss: 1.2642
Epoch [7/100], Step [100/188], Accuracy: 56.78% ,Loss: 1.2025
Epoch [8/100], Step [100/188], Accuracy: 59.54% ,Loss: 1.1370
Epoch [9/100], Step [100/188], Accuracy: 62.07% ,Loss: 1.0749
Epoch [10/100], Step [100/188], Accuracy: 63.55% ,Loss: 1.0238
Epoch [11/100], Step [100/188], Accuracy: 65.93% ,Loss: 0.9739
Epoch [12/100], Step [100/188], Accuracy: 67.71% ,Loss: 0.9346
Epoch [13/100], Step [100/188], Accuracy: 69.12% ,Loss: 0.8786
Epoch [14/100], Step [100/188], Accuracy: 70.96% ,Loss: 0.8330
Epoch [15/100], Step [100/188], Accuracy: 72.73% ,Loss: 0.7829
Epoch [16/100], Step [100/188], Accuracy: 74.19% ,Loss: 0.7469
E

In [9]:
# Save the Trained Model

SHADOW_PATH = 'Colab_Notebooks/cifar_mobilenetv2-01_07-2.pth'
torch.save(shadow_model.state_dict(), SHADOW_PATH)

In [10]:
# LOADING THE SAVED MODEL

# Load the trained shadow model's weights
shadow_model.load_state_dict(torch.load('Colab_Notebooks/cifar_mobilenetv2-01_07-1.pth'))
shadow_model.eval()  # Set the shadow model to evaluation mode

MobileNetV2(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=

###  3.1 Test shadow model on test data

In [11]:
### GETTING TRAIN RESULTS ON LOADED MODEL

correct = 0
total = 0

shadow_outputs_train = []

# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for i, (images, labels) in enumerate(trainloader):
        images = images.to(shadow_model_train_device)
        labels = labels.to(shadow_model_train_device)

        # calculate outputs by running images through the network
        outputs = shadow_model(images)

        # Append the outputs to the shadow_outputs list
        shadow_outputs_train.append(outputs.detach().cpu())

        # the class with the highest energy is what we choose as prediction
        _,predicted = torch.max(outputs.data,1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of network on test images: {100 * correct // total}%')
shadow_outputs_train = torch.cat(shadow_outputs_train)

Accuracy of network on test images: 96%


In [12]:
# TESTING THE MODEL ON TESTLOADER
correct = 0
total = 0

shadow_outputs_test = []

# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for i, (images, labels) in enumerate(testloader):
        images = images.to(shadow_model_train_device)
        labels = labels.to(shadow_model_train_device)

        # calculate outputs by running images through the network
        outputs = shadow_model(images)

        # Append the outputs to the shadow_outputs list
        shadow_outputs_test.append(outputs.detach().cpu())

        # the class with the highest energy is what we choose as prediction
        _,predicted = torch.max(outputs.data,1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of network on test images: {100 * correct // total}%')
shadow_outputs_test = torch.cat(shadow_outputs_test)

#Accuracy of shadow model comes out to be: 53%

Accuracy of network on test images: 56%


In [13]:
#ACCURACY PER CLASS

# what are the classes that performed well, and the classes that did not perform well:

# prepare to count predictions for each class

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for i, (images, labels) in enumerate(testloader):
        images = images.to(shadow_model_train_device)
        labels = labels.to(shadow_model_train_device)

        outputs = shadow_model(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')

Accuracy for class: plane is 69.2 %
Accuracy for class: car   is 69.4 %
Accuracy for class: bird  is 43.6 %
Accuracy for class: cat   is 45.0 %
Accuracy for class: deer  is 40.0 %
Accuracy for class: dog   is 42.5 %
Accuracy for class: frog  is 69.8 %
Accuracy for class: horse is 58.5 %
Accuracy for class: ship  is 66.7 %
Accuracy for class: truck is 64.0 %


In [14]:
members = []
non_members = []
for _, (images, labels) in enumerate(trainloader):
    images = images.to(shadow_model_train_device)
    output = shadow_model(images)
    prediction = F.softmax(output, dim=1)
    top_p,_ = prediction.topk(3, dim=1)
    top_p = top_p.cpu().detach().numpy()
    members.append(top_p)

for _, (images, labels) in enumerate(testloader):
    images = images.to(shadow_model_train_device)
    output = shadow_model(images)
    prediction = F.softmax(output, dim=1)
    top_p,_ = prediction.topk(3, dim=1)
    top_p = top_p.cpu().detach().numpy()
    non_members.append(top_p)

members_features = np.concatenate(members)
member_labels = np.ones(len(members_features))
members_data = np.concatenate((members_features, member_labels.reshape(-1,1)), axis=1)

non_members_features = np.concatenate(non_members)
non_member_labels = np.zeros(len(non_members_features))
non_members_data = np.concatenate((non_members_features, non_member_labels.reshape(-1,1)), axis=1)

In [15]:
print(non_members_data.shape)
print(members_data.shape)

(6000, 4)
(24000, 4)


In [16]:
print(members_data)
print(non_members_data)

[[9.99161601e-01 8.36541061e-04 1.33047888e-06 1.00000000e+00]
 [1.00000000e+00 2.57204710e-15 1.23764930e-17 1.00000000e+00]
 [9.99803841e-01 9.38634985e-05 3.20452455e-05 1.00000000e+00]
 ...
 [9.99966502e-01 1.47857409e-05 9.21202991e-06 1.00000000e+00]
 [9.99013782e-01 9.30238341e-04 2.57235806e-05 1.00000000e+00]
 [9.99997973e-01 7.40874384e-07 7.04242154e-07 1.00000000e+00]]
[[9.99031186e-01 8.23818787e-04 9.97955067e-05 0.00000000e+00]
 [8.39198232e-01 8.43658596e-02 7.00709224e-02 0.00000000e+00]
 [1.00000000e+00 2.85518781e-14 3.05284253e-16 0.00000000e+00]
 ...
 [9.99996066e-01 1.05720483e-06 7.38211213e-07 0.00000000e+00]
 [9.97032285e-01 2.64823926e-03 1.81998927e-04 0.00000000e+00]
 [8.66428494e-01 1.30806312e-01 2.54157791e-03 0.00000000e+00]]


In [17]:
combined_data = np.concatenate((members_data, non_members_data), axis=0)
with open('Colab_Notebooks/shadow_posterior_in_out.p', 'wb') as file:
    pickle.dump(combined_data, file)

In [18]:
# Getting data for Attack model and making a loader out of it

from torch.utils.data import DataLoader, TensorDataset
ATTACK_TRAIN_DATA_PATH = "Colab_Notebooks/shadow_posterior_in_out.p"

with open(ATTACK_TRAIN_DATA_PATH, "rb") as f:
    attack_shadow_training_data = pickle.load(f)

features = attack_shadow_training_data[:, :-1]
labels = attack_shadow_training_data[:, -1]

# Convert the data to PyTorch tensors
features_tensor = torch.tensor(features, dtype=torch.float32)
labels_tensor = torch.tensor(labels, dtype=torch.long)

# Create a TensorDataset for the combined data
attack_dataset = TensorDataset(features_tensor, labels_tensor)
attack_loader = DataLoader(attack_dataset, batch_size=64, shuffle=False)

In [19]:
print(features.shape, labels.shape)

(30000, 3) (30000,)


In [21]:
# Define the attack model architecture
class AttackModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(AttackModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Set the dimensions for input, hidden, and output layers of the attack model
input_size = features_tensor.size(1)
hidden_size = 64  # Choose the appropriate size for your problem
num_classes = 2  # Binary classification (shadow or non-shadow)

# Create an instance of the attack model
attack_model = AttackModel(input_size, hidden_size, num_classes)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(attack_model.parameters(), lr=0.001)

# Set the device for computation
attack_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model and data to the appropriate device
attack_model.to(attack_device)
features_tensor = features_tensor.to(attack_device)
labels_tensor = labels_tensor.to(attack_device)

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    running_loss = 0.0
    total = 0
    correct = 0
    for batch_features, batch_labels in attack_loader:
        batch_features = batch_features.to(attack_device)
        batch_labels = batch_labels.to(attack_device)

        # Forward pass
        outputs = attack_model(batch_features)
        loss = criterion(outputs, batch_labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

        # Print the average loss and accuracy for each epoch
        epoch_loss = running_loss / len(attack_loader)
        epoch_acc = 100 * correct / total

    # Print the average loss for each epoch
    epoch_loss = running_loss / len(attack_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%")


Epoch [1/10], Loss: 0.3854, Accuracy: 89.25%
Epoch [2/10], Loss: 0.4405, Accuracy: 81.67%
Epoch [3/10], Loss: 0.4477, Accuracy: 80.80%
Epoch [4/10], Loss: 0.4553, Accuracy: 79.28%
Epoch [5/10], Loss: 0.4644, Accuracy: 78.00%
Epoch [6/10], Loss: 0.4737, Accuracy: 77.15%
Epoch [7/10], Loss: 0.4771, Accuracy: 76.95%
Epoch [8/10], Loss: 0.4809, Accuracy: 76.64%
Epoch [9/10], Loss: 0.4819, Accuracy: 78.23%
Epoch [10/10], Loss: 0.4828, Accuracy: 80.17%


# Load eval.p and pre-process it
### We need to send images from eval.p to target model and get posteriors from it and then send these posteriors to attack model and check with the ground_truth values from eval.p to get the accruacy

In [22]:
EVAL_DATA_PATH = 'Colab_Notebooks/eval.p'

eval_device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

with open(EVAL_DATA_PATH, "rb") as f:
    eval_dataset = pickle.load(f)

eval_loader = DataLoader(eval_dataset, batch_size=64, shuffle=False)

In [23]:
# Getting the posteriors and ground_truth from eval.p
# Posteriors are sent to attack model
# Ground_truth is used in finding accuracy with attack model

eval_posteriors = []
eval_groundTruth = []

with torch.no_grad():
    for batch_idx, (images, labels, ismember) in enumerate(eval_loader):
        images = images.to(eval_device)
        labels = labels.to(eval_device)
        ismember = ismember.to(eval_device)

        # Calculate posteriors using the pretrained target model
        logits = target_model(images)
        probs = F.softmax(logits, dim=1)

        # Get the top-k posteriors
        topk_probs, _ = probs.topk(3, dim=1)

        # Append the posteriors and ismember to the lists
        eval_posteriors.append(topk_probs.cpu())
        eval_groundTruth.append(ismember.cpu())

eval_posteriors = torch.cat(eval_posteriors)
eval_groundTruth = torch.cat(eval_groundTruth)

In [24]:
print(eval_posteriors.shape, eval_groundTruth.shape)

torch.Size([200, 3]) torch.Size([200])


In [25]:
eval_data_combined = np.concatenate((eval_posteriors, eval_groundTruth.reshape(-1,1)), axis=1)
print(eval_data_combined.shape)

with open('Colab_Notebooks/eval_data_combined', 'wb') as file:
    pickle.dump(eval_data_combined, file)

# here we save the eval dataset which just has

(200, 4)


In [26]:
# Loading the saved eval combined dataset and getting the dataloader from that data

with open('Colab_Notebooks/eval_data_combined', "rb") as file:
    eval_loaded_data = pickle.load(file)

eval_features = eval_loaded_data[:, :-1]
eval_labels = eval_loaded_data[:, -1]

# Convert the data to PyTorch tensors
features_tensor = torch.tensor(eval_features, dtype=torch.float32)
labels_tensor = torch.tensor(eval_labels, dtype=torch.long)

# Create a TensorDataset for the combined data
evalattack_dataset = TensorDataset(features_tensor, labels_tensor)
evalattack_loader = DataLoader(evalattack_dataset, batch_size=64, shuffle=False)

In [27]:
# Making predictions on attack model for eval dataset with only images, groundTruth

attack_model.eval()

predictions = []

correct = 0
total = 0

for batch_idx, (posteriors, ismember) in enumerate(evalattack_loader):

    posteriors = posteriors.to(eval_device)
    ismember = ismember.to(eval_device)

    outputs = attack_model(posteriors)
    _, predicted = torch.max(outputs.data, 1)
    predictions.append(predicted.cpu().numpy())

    total += ismember.size(0)
    correct += (predicted == ismember).sum().item()

predictions = np.concatenate(predictions)

# Calculate the accuracy
accuracy = 100 * correct / total
print(f"Attack Model Accuracy: {accuracy:.2f}%")

np.save("attack_predictions.npy", predictions)

Attack Model Accuracy: 61.50%


In [28]:
#Saving the attack model
torch.save(attack_model.state_dict(), "Colab_Notebooks/attack_model-1.pth")