In [None]:
from google.colab import files
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch import Tensor

import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime 
from tqdm.notebook import tqdm 
import statistics
from math import log10
import struct
from random import randrange
import multiprocessing
import concurrent.futures
import time

from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, Normalizer
from sklearn.preprocessing import QuantileTransformer, PowerTransformer

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib 

# parameters
# RANDOM_SEED = 42
BATCH_SIZE = 100
# N_EPOCHS = 100
IMG_SIZE = 32
N_CLASSES = 100


norm_mean, norm_std = (0.5071, 0.4867, 0.4408), (0.2023, 0.1994, 0.2010)


transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std)])

train_dataset = datasets.CIFAR100(root='cifar100_data', train=True, transform=transform, download=True)
valid_dataset = datasets.CIFAR100(root='cifar100_data', train=False, transform=transform, download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE, shuffle=False)


def conv3x3(in_planes, out_planes, stride=1, bias=False):
  """3x3 convolution with padding"""
  return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                   padding=1, bias=bias)

def variable_init(m, neg_slope=0.0):
  if isinstance(m, (nn.Linear, nn.Conv2d)):
    nn.init.kaiming_uniform_(m.weight.data, neg_slope)
    if m.bias is not None:
      m.bias.data.zero_()
  elif isinstance(m, nn.BatchNorm2d):
    if m.weight is not None:
      m.weight.data.fill_(1)
    if m.bias is not None:
      m.bias.data.zero_()
    m.running_mean.zero_()
    m.running_var.zero_()
def _down_sample(x):
  return nn.functional.avg_pool2d(x, 2, 2)



def _increase_planes(x, n_out_planes):
  n_samples, n_planes, spatial_size = x.size()[:-1]
  x_zeros = torch.zeros(
    n_samples, n_out_planes - n_planes, spatial_size, spatial_size, 
    dtype=x.dtype, device=x.device)
  return torch.cat([x, x_zeros], 1)

def _downsample_and_increase_planes(x, n_out_planes):
  x = _down_sample(x)
  x = _increase_planes(x, n_out_planes)
  return x

def identity_func(n_in_planes, n_out_planes, stride):
  identity = lambda x: x
  if stride == 2 and n_in_planes != n_out_planes:
    identity = lambda x: _downsample_and_increase_planes(x, n_out_planes)
  elif stride == 2:
    identity = _down_sample
  elif n_in_planes != n_out_planes:
    identity = lambda x: _increase_planes(x, n_out_planes)
  return identity

class BasicBlock(nn.Module):

  expansion = 1

  def __init__(self, n_in_planes, n_out_planes, stride=1):
    super().__init__()
    assert stride == 1 or stride == 2

    self.block = nn.Sequential(
      conv3x3(n_in_planes, n_out_planes, stride),
      nn.BatchNorm2d(n_out_planes),
      nn.ReLU(inplace=True),
      conv3x3(n_out_planes, n_out_planes),
      nn.BatchNorm2d(n_out_planes)
    )

    self.identity = identity_func(n_in_planes, n_out_planes, stride)

  def forward(self, x):
    out = self.block(x)
    identity = self.identity(x)

    out += identity
    out = nn.functional.relu(out)
    return out

class Bottleneck(nn.Module):

  expansion = 4

  def __init__(self, n_in_planes, n_out_planes, stride=1):
    super().__init__()
    
    self.conv1 = nn.Conv2d(n_in_planes, n_out_planes, kernel_size=1)
    self.bn1 = nn.BatchNorm2d(n_out_planes)

    self.conv2 = conv3x3(n_out_planes, n_out_planes, stride)
    self.bn2 = nn.BatchNorm2d(n_out_planes)

    self.conv3 = nn.Conv2d(n_out_planes, n_out_planes * 4, kernel_size=1)
    self.bn3 = nn.BatchNorm2d(n_out_planes * 4)

    self.relu = nn.ReLU(inplace=True)
    self.identity = identity_func(n_in_planes, n_out_planes * 4, stride)

  def forward(self, x):
    out = self.conv1(x)
    out = self.bn1(out)
    out = self.relu(out)

    out = self.conv2(out)
    out = self.bn2(out)
    out = self.relu(out)

    out = self.conv3(out)
    out = self.bn3(out)

    identity = self.identity(x)
    out += identity
    out = self.relu(out)

    return out

class ResNet(nn.Module):

  def __init__(self, block, 
                     n_blocks, 
                     n_output_planes, 
                     n_classes):
    super(ResNet, self).__init__()
    assert len(n_blocks) == 4
    assert len(n_output_planes) == 4
    
    self.n_in_planes = n_output_planes[0]

    self.layer0 = nn.Sequential(
      conv3x3(3, self.n_in_planes),
      nn.BatchNorm2d(self.n_in_planes),
      nn.ReLU(inplace=True)
    )
    self.layer1 = self._make_layer(block, n_blocks[0], n_output_planes[0])
    self.layer2 = self._make_layer(block, n_blocks[1], n_output_planes[1], 2)
    self.layer3 = self._make_layer(block, n_blocks[2], n_output_planes[2], 2)
    self.layer4 = self._make_layer(block, n_blocks[3], n_output_planes[3], 2)
    self.fc = nn.Linear(n_output_planes[3] * block.expansion, n_classes, False)

    self.apply(variable_init)

  def _make_layer(self, block, n_blocks, n_out_planes, stride=1):
    layers = []
    layers.append(block(self.n_in_planes, n_out_planes, stride))
    self.n_in_planes = n_out_planes * block.expansion
    for i in range(1, n_blocks):
      layers.append(block(self.n_in_planes, n_out_planes))

    return nn.Sequential(*layers)

  def features(self, x):
    x = self.layer0(x)
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)
    spatial_size = x.size(2)
    x = nn.functional.avg_pool2d(x, spatial_size, 1)
    x = x.view(x.size(0), -1)
    return x

  def forward(self, x):
    x = self.features(x)
    x = self.fc(x)
    return x



n_blocks = [2,2,2,2]
n_output_planes = [64, 128, 256, 512]
n_classes = 100

target_model = ResNet(BasicBlock, n_blocks, n_output_planes, n_classes)

model_path = '../dataset/Cifar100.pth'
target_model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
target_model.eval()


Files already downloaded and verified
Files already downloaded and verified


ResNet(
  (layer0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer1): Sequential(
    (0): BasicBlock(
      (block): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): BasicBlock(
      (block): Sequential(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3)

In [None]:
#Testing Accuracy
def get_accuracy(model, data_loader):
  correct = 0
  total = 0
  with torch.no_grad():
      for data in data_loader:
          images, labels = data[0], data[1]
          outputs = model(images)
          _, predicted = torch.max(outputs.data, 1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()

  accuracy = (correct / total)
  return accuracy

valid_acc = get_accuracy(target_model, valid_loader)
print(f'Test_acc:', valid_acc)

Test_acc: 0.645


In [None]:
input_mem = []
label_mem = []
label_cifar10_mem = []

with torch.no_grad():
  target_model.eval()
  for i, data in enumerate(train_loader, 0):
    image, label = data[0], data[1]
    #batch size 100; so, member = 50*100 = 5000
    if i<50:
      label_cifar10_mem.append(label)   
      logit = target_model(image)
      input_mem.append(logit)
      label_mem = label_mem + [1 for i in range(BATCH_SIZE)]
    

input_nonmem = []
label_nonmem = []
label_cifar10_nonmem = []


with torch.no_grad():
  target_model.eval()
  for i, data in enumerate(valid_loader, 0):
    image, label = data[0], data[1]
    #batch size 100; so, member = 50*100 = 5000
    if i<50:  
      label_cifar10_nonmem.append(label)  
      logit = target_model(image) #logit is tensor here
      input_nonmem.append(logit)
      label_nonmem = label_nonmem + [0 for i in range(BATCH_SIZE)]


label_cifar10_mem_nonmem = label_cifar10_mem + label_cifar10_nonmem
y_cifar10_train = torch.cat(label_cifar10_mem_nonmem, dim=0)

input_mem_nonmem = input_mem + input_nonmem
X_train_mem_nonmem = torch.cat(input_mem_nonmem, dim=0)
# print(X_train_mem_nonmem.size())
label_mem_nonmem = label_mem + label_nonmem
y_train_attacker_np = np.array(label_mem_nonmem)



In [None]:
input_mem = []
label_mem = []
label_cifar10_mem = []

with torch.no_grad():
  target_model.eval()
  for i, data in enumerate(train_loader, 0):
    image, label = data[0], data[1]
    #batch size 100; so, member = 50*100 = 5000
    if i>=50 and i<100: 
      label_cifar10_mem.append(label) 
      logit = target_model(image)
      input_mem.append(logit)
      label_mem = label_mem + [1 for i in range(BATCH_SIZE)]
      
    


input_nonmem = []
label_nonmem = []
label_cifar10_nonmem = []

with torch.no_grad():
  target_model.eval()
  for i, data in enumerate(valid_loader, 0):
    image, label = data[0], data[1]
    #batch size 100; so, member = 50*100 = 5000
    if i>=50 and i<100:
      label_cifar10_nonmem.append(label)  
      logit = target_model(image) #logit is tensor here
      input_nonmem.append(logit)
      label_nonmem = label_nonmem + [0 for i in range(BATCH_SIZE)]
    

label_cifar10_mem_nonmem = label_cifar10_mem + label_cifar10_nonmem
y_cifar10_test = torch.cat(label_cifar10_mem_nonmem, dim=0)

input_mem_nonmem = input_mem + input_nonmem
X_test_mem_nonmem = torch.cat(input_mem_nonmem, dim=0)
# print(X_mem_nonmem.size())
y_attacker =  np.array(label_mem_nonmem)
label_mem_nonmem = label_mem + label_nonmem
y_test_attacker_np = np.array(label_mem_nonmem)


In [None]:
'''
I_nn MIA. logit+prob
'''

logit = X_train_mem_nonmem
df_logit = pd.DataFrame(logit.detach().numpy())
scaler = StandardScaler() #94 70
# scaler = RobustScaler() #94 70
df_logit  = pd.DataFrame(scaler.fit_transform(df_logit))

X_mem_nonmem_prob = torch.sigmoid(logit)
# X_mem_nonmem_prob = torch.softmax(logit, dim=-1)
df_prob = pd.DataFrame(X_mem_nonmem_prob.detach().numpy())
X_train_attacker_df = pd.concat([df_logit, df_prob], axis=1, ignore_index=True)

#=============== Attacker Training ==================
n_feature = len(X_train_attacker_df.columns) 
attacker_mlp_logit_prob = MLPClassifier(hidden_layer_sizes=(n_feature, 100, 100, 50), activation='relu', max_iter = 3000, random_state = 1)
attacker_mlp_logit_prob.fit(X_train_attacker_df, y_train_attacker_np)
X_attacker = pd.concat([df_logit, df_prob], axis=1, ignore_index=True)

# y_pred_np = attacker_mlp_logit_prob.predict(X_train_attacker_df)
# accuracy  = round(np.mean(y_pred_np == y_train_attacker_np), 2)
# print(f'MIA train accuracy: {accuracy}')



model_file = '../dataset/cifar100_attacker_mlp_logit_prob.pkl'
joblib.dump(attacker_mlp_logit_prob, model_file)
attacker_mlp_logit_prob = joblib.load(model_file)

#=============== Attacker Testing ==================
logit = X_test_mem_nonmem
df_logit = pd.DataFrame(logit.detach().numpy())
scaler = StandardScaler() #94 70
# scaler = RobustScaler() #94 70
df_logit  = pd.DataFrame(scaler.fit_transform(df_logit))
X_mem_nonmem_prob = torch.sigmoid(logit)
df_prob = pd.DataFrame(X_mem_nonmem_prob.detach().numpy())
X_test_attacker_df = pd.concat([df_logit, df_prob], axis=1, ignore_index=True)


# y_pred_np = attacker_mlp_logit_prob.predict(X_test_attacker_df)
# accuracy  = round(np.mean(y_pred_np == y_test_attacker_np), 2)
# print(f'MIA test accuracy: {accuracy}')

y_pred_np = attacker_mlp_logit_prob.predict(X_attacker)
accuracy  = round(np.mean(y_pred_np == y_attacker), 2)
print(f'I_nn MIA: {accuracy}')



I_nn MIA: 1.0


In [None]:
'''
I_bb MIA
'''
df_logit = pd.DataFrame(X_train_mem_nonmem.detach().numpy())
scaler = StandardScaler() #94 70
# scaler = RobustScaler() #94 70
X_train_attacker_df  = pd.DataFrame(scaler.fit_transform(df_logit))


#=============== Attacker Training ==================
n_feature = len(X_train_attacker_df.columns) 
attacker_mlp_logit = MLPClassifier(hidden_layer_sizes=(n_feature, 100, 100, 50), activation='relu', max_iter = 3000, random_state = 1)
attacker_mlp_logit.fit(X_train_attacker_df, y_train_attacker_np)
X_attacker = pd.DataFrame(scaler.fit_transform(df_logit))

y_pred_np = attacker_mlp_logit.predict(X_train_attacker_df)
accuracy  = round(np.mean(y_pred_np == y_train_attacker_np), 2)
print(f'MIA train accuracy: {accuracy}')


model_file = '../dataset/cifar100_attacker_mlp_logit.pkl'
joblib.dump(attacker_mlp_logit, model_file)
attacker_mlp_logit = joblib.load(model_file)

#=============== Attacker Testing ==================
df_logit = pd.DataFrame(X_test_mem_nonmem.detach().numpy())
scaler = StandardScaler() #94 70
# scaler = RobustScaler() #94 70
X_test_attacker_df  = pd.DataFrame(scaler.fit_transform(df_logit))


# y_pred_np = attacker_mlp_logit.predict(X_test_attacker_df)
# accuracy  = round(np.mean(y_pred_np == y_test_attacker_np), 2)
# print(f'MIA test accuracy: {accuracy}')

y_pred_np = attacker_mlp_logit.predict(X_attacker)
accuracy  = round(np.mean(y_pred_np == y_attacker), 2)
print(f'I_bb MIA: {accuracy}')


MIA train accuracy: 1.0
I_bb MIA: 1.0


In [None]:
'''
I_bl MIA 
'''

def myCustomLoss(my_outputs, my_labels):
    #specifying the batch size
    my_batch_size = my_outputs.size()[0] 
    #calculating the log of softmax values           
    my_outputs = F.log_softmax(my_outputs, dim=1)  
    #selecting the values that correspond to labels
    my_outputs = my_outputs[range(my_batch_size), my_labels] 
    #returning the results
    return my_outputs


logit = X_train_mem_nonmem
df_logit = pd.DataFrame(logit.detach().numpy())
scaler = StandardScaler() #94 70
# scaler = RobustScaler() #94 70
df_logit  = pd.DataFrame(scaler.fit_transform(df_logit))

X_mem_nonmem_prob = torch.sigmoid(logit)
# X_mem_nonmem_prob = torch.softmax(logit, dim=-1)
df_prob = pd.DataFrame(X_mem_nonmem_prob.detach().numpy())

loss = myCustomLoss(logit, y_cifar10_train)
df_loss = pd.DataFrame(loss.detach().numpy())
# scaler = StandardScaler() #94 70
scaler = RobustScaler() #94 70
df_loss  = pd.DataFrame(scaler.fit_transform(df_loss))

X_train_attacker_df = pd.concat([df_logit, df_prob, df_loss], axis=1, ignore_index=True)


#=============== Attacker Training ==================
n_feature = len(X_train_attacker_df.columns) 
attacker_mlp_logit_prob_loss = MLPClassifier(hidden_layer_sizes=(n_feature, 100, 100, 25), activation='relu', max_iter = 3000, random_state = 1)
attacker_mlp_logit_prob_loss.fit(X_train_attacker_df, y_train_attacker_np)
X_attacker = pd.concat([df_logit, df_prob, df_loss], axis=1, ignore_index=True)

# y_pred_np = attacker_mlp_logit_prob_loss.predict(X_train_attacker_df)
# accuracy  = round(np.mean(y_pred_np == y_train_attacker_np), 2)
# print(f'MIA train accuracy: {accuracy}')


model_file = '../dataset/cifar100_attacker_mlp_logit_prob_loss.pkl'
joblib.dump(attacker_mlp_logit_prob_loss, model_file)
attacker_mlp_logit_prob_loss = joblib.load(model_file)


#=============== Attacker Testing ==================
logit = X_test_mem_nonmem
df_logit = pd.DataFrame(logit.detach().numpy())
scaler = StandardScaler() #94 70
# scaler = RobustScaler() #94 70
df_logit  = pd.DataFrame(scaler.fit_transform(df_logit))

X_mem_nonmem_prob = torch.sigmoid(logit)
# X_mem_nonmem_prob = torch.softmax(logit, dim=-1)
df_prob = pd.DataFrame(X_mem_nonmem_prob.detach().numpy())

loss = myCustomLoss(logit, y_cifar10_test)
df_loss = pd.DataFrame(loss.detach().numpy())
# scaler = StandardScaler() #94 70
scaler = RobustScaler() #94 70
df_loss  = pd.DataFrame(scaler.fit_transform(df_loss))

X_test_attacker_df = pd.concat([df_logit, df_prob, df_loss], axis=1, ignore_index=True)

# y_pred_np = attacker_mlp_logit_prob_loss.predict(X_test_attacker_df)
# accuracy  = round(np.mean(y_pred_np == y_test_attacker_np), 2)
# print(f'MIA test accuracy: {accuracy}')

y_pred_np = attacker_mlp_logit_prob_loss.predict(X_attacker)
accuracy  = round(np.mean(y_pred_np == y_attacker), 2)
print(f'I_bl MIA: {accuracy}')


I_bl MIA: 1.0
