In [1]:
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import os

In [9]:
!conda install -y gdown

Retrieving notices: ...working... done
Channels:
 - rapidsai
 - nvidia
 - conda-forge
 - defaults
 - pytorch
Platform: linux-64
Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /opt/conda

  added / updated specs:
    - gdown


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2024.6.2   |       hbcca054_0         152 KB  conda-forge
    filelock-3.14.0            |     pyhd8ed1ab_0          16 KB  conda-forge
    gdown-5.2.0                |     pyhd8ed1ab_0          21 KB  conda-forge
    openssl-3.3.1              |       h4ab18f5_0         2.8 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.9 MB

The following NEW packages will be INSTALLED:

  filelock           conda-forge/noarch::filelock-3.14.0-pyhd8ed1a

In [10]:
 !gdown --id 1U1abPCf3drtdWeLmxfQNFwNDflOzfXYD

Downloading...
From (original): https://drive.google.com/uc?id=1U1abPCf3drtdWeLmxfQNFwNDflOzfXYD
From (redirected): https://drive.google.com/uc?id=1U1abPCf3drtdWeLmxfQNFwNDflOzfXYD&confirm=t&uuid=fe2d68bb-c86d-4267-a9f8-603aab48f306
To: /kaggle/working/amlm_for_kaggle.zip
100%|███████████████████████████████████████| 2.81G/2.81G [00:21<00:00, 132MB/s]


In [11]:
 ! unzip amlm_for_kaggle.zip

Archive:  amlm_for_kaggle.zip
   creating: amlm/
  inflating: __MACOSX/._amlm         
  inflating: amlm/.DS_Store          
  inflating: __MACOSX/amlm/._.DS_Store  
   creating: amlm/models/
  inflating: __MACOSX/amlm/._models  
   creating: amlm/pickle/
  inflating: __MACOSX/amlm/._pickle  
  inflating: amlm/models/mobilenetv2_tinyimagenet.pth  
  inflating: __MACOSX/amlm/models/._mobilenetv2_tinyimagenet.pth  
  inflating: amlm/models/mobilenetv2_cifar10.pth  
  inflating: __MACOSX/amlm/models/._mobilenetv2_cifar10.pth  
  inflating: amlm/models/resnet34_tinyimagenet.pth  
  inflating: __MACOSX/amlm/models/._resnet34_tinyimagenet.pth  
  inflating: amlm/models/resnet34_cifar10.pth  
  inflating: __MACOSX/amlm/models/._resnet34_cifar10.pth  
  inflating: amlm/pickle/.DS_Store   
  inflating: __MACOSX/amlm/pickle/._.DS_Store  
   creating: amlm/pickle/tinyimagenet/
  inflating: __MACOSX/amlm/pickle/._tinyimagenet  
   creating: amlm/pickle/cifar10/
  inflating: __MACOSX/amlm/pickle/._

In [93]:
# Define training transformations
import torchvision.transforms as transforms
# Define evaluation transformations
eval_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),  # Convert the images to PyTorch tensors
    #transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize the images
])

In [94]:
class ProcessingDataset(torch.utils.data.Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image, label = self.data[idx]
        if self.transform is not None:
            image = self.transform(image)
        return image, label

In [None]:
# Function to load dataset
def load_data(file_path, transform=None):
    with open(file_path, "rb") as f:
        dataset = pickle.load(f)
    return dataset

In [None]:
def extract_features(model, dataloader, is_member):
    model.eval()
    features = []
    entropy = 0
    accuracy = 0
    with torch.no_grad():
        for data in dataloader:
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            softmax_probabilities = torch.softmax(outputs, dim=1).cpu().detach().numpy()

            
            for label_id, single_label in enumerate(labels):
                features.append(softmax_probabilities[label_id].tolist()+[single_label.item()]) #newline

    return features

In [16]:
# Placeholder for is_member function
def is_member(data):
    # Dummy implementation
    return True

In [17]:
shadow_data_path = '/kaggle/working/amlm/pickle/cifar10/resnet34/shadow.p'
eval_data_path = '/kaggle/working/amlm/pickle/cifar10/resnet34/eval.p'
test_data_path = '/kaggle/working/amlm/pickle/cifar10/resnet34/test.p'

In [18]:
shadow_dataset = load_data(shadow_data_path)
eval_dataset = load_data(eval_data_path)
test_dataset = load_data(test_data_path)

In [18]:
print("Eval Dataset Example: ", eval_dataset[2])

Eval Dataset Example:  [tensor([[[0.2510, 0.2471, 0.2392,  ..., 0.2353, 0.1804, 0.2000],
         [0.3059, 0.3059, 0.2784,  ..., 0.1804, 0.1765, 0.1765],
         [0.3647, 0.3412, 0.3333,  ..., 0.2667, 0.2863, 0.2745],
         ...,
         [0.4745, 0.4784, 0.4863,  ..., 0.5412, 0.5098, 0.4471],
         [0.4745, 0.4784, 0.4824,  ..., 0.4745, 0.4667, 0.4392],
         [0.4392, 0.4706, 0.4824,  ..., 0.4157, 0.4353, 0.4000]],

        [[0.2902, 0.2667, 0.2392,  ..., 0.2706, 0.2157, 0.2314],
         [0.3451, 0.3451, 0.3137,  ..., 0.2157, 0.2118, 0.2039],
         [0.4078, 0.3882, 0.3882,  ..., 0.2980, 0.3176, 0.3020],
         ...,
         [0.3961, 0.4000, 0.4078,  ..., 0.4667, 0.4118, 0.3490],
         [0.3961, 0.4000, 0.4039,  ..., 0.3961, 0.3725, 0.3373],
         [0.3647, 0.4000, 0.4157,  ..., 0.3451, 0.3490, 0.3098]],

        [[0.2745, 0.2588, 0.2667,  ..., 0.2745, 0.2392, 0.2784],
         [0.3686, 0.3804, 0.3804,  ..., 0.2392, 0.2471, 0.2627],
         [0.4627, 0.4588, 0.4745, 

In [20]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [21]:
# Load pre-trained target model
model_path = '/kaggle/working/amlm/models/resnet34_cifar10.pth'
target_model = models.resnet34(num_classes=10).to(device)
state_dict = torch.load(model_path, map_location=device)
target_model.load_state_dict(state_dict['net'])
target_model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [22]:
# Define two shadow models with same architecture as target model

shadow_model_A = models.resnet34(weights=None, num_classes=10).to(device)
shadow_model_B = models.resnet34(weights=None, num_classes=10).to(device) #newline


In [23]:
#setting up both the shadow models here
num_epochs = 50
learning_rate = 1e-3
batch_size = 128
loss_function = nn.CrossEntropyLoss()

optimizer_A = optim.Adagrad(shadow_model_A.parameters(), lr=learning_rate)
optimizer_B = optim.RMSprop(shadow_model_B.parameters(), lr=learning_rate)
optimizers = [optimizer_A,optimizer_B]


In [24]:
from sklearn.metrics import classification_report

In [25]:
#Training loop
model_index=1
for shadow_model_being_trained in [shadow_model_A, shadow_model_B]:
    for epoch in range(num_epochs):
        running_loss = 0.0
        current_learning_rate = learning_rate
        # with tqdm(shadow_train_dataloader, unit="batch", leave=False) as detensored_train:
        
        #splitting the training and testing dataset for the specific model
        
        train_current_shadow_model_dataset, test_current_shadow_model_dataset = train_test_split(shadow_dataset, test_size=15000, train_size=15000, random_state=model_index)
        
        #loading the specific datasets
        
        current_shadow_model_train_dataloader = DataLoader(ProcessingDataset(train_current_shadow_model_dataset,transform=eval_transform), batch_size=batch_size, shuffle=True, num_workers=2) #newline
        current_shadow_model_test_dataloader = DataLoader(ProcessingDataset(test_current_shadow_model_dataset,transform=eval_transform), batch_size=batch_size, shuffle=False, num_workers=2) #newline
        
        for i, data in enumerate(current_shadow_model_train_dataloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            
            optimizers[model_index-1].zero_grad()
            outputs = shadow_model_being_trained(inputs)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizers[model_index-1].step()

        
        running_loss = 0.0
        #with tqdm(shadow_test_dataloader, unit="batch", leave=False) as detensored_test:
        pred= []
        gt = [] #newli
        for i, data in enumerate(current_shadow_model_test_dataloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizers[model_index-1].zero_grad()
            outputs = shadow_model_being_trained(inputs)
            loss = loss_function(outputs, labels)
            optimizers[model_index-1].step()

            running_loss += loss.item()
            
            outputs = torch.argmax(outputs, axis=1).cpu().detach().numpy().reshape(-1)
            labels = labels.cpu().detach().numpy().reshape(-1)
            
            pred.extend(outputs)
            gt.extend(labels)

            batch_report = classification_report(gt, pred, output_dict=True, zero_division=0)
        print(f"[Epoch Validation {epoch + 1}, Batch {i + 1}] loss: {running_loss / len(test_current_shadow_model_dataset)}, Accuracy: {batch_report['accuracy']}]")
        
        running_loss = 0.0

        if (epoch+1) % 20 == 0:
            current_learning_rate /= 3
            for parameters in optimizers[model_index-1].param_groups:
                parameters['lr'] = current_learning_rate
        #scheduler.step()
    shadow_model_path =f"shadowmodelstrained/cifar10/reset34-2/shadow"
    os.makedirs(shadow_model_path, exist_ok=True)
    torch.save(shadow_model_being_trained.state_dict(), f"{shadow_model_path}/{model_index}_state_dict.pth")
    model_index+=1
print('Finished Training Shadow Model')

[Epoch Validation 1, Batch 118] loss: 0.01327156883875529, Accuracy: 0.386]
[Epoch Validation 2, Batch 118] loss: 0.01284010374546051, Accuracy: 0.4215333333333333]
[Epoch Validation 3, Batch 118] loss: 0.013101332426071166, Accuracy: 0.44193333333333334]
[Epoch Validation 4, Batch 118] loss: 0.01404023695786794, Accuracy: 0.4438666666666667]
[Epoch Validation 5, Batch 118] loss: 0.015768118182818095, Accuracy: 0.43433333333333335]
[Epoch Validation 6, Batch 118] loss: 0.01714831815560659, Accuracy: 0.44406666666666667]
[Epoch Validation 7, Batch 118] loss: 0.01881760717233022, Accuracy: 0.42493333333333333]
[Epoch Validation 8, Batch 118] loss: 0.01924742929935455, Accuracy: 0.44706666666666667]
[Epoch Validation 9, Batch 118] loss: 0.021163029559453327, Accuracy: 0.4235333333333333]
[Epoch Validation 10, Batch 118] loss: 0.023489141527811685, Accuracy: 0.41046666666666665]
[Epoch Validation 11, Batch 118] loss: 0.021756431245803832, Accuracy: 0.4318666666666667]
[Epoch Validation 12,

In [83]:

attack_model_training_data = []
for model_index in [1,2]:
    
    train_current_shadow_model_dataset, test_current_shadow_model_dataset = train_test_split(shadow_dataset, test_size=15000, train_size=15000, random_state=model_index)
        
        #loading the specific datasets
        
    current_shadow_model_train_dataloader = DataLoader(train_current_shadow_model_dataset, batch_size=100, shuffle=False, num_workers=2) #newline
    current_shadow_model_test_dataloader = DataLoader(test_current_shadow_model_dataset, batch_size=100, shuffle=False, num_workers=2) #newline

    model = models.resnet34(weights=None, num_classes=10).to(device)
    model.load_state_dict(torch.load(f"shadowmodelstrained/cifar10/reset34-2/shadow/{model_index}_state_dict.pth", map_location=device))
    
    shadow_train_features = extract_features(model, current_shadow_model_train_dataloader, True)
    shadow_test_features = extract_features(model, current_shadow_model_test_dataloader, False)
    
    #combining them both in a single array
    
    print("Loop for model index")
    
    attack_model_training_data.append({0:shadow_test_features, 1:shadow_train_features}) #0 representing the sample that were not in the shadow model training set
                                                                                                                       #1 representing the samples that were in the shadow model training set
    

Loop for model index
Loop for model index


In [84]:
attack_dataset = []
for single_attack_model_data in [attack_model_training_data[0], attack_model_training_data[1]]: #attack_train_data:
    for single_class in single_attack_model_data:
        for single_class_id in range(len(single_attack_model_data[single_class])):
            groundtruth = single_attack_model_data[single_class][single_class_id][-1]
            logits = sorted(single_attack_model_data[single_class][single_class_id][:-1], reverse=True)
            attack_dataset.append([[groundtruth]+logits, single_class])

np.random.seed(0)
np.random.shuffle(attack_dataset)

In [143]:
print("attack_dataset" , attack_dataset[0])

attack_dataset [[3, 0.9385873079299927, 0.04229780659079552, 0.013360838405787945, 0.001624826341867447, 0.0015265177935361862, 0.0012836003443226218, 0.0006053110118955374, 0.00047176997759379447, 0.00018788175657391548, 5.415191481006332e-05], 0]


In [146]:
class AttackModel(nn.Module):
    def __init__(self):
        super(AttackModel, self).__init__()
        self.fc1 = nn.Linear(11, 128)  # Increase number of neurons
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        self.dropout = nn.Dropout(0.5)  # Dropout layer with 50% dropout rate

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  # Apply dropout
        x = F.relu(self.fc2(x))
        x = self.dropout(x)  # Apply dropout
        x = torch.sigmoid(self.fc3(x))
        return x

In [147]:
attack_model = AttackModel().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(attack_model.parameters(), lr= learning_rate)  # L2 regularization


In [148]:
class ListingAttackData(torch.utils.data.Dataset):
	def __init__(self, train_dataset):
		self.X = train_dataset

	def __getitem__(self, index):
		x, y = self.X[index]
		return torch.tensor(x), torch.tensor(y).unsqueeze(0).type(torch.float)

	def __len__(self):
		return len(self.X)

In [149]:
# Training loop for attack model
# num_epochs = 10

attack_model_train_dataset, attack_model_test_dataset = train_test_split(attack_dataset, test_size=2000, train_size=28000, random_state=0)
num_epochs = 20

attack_model_train_loader = DataLoader(ListingAttackData(attack_model_train_dataset),batch_size=256,shuffle=True)

attack_model_test_loader = DataLoader(ListingAttackData(attack_model_test_dataset),batch_size=256,shuffle=False)

    
for epoch in range(num_epochs):
    running_loss = 0.0
    accuracy = 0.0
    for i, (inputs, labels) in enumerate(attack_model_train_loader, 0):
        #inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = attack_model(inputs)
    # print("outputs at every batch", outputs)
    
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        accuracy += torch.sum((outputs[:, 0] > 0.5) == labels[:, 0])
       
        #print("Train outputs[:, 0]" , outputs[:, 0])
        #print("Train labels[:, 0]" , labels[:, 0])
    print(f'Train: [Epoch {epoch + 1}, Batch {i + 1}] attack model loss: {running_loss / len(attack_model_train_loader)}, Accuracy: {accuracy/len(attack_model_train_dataset)}')
    #running_loss = 0.0
        
        #print("Helloooo")
    running_loss = 0.0
    accuracy = 0.0
    for i, (inputs,labels) in enumerate(attack_model_test_loader, 0):
        #inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = attack_model(inputs)
        #print("inputs at every batch", inputs)
        #print("labels at every batch", labels)
    
        loss = criterion(outputs, labels)
        #loss.backward()
        optimizer.step()

        running_loss += loss.item()
        accuracy += torch.sum((outputs[:, 0] > 0.5) == labels[:, 0])
        
    print(f'Test: [Epoch {epoch + 1}, Batch {i + 1}] attack model loss: {running_loss / len(attack_model_test_loader)}, Accuracy: {accuracy/len(attack_model_test_dataset)}')
    #running_loss = 0.0

print('Finished Training Attack Model')

Train: [Epoch 1, Batch 110] attack model loss: 0.6970094886693088, Accuracy: 0.5018928647041321
Test: [Epoch 1, Batch 8] attack model loss: 0.6933811157941818, Accuracy: 0.5024999976158142
Train: [Epoch 2, Batch 110] attack model loss: 0.6931287489154122, Accuracy: 0.5010356903076172
Test: [Epoch 2, Batch 8] attack model loss: 0.6926376894116402, Accuracy: 0.5024999976158142
Train: [Epoch 3, Batch 110] attack model loss: 0.690972777930173, Accuracy: 0.5013214349746704
Test: [Epoch 3, Batch 8] attack model loss: 0.6867517158389091, Accuracy: 0.5090000033378601
Train: [Epoch 4, Batch 110] attack model loss: 0.6836022582921115, Accuracy: 0.5158928632736206
Test: [Epoch 4, Batch 8] attack model loss: 0.6792454496026039, Accuracy: 0.5320000052452087
Train: [Epoch 5, Batch 110] attack model loss: 0.6773661320859735, Accuracy: 0.5396785736083984
Test: [Epoch 5, Batch 8] attack model loss: 0.6720754131674767, Accuracy: 0.5585000514984131
Train: [Epoch 6, Batch 110] attack model loss: 0.6721374

In [150]:
attack_model.eval()
correct = 0
total = 0
for sample in eval_dataset:
    image_input = sample[0].to(device)
    logits = torch.softmax(target_model(image_input.unsqueeze(0)), dim=1)[0]
    logits = sorted(logits.cpu().detach().numpy().tolist(), reverse=True)

    member_or_not = attack_model(torch.tensor([[sample[1]]+logits]).to(device))[0][0]
    member_or_not = member_or_not.cpu().detach().numpy() > 0.55
    if(member_or_not==sample[2]):
        correct+=1
    total+=1
    
print("Accuracy on evaluation: " , 100 * correct/total , "%")

Accuracy on evaluation:  74.5 %
