# Private Training

# Q4

## Import the Class

In [7]:
import model
from model import CIFAR10Classifier

##  Import Required Libraries

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split

## Data Loading and Transformation

In [9]:
# Transform and normalize data
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

# Load CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


## Split the Dataset

In [10]:
BATCH_SIZE = 512
# Split the trainset into 80% train and 20% validation
train_size = int(0.8 * len(trainset))
val_size = len(trainset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(trainset, [train_size, val_size])

train_loader_baseline = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader_baseline = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader_baseline = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)


In [11]:
print(train_size);

40000


## Initialize Model, Loss Function, and Optimizer

In [12]:
# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model, loss function, and optimizer
model = CIFAR10Classifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


## Train the Model

In [13]:
for epoch in range(20):  # number of epochs
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader_baseline, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Print training progress
        if i % 100 == 99:
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] Loss: {running_loss / 100:.4f}")
            running_loss = 0.0

    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for data in val_loader_baseline:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss /= len(val_loader_baseline)
    val_accuracy = 100 * correct / total
    print(f"[Epoch {epoch + 1}] Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")

print('Finished Training')




[Epoch 1] Validation Loss: 1.5112, Validation Accuracy: 47.60%
[Epoch 2] Validation Loss: 1.3566, Validation Accuracy: 52.73%
[Epoch 3] Validation Loss: 1.2634, Validation Accuracy: 56.23%
[Epoch 4] Validation Loss: 1.1845, Validation Accuracy: 59.53%
[Epoch 5] Validation Loss: 1.1355, Validation Accuracy: 60.68%
[Epoch 6] Validation Loss: 1.1072, Validation Accuracy: 62.14%
[Epoch 7] Validation Loss: 1.0703, Validation Accuracy: 63.18%
[Epoch 8] Validation Loss: 1.0358, Validation Accuracy: 64.05%
[Epoch 9] Validation Loss: 1.0188, Validation Accuracy: 64.59%
[Epoch 10] Validation Loss: 1.0018, Validation Accuracy: 65.31%
[Epoch 11] Validation Loss: 0.9923, Validation Accuracy: 65.81%
[Epoch 12] Validation Loss: 0.9804, Validation Accuracy: 66.01%
[Epoch 13] Validation Loss: 0.9781, Validation Accuracy: 65.45%
[Epoch 14] Validation Loss: 0.9701, Validation Accuracy: 66.21%
[Epoch 15] Validation Loss: 0.9716, Validation Accuracy: 66.36%
[Epoch 16] Validation Loss: 0.9686, Validation Ac

## Save the Trained Model

In [14]:
PATH = './baseline_model.pth'
torch.save(model.state_dict(), PATH)


##  Evaluate the Model

In [15]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in test_loader_baseline:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%")


Accuracy of the network on the 10000 test images: 67.05%


# Q5

### Using opacus (Implemented Code)

https://opacus.ai/tutorials/building_image_classifier

In [16]:
!pip install opacus


Collecting opacus
  Downloading opacus-1.4.1-py3-none-any.whl (226 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/226.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m225.3/226.7 kB[0m [31m6.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.7/226.7 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=2.0->opacus)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=2.0->opacus)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=2.0->opacus)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=2.0->opacus)
  Using cached nvid

### Hyper-parameters

In [17]:
import warnings
warnings.simplefilter("ignore")

MAX_GRAD_NORM = 10
EPSILON = 50.0
DELTA = 1e-5
EPOCHS = 20

LR = 1e-3

BATCH_SIZE = 512
MAX_PHYSICAL_BATCH_SIZE = 128

### Data

In [18]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split

# Transform and normalize data
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
])

# Load CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split the trainset into 80% train and 20% validation
train_size = int(0.8 * len(trainset))
val_size = len(trainset) - train_size
train_dataset, val_dataset = random_split(trainset, [train_size, val_size])

train_loader_modified = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader_modified  = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader_modified  = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)


Files already downloaded and verified
Files already downloaded and verified


### Model

In [19]:
import model
from model import CIFAR10Classifier

model = CIFAR10Classifier()

In [20]:
from opacus.validators import ModuleValidator

model.train()

errors = ModuleValidator.validate(model, strict=False)
errors[-5:]

[]

In [21]:
model = ModuleValidator.fix(model)
ModuleValidator.validate(model, strict=False)

[]

In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") #defining device

model = model.to(device)

In [23]:
import torch.nn as nn
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)
#optimizer = optim.RMSprop(model.parameters(), lr=LR)

### Prepare for Training

In [24]:
def accuracy(preds, labels):
    return (preds == labels).mean()

In [25]:
from opacus import PrivacyEngine

privacy_engine = PrivacyEngine()

model, optimizer, train_loader_modified = privacy_engine.make_private_with_epsilon(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader_modified,
    epochs=EPOCHS,
    target_epsilon=EPSILON,
    target_delta=DELTA,
    max_grad_norm=MAX_GRAD_NORM,
)

print(f"Using sigma={optimizer.noise_multiplier} and C={MAX_GRAD_NORM}")

Using sigma=0.37944793701171875 and C=10


In [26]:
import numpy as np
from opacus.utils.batch_memory_manager import BatchMemoryManager


def train(model, train_loader, optimizer, epoch, device):
    model.train()
    criterion = nn.CrossEntropyLoss()

    losses = []
    top1_acc = []

    with BatchMemoryManager(
        data_loader=train_loader,
        max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE,
        optimizer=optimizer
    ) as memory_safe_data_loader:

        for i, (images, target) in enumerate(memory_safe_data_loader):
            optimizer.zero_grad()
            images = images.to(device)
            target = target.to(device)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()

            # measure accuracy and record loss
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)

            loss.backward()
            optimizer.step()

            if (i+1) % 200 == 0:
                epsilon = privacy_engine.get_epsilon(DELTA)
                print(
                    f"\tTrain Epoch: {epoch} \t"
                    f"Loss: {np.mean(losses):.6f} "
                    f"Acc@1: {np.mean(top1_acc) * 100:.6f} "
                    f"(ε = {epsilon:.2f}, δ = {DELTA})"
                )

In [27]:
def test(model, test_loader, device):
    model.eval()
    criterion = nn.CrossEntropyLoss()
    losses = []
    top1_acc = []

    with torch.no_grad():
        for images, target in test_loader:
            images = images.to(device)
            target = target.to(device)

            output = model(images)
            loss = criterion(output, target)
            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)

    top1_avg = np.mean(top1_acc)

    print(
        f"\tTest set:"
        f"Loss: {np.mean(losses):.6f} "
        f"Acc: {top1_avg * 100:.6f} "
    )
    return np.mean(top1_acc)

In [28]:
def validation(model, val_loader, device):
    model.eval()
    criterion = nn.CrossEntropyLoss()
    losses = []
    top1_acc = []

    with torch.no_grad():
        for images, target in val_loader:
            images = images.to(device)
            target = target.to(device)

            output = model(images)
            loss = criterion(output, target)
            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)

    top1_avg = np.mean(top1_acc)

    print(
        f"\tValidation set:"
        f"Loss: {np.mean(losses):.6f} "
        f"Acc: {top1_avg * 100:.6f} "
    )
    return np.mean(top1_acc)

### Train The Network

In [29]:
from tqdm.notebook import tqdm

for epoch in tqdm(range(EPOCHS), desc="Epoch", unit="epoch"):
    train(model, train_loader_modified, optimizer, epoch + 1, device)

Epoch:   0%|          | 0/20 [00:00<?, ?epoch/s]

	Train Epoch: 1 	Loss: 2.178985 Acc@1: 19.898451 (ε = 12.89, δ = 1e-05)
	Train Epoch: 2 	Loss: 1.973132 Acc@1: 30.473708 (ε = 17.15, δ = 1e-05)
	Train Epoch: 3 	Loss: 1.899869 Acc@1: 33.989458 (ε = 20.18, δ = 1e-05)
	Train Epoch: 4 	Loss: 1.846271 Acc@1: 36.255564 (ε = 22.80, δ = 1e-05)
	Train Epoch: 5 	Loss: 1.834799 Acc@1: 37.975965 (ε = 25.05, δ = 1e-05)
	Train Epoch: 6 	Loss: 1.807479 Acc@1: 39.592424 (ε = 27.20, δ = 1e-05)
	Train Epoch: 7 	Loss: 1.784235 Acc@1: 40.696626 (ε = 29.18, δ = 1e-05)
	Train Epoch: 8 	Loss: 1.755678 Acc@1: 41.537582 (ε = 31.05, δ = 1e-05)
	Train Epoch: 9 	Loss: 1.761812 Acc@1: 41.750261 (ε = 32.84, δ = 1e-05)
	Train Epoch: 10 	Loss: 1.736863 Acc@1: 43.359000 (ε = 34.56, δ = 1e-05)
	Train Epoch: 11 	Loss: 1.716502 Acc@1: 43.688296 (ε = 36.22, δ = 1e-05)
	Train Epoch: 12 	Loss: 1.704348 Acc@1: 43.960949 (ε = 37.86, δ = 1e-05)
	Train Epoch: 13 	Loss: 1.711286 Acc@1: 44.481772 (ε = 39.38, δ = 1e-05)
	Train Epoch: 14 	Loss: 1.699056 Acc@1: 44.702703 (ε = 40.91

### Saving the model

In [30]:
PATH = './modified_model.pth'
torch.save(model.state_dict(), PATH)

### Test the network

In [31]:
validation(model, val_loader_modified, device)

	Validation set:Loss: 1.336747 Acc: 54.375574 


0.5437557444852941

In [32]:
top1_acc = test(model, test_loader_modified, device)

	Test set:Loss: 1.340004 Acc: 53.816636 


# Q6 & Q7

## Using MLP model for Attacker Model

### MLP with 4 Hidden Layers  :

> Learning Rate (in training shadow models) : 0.001

> Learning Rate (in training attacking models) : 0.001

> Epochs (in training shadow models) : 10

> Epochs (in training attacking models) : 10

> Batch Size (in training shadow models) : 64

> Batch Size (in training attacking models) : 64

> num_shadow_models = 100 ---> The Article

> Optimizer (in training attacking models ) : Adam

In [33]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, ConcatDataset
from model import CIFAR10Classifier
import torch.nn.functional as F

class AttackModel(nn.Module):
    def __init__(self):
        super(AttackModel, self).__init__()
        self.fc1 = nn.Linear(10, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = torch.sigmoid(self.fc4(x))
        return x

class MembershipInferenceAttackNoPrivacy:
    def __init__(self, shadow_model_class, attack_model_class, device='cpu'):
        self.shadow_model_class = shadow_model_class
        self.attack_model_class = attack_model_class
        self.device = device
        self.attack_models = {}

    def train_shadow_models(self, seen_loaders, num_epochs=10, lr=1e-3):
        self.shadow_models = [self.shadow_model_class().to(self.device) for _ in range(len(seen_loaders))]

        for i, (shadow_model, seen_loader) in enumerate(zip(self.shadow_models, seen_loaders)):
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(shadow_model.parameters(), lr=lr)
            self._train_model(shadow_model, seen_loader, criterion, optimizer, num_epochs)
            print(f'Shadow model {i+1} trained.')

    def _train_model(self, model, dataloader, criterion, optimizer, num_epochs):
        model.train()
        for epoch in range(num_epochs):
            running_loss = 0.0
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            print(f'Epoch {epoch+1}, Loss: {running_loss/len(dataloader):.6f}')

    def collect_outputs(self, seen_loaders, unseen_loaders):
        self.attack_data = []
        self.attack_labels = []

        for shadow_model, seen_loader, unseen_loader in zip(self.shadow_models, seen_loaders, unseen_loaders):
            self._collect_shadow_model_outputs(shadow_model, seen_loader, label=1)  # in
            self._collect_shadow_model_outputs(shadow_model, unseen_loader, label=0)  # out

        self.attack_data = torch.cat(self.attack_data).to(self.device)
        self.attack_labels = torch.cat(self.attack_labels).to(self.device)

    def _collect_shadow_model_outputs(self, model, dataloader, label):
        model.eval()
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                self.attack_data.append(torch.cat([probabilities, labels.unsqueeze(1).float()], dim=1))
                self.attack_labels.append(torch.full((outputs.size(0),), label, dtype=torch.float).to(self.device))

    def train_attack_models(self, num_epochs=10, lr=0.001):
        attack_dataset = torch.utils.data.TensorDataset(self.attack_data, self.attack_labels)
        for class_label in range(10):
            class_indices = (self.attack_data[:, -1] == class_label).nonzero().squeeze()
            class_data = self.attack_data[class_indices][:, :-1]
            class_labels = self.attack_labels[class_indices].view(-1, 1)


            attack_dataset = torch.utils.data.TensorDataset(class_data, class_labels)
            attack_loader = DataLoader(attack_dataset, batch_size=64, shuffle=True)

            attack_model = self.attack_model_class().to(self.device)
            criterion = nn.BCELoss()
            optimizer = optim.Adam(attack_model.parameters(), lr=lr)

            self._train_model(attack_model, attack_loader, criterion, optimizer, num_epochs)
            self.attack_models[class_label] = attack_model

            print(f'Attack model for class {class_label} trained.')

    def save_attack_models(self, path):
        for class_label, model in self.attack_models.items():
            torch.save(model.state_dict(), f'{path}_class_{class_label}.pth')
            print(f'Attack model for class {class_label} saved to {path}_class_{class_label}.pth')

    def load_attack_models(self, path):
        for class_label in range(10):
            model = self.attack_model_class().to(self.device)
            model.load_state_dict(torch.load(f'{path}_class_{class_label}.pth', map_location=self.device))
            self.attack_models[class_label] = model
            print(f'Attack model for class {class_label} loaded from {path}_class_{class_label}.pth')

    def infer_membership(self, model, seen_loader, unseen_loader , seen_outputs , unseen_outputs , labels):

        model_outputs = torch.cat([seen_outputs, unseen_outputs]).to(self.device)

        labels = labels.to(self.device)

        memberships = []
        for output, label in zip(model_outputs, labels):
            class_label = label.item()
            attack_model = self.attack_models[class_label]
            membership_pred = attack_model(output.unsqueeze(0)).item()
            memberships.append(membership_pred)
        return torch.tensor(memberships, device=self.device)

    def evaluate_attack_model(self, seen_loader, unseen_loader, target_model):
        seen_outputs , lables_seen= self._get_model_outputs(target_model, seen_loader)
        unseen_outputs , labels_unseen = self._get_model_outputs(target_model, unseen_loader)

        attack_data = torch.cat([seen_outputs, unseen_outputs]).to(self.device)
        attack_labels = torch.cat([torch.ones(len(seen_outputs)), torch.zeros(len(unseen_outputs))]).to(self.device)

        labels = torch.cat([lables_seen, labels_unseen]).to(self.device)

        memberships = self.infer_membership(target_model, seen_loader, unseen_loader , seen_outputs , unseen_outputs , labels)
        membership_preds = (memberships > 0.5).float()
        accuracy = (membership_preds == attack_labels).float().mean().item()
        return accuracy

    def _get_model_outputs(self, model, dataloader):
        model.eval()
        outputs_list = []
        labels_list = []
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs = inputs.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                outputs_list.append(probabilities)
                labels_list.append(labels) # Convert labels to list for easy concatenation
        return torch.cat(outputs_list), torch.cat(labels_list)

### Training

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_size = int(0.8 * len(train_set))
remaining_size = len(train_set) - train_size
train_subset, remaining_subset = torch.utils.data.random_split(train_set, [train_size, remaining_size])


num_shadow_models = 100
seen_size_per_model = train_size // num_shadow_models
seen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * seen_size_per_model
    end_idx = (i + 1) * seen_size_per_model
    seen_indices = torch.arange(start_idx, end_idx)
    seen_train_set = Subset(train_subset, seen_indices)
    seen_loader = DataLoader(seen_train_set, batch_size=64, shuffle=True)
    seen_loaders.append(seen_loader)

unseen_dataset = ConcatDataset([remaining_subset, test_set])
unseen_size_per_model = len(unseen_dataset) // num_shadow_models
unseen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * unseen_size_per_model
    end_idx = (i + 1) * unseen_size_per_model
    unseen_indices = torch.arange(start_idx, end_idx)
    unseen_subset = Subset(unseen_dataset, unseen_indices)
    unseen_loader = DataLoader(unseen_subset, batch_size=64, shuffle=False)
    unseen_loaders.append(unseen_loader)

test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Initialize MembershipInferenceAttackNoPrivacy
mia = MembershipInferenceAttackNoPrivacy(CIFAR10Classifier, AttackModel, device)

# Train shadow models without differential privacy
mia.train_shadow_models(seen_loaders, num_epochs=10)

# Collect outputs for attack model
mia.collect_outputs(seen_loaders, unseen_loaders)

# Train attack models
mia.train_attack_models(num_epochs=10)

# Save the attack models
mia.save_attack_models('attack_model')

# Load the attack models (for future use)
mia.load_attack_models('attack_model')

Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 2.278183
Epoch 2, Loss: 2.141670
Epoch 3, Loss: 2.035657
Epoch 4, Loss: 1.948037
Epoch 5, Loss: 1.827686
Epoch 6, Loss: 1.767027
Epoch 7, Loss: 1.657067
Epoch 8, Loss: 1.562772
Epoch 9, Loss: 1.393725
Epoch 10, Loss: 1.383874
Shadow model 1 trained.
Epoch 1, Loss: 2.333053
Epoch 2, Loss: 2.211608
Epoch 3, Loss: 2.103024
Epoch 4, Loss: 2.021191
Epoch 5, Loss: 1.933874
Epoch 6, Loss: 1.873432
Epoch 7, Loss: 1.723550
Epoch 8, Loss: 1.580324
Epoch 9, Loss: 1.479427
Epoch 10, Loss: 1.407574
Shadow model 2 trained.
Epoch 1, Loss: 2.326754
Epoch 2, Loss: 2.228614
Epoch 3, Loss: 2.177272
Epoch 4, Loss: 2.108865
Epoch 5, Loss: 2.043911
Epoch 6, Loss: 1.893782
Epoch 7, Loss: 1.860719
Epoch 8, Loss: 1.795353
Epoch 9, Loss: 1.687164
Epoch 10, Loss: 1.643388
Shadow model 3 trained.
Epoch 1, Loss: 2.290504
Epoch 2, Loss: 2.199087
Epoch 3, Loss: 2.077600
Epoch 4, Loss: 2.003038
Epoch 5, Loss: 1.921396
Epoch 6, 

### Accuracies

In [35]:
model_base = CIFAR10Classifier()
model_base.load_state_dict(torch.load('baseline_model.pth', map_location=device))
model_base.to(device)

accuracy_baseline = mia.evaluate_attack_model(train_loader_baseline, test_loader_baseline, model_base)
print(f'Accuracy for Attacking to the Baseline Model :  {accuracy_baseline * 100:.2f}%')

Accuracy for Attacking to the Baseline Model :  78.23%


In [36]:
def remove_module_prefix(state_dict):
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('_module.'):
            new_state_dict[k[8:]] = v  # remove '_module.' prefix
        else:
            new_state_dict[k] = v
    return new_state_dict

model_private = CIFAR10Classifier().to(device)
private_state_dict = torch.load('modified_model.pth', map_location=device)
model_private.load_state_dict(remove_module_prefix(private_state_dict))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_private.to(device)

accuracy_private = mia.evaluate_attack_model(train_loader_modified, test_loader_modified, model_private)
print(f'Accuracy for Attacking to the Private Model :  {accuracy_private * 100:.2f}%')

Accuracy for Attacking to the Private Model :  63.15%


### MLP with 9 Hidden Layers  :

> Learning Rate (in training shadow models) : 0.001

> Learning Rate (in training attacking models) : 0.001

> Epochs (in training shadow models) : 10

> Epochs (in training attacking models) : 50

> Batch Size (in training shadow models) : 64

> Batch Size (in training attacking models) : 64

> num_shadow_models = 50

> Optimizer (in training attacking models ) : Adam

In [37]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, ConcatDataset
from model import CIFAR10Classifier
import torch.nn.functional as F

class AttackModel(nn.Module):
    def __init__(self):
        super(AttackModel, self).__init__()
        self.fc1 = nn.Linear(10, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 256)
        self.fc4 = nn.Linear(256, 512)
        self.fc5 = nn.Linear(512, 256)
        self.fc6 = nn.Linear(256, 128)
        self.fc7 = nn.Linear(128, 64)
        self.fc8 = nn.Linear(64, 32)
        self.fc9 = nn.Linear(32, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc5(x))
        x = F.relu(self.fc6(x))
        x = F.relu(self.fc7(x))
        x = F.relu(self.fc8(x))
        x = torch.sigmoid(self.fc9(x))
        return x

class MembershipInferenceAttackNoPrivacy:
    def __init__(self, shadow_model_class, attack_model_class, device='cpu'):
        self.shadow_model_class = shadow_model_class
        self.attack_model_class = attack_model_class
        self.device = device
        self.attack_models = {}

    def train_shadow_models(self, seen_loaders, num_epochs=10, lr=1e-3):
        self.shadow_models = [self.shadow_model_class().to(self.device) for _ in range(len(seen_loaders))]

        for i, (shadow_model, seen_loader) in enumerate(zip(self.shadow_models, seen_loaders)):
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(shadow_model.parameters(), lr=lr)
            self._train_model(shadow_model, seen_loader, criterion, optimizer, num_epochs)
            print(f'Shadow model {i+1} trained.')

    def _train_model(self, model, dataloader, criterion, optimizer, num_epochs):
        model.train()
        for epoch in range(num_epochs):
            running_loss = 0.0
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            print(f'Epoch {epoch+1}, Loss: {running_loss/len(dataloader):.6f}')

    def collect_outputs(self, seen_loaders, unseen_loaders):
        self.attack_data = []
        self.attack_labels = []

        for shadow_model, seen_loader, unseen_loader in zip(self.shadow_models, seen_loaders, unseen_loaders):
            self._collect_shadow_model_outputs(shadow_model, seen_loader, label=1)  # in
            self._collect_shadow_model_outputs(shadow_model, unseen_loader, label=0)  # out

        self.attack_data = torch.cat(self.attack_data).to(self.device)
        self.attack_labels = torch.cat(self.attack_labels).to(self.device)

    def _collect_shadow_model_outputs(self, model, dataloader, label):
        model.eval()
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                self.attack_data.append(torch.cat([probabilities, labels.unsqueeze(1).float()], dim=1))
                self.attack_labels.append(torch.full((outputs.size(0),), label, dtype=torch.float).to(self.device))

    def train_attack_models(self, num_epochs=10, lr=0.001):
        attack_dataset = torch.utils.data.TensorDataset(self.attack_data, self.attack_labels)
        for class_label in range(10):
            class_indices = (self.attack_data[:, -1] == class_label).nonzero().squeeze()
            class_data = self.attack_data[class_indices][:, :-1]
            class_labels = self.attack_labels[class_indices].view(-1, 1)


            attack_dataset = torch.utils.data.TensorDataset(class_data, class_labels)
            attack_loader = DataLoader(attack_dataset, batch_size=64, shuffle=True)

            attack_model = self.attack_model_class().to(self.device)
            criterion = nn.BCELoss()
            optimizer = optim.Adam(attack_model.parameters(), lr=lr)

            self._train_model(attack_model, attack_loader, criterion, optimizer, num_epochs)
            self.attack_models[class_label] = attack_model

            print(f'Attack model for class {class_label} trained.')

    def save_attack_models(self, path):
        for class_label, model in self.attack_models.items():
            torch.save(model.state_dict(), f'{path}_class_{class_label}.pth')
            print(f'Attack model for class {class_label} saved to {path}_class_{class_label}.pth')

    def load_attack_models(self, path):
        for class_label in range(10):
            model = self.attack_model_class().to(self.device)
            model.load_state_dict(torch.load(f'{path}_class_{class_label}.pth', map_location=self.device))
            self.attack_models[class_label] = model
            print(f'Attack model for class {class_label} loaded from {path}_class_{class_label}.pth')

    def infer_membership(self, model, seen_loader, unseen_loader , seen_outputs , unseen_outputs , labels):

        model_outputs = torch.cat([seen_outputs, unseen_outputs]).to(self.device)

        labels = labels.to(self.device)

        memberships = []
        for output, label in zip(model_outputs, labels):
            class_label = label.item()
            attack_model = self.attack_models[class_label]
            membership_pred = attack_model(output.unsqueeze(0)).item()
            memberships.append(membership_pred)
        return torch.tensor(memberships, device=self.device)

    def evaluate_attack_model(self, seen_loader, unseen_loader, target_model):
        seen_outputs , lables_seen= self._get_model_outputs(target_model, seen_loader)
        unseen_outputs , labels_unseen = self._get_model_outputs(target_model, unseen_loader)

        attack_data = torch.cat([seen_outputs, unseen_outputs]).to(self.device)
        attack_labels = torch.cat([torch.ones(len(seen_outputs)), torch.zeros(len(unseen_outputs))]).to(self.device)

        labels = torch.cat([lables_seen, labels_unseen]).to(self.device)

        memberships = self.infer_membership(target_model, seen_loader, unseen_loader , seen_outputs , unseen_outputs , labels)
        membership_preds = (memberships > 0.5).float()
        accuracy = (membership_preds == attack_labels).float().mean().item()
        return accuracy

    def _get_model_outputs(self, model, dataloader):
        model.eval()
        outputs_list = []
        labels_list = []
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs = inputs.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                outputs_list.append(probabilities)
                labels_list.append(labels) # Convert labels to list for easy concatenation
        return torch.cat(outputs_list), torch.cat(labels_list)

In [38]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_size = int(0.8 * len(train_set))
remaining_size = len(train_set) - train_size
train_subset, remaining_subset = torch.utils.data.random_split(train_set, [train_size, remaining_size])


num_shadow_models = 50
seen_size_per_model = train_size // num_shadow_models
seen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * seen_size_per_model
    end_idx = (i + 1) * seen_size_per_model
    seen_indices = torch.arange(start_idx, end_idx)
    seen_train_set = Subset(train_subset, seen_indices)
    seen_loader = DataLoader(seen_train_set, batch_size=64, shuffle=True)
    seen_loaders.append(seen_loader)

unseen_dataset = ConcatDataset([remaining_subset, test_set])
unseen_size_per_model = len(unseen_dataset) // num_shadow_models
unseen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * unseen_size_per_model
    end_idx = (i + 1) * unseen_size_per_model
    unseen_indices = torch.arange(start_idx, end_idx)
    unseen_subset = Subset(unseen_dataset, unseen_indices)
    unseen_loader = DataLoader(unseen_subset, batch_size=64, shuffle=False)
    unseen_loaders.append(unseen_loader)

test_loader = DataLoader(test_set, batch_size=10, shuffle=False)

# Initialize MembershipInferenceAttackNoPrivacy
mia = MembershipInferenceAttackNoPrivacy(CIFAR10Classifier, AttackModel, device)

# Train shadow models without differential privacy
mia.train_shadow_models(seen_loaders, num_epochs=10)

# Collect outputs for attack model
mia.collect_outputs(seen_loaders, unseen_loaders)

# Train attack models
mia.train_attack_models(num_epochs=50)

# Save the attack models
mia.save_attack_models('attack_model')

# Load the attack models (for future use)
mia.load_attack_models('attack_model')

Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 2.280746
Epoch 2, Loss: 2.118312
Epoch 3, Loss: 2.045994
Epoch 4, Loss: 1.965019
Epoch 5, Loss: 1.906267
Epoch 6, Loss: 1.838629
Epoch 7, Loss: 1.746467
Epoch 8, Loss: 1.635649
Epoch 9, Loss: 1.630207
Epoch 10, Loss: 1.516875
Shadow model 1 trained.
Epoch 1, Loss: 2.287084
Epoch 2, Loss: 2.152396
Epoch 3, Loss: 2.079041
Epoch 4, Loss: 1.936000
Epoch 5, Loss: 1.813020
Epoch 6, Loss: 1.771416
Epoch 7, Loss: 1.660080
Epoch 8, Loss: 1.550930
Epoch 9, Loss: 1.416992
Epoch 10, Loss: 1.319677
Shadow model 2 trained.
Epoch 1, Loss: 2.268193
Epoch 2, Loss: 2.115432
Epoch 3, Loss: 2.009951
Epoch 4, Loss: 1.897934
Epoch 5, Loss: 1.796283
Epoch 6, Loss: 1.709851
Epoch 7, Loss: 1.609906
Epoch 8, Loss: 1.525193
Epoch 9, Loss: 1.474293
Epoch 10, Loss: 1.378292
Shadow model 3 trained.
Epoch 1, Loss: 2.310143
Epoch 2, Loss: 2.208501
Epoch 3, Loss: 2.122663
Epoch 4, Loss: 2.001128
Epoch 5, Loss: 1.931343
Epoch 6, 

### Accuracies

In [39]:
model_base = CIFAR10Classifier()
model_base.load_state_dict(torch.load('baseline_model.pth', map_location=device))
model_base.to(device)

accuracy_baseline = mia.evaluate_attack_model(train_loader_baseline, test_loader_baseline, model_base)
print(f'Accuracy for Attacking to the Baseline Model :  {accuracy_baseline * 100:.2f}%')

Accuracy for Attacking to the Baseline Model :  78.12%


In [40]:
def remove_module_prefix(state_dict):
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('_module.'):
            new_state_dict[k[8:]] = v  # remove '_module.' prefix
        else:
            new_state_dict[k] = v
    return new_state_dict

model_private = CIFAR10Classifier().to(device)
private_state_dict = torch.load('modified_model.pth', map_location=device)
model_private.load_state_dict(remove_module_prefix(private_state_dict))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_private.to(device)

accuracy_private = mia.evaluate_attack_model(train_loader_modified, test_loader_modified, model_private)
print(f'Accuracy for Attacking to the Private Model :  {accuracy_private * 100:.2f}%')

Accuracy for Attacking to the Private Model :  63.96%


## MLP with 2 Hidden Layers and Xavier/Glorot Initialization  :

> Learning Rate (in training shadow models) : 0.001

> Learning Rate (in training attacking models) : 0.001

> Epochs (in training shadow models) : 10

> Epochs (in training attacking models) : 50

> Batch Size (in training shadow models) : 50

> Batch Size (in training attacking models) : 64

> num_shadow_models = 50

> Optimizer (in training attacking models ) : RMSpop


In [41]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, ConcatDataset
from model import CIFAR10Classifier
import torch.nn.functional as F

class AttackModel(nn.Module):
    def __init__(self):
        super(AttackModel, self).__init__()
        self.fc1 = nn.Linear(10, 64)
        self.fc2 = nn.Linear(64, 1)
        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

class MembershipInferenceAttackNoPrivacy:
    def __init__(self, shadow_model_class, attack_model_class, device='cpu'):
        self.shadow_model_class = shadow_model_class
        self.attack_model_class = attack_model_class
        self.device = device
        self.attack_models = {}

    def train_shadow_models(self, seen_loaders, num_epochs=20, lr=1e-3):
        self.shadow_models = [self.shadow_model_class().to(self.device) for _ in range(len(seen_loaders))]

        for i, (shadow_model, seen_loader) in enumerate(zip(self.shadow_models, seen_loaders)):
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(shadow_model.parameters(), lr=lr)
            self._train_model(shadow_model, seen_loader, criterion, optimizer, num_epochs)
            print(f'Shadow model {i+1} trained.')

    def _train_model(self, model, dataloader, criterion, optimizer, num_epochs):
        model.train()
        for epoch in range(num_epochs):
            running_loss = 0.0
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            print(f'Epoch {epoch+1}, Loss: {running_loss/len(dataloader):.6f}')

    def collect_outputs(self, seen_loaders, unseen_loaders):
        self.attack_data = []
        self.attack_labels = []

        for shadow_model, seen_loader, unseen_loader in zip(self.shadow_models, seen_loaders, unseen_loaders):
            self._collect_shadow_model_outputs(shadow_model, seen_loader, label=1)  # in
            self._collect_shadow_model_outputs(shadow_model, unseen_loader, label=0)  # out

        self.attack_data = torch.cat(self.attack_data).to(self.device)
        self.attack_labels = torch.cat(self.attack_labels).to(self.device)

    def _collect_shadow_model_outputs(self, model, dataloader, label):
        model.eval()
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                self.attack_data.append(torch.cat([probabilities, labels.unsqueeze(1).float()], dim=1))
                self.attack_labels.append(torch.full((outputs.size(0),), label, dtype=torch.float).to(self.device))

    def train_attack_models(self, num_epochs=30, lr=0.001):
        attack_dataset = torch.utils.data.TensorDataset(self.attack_data, self.attack_labels)
        for class_label in range(10):
            class_indices = (self.attack_data[:, -1] == class_label).nonzero().squeeze()
            class_data = self.attack_data[class_indices][:, :-1]
            class_labels = self.attack_labels[class_indices].view(-1, 1)


            attack_dataset = torch.utils.data.TensorDataset(class_data, class_labels)
            attack_loader = DataLoader(attack_dataset, batch_size=50, shuffle=True)

            attack_model = self.attack_model_class().to(self.device)
            criterion = nn.BCELoss()
            optimizer = optim.RMSprop(attack_model.parameters(), lr=lr)

            self._train_model(attack_model, attack_loader, criterion, optimizer, num_epochs)
            self.attack_models[class_label] = attack_model

            print(f'Attack model for class {class_label} trained.')

    def save_attack_models(self, path):
        for class_label, model in self.attack_models.items():
            torch.save(model.state_dict(), f'{path}_class_{class_label}.pth')
            print(f'Attack model for class {class_label} saved to {path}_class_{class_label}.pth')

    def load_attack_models(self, path):
        for class_label in range(10):
            model = self.attack_model_class().to(self.device)
            model.load_state_dict(torch.load(f'{path}_class_{class_label}.pth', map_location=self.device))
            self.attack_models[class_label] = model
            print(f'Attack model for class {class_label} loaded from {path}_class_{class_label}.pth')

    def infer_membership(self, model, seen_loader, unseen_loader , seen_outputs , unseen_outputs , labels):

        model_outputs = torch.cat([seen_outputs, unseen_outputs]).to(self.device)
        labels = labels.to(self.device)

        memberships = []
        for output, label in zip(model_outputs, labels):
            class_label = label.item()
            attack_model = self.attack_models[class_label]
            membership_pred = attack_model(output.unsqueeze(0)).item()
            memberships.append(membership_pred)

        return torch.tensor(memberships, device=self.device)

    def evaluate_attack_model(self, seen_loader, unseen_loader, target_model):
        seen_outputs , lables_seen= self._get_model_outputs(target_model, seen_loader)
        unseen_outputs , labels_unseen = self._get_model_outputs(target_model, unseen_loader)

        attack_data = torch.cat([seen_outputs, unseen_outputs]).to(self.device)
        attack_labels = torch.cat([torch.ones(len(seen_outputs)), torch.zeros(len(unseen_outputs))]).to(self.device)

        labels = torch.cat([lables_seen, labels_unseen]).to(self.device)

        memberships = self.infer_membership(target_model, seen_loader, unseen_loader , seen_outputs , unseen_outputs , labels)
        membership_preds = (memberships > 0.5).float()
        accuracy = (membership_preds == attack_labels).float().mean().item()
        return accuracy

    def _get_model_outputs(self, model, dataloader):
        model.eval()
        outputs_list = []
        labels_list = []
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs = inputs.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                outputs_list.append(probabilities)
                labels_list.append(labels) # Convert labels to list for easy concatenation
        return torch.cat(outputs_list), torch.cat(labels_list)


In [42]:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_size = int(0.8 * len(train_set))
remaining_size = len(train_set) - train_size
train_subset, remaining_subset = torch.utils.data.random_split(train_set, [train_size, remaining_size])


num_shadow_models = 50
seen_size_per_model = train_size // num_shadow_models
seen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * seen_size_per_model
    end_idx = (i + 1) * seen_size_per_model
    seen_indices = torch.arange(start_idx, end_idx)
    seen_train_set = Subset(train_subset, seen_indices)
    seen_loader = DataLoader(seen_train_set, batch_size=64, shuffle=True)
    seen_loaders.append(seen_loader)

unseen_dataset = ConcatDataset([remaining_subset, test_set])
unseen_size_per_model = len(unseen_dataset) // num_shadow_models
unseen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * unseen_size_per_model
    end_idx = (i + 1) * unseen_size_per_model
    unseen_indices = torch.arange(start_idx, end_idx)
    unseen_subset = Subset(unseen_dataset, unseen_indices)
    unseen_loader = DataLoader(unseen_subset, batch_size=64, shuffle=False)
    unseen_loaders.append(unseen_loader)

test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Initialize MembershipInferenceAttackNoPrivacy
mia = MembershipInferenceAttackNoPrivacy(CIFAR10Classifier, AttackModel, device)

# Train shadow models without differential privacy
mia.train_shadow_models(seen_loaders, num_epochs=10)

# Collect outputs for attack model
mia.collect_outputs(seen_loaders, unseen_loaders)

# Train attack models
mia.train_attack_models(num_epochs=50)

# Save the attack models
mia.save_attack_models('attack_model')

# Load the attack models (for future use)
mia.load_attack_models('attack_model')

Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 2.278943
Epoch 2, Loss: 2.129886
Epoch 3, Loss: 2.020780
Epoch 4, Loss: 1.906599
Epoch 5, Loss: 1.808966
Epoch 6, Loss: 1.706399
Epoch 7, Loss: 1.618856
Epoch 8, Loss: 1.509206
Epoch 9, Loss: 1.415810
Epoch 10, Loss: 1.370505
Shadow model 1 trained.
Epoch 1, Loss: 2.293199
Epoch 2, Loss: 2.166173
Epoch 3, Loss: 2.083275
Epoch 4, Loss: 1.954465
Epoch 5, Loss: 1.879094
Epoch 6, Loss: 1.779070
Epoch 7, Loss: 1.660878
Epoch 8, Loss: 1.590809
Epoch 9, Loss: 1.516659
Epoch 10, Loss: 1.369547
Shadow model 2 trained.
Epoch 1, Loss: 2.270345
Epoch 2, Loss: 2.150567
Epoch 3, Loss: 2.029976
Epoch 4, Loss: 1.984311
Epoch 5, Loss: 1.888012
Epoch 6, Loss: 1.830041
Epoch 7, Loss: 1.718209
Epoch 8, Loss: 1.633460
Epoch 9, Loss: 1.555427
Epoch 10, Loss: 1.496844
Shadow model 3 trained.
Epoch 1, Loss: 2.292522
Epoch 2, Loss: 2.175536
Epoch 3, Loss: 2.071751
Epoch 4, Loss: 1.978271
Epoch 5, Loss: 1.963725
Epoch 6, 

### Accuracies

In [43]:
model_base = CIFAR10Classifier()
model_base.load_state_dict(torch.load('baseline_model.pth', map_location=device))
model_base.to(device)

accuracy_baseline = mia.evaluate_attack_model(train_loader_baseline, test_loader_baseline, model_base)
print(f'Accuracy for Attacking to the Baseline Model :  {accuracy_baseline * 100:.2f}%')

Accuracy for Attacking to the Baseline Model :  77.37%


In [44]:
def remove_module_prefix(state_dict):
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('_module.'):
            new_state_dict[k[8:]] = v  # remove '_module.' prefix
        else:
            new_state_dict[k] = v
    return new_state_dict

model_private = CIFAR10Classifier().to(device)
private_state_dict = torch.load('modified_model.pth', map_location=device)
model_private.load_state_dict(remove_module_prefix(private_state_dict))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_private.to(device)

accuracy_private = mia.evaluate_attack_model(train_loader_modified, test_loader_modified, model_private)
print(f'Accuracy for Attacking to the Private Model :  {accuracy_private * 100:.2f}%')

Accuracy for Attacking to the Private Model :  61.13%


## Using Random Forest for Attacker Model

### importing libraries

In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, ConcatDataset
from model import CIFAR10Classifier
import torch.nn.functional as F
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import joblib  # For saving/loading models

## Attack Model without privacy

We defined a Class for Membership Inference Attack with No Privacy.


In [None]:
class MembershipInferenceAttackNoPrivacy:
    def __init__(self, shadow_model_class, device='cpu'):
        self.shadow_model_class = shadow_model_class
        self.device = device
        self.attack_models = {}

    def train_shadow_models(self, seen_loaders, num_epochs=10, lr=1e-3):
        self.shadow_models = [self.shadow_model_class().to(self.device) for _ in range(len(seen_loaders))]

        for i, (shadow_model, seen_loader) in enumerate(zip(self.shadow_models, seen_loaders)):
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(shadow_model.parameters(), lr=lr)
            self.train_model(shadow_model, seen_loader, criterion, optimizer, num_epochs)
            print(f'Shadow model {i+1} trained.')

    def train_model(self, model, dataloader, criterion, optimizer, num_epochs):
        model.train()
        for epoch in range(num_epochs):
            running_loss = 0.0
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            print(f'Epoch {epoch+1}, Loss: {running_loss/len(dataloader):.6f}')

    def collect_outputs(self, seen_loaders, unseen_loaders):
        self.attack_data = []
        self.attack_labels = []

        for shadow_model, seen_loader, unseen_loader in zip(self.shadow_models, seen_loaders, unseen_loaders):
            self.collect_shadow_model_outputs(shadow_model, seen_loader, label=1)  # in
            self.collect_shadow_model_outputs(shadow_model, unseen_loader, label=0)  # out

        self.attack_data = torch.cat(self.attack_data).cpu().numpy()
        self.attack_labels = torch.cat(self.attack_labels).cpu().numpy()

    def collect_shadow_model_outputs(self, model, dataloader, label):
        model.eval()
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                self.attack_data.append(torch.cat([probabilities, labels.unsqueeze(1).float()], dim=1))
                self.attack_labels.append(torch.full((outputs.size(0),), label, dtype=torch.float))

    def train_attack_models(self):
        for class_label in range(10):  # Assuming 10 classes
            class_indices = (self.attack_data[:, -1] == class_label)
            class_data = self.attack_data[class_indices][:, :-1]  # Exclude the last column (class label)
            class_labels = self.attack_labels[class_indices]  # Binary labels (in or out)

            attack_model = RandomForestClassifier(n_estimators=50)
            attack_model.fit(class_data, class_labels)

            # Evaluate performance
            train_predictions = attack_model.predict(class_data)
            train_accuracy = accuracy_score(class_labels, train_predictions)
            train_precision = precision_score(class_labels, train_predictions)
            train_recall = recall_score(class_labels, train_predictions)
            train_f1 = f1_score(class_labels, train_predictions)
            train_confusion_matrix = confusion_matrix(class_labels, train_predictions)

            print(f'Class {class_label}, Training Accuracy: {train_accuracy:.4f}')
            print(f'Class {class_label}, Training Precision: {train_precision:.4f}')
            print(f'Class {class_label}, Training Recall: {train_recall:.4f}')
            print(f'Class {class_label}, Training F1 Score: {train_f1:.4f}')
            print(f'Class {class_label}, Training Confusion Matrix:\n {train_confusion_matrix}')

            self.attack_models[class_label] = attack_model
            print(f'Attack model for class {class_label} trained.')

    def save_attack_models(self, path):
        for class_label, model in self.attack_models.items():
            joblib.dump(model, f'{path}_class_{class_label}.joblib')
            print(f'Attack model for class {class_label} saved to {path}_class_{class_label}.joblib')

    def load_attack_models(self, path):
        for class_label in range(10):  # Assuming 10 classes
            model = joblib.load(f'{path}_class_{class_label}.joblib')
            self.attack_models[class_label] = model
            print(f'Attack model for class {class_label} loaded from {path}_class_{class_label}.joblib')

    def infer_membership(self, model, seen_loader, unseen_loader, seen_outputs, unseen_outputs, labels):
        model_outputs = torch.cat([seen_outputs, unseen_outputs]).cpu().numpy()
        labels = labels.cpu().numpy()

        # print("In Infer : ")
        # print(f"model_outputs size: {model_outputs.shape}")
        # print(f"labels size: {len(labels)}")

        memberships = []
        for output, label in zip(model_outputs, labels):
            class_label = int(label)
            attack_model = self.attack_models[class_label]
            membership_pred = attack_model.predict(output.reshape(1, -1))[0]
            memberships.append(membership_pred)

        # print(f"memberships size: {len(memberships)}")
        return torch.tensor(memberships, device=self.device)

    def evaluate_attack_model(self, seen_loader, unseen_loader, target_model):
        seen_outputs, labels_seen = self.get_model_outputs(target_model, seen_loader)
        unseen_outputs, labels_unseen = self.get_model_outputs(target_model, unseen_loader)

        # print(f"Seen outputs size: {seen_outputs.size()}")
        # print(f"Unseen outputs size: {unseen_outputs.size()}")

        attack_data = torch.cat([seen_outputs, unseen_outputs]).to(self.device)
        attack_labels = torch.cat([torch.ones(len(seen_outputs)), torch.zeros(len(unseen_outputs))]).to(self.device)
        labels = torch.cat([labels_seen, labels_unseen]).to(self.device)

        # print(f"Attack data size: {attack_data.size()}")
        # print(f"Attack labels size: {attack_labels.size()}")

        memberships = self.infer_membership(target_model, seen_loader, unseen_loader, seen_outputs, unseen_outputs, labels)
        membership_preds = torch.tensor(memberships).float()
        accuracy = (membership_preds == attack_labels).float().mean().item()
        return accuracy

    def get_model_outputs(self, model, dataloader):
        model.eval()
        outputs_list = []
        labels_list = []
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs = inputs.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                outputs_list.append(probabilities)
                labels_list.append(labels)
        return torch.cat(outputs_list), torch.cat(labels_list)

### Training Attack Model

#### Num of Shadow Models = 2 , Batch Size = 64 , Epochs = 10 , Num_Estimator = 50

In [54]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load CIFAR-10 dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split training data into 80% and 20%
train_size = int(0.8 * len(train_set))
remaining_size = len(train_set) - train_size
train_subset, remaining_subset = torch.utils.data.random_split(train_set, [train_size, remaining_size])

# Create disjoint loaders for seen data from 80% training data
num_shadow_models = 2
seen_size_per_model = train_size // num_shadow_models
seen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * seen_size_per_model
    end_idx = (i + 1) * seen_size_per_model
    seen_indices = torch.arange(start_idx, end_idx)
    seen_train_set = Subset(train_subset, seen_indices)
    seen_loader = DataLoader(seen_train_set, batch_size=64, shuffle=True)
    seen_loaders.append(seen_loader)

# Create concatenated unseen data from the remaining 20% of training data and the entire test set
unseen_dataset = ConcatDataset([remaining_subset, test_set])
unseen_size_per_model = len(unseen_dataset) // num_shadow_models
unseen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * unseen_size_per_model
    end_idx = (i + 1) * unseen_size_per_model
    unseen_indices = torch.arange(start_idx, end_idx)
    unseen_subset = Subset(unseen_dataset, unseen_indices)
    unseen_loader = DataLoader(unseen_subset, batch_size=64, shuffle=False)
    unseen_loaders.append(unseen_loader)

test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Initialize MembershipInferenceAttackNoPrivacy
mia = MembershipInferenceAttackNoPrivacy(CIFAR10Classifier, device)

# Train shadow models without differential privacy
mia.train_shadow_models(seen_loaders, num_epochs=10)

# Collect outputs for attack model
mia.collect_outputs(seen_loaders, unseen_loaders)

# Train attack models
mia.train_attack_models()

# Save the attack models
mia.save_attack_models('attack_model')

# Load the attack models (for future use)
mia.load_attack_models('attack_model')

Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 1.857926
Epoch 2, Loss: 1.580750
Epoch 3, Loss: 1.457101
Epoch 4, Loss: 1.382254
Epoch 5, Loss: 1.333081
Epoch 6, Loss: 1.277171
Epoch 7, Loss: 1.232877
Epoch 8, Loss: 1.187397
Epoch 9, Loss: 1.145134
Epoch 10, Loss: 1.113797
Shadow model 1 trained.
Epoch 1, Loss: 1.851497
Epoch 2, Loss: 1.594829
Epoch 3, Loss: 1.481197
Epoch 4, Loss: 1.405747
Epoch 5, Loss: 1.348338
Epoch 6, Loss: 1.290058
Epoch 7, Loss: 1.237226
Epoch 8, Loss: 1.194154
Epoch 9, Loss: 1.164091
Epoch 10, Loss: 1.131637
Shadow model 2 trained.
Class 0, Training Accuracy: 0.7937
Class 0, Training Precision: 0.7627
Class 0, Training Recall: 1.0000
Class 0, Training F1 Score: 0.8653
Class 0, Training Confusion Matrix:
 [[ 784 1238]
 [   0 3978]]
Attack model for class 0 trained.
Class 1, Training Accuracy: 0.7837
Class 1, Training Precision: 0.7553
Class 1, Training Recall: 0.9998
Class 1, Training F1 Score: 0.8605
Class 1, Training 

### Accuracies

In [None]:
model_base = CIFAR10Classifier()
model_base.load_state_dict(torch.load('baseline_model.pth', map_location=device))
model_base.to(device)

accuracy_baseline = mia.evaluate_attack_model(train_loader_baseline, test_loader_baseline, model_base)
print(f'Accuracy for Attacking to the Baseline Model :  {accuracy_baseline * 100:.2f}%')

In [None]:
def remove_module_prefix(state_dict):
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('_module.'):
            new_state_dict[k[8:]] = v  # remove '_module.' prefix
        else:
            new_state_dict[k] = v
    return new_state_dict

model_private = CIFAR10Classifier().to(device)
private_state_dict = torch.load('modified_model.pth', map_location=device)
model_private.load_state_dict(remove_module_prefix(private_state_dict))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_private.to(device)

accuracy_private = mia.evaluate_attack_model(train_loader_modified, test_loader_modified, model_private)
print(f'Accuracy for Attacking to the Private Model :  {accuracy_private * 100:.2f}%')

#### Num of Shadow Models = 100 (in the article) , Batch Size = 64 , Epochs = 10

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load CIFAR-10 dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split training data into 80% and 20%
train_size = int(0.8 * len(train_set))
remaining_size = len(train_set) - train_size
train_subset, remaining_subset = torch.utils.data.random_split(train_set, [train_size, remaining_size])

# Create disjoint loaders for seen data from 80% training data
num_shadow_models = 100
seen_size_per_model = train_size // num_shadow_models
seen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * seen_size_per_model
    end_idx = (i + 1) * seen_size_per_model
    seen_indices = torch.arange(start_idx, end_idx)
    seen_train_set = Subset(train_subset, seen_indices)
    seen_loader = DataLoader(seen_train_set, batch_size=64, shuffle=True)
    seen_loaders.append(seen_loader)

# Create concatenated unseen data from the remaining 20% of training data and the entire test set
unseen_dataset = ConcatDataset([remaining_subset, test_set])
unseen_size_per_model = len(unseen_dataset) // num_shadow_models
unseen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * unseen_size_per_model
    end_idx = (i + 1) * unseen_size_per_model
    unseen_indices = torch.arange(start_idx, end_idx)
    unseen_subset = Subset(unseen_dataset, unseen_indices)
    unseen_loader = DataLoader(unseen_subset, batch_size=64, shuffle=False)
    unseen_loaders.append(unseen_loader)

test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Initialize MembershipInferenceAttackNoPrivacy
mia = MembershipInferenceAttackNoPrivacy(CIFAR10Classifier, device)

# Train shadow models without differential privacy
mia.train_shadow_models(seen_loaders, num_epochs=10)

# Collect outputs for attack model
mia.collect_outputs(seen_loaders, unseen_loaders)

# Train attack models
mia.train_attack_models()

# Save the attack models
mia.save_attack_models('attack_model')

# Load the attack models (for future use)
mia.load_attack_models('attack_model')

Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 2.313570
Epoch 2, Loss: 2.215772
Epoch 3, Loss: 2.136619
Epoch 4, Loss: 2.022963
Epoch 5, Loss: 1.892865
Epoch 6, Loss: 1.823914
Epoch 7, Loss: 1.847620
Epoch 8, Loss: 1.713441
Epoch 9, Loss: 1.691665
Epoch 10, Loss: 1.440415
Shadow model 1 trained.
Epoch 1, Loss: 2.289453
Epoch 2, Loss: 2.182635
Epoch 3, Loss: 2.058279
Epoch 4, Loss: 1.982899
Epoch 5, Loss: 1.876770
Epoch 6, Loss: 1.814436
Epoch 7, Loss: 1.757686
Epoch 8, Loss: 1.624396
Epoch 9, Loss: 1.585671
Epoch 10, Loss: 1.470452
Shadow model 2 trained.
Epoch 1, Loss: 2.306730
Epoch 2, Loss: 2.190963
Epoch 3, Loss: 2.109275
Epoch 4, Loss: 1.992381
Epoch 5, Loss: 1.827165
Epoch 6, Loss: 1.803251
Epoch 7, Loss: 1.710317
Epoch 8, Loss: 1.574142
Epoch 9, Loss: 1.386884
Epoch 10, Loss: 1.432828
Shadow model 3 trained.
Epoch 1, Loss: 2.287187
Epoch 2, Loss: 2.112371
Epoch 3, Loss: 2.085701
Epoch 4, Loss: 1.936148
Epoch 5, Loss: 1.773429
Epoch 6, 

### Accuracies

In [None]:
model_base = CIFAR10Classifier()
model_base.load_state_dict(torch.load('baseline_model.pth', map_location=device))
model_base.to(device)

accuracy_baseline = mia.evaluate_attack_model(train_loader_baseline, test_loader_baseline, model_base)
print(f'Accuracy for Attacking to the Baseline Model :  {accuracy_baseline * 100:.2f}%')

Accuracy for Attacking to the Baseline Model :  74.69%


In [None]:
def remove_module_prefix(state_dict):
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('_module.'):
            new_state_dict[k[8:]] = v  # remove '_module.' prefix
        else:
            new_state_dict[k] = v
    return new_state_dict

model_private = CIFAR10Classifier().to(device)
private_state_dict = torch.load('modified_model.pth', map_location=device)
model_private.load_state_dict(remove_module_prefix(private_state_dict))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_private.to(device)

accuracy_private = mia.evaluate_attack_model(train_loader_modified, test_loader_modified, model_private)
print(f'Accuracy for Attacking to the Private Model :  {accuracy_private * 100:.2f}%')

Accuracy for Attacking to the Private Model :  60.33%


#### Num of Shadow Models = 50 , Batch Size = 64 , Epochs = 20

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load CIFAR-10 dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split training data into 80% and 20%
train_size = int(0.8 * len(train_set))
remaining_size = len(train_set) - train_size
train_subset, remaining_subset = torch.utils.data.random_split(train_set, [train_size, remaining_size])

# Create disjoint loaders for seen data from 80% training data
num_shadow_models = 50
seen_size_per_model = train_size // num_shadow_models
seen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * seen_size_per_model
    end_idx = (i + 1) * seen_size_per_model
    seen_indices = torch.arange(start_idx, end_idx)
    seen_train_set = Subset(train_subset, seen_indices)
    seen_loader = DataLoader(seen_train_set, batch_size=64, shuffle=True)
    seen_loaders.append(seen_loader)

# Create concatenated unseen data from the remaining 20% of training data and the entire test set
unseen_dataset = ConcatDataset([remaining_subset, test_set])
unseen_size_per_model = len(unseen_dataset) // num_shadow_models
unseen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * unseen_size_per_model
    end_idx = (i + 1) * unseen_size_per_model
    unseen_indices = torch.arange(start_idx, end_idx)
    unseen_subset = Subset(unseen_dataset, unseen_indices)
    unseen_loader = DataLoader(unseen_subset, batch_size=64, shuffle=False)
    unseen_loaders.append(unseen_loader)

test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Initialize MembershipInferenceAttackNoPrivacy
mia = MembershipInferenceAttackNoPrivacy(CIFAR10Classifier, device)

# Train shadow models without differential privacy
mia.train_shadow_models(seen_loaders, num_epochs=20)

# Collect outputs for attack model
mia.collect_outputs(seen_loaders, unseen_loaders)

# Train attack models
mia.train_attack_models()

# Save the attack models
mia.save_attack_models('attack_model')

# Load the attack models (for future use)
mia.load_attack_models('attack_model')

Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 2.290871
Epoch 2, Loss: 2.152292
Epoch 3, Loss: 2.041647
Epoch 4, Loss: 1.993175
Epoch 5, Loss: 1.859375
Epoch 6, Loss: 1.771865
Epoch 7, Loss: 1.679776
Epoch 8, Loss: 1.577215
Epoch 9, Loss: 1.488882
Epoch 10, Loss: 1.408353
Epoch 11, Loss: 1.321452
Epoch 12, Loss: 1.248266
Epoch 13, Loss: 1.161570
Epoch 14, Loss: 1.059208
Epoch 15, Loss: 1.024660
Epoch 16, Loss: 0.969470
Epoch 17, Loss: 0.887253
Epoch 18, Loss: 0.803858
Epoch 19, Loss: 0.815177
Epoch 20, Loss: 0.728020
Shadow model 1 trained.
Epoch 1, Loss: 2.278039
Epoch 2, Loss: 2.108988
Epoch 3, Loss: 1.991950
Epoch 4, Loss: 1.860687
Epoch 5, Loss: 1.774709
Epoch 6, Loss: 1.627685
Epoch 7, Loss: 1.544432
Epoch 8, Loss: 1.429359
Epoch 9, Loss: 1.317914
Epoch 10, Loss: 1.223228
Epoch 11, Loss: 1.142226
Epoch 12, Loss: 1.054439
Epoch 13, Loss: 1.039334
Epoch 14, Loss: 0.914852
Epoch 15, Loss: 0.866836
Epoch 16, Loss: 0.818027
Epoch 17, Loss: 0.

### Accuracies

In [None]:
model_base = CIFAR10Classifier()
model_base.load_state_dict(torch.load('baseline_model.pth', map_location=device))
model_base.to(device)

accuracy_baseline = mia.evaluate_attack_model(train_loader_baseline, test_loader_baseline, model_base)
print(f'Accuracy for Attacking to the Baseline Model :  {accuracy_baseline * 100:.2f}%')

Accuracy for Attacking to the Baseline Model :  62.42%


In [None]:
def remove_module_prefix(state_dict):
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('_module.'):
            new_state_dict[k[8:]] = v  # remove '_module.' prefix
        else:
            new_state_dict[k] = v
    return new_state_dict

model_private = CIFAR10Classifier().to(device)
private_state_dict = torch.load('modified_model.pth', map_location=device)
model_private.load_state_dict(remove_module_prefix(private_state_dict))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_private.to(device)

accuracy_private = mia.evaluate_attack_model(train_loader_modified, test_loader_modified, model_private)
print(f'Accuracy for Attacking to the Private Model :  {accuracy_private * 100:.2f}%')

Accuracy for Attacking to the Private Model :  45.89%



#### Num of Shadow Models = 5 , Batch Size = 64 , Epochs = 20




In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load CIFAR-10 dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split training data into 80% and 20%
train_size = int(0.8 * len(train_set))
remaining_size = len(train_set) - train_size
train_subset, remaining_subset = torch.utils.data.random_split(train_set, [train_size, remaining_size])

# Create disjoint loaders for seen data from 80% training data
num_shadow_models = 5
seen_size_per_model = train_size // num_shadow_models
seen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * seen_size_per_model
    end_idx = (i + 1) * seen_size_per_model
    seen_indices = torch.arange(start_idx, end_idx)
    seen_train_set = Subset(train_subset, seen_indices)
    seen_loader = DataLoader(seen_train_set, batch_size=64, shuffle=True)
    seen_loaders.append(seen_loader)

# Create concatenated unseen data from the remaining 20% of training data and the entire test set
unseen_dataset = ConcatDataset([remaining_subset, test_set])
unseen_size_per_model = len(unseen_dataset) // num_shadow_models
unseen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * unseen_size_per_model
    end_idx = (i + 1) * unseen_size_per_model
    unseen_indices = torch.arange(start_idx, end_idx)
    unseen_subset = Subset(unseen_dataset, unseen_indices)
    unseen_loader = DataLoader(unseen_subset, batch_size=64, shuffle=False)
    unseen_loaders.append(unseen_loader)

test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Initialize MembershipInferenceAttackNoPrivacy
mia = MembershipInferenceAttackNoPrivacy(CIFAR10Classifier, device)

# Train shadow models without differential privacy
mia.train_shadow_models(seen_loaders, num_epochs=10)

# Collect outputs for attack model
mia.collect_outputs(seen_loaders, unseen_loaders)

# Train attack models
mia.train_attack_models()

# Save the attack models
mia.save_attack_models('attack_model')

# Load the attack models (for future use)
mia.load_attack_models('attack_model')

Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 2.066963
Epoch 2, Loss: 1.770589
Epoch 3, Loss: 1.658656
Epoch 4, Loss: 1.564891
Epoch 5, Loss: 1.518469
Epoch 6, Loss: 1.469298
Epoch 7, Loss: 1.408042
Epoch 8, Loss: 1.345974
Epoch 9, Loss: 1.290851
Epoch 10, Loss: 1.272630
Shadow model 1 trained.
Epoch 1, Loss: 2.029394
Epoch 2, Loss: 1.748148
Epoch 3, Loss: 1.617435
Epoch 4, Loss: 1.516840
Epoch 5, Loss: 1.457594
Epoch 6, Loss: 1.386896
Epoch 7, Loss: 1.325492
Epoch 8, Loss: 1.268936
Epoch 9, Loss: 1.223062
Epoch 10, Loss: 1.175475
Shadow model 2 trained.
Epoch 1, Loss: 2.031841
Epoch 2, Loss: 1.746687
Epoch 3, Loss: 1.624366
Epoch 4, Loss: 1.553309
Epoch 5, Loss: 1.475473
Epoch 6, Loss: 1.413871
Epoch 7, Loss: 1.354392
Epoch 8, Loss: 1.313959
Epoch 9, Loss: 1.266630
Epoch 10, Loss: 1.223101
Shadow model 3 trained.
Epoch 1, Loss: 2.068920
Epoch 2, Loss: 1.783126
Epoch 3, Loss: 1.637932
Epoch 4, Loss: 1.534663
Epoch 5, Loss: 1.478650
Epoch 6, 

### Accuracies

In [None]:
model_base = CIFAR10Classifier()
model_base.load_state_dict(torch.load('baseline_model.pth', map_location=device))
model_base.to(device)

accuracy_baseline = mia.evaluate_attack_model(train_loader_baseline, test_loader_baseline, model_base)
print(f'Accuracy for Attacking to the Baseline Model :  {accuracy_baseline * 100:.2f}%')

Accuracy for Attacking to the Baseline Model :  74.79%


In [None]:
def remove_module_prefix(state_dict):
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('_module.'):
            new_state_dict[k[8:]] = v  # remove '_module.' prefix
        else:
            new_state_dict[k] = v
    return new_state_dict

model_private = CIFAR10Classifier().to(device)
private_state_dict = torch.load('modified_model.pth', map_location=device)
model_private.load_state_dict(remove_module_prefix(private_state_dict))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_private.to(device)

accuracy_private = mia.evaluate_attack_model(train_loader_modified, test_loader_modified, model_private)
print(f'Accuracy for Attacking to the Private Model :  {accuracy_private * 100:.2f}%')

Accuracy for Attacking to the Private Model :  60.89%


#### Num of Shadow Models = 3 , Batch Size = 64 , Epochs = 20

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load CIFAR-10 dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split training data into 80% and 20%
train_size = int(0.8 * len(train_set))
remaining_size = len(train_set) - train_size
train_subset, remaining_subset = torch.utils.data.random_split(train_set, [train_size, remaining_size])

# Create disjoint loaders for seen data from 80% training data
num_shadow_models = 3
seen_size_per_model = train_size // num_shadow_models
seen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * seen_size_per_model
    end_idx = (i + 1) * seen_size_per_model
    seen_indices = torch.arange(start_idx, end_idx)
    seen_train_set = Subset(train_subset, seen_indices)
    seen_loader = DataLoader(seen_train_set, batch_size=64, shuffle=True)
    seen_loaders.append(seen_loader)

# Create concatenated unseen data from the remaining 20% of training data and the entire test set
unseen_dataset = ConcatDataset([remaining_subset, test_set])
unseen_size_per_model = len(unseen_dataset) // num_shadow_models
unseen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * unseen_size_per_model
    end_idx = (i + 1) * unseen_size_per_model
    unseen_indices = torch.arange(start_idx, end_idx)
    unseen_subset = Subset(unseen_dataset, unseen_indices)
    unseen_loader = DataLoader(unseen_subset, batch_size=64, shuffle=False)
    unseen_loaders.append(unseen_loader)

test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Initialize MembershipInferenceAttackNoPrivacy
mia = MembershipInferenceAttackNoPrivacy(CIFAR10Classifier, device)

# Train shadow models without differential privacy
mia.train_shadow_models(seen_loaders, num_epochs=20)

# Collect outputs for attack model
mia.collect_outputs(seen_loaders, unseen_loaders)

# Train attack models
mia.train_attack_models()

# Save the attack models
mia.save_attack_models('attack_model')

# Load the attack models (for future use)
mia.load_attack_models('attack_model')

Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 1.962627
Epoch 2, Loss: 1.683025
Epoch 3, Loss: 1.571063
Epoch 4, Loss: 1.476093
Epoch 5, Loss: 1.411074
Epoch 6, Loss: 1.352118
Epoch 7, Loss: 1.300093
Epoch 8, Loss: 1.237202
Epoch 9, Loss: 1.189871
Epoch 10, Loss: 1.153263
Epoch 11, Loss: 1.093561
Epoch 12, Loss: 1.060777
Epoch 13, Loss: 1.043026
Epoch 14, Loss: 1.002706
Epoch 15, Loss: 0.974497
Epoch 16, Loss: 0.935686
Epoch 17, Loss: 0.917512
Epoch 18, Loss: 0.902319
Epoch 19, Loss: 0.882958
Epoch 20, Loss: 0.845640
Shadow model 1 trained.
Epoch 1, Loss: 1.971920
Epoch 2, Loss: 1.684230
Epoch 3, Loss: 1.578052
Epoch 4, Loss: 1.496143
Epoch 5, Loss: 1.424258
Epoch 6, Loss: 1.362241
Epoch 7, Loss: 1.310539
Epoch 8, Loss: 1.260077
Epoch 9, Loss: 1.229977
Epoch 10, Loss: 1.179939
Epoch 11, Loss: 1.144884
Epoch 12, Loss: 1.101795
Epoch 13, Loss: 1.083806
Epoch 14, Loss: 1.040788
Epoch 15, Loss: 1.011365
Epoch 16, Loss: 0.992831
Epoch 17, Loss: 0.

### Accuracies

In [None]:
model_base = CIFAR10Classifier()
model_base.load_state_dict(torch.load('baseline_model.pth', map_location=device))
model_base.to(device)

accuracy_baseline = mia.evaluate_attack_model(train_loader_baseline, test_loader_baseline, model_base)
print(f'Accuracy for Attacking to the Baseline Model :  {accuracy_baseline * 100:.2f}%')

Accuracy for Attacking to the Baseline Model :  71.41%


In [None]:
def remove_module_prefix(state_dict):
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('_module.'):
            new_state_dict[k[8:]] = v  # remove '_module.' prefix
        else:
            new_state_dict[k] = v
    return new_state_dict

model_private = CIFAR10Classifier().to(device)
private_state_dict = torch.load('modified_model.pth', map_location=device)
model_private.load_state_dict(remove_module_prefix(private_state_dict))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_private.to(device)

accuracy_private = mia.evaluate_attack_model(train_loader_modified, test_loader_modified, model_private)
print(f'Accuracy for Attacking to the Private Model :  {accuracy_private * 100:.2f}%')

Accuracy for Attacking to the Private Model :  55.62%


#### Num of Shadow Models = 2 , Batch Size = 64 , Epochs = 15

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load CIFAR-10 dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split training data into 80% and 20%
train_size = int(0.8 * len(train_set))
remaining_size = len(train_set) - train_size
train_subset, remaining_subset = torch.utils.data.random_split(train_set, [train_size, remaining_size])

# Create disjoint loaders for seen data from 80% training data
num_shadow_models = 2
seen_size_per_model = train_size // num_shadow_models
seen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * seen_size_per_model
    end_idx = (i + 1) * seen_size_per_model
    seen_indices = torch.arange(start_idx, end_idx)
    seen_train_set = Subset(train_subset, seen_indices)
    seen_loader = DataLoader(seen_train_set, batch_size=64, shuffle=True)
    seen_loaders.append(seen_loader)

# Create concatenated unseen data from the remaining 20% of training data and the entire test set
unseen_dataset = ConcatDataset([remaining_subset, test_set])
unseen_size_per_model = len(unseen_dataset) // num_shadow_models
unseen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * unseen_size_per_model
    end_idx = (i + 1) * unseen_size_per_model
    unseen_indices = torch.arange(start_idx, end_idx)
    unseen_subset = Subset(unseen_dataset, unseen_indices)
    unseen_loader = DataLoader(unseen_subset, batch_size=64, shuffle=False)
    unseen_loaders.append(unseen_loader)

test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Initialize MembershipInferenceAttackNoPrivacy
mia = MembershipInferenceAttackNoPrivacy(CIFAR10Classifier, device)

# Train shadow models without differential privacy
mia.train_shadow_models(seen_loaders, num_epochs=15)

# Collect outputs for attack model
mia.collect_outputs(seen_loaders, unseen_loaders)

# Train attack models
mia.train_attack_models()

# Save the attack models
mia.save_attack_models('attack_model')

# Load the attack models (for future use)
mia.load_attack_models('attack_model')

Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 1.827725
Epoch 2, Loss: 1.576761
Epoch 3, Loss: 1.465654
Epoch 4, Loss: 1.385781
Epoch 5, Loss: 1.321715
Epoch 6, Loss: 1.274589
Epoch 7, Loss: 1.220363
Epoch 8, Loss: 1.183660
Epoch 9, Loss: 1.144996
Epoch 10, Loss: 1.105306


### Accuracies

In [None]:
model_base = CIFAR10Classifier()
model_base.load_state_dict(torch.load('baseline_model.pth', map_location=device))
model_base.to(device)

accuracy_baseline = mia.evaluate_attack_model(train_loader_baseline, test_loader_baseline, model_base)
print(f'Accuracy for Attacking to the Baseline Model :  {accuracy_baseline * 100:.2f}%')

In [None]:
def remove_module_prefix(state_dict):
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('_module.'):
            new_state_dict[k[8:]] = v  # remove '_module.' prefix
        else:
            new_state_dict[k] = v
    return new_state_dict

model_private = CIFAR10Classifier().to(device)
private_state_dict = torch.load('modified_model.pth', map_location=device)
model_private.load_state_dict(remove_module_prefix(private_state_dict))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_private.to(device)

accuracy_private = mia.evaluate_attack_model(train_loader_modified, test_loader_modified, model_private)
print(f'Accuracy for Attacking to the Private Model :  {accuracy_private * 100:.2f}%')

### Max Depth

In [50]:
class MembershipInferenceAttackNoPrivacy:
    def __init__(self, shadow_model_class, device='cpu'):
        self.shadow_model_class = shadow_model_class
        self.device = device
        self.attack_models = {}

    def train_shadow_models(self, seen_loaders, num_epochs=10, lr=1e-3):
        self.shadow_models = [self.shadow_model_class().to(self.device) for _ in range(len(seen_loaders))]

        for i, (shadow_model, seen_loader) in enumerate(zip(self.shadow_models, seen_loaders)):
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(shadow_model.parameters(), lr=lr)
            self.train_model(shadow_model, seen_loader, criterion, optimizer, num_epochs)
            print(f'Shadow model {i+1} trained.')

    def train_model(self, model, dataloader, criterion, optimizer, num_epochs):
        model.train()
        for epoch in range(num_epochs):
            running_loss = 0.0
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
            print(f'Epoch {epoch+1}, Loss: {running_loss/len(dataloader):.6f}')

    def collect_outputs(self, seen_loaders, unseen_loaders):
        self.attack_data = []
        self.attack_labels = []

        for shadow_model, seen_loader, unseen_loader in zip(self.shadow_models, seen_loaders, unseen_loaders):
            self.collect_shadow_model_outputs(shadow_model, seen_loader, label=1)  # in
            self.collect_shadow_model_outputs(shadow_model, unseen_loader, label=0)  # out

        self.attack_data = torch.cat(self.attack_data).cpu().numpy()
        self.attack_labels = torch.cat(self.attack_labels).cpu().numpy()

    def collect_shadow_model_outputs(self, model, dataloader, label):
        model.eval()
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                self.attack_data.append(torch.cat([probabilities, labels.unsqueeze(1).float()], dim=1))
                self.attack_labels.append(torch.full((outputs.size(0),), label, dtype=torch.float))

    def train_attack_models(self):
        for class_label in range(10):  # Assuming 10 classes
            class_indices = (self.attack_data[:, -1] == class_label)
            class_data = self.attack_data[class_indices][:, :-1]  # Exclude the last column (class label)
            class_labels = self.attack_labels[class_indices]  # Binary labels (in or out)

            attack_model = RandomForestClassifier( n_estimators=50,  # Number of trees
                                                   max_depth=12,     # Maximum depth of each tree
                                                   random_state=42   # Setting a random state for reproducibility (optional)
                                                   )

            attack_model.fit(class_data, class_labels)

            # Evaluate performance
            train_predictions = attack_model.predict(class_data)
            train_accuracy = accuracy_score(class_labels, train_predictions)
            train_precision = precision_score(class_labels, train_predictions)
            train_recall = recall_score(class_labels, train_predictions)
            train_f1 = f1_score(class_labels, train_predictions)
            train_confusion_matrix = confusion_matrix(class_labels, train_predictions)

            print(f'Class {class_label}, Training Accuracy: {train_accuracy:.4f}')
            print(f'Class {class_label}, Training Precision: {train_precision:.4f}')
            print(f'Class {class_label}, Training Recall: {train_recall:.4f}')
            print(f'Class {class_label}, Training F1 Score: {train_f1:.4f}')
            print(f'Class {class_label}, Training Confusion Matrix:\n {train_confusion_matrix}')

            self.attack_models[class_label] = attack_model
            print(f'Attack model for class {class_label} trained.')

    def save_attack_models(self, path):
        for class_label, model in self.attack_models.items():
            joblib.dump(model, f'{path}_class_{class_label}.joblib')
            print(f'Attack model for class {class_label} saved to {path}_class_{class_label}.joblib')

    def load_attack_models(self, path):
        for class_label in range(10):  # Assuming 10 classes
            model = joblib.load(f'{path}_class_{class_label}.joblib')
            self.attack_models[class_label] = model
            print(f'Attack model for class {class_label} loaded from {path}_class_{class_label}.joblib')

    def infer_membership(self, model, seen_loader, unseen_loader, seen_outputs, unseen_outputs, labels):
        model_outputs = torch.cat([seen_outputs, unseen_outputs]).cpu().numpy()
        labels = labels.cpu().numpy()

        # print("In Infer : ")
        # print(f"model_outputs size: {model_outputs.shape}")
        # print(f"labels size: {len(labels)}")

        memberships = []
        for output, label in zip(model_outputs, labels):
            class_label = int(label)
            attack_model = self.attack_models[class_label]
            membership_pred = attack_model.predict(output.reshape(1, -1))[0]
            memberships.append(membership_pred)

        # print(f"memberships size: {len(memberships)}")
        return torch.tensor(memberships, device=self.device)

    def evaluate_attack_model(self, seen_loader, unseen_loader, target_model):
        seen_outputs, labels_seen = self.get_model_outputs(target_model, seen_loader)
        unseen_outputs, labels_unseen = self.get_model_outputs(target_model, unseen_loader)

        # print(f"Seen outputs size: {seen_outputs.size()}")
        # print(f"Unseen outputs size: {unseen_outputs.size()}")

        attack_data = torch.cat([seen_outputs, unseen_outputs]).to(self.device)
        attack_labels = torch.cat([torch.ones(len(seen_outputs)), torch.zeros(len(unseen_outputs))]).to(self.device)
        labels = torch.cat([labels_seen, labels_unseen]).to(self.device)

        # print(f"Attack data size: {attack_data.size()}")
        # print(f"Attack labels size: {attack_labels.size()}")

        memberships = self.infer_membership(target_model, seen_loader, unseen_loader, seen_outputs, unseen_outputs, labels)
        membership_preds = torch.tensor(memberships).float()
        accuracy = (membership_preds == attack_labels).float().mean().item()
        return accuracy

    def get_model_outputs(self, model, dataloader):
        model.eval()
        outputs_list = []
        labels_list = []
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs = inputs.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                outputs_list.append(probabilities)
                labels_list.append(labels)
        return torch.cat(outputs_list), torch.cat(labels_list)


In [51]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load CIFAR-10 dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split training data into 80% and 20%
train_size = int(0.8 * len(train_set))
remaining_size = len(train_set) - train_size
train_subset, remaining_subset = torch.utils.data.random_split(train_set, [train_size, remaining_size])

# Create disjoint loaders for seen data from 80% training data
num_shadow_models = 5
seen_size_per_model = train_size // num_shadow_models
seen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * seen_size_per_model
    end_idx = (i + 1) * seen_size_per_model
    seen_indices = torch.arange(start_idx, end_idx)
    seen_train_set = Subset(train_subset, seen_indices)
    seen_loader = DataLoader(seen_train_set, batch_size=64, shuffle=True)
    seen_loaders.append(seen_loader)

# Create concatenated unseen data from the remaining 20% of training data and the entire test set
unseen_dataset = ConcatDataset([remaining_subset, test_set])
unseen_size_per_model = len(unseen_dataset) // num_shadow_models
unseen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * unseen_size_per_model
    end_idx = (i + 1) * unseen_size_per_model
    unseen_indices = torch.arange(start_idx, end_idx)
    unseen_subset = Subset(unseen_dataset, unseen_indices)
    unseen_loader = DataLoader(unseen_subset, batch_size=64, shuffle=False)
    unseen_loaders.append(unseen_loader)

test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

# Initialize MembershipInferenceAttackNoPrivacy
mia = MembershipInferenceAttackNoPrivacy(CIFAR10Classifier, device)

# Train shadow models without differential privacy
mia.train_shadow_models(seen_loaders, num_epochs=15)

# Collect outputs for attack model
mia.collect_outputs(seen_loaders, unseen_loaders)

# Train attack models
mia.train_attack_models()

# Save the attack models
mia.save_attack_models('attack_model')

# Load the attack models (for future use)
mia.load_attack_models('attack_model')

Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Loss: 2.045591
Epoch 2, Loss: 1.743839
Epoch 3, Loss: 1.622105
Epoch 4, Loss: 1.521956
Epoch 5, Loss: 1.447738
Epoch 6, Loss: 1.365872
Epoch 7, Loss: 1.309938
Epoch 8, Loss: 1.258959
Epoch 9, Loss: 1.200750
Epoch 10, Loss: 1.153068
Epoch 11, Loss: 1.118268
Epoch 12, Loss: 1.068444
Epoch 13, Loss: 1.032273
Epoch 14, Loss: 0.997757
Epoch 15, Loss: 0.963482
Shadow model 1 trained.
Epoch 1, Loss: 2.024667
Epoch 2, Loss: 1.750804
Epoch 3, Loss: 1.632855
Epoch 4, Loss: 1.541699
Epoch 5, Loss: 1.453559
Epoch 6, Loss: 1.388812
Epoch 7, Loss: 1.339692
Epoch 8, Loss: 1.298659
Epoch 9, Loss: 1.248379
Epoch 10, Loss: 1.197547
Epoch 11, Loss: 1.146959
Epoch 12, Loss: 1.127301
Epoch 13, Loss: 1.096213
Epoch 14, Loss: 1.034080
Epoch 15, Loss: 1.014374
Shadow model 2 trained.
Epoch 1, Loss: 2.056166
Epoch 2, Loss: 1.795268
Epoch 3, Loss: 1.688032
Epoch 4, Loss: 1.618346
Epoch 5, Loss: 1.533881
Epoch 6, Loss: 1.482674


### Accuracies

In [52]:
model_base = CIFAR10Classifier()
model_base.load_state_dict(torch.load('baseline_model.pth', map_location=device))
model_base.to(device)

accuracy_baseline = mia.evaluate_attack_model(train_loader_baseline, test_loader_baseline, model_base)
print(f'Accuracy for Attacking to the Baseline Model :  {accuracy_baseline * 100:.2f}%')

Accuracy for Attacking to the Baseline Model :  75.88%


In [53]:
def remove_module_prefix(state_dict):
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('_module.'):
            new_state_dict[k[8:]] = v  # remove '_module.' prefix
        else:
            new_state_dict[k] = v
    return new_state_dict

model_private = CIFAR10Classifier().to(device)
private_state_dict = torch.load('modified_model.pth', map_location=device)
model_private.load_state_dict(remove_module_prefix(private_state_dict))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_private.to(device)

accuracy_private = mia.evaluate_attack_model(train_loader_modified, test_loader_modified, model_private)
print(f'Accuracy for Attacking to the Private Model :  {accuracy_private * 100:.2f}%')

Accuracy for Attacking to the Private Model :  60.02%


## Attack Model with privacy

In [None]:
!pip install opacus



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, ConcatDataset
from model import CIFAR10Classifier
import torch.nn.functional as F
from opacus import PrivacyEngine
from opacus.utils.batch_memory_manager import BatchMemoryManager
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

class MembershipInferenceAttackWithDP:
    def __init__(self, shadow_model_class, device='cpu'):
        self.shadow_model_class = shadow_model_class
        self.device = device
        self.attack_models = {}

    def train_shadow_models(self, seen_loaders, num_epochs=10, lr=1e-3, max_grad_norm=8, epsilon=50, delta=1e-5):
        self.shadow_models = [self.shadow_model_class().to(self.device) for _ in range(len(seen_loaders))]

        for i, (shadow_model, seen_loader) in enumerate(zip(self.shadow_models, seen_loaders)):
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.RMSprop(shadow_model.parameters(), lr=lr)
            privacy_engine = PrivacyEngine()
            shadow_model, optimizer, seen_loader = privacy_engine.make_private_with_epsilon(
                module=shadow_model,
                optimizer=optimizer,
                data_loader=seen_loader,
                epochs=num_epochs,
                target_epsilon=epsilon,
                target_delta=delta,
                max_grad_norm=max_grad_norm,
            )
            self._train_private_model(shadow_model, seen_loader, criterion, optimizer, privacy_engine, num_epochs)
            print(f'Shadow model {i+1} trained.')

    def _train_private_model(self, model, dataloader, criterion, optimizer, privacy_engine, num_epochs):
        model.train()
        for epoch in range(num_epochs):
            running_loss = 0.0
            top1_acc = []
            with BatchMemoryManager(data_loader=dataloader, max_physical_batch_size=128, optimizer=optimizer) as memory_safe_data_loader:
                for inputs, labels in memory_safe_data_loader:
                    optimizer.zero_grad()
                    inputs, labels = inputs.to(self.device), labels.to(self.device)
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    preds = np.argmax(outputs.detach().cpu().numpy(), axis=1)
                    labels_np = labels.detach().cpu().numpy()
                    acc = (preds == labels_np).mean()

                    running_loss += loss.item()
                    top1_acc.append(acc)

                    loss.backward()
                    optimizer.step()

                epsilon = privacy_engine.get_epsilon(1e-5)
                print(f'Epoch {epoch+1}, Loss: {running_loss/len(dataloader):.6f}, ε: {epsilon:.2f}')

    def collect_outputs(self, seen_loaders, unseen_loaders):
        self.attack_data = []
        self.attack_labels = []

        for shadow_model, seen_loader, unseen_loader in zip(self.shadow_models, seen_loaders, unseen_loaders):
            self._collect_shadow_model_outputs(shadow_model, seen_loader, label=1)  # in
            self._collect_shadow_model_outputs(shadow_model, unseen_loader, label=0)  # out

        self.attack_data = torch.cat(self.attack_data).to(self.device)
        self.attack_labels = torch.cat(self.attack_labels).to(self.device)

    def _collect_shadow_model_outputs(self, model, dataloader, label):
        model.eval()
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                self.attack_data.append(torch.cat([probabilities, labels.unsqueeze(1).float()], dim=1))
                self.attack_labels.append(torch.full((outputs.size(0),), label, dtype=torch.float).to(self.device))

    def train_attack_models(self):
        attack_dataset = torch.utils.data.TensorDataset(self.attack_data, self.attack_labels)
        for class_label in range(10):  # Assuming 10 classes
            class_indices = (self.attack_data[:, -1] == class_label).nonzero().squeeze()
            class_data = self.attack_data[class_indices][:, :-1]  # Exclude the last column (class label)
            class_labels = self.attack_labels[class_indices]  # Ensure labels are the same shape as outputs

            attack_model = RandomForestClassifier(n_estimators=100)
            attack_model.fit(class_data.cpu().numpy(), class_labels.cpu().numpy().ravel())

            self.attack_models[class_label] = attack_model
            print(f'Attack model for class {class_label} trained.')

    def save_attack_models(self, path):
        import joblib
        for class_label, model in self.attack_models.items():
            joblib.dump(model, f'{path}_class_{class_label}.pkl')
            print(f'Attack model for class {class_label} saved to {path}_class_{class_label}.pkl')

    def load_attack_models(self, path):
        import joblib
        for class_label in range(10):  # Assuming 10 classes
            model = joblib.load(f'{path}_class_{class_label}.pkl')
            self.attack_models[class_label] = model
            print(f'Attack model for class {class_label} loaded from {path}_class_{class_label}.pkl')

    def infer_membership(self, model, dataloader):
        model_outputs, labels = self._get_model_outputs(model, dataloader)
        memberships = []

        for output, label in zip(model_outputs, labels):
            class_label = label.item()
            attack_model = self.attack_models[class_label]
            membership_pred = attack_model.predict(output.unsqueeze(0).cpu().numpy())
            memberships.append(membership_pred.item())

        return torch.tensor(memberships, device=self.device)

    def evaluate_attack_model(self, seen_loader, unseen_loader, target_model):
        seen_outputs, labels_seen = self._get_model_outputs(target_model, seen_loader)
        unseen_outputs, labels_unseen = self._get_model_outputs(target_model, unseen_loader)
        labels = torch.cat([labels_seen, labels_unseen]).to(self.device)

        attack_data = torch.cat([seen_outputs, unseen_outputs]).to(self.device)
        attack_labels = torch.cat([torch.ones(len(seen_outputs)), torch.zeros(len(unseen_outputs))]).to(self.device)

        memberships = self.infer_membership(target_model, DataLoader(ConcatDataset([seen_loader.dataset, unseen_loader.dataset]), batch_size=64))
        membership_preds = (memberships > 0.5).float()
        accuracy = (membership_preds == attack_labels).float().mean().item()
        return accuracy

    def _get_model_outputs(self, model, dataloader):
        model.eval()
        outputs_list = []
        labels_list = []
        with torch.no_grad():
            for inputs, labels in dataloader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                outputs_list.append(probabilities)
                labels_list.append(labels)
        return torch.cat(outputs_list), torch.cat(labels_list)

# Usage example
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load CIFAR-10 dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Split training data into 80% and 20%
train_size = int(0.8 * len(train_set))
remaining_size = len(train_set) - train_size
train_subset, remaining_subset = torch.utils.data.random_split(train_set, [train_size, remaining_size])

# Create disjoint loaders for seen data from 80% training data
num_shadow_models = 2
seen_size_per_model = train_size // num_shadow_models
seen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * seen_size_per_model
    end_idx = (i + 1) * seen_size_per_model
    seen_indices = torch.arange(start_idx, end_idx)
    seen_train_set = Subset(train_subset, seen_indices)
    seen_loader = DataLoader(seen_train_set, batch_size=10, shuffle=True)
    seen_loaders.append(seen_loader)

# Create concatenated unseen data from the remaining 20% of training data and the entire test set
unseen_dataset = ConcatDataset([remaining_subset, test_set])
unseen_size_per_model = len(unseen_dataset) // num_shadow_models
unseen_loaders = []

for i in range(num_shadow_models):
    start_idx = i * unseen_size_per_model
    end_idx = (i + 1) * unseen_size_per_model
    unseen_indices = torch.arange(start_idx, end_idx)
    unseen_subset = Subset(unseen_dataset, unseen_indices)
    unseen_loader = DataLoader(unseen_subset, batch_size=10, shuffle=False)
    unseen_loaders.append(unseen_loader)

test_loader = DataLoader(test_set, batch_size=10, shuffle=False)

# Initialize MembershipInferenceAttackWithDP
mia_private = MembershipInferenceAttackWithDP(CIFAR10Classifier, device)

# Train shadow models with differential privacy
mia_private.train_shadow_models(seen_loaders, num_epochs=10)

# Collect outputs for attack model
mia_private.collect_outputs(seen_loaders, unseen_loaders)

# Train attack models
mia_private.train_attack_models()

# Save the attack models
mia_private.save_attack_models('attack_model_private')

# Load the attack models (for future use)
mia_private.load_attack_models('attack_model_private')

# Evaluate the attack model
print("Accuracy for Attacking to the Private Model : ")
accuracy_private = mia_private.evaluate_attack_model(test_loader, test_loader, mia_private.shadow_models[0])
print(f'Accuracy: {accuracy_private}')


In [None]:
def remove_module_prefix(state_dict):
    new_state_dict = {}
    for k, v in state_dict.items():
        if k.startswith('_module.'):
            new_state_dict[k[8:]] = v  # remove '_module.' prefix
        else:
            new_state_dict[k] = v
    return new_state_dict

model_private = CIFAR10Classifier().to(device)
private_state_dict = torch.load('modified_model.pth', map_location=device)
model_private.load_state_dict(remove_module_prefix(private_state_dict))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_private.to(device)

accuracy_private = mia_private.evaluate_attack_model(train_loader_modified, test_loader_modified, model_private)
print(f'Accuracy for Attacking to the Private Model :  {accuracy_private * 100:.2f}%')

## Presentation

In [None]:
from torchvision import models
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt


class CIFAR10Classifier(nn.Module):
  def __init__(self):
    super(CIFAR10Classifier, self).__init__()
    self.conv1 = nn.Conv2d(3, 16, 3, 1)
    self.conv2 = nn.Conv2d(16, 32, 3, 1)
    self.dropout1 = nn.Dropout2d(0.25)
    self.dropout2 = nn.Dropout2d(0.5)
    self.fc1 = nn.Linear(6272, 64)
    self.fc2 = nn.Linear(64, 10)

  def forward(self, x):
    x = self.conv1(x)
    x = F.relu(x)
    x = self.conv2(x)
    x = F.relu(x)
    x = F.max_pool2d(x, 2)
    x = self.dropout1(x)
    x = torch.flatten(x, 1)
    x = self.fc1(x)
    x = F.relu(x)
    x = self.dropout2(x)
    x = self.fc2(x)
    return x

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import Subset, DataLoader, TensorDataset
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = CIFAR10Classifier()
state_dict = torch.load("model_state_dict.pth", map_location=device)
new_state_dict = {key.replace('_module.', ''): value for key, value in state_dict.items()}
model.load_state_dict(new_state_dict)
model.to(device)
model.eval()

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

DATA_ROOT = '../cifar10'
BATCH_SIZE = 64

# Load the indices from list.txt
indices_file = 'list.txt'
with open(indices_file, 'r') as f:
    indices = [int(line.strip()) for line in f]

full_train_dataset = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform)

train_indices_set = set(indices)
all_indices = set(range(len(full_train_dataset)))
other_indices = list(all_indices - train_indices_set)

train_dataset = Subset(full_train_dataset, indices[:len(indices)//2])
other_dataset = Subset(full_train_dataset, other_indices)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
other_loader = DataLoader(other_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Create labels
train_labels = torch.ones(len(train_dataset)).to(device)
other_labels = torch.zeros(len(other_dataset)).to(device)
test_labels = torch.zeros(len(test_dataset)).to(device)

def extract_features_and_labels(model, dataloader, device):
    model.eval()
    features = []
    labels = []
    with torch.no_grad():
        for data in dataloader:
            inputs, targets = data
            inputs = inputs.to(device)
            outputs = model(inputs)
            probabilities = F.softmax(outputs, dim=1)
            features.append(probabilities.cpu())  # Keep features on CPU for concatenation
            labels.append(targets.cpu())
    return torch.cat(features), torch.cat(labels)


train_features, train_labels = extract_features_and_labels(model, train_loader, device)
other_features, other_labels = extract_features_and_labels(model, other_loader, device)
test_features, test_labels = extract_features_and_labels(model, test_loader, device)

combined_features = torch.cat((train_features, other_features, test_features))
combined_labels = torch.cat((train_labels, other_labels, test_labels))
combined_membership_labels = torch.cat((torch.ones(len(train_features)), torch.zeros(len(other_features)+ len(test_features) ))).to(device)

new_dataset = TensorDataset(combined_features, combined_labels, combined_membership_labels)
new_loader = DataLoader(new_dataset, batch_size=BATCH_SIZE, shuffle=True)

# attacker_models = {}
# for class_label in range(10):
#     attacker = LogisticRegression()  # or the attack model you trained
#     attacker.load_state_dict(torch.load(f'attacker_model_class_{class_label}.pth', map_location=device))
#     attacker_models[class_label] = attacker

attacker_models = mia.attack_models

# Calculate training accuracy, confusion matrix, precision, and recall
all_labels = []
all_predicted = []
correct = 0
total = 0

 with torch.no_grad():
    for features, labels, membership_labels in new_loader:
        features, labels, membership_labels = features.to(device), labels.to(device), membership_labels.to(device)
        for feature, label, membership_label in zip(features, labels, membership_labels):
            class_label = int(label.item())
            attacker = attacker_models[class_label]
            output = attacker.predict(feature.unsqueeze(0).cpu().numpy())[0]
            predicted = 1 if output > 0.5 else 0
            total += 1
            correct += (predicted == membership_label.item())
            all_labels.append(membership_label.cpu().numpy())
            all_predicted.append(predicted)

accuracy = correct / total
print(f'Training Accuracy: {accuracy:.4f}')

cm = confusion_matrix(all_labels, all_predicted)
precision = precision_score(all_labels, all_predicted)
recall = recall_score(all_labels, all_predicted)
f1 = f1_score(all_labels, all_predicted)

print(f'Confusion Matrix:\n{cm}')
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")