In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, SubsetRandomSampler, random_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import numpy as np

class CustomResNet(nn.Module):
    def __init__(self, num_classes=10):
        super(CustomResNet, self).__init__()
        resnet = models.resnet18(pretrained=True)
        self.features = nn.Sequential(*list(resnet.children())[:-1])  # Remove the last fully connected layer
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(resnet.fc.in_features, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

def train_model(model, dataloaders, criterion, optimizer, scheduler, num_epochs=10):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()

        scheduler.step()

    model.load_state_dict(best_model_wts)
    return model

def evaluate_model(model, dataloader):
    model.eval()
    all_preds = []
    all_labels = []
    all_probs = []

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(nn.Softmax(dim=1)(outputs).cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='weighted')
    recall = recall_score(all_labels, all_preds, average='weighted')
    f1 = f1_score(all_labels, all_preds, average='weighted')
    auroc = roc_auc_score(np.eye(10)[all_labels], np.array(all_probs), average='weighted', multi_class='ovr')

    return accuracy, precision, recall, f1, auroc

transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform_test)

indices = np.arange(len(train_dataset))
np.random.shuffle(indices)

S_values = [5, 10, 20]
R_values = [5, 10, 20]

results = {}

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
trained_modelss = {}
for S in S_values:
    for R in R_values:
        print(f'Training with S={S} shards and R={R} repetitions')
        shard_size = len(train_dataset) // S
        shard_indices = [indices[i:i+shard_size] for i in range(0, len(train_dataset), shard_size)]

        trained_models = []
        for k in range(S):
            shard_data = [train_dataset[i] for i in shard_indices[k]]
            shard_length = len(shard_data)
            val_length = shard_length // 5
            train_length = shard_length - val_length
            train_data, val_data = random_split(shard_data, [train_length, val_length])
            train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
            val_loader = DataLoader(val_data, batch_size=64, shuffle=False)
            dataloaders = {'train': train_loader, 'val': val_loader}
            model = CustomResNet(num_classes=10)
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(model.parameters(), lr=0.001)
            scheduler = StepLR(optimizer, step_size=5, gamma=0.5)
            trained_model = train_model(model, dataloaders=dataloaders, criterion=criterion, optimizer=optimizer, scheduler=scheduler, num_epochs=11)
            trained_models.append(trained_model)

        trained_modelss[f'S={S}, R={R}'] = trained_models
        test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
        accuracy, precision, recall, f1, auroc = evaluate_model(model, test_loader)
        results[f'S={S}, R={R}'] = {'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1-score': f1, 'AUROC': auroc }

for key, value in results.items():
    print(f'{key}: {value}')



Files already downloaded and verified
Files already downloaded and verified
Training with S=5 shards and R=5 repetitions




Training with S=5 shards and R=10 repetitions




Training with S=5 shards and R=20 repetitions




Training with S=10 shards and R=5 repetitions




Training with S=10 shards and R=10 repetitions




Training with S=10 shards and R=20 repetitions




Training with S=20 shards and R=5 repetitions




Training with S=20 shards and R=10 repetitions




Training with S=20 shards and R=20 repetitions




S=5, R=5: {'Accuracy': 0.7609999999999999, 'Precision': 0.7614155193893641, 'Recall': 0.7609999999999999, 'F1-score': 0.7573236489456205, 'AUROC': 0.9456405055555555}
S=5, R=10: {'Accuracy': 0.7767999999999999, 'Precision': 0.7779544969850951, 'Recall': 0.7767999999999999, 'F1-score': 0.7757606601376628, 'AUROC': 0.947765861111111}
S=5, R=20: {'Accuracy': 0.7607999999999999, 'Precision': 0.7695257940373972, 'Recall': 0.7607999999999999, 'F1-score': 0.7633722185616232, 'AUROC': 0.9444403611111111}
S=10, R=5: {'Accuracy': 0.7244999999999999, 'Precision': 0.7294886234703635, 'Recall': 0.7244999999999999, 'F1-score': 0.7246055298050293, 'AUROC': 0.9264217611111111}
S=10, R=10: {'Accuracy': 0.7179, 'Precision': 0.7224119496713524, 'Recall': 0.7179, 'F1-score': 0.7170915538116818, 'AUROC': 0.9307582111111111}
S=10, R=20: {'Accuracy': 0.7203999999999999, 'Precision': 0.7200936945977561, 'Recall': 0.7203999999999999, 'F1-score': 0.7182527588017926, 'AUROC': 0.9284757388888889}
S=20, R=5: {'Acc

In [None]:
forget_indices = np.random.choice(len(train_dataset), 500, replace=False)
forget_set = [train_dataset[i] for i in forget_indices]
forget_labels = [train_dataset[i][1] for i in forget_indices]

unlearned_models_dict = {}

for key, shard_models in trained_modelss.items():
    S = int(key.split(",")[0][2:])
    R = int(key.split(",")[1][3:])
    shard_size = len(train_dataset) // S
    shard_indices = [indices[i:i+shard_size] for i in range(0, len(train_dataset), shard_size)]

    for k in range(S):
        shard_data_indices = set(shard_indices[k])
        forget_data_indices_in_shard = [idx for idx in forget_indices if idx in shard_data_indices]

        if not forget_data_indices_in_shard:
            continue

        updated_shard_data = [train_dataset[i] for i in shard_indices[k] if i not in forget_data_indices_in_shard]
        shard_length = len(updated_shard_data)
        val_length = shard_length // 5
        train_length = shard_length - val_length
        train_data, val_data = random_split(updated_shard_data, [train_length, val_length])
        train_loader = DataLoader(train_data, batch_size=64, shuffle=True, drop_last=True)
        val_loader = DataLoader(val_data, batch_size=64, shuffle=False, drop_last=True)
        dataloaders = {'train': train_loader, 'val': val_loader}
        model = CustomResNet(num_classes=10)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
        scheduler = StepLR(optimizer, step_size=5, gamma=0.5)
        trained_model = train_model(model, dataloaders=dataloaders, criterion=criterion, optimizer=optimizer, scheduler=scheduler, num_epochs=15)
        shard_models[k] = trained_model

    unlearned_models_dict[key] = shard_models
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, drop_last=True)
    accuracy, precision, recall, f1, auroc = evaluate_model(model, test_loader)
    results[f'{key}, Unlearned'] = {'Accuracy': accuracy, 'Precision': precision, 'Recall': recall, 'F1-score': f1, 'AUROC': auroc}

for key, value in results.items():
    print(f'{key}: {value}')



S=5, R=5: {'Accuracy': 0.7609999999999999, 'Precision': 0.7614155193893641, 'Recall': 0.7609999999999999, 'F1-score': 0.7573236489456205, 'AUROC': 0.9456405055555555}
S=5, R=10: {'Accuracy': 0.7767999999999999, 'Precision': 0.7779544969850951, 'Recall': 0.7767999999999999, 'F1-score': 0.7757606601376628, 'AUROC': 0.947765861111111}
S=5, R=20: {'Accuracy': 0.7607999999999999, 'Precision': 0.7695257940373972, 'Recall': 0.7607999999999999, 'F1-score': 0.7633722185616232, 'AUROC': 0.9444403611111111}
S=10, R=5: {'Accuracy': 0.7244999999999999, 'Precision': 0.7294886234703635, 'Recall': 0.7244999999999999, 'F1-score': 0.7246055298050293, 'AUROC': 0.9264217611111111}
S=10, R=10: {'Accuracy': 0.7179, 'Precision': 0.7224119496713524, 'Recall': 0.7179, 'F1-score': 0.7170915538116818, 'AUROC': 0.9307582111111111}
S=10, R=20: {'Accuracy': 0.7203999999999999, 'Precision': 0.7200936945977561, 'Recall': 0.7203999999999999, 'F1-score': 0.7182527588017926, 'AUROC': 0.9284757388888889}
S=20, R=5: {'Acc

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

def calculate_losses(model, dataloader, criterion):
    model.eval()
    losses = []

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            losses.append(loss.item())

    return np.array(losses)

def membership_inference_attack(losses_train, losses_test):
    X = np.concatenate([losses_train, losses_test])
    y = np.concatenate([np.ones(len(losses_train)), np.zeros(len(losses_test))])

    clf = LogisticRegression(random_state=0, max_iter=1000)
    scores = cross_val_score(clf, X.reshape(-1, 1), y, cv=5, scoring='accuracy')
    return scores.mean()

criterion = nn.CrossEntropyLoss()
forget_loader = DataLoader(forget_set, batch_size=32, shuffle=False, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, drop_last=True)
trained_models = trained_modelss['S=5, R=5']
unlearned_models = unlearned_models_dict['S=5, R=5']

forget_losses_trained = calculate_losses(trained_models[0], forget_loader, criterion)
test_losses_trained = calculate_losses(trained_models[0], test_loader, criterion)
forget_losses_unlearned = calculate_losses(unlearned_models[0], forget_loader, criterion)
test_losses_unlearned = calculate_losses(unlearned_models[0], test_loader, criterion)

score_trained = membership_inference_attack(forget_losses_trained, test_losses_trained)
print(f'Membership Inference Attack Score for Trained Model: {score_trained }')
score_unlearned = membership_inference_attack(forget_losses_unlearned, test_losses_unlearned)
print(f'Membership Inference Attack Score for Unlearned Model: {score_unlearned}')


Membership Inference Attack Score for Trained Model: 0.9663869463869463
Membership Inference Attack Score for Unlearned Model: 0.5163869463869464


In [None]:
import random
import copy
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
import torchvision.models as models
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, random_split, Dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Custom dataset class to allow modification
class CustomCIFAR10(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __getitem__(self, index):
        img, label = self.data[index]
        if isinstance(img, torch.Tensor):
            img = transforms.ToPILImage()(img)  # Convert tensor to PIL image
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        return len(self.data)

# Function to add a 3x3 black square to a given image
def add_backdoor_trigger(image, block_size=3):
    if isinstance(image, torch.Tensor):
        img = image.clone()
    else:
        img = transforms.ToTensor()(image)
    _, h, w = img.shape
    top_left_x = random.randint(0, w - block_size)
    top_left_y = random.randint(0, h - block_size)
    img[:, top_left_y:top_left_y + block_size, top_left_x:top_left_x + block_size] = 0
    return img

# Step 1: Create Poisoned Dataset
def create_poisoned_dataset(train_dataset, target_class=0, num_poisoned=500):
    poisoned_dataset = []
    poisoned_indices = np.random.choice(
        [i for i in range(len(train_dataset)) if train_dataset[i][1] == target_class],
        num_poisoned, replace=False
    )
    for i in range(len(train_dataset)):
        img, label = train_dataset[i]
        if i in poisoned_indices:
            img = add_backdoor_trigger(img)
        poisoned_dataset.append((img, label))
    return poisoned_dataset, poisoned_indices

# Step 2: Train the model with the poisoned dataset
def train_poisoned_model(poisoned_dataset):
    shard_size = len(poisoned_dataset) // S
    shard_indices = [indices[i:i + shard_size] for i in range(0, len(poisoned_dataset), shard_size)]
    shard_models = []

    for k in range(S):
        shard_data = [poisoned_dataset[i] for i in shard_indices[k]]
        shard_length = len(shard_data)
        val_length = shard_length // 5
        train_length = shard_length - val_length
        train_data, val_data = random_split(shard_data, [train_length, val_length])
        train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=64, shuffle=False)
        dataloaders = {'train': train_loader, 'val': val_loader}
        model = CustomResNet(num_classes=10)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
        scheduler = StepLR(optimizer, step_size=5, gamma=0.5)
        trained_model = train_model(model, dataloaders=dataloaders, criterion=criterion, optimizer=optimizer, scheduler=scheduler, num_epochs=15)
        shard_models.append(trained_model)
    return shard_models

# Step 3: Evaluate Attack Success Rate (ASR)
def calculate_asr(model, target_class):
    model.eval()
    all_preds = []
    all_labels = []

    poisoned_test_set = CustomCIFAR10([(add_backdoor_trigger(test_dataset[i][0]), target_class) for i in range(len(test_dataset))], transform=transforms.ToTensor())
    poisoned_test_loader = DataLoader(poisoned_test_set, batch_size=32, shuffle=False)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    with torch.no_grad():
        for inputs, labels in poisoned_test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    asr = np.mean(np.array(all_preds) == np.array(all_labels))
    return asr

# Main process
S = 5
R = 5
target_class = 0

# Load CIFAR-10 dataset
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomCrop(32, padding=4),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_dataset = CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform_test)

indices = np.arange(len(train_dataset))
np.random.shuffle(indices)

# Create the poisoned dataset
poisoned_dataset, poisoned_indices = create_poisoned_dataset(train_dataset, target_class=target_class, num_poisoned=500)
poisoned_dataset = CustomCIFAR10(poisoned_dataset, transform=transform_train)

# Train the model with the poisoned dataset
poisoned_shard_models = train_poisoned_model(poisoned_dataset)

# Evaluate the model on clean test data
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
accuracy, precision, recall, f1, auroc = evaluate_model(poisoned_shard_models[0], test_loader)  # Use one of the models for evaluation metrics
print(f'Clean Test Data - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-score: {f1}, AUROC: {auroc}')

# Calculate ASR (Attack Success Rate)
asr = calculate_asr(poisoned_shard_models[0], target_class)
print(f'Attack Success Rate (ASR): {asr}')


Files already downloaded and verified
Files already downloaded and verified




Clean Test Data - Accuracy: 0.4304, Precision: 0.4920665034621721, Recall: 0.4304, F1-score: 0.42865627995426403, AUROC: 0.8304232833333334
Attack Success Rate (ASR): 0.2445


In [None]:
indices = np.arange(len(train_dataset))
np.random.shuffle(indices)

# Create the poisoned dataset
poisoned_dataset, poisoned_indices = create_poisoned_dataset(train_dataset, target_class=target_class, num_poisoned=500)
poisoned_dataset = CustomCIFAR10(poisoned_dataset, transform=transform_train)

# Train the model with the poisoned dataset
poisoned_shard_models = train_poisoned_model(poisoned_dataset)

# Evaluate the model on clean test data
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
accuracy, precision, recall, f1, auroc = evaluate_model(poisoned_shard_models[0], test_loader)  # Use one of the models for evaluation metrics
print(f'Clean Test Data - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-score: {f1}, AUROC: {auroc}')

# Calculate ASR (Attack Success Rate)
asr = calculate_asr(poisoned_shard_models[0], target_class)
print(f'Attack Success Rate (ASR): {asr}')

# Unlearn the same 500 data points
forget_indices = np.random.choice(len(poisoned_dataset), 500, replace=False)
forget_set = [poisoned_dataset[i] for i in forget_indices]

for key, shard_models in trained_modelss.items():
    if key != 'S=5, R=5':
        continue

    S = int(key.split(",")[0][2:])
    R = int(key.split(",")[1][3:])
    shard_size = len(poisoned_dataset) // S
    shard_indices = [indices[i:i + shard_size] for i in range(0, len(poisoned_dataset), shard_size)]

    for k in range(S):
        shard_data_indices = set(shard_indices[k])
        forget_data_indices_in_shard = [idx for idx in forget_indices if idx in shard_data_indices]

        if not forget_data_indices_in_shard:
            continue

        updated_shard_data = [poisoned_dataset[i] for i in shard_indices[k] if i not in forget_data_indices_in_shard]
        shard_length = len(updated_shard_data)
        val_length = shard_length // 5
        train_length = shard_length - val_length
        train_data, val_data = random_split(updated_shard_data, [train_length, val_length])
        train_loader = DataLoader(train_data, batch_size=64, shuffle=True, drop_last=True)
        val_loader = DataLoader(val_data, batch_size=64, shuffle=False, drop_last=True)
        dataloaders = {'train': train_loader, 'val': val_loader}
        model = CustomResNet(num_classes=10)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
        scheduler = StepLR(optimizer, step_size=5, gamma=0.5)
        trained_model = train_model(model, dataloaders=dataloaders, criterion=criterion, optimizer=optimizer, scheduler=scheduler, num_epochs=15)
        shard_models[k] = trained_model

    unlearned_shard_models = shard_models

# Evaluate the unlearned model on clean test data
accuracy, precision, recall, f1, auroc = evaluate_model(unlearned_shard_models[0], test_loader)  # Use one of the models for evaluation metrics
print(f'Unlearned Model - Clean Test Data - Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1-score: {f1}, AUROC: {auroc}')

# Calculate ASR for the unlearned model
asr = calculate_asr(unlearned_shard_models[0], target_class)
print(f'Unlearned Model - Attack Success Rate (ASR): {asr}')



Clean Test Data - Accuracy: 0.456, Precision: 0.5125403743450343, Recall: 0.456, F1-score: 0.463063950922401, AUROC: 0.8442392277777779
Attack Success Rate (ASR): 0.0573




Unlearned Model - Clean Test Data - Accuracy: 0.4863, Precision: 0.5078352667599466, Recall: 0.4863, F1-score: 0.4890600705585422, AUROC: 0.8622941777777778
Unlearned Model - Attack Success Rate (ASR): 0.2081
