
# **Simulation Question 8.**
Attempt to train an attacker model for the given private model
(private_model.pth). We will test it on our dataset during the online presentation session. A
competitive bonus point is available for the best performance.

In [1]:
import numpy as np
import random
import torch
import torch.nn.functional as F
import shutil
from torchvision import datasets, transforms
from google.colab import drive
from sklearn.ensemble import RandomForestClassifier
import os
import zipfile
import torch
import random
import numpy as np
from torch.utils.data import DataLoader, Subset, TensorDataset
import torch.nn.functional as F
from sklearn.ensemble import RandomForestClassifier
from google.colab import drive
import shutil
import joblib
import pickle
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Load CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 43033470.25it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [4]:
class MembershipInferenceAttack:
    def __init__(self, train_dataset, test_dataset, target_model, device, epochs=5, num_shadow_models=5, overlap_percent=0.1):
        self.target_model = target_model
        self.target_model.eval()

        self.device = device
        self.epochs = epochs
        self.num_shadow_models = num_shadow_models
        self.overlap_percent = overlap_percent

        self.shadow_models = []
        self.shadow_model_splits = []
        self.shadow_model_indices = []  # To save the indices of each shadow model's training set
        self.remaining_indices = []  # To save the indices of data not in shadow models' training sets

        self.train_dataset = train_dataset
        self.test_dataset = test_dataset
        self.batch_size = 64
        self.train_loader = DataLoader(self.train_dataset, batch_size = self.batch_size, shuffle=True)
        #self.test_loader = DataLoader(self.test_dataset, batch_size=64, shuffle=False)

        self.attack_models = []
        self.attack_data = [[] for _ in range(10)]
        self.attack_labels = [[] for _ in range(10)]

    def select_high_confidence_records(self, threshold=0.9):
        self.target_model.to(self.device)
        data_loader = self.train_loader
        model = self.target_model
        high_confidence_indices = []

        j = 0
        for inputs, labels in data_loader:
            j += 1
            inputs, labels = inputs.to(self.device), labels.to(self.device)
            model.eval()
            with torch.no_grad():
                outputs = model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                max_probs, preds = torch.max(probabilities, dim=1)

                for i in range(len(inputs)):
                    if max_probs[i].item() >= threshold:
                        high_confidence_indices.append( (j-1)*self.batch_size + i)  # Save index instead of data

        print(len(high_confidence_indices))
        print(np.shape(high_confidence_indices))

        return high_confidence_indices

    def stratified_split_indices(self, indices):

        self.shadow_model_splits = []
        self.shadow_model_indices = []  # To save the indices of each shadow model's training set
        self.remaining_indices = []  # To save the indices of data not in shadow models' training sets

        num_splits = self.num_shadow_models
        overlap_percent = self.overlap_percent
        unique_classes = np.array([0,1,2,3,4,5,6,7,8,9])
        split_indices = [[] for _ in range(num_splits)]
        remaining_indices_per_split = [[] for _ in range(num_splits)]
        #overlap_count = int(overlap_percent * self.num_shadow_models / num_splits)
        overlap_count = 0

        for cls in unique_classes:
            print("cls=",cls)
            cls_indices = [i for i in indices if self.train_dataset.targets[i] == int(cls)]
            print(cls_indices)
            print(len(cls_indices))
            print()
            random.shuffle(cls_indices)

            split_cls_indices = np.array_split(cls_indices, num_splits)

            for i in range(num_splits):
                split_indices[i].extend(split_cls_indices[i])

                if overlap_count > 0:
                    remaining_indices = list(set(cls_indices) - set(split_cls_indices[i]))
                    overlap_indices = random.sample(remaining_indices, min(overlap_count, len(remaining_indices)))
                    split_indices[i].extend(overlap_indices)

        self.shadow_model_splits = split_indices

        # Calculate non-shadow indices for each split
        for i in range(num_splits):
            all_shadow_indices = set(split_indices[i])
            remaining_indices_per_split[i] = list(set(indices) - all_shadow_indices)

        self.remaining_indices = remaining_indices_per_split

        return self.shadow_model_splits

    def create_shadow_models(self):
        self.shadow_models = []
        dataset = self.train_dataset
        split_indices = self.shadow_model_splits
        #shadow_model_data = []

        for i in range(self.num_shadow_models):
            indices = split_indices[i]
            self.shadow_model_indices.append(indices)  # Save the indices used for this shadow model
            shadow_dataset = Subset(dataset, indices)
            shadow_loader = DataLoader(shadow_dataset, batch_size=64, shuffle=True)

            # Train the shadow model
            model = CIFAR10Classifier().to(self.device)
            optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
            criterion = torch.nn.CrossEntropyLoss()

            loss = 0
            for epoch in range(self.epochs):
                model.train()
                for inputs, labels in shadow_loader:
                    inputs, labels = inputs.to(self.device), labels.to(self.device)

                    optimizer.zero_grad()
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()

                print("shadow model:",i," epoch:",epoch," loss=", loss.item())
            print()

            self.shadow_models.append(model)
            #shadow_model_data.append((shadow_loader, model))

        pass

    def set_shadow_model(self, index, model):
        self.shadow_models[index] = model

    def prepare_attack_data(self):
        attack_data = [[] for _ in range(10)]
        attack_labels = [[] for _ in range(10)]

        for cls in range(10):
            print("cls=",cls)
            for shadow_idx in range(self.num_shadow_models):
                print("shadow model=",shadow_idx+1)
                shadow_model = self.shadow_models[shadow_idx]
                shadow_model.eval()
                shadow_indices = [idx for idx in self.shadow_model_indices[shadow_idx] if self.train_dataset.targets[idx] == cls]

                shadow_data_loader = DataLoader(Subset(self.train_dataset, shadow_indices), batch_size=64, shuffle=False)

                # Prepare test data loader including test set with the same length as shadow_indices
                test_indices_cls = [idx for idx, label in enumerate(self.test_dataset.targets) if label == cls]
                random.shuffle(test_indices_cls)
                test_indices_cls = test_indices_cls[:len(shadow_indices)]

                #print(len(shadow_indices))
                #print(len(test_indices_cls))

                test_data_loader = DataLoader(Subset(self.test_dataset, test_indices_cls), batch_size=64, shuffle=False)

                for inputs, labels in shadow_data_loader:
                    inputs, labels = inputs.to(self.device), labels.to(self.device)
                    with torch.no_grad():
                        outputs = shadow_model(inputs)
                        #max_probs, preds = torch.max(probabilities, dim=1)

                        for i in range(len(inputs)):
                                attack_data[cls].append(outputs[i].cpu().numpy())
                                attack_labels[cls].append(1)  # Label as in the training set

                for inputs, labels in test_data_loader:
                    inputs, labels = inputs.to(self.device), labels.to(self.device)
                    with torch.no_grad():
                        outputs = shadow_model(inputs)

                        for i in range(len(inputs)):
                                attack_data[cls].append(outputs[i].cpu().numpy())
                                attack_labels[cls].append(0)  # Label as not in the training set

        self.attack_data = attack_data
        self.attack_labels = attack_labels

        pass

    def train_attack_models(self, epochs=10):
            for cls in range(10):
                print(cls)
                attack_data_array = np.array(self.attack_data[cls])
                attack_labels_array = np.array(self.attack_labels[cls])
                print(len(attack_data_array))
                print(len(attack_labels_array))

                attack_model = RandomForestClassifier(n_estimators=100)
                attack_model.fit(attack_data_array.reshape(len(attack_data_array), -1), attack_labels_array)

                self.attack_models.append(attack_model)



    def predict_membership(self, target_model, test_loader):
        predictions = []
        target_model.eval()  # Set the target model to evaluation mode
        softmax_outputs = []

        for inputs, _ in test_loader:
            inputs = inputs.to(self.device)
            with torch.no_grad():
                outputs = target_model(inputs)
                probabilities = F.softmax(outputs, dim=1)
                all_outputs.extend(outputs.cpu().numpy())

        for i, (inputs, labels) in enumerate(test_loader.dataset):
            cls = labels
            attack_model = self.attack_models[cls]
            input_data = all_outputs[i].reshape(1, -1)
            pred = attack_model.predict(input_data)
            print(pred)
            predictions.append(pred[0])

        return predictions

    def evaluate_attack_models_just_in(self, train_indices):
        attack_predictions = self.predict_membership(self.target_model, DataLoader(torch.utils.data.Subset(self.train_dataset, train_indices), batch_size=64, shuffle=False))
        print(attack_predictions)
        true_labels = [1] * len(train_indices)
        accuracy = accuracy_score(true_labels, attack_predictions)
        return accuracy

    def evaluate_attack_models_in_and_out(self, train_indices, true_labels):
        attack_predictions = self.predict_membership(self.target_model, DataLoader(torch.utils.data.Subset(self.train_dataset, train_indices), batch_size=64, shuffle=False))
        print(attack_predictions)
        print(true_labels)
        accuracy = accuracy_score(true_labels, attack_predictions)
        return accuracy


    def save_shadow_models(self, folder_path='shadow_models'):
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        for i, model in enumerate(self.shadow_models):
            model_path = os.path.join(folder_path, f'shadow_model_{i}.pth')
            torch.save(model.state_dict(), model_path)

        with open(os.path.join(folder_path, 'shadow_model_indices.pkl'), 'wb') as f:
            joblib.dump(self.shadow_model_indices, f)

        with open(os.path.join(folder_path, 'remaining_indices.pkl'), 'wb') as f:
            joblib.dump(self.remaining_indices, f)

    def load_shadow_models(self, folder_path='shadow_models'):
        self.shadow_models = []
        for i in range(len(os.listdir(folder_path)) - 2):  # Assuming two non-model files
            model_path = os.path.join(folder_path, f'shadow_model_{i}.pth')
            model = CIFAR10Classifier().to(self.device)
            model.load_state_dict(torch.load(model_path))
            self.shadow_models.append(model)

        with open(os.path.join(folder_path, 'shadow_model_indices.pkl'), 'rb') as f:
            self.shadow_model_indices = joblib.load(f)

        with open(os.path.join(folder_path, 'remaining_indices.pkl'), 'rb') as f:
            self.remaining_indices = joblib.load(f)

    def save_attack_models(self, folder_path='attack_models'):
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        for i, model in enumerate(self.attack_models):
            model_path = os.path.join(folder_path, f'attack_model_{i}.joblib')
            joblib.dump(model, model_path)

        with open(os.path.join(folder_path, 'attack_data.pkl'), 'wb') as f:
            joblib.dump(self.attack_data, f)

        with open(os.path.join(folder_path, 'attack_labels.pkl'), 'wb') as f:
            joblib.dump(self.attack_labels, f)

    def load_attack_models(self, folder_path='attack_models'):
        self.attack_models = []
        for i in range(len(os.listdir(folder_path)) - 2):  # Assuming two non-model files
            model_path = os.path.join(folder_path, f'attack_model_{i}.joblib')
            model = joblib.load(model_path)
            self.attack_models.append(model)

        with open(os.path.join(folder_path, 'attack_data.pkl'), 'rb') as f:
            self.attack_data = joblib.load(f)

        with open(os.path.join(folder_path, 'attack_labels.pkl'), 'rb') as f:
            self.attack_labels = joblib.load(f)

    def upload_to_drive(self, local_folder, drive_folder):
        drive.mount('/content/drive')
        drive_folder_path = os.path.join('/content/drive/My Drive/', drive_folder)
        if not os.path.exists(drive_folder_path):
            os.makedirs(drive_folder_path)

        zip_filename = local_folder + '.zip'
        with zipfile.ZipFile(zip_filename, 'w') as zipf:
            for root, _, files in os.walk(local_folder):
                for file in files:
                    zipf.write(os.path.join(root, file), arcname=file)

        shutil.copy(zip_filename, drive_folder_path)

    def download_from_drive(self, drive_folder, local_folder):
        drive.mount('/content/drive')
        drive_folder_path = os.path.join('/content/drive/My Drive/', drive_folder)
        if not os.path.exists(local_folder):
            os.makedirs(local_folder)

        zip_filename = local_folder + '.zip'
        shutil.copy(os.path.join(drive_folder_path, zip_filename), zip_filename)

        with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
            zip_ref.extractall(local_folder)


# Save

In [5]:
!pip install gdown
!pip install pydrive2



In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import numpy as np
from model import CIFAR10Classifier  # Importing the model
import os
import shutil
import zipfile
import gdown

In [7]:
# Function to save the model
def zip_model(local_path, zip_name):
    import zipfile
    with zipfile.ZipFile(zip_name, 'w') as zipf:
        zipf.write(local_path)

# Define functions for Google Drive operations
def upload_to_drive(local_path, drive_path):
    from google.colab import drive
    drive.mount('/content/drive')
    shutil.move(local_path, drive_path)

def download_from_drive(drive_path, local_path):
    from google.colab import drive
    drive.mount('/content/drive')
    shutil.copy(drive_path, local_path)

def unzip_model(zip_name, extract_path):
    import zipfile
    with zipfile.ZipFile(zip_name, 'r') as zipf:
        zipf.extractall(extract_path)

# Function to save the model
def save_model(model, path='cifar10_classifier.pth'):
    torch.save(model.state_dict(), path)
    print(f'Model saved to {path}')

# Function to load the model
def load_model(model, path='cifar10_classifier.pth'):
    model.load_state_dict(torch.load(path))
    model.to(device)
    model.eval()
    print(f'Model loaded from {path}')
    return model

In [None]:
# Save and zip the model
save_model(model, 'cifar10_classifier.pth')
zip_model('cifar10_classifier.pth', 'cifar10_classifier.zip')

Model saved to cifar10_classifier.pth


In [None]:
# Upload the model to Google Drive
upload_to_drive('cifar10_classifier.zip', '/content/drive/MyDrive/cifar10_classifier.zip')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
# Download the model from Google Drive and unzip it
download_from_drive('/content/drive/MyDrive/cifar10_classifier.zip', 'cifar10_classifier.zip')

Mounted at /content/drive


In [8]:
unzip_model('cifar10_classifier.zip', './')

In [9]:
# Load the model
model = CIFAR10Classifier().to(device)
model = load_model(model, 'cifar10_classifier.pth')

Model loaded from cifar10_classifier.pth


# Train model

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from model import CIFAR10Classifier

In [9]:
# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load CIFAR-10 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [10]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Select 10,000 random indices for the training subset
train_indices = torch.randperm(len(trainset))[:10000]



In [17]:
train_subset = torch.utils.data.Subset(trainset, train_indices)
train_loader = torch.utils.data.DataLoader(train_subset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [None]:
print(train_indices)

tensor([28969, 32526, 17748,  ..., 41984, 44073,  2753])


In [None]:
# Initialize model, loss function, and optimizer
model = CIFAR10Classifier().to(device)


In [20]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

In [18]:
# Train the model
epochs = 40
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f'Epoch {epoch + 1}/{epochs}, Loss: {running_loss / len(train_loader)}')

NameError: name 'optimizer' is not defined

In [21]:
# Evaluate the model on the test set
test_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        model.eval()
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test Loss: {test_loss / len(test_loader)}')
print(f'Test Accuracy: {100 * correct / total}%')

Test Loss: 1.4993273331101533
Test Accuracy: 56.05%


In [None]:
# Save the train indices
with open('train_indices.pkl', 'wb') as f:
    pickle.dump(train_indices, f)

In [16]:
import pickle

def load_indices(local_path):
    with open(local_path, 'rb') as f:
        indices = pickle.load(f)
    return indices

# Example usage:
local_path = 'train_indices.pkl'
train_indices = load_indices(local_path)
print(f'Loaded indices: {loaded_indices}')

Loaded indices: tensor([ 8437, 12535,  6630,  ..., 41142, 48978, 46697])


# Attack

In [51]:
import torch
from model import CIFAR10Classifier

# Instantiate the model
model = CIFAR10Classifier()

# Load the state dict
state_dict = torch.load("model_state_dict.pth")
new_state_dict = {}
for key, value in state_dict.items():
    new_key = key.replace('_module.', '')
    new_state_dict[new_key] = value

# Load the state dict into the model
model.load_state_dict(new_state_dict)
model.eval()

print("Model loaded and set to evaluation mode.")

Model loaded and set to evaluation mode.


In [52]:
# Initialize MembershipInferenceAttack
mia = MembershipInferenceAttack(trainset, testset, model, device, epochs=200, num_shadow_models=5)

In [53]:
high_confidence_records = mia.select_high_confidence_records(threshold=0.9)



747
(747,)


In [54]:
splits = mia.stratified_split_indices(high_confidence_records)

cls= 0
[731, 1234, 2156, 3336, 3478, 3906, 3971, 4176, 4249, 5775, 9119, 9121, 9696, 9859, 9920, 10674, 10716, 11912, 14023, 14216, 14329, 14567, 15546, 16116, 16235, 20404, 20976, 21044, 22118, 23808, 25350, 25578, 25712, 26200, 26226, 26507, 26552, 29578, 29783, 29880, 30116, 32483, 32884, 33416, 33654, 33670, 33732, 34280, 35132, 37240, 37250, 37334, 37788, 38326, 40586, 40679, 40861, 41300, 41772, 42379, 43670, 44715, 45604, 45936, 46513, 47381, 47678, 48291, 48458, 49067]
70

cls= 1
[432, 2762, 3230, 3851, 4624, 6559, 6668, 7077, 7913, 9549, 10380, 12164, 12592, 12977, 13108, 13996, 13997, 16133, 18671, 18784, 19565, 19619, 19890, 19935, 20761, 21104, 21474, 22001, 22154, 22314, 22450, 23277, 23428, 23524, 23921, 24541, 24725, 26127, 26331, 26843, 27576, 27601, 28018, 28314, 30083, 31147, 31152, 31969, 32011, 34595, 35371, 35692, 35756, 36046, 36686, 36850, 37081, 39250, 39275, 39353, 40204, 40290, 41087, 42171, 43297, 44034, 44107, 44117, 44835, 45212, 45246, 45455, 49411, 49587]

In [55]:
# Create shadow models
mia.create_shadow_models()

shadow model: 0  epoch: 0  loss= 2.357804298400879
shadow model: 0  epoch: 1  loss= 2.2565250396728516
shadow model: 0  epoch: 2  loss= 2.199331760406494
shadow model: 0  epoch: 3  loss= 2.133995294570923
shadow model: 0  epoch: 4  loss= 2.119450330734253
shadow model: 0  epoch: 5  loss= 2.0752620697021484
shadow model: 0  epoch: 6  loss= 1.8520808219909668
shadow model: 0  epoch: 7  loss= 1.8706443309783936
shadow model: 0  epoch: 8  loss= 1.8171393871307373
shadow model: 0  epoch: 9  loss= 1.5983917713165283
shadow model: 0  epoch: 10  loss= 1.544500470161438
shadow model: 0  epoch: 11  loss= 1.701741337776184
shadow model: 0  epoch: 12  loss= 1.500131368637085
shadow model: 0  epoch: 13  loss= 1.4989264011383057
shadow model: 0  epoch: 14  loss= 1.397576928138733
shadow model: 0  epoch: 15  loss= 1.3324248790740967
shadow model: 0  epoch: 16  loss= 1.2776951789855957
shadow model: 0  epoch: 17  loss= 0.8884097337722778
shadow model: 0  epoch: 18  loss= 1.2877757549285889
shadow mode

In [67]:
test_shadow_model = mia.shadow_models[0]

In [68]:
# Evaluate the model on the test set
test_loss = 0.0
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        test_shadow_model.eval()
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = test_shadow_model(inputs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Test Loss: {test_loss / len(test_loader)}')
print(f'Test Accuracy: {100 * correct / total}%')

Test Loss: 3.56208118511613
Test Accuracy: 41.37%


In [56]:
mia.prepare_attack_data()

cls= 0
shadow model= 1
shadow model= 2
shadow model= 3
shadow model= 4
shadow model= 5
cls= 1
shadow model= 1
shadow model= 2
shadow model= 3
shadow model= 4
shadow model= 5
cls= 2
shadow model= 1
shadow model= 2
shadow model= 3
shadow model= 4
shadow model= 5
cls= 3
shadow model= 1
shadow model= 2
shadow model= 3
shadow model= 4
shadow model= 5
cls= 4
shadow model= 1
shadow model= 2
shadow model= 3
shadow model= 4
shadow model= 5
cls= 5
shadow model= 1
shadow model= 2
shadow model= 3
shadow model= 4
shadow model= 5
cls= 6
shadow model= 1
shadow model= 2
shadow model= 3
shadow model= 4
shadow model= 5
cls= 7
shadow model= 1
shadow model= 2
shadow model= 3
shadow model= 4
shadow model= 5
cls= 8
shadow model= 1
shadow model= 2
shadow model= 3
shadow model= 4
shadow model= 5
cls= 9
shadow model= 1
shadow model= 2
shadow model= 3
shadow model= 4
shadow model= 5


In [57]:
# Train attack models
mia.train_attack_models()

0
140
140
1
148
148
2
144
144
3
138
138
4
148
148
5
148
148
6
134
134
7
192
192
8
156
156
9
146
146


In [58]:
# Save models to drive
mia.save_shadow_models('shadow_models_4')
mia.save_attack_models('attack_models_4')

In [59]:
# Upload models to drive
mia.upload_to_drive('shadow_models_4', 'shadow_models_4')
mia.upload_to_drive('attack_models_4', 'attack_models_4')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
import torch
from model import CIFAR10Classifier

# Instantiate the model
model = CIFAR10Classifier()

# Load the state dict
state_dict = torch.load("model_state_dict.pth")
new_state_dict = {}
for key, value in state_dict.items():
    new_key = key.replace('_module.', '')
    new_state_dict[new_key] = value

# Load the state dict into the model
model.load_state_dict(new_state_dict)
model.eval()

print("Model loaded and set to evaluation mode.")

Model loaded and set to evaluation mode.


**download and load:**

In [12]:
# Initialize MembershipInferenceAttack
mia = MembershipInferenceAttack(trainset, testset, model, device, epochs=200, num_shadow_models=7)

In [13]:
# If you need to download models from Google Drive, use the following functions
# Replace 'your_drive_path' and 'local_path' with the appropriate paths
mia.download_from_drive('shadow_models_2', 'shadow_models_2')
mia.download_from_drive('attack_models_2', 'attack_models_2')

Mounted at /content/drive
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
# Load the shadow models and attack models
mia.load_shadow_models(folder_path='shadow_models_2')
mia.load_attack_models(folder_path='attack_models_2')

In [None]:
attack_models = mia.attack_models

# **real model**

In [15]:
from torchvision import models
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt


class CIFAR10Classifier(nn.Module):
  def __init__(self):
    super(CIFAR10Classifier, self).__init__()
    self.conv1 = nn.Conv2d(3, 16, 3, 1)
    self.conv2 = nn.Conv2d(16, 32, 3, 1)
    self.dropout1 = nn.Dropout2d(0.25)
    self.dropout2 = nn.Dropout2d(0.5)
    self.fc1 = nn.Linear(6272, 64)
    self.fc2 = nn.Linear(64, 10)

  def forward(self, x):
    x = self.conv1(x)
    x = F.relu(x)
    x = self.conv2(x)
    x = F.relu(x)
    x = F.max_pool2d(x, 2)
    x = self.dropout1(x)
    x = torch.flatten(x, 1)
    x = self.fc1(x)
    x = F.relu(x)
    x = self.dropout2(x)
    x = self.fc2(x)
    return x

In [71]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import Subset, DataLoader, TensorDataset
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
import numpy as np

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
'''
# Load the main model
model = CIFAR10Classifier()
state_dict = torch.load("model_state_dict.pth", map_location=device)
new_state_dict = {key.replace('_module.', ''): value for key, value in state_dict.items()}
model.load_state_dict(new_state_dict)
'''
model.to(device)
model.eval()

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

#DATA_ROOT = '../cifar10'
DATA_ROOT = './data'
BATCH_SIZE = 64

'''
# Load the indices from list.txt
indices_file = 'list.txt'
with open(indices_file, 'r') as f:
    indices = [int(line.strip()) for line in f]
'''
indices = train_indices

full_train_dataset = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform)

train_indices_set = set(indices)
all_indices = set(range(len(full_train_dataset)))
other_indices = list(all_indices - train_indices_set)

train_dataset = Subset(full_train_dataset, indices[:len(indices)//2])
other_dataset = Subset(full_train_dataset, other_indices)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
other_loader = DataLoader(other_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Create labels
train_labels = torch.ones(len(train_dataset)).to(device)
other_labels = torch.zeros(len(other_dataset)).to(device)
test_labels = torch.zeros(len(test_dataset)).to(device)

def extract_features(model, dataloader):
    model.eval()
    features = []
    labels = []
    with torch.no_grad():
        for data in dataloader:
            inputs, lbls = data
            inputs = inputs.to(device)
            outputs = model(inputs)
            features.append(outputs)
            labels.append(lbls)
    return torch.cat(features).to(device), torch.cat(labels).to(device)

train_features, train_classes = extract_features(model, train_loader)
other_features, other_classes = extract_features(model, other_loader)
test_features, test_classes = extract_features(model, test_loader)

combined_features = torch.cat((train_features, other_features, test_features))
combined_classes = torch.cat((train_classes, other_classes, test_classes))
combined_labels = torch.cat((train_labels, other_labels, test_labels))

new_dataset = TensorDataset(combined_features, combined_classes, combined_labels)
new_loader = DataLoader(new_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Assuming self.attack_models is a list of trained attacker models for each class
attack_models = mia.attack_models
# Load your trained attack models (not shown here, replace with your actual loading mechanism)


Files already downloaded and verified
Files already downloaded and verified




In [72]:
# Get a sample of 100 test examples
sample_indices = torch.randperm(len(new_dataset))[:1000]
test_data = torch.utils.data.Subset(new_dataset, sample_indices)
new_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=True)

In [73]:

# Create a function to predict membership using attack models


def predict_membership(attack_models, loader):
    predicted_memberships = []
    true_labels = []
    correct = 0
    total = 0

    for features, classes, labels in loader:
        features, classes, labels = features.to(device), classes.to(device), labels.to(device)
        print(total)
        for i in range(len(features)):
            cls = classes[i].item()
            attacker_model = attack_models[cls]  # Select the attacker model for the class
            predicted_membership = attacker_model.predict(features[i].cpu().numpy().reshape(1, -1))
            predicted_memberships.append(predicted_membership)
            true_labels.append(labels[i].item())
            total += 1

            correct += int(labels[i].item() == predicted_membership.item())
    acc = float(correct/total)
    return torch.tensor(predicted_memberships).to(device), torch.tensor(true_labels).to(device) , acc

# Predict membership using the attack models
predicted_memberships, true_labels, acc = predict_membership(attack_models, new_loader)


0
64
128
192
256
320
384
448
512
576
640
704
768
832
896
960


In [74]:
predicted_memberships = predicted_memberships.cpu().numpy()  # Move to CPU
true_labels = true_labels.cpu().numpy()  # Move to CPU

In [75]:
accuracy = (predicted_memberships == true_labels).mean()
print(f'Attack Model Accuracy: {accuracy:.4f}')

cm = confusion_matrix(true_labels, predicted_memberships)
precision = precision_score(true_labels, predicted_memberships)
recall = recall_score(true_labels, predicted_memberships)
f1 = f1_score(true_labels, predicted_memberships)

print(f'Confusion Matrix:\n{cm}')
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Attack Model Accuracy: 0.7688
Confusion Matrix:
[[776 135]
 [ 51  38]]
Precision: 0.2197
Recall: 0.4270
F1 Score: 0.2901
