In [51]:
import torch
import random
import numpy as np
from numpy.linalg import norm
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, TensorDataset, Subset, random_split
from inference_attack import *
from model import *

In [52]:
# config
dataset_name = "MNIST"         # "MNIST" or "CIFAR10"
data_path = "./data"
batch_size = 8                   # Must be 1 for per-sample gradient
learning_rate = 0.001
num_classes = 10
use_pretrained_model = False
inference_dataset_size = 100
global_model_path = "trained_model_MNIST.pth"
target_model_path = "global_update.pth"
criterion_global_model = torch.nn.CrossEntropyLoss()
criterion_grad_classifier = torch.nn.BCELoss()

train_size = 0.7
test_size = 1 - train_size
num_epochs = 15

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [53]:
# load dataset (we use test datasets to train the model on samples that have not been used in training of the global model)
if dataset_name == "MNIST":
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    dataset = datasets.MNIST(root=data_path, train=False, download=True, transform=transform)
    input_shape = (1, 28, 28)
    #input_channels = 1
    #flatten_size = 64 * 6 * 6

elif dataset_name == "CIFAR10":
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2470, 0.2435, 0.2616))
    ])
    dataset = datasets.CIFAR10(root=data_path, train=False, download=True, transform=transform)
    input_shape = (3, 32, 32)
    # input_channels = 3
    # flatten_size = 64 * 6 * 6

else:
    raise ValueError(f"Unsupported dataset: {dataset_name}")

indices = random.sample(range(len(dataset)), inference_dataset_size)
dataset = Subset(dataset, indices=indices)

dataloader = DataLoader(dataset, batch_size=1, shuffle=False)

In [54]:
# load global model and target update
theta_0 = torch.load(global_model_path)
theta_1 = torch.load(target_model_path)

inference_model = Model1(input_shape, num_classes).to(device)
inference_model.load_state_dict(state_dict=theta_0)

<All keys matched successfully>

In [55]:
# collect gradient features
features = []
labels = []

inference_model.to(device)
inference_model.eval() # TODO: train or eval???

for x, y in dataloader:
    x, y = x.to(device), y.to(device)

    # Forward pass
    output = inference_model(x)
    loss = criterion_global_model(output, y)

    # print("pr:", output, "gt:", y)

    # Compute gradients w.r.t. model parameters
    grad = torch.autograd.grad(loss, inference_model.parameters(), retain_graph=False)
    # grad = torch.autograd.grad(loss, [inference_model.fc1.weight, inference_model.fc1.bias], retain_graph=False)
    grad_vector = torch.cat([g.view(-1) for g in grad])  # Flatten and concatenate

    # Detach and store
    features.append(grad_vector.detach().cpu().float())
    labels.append(one_hot_encode(y.item(), num_classes))
    # labels.append(y)

features = torch.stack(features)
labels = torch.stack(labels).float()
#labels = torch.stack([y.detach().clone().float32() for y in labels])

dataset = TensorDataset(features, labels)
input_dim = features.shape[1]
output_dim = labels.shape[1]

print(input_dim, output_dim)

124682 10


In [56]:
# estimate the target gradient update as theta_0 - theta_1 / lr
#est_grad = elementwise_diff_state_dicts(theta_0, theta_1, learning_rate)

In [57]:
def train_grad_classifier(model, train_loader, test_loader, criterion, num_epochs=10, lr=1e-3, device='cuda'):
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for x_batch, y_batch in train_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()
            outputs = model(x_batch)  # raw logits

            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * x_batch.size(0)

            # Accuracy
            #preds = torch.argmax(outputs, dim=1)
            #correct += (preds == y_batch).sum().item()
            total += y_batch.size(0)

        avg_loss = running_loss / total
        accuracy = 0
        #accuracy = correct / total * 100

        print(f"Epoch {epoch+1}/{num_epochs} | Loss: {avg_loss:.4f} | Accuracy: {accuracy:.2f}%")
        evaluate_model(model, test_loader, criterion, device)
        
def evaluate_model(model, test_loader, criterion, device='cuda'):
    model.eval()
    model.to(device)

    total_loss = 0.0
    all_preds = []
    all_labels = []
    cosine_sims = []

    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            outputs = model(x_batch)  # sigmoid output
            loss = criterion(outputs, y_batch)
            total_loss += loss.item() * x_batch.size(0)

            preds = outputs.cpu().numpy()
            labels = y_batch.cpu().numpy()

            all_preds.append(preds)
            all_labels.append(labels)

            # Compute cosine similarity per sample
            for p, l in zip(preds, labels):
                # Avoid division by zero
                if norm(p) == 0 or norm(l) == 0:
                    cosine_sims.append(0.0)
                else:
                    cos_sim = np.dot(p, l) / (norm(p) * norm(l))
                    cosine_sims.append(cos_sim)

    avg_loss = total_loss / len(test_loader.dataset)
    all_preds = np.vstack(all_preds)
    all_labels = np.vstack(all_labels)
    avg_cosine_similarity = np.mean(cosine_sims)

    print(f"Test Loss: {avg_loss:.4f} | Cosine Similarity: {avg_cosine_similarity:.4f}")

    return all_preds

In [58]:
# dataset preparation
tmp_train = int(train_size*inference_dataset_size)
tmp_test = int(inference_dataset_size-train_size*inference_dataset_size)
print(tmp_train, tmp_test)
train_dataset, test_dataset = random_split(dataset, [tmp_train, tmp_test])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

70 30


In [59]:
grad_classifier_model = GradientClassifier(input_dim=input_dim, output_dim=output_dim)
grad_classifier_model.to(device)

train_grad_classifier(grad_classifier_model, train_loader, test_loader, criterion_grad_classifier, num_epochs=num_epochs, lr=learning_rate, device=device)

Epoch 1/15 | Loss: 0.6580 | Accuracy: 0.00%
Test Loss: 0.5847 | Cosine Similarity: 0.3840
Epoch 2/15 | Loss: 0.5914 | Accuracy: 0.00%
Test Loss: 0.5547 | Cosine Similarity: 0.3844
Epoch 3/15 | Loss: 0.4913 | Accuracy: 0.00%
Test Loss: 0.5433 | Cosine Similarity: 0.3756
Epoch 4/15 | Loss: 0.4322 | Accuracy: 0.00%
Test Loss: 0.4846 | Cosine Similarity: 0.3973
Epoch 5/15 | Loss: 0.4138 | Accuracy: 0.00%
Test Loss: 0.4089 | Cosine Similarity: 0.4270
Epoch 6/15 | Loss: 0.3246 | Accuracy: 0.00%
Test Loss: 0.3559 | Cosine Similarity: 0.4221
Epoch 7/15 | Loss: 0.2898 | Accuracy: 0.00%
Test Loss: 0.3257 | Cosine Similarity: 0.4242
Epoch 8/15 | Loss: 0.2612 | Accuracy: 0.00%
Test Loss: 0.3166 | Cosine Similarity: 0.4251
Epoch 9/15 | Loss: 0.2453 | Accuracy: 0.00%
Test Loss: 0.2994 | Cosine Similarity: 0.4382
Epoch 10/15 | Loss: 0.2335 | Accuracy: 0.00%
Test Loss: 0.2902 | Cosine Similarity: 0.4585
Epoch 11/15 | Loss: 0.2252 | Accuracy: 0.00%
Test Loss: 0.2900 | Cosine Similarity: 0.5002
Epoch 12

In [60]:
test_preds = evaluate_model(grad_classifier_model, test_loader, criterion_grad_classifier, device)

Test Loss: 0.3481 | Cosine Similarity: 0.5067
