In [1]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
import math
import pandas as pd
import random
import json
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, TensorDataset, random_split
from inference_attack import *
from model import Model2, GradientClassifier
import torch.nn as nn

In [None]:
# config
dataset_filename = 'inference_dataset.csv'
global_model = 'trained_model_MNIST.pth'
target_update = 'global_update.pth'
learning_rate = 0.01
model = 2
input_shape = (1, 28, 28)
num_classes  = 10
criterion = torch.nn.CrossEntropyLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# load dataset
MNIST_dataset_loader = DataLoader(
    dataset=MNIST_dataset,
    batch_size=1,
    shuffle=False
)

In [None]:
# collect gradient features
features = []
labels = []

victim_model.to(device)
victim_model.eval()  # Ensure model is in evaluation mode

for x, y in MNIST_dataset_loader:
    x, y = x.to(device), y.to(device)

    # Forward pass
    output = victim_model(x)
    loss = criterion(output, y)

    # Compute gradients w.r.t. model parameters
    grads = torch.autograd.grad(loss, victim_model.parameters(), retain_graph=False)
    grad_vector = torch.cat([g.view(-1) for g in grads])  # Flatten and concatenate

    # Detach and store
    features.append(grad_vector.detach().cpu().float())
    labels.append(one_hot_encode(y.item(), num_classes))

In [5]:
# estimate the gradient as theta_0 - theta_1 / lr
theta_0 = torch.load(global_model)
theta_1 = torch.load(target_update)

est_grad = elementwise_diff_state_dicts(theta_0, theta_1, learning_rate)

#print(est_grad)

In [6]:
# create dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)) # normalization params for MNIST
])

victim_model = Model2(input_shape, num_classes).to(device)
victim_model.load_state_dict(state_dict=theta_0)

MNIST_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
indices = random.sample(range(len(MNIST_dataset)), 100)
MNIST_dataset = Subset(MNIST_dataset, indices=indices)
MNIST_dataset_loader = DataLoader(dataset=MNIST_dataset, batch_size=1, shuffle=False)

In [7]:
victim_model.train()
victim_model.to(device)

Model2(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)

In [None]:
labels = []
features = []

criterion = torch.nn.CrossEntropyLoss()

def multihot_encode(label):
    out = []
    for i in range(num_classes):
        if i == label:
            out.append(1)
        else:
            out.append(0)
    return out

for x, y in MNIST_dataset_loader:
    x, y = x.to(device), y.to(device)
    output = victim_model(x)
    loss = criterion(output, y)
    grad = torch.autograd.grad(loss, victim_model.parameters())
    grad = torch.cat([g.view(-1) for g in grad])
    grad = grad.detach().cpu().float()
    y = y[0].item()
    y = multihot_encode(y)
    labels.append(y)
    features.append(grad)

features_tensor = torch.stack(features)
labels_tensor = torch.stack([torch.tensor(y, dtype=torch.float32) for y in labels])

dataset = TensorDataset(features_tensor, labels_tensor)
input_dim = features_tensor.shape[1]
output_dim = labels_tensor.shape[1]

AttributeError: 'Tensor' object has no attribute 'float16'

In [9]:
from numpy.linalg import norm

def train_grad_classifier(model, train_loader, test_loader, num_epochs=10, lr=1e-3, device='cuda'):
    model = model.to(device)
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        all_preds = []
        all_labels = []

        for x_batch, y_batch in train_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()

            outputs = model(x_batch)  # outputs are already sigmoid activated
            loss = criterion(outputs, y_batch)

            loss.backward()
            optimizer.step()

            running_loss += loss.item() * x_batch.size(0)

            preds = (outputs.detach().cpu().numpy() > 0.5).astype(int)
            all_preds.append(preds)
            all_labels.append(y_batch.cpu().numpy())

        epoch_loss = running_loss / len(train_loader.dataset)

        all_preds = np.vstack(all_preds)
        all_labels = np.vstack(all_labels)
        #f1 = f1_score(all_labels, all_preds, average='macro', zero_division=0)

        print(f"Epoch {epoch+1}/{num_epochs} | Loss: {epoch_loss:.4f}")
        evaluate_model(model, test_loader, criterion, device)
        

def evaluate_model(model, test_loader, criterion, device='cuda'):
    model.eval()
    model.to(device)

    total_loss = 0.0
    all_preds = []
    all_labels = []
    cosine_sims = []

    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            outputs = model(x_batch)  # sigmoid output
            loss = criterion(outputs, y_batch)
            total_loss += loss.item() * x_batch.size(0)

            preds = outputs.cpu().numpy()
            labels = y_batch.cpu().numpy()

            all_preds.append(preds)
            all_labels.append(labels)

            # Compute cosine similarity per sample
            for p, l in zip(preds, labels):
                # Avoid division by zero
                if norm(p) == 0 or norm(l) == 0:
                    cosine_sims.append(0.0)
                else:
                    cos_sim = np.dot(p, l) / (norm(p) * norm(l))
                    cosine_sims.append(cos_sim)

    avg_loss = total_loss / len(test_loader.dataset)
    all_preds = np.vstack(all_preds)
    all_labels = np.vstack(all_labels)
    avg_cosine_similarity = np.mean(cosine_sims)

    print(f"Test Loss: {avg_loss:.4f} | Cosine Similarity: {avg_cosine_similarity:.4f}")

In [10]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

grad_classifier_model = GradientClassifier(input_dim=features_tensor.shape[1], output_dim=labels_tensor.shape[1])
grad_classifier_model.to(device)

criterion = nn.BCELoss() 
optimizer = torch.optim.Adam(grad_classifier_model.parameters(), lr=1e-4)

num_epochs = 15

train_grad_classifier(grad_classifier_model, train_loader=train_loader, test_loader=test_loader, num_epochs=num_epochs, device=device)

Epoch 1/15 | Loss: 0.6897
Test Loss: 0.6078 | Cosine Similarity: 0.3602
Epoch 2/15 | Loss: 0.6274
Test Loss: 0.6220 | Cosine Similarity: 0.3895
Epoch 3/15 | Loss: 0.5654
Test Loss: 0.7656 | Cosine Similarity: 0.3896
Epoch 4/15 | Loss: 0.5211
Test Loss: 1.0527 | Cosine Similarity: 0.3889
Epoch 5/15 | Loss: 0.4828
Test Loss: 1.0906 | Cosine Similarity: 0.3963
Epoch 6/15 | Loss: 0.4496
Test Loss: 1.0810 | Cosine Similarity: 0.4079
Epoch 7/15 | Loss: 0.4166
Test Loss: 0.9817 | Cosine Similarity: 0.4194
Epoch 8/15 | Loss: 0.3904
Test Loss: 0.9775 | Cosine Similarity: 0.4238
Epoch 9/15 | Loss: 0.3641
Test Loss: 0.9811 | Cosine Similarity: 0.4170
Epoch 10/15 | Loss: 0.3395
Test Loss: 0.9854 | Cosine Similarity: 0.4188
Epoch 11/15 | Loss: 0.3434
Test Loss: 0.9899 | Cosine Similarity: 0.4221
Epoch 12/15 | Loss: 0.2922
Test Loss: 0.9839 | Cosine Similarity: 0.4241
Epoch 13/15 | Loss: 0.2754
Test Loss: 0.9804 | Cosine Similarity: 0.4263
Epoch 14/15 | Loss: 0.2568
Test Loss: 1.0027 | Cosine Simila

In [11]:
evaluate_model(grad_classifier_model, test_loader, criterion, device)

Test Loss: 1.0163 | Cosine Similarity: 0.4328
