In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

import torchvision
import torchvision.transforms as transforms

import opacus
from opacus import PrivacyEngine

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

# Random Seeding
torch.manual_seed(0)
np.random.seed(0)

In [3]:
# Setup MNIST dataset
transform = transforms.ToTensor()
train_set = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)

test_set = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=False)

In [5]:
# Setup simple FC model
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.fc1 = nn.Linear(784, 64)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, x):
        x = x.view(-1, 784)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Setup model and optimizer
model = Discriminator().to(device)

optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
data_loader = train_loader
loss_fn = nn.BCEWithLogitsLoss()

In [11]:
sample = next(iter(data_loader))[0]
print(sample.shape)
output = model(sample.to(device))
print(output.shape)
target = torch.ones((sample.shape[0], 1)).to(device)
print(target.shape)


torch.Size([64, 1, 28, 28])
torch.Size([64, 1])
torch.Size([64, 1])


In [16]:
loss = loss_fn(output.flatten(), target.flatten())
print(loss)

tensor(0.5859, device='cuda:0',
       grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)


In [13]:
# Print shapes
model.fc1.weight.shape, model.fc2.weight.shape

(torch.Size([64, 784]), torch.Size([1, 64]))

In [41]:
# Given parameter clip bounds c_p, compute maximal ReLU activation bounds B_sigma
def compute_ReLU_bounds(model, c_p, input_size=(784,), input_bounds=1.0):
    sample = torch.ones(input_size).to(device) * input_bounds
    max_val = 0.0
    sum_mk_mkp1 = 0
    skip_first = True

    for layer in model.modules():
        if isinstance(layer, nn.Linear):
            print(layer.weight.shape)

            W = torch.ones_like(layer.weight) * c_p
            b = torch.ones_like(layer.bias) * c_p
            sample = W @ sample + b
            sample_max = sample.max().detach().item()
            if max_val < sample_max:
                max_val = sample_max
            print(max_val, sample_max)
            
            if skip_first:
                skip_first = False
            else:
                # sum_mk_mkp1 += W.shape[0] * W.shape[1]
                sum_mk_mkp1 += (W.shape[0] + 1) * (W.shape[1] + 1)
                print(W.shape[0], W.shape[1], sum_mk_mkp1)
    return max_val, sum_mk_mkp1

# Setup parameters for Gradient Clip Calculation
c_p = 0.001
B_sigma_p = 1.0
B_sigma, sum_mk_mkp1 = compute_ReLU_bounds(model, c_p)

c_g = 2 * c_p * B_sigma * (B_sigma_p ** 2) * sum_mk_mkp1
c_g

# 3.2572895401924264

torch.Size([64, 784])
0.7850000262260437 0.7850000262260437
torch.Size([1, 64])
0.7850000262260437 0.05124000459909439
1 64 130


0.20410000681877136

In [20]:
def param_grad_norm(model):
    gradient_norm = 0
    for param in model.parameters():
        gradient_norm += param.grad.norm().item() ** 2
    gradient_norm = gradient_norm ** 0.5
    return gradient_norm

def param_grad_l1(model):
    gradient_norm = 0
    for param in model.parameters():
        gradient_norm += param.grad.abs().sum().item()
    return gradient_norm

In [23]:
max_norm = 0

In [40]:
first = True
for idx in range(1000):
    if first:
        fill_val = c_p
        model.fc1.weight.data.fill_(fill_val)
        model.fc1.bias.data.fill_(fill_val)
        model.fc2.weight.data.fill_(fill_val)
        model.fc2.bias.data.fill_(fill_val)
        first = False
    else:
        # Randomize model weights (clip to c_p)
        model.fc1.weight.data = torch.clamp(torch.randn_like(model.fc1.weight), -c_p, c_p)
        model.fc1.bias.data = torch.clamp(torch.randn_like(model.fc1.bias), -c_p, c_p)
        model.fc2.weight.data = torch.clamp(torch.randn_like(model.fc2.weight), -c_p, c_p)
        model.fc2.bias.data = torch.clamp(torch.randn_like(model.fc2.bias), -c_p, c_p)

    # bias to 0
    model.fc1.bias.data.fill_(0)
    model.fc2.bias.data.fill_(0)

    for c in range(2):
        optimizer.zero_grad()

        # random sample
        sample = (torch.rand(784) > 0.1).to(torch.float32).to(device)
        # sample = torch.ones(784).to(device)
        sample_out = model(sample)

        # Assert all activations are below B_sigma
        activated_1 = F.relu(model.fc1(sample))
        activated_2 = torch.sigmoid(model.fc2(activated_1))
        assert activated_1.max().item() < B_sigma
        assert activated_2.max().item() < B_sigma

        target = torch.ones((1, 1)).to(device) * c

        loss = loss_fn(sample_out, target)
        loss.backward()

        # print(model.fc1.bias.grad.max().item(), model.fc1.bias.grad.min().item())
        # print(model.fc2.bias.grad)

        grad_norm = param_grad_norm(model)
        # grad_norm = param_grad_l1(model)
        # print("Class:", c, "---", "Grad Norm:", grad_norm)
        if grad_norm > max_norm:
            max_norm = grad_norm
            print("New Max Norm:", max_norm, idx, c)
    
    # if max_norm > c_g:
    #     print("Max Norm Exceeded")
    #     break

In [36]:
B_sigma

0.7850000262260437

In [27]:
max_norm

3.2572895401924264

In [10]:

    
# Fill weights and biases to c_p
fill_val = c_p
model.fc1.weight.data.fill_(fill_val)
model.fc1.bias.data.fill_(fill_val)
model.fc2.weight.data.fill_(fill_val)
model.fc2.bias.data.fill_(fill_val)


for c in range(10):
    optimizer.zero_grad()

    sample = torch.ones(784).to(device)
    sample = F.relu(model.fc1(sample))
    sample = model.fc2(sample)
    sample = sample.unsqueeze(0)

    target = torch.tensor([c]).to(device)

    loss = loss_fn(sample, target)
    loss.backward()

    print("Class:", c, "---", param_grad_norm(model))


Class: 0 --- 0.0
Class: 1 --- 2.802596928649634e-45


../aten/src/ATen/native/cuda/Loss.cu:242: nll_loss_forward_reduce_cuda_kernel_2d: block: [0,0,0], thread: [0,0,0] Assertion `t >= 0 && t < n_classes` failed.


RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling `cublasSgemm( handle, opa, opb, m, n, k, &alpha, a, lda, b, ldb, &beta, c, ldc)`

In [None]:
# Train model
def train(model, optimizer, loss_fn, data_loader, epochs=10):
    model.train()
    for epoch in range(epochs):
        for batch_idx, (data, target) in enumerate(data_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            print(output.shape, target.shape)
            break
            loss = loss_fn(output, target)
            loss.backward()
            optimizer.step()

            # Print max gradient
            max_grad = 0.0
            for param in model.parameters():
                if param.grad is not None:
                    max_grad = max(max_grad, param.grad.max().detach().item())
            print(max_grad)
            if max_grad > c_g:
                print("Gradient clipping required")
                break

            if batch_idx % 100 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data), len(data_loader.dataset),
                    100. * batch_idx / len(data_loader), loss.item()))
    
    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
        epoch, len(data_loader.dataset), len(data_loader.dataset),
        100. * len(data_loader.dataset) / len(data_loader.dataset), loss.item()))
        
train(model, optimizer, loss_fn, data_loader, epochs=100)

torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([64, 10]) torch.Size([64])
torch.Size([

UnboundLocalError: local variable 'loss' referenced before assignment

In [None]:
for i, layer in enumerate(model.modules()):
    if isinstance(layer, nn.Linear):
        print("Linear layer: ", layer.weight.shape, layer.bias.shape)

Linear layer:  torch.Size([64, 784]) torch.Size([64])
Linear layer:  torch.Size([10, 64]) torch.Size([10])


In [None]:
privacy_engine = PrivacyEngine()

print(
    f"Before make_private(). "
    f"Model:{type(model)}, \nOptimizer:{type(optimizer)}, \nDataLoader:{type(data_loader)}"
)

model, optimizer, data_loader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer,
    data_loader=data_loader,
    max_grad_norm=1.0,
    noise_multiplier=1.0,
)

print("="*20)

print(
    f"After make_private(). "
    f"Model:{type(model)}, \nOptimizer:{type(optimizer)}, \nDataLoader:{type(data_loader)}"
)

Before make_private(). Model:<class '__main__.MLP'>, 
Optimizer:<class 'torch.optim.sgd.SGD'>, 
DataLoader:<class 'torch.utils.data.dataloader.DataLoader'>
After make_private(). Model:<class 'opacus.grad_sample.grad_sample_module.GradSampleModule'>, 
Optimizer:<class 'opacus.optimizers.optimizer.DPOptimizer'>, 
DataLoader:<class 'opacus.data_loader.DPDataLoader'>


