# Building an Image Classifier with Differential Privacy

## Hyper-parameters

In [None]:
import warnings
warnings.simplefilter("ignore")

# Clipping bound handed to the privacy engine as `max_grad_norm`.
MAX_GRAD_NORM = 1
# Target privacy budget handed to the privacy engine as `target_epsilon`.
# NOTE(review): this is a very large epsilon for a DP guarantee — confirm it is intended.
EPSILON = 190
# Target delta handed to the privacy engine and used when reporting epsilon.
DELTA = 1e-5
# Number of training epochs (also passed to the privacy engine as `epochs`).
EPOCHS = 20

# Learning rate for the RMSprop optimizer.
LR = 1e-3

In [None]:
# Batch size given to both the train and the test DataLoader.
BATCH_SIZE = 64
# Passed to BatchMemoryManager as `max_physical_batch_size` inside train().
MAX_PHYSICAL_BATCH_SIZE = 128

## Data

Now, let's load the CIFAR100 dataset. We don't use data augmentation here because, in our experiments, we found that data augmentation lowers utility when training with DP.

In [None]:
import torch
import os
import torchvision
import torchvision.transforms as transforms
import numpy as np

# These values, specific to the CIFAR100 dataset, are assumed to be known.
# If necessary, they can be computed with modest privacy budgets.
import torchvision.transforms as transforms

import torchvision.datasets as datasets
# Root directory for dataset downloads (machine-specific path).
DATA_ROOT = r"F:\Other\data"

# Untransformed copies of both splits, used only to measure pixel statistics.
train_ds = datasets.CIFAR100(os.path.join(DATA_ROOT, 'cifar100'), train=True, download=True)
test_ds = datasets.CIFAR100(os.path.join(DATA_ROOT, 'cifar100'), train=False, download=True)

# Every image is converted to an array and all of them are concatenated along
# the first axis, so reducing over axes (0, 1) leaves one value per channel.
_data_train = np.concatenate([np.array(train_ds[i][0]) for i in range(len(train_ds))])
_data_test = np.concatenate([np.array(test_ds[i][0]) for i in range(len(test_ds))])

# Per-channel statistics, rescaled from the 0..255 pixel range to 0..1 so they
# match the output range of transforms.ToTensor().
train_mean = _data_train.mean(axis=(0, 1)) / 255
train_std = _data_train.std(axis=(0, 1)) / 255

# Test statistics are reported for reference only; they are NOT used below.
test_mean = _data_test.mean(axis=(0, 1)) / 255
test_std = _data_test.std(axis=(0, 1)) / 255

print('train mean/std:', train_mean, train_std)
print('test mean/std:', test_mean, test_std)

print(f'Hard code CIFAR100 train/test mean/std for next time')

# Standardize every split with the TRAINING statistics. Normalizing with the
# test-set mean/std would leak information about the held-out data into the
# preprocessing applied to the training set.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(train_mean, train_std),
])


Using torchvision datasets, we can load CIFAR100 and transform the PIL images into tensors standardized with the per-channel mean and standard deviation computed above.

In [None]:
from torchvision.datasets import CIFAR10,CIFAR100

# DATA_ROOT = 'D:/research_2022/'

# Training split with the ToTensor + Normalize pipeline applied on access.
train_dataset = CIFAR100(
    root=DATA_ROOT, train=True, download=True, transform=transform)


import torch.utils.data as data_utils

# Optional: restrict training to the first 35000 samples.
# indices = torch.arange(35000)
# train_dataset = data_utils.Subset(train_dataset, indices)

# No shuffle flag is set here; this loader is later passed to
# privacy_engine.make_private_with_epsilon, which returns the loader that is
# actually used for training.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
)

# Held-out split, using the same transform as the training split.
test_dataset = CIFAR100(
    root=DATA_ROOT, train=False, download=True, transform=transform)

# Evaluation loader: fixed order, same batch size as training.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [None]:
''' ConvNet '''
import torch.nn as nn
class ConvNet(nn.Module):
    """Configurable plain convolutional classifier.

    The feature extractor is ``net_depth`` repetitions of
    ``Conv2d(3x3) -> [norm] -> activation -> [pooling]``; a single linear
    layer over the flattened feature map produces the class scores.

    Args:
        channel: number of channels of the input images.
        num_classes: number of outputs of the final linear layer.
        net_width: number of output channels of every convolution.
        net_depth: number of convolution blocks.
        net_act: 'sigmoid', 'relu' or 'leakyrelu'.
        net_norm: 'batchnorm', 'layernorm', 'instancenorm', 'groupnorm'
            or 'none'.
        net_pooling: 'maxpooling', 'avgpooling' or 'none'.
        im_size: (height, width) of the input images.

    Raises:
        ValueError: if ``net_act``, ``net_norm`` or ``net_pooling`` is not one
            of the names listed above (previously this called ``exit``, which
            terminated the whole interpreter).
    """

    def __init__(self, channel, num_classes, net_width, net_depth, net_act, net_norm, net_pooling, im_size=(32, 32)):
        super().__init__()

        self.features, shape_feat = self._make_layers(
            channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size)
        # The classifier consumes the flattened (C, H, W) feature map.
        num_feat = shape_feat[0] * shape_feat[1] * shape_feat[2]
        self.classifier = nn.Linear(num_feat, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes) for input ``x``."""
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _get_activation(self, net_act):
        """Return a new activation module for the given option name."""
        if net_act == 'sigmoid':
            return nn.Sigmoid()
        if net_act == 'relu':
            return nn.ReLU(inplace=True)
        if net_act == 'leakyrelu':
            return nn.LeakyReLU(negative_slope=0.01)
        raise ValueError('unknown activation function: %s' % net_act)

    def _get_pooling(self, net_pooling):
        """Return a new 2x2/stride-2 pooling module, or None for 'none'."""
        if net_pooling == 'maxpooling':
            return nn.MaxPool2d(kernel_size=2, stride=2)
        if net_pooling == 'avgpooling':
            return nn.AvgPool2d(kernel_size=2, stride=2)
        if net_pooling == 'none':
            return None
        raise ValueError('unknown net_pooling: %s' % net_pooling)

    def _get_normlayer(self, net_norm, shape_feat):
        """Return a new normalization module for a (C, H, W) feature map, or None for 'none'."""
        # shape_feat = (c, h, w)
        if net_norm == 'batchnorm':
            return nn.BatchNorm2d(shape_feat[0], affine=True)
        if net_norm == 'layernorm':
            return nn.LayerNorm(shape_feat, elementwise_affine=True)
        if net_norm == 'instancenorm':
            # One group per channel: GroupNorm is used as the instance norm.
            return nn.GroupNorm(shape_feat[0], shape_feat[0], affine=True)
        if net_norm == 'groupnorm':
            return nn.GroupNorm(4, shape_feat[0], affine=True)
        if net_norm == 'none':
            return None
        raise ValueError('unknown net_norm: %s' % net_norm)

    def _make_layers(self, channel, net_width, net_depth, net_norm, net_act, net_pooling, im_size):
        """Build the feature extractor.

        Returns:
            (nn.Sequential, [C, H, W]): the layers and the shape of the
            feature map they produce for one image.
        """
        layers = []
        in_channels = channel
        # 28-pixel inputs are tracked as 32x32; the first convolution of a
        # single-channel network uses padding 3, which grows 28 to 32.
        if im_size[0] == 28:
            im_size = (32, 32)
        shape_feat = [in_channels, im_size[0], im_size[1]]
        for d in range(net_depth):
            layers += [nn.Conv2d(in_channels, net_width, kernel_size=3, padding=3 if channel == 1 and d == 0 else 1)]
            shape_feat[0] = net_width
            if net_norm != 'none':
                layers += [self._get_normlayer(net_norm, shape_feat)]
            layers += [self._get_activation(net_act)]
            in_channels = net_width
            if net_pooling != 'none':
                layers += [self._get_pooling(net_pooling)]
                # Each pooling layer halves both spatial dimensions.
                shape_feat[1] //= 2
                shape_feat[2] //= 2

        return nn.Sequential(*layers), shape_feat
    
    

def get_default_convnet_setting():
    """Return the default ConvNet options.

    Returns:
        tuple: ``(net_width, net_depth, net_act, net_norm, net_pooling)``.
    """
    return 128, 3, 'relu', 'instancenorm', 'avgpooling'

# Build a ConvNet from the default options.
# NOTE(review): num_classes is 10 here although the notebook loads CIFAR100,
# and `nets` is not used by any cell visible in this file — confirm intent.
net_width, net_depth, net_act, net_norm, net_pooling = get_default_convnet_setting()
nets= ConvNet(channel=3, num_classes= 10,net_width=net_width, net_depth=net_depth, net_act=net_act, net_norm=net_norm, net_pooling=net_pooling,im_size=(32,32))

In [None]:
# %% AlexNet Module


class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    Five convolutions (each followed by ReLU) with three 2x2 max-pooling
    stages, then one linear layer. The linear layer takes 256 flattened
    features, i.e. a 1x1 final feature map, which is what a 32x32 input
    produces.

    Args:
        num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, num_classes=100):
        super().__init__()
        # Layers are created in execution order.
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 192, kernel_size=(5, 5), padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(192, 384, kernel_size=(3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=(3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=(3, 3), padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)
        self.classifier = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return class scores of shape (batch, num_classes)."""
        feature_map = self.features(x)
        flattened = feature_map.view(feature_map.size(0), -1)
        return self.classifier(flattened)


def alexnet(**kwargs):
    r"""Build an ``AlexNet``, forwarding every keyword argument to its constructor.

    Architecture reference: the
    `"One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.
    """
    return AlexNet(**kwargs)


## Model

In [None]:
from torchvision import models

# Model trained in this notebook: AlexNet with its default num_classes=100.
model =AlexNet()

Now, let’s check if the model is compatible with Opacus. Opacus does not support all types of PyTorch layers. To check if your model is compatible with the privacy engine, we have provided a util class to validate your model.

When you run the code below, you're presented with a list of errors, indicating which modules are incompatible.

In [None]:
from opacus.validators import ModuleValidator

# Collect Opacus compatibility problems for the model (non-strict mode).
errors = ModuleValidator.validate(model, strict=False)
# Notebook display: show the last five entries.
errors[-5:]

Let us modify the model to work with Opacus. From the output above, you can see that the BatchNorm layers are not supported because they compute the mean and variance across the batch, creating a dependency between samples in a batch, a privacy violation.

The recommended approach to deal with this is to call `ModuleValidator.fix(model)`, which tries to find the best replacement for incompatible modules. For example, it replaces BatchNorm modules with GroupNorm.
You can see that, after this, no exception is raised.

In [None]:
# Let Opacus replace incompatible modules, then validate the result again.
model = ModuleValidator.fix(model)
ModuleValidator.validate(model, strict=False)

For maximal speed, we can check if CUDA is available and supported by the PyTorch installation. If GPU is available, set the `device` variable to your CUDA-compatible device. We can then transfer the neural network onto that device.

In [None]:
# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model's parameters to the selected device.
model = model.to(device)

We then define our optimizer and loss function. Opacus’ privacy engine can attach to any (first-order) optimizer.  You can use your favorite&mdash;Adam, Adagrad, RMSprop&mdash;as long as it has an implementation derived from [torch.optim.Optimizer](https://pytorch.org/docs/stable/optim.html). In this tutorial, we're going to use [RMSprop](https://pytorch.org/docs/stable/optim.html).

In [None]:
import torch.nn as nn
import torch.optim as optim

# Module-level loss; train() and test() each create their own local instance.
criterion = nn.CrossEntropyLoss()
# RMSprop over all model parameters with the learning rate LR.
optimizer = optim.RMSprop(model.parameters(), lr=LR)

## Prepare for Training

We will define a util function to calculate accuracy

In [None]:
def accuracy(preds, labels):
    """Return the fraction of positions where ``preds`` equals ``labels``.

    Both arguments are compared element-wise and the mean of the resulting
    boolean array is returned (callers in this file pass NumPy arrays).
    """
    matches = preds == labels
    return matches.mean()

We now attach the privacy engine initialized with the privacy hyperparameters defined earlier.

In [None]:
from opacus import PrivacyEngine

# Engine instance; train() later queries it for the spent epsilon.
privacy_engine = PrivacyEngine()

# Rebind model, optimizer and train_loader to the objects returned by the
# engine, configured from the hyper-parameters defined at the top of the notebook.
model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    epochs=EPOCHS,
    target_epsilon=EPSILON,
    target_delta=DELTA,
    max_grad_norm=MAX_GRAD_NORM,
)

# Report the noise multiplier exposed by the returned optimizer and the clipping bound.
print(f"Using sigma={optimizer.noise_multiplier} and C={MAX_GRAD_NORM}")


We will then define our train function. This function will train the model for one epoch. 

In [None]:
import numpy as np
from opacus.utils.batch_memory_manager import BatchMemoryManager


def train(model, train_loader, optimizer, epoch, device):
    """Train ``model`` for one epoch and print running metrics every 200 batches.

    Relies on the module-level ``privacy_engine``, ``DELTA``,
    ``MAX_PHYSICAL_BATCH_SIZE`` and ``accuracy``.

    Args:
        model: network to train; switched to training mode.
        train_loader: loader wrapped in a ``BatchMemoryManager`` for iteration.
        optimizer: optimizer stepped once per yielded batch.
        epoch: epoch number, used only in the progress message.
        device: device the batches are moved to.
    """
    model.train()
    loss_fn = nn.CrossEntropyLoss()

    running_losses = []
    running_accs = []

    with BatchMemoryManager(
        data_loader=train_loader,
        max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE,
        optimizer=optimizer,
    ) as physical_loader:

        for step, (images, target) in enumerate(physical_loader, start=1):
            optimizer.zero_grad()
            images, target = images.to(device), target.to(device)

            # Forward pass and loss.
            logits = model(images)
            loss = loss_fn(logits, target)

            # Top-1 accuracy of this batch, computed on the CPU with NumPy.
            predicted = np.argmax(logits.detach().cpu().numpy(), axis=1)
            truth = target.detach().cpu().numpy()
            batch_acc = accuracy(predicted, truth)

            running_losses.append(loss.item())
            running_accs.append(batch_acc)

            loss.backward()
            optimizer.step()

            # Progress line every 200 batches only.
            if step % 200 != 0:
                continue
            epsilon = privacy_engine.get_epsilon(DELTA)
            print(
                f"\tTrain Epoch: {epoch} \t"
                f"Loss: {np.mean(running_losses):.6f} "
                f"Acc@1: {np.mean(running_accs) * 100:.6f} "
                f"(ε = {epsilon:.2f}, δ = {DELTA})"
            )

Next, we will define our test function to validate our model on our test dataset. 

In [None]:
def test(model, test_loader, device):
    """Evaluate ``model`` on ``test_loader``; print and return top-1 accuracy.

    Per-batch loss and accuracy are averaged with the batch sizes as weights,
    so a shorter final batch does not count as much as a full one and the
    result is the true per-sample average.

    Args:
        model: network to evaluate; switched to eval mode.
        test_loader: iterable of ``(images, target)`` batches.
        device: device the batches are moved to.

    Returns:
        Top-1 accuracy in [0, 1] over all samples (NaN if the loader yields
        no samples).
    """
    model.eval()
    criterion = nn.CrossEntropyLoss()
    losses = []
    top1_acc = []
    batch_sizes = []

    with torch.no_grad():
        for images, target in test_loader:
            images = images.to(device)
            target = target.to(device)

            output = model(images)
            loss = criterion(output, target)
            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            top1_acc.append(acc)
            batch_sizes.append(len(labels))

    if sum(batch_sizes) > 0:
        # Weight each batch by its size: the criterion and accuracy() both
        # return per-batch means.
        loss_avg = np.average(losses, weights=batch_sizes)
        top1_avg = np.average(top1_acc, weights=batch_sizes)
    else:
        loss_avg = top1_avg = np.float64("nan")

    print(
        f"\tTest set:"
        f"Loss: {loss_avg:.6f} "
        f"Acc: {top1_avg * 100:.6f} "
    )
    return top1_avg

## Train the network

In [None]:
from tqdm.notebook import tqdm

# Run EPOCHS training passes; train() receives a 1-based epoch number for its log lines.
for epoch in tqdm(range(EPOCHS), desc="Epoch", unit="epoch"):
    train(model, train_loader, optimizer, epoch + 1, device)

In [None]:
# Rebuild the test dataset and loader with the same arguments used in the
# data-loading cell earlier in the notebook.
test_dataset = CIFAR100(
    root=DATA_ROOT, train=False, download=True, transform=transform)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

## Test the network on test data

In [None]:
# Evaluate on the test loader and keep the accuracy returned by test().
top1_acc = test(model, test_loader, device)

In [None]:
from sklearn.metrics import top_k_accuracy_score
from utils import Bar, Logger, AverageMeter, accuracy, mkdir_p, savefig
# %% privacy_train
# torch.Size([128, 3, 32, 32]) torch.Size([128]) torch.Size([128, 3, 32, 32]) torch.Size([128])
# PRED torch.Size([256, 10])
# infer_in torch.Size([256])
def privacy_train(trainloader, model, inference_model, criterion, optimizer, use_cuda, num_batchs):
    """Train the membership-inference model against a fixed target model.

    For every step the two incoming batches are concatenated, pushed through
    ``model`` (kept in eval mode), and the resulting predictions plus the
    one-hot encoded class labels are fed to ``inference_model``, which is
    trained to tell the two batches apart.

    Args:
        trainloader: iterable yielding
            ``(batch_idx, ((tr_input, tr_target), (te_input, te_target)))``.
        model: target classifier; only used for forward passes.
        inference_model: attack model, called as
            ``inference_model(predictions, one_hot_labels)``.
        criterion: loss applied to the attack output and the 0/1 labels.
        optimizer: optimizer stepped once per batch.
        use_cuda: when true, all four input tensors are moved to the GPU.
        num_batchs: the loop stops once ``batch_idx`` exceeds the first seen
            index by more than this value.

    Returns:
        ``(losses.avg, top1.avg)``: the running averages of the attack loss
        and attack accuracy as tracked by ``AverageMeter``.
    """
    # Local import: ``time`` is not imported anywhere in the visible file.
    import time

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    mtop1_a = AverageMeter()
    mtop5_a = AverageMeter()

    inference_model.train()
    # The target model stays in evaluate mode.
    model.eval()

    end = time.time()
    first_id = -1
    for batch_idx, ((tr_input, tr_target), (te_input, te_target)) in trainloader:
        # Remember the first index so the stop condition is relative to it.
        if first_id == -1:
            first_id = batch_idx

        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            tr_input = tr_input.cuda()
            te_input = te_input.cuda()
            tr_target = tr_target.cuda()
            te_target = te_target.cuda()

        # Target-model predictions for both batches at once.
        model_input = torch.cat((tr_input, te_input))
        pred_outputs = model(model_input)

        # Class labels in the same order as the predictions.
        infer_input = torch.cat((tr_target, te_target))

        # Take the class count from the model output instead of hard-coding
        # it, so the function works for any number of classes.
        num_classes = pred_outputs.size(1)
        class_ids = range(num_classes)

        # Target-model top-1 / top-5 accuracy on the combined batch.
        scores_np = pred_outputs.detach().cpu().numpy()
        classes_np = infer_input.detach().cpu().numpy()
        mtop1 = top_k_accuracy_score(y_true=classes_np, y_score=scores_np,
                                     k=1, labels=class_ids)
        mtop5 = top_k_accuracy_score(y_true=classes_np, y_score=scores_np,
                                     k=5, labels=class_ids)

        mtop1_a.update(mtop1, model_input.size(0))
        mtop5_a.update(mtop5, model_input.size(0))

        # Label encoding: -1 everywhere and +1 at the true class. It is built
        # on the labels' device so the function also runs without CUDA.
        infer_input_one_hot = torch.full(
            (infer_input.size(0), num_classes), -1.0,
            dtype=torch.float, device=infer_input.device)
        infer_input_one_hot.scatter_(1, infer_input.type(torch.int64).view(-1, 1), 1)

        attack_model_input = pred_outputs
        member_output = inference_model(attack_model_input, infer_input_one_hot)

        # 0 for the first ("tr") batch and 1 for the second ("te") batch.
        # NOTE(review): confirm this matches the intended member/non-member
        # convention of the caller.
        is_member_labels = torch.cat((
            torch.zeros(tr_input.size(0), 1, dtype=torch.float),
            torch.ones(te_input.size(0), 1, dtype=torch.float),
        )).to(member_output.device)

        loss = criterion(member_output, is_member_labels)

        # measure accuracy and record loss
        prec1 = np.mean(
            (member_output.detach().cpu().numpy() > 0.5)
            == is_member_labels.cpu().numpy()
        )
        losses.update(loss.item(), model_input.size(0))
        top1.update(prec1, model_input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if batch_idx - first_id > num_batchs:
            break

        # plot progress
        if batch_idx % 50 == 0:
            print( losses.avg, top1.avg)

    return losses.avg, top1.avg
