<a href="https://colab.research.google.com/github/HARIPRIYA02/DPOptimizers---HS/blob/main/WRN-16-4-%20custom%20private%20rmsprop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install opacus

Collecting opacus
  Downloading opacus-1.5.2-py3-none-any.whl.metadata (7.9 kB)
Downloading opacus-1.5.2-py3-none-any.whl (239 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.9/239.9 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: opacus
Successfully installed opacus-1.5.2


In [5]:
import warnings
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
import torch.nn as nn
import torch.optim as optim
from opacus import PrivacyEngine
from opacus.utils.batch_memory_manager import BatchMemoryManager
from opacus.validators import ModuleValidator
from tqdm import tqdm
import numpy as np

warnings.simplefilter("ignore")

# Hyperparameters
MAX_GRAD_NORM = 1.2
EPSILON = 3.0
DELTA = 1e-5
NOISE_MULTIPLIER = 1.1
EPOCHS = 50
LR = 8e-4
BATCH_SIZE = 4096
MAX_PHYSICAL_BATCH_SIZE = 128
CIFAR10_MEAN = [0.4914, 0.4822, 0.4465]
CIFAR10_STD_DEV = [0.2023, 0.1994, 0.2010]
NUM_GROUPS = 40

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD_DEV)
])

# Load datasets
DATA_ROOT = "~/.local/data"
train_dataset = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Define the weight-standardized convolutional layer
class WSConv2d(nn.Conv2d):
    def forward(self, x):
        mean = self.weight.mean(dim=[1, 2, 3], keepdim=True)
        std = self.weight.std(dim=[1, 2, 3], keepdim=True) + 1e-5
        weight = (self.weight - mean) / std
        return nn.functional.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)

# Define ResNet-20 with Group Normalization and Weight Standardization
class BasicBlockGN(nn.Module):
    def __init__(self, in_planes, planes, stride=1, num_groups=2):
        super(BasicBlockGN, self).__init__()
        self.conv1 = WSConv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.gn1 = nn.GroupNorm(num_groups, planes)
        self.conv2 = WSConv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.gn2 = nn.GroupNorm(num_groups, planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                WSConv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.GroupNorm(num_groups, planes)
            )

    def forward(self, x):
        out = nn.SiLU()(self.gn1(self.conv1(x)))
        out = self.gn2(self.conv2(out))
        out += self.shortcut(x)
        out = nn.SiLU()(out)
        return out

class ResNet20GN(nn.Module):
    def __init__(self, num_classes=10, num_groups=2):
        super(ResNet20GN, self).__init__()
        self.in_planes = 16
        self.conv1 = WSConv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.gn1 = nn.GroupNorm(num_groups, 16)
        self.layer1 = self._make_layer(BasicBlockGN, 16, 3, stride=1, num_groups=num_groups)
        self.layer2 = self._make_layer(BasicBlockGN, 32, 3, stride=2, num_groups=num_groups)
        self.layer3 = self._make_layer(BasicBlockGN, 64, 3, stride=2, num_groups=num_groups)
        self.linear = nn.Linear(64, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride, num_groups):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride, num_groups))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = nn.SiLU()(self.gn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = nn.AvgPool2d(8)(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

class WideBasicGN(nn.Module):
    def __init__(self, in_planes, planes, dropout_rate, stride=1, num_groups=2):
        super(WideBasicGN, self).__init__()
        self.conv1 = WSConv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.gn1 = nn.GroupNorm(num_groups, planes)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.conv2 = WSConv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.gn2 = nn.GroupNorm(num_groups, planes)

        # Ensure the shortcut connection has matching dimensions
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                WSConv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.GroupNorm(num_groups, planes)
            )

    def forward(self, x):
        out = nn.SiLU()(self.gn1(self.conv1(x)))
        out = self.dropout(out)
        out = self.gn2(self.conv2(out))
        out += self.shortcut(x)  # Add the shortcut, now with matching dimensions
        out = nn.SiLU()(out)
        return out


class WRN16_4_GN(nn.Module):
    def __init__(self, num_classes=10, dropout_rate=0.3, num_groups=2):
        super(WRN16_4_GN, self).__init__()
        self.in_planes = 16
        self.conv1 = WSConv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.gn1 = nn.GroupNorm(num_groups, 16)
        self.layer1 = self._make_layer(WideBasicGN, 16, 3, dropout_rate, stride=1, num_groups=num_groups)
        self.layer2 = self._make_layer(WideBasicGN, 32, 3, dropout_rate, stride=2, num_groups=num_groups)
        self.layer3 = self._make_layer(WideBasicGN, 64, 3, dropout_rate, stride=2, num_groups=num_groups)
        self.linear = nn.Linear(64, num_classes)

    def _make_layer(self, block, planes, num_blocks, dropout_rate, stride, num_groups):
        strides = [stride] + [1]*(num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, dropout_rate, stride, num_groups))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = nn.SiLU()(self.gn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = nn.AvgPool2d(8)(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

# Initialize model, optimizer, and privacy engine
#model = ResNet20GN(num_classes=10)
model = WRN16_4_GN(num_classes=10)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ModuleValidator.fix(model)
model = model.to(device)
optimizer = optim.RMSprop(model.parameters(), lr=LR, momentum=0.9, alpha=0.99)
ModuleValidator.validate(model, strict=False)

privacy_engine = PrivacyEngine()
model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    epochs=EPOCHS,
    target_epsilon=EPSILON,
    target_delta=DELTA,
    max_grad_norm=MAX_GRAD_NORM
)

# Adaptive learning rate scheduler
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS * len(train_loader), eta_min=8e-6)

# Loss function
criterion = nn.CrossEntropyLoss()

# Training and testing functions
def train(model, train_loader, optimizer, epoch, device):
    model.train()
    losses = []
    top1_acc = []

    with BatchMemoryManager(data_loader=train_loader, max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE, optimizer=optimizer) as memory_safe_data_loader:
        for batch_idx, (data, target) in tqdm(enumerate(memory_safe_data_loader), total=len(memory_safe_data_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # Calculate accuracy
            losses.append(loss.item())
            _, predicted = torch.max(output.data, 1)
            correct = predicted.eq(target).sum().item()
            top1_acc.append(correct / target.size(0))

    avg_loss = np.mean(losses)
    avg_acc = np.mean(top1_acc) * 100
    return avg_loss, avg_acc

# Test accuracy will be multiplied by 100 for percent representation - for e.g. 0.60%  is actually 60%
def test(model, test_loader, device):
    model.eval()
    losses = []
    top1_acc = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()

            # Calculate accuracy
            acc = np.mean(preds == labels)
            losses.append(loss.item())
            top1_acc.append(acc)

    avg_loss = np.mean(losses)
    avg_accuracy = np.mean(top1_acc)
    return avg_loss, avg_accuracy

# Train and validate the model
for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc = train(model, train_loader, optimizer, epoch, device)
    test_loss, test_accuracy = test(model, test_loader, device)
    print(f'Epoch: {epoch}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%, '
          f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')

    scheduler.step(test_loss)

# Print final epsilon after training
final_epsilon = privacy_engine.get_epsilon(DELTA)
print(f"Final epsilon after {EPOCHS} epochs: {final_epsilon:.2f}")


Files already downloaded and verified
Files already downloaded and verified


398it [00:57,  6.89it/s]


Epoch: 1, Train Loss: 2.1484, Train Accuracy: 17.72%, Test Loss: 2.0449, Test Accuracy: 0.25%


400it [00:55,  7.26it/s]


Epoch: 2, Train Loss: 1.9746, Train Accuracy: 25.75%, Test Loss: 1.9571, Test Accuracy: 0.28%


396it [00:54,  7.21it/s]


Epoch: 3, Train Loss: 1.8555, Train Accuracy: 30.22%, Test Loss: 1.9098, Test Accuracy: 0.32%


398it [00:56,  7.03it/s]


Epoch: 4, Train Loss: 1.8062, Train Accuracy: 33.46%, Test Loss: 1.8880, Test Accuracy: 0.35%


397it [00:55,  7.19it/s]


Epoch: 5, Train Loss: 1.7712, Train Accuracy: 35.97%, Test Loss: 1.9696, Test Accuracy: 0.36%


397it [00:55,  7.20it/s]


Epoch: 6, Train Loss: 1.7764, Train Accuracy: 36.49%, Test Loss: 2.1378, Test Accuracy: 0.34%


396it [00:55,  7.11it/s]


Epoch: 7, Train Loss: 1.7614, Train Accuracy: 37.94%, Test Loss: 2.2013, Test Accuracy: 0.35%


398it [00:55,  7.21it/s]


Epoch: 8, Train Loss: 1.7362, Train Accuracy: 39.05%, Test Loss: 2.1371, Test Accuracy: 0.37%


398it [00:55,  7.22it/s]


Epoch: 9, Train Loss: 1.7391, Train Accuracy: 40.65%, Test Loss: 2.2786, Test Accuracy: 0.37%


397it [00:55,  7.10it/s]


Epoch: 10, Train Loss: 1.7282, Train Accuracy: 41.67%, Test Loss: 2.3714, Test Accuracy: 0.38%


398it [00:55,  7.21it/s]


Epoch: 11, Train Loss: 1.7488, Train Accuracy: 42.04%, Test Loss: 2.3847, Test Accuracy: 0.38%


392it [00:54,  7.22it/s]


Epoch: 12, Train Loss: 1.7133, Train Accuracy: 42.98%, Test Loss: 2.3118, Test Accuracy: 0.39%


395it [00:55,  7.17it/s]


Epoch: 13, Train Loss: 1.7195, Train Accuracy: 43.46%, Test Loss: 2.4171, Test Accuracy: 0.40%


401it [00:55,  7.28it/s]


Epoch: 14, Train Loss: 1.7224, Train Accuracy: 44.19%, Test Loss: 2.4381, Test Accuracy: 0.40%


398it [00:54,  7.28it/s]


Epoch: 15, Train Loss: 1.7283, Train Accuracy: 44.67%, Test Loss: 2.4697, Test Accuracy: 0.40%


400it [00:55,  7.25it/s]


Epoch: 16, Train Loss: 1.7319, Train Accuracy: 45.34%, Test Loss: 2.4477, Test Accuracy: 0.41%


393it [00:54,  7.25it/s]


Epoch: 17, Train Loss: 1.7243, Train Accuracy: 45.61%, Test Loss: 2.4408, Test Accuracy: 0.42%


397it [00:55,  7.16it/s]


Epoch: 18, Train Loss: 1.7420, Train Accuracy: 45.62%, Test Loss: 2.3068, Test Accuracy: 0.43%


396it [00:54,  7.31it/s]


Epoch: 19, Train Loss: 1.7320, Train Accuracy: 45.36%, Test Loss: 2.4102, Test Accuracy: 0.42%


397it [00:54,  7.23it/s]


Epoch: 20, Train Loss: 1.7038, Train Accuracy: 46.21%, Test Loss: 2.4294, Test Accuracy: 0.43%


394it [00:54,  7.27it/s]


Epoch: 21, Train Loss: 1.7074, Train Accuracy: 46.29%, Test Loss: 2.3435, Test Accuracy: 0.43%


399it [00:54,  7.30it/s]


Epoch: 22, Train Loss: 1.6838, Train Accuracy: 47.23%, Test Loss: 2.3628, Test Accuracy: 0.44%


395it [00:55,  7.17it/s]


Epoch: 23, Train Loss: 1.6853, Train Accuracy: 47.39%, Test Loss: 2.5321, Test Accuracy: 0.43%


401it [00:55,  7.27it/s]


Epoch: 24, Train Loss: 1.7100, Train Accuracy: 46.97%, Test Loss: 2.4632, Test Accuracy: 0.44%


398it [00:54,  7.24it/s]


Epoch: 25, Train Loss: 1.7058, Train Accuracy: 47.70%, Test Loss: 2.5124, Test Accuracy: 0.44%


400it [00:55,  7.17it/s]


Epoch: 26, Train Loss: 1.7005, Train Accuracy: 47.86%, Test Loss: 2.6630, Test Accuracy: 0.43%


399it [00:55,  7.19it/s]


Epoch: 27, Train Loss: 1.7031, Train Accuracy: 48.07%, Test Loss: 2.5616, Test Accuracy: 0.44%


399it [00:55,  7.19it/s]


Epoch: 28, Train Loss: 1.7054, Train Accuracy: 48.54%, Test Loss: 2.5019, Test Accuracy: 0.44%


396it [00:55,  7.08it/s]


Epoch: 29, Train Loss: 1.6971, Train Accuracy: 48.90%, Test Loss: 2.7121, Test Accuracy: 0.43%


394it [00:54,  7.18it/s]


Epoch: 30, Train Loss: 1.7087, Train Accuracy: 48.96%, Test Loss: 2.6688, Test Accuracy: 0.43%


395it [00:54,  7.23it/s]


Epoch: 31, Train Loss: 1.6813, Train Accuracy: 49.02%, Test Loss: 2.6438, Test Accuracy: 0.44%


397it [00:55,  7.11it/s]


Epoch: 32, Train Loss: 1.6889, Train Accuracy: 49.12%, Test Loss: 2.6726, Test Accuracy: 0.44%


398it [00:55,  7.21it/s]


Epoch: 33, Train Loss: 1.6860, Train Accuracy: 49.18%, Test Loss: 2.4439, Test Accuracy: 0.46%


399it [00:55,  7.19it/s]


Epoch: 34, Train Loss: 1.6661, Train Accuracy: 49.78%, Test Loss: 2.5938, Test Accuracy: 0.45%


397it [00:56,  7.08it/s]


Epoch: 35, Train Loss: 1.6789, Train Accuracy: 49.83%, Test Loss: 2.6018, Test Accuracy: 0.45%


397it [00:55,  7.18it/s]


Epoch: 36, Train Loss: 1.6839, Train Accuracy: 49.92%, Test Loss: 2.6144, Test Accuracy: 0.45%


398it [00:54,  7.27it/s]


Epoch: 37, Train Loss: 1.6849, Train Accuracy: 49.98%, Test Loss: 2.5784, Test Accuracy: 0.46%


398it [00:55,  7.18it/s]


Epoch: 38, Train Loss: 1.6717, Train Accuracy: 50.36%, Test Loss: 2.5666, Test Accuracy: 0.46%


395it [00:54,  7.25it/s]


Epoch: 39, Train Loss: 1.6723, Train Accuracy: 50.38%, Test Loss: 2.6079, Test Accuracy: 0.47%


395it [00:55,  7.16it/s]


Epoch: 40, Train Loss: 1.6551, Train Accuracy: 50.93%, Test Loss: 2.6754, Test Accuracy: 0.46%


397it [00:55,  7.19it/s]


Epoch: 41, Train Loss: 1.6717, Train Accuracy: 50.89%, Test Loss: 2.7933, Test Accuracy: 0.45%


398it [00:54,  7.26it/s]


Epoch: 42, Train Loss: 1.6748, Train Accuracy: 51.08%, Test Loss: 2.5420, Test Accuracy: 0.47%


393it [00:55,  7.10it/s]


Epoch: 43, Train Loss: 1.6591, Train Accuracy: 51.05%, Test Loss: 2.4679, Test Accuracy: 0.48%


398it [00:54,  7.27it/s]


Epoch: 44, Train Loss: 1.6606, Train Accuracy: 51.32%, Test Loss: 2.4281, Test Accuracy: 0.49%


394it [00:54,  7.21it/s]


Epoch: 45, Train Loss: 1.6504, Train Accuracy: 51.88%, Test Loss: 2.5344, Test Accuracy: 0.48%


396it [00:55,  7.17it/s]


Epoch: 46, Train Loss: 1.6687, Train Accuracy: 51.47%, Test Loss: 2.5865, Test Accuracy: 0.48%


399it [00:54,  7.31it/s]


Epoch: 47, Train Loss: 1.6583, Train Accuracy: 51.52%, Test Loss: 2.3601, Test Accuracy: 0.50%


396it [00:55,  7.17it/s]


Epoch: 48, Train Loss: 1.6474, Train Accuracy: 51.71%, Test Loss: 2.7002, Test Accuracy: 0.47%


398it [00:55,  7.21it/s]


Epoch: 49, Train Loss: 1.6477, Train Accuracy: 51.78%, Test Loss: 2.3765, Test Accuracy: 0.50%


397it [00:54,  7.23it/s]


Epoch: 50, Train Loss: 1.6137, Train Accuracy: 52.59%, Test Loss: 2.4589, Test Accuracy: 0.49%
Final epsilon after 50 epochs: 3.00


In [6]:
top1_acc=test(model,test_loader, device)
top1_acc

(2.4588874181111655, 0.4944308570704277)