<a href="https://colab.research.google.com/github/HARIPRIYA02/DPOptimizers---HS/blob/main/Custom%20private%20RMSProp-%20Resnet-20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install opacus

Collecting opacus
  Downloading opacus-1.5.2-py3-none-any.whl.metadata (7.9 kB)
Downloading opacus-1.5.2-py3-none-any.whl (239 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.9/239.9 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: opacus
Successfully installed opacus-1.5.2


In [6]:

import warnings
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
import torch.nn as nn
import torch.optim as optim
from opacus import PrivacyEngine
from opacus.utils.batch_memory_manager import BatchMemoryManager
from torchvision.transforms.functional import InterpolationMode
from opacus.validators import ModuleValidator
from tqdm import tqdm
import numpy as np
import argparse

warnings.simplefilter("ignore")

# Hyperparameters
MAX_GRAD_NORM = 1.2
EPSILON = 3.0
DELTA = 1e-5
NOISE_MULTIPLIER = 1.1
EPOCHS = 50
LR = 8e-4
BATCH_SIZE = 4096
MAX_PHYSICAL_BATCH_SIZE = 128
CIFAR10_MEAN = [0.4914, 0.4822, 0.4465]
CIFAR10_STD_DEV = [0.2023, 0.1994, 0.2010]
bound=1.0
NUM_GROUPS = 40

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD_DEV)

])

# Load datasets
DATA_ROOT = "~/.local/data"
train_dataset = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Define the weight-standardized convolutional layer
class WSConv2d(nn.Conv2d):
    def forward(self, x):
        mean = self.weight.mean(dim=[1, 2, 3], keepdim=True)
        std = self.weight.std(dim=[1, 2, 3], keepdim=True) + 1e-5
        weight = (self.weight - mean) / std
        return nn.functional.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)

# Define ResNet-20 with Group Normalization and Weight Standardization
class BasicBlockGN(nn.Module):
    def __init__(self, in_planes, planes, stride=1, num_groups=2):
        super(BasicBlockGN, self).__init__()
        self.conv1 = WSConv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.gn1 = nn.GroupNorm(num_groups, planes)
        self.conv2 = WSConv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.gn2 = nn.GroupNorm(num_groups, planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                WSConv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.GroupNorm(num_groups, planes)
            )

    def forward(self, x):
        out = nn.SiLU()(self.gn1(self.conv1(x)))
        out = self.gn2(self.conv2(out))
        out += self.shortcut(x)
        out = nn.SiLU()(out)
        return out

class ResNet20GN(nn.Module):
    def __init__(self, num_classes=10, num_groups=2):
        super(ResNet20GN, self).__init__()
        self.in_planes = 16
        self.conv1 = WSConv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.gn1 = nn.GroupNorm(num_groups, 16)
        self.layer1 = self._make_layer(BasicBlockGN, 16, 3, stride=1, num_groups=num_groups)
        self.layer2 = self._make_layer(BasicBlockGN, 32, 3, stride=2, num_groups=num_groups)
        self.layer3 = self._make_layer(BasicBlockGN, 64, 3, stride=2, num_groups=num_groups)
        self.linear = nn.Linear(64, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride, num_groups):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride, num_groups))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = nn.SiLU()(self.gn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = nn.AvgPool2d(8)(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

# Initialize model, optimizer, and privacy engine


model = ResNet20GN(num_classes=10)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ModuleValidator.fix(model)
model = model.to(device)
optimizer = optim.RMSprop(model.parameters(), lr=LR, momentum=0.9, alpha=0.99)
ModuleValidator.validate(model, strict=False)

privacy_engine = PrivacyEngine()
model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
    module=model,
    optimizer=optimizer,
    noise_generator = NOISE_MULTIPLIER, 
    data_loader=train_loader,
    epochs=EPOCHS,
    target_epsilon=EPSILON,
    target_delta=DELTA,
    max_grad_norm=MAX_GRAD_NORM
)


# Adaptive learning rate scheduler
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS * len(train_loader), eta_min=8e-6)

# Loss function
criterion = nn.CrossEntropyLoss()


# Grouped Gradient Clipping Function
def GroupedGradClip(gradients, bound, groups):
    # Flatten gradients before splitting
    flat_grad = gradients.view(gradients.size(0), -1)  # Flatten to process gradients as a 2D tensor
    grouped_grads = torch.chunk(flat_grad, groups, dim=1)  # Chunk along the feature dimension
    clipped_grads = []

    # Compute the norm for each group and clip
    for g in grouped_grads:
        grad_norm = torch.norm(g, dim=1, keepdim=True)
        clip_coef = torch.clamp(bound / (grad_norm + 1e-6), max=1.0)  # Clip the gradient norm
        clipped_grads.append(g * clip_coef)

    # Concatenate back the clipped gradients and reshape to original
    clipped_flat_grad = torch.cat(clipped_grads, dim=1)

    # Reshape back to original gradient shape
    return clipped_flat_grad.view_as(gradients)


L=256
# Training and testing functions

def train(model, train_loader, optimizer, epoch, device):
    model.train()
    losses = []
    top1_acc = []

    with BatchMemoryManager(data_loader=train_loader, max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE, optimizer=optimizer) as memory_safe_data_loader:
        for batch_idx, (data, target) in tqdm(enumerate(memory_safe_data_loader), total=len(memory_safe_data_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()

            # Apply grouped gradient clipping
            for p in model.parameters():
                  if p.grad is not None:
                      p.grad = GroupedGradClip(p.grad, bound, NUM_GROUPS)
                      noise = torch.normal(mean=0, std=NOISE_MULTIPLIER * bound, size=p.grad.shape, device=device)
                      p.grad = (p.grad + noise) / L  # Normalize by group size



            optimizer.step()
            losses.append(loss.item())
            _, predicted = torch.max(output.data, 1)
            correct = predicted.eq(target).sum().item()
            top1_acc.append(correct / target.size(0))
            if (batch_idx + 1) % 200 == 0:
                epsilon = privacy_engine.get_epsilon(DELTA)
                print(
                    f"(ε = {epsilon:.2f}, δ = {DELTA})"
                )

















    avg_loss = np.mean(losses)
    avg_acc = np.mean(top1_acc) * 100
    return avg_loss, avg_acc,epsilon


def test(model, test_loader, device):
    model.eval()
    losses = []
    top1_acc = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()

            # Calculate accuracy
            acc = np.mean(preds == labels)


            losses.append(loss.item())
            top1_acc.append(acc)


    avg_loss = np.mean(losses)
    avg_accuracy = np.mean(top1_acc)*100
    return avg_loss, avg_accuracy


# Train and validate the model
for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc,epsilonv = train(model, train_loader, optimizer, epoch, device)
    test_loss, test_accuracy = test(model, test_loader, device)
    print(f'Epoch: {epoch}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%, '
          f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')


    scheduler.step(test_loss)

# Print final epsilon after training
final_epsilon = privacy_engine.get_epsilon(DELTA)
print(f"Final epsilon after {EPOCHS} epochs: {final_epsilon:.2f}")






Files already downloaded and verified
Files already downloaded and verified


 51%|█████▏    | 201/391 [00:43<00:39,  4.86it/s]

(ε = 0.29, δ = 1e-05)


400it [01:25,  4.69it/s]

(ε = 0.41, δ = 1e-05)





Epoch: 1, Train Loss: 2.2429, Train Accuracy: 16.70%, Test Loss: 2.0464, Test Accuracy: 20.91%


 51%|█████     | 200/391 [00:42<00:41,  4.59it/s]

(ε = 0.49, δ = 1e-05)


397it [01:23,  4.73it/s]


Epoch: 2, Train Loss: 1.9994, Train Accuracy: 25.37%, Test Loss: 1.9105, Test Accuracy: 29.21%


 51%|█████     | 200/391 [00:42<00:42,  4.50it/s]

(ε = 0.63, δ = 1e-05)


399it [01:23,  4.75it/s]


Epoch: 3, Train Loss: 1.8680, Train Accuracy: 30.89%, Test Loss: 1.8372, Test Accuracy: 32.31%


 51%|█████     | 200/391 [00:41<00:41,  4.59it/s]

(ε = 0.74, δ = 1e-05)


398it [01:23,  4.79it/s]


Epoch: 4, Train Loss: 1.7911, Train Accuracy: 34.60%, Test Loss: 1.7639, Test Accuracy: 37.12%


 51%|█████     | 200/391 [00:42<00:41,  4.57it/s]

(ε = 0.84, δ = 1e-05)


396it [01:23,  4.75it/s]


Epoch: 5, Train Loss: 1.7498, Train Accuracy: 37.74%, Test Loss: 1.7366, Test Accuracy: 39.05%


 51%|█████     | 200/391 [00:42<00:43,  4.42it/s]

(ε = 0.93, δ = 1e-05)


396it [01:23,  4.74it/s]


Epoch: 6, Train Loss: 1.7083, Train Accuracy: 41.01%, Test Loss: 1.6986, Test Accuracy: 42.37%


 51%|█████     | 200/391 [00:45<00:41,  4.56it/s]

(ε = 1.01, δ = 1e-05)


395it [01:26,  4.56it/s]


Epoch: 7, Train Loss: 1.6770, Train Accuracy: 43.16%, Test Loss: 1.6961, Test Accuracy: 44.24%


 51%|█████     | 200/391 [00:42<00:42,  4.48it/s]

(ε = 1.09, δ = 1e-05)


396it [01:23,  4.77it/s]


Epoch: 8, Train Loss: 1.6938, Train Accuracy: 44.68%, Test Loss: 1.7478, Test Accuracy: 44.60%


 51%|█████     | 200/391 [00:42<01:04,  2.97it/s]

(ε = 1.16, δ = 1e-05)


396it [01:23,  4.74it/s]


Epoch: 9, Train Loss: 1.6919, Train Accuracy: 46.01%, Test Loss: 1.7241, Test Accuracy: 46.20%


 51%|█████     | 200/391 [00:41<00:49,  3.89it/s]

(ε = 1.23, δ = 1e-05)


396it [01:23,  4.73it/s]


Epoch: 10, Train Loss: 1.6643, Train Accuracy: 47.49%, Test Loss: 1.6992, Test Accuracy: 47.18%


 51%|█████     | 200/391 [00:41<00:43,  4.41it/s]

(ε = 1.30, δ = 1e-05)


398it [01:23,  4.74it/s]


Epoch: 11, Train Loss: 1.6471, Train Accuracy: 48.65%, Test Loss: 1.7310, Test Accuracy: 47.51%


 51%|█████     | 200/391 [00:41<00:42,  4.49it/s]

(ε = 1.36, δ = 1e-05)


398it [01:23,  4.75it/s]


Epoch: 12, Train Loss: 1.6708, Train Accuracy: 49.15%, Test Loss: 1.7115, Test Accuracy: 48.93%


 51%|█████     | 200/391 [00:41<00:42,  4.47it/s]

(ε = 1.42, δ = 1e-05)


398it [01:23,  4.78it/s]


Epoch: 13, Train Loss: 1.6546, Train Accuracy: 49.68%, Test Loss: 1.7581, Test Accuracy: 48.58%


 51%|█████     | 200/391 [00:41<00:43,  4.42it/s]

(ε = 1.48, δ = 1e-05)


395it [01:22,  4.78it/s]


Epoch: 14, Train Loss: 1.6853, Train Accuracy: 50.27%, Test Loss: 1.6857, Test Accuracy: 49.53%


 51%|█████     | 200/391 [00:41<00:43,  4.42it/s]

(ε = 1.53, δ = 1e-05)


396it [01:22,  4.79it/s]


Epoch: 15, Train Loss: 1.6635, Train Accuracy: 50.78%, Test Loss: 1.6817, Test Accuracy: 50.02%


 51%|█████     | 200/391 [00:42<00:43,  4.43it/s]

(ε = 1.59, δ = 1e-05)


397it [01:23,  4.74it/s]


Epoch: 16, Train Loss: 1.6500, Train Accuracy: 51.48%, Test Loss: 1.7049, Test Accuracy: 50.37%


 51%|█████     | 200/391 [00:42<00:46,  4.14it/s]

(ε = 1.64, δ = 1e-05)


396it [01:23,  4.74it/s]


Epoch: 17, Train Loss: 1.6773, Train Accuracy: 51.87%, Test Loss: 1.7853, Test Accuracy: 49.56%


 51%|█████     | 200/391 [00:42<01:05,  2.90it/s]

(ε = 1.70, δ = 1e-05)


398it [01:25,  4.66it/s]


Epoch: 18, Train Loss: 1.6508, Train Accuracy: 52.25%, Test Loss: 1.6887, Test Accuracy: 50.68%


 51%|█████     | 200/391 [00:42<01:04,  2.97it/s]

(ε = 1.75, δ = 1e-05)


395it [01:24,  4.70it/s]


Epoch: 19, Train Loss: 1.6403, Train Accuracy: 52.40%, Test Loss: 1.6398, Test Accuracy: 52.09%


 51%|█████     | 200/391 [00:42<00:56,  3.38it/s]

(ε = 1.80, δ = 1e-05)


394it [01:24,  4.69it/s]


Epoch: 20, Train Loss: 1.5862, Train Accuracy: 53.77%, Test Loss: 1.6780, Test Accuracy: 51.80%


 51%|█████     | 200/391 [00:42<00:43,  4.39it/s]

(ε = 1.85, δ = 1e-05)


400it [01:25,  4.32it/s]

(ε = 1.87, δ = 1e-05)


402it [01:25,  4.69it/s]


Epoch: 21, Train Loss: 1.6550, Train Accuracy: 52.73%, Test Loss: 1.6919, Test Accuracy: 51.91%


 51%|█████     | 200/391 [00:42<00:42,  4.49it/s]

(ε = 1.89, δ = 1e-05)


396it [01:24,  4.71it/s]


Epoch: 22, Train Loss: 1.6062, Train Accuracy: 54.13%, Test Loss: 1.7041, Test Accuracy: 52.13%


 51%|█████     | 200/391 [00:42<00:44,  4.25it/s]

(ε = 1.94, δ = 1e-05)


399it [01:25,  4.69it/s]


Epoch: 23, Train Loss: 1.6191, Train Accuracy: 54.06%, Test Loss: 1.6316, Test Accuracy: 53.05%


 51%|█████     | 200/391 [00:42<00:42,  4.48it/s]

(ε = 1.99, δ = 1e-05)


396it [01:24,  4.68it/s]


Epoch: 24, Train Loss: 1.6146, Train Accuracy: 54.20%, Test Loss: 1.6936, Test Accuracy: 53.25%


 51%|█████     | 200/391 [00:42<00:44,  4.33it/s]

(ε = 2.03, δ = 1e-05)


398it [01:24,  4.72it/s]


Epoch: 25, Train Loss: 1.5946, Train Accuracy: 54.76%, Test Loss: 1.6282, Test Accuracy: 53.69%


 51%|█████     | 200/391 [00:41<00:42,  4.46it/s]

(ε = 2.07, δ = 1e-05)


398it [01:24,  4.68it/s]


Epoch: 26, Train Loss: 1.5286, Train Accuracy: 56.14%, Test Loss: 1.6376, Test Accuracy: 54.49%


 51%|█████     | 200/391 [00:44<00:44,  4.26it/s]

(ε = 2.12, δ = 1e-05)


398it [01:28,  4.48it/s]


Epoch: 27, Train Loss: 1.5749, Train Accuracy: 55.80%, Test Loss: 1.6265, Test Accuracy: 55.01%


 51%|█████     | 200/391 [00:45<01:07,  2.82it/s]

(ε = 2.16, δ = 1e-05)


397it [01:29,  4.43it/s]


Epoch: 28, Train Loss: 1.5683, Train Accuracy: 56.29%, Test Loss: 1.6421, Test Accuracy: 54.43%


 51%|█████     | 200/391 [00:45<00:47,  4.06it/s]

(ε = 2.20, δ = 1e-05)


397it [01:29,  4.44it/s]


Epoch: 29, Train Loss: 1.6178, Train Accuracy: 55.85%, Test Loss: 1.6539, Test Accuracy: 54.84%


 51%|█████     | 200/391 [00:44<00:46,  4.11it/s]

(ε = 2.24, δ = 1e-05)


398it [01:29,  4.44it/s]


Epoch: 30, Train Loss: 1.5559, Train Accuracy: 56.34%, Test Loss: 1.6413, Test Accuracy: 55.15%


 51%|█████     | 200/391 [00:44<00:47,  4.05it/s]

(ε = 2.29, δ = 1e-05)


395it [01:29,  4.43it/s]


Epoch: 31, Train Loss: 1.5563, Train Accuracy: 56.81%, Test Loss: 1.6500, Test Accuracy: 55.04%


 51%|█████     | 200/391 [00:43<00:57,  3.31it/s]

(ε = 2.33, δ = 1e-05)


398it [01:25,  4.66it/s]


Epoch: 32, Train Loss: 1.5532, Train Accuracy: 56.95%, Test Loss: 1.6515, Test Accuracy: 54.78%


 51%|█████     | 200/391 [00:42<00:49,  3.88it/s]

(ε = 2.37, δ = 1e-05)


392it [01:23,  4.71it/s]


Epoch: 33, Train Loss: 1.6010, Train Accuracy: 56.38%, Test Loss: 1.6270, Test Accuracy: 55.54%


 51%|█████     | 200/391 [00:42<00:43,  4.40it/s]

(ε = 2.40, δ = 1e-05)


397it [01:24,  4.72it/s]


Epoch: 34, Train Loss: 1.5514, Train Accuracy: 57.11%, Test Loss: 1.6673, Test Accuracy: 54.90%


 51%|█████     | 200/391 [00:42<00:45,  4.24it/s]

(ε = 2.44, δ = 1e-05)


399it [01:24,  4.70it/s]


Epoch: 35, Train Loss: 1.5616, Train Accuracy: 57.55%, Test Loss: 1.6476, Test Accuracy: 55.26%


 51%|█████     | 200/391 [00:41<00:42,  4.54it/s]

(ε = 2.48, δ = 1e-05)


395it [01:22,  4.76it/s]


Epoch: 36, Train Loss: 1.5384, Train Accuracy: 58.02%, Test Loss: 1.6302, Test Accuracy: 56.02%


 51%|█████     | 200/391 [00:42<00:44,  4.25it/s]

(ε = 2.52, δ = 1e-05)


396it [01:23,  4.75it/s]


Epoch: 37, Train Loss: 1.5557, Train Accuracy: 57.98%, Test Loss: 1.6271, Test Accuracy: 56.57%


 51%|█████▏    | 201/391 [00:42<00:43,  4.41it/s]

(ε = 2.56, δ = 1e-05)


395it [01:23,  4.75it/s]


Epoch: 38, Train Loss: 1.5417, Train Accuracy: 58.18%, Test Loss: 1.6202, Test Accuracy: 56.66%


 51%|█████     | 200/391 [00:42<00:44,  4.29it/s]

(ε = 2.59, δ = 1e-05)


399it [01:24,  4.75it/s]


Epoch: 39, Train Loss: 1.5629, Train Accuracy: 58.05%, Test Loss: 1.6212, Test Accuracy: 56.53%


 51%|█████     | 200/391 [00:42<00:42,  4.44it/s]

(ε = 2.63, δ = 1e-05)


400it [01:24,  4.23it/s]

(ε = 2.65, δ = 1e-05)


401it [01:24,  4.74it/s]


Epoch: 40, Train Loss: 1.5341, Train Accuracy: 58.38%, Test Loss: 1.6215, Test Accuracy: 56.97%


 51%|█████     | 200/391 [00:42<00:45,  4.17it/s]

(ε = 2.67, δ = 1e-05)


396it [01:23,  4.72it/s]


Epoch: 41, Train Loss: 1.5340, Train Accuracy: 58.16%, Test Loss: 1.5949, Test Accuracy: 56.69%


 51%|█████     | 200/391 [00:43<00:45,  4.20it/s]

(ε = 2.70, δ = 1e-05)


399it [01:24,  4.70it/s]


Epoch: 42, Train Loss: 1.4776, Train Accuracy: 59.37%, Test Loss: 1.5509, Test Accuracy: 58.32%


 51%|█████     | 200/391 [00:43<00:44,  4.28it/s]

(ε = 2.74, δ = 1e-05)


396it [01:24,  4.71it/s]


Epoch: 43, Train Loss: 1.4970, Train Accuracy: 59.51%, Test Loss: 1.5784, Test Accuracy: 58.00%


 51%|█████     | 200/391 [00:42<00:47,  4.06it/s]

(ε = 2.77, δ = 1e-05)


398it [01:24,  4.73it/s]


Epoch: 44, Train Loss: 1.4973, Train Accuracy: 59.86%, Test Loss: 1.6400, Test Accuracy: 56.99%


 51%|█████     | 200/391 [00:43<00:45,  4.17it/s]

(ε = 2.81, δ = 1e-05)


398it [01:24,  4.72it/s]


Epoch: 45, Train Loss: 1.5305, Train Accuracy: 59.47%, Test Loss: 1.5686, Test Accuracy: 58.56%


 51%|█████     | 200/391 [00:42<00:49,  3.89it/s]

(ε = 2.84, δ = 1e-05)


395it [01:23,  4.73it/s]


Epoch: 46, Train Loss: 1.5083, Train Accuracy: 60.02%, Test Loss: 1.6254, Test Accuracy: 57.86%


 51%|█████     | 200/391 [00:42<01:01,  3.13it/s]

(ε = 2.88, δ = 1e-05)


396it [01:23,  4.72it/s]


Epoch: 47, Train Loss: 1.5061, Train Accuracy: 59.81%, Test Loss: 1.5867, Test Accuracy: 58.28%


 51%|█████     | 200/391 [00:41<00:43,  4.44it/s]

(ε = 2.91, δ = 1e-05)


398it [01:23,  4.76it/s]


Epoch: 48, Train Loss: 1.5104, Train Accuracy: 59.55%, Test Loss: 1.5571, Test Accuracy: 58.68%


 51%|█████▏    | 201/391 [00:41<00:43,  4.34it/s]

(ε = 2.95, δ = 1e-05)


398it [01:24,  4.73it/s]


Epoch: 49, Train Loss: 1.4802, Train Accuracy: 59.98%, Test Loss: 1.5548, Test Accuracy: 58.71%


 51%|█████     | 200/391 [00:41<00:46,  4.11it/s]

(ε = 2.98, δ = 1e-05)


400it [01:24,  4.73it/s]

(ε = 3.00, δ = 1e-05)





Epoch: 50, Train Loss: 1.4678, Train Accuracy: 60.85%, Test Loss: 1.5615, Test Accuracy: 58.98%
Final epsilon after 50 epochs: 3.00


In [7]:
top1_acc=test(model,test_loader,device)
top1_acc

(1.5614548524220784, 58.97616496128318)