<a href="https://colab.research.google.com/github/HARIPRIYA02/DPOptimizers---HS/blob/main/Resnet-20%20RMSProp%20privatization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install opacus

Collecting opacus
  Downloading opacus-1.5.2-py3-none-any.whl.metadata (7.9 kB)
Downloading opacus-1.5.2-py3-none-any.whl (239 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.9/239.9 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: opacus
Successfully installed opacus-1.5.2


In [3]:

import warnings
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
import torch.nn as nn
import torch.optim as optim
from opacus import PrivacyEngine
from opacus.utils.batch_memory_manager import BatchMemoryManager
from torchvision.transforms.functional import InterpolationMode
from opacus.validators import ModuleValidator
from tqdm import tqdm
import numpy as np
import argparse

warnings.simplefilter("ignore")

# Hyperparameters
MAX_GRAD_NORM = 1.2
EPSILON = 3.0
DELTA = 1e-5
NOISE_MULTIPLIER = 1.1
EPOCHS = 100
LR = 8e-4
BATCH_SIZE = 4096
MAX_PHYSICAL_BATCH_SIZE = 128
CIFAR10_MEAN = [0.4914, 0.4822, 0.4465]
CIFAR10_STD_DEV = [0.2023, 0.1994, 0.2010]
bound=1.0
NUM_GROUPS = 40

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD_DEV)

])

# Load datasets
DATA_ROOT = "~/.local/data"
train_dataset = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform)
test_dataset = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Define the weight-standardized convolutional layer
class WSConv2d(nn.Conv2d):
    def forward(self, x):
        mean = self.weight.mean(dim=[1, 2, 3], keepdim=True)
        std = self.weight.std(dim=[1, 2, 3], keepdim=True) + 1e-5
        weight = (self.weight - mean) / std
        return nn.functional.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)

# Define ResNet-20 with Group Normalization and Weight Standardization
class BasicBlockGN(nn.Module):
    def __init__(self, in_planes, planes, stride=1, num_groups=2):
        super(BasicBlockGN, self).__init__()
        self.conv1 = WSConv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.gn1 = nn.GroupNorm(num_groups, planes)
        self.conv2 = WSConv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.gn2 = nn.GroupNorm(num_groups, planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                WSConv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.GroupNorm(num_groups, planes)
            )

    def forward(self, x):
        out = nn.SiLU()(self.gn1(self.conv1(x)))
        out = self.gn2(self.conv2(out))
        out += self.shortcut(x)
        out = nn.SiLU()(out)
        return out

class ResNet20GN(nn.Module):
    def __init__(self, num_classes=10, num_groups=2):
        super(ResNet20GN, self).__init__()
        self.in_planes = 16
        self.conv1 = WSConv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.gn1 = nn.GroupNorm(num_groups, 16)
        self.layer1 = self._make_layer(BasicBlockGN, 16, 3, stride=1, num_groups=num_groups)
        self.layer2 = self._make_layer(BasicBlockGN, 32, 3, stride=2, num_groups=num_groups)
        self.layer3 = self._make_layer(BasicBlockGN, 64, 3, stride=2, num_groups=num_groups)
        self.linear = nn.Linear(64, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride, num_groups):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride, num_groups))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = nn.SiLU()(self.gn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = nn.AvgPool2d(8)(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

class WideBasicGN(nn.Module):
    def __init__(self, in_planes, planes, dropout_rate, stride=1, num_groups=2):
        super(WideBasicGN, self).__init__()
        self.conv1 = WSConv2d(in_planes, planes, kernel_size=3, padding=1, bias=False)
        self.gn1 = nn.GroupNorm(num_groups, planes)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.conv2 = WSConv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.gn2 = nn.GroupNorm(num_groups, planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                WSConv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.GroupNorm(num_groups, planes)
            )

    def forward(self, x):
        out = nn.SiLU()(self.gn1(self.conv1(x)))
        out = self.dropout(out)
        out = self.gn2(self.conv2(out))
        out += self.shortcut(x)
        out = nn.SiLU()(out)
        return out
class WRN16_4_GN(nn.Module):
    def __init__(self, num_classes=10, dropout_rate=0.3, num_groups=2):
        super(WRN16_4_GN, self).__init__()
        self.in_planes = 16
        self.conv1 = WSConv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.gn1 = nn.GroupNorm(num_groups, 16)
        self.layer1 = self._make_layer(WideBasicGN, 64, 3, dropout_rate, stride=1, num_groups=num_groups)
        self.layer2 = self._make_layer(WideBasicGN, 128, 3, dropout_rate, stride=2, num_groups=num_groups)
        self.layer3 = self._make_layer(WideBasicGN, 256, 3, dropout_rate, stride=2, num_groups=num_groups)
        self.linear = nn.Linear(256, num_classes)

    def _make_layer(self, block, planes, num_blocks, dropout_rate, stride, num_groups):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, dropout_rate, stride, num_groups))
            self.in_planes = planes
        return nn.Sequential(*layers)

    def forward(self, x):
        out = nn.SiLU()(self.gn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = nn.AvgPool2d(8)(out)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

model = ResNet20GN(num_classes=10)
#model = WRN16_4_GN(num_classes=10)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ModuleValidator.fix(model)
model = model.to(device)
optimizer = optim.RMSprop(model.parameters(), lr=LR, momentum=0.9, alpha=0.99)
ModuleValidator.validate(model, strict=False)

privacy_engine = PrivacyEngine()
model, optimizer, train_loader = privacy_engine.make_private_with_epsilon(
    module=model,
    optimizer=optimizer,
    data_loader=train_loader,
    epochs=EPOCHS,
    target_epsilon=EPSILON,
    target_delta=DELTA,
    max_grad_norm=MAX_GRAD_NORM
)


# Adaptive learning rate scheduler
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS * len(train_loader), eta_min=8e-6)

# Loss function
criterion = nn.CrossEntropyLoss()


# Grouped Gradient Clipping Function
def GroupedGradClip(gradients, bound, groups):
    # Flatten gradients before splitting
    flat_grad = gradients.view(gradients.size(0), -1)  # Flatten to process gradients as a 2D tensor
    grouped_grads = torch.chunk(flat_grad, groups, dim=1)  # Chunk along the feature dimension
    clipped_grads = []

    # Compute the norm for each group and clip
    for g in grouped_grads:
        grad_norm = torch.norm(g, dim=1, keepdim=True)
        clip_coef = torch.clamp(bound / (grad_norm + 1e-6), max=1.0)  # Clip the gradient norm
        clipped_grads.append(g * clip_coef)

    # Concatenate back the clipped gradients and reshape to original
    clipped_flat_grad = torch.cat(clipped_grads, dim=1)

    # Reshape back to original gradient shape
    return clipped_flat_grad.view_as(gradients)


L=256
# Training and testing functions

def train(model, train_loader, optimizer, epoch, device):
    model.train()
    losses = []
    top1_acc = []

    with BatchMemoryManager(data_loader=train_loader, max_physical_batch_size=MAX_PHYSICAL_BATCH_SIZE, optimizer=optimizer) as memory_safe_data_loader:
        for batch_idx, (data, target) in tqdm(enumerate(memory_safe_data_loader), total=len(memory_safe_data_loader)):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()

            # Apply grouped gradient clipping
            for p in model.parameters():
                  if p.grad is not None:
                      p.grad = GroupedGradClip(p.grad, bound, NUM_GROUPS)
                      noise = torch.normal(mean=0, std=NOISE_MULTIPLIER * bound, size=p.grad.shape, device=device)
                      p.grad = (p.grad + noise) / L  # Normalize by group size



            optimizer.step()
            losses.append(loss.item())
            _, predicted = torch.max(output.data, 1)
            correct = predicted.eq(target).sum().item()
            top1_acc.append(correct / target.size(0))
            if (batch_idx + 1) % 200 == 0:
                epsilon = privacy_engine.get_epsilon(DELTA)
                print(
                    f"(ε = {epsilon:.2f}, δ = {DELTA})"
                )


    avg_loss = np.mean(losses)
    avg_acc = np.mean(top1_acc) * 100
    return avg_loss, avg_acc,epsilon


def test(model, test_loader, device):
    model.eval()
    losses = []
    top1_acc = []

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()

            # Calculate accuracy
            acc = np.mean(preds == labels)


            losses.append(loss.item())
            top1_acc.append(acc)


    avg_loss = np.mean(losses)
    avg_accuracy = np.mean(top1_acc)*100
    return avg_loss, avg_accuracy


# Train and validate the model
for epoch in range(1, EPOCHS + 1):
    train_loss, train_acc,epsilonv = train(model, train_loader, optimizer, epoch, device)
    test_loss, test_accuracy = test(model, test_loader, device)
    print(f'Epoch: {epoch}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%, '
          f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')


    scheduler.step(test_loss)

# Print final epsilon after training
final_epsilon = privacy_engine.get_epsilon(DELTA)
print(f"Final epsilon after {EPOCHS} epochs: {final_epsilon:.2f}")






Files already downloaded and verified
Files already downloaded and verified


 51%|█████     | 200/391 [00:42<00:48,  3.97it/s]

(ε = 0.19, δ = 1e-05)


394it [01:22,  4.75it/s]


Epoch: 1, Train Loss: 2.1421, Train Accuracy: 19.09%, Test Loss: 1.9415, Test Accuracy: 25.12%


 51%|█████     | 200/391 [00:41<00:41,  4.61it/s]

(ε = 0.33, δ = 1e-05)


399it [01:23,  4.76it/s]


Epoch: 2, Train Loss: 1.9038, Train Accuracy: 28.04%, Test Loss: 1.8487, Test Accuracy: 31.20%


 51%|█████     | 200/391 [00:41<00:42,  4.52it/s]

(ε = 0.42, δ = 1e-05)


397it [01:23,  4.74it/s]


Epoch: 3, Train Loss: 1.8089, Train Accuracy: 33.15%, Test Loss: 1.7687, Test Accuracy: 34.74%


 51%|█████     | 200/391 [00:43<00:44,  4.34it/s]

(ε = 0.50, δ = 1e-05)


399it [01:27,  4.55it/s]


Epoch: 4, Train Loss: 1.7395, Train Accuracy: 36.19%, Test Loss: 1.7412, Test Accuracy: 37.75%


 51%|█████     | 200/391 [00:43<01:02,  3.04it/s]

(ε = 0.57, δ = 1e-05)


394it [01:25,  4.60it/s]


Epoch: 5, Train Loss: 1.7061, Train Accuracy: 38.56%, Test Loss: 1.7196, Test Accuracy: 40.27%


 51%|█████     | 200/391 [00:43<00:49,  3.88it/s]

(ε = 0.63, δ = 1e-05)


397it [01:25,  4.63it/s]


Epoch: 6, Train Loss: 1.6877, Train Accuracy: 40.32%, Test Loss: 1.6795, Test Accuracy: 41.84%


 51%|█████     | 200/391 [00:42<00:47,  4.03it/s]

(ε = 0.69, δ = 1e-05)


398it [01:24,  4.71it/s]


Epoch: 7, Train Loss: 1.6490, Train Accuracy: 42.16%, Test Loss: 1.6407, Test Accuracy: 42.94%


 51%|█████     | 200/391 [00:42<00:48,  3.97it/s]

(ε = 0.74, δ = 1e-05)


397it [01:24,  4.69it/s]


Epoch: 8, Train Loss: 1.6399, Train Accuracy: 43.78%, Test Loss: 1.6323, Test Accuracy: 44.23%


 51%|█████     | 200/391 [00:42<00:47,  4.03it/s]

(ε = 0.79, δ = 1e-05)


397it [01:24,  4.71it/s]


Epoch: 9, Train Loss: 1.6297, Train Accuracy: 45.04%, Test Loss: 1.6528, Test Accuracy: 44.72%


 51%|█████     | 200/391 [00:42<01:02,  3.07it/s]

(ε = 0.84, δ = 1e-05)


397it [01:23,  4.74it/s]


Epoch: 10, Train Loss: 1.6365, Train Accuracy: 45.60%, Test Loss: 1.6953, Test Accuracy: 45.36%


 51%|█████     | 200/391 [00:41<00:54,  3.48it/s]

(ε = 0.88, δ = 1e-05)


397it [01:23,  4.76it/s]


Epoch: 11, Train Loss: 1.6358, Train Accuracy: 46.93%, Test Loss: 1.7055, Test Accuracy: 46.37%


 51%|█████     | 200/391 [00:41<00:42,  4.54it/s]

(ε = 0.93, δ = 1e-05)


398it [01:23,  4.76it/s]


Epoch: 12, Train Loss: 1.6446, Train Accuracy: 47.67%, Test Loss: 1.6585, Test Accuracy: 47.36%


 51%|█████     | 200/391 [00:41<00:42,  4.46it/s]

(ε = 0.97, δ = 1e-05)


398it [01:23,  4.77it/s]


Epoch: 13, Train Loss: 1.6132, Train Accuracy: 48.73%, Test Loss: 1.5905, Test Accuracy: 49.20%


 51%|█████     | 200/391 [00:41<00:42,  4.47it/s]

(ε = 1.01, δ = 1e-05)


400it [01:23,  4.77it/s]

(ε = 1.03, δ = 1e-05)





Epoch: 14, Train Loss: 1.5692, Train Accuracy: 50.35%, Test Loss: 1.6080, Test Accuracy: 49.82%


 51%|█████     | 200/391 [00:41<00:42,  4.53it/s]

(ε = 1.05, δ = 1e-05)


397it [01:22,  4.83it/s]


Epoch: 15, Train Loss: 1.5690, Train Accuracy: 51.27%, Test Loss: 1.5882, Test Accuracy: 50.43%


 51%|█████     | 200/391 [00:41<00:42,  4.46it/s]

(ε = 1.09, δ = 1e-05)


397it [01:22,  4.80it/s]


Epoch: 16, Train Loss: 1.5690, Train Accuracy: 51.62%, Test Loss: 1.6065, Test Accuracy: 51.14%


 51%|█████     | 200/391 [00:41<00:42,  4.46it/s]

(ε = 1.12, δ = 1e-05)


397it [01:22,  4.79it/s]


Epoch: 17, Train Loss: 1.5746, Train Accuracy: 52.46%, Test Loss: 1.6155, Test Accuracy: 51.43%


 51%|█████     | 200/391 [00:42<00:42,  4.47it/s]

(ε = 1.16, δ = 1e-05)


397it [01:23,  4.78it/s]


Epoch: 18, Train Loss: 1.5973, Train Accuracy: 51.95%, Test Loss: 1.6255, Test Accuracy: 52.10%


 51%|█████     | 200/391 [00:42<00:42,  4.44it/s]

(ε = 1.19, δ = 1e-05)


399it [01:23,  4.77it/s]


Epoch: 19, Train Loss: 1.5771, Train Accuracy: 53.25%, Test Loss: 1.6095, Test Accuracy: 52.22%


 51%|█████     | 200/391 [00:42<00:42,  4.55it/s]

(ε = 1.23, δ = 1e-05)


394it [01:23,  4.74it/s]


Epoch: 20, Train Loss: 1.5858, Train Accuracy: 53.21%, Test Loss: 1.6022, Test Accuracy: 52.53%


 51%|█████▏    | 201/391 [00:42<00:53,  3.57it/s]

(ε = 1.26, δ = 1e-05)


398it [01:23,  4.78it/s]


Epoch: 21, Train Loss: 1.5531, Train Accuracy: 53.56%, Test Loss: 1.6210, Test Accuracy: 53.18%


 51%|█████     | 200/391 [00:41<00:55,  3.44it/s]

(ε = 1.29, δ = 1e-05)


396it [01:23,  4.76it/s]


Epoch: 22, Train Loss: 1.5563, Train Accuracy: 54.34%, Test Loss: 1.6075, Test Accuracy: 53.48%


 51%|█████     | 200/391 [00:41<00:43,  4.36it/s]

(ε = 1.33, δ = 1e-05)


393it [01:22,  4.74it/s]


Epoch: 23, Train Loss: 1.5549, Train Accuracy: 54.56%, Test Loss: 1.5835, Test Accuracy: 54.19%


 51%|█████     | 200/391 [00:41<00:42,  4.47it/s]

(ε = 1.36, δ = 1e-05)


398it [01:23,  4.76it/s]


Epoch: 24, Train Loss: 1.5641, Train Accuracy: 54.45%, Test Loss: 1.5705, Test Accuracy: 54.59%


 51%|█████     | 200/391 [00:41<00:42,  4.49it/s]

(ε = 1.39, δ = 1e-05)


397it [01:22,  4.80it/s]


Epoch: 25, Train Loss: 1.5407, Train Accuracy: 55.22%, Test Loss: 1.6080, Test Accuracy: 54.39%


 51%|█████     | 200/391 [00:41<00:43,  4.41it/s]

(ε = 1.42, δ = 1e-05)


396it [01:22,  4.81it/s]


Epoch: 26, Train Loss: 1.5683, Train Accuracy: 55.39%, Test Loss: 1.6003, Test Accuracy: 54.98%


 51%|█████     | 200/391 [00:41<00:42,  4.51it/s]

(ε = 1.45, δ = 1e-05)


399it [01:22,  4.81it/s]


Epoch: 27, Train Loss: 1.5340, Train Accuracy: 56.24%, Test Loss: 1.5834, Test Accuracy: 55.13%


 51%|█████     | 200/391 [00:41<00:43,  4.41it/s]

(ε = 1.48, δ = 1e-05)


400it [01:22,  4.83it/s]

(ε = 1.49, δ = 1e-05)





Epoch: 28, Train Loss: 1.5380, Train Accuracy: 56.49%, Test Loss: 1.6599, Test Accuracy: 55.14%


 51%|█████     | 200/391 [00:41<00:42,  4.54it/s]

(ε = 1.51, δ = 1e-05)


396it [01:22,  4.82it/s]


Epoch: 29, Train Loss: 1.5824, Train Accuracy: 55.90%, Test Loss: 1.5820, Test Accuracy: 55.12%


 51%|█████     | 200/391 [00:42<00:46,  4.11it/s]

(ε = 1.53, δ = 1e-05)


394it [01:22,  4.79it/s]


Epoch: 30, Train Loss: 1.5559, Train Accuracy: 56.18%, Test Loss: 1.5666, Test Accuracy: 55.10%


 51%|█████     | 200/391 [00:42<00:56,  3.41it/s]

(ε = 1.56, δ = 1e-05)


399it [01:24,  4.74it/s]


Epoch: 31, Train Loss: 1.5346, Train Accuracy: 56.59%, Test Loss: 1.5973, Test Accuracy: 55.68%


 51%|█████     | 200/391 [00:42<00:53,  3.60it/s]

(ε = 1.59, δ = 1e-05)


399it [01:24,  4.73it/s]


Epoch: 32, Train Loss: 1.5229, Train Accuracy: 56.84%, Test Loss: 1.5499, Test Accuracy: 55.74%


 51%|█████     | 200/391 [00:41<00:43,  4.41it/s]

(ε = 1.62, δ = 1e-05)


400it [01:24,  4.38it/s]

(ε = 1.63, δ = 1e-05)


402it [01:24,  4.74it/s]


Epoch: 33, Train Loss: 1.5246, Train Accuracy: 57.25%, Test Loss: 1.5872, Test Accuracy: 56.53%


 51%|█████     | 200/391 [00:42<00:53,  3.54it/s]

(ε = 1.64, δ = 1e-05)


397it [01:24,  4.72it/s]


Epoch: 34, Train Loss: 1.5214, Train Accuracy: 57.12%, Test Loss: 1.5683, Test Accuracy: 56.88%


 51%|█████     | 200/391 [00:41<00:44,  4.27it/s]

(ε = 1.67, δ = 1e-05)


393it [01:22,  4.76it/s]


Epoch: 35, Train Loss: 1.5069, Train Accuracy: 57.81%, Test Loss: 1.5464, Test Accuracy: 56.84%


 51%|█████     | 200/391 [00:41<00:43,  4.35it/s]

(ε = 1.69, δ = 1e-05)


394it [01:22,  4.80it/s]


Epoch: 36, Train Loss: 1.5037, Train Accuracy: 57.64%, Test Loss: 1.5371, Test Accuracy: 56.98%


 51%|█████     | 200/391 [00:41<00:42,  4.50it/s]

(ε = 1.72, δ = 1e-05)


399it [01:22,  4.84it/s]


Epoch: 37, Train Loss: 1.4914, Train Accuracy: 57.87%, Test Loss: 1.5708, Test Accuracy: 56.53%


 51%|█████     | 200/391 [00:41<00:43,  4.41it/s]

(ε = 1.75, δ = 1e-05)


395it [01:21,  4.83it/s]


Epoch: 38, Train Loss: 1.5262, Train Accuracy: 57.91%, Test Loss: 1.5630, Test Accuracy: 56.96%


 51%|█████     | 200/391 [00:42<00:42,  4.50it/s]

(ε = 1.77, δ = 1e-05)


400it [01:23,  4.51it/s]

(ε = 1.78, δ = 1e-05)


401it [01:23,  4.79it/s]


Epoch: 39, Train Loss: 1.5355, Train Accuracy: 58.04%, Test Loss: 1.5761, Test Accuracy: 57.19%


 51%|█████     | 200/391 [00:42<00:45,  4.20it/s]

(ε = 1.80, δ = 1e-05)


395it [01:22,  4.78it/s]


Epoch: 40, Train Loss: 1.5484, Train Accuracy: 57.91%, Test Loss: 1.5757, Test Accuracy: 57.16%


 51%|█████     | 200/391 [00:42<01:04,  2.95it/s]

(ε = 1.82, δ = 1e-05)


397it [01:23,  4.78it/s]


Epoch: 41, Train Loss: 1.5086, Train Accuracy: 58.44%, Test Loss: 1.5629, Test Accuracy: 57.81%


 51%|█████     | 200/391 [00:41<00:45,  4.21it/s]

(ε = 1.84, δ = 1e-05)


397it [01:23,  4.77it/s]


Epoch: 42, Train Loss: 1.5027, Train Accuracy: 58.89%, Test Loss: 1.5664, Test Accuracy: 57.55%


 51%|█████     | 200/391 [00:41<00:44,  4.27it/s]

(ε = 1.87, δ = 1e-05)


397it [01:23,  4.77it/s]


Epoch: 43, Train Loss: 1.5333, Train Accuracy: 58.70%, Test Loss: 1.5790, Test Accuracy: 58.00%


 51%|█████     | 200/391 [00:41<00:44,  4.31it/s]

(ε = 1.89, δ = 1e-05)


395it [01:21,  4.82it/s]


Epoch: 44, Train Loss: 1.4942, Train Accuracy: 59.33%, Test Loss: 1.5482, Test Accuracy: 58.67%


 51%|█████     | 200/391 [00:41<00:43,  4.37it/s]

(ε = 1.92, δ = 1e-05)


396it [01:22,  4.81it/s]


Epoch: 45, Train Loss: 1.4774, Train Accuracy: 59.56%, Test Loss: 1.5562, Test Accuracy: 58.19%


 51%|█████     | 200/391 [00:41<00:43,  4.37it/s]

(ε = 1.94, δ = 1e-05)


397it [01:22,  4.83it/s]


Epoch: 46, Train Loss: 1.5025, Train Accuracy: 59.50%, Test Loss: 1.5676, Test Accuracy: 58.68%


 51%|█████▏    | 201/391 [00:42<00:42,  4.48it/s]

(ε = 1.96, δ = 1e-05)


399it [01:22,  4.82it/s]


Epoch: 47, Train Loss: 1.5055, Train Accuracy: 59.49%, Test Loss: 1.5803, Test Accuracy: 58.70%


 51%|█████     | 200/391 [00:41<01:04,  2.95it/s]

(ε = 1.99, δ = 1e-05)


396it [01:21,  4.83it/s]


Epoch: 48, Train Loss: 1.5291, Train Accuracy: 59.42%, Test Loss: 1.5641, Test Accuracy: 58.76%


 51%|█████     | 200/391 [00:41<00:42,  4.54it/s]

(ε = 2.01, δ = 1e-05)


398it [01:22,  4.83it/s]


Epoch: 49, Train Loss: 1.5200, Train Accuracy: 59.35%, Test Loss: 1.6442, Test Accuracy: 57.45%


 51%|█████     | 200/391 [00:40<00:44,  4.33it/s]

(ε = 2.03, δ = 1e-05)


400it [01:22,  4.85it/s]

(ε = 2.04, δ = 1e-05)





Epoch: 50, Train Loss: 1.5053, Train Accuracy: 59.96%, Test Loss: 1.5607, Test Accuracy: 58.35%


 51%|█████     | 200/391 [00:41<00:42,  4.48it/s]

(ε = 2.05, δ = 1e-05)


393it [01:20,  4.87it/s]


Epoch: 51, Train Loss: 1.4900, Train Accuracy: 59.81%, Test Loss: 1.5723, Test Accuracy: 58.89%


 51%|█████     | 200/391 [00:41<00:43,  4.38it/s]

(ε = 2.08, δ = 1e-05)


398it [01:22,  4.82it/s]


Epoch: 52, Train Loss: 1.5290, Train Accuracy: 59.07%, Test Loss: 1.5471, Test Accuracy: 58.87%


 51%|█████     | 200/391 [00:42<00:48,  3.91it/s]

(ε = 2.10, δ = 1e-05)


100%|██████████| 391/391 [01:21<00:00,  4.80it/s]


Epoch: 53, Train Loss: 1.4898, Train Accuracy: 59.64%, Test Loss: 1.5152, Test Accuracy: 59.43%


 51%|█████▏    | 201/391 [00:41<00:43,  4.38it/s]

(ε = 2.12, δ = 1e-05)


398it [01:22,  4.82it/s]


Epoch: 54, Train Loss: 1.4775, Train Accuracy: 60.00%, Test Loss: 1.5285, Test Accuracy: 59.19%


 51%|█████     | 200/391 [00:41<00:42,  4.52it/s]

(ε = 2.14, δ = 1e-05)


400it [01:22,  3.33it/s]

(ε = 2.15, δ = 1e-05)


401it [01:22,  4.83it/s]


Epoch: 55, Train Loss: 1.4914, Train Accuracy: 59.64%, Test Loss: 1.5444, Test Accuracy: 58.77%


 51%|█████     | 200/391 [00:41<00:43,  4.43it/s]

(ε = 2.16, δ = 1e-05)


399it [01:22,  4.85it/s]


Epoch: 56, Train Loss: 1.4839, Train Accuracy: 60.18%, Test Loss: 1.5569, Test Accuracy: 58.52%


 51%|█████     | 200/391 [00:41<00:43,  4.35it/s]

(ε = 2.18, δ = 1e-05)


394it [01:20,  4.87it/s]


Epoch: 57, Train Loss: 1.5002, Train Accuracy: 60.43%, Test Loss: 1.5885, Test Accuracy: 58.52%


 51%|█████▏    | 201/391 [00:42<00:43,  4.41it/s]

(ε = 2.20, δ = 1e-05)


397it [01:22,  4.81it/s]


Epoch: 58, Train Loss: 1.4873, Train Accuracy: 60.44%, Test Loss: 1.5258, Test Accuracy: 59.35%


 51%|█████▏    | 201/391 [00:42<00:46,  4.05it/s]

(ε = 2.23, δ = 1e-05)


397it [01:22,  4.79it/s]


Epoch: 59, Train Loss: 1.4754, Train Accuracy: 60.50%, Test Loss: 1.5283, Test Accuracy: 59.06%


 51%|█████     | 200/391 [00:41<00:59,  3.20it/s]

(ε = 2.25, δ = 1e-05)


397it [01:23,  4.77it/s]


Epoch: 60, Train Loss: 1.4671, Train Accuracy: 60.66%, Test Loss: 1.5299, Test Accuracy: 59.48%


 51%|█████▏    | 201/391 [00:41<00:43,  4.41it/s]

(ε = 2.27, δ = 1e-05)


398it [01:22,  4.80it/s]


Epoch: 61, Train Loss: 1.4914, Train Accuracy: 60.52%, Test Loss: 1.5435, Test Accuracy: 59.04%


 51%|█████▏    | 201/391 [00:41<00:42,  4.47it/s]

(ε = 2.29, δ = 1e-05)


399it [01:23,  4.79it/s]


Epoch: 62, Train Loss: 1.5032, Train Accuracy: 60.38%, Test Loss: 1.5045, Test Accuracy: 59.18%


 51%|█████     | 200/391 [00:41<00:45,  4.20it/s]

(ε = 2.31, δ = 1e-05)


394it [01:21,  4.82it/s]


Epoch: 63, Train Loss: 1.4641, Train Accuracy: 60.90%, Test Loss: 1.5198, Test Accuracy: 59.75%


 51%|█████     | 200/391 [00:41<00:43,  4.44it/s]

(ε = 2.33, δ = 1e-05)


396it [01:21,  4.85it/s]


Epoch: 64, Train Loss: 1.4790, Train Accuracy: 60.86%, Test Loss: 1.5710, Test Accuracy: 59.05%


 51%|█████     | 200/391 [00:41<00:44,  4.26it/s]

(ε = 2.35, δ = 1e-05)


400it [01:22,  4.47it/s]

(ε = 2.36, δ = 1e-05)


402it [01:23,  4.84it/s]


Epoch: 65, Train Loss: 1.5360, Train Accuracy: 60.18%, Test Loss: 1.5995, Test Accuracy: 58.84%


 51%|█████▏    | 201/391 [00:42<00:43,  4.37it/s]

(ε = 2.37, δ = 1e-05)


398it [01:23,  4.79it/s]


Epoch: 66, Train Loss: 1.5083, Train Accuracy: 60.76%, Test Loss: 1.5599, Test Accuracy: 59.57%


 51%|█████     | 200/391 [00:42<00:44,  4.25it/s]

(ε = 2.39, δ = 1e-05)


400it [01:23,  4.48it/s]

(ε = 2.40, δ = 1e-05)


401it [01:23,  4.79it/s]


Epoch: 67, Train Loss: 1.4975, Train Accuracy: 60.86%, Test Loss: 1.6153, Test Accuracy: 58.83%


 51%|█████▏    | 201/391 [00:42<00:47,  4.01it/s]

(ε = 2.41, δ = 1e-05)


399it [01:22,  4.82it/s]


Epoch: 68, Train Loss: 1.5028, Train Accuracy: 60.78%, Test Loss: 1.5065, Test Accuracy: 60.14%


 51%|█████     | 200/391 [00:41<00:58,  3.28it/s]

(ε = 2.43, δ = 1e-05)


392it [01:21,  4.81it/s]


Epoch: 69, Train Loss: 1.4698, Train Accuracy: 61.45%, Test Loss: 1.5158, Test Accuracy: 60.54%


 51%|█████     | 200/391 [00:41<00:45,  4.17it/s]

(ε = 2.45, δ = 1e-05)


399it [01:23,  4.78it/s]


Epoch: 70, Train Loss: 1.4656, Train Accuracy: 61.14%, Test Loss: 1.5084, Test Accuracy: 59.73%


 51%|█████▏    | 201/391 [00:41<00:42,  4.43it/s]

(ε = 2.47, δ = 1e-05)


396it [01:21,  4.84it/s]


Epoch: 71, Train Loss: 1.4506, Train Accuracy: 61.50%, Test Loss: 1.5009, Test Accuracy: 60.45%


 51%|█████▏    | 201/391 [00:41<00:43,  4.39it/s]

(ε = 2.49, δ = 1e-05)


393it [01:21,  4.83it/s]


Epoch: 72, Train Loss: 1.4181, Train Accuracy: 61.90%, Test Loss: 1.4808, Test Accuracy: 60.56%


 51%|█████▏    | 201/391 [00:42<00:42,  4.44it/s]

(ε = 2.51, δ = 1e-05)


397it [01:22,  4.80it/s]


Epoch: 73, Train Loss: 1.4771, Train Accuracy: 61.22%, Test Loss: 1.4978, Test Accuracy: 61.05%


 51%|█████     | 200/391 [00:42<00:45,  4.22it/s]

(ε = 2.52, δ = 1e-05)


400it [01:23,  4.78it/s]

(ε = 2.53, δ = 1e-05)





Epoch: 74, Train Loss: 1.4722, Train Accuracy: 61.39%, Test Loss: 1.5184, Test Accuracy: 60.26%


 51%|█████     | 200/391 [00:41<00:54,  3.52it/s]

(ε = 2.54, δ = 1e-05)


399it [01:22,  4.81it/s]


Epoch: 75, Train Loss: 1.4650, Train Accuracy: 61.60%, Test Loss: 1.5212, Test Accuracy: 60.35%


 51%|█████     | 200/391 [00:41<00:59,  3.22it/s]

(ε = 2.56, δ = 1e-05)


396it [01:22,  4.78it/s]


Epoch: 76, Train Loss: 1.4622, Train Accuracy: 62.21%, Test Loss: 1.5177, Test Accuracy: 60.38%


 51%|█████▏    | 201/391 [00:42<00:43,  4.34it/s]

(ε = 2.58, δ = 1e-05)


394it [01:23,  4.74it/s]


Epoch: 77, Train Loss: 1.4564, Train Accuracy: 61.95%, Test Loss: 1.5229, Test Accuracy: 60.66%


 51%|█████     | 200/391 [00:41<00:43,  4.36it/s]

(ε = 2.60, δ = 1e-05)


396it [01:23,  4.74it/s]


Epoch: 78, Train Loss: 1.4734, Train Accuracy: 61.64%, Test Loss: 1.5266, Test Accuracy: 60.81%


 51%|█████▏    | 201/391 [00:42<00:44,  4.25it/s]

(ε = 2.62, δ = 1e-05)


397it [01:24,  4.72it/s]


Epoch: 79, Train Loss: 1.4778, Train Accuracy: 61.49%, Test Loss: 1.5178, Test Accuracy: 61.17%


 51%|█████▏    | 201/391 [00:42<00:43,  4.33it/s]

(ε = 2.64, δ = 1e-05)


397it [01:23,  4.74it/s]


Epoch: 80, Train Loss: 1.4453, Train Accuracy: 61.95%, Test Loss: 1.4905, Test Accuracy: 60.75%


 51%|█████     | 200/391 [00:41<00:46,  4.12it/s]

(ε = 2.65, δ = 1e-05)


396it [01:23,  4.75it/s]


Epoch: 81, Train Loss: 1.4711, Train Accuracy: 61.67%, Test Loss: 1.5063, Test Accuracy: 60.63%


 51%|█████     | 200/391 [00:41<00:46,  4.07it/s]

(ε = 2.67, δ = 1e-05)


396it [01:23,  4.76it/s]


Epoch: 82, Train Loss: 1.4465, Train Accuracy: 61.97%, Test Loss: 1.5152, Test Accuracy: 60.86%


 51%|█████▏    | 201/391 [00:42<00:43,  4.32it/s]

(ε = 2.69, δ = 1e-05)


400it [01:23,  4.40it/s]

(ε = 2.70, δ = 1e-05)


401it [01:24,  4.76it/s]


Epoch: 83, Train Loss: 1.4406, Train Accuracy: 62.23%, Test Loss: 1.5338, Test Accuracy: 60.50%


 51%|█████▏    | 201/391 [00:42<00:44,  4.29it/s]

(ε = 2.71, δ = 1e-05)


399it [01:23,  4.77it/s]


Epoch: 84, Train Loss: 1.4300, Train Accuracy: 62.35%, Test Loss: 1.5164, Test Accuracy: 60.90%


 51%|█████     | 200/391 [00:42<00:44,  4.30it/s]

(ε = 2.73, δ = 1e-05)


395it [01:23,  4.75it/s]


Epoch: 85, Train Loss: 1.4273, Train Accuracy: 62.54%, Test Loss: 1.5281, Test Accuracy: 60.85%


 51%|█████     | 200/391 [00:42<00:44,  4.29it/s]

(ε = 2.75, δ = 1e-05)


395it [01:22,  4.76it/s]


Epoch: 86, Train Loss: 1.4894, Train Accuracy: 61.95%, Test Loss: 1.5486, Test Accuracy: 60.77%


 51%|█████▏    | 201/391 [00:42<00:44,  4.29it/s]

(ε = 2.76, δ = 1e-05)


397it [01:23,  4.75it/s]


Epoch: 87, Train Loss: 1.4306, Train Accuracy: 62.60%, Test Loss: 1.5164, Test Accuracy: 60.56%


 51%|█████     | 200/391 [00:42<00:44,  4.31it/s]

(ε = 2.78, δ = 1e-05)


397it [01:24,  4.72it/s]


Epoch: 88, Train Loss: 1.4270, Train Accuracy: 62.44%, Test Loss: 1.5046, Test Accuracy: 60.62%


 51%|█████     | 200/391 [00:42<00:47,  4.06it/s]

(ε = 2.80, δ = 1e-05)


399it [01:24,  4.71it/s]


Epoch: 89, Train Loss: 1.4463, Train Accuracy: 62.53%, Test Loss: 1.5207, Test Accuracy: 60.95%


 51%|█████     | 200/391 [00:42<00:46,  4.10it/s]

(ε = 2.82, δ = 1e-05)


398it [01:24,  4.73it/s]


Epoch: 90, Train Loss: 1.4578, Train Accuracy: 62.28%, Test Loss: 1.4816, Test Accuracy: 60.70%


 51%|█████     | 200/391 [00:42<00:47,  4.04it/s]

(ε = 2.83, δ = 1e-05)


396it [01:23,  4.73it/s]


Epoch: 91, Train Loss: 1.4025, Train Accuracy: 62.70%, Test Loss: 1.4692, Test Accuracy: 61.26%


 51%|█████▏    | 201/391 [00:43<00:43,  4.33it/s]

(ε = 2.85, δ = 1e-05)


397it [01:24,  4.70it/s]


Epoch: 92, Train Loss: 1.4012, Train Accuracy: 63.20%, Test Loss: 1.5231, Test Accuracy: 61.58%


 51%|█████     | 200/391 [00:42<00:48,  3.92it/s]

(ε = 2.87, δ = 1e-05)


396it [01:23,  4.73it/s]


Epoch: 93, Train Loss: 1.4518, Train Accuracy: 62.70%, Test Loss: 1.5441, Test Accuracy: 60.82%


 51%|█████▏    | 201/391 [00:43<00:44,  4.30it/s]

(ε = 2.89, δ = 1e-05)


399it [01:24,  4.73it/s]


Epoch: 94, Train Loss: 1.4574, Train Accuracy: 62.79%, Test Loss: 1.5520, Test Accuracy: 60.35%


 51%|█████▏    | 201/391 [00:42<00:46,  4.11it/s]

(ε = 2.90, δ = 1e-05)


397it [01:24,  4.71it/s]


Epoch: 95, Train Loss: 1.4554, Train Accuracy: 62.35%, Test Loss: 1.5298, Test Accuracy: 60.88%


 51%|█████     | 200/391 [00:42<00:46,  4.12it/s]

(ε = 2.92, δ = 1e-05)


398it [01:24,  4.73it/s]


Epoch: 96, Train Loss: 1.4391, Train Accuracy: 62.37%, Test Loss: 1.5203, Test Accuracy: 61.08%


 51%|█████     | 200/391 [00:42<00:50,  3.81it/s]

(ε = 2.94, δ = 1e-05)


399it [01:24,  4.71it/s]


Epoch: 97, Train Loss: 1.4411, Train Accuracy: 62.53%, Test Loss: 1.4918, Test Accuracy: 61.07%


 51%|█████     | 200/391 [00:42<00:49,  3.85it/s]

(ε = 2.95, δ = 1e-05)


395it [01:27,  4.52it/s]


Epoch: 98, Train Loss: 1.4162, Train Accuracy: 62.72%, Test Loss: 1.5420, Test Accuracy: 61.15%


 51%|█████     | 200/391 [00:43<00:48,  3.97it/s]

(ε = 2.97, δ = 1e-05)


398it [01:35,  4.17it/s]


Epoch: 99, Train Loss: 1.4227, Train Accuracy: 62.72%, Test Loss: 1.5182, Test Accuracy: 60.87%


 51%|█████     | 200/391 [00:42<00:49,  3.89it/s]

(ε = 2.99, δ = 1e-05)


398it [01:25,  4.66it/s]


Epoch: 100, Train Loss: 1.4257, Train Accuracy: 62.85%, Test Loss: 1.4945, Test Accuracy: 61.67%
Final epsilon after 100 epochs: 3.00


In [4]:
top1_acc=test(model,test_loader,device)
top1_acc

(1.4945187171300252, 61.668049410029504)