## Name: Naman Trisoliya

## ID: 12141150


# Loading libraries


In [1]:
import random
import torch
import numpy as np
import torchvision
import torch.nn as nn
from tqdm import tqdm
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

In [2]:
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

# Loading Dataset and preprocessing


In [3]:
transform=transforms.Compose([transforms.ToTensor(),
                    transforms.Normalize((0.5),(0.5))])

In [4]:
data_train=datasets.MNIST(root='./data',train=True,download=True,transform=transform)
data_test=datasets.MNIST(root='./data',train=False,download=True,transform=transform)

In [5]:
print(len(data_train),len(data_test))

60000 10000


## Dataset setup for experiments


The experiments were performed with 200 random points taken from each class during training and 10000 points in test set.


In [6]:
batch_size=50

In [7]:
def get_subset_dataloader(dataset, random_seed, num_samples_per_class,batch_size):
    torch.manual_seed(random_seed)

    class_indices = {}
    for i in range(len(dataset.classes)):
        class_indices[i] = []

    for idx, (_, label) in enumerate(dataset):
        class_indices[label].append(idx)

    selected_indices = []
    for _, indices in class_indices.items():
        selected_indices.extend(random.sample(indices, num_samples_per_class))

    subset_dataset = Subset(dataset, selected_indices)
    # print("Subset created with total lenght:",len(subset_dataset))

    dataloader = DataLoader(subset_dataset, batch_size=batch_size, shuffle=True)

    return dataloader

In [8]:
test_loader=torch.utils.data.DataLoader(data_test,batch_size=batch_size,shuffle=False)

In [9]:
classes = ('Zero','One', 'Two', 'Three', 'Four','Five', 'Six', 'Seven', 'Eight', 'Nine')

# Model Building


LeNet-5 Architecture was used in the experiments with maxpooling, ReLU activations and dropout after the dense hidden layer. The total parameter count of the used baseline model is 61706.


In [10]:
class BaseModel(nn.Module):
    def __init__(self,include_dropout):
        super(BaseModel, self).__init__()
        self.include_dropout=include_dropout

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5,stride=1,padding=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5,stride=1)


        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

        self.dropout = nn.Dropout(p=0.5)

        self.flatten=nn.Flatten()

    def forward(self, x):
        x = self.maxpool(F.relu(self.conv1(x)))

        x = self.maxpool(F.relu(self.conv2(x)))

        x = self.flatten(x)

        x = F.relu(self.fc1(x))
        if self.include_dropout:
            x = self.dropout(x)

        x = F.relu(self.fc2(x))
        if self.include_dropout:
            x = self.dropout(x)

        x = self.fc3(x)
        return x

model= BaseModel(include_dropout=False)
model.to(device)
summary(model,(1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
         MaxPool2d-2            [-1, 6, 14, 14]               0
            Conv2d-3           [-1, 16, 10, 10]           2,416
         MaxPool2d-4             [-1, 16, 5, 5]               0
           Flatten-5                  [-1, 400]               0
            Linear-6                  [-1, 120]          48,120
            Linear-7                   [-1, 84]          10,164
            Linear-8                   [-1, 10]             850
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.06
Params size (MB): 0.24
Estimated Total Size (MB): 0.30
----------------------------------------------------------------


In [11]:
class BatchNormModel(nn.Module):
    def __init__(self):
        super(BatchNormModel, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5,stride=1,padding=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5,stride=1)


        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.norm1 = nn.BatchNorm2d(6)
        self.norm2 = nn.BatchNorm2d(16)
        self.norm3 = nn.BatchNorm1d(120)
        self.norm4 = nn.BatchNorm1d(84)


        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

        self.dropout = nn.Dropout(p=0.5)

        self.flatten=nn.Flatten()

    def forward(self, x):
        x = self.maxpool(self.norm1(F.relu(self.conv1(x))))

        x = self.maxpool(self.norm2(F.relu(self.conv2(x))))

        x = self.flatten(x)

        x = self.norm3(F.relu(self.fc1(x)))

        x = self.norm4(F.relu(self.fc2(x)))

        x = self.fc3(x)
        return x

model= BatchNormModel()
model.to(device)
summary(model,(1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
       BatchNorm2d-2            [-1, 6, 28, 28]              12
         MaxPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 16, 10, 10]           2,416
       BatchNorm2d-5           [-1, 16, 10, 10]              32
         MaxPool2d-6             [-1, 16, 5, 5]               0
           Flatten-7                  [-1, 400]               0
            Linear-8                  [-1, 120]          48,120
       BatchNorm1d-9                  [-1, 120]             240
           Linear-10                   [-1, 84]          10,164
      BatchNorm1d-11                   [-1, 84]             168
           Linear-12                   [-1, 10]             850
Total params: 62,158
Trainable params: 62,158
Non-trainable params: 0
---------------------------------

In [12]:
class DropoutModel(nn.Module):
    def __init__(self):
        super(DropoutModel, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5,stride=1,padding=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5,stride=1)


        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

        self.dropout = nn.Dropout(p=0.5)

        self.flatten=nn.Flatten()

    def forward(self, x):
        x = self.maxpool(F.relu(self.conv1(x)))
        x = self.dropout(x)

        x = self.maxpool(F.relu(self.conv2(x)))
        x = self.dropout(x)

        x = self.flatten(x)

        x = F.relu(self.fc1(x))
        x = self.dropout(x)

        x = F.relu(self.fc2(x))
        x = self.dropout(x)

        x = self.fc3(x)
        return x

model= DropoutModel()
model.to(device)
summary(model,(1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
         MaxPool2d-2            [-1, 6, 14, 14]               0
           Dropout-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 16, 10, 10]           2,416
         MaxPool2d-5             [-1, 16, 5, 5]               0
           Dropout-6             [-1, 16, 5, 5]               0
           Flatten-7                  [-1, 400]               0
            Linear-8                  [-1, 120]          48,120
           Dropout-9                  [-1, 120]               0
           Linear-10                   [-1, 84]          10,164
          Dropout-11                   [-1, 84]               0
           Linear-12                   [-1, 10]             850
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
---------------------------------

# Metrics Setup


In [13]:
def calculate_accuracy(output,labels):
    _,predicted=torch.max(output.data, 1)

    total = labels.size(0)
    correct = (predicted.to(device) == labels).sum().item()

    return correct,total

# Regularizers code setup


In [14]:
def double_backprop_loss(model, criterion, inputs, targets, lambda_double_backprop):
    outputs = model(inputs)
    loss = criterion(outputs, targets)

    # Compute gradient of the original loss with respect to model parameters
    model.zero_grad()
    loss.backward(retain_graph=True)
    grad_loss = torch.cat([param.grad.flatten() for param in model.parameters()])

    # Compute squared L2 norm of the gradient
    double_backprop_term = lambda_double_backprop * torch.norm(grad_loss, p=2)**2

    # Total loss including Double Backpropagation regularization
    total_loss = loss + double_backprop_term

    return total_loss

In [15]:
def jac_reg_loss(model, criterion, inputs, targets, lambda_jacreg):
    outputs = model(inputs)
    loss = criterion(outputs, targets)

    # Compute Jacobian of softmax output probabilities w.r.t. input
    inputs.requires_grad_(True)
    outputs = model(inputs)
    outputs=F.softmax(outputs,dim=1)
    grad_outputs = torch.ones_like(outputs)
    grad_softmax = torch.autograd.grad(outputs, inputs, grad_outputs=grad_outputs, retain_graph=True)[0]

    # Compute squared Frobenius norm of Jacobian
    jac_norm_squared = torch.norm(grad_softmax, p='fro')**2

    # Jacobian Regularization term
    jacreg_term = lambda_jacreg * jac_norm_squared

    # Total loss including JacReg
    total_loss = loss + jacreg_term

    return total_loss

In [16]:
def frobreg_loss(model, criterion, inputs, targets, lambda_frobreg):
    outputs = model(inputs)
    loss = criterion(outputs, targets)

    # Compute Jacobian of logits w.r.t. input
    inputs.requires_grad_(True)
    outputs = model(inputs)
    grad_outputs = torch.ones_like(outputs)
    grad_logits = torch.autograd.grad(outputs, inputs, grad_outputs=grad_outputs, retain_graph=True)[0]

    # Compute squared Frobenius norm of Jacobian
    frob_norm_squared = torch.norm(grad_logits, p='fro')**2

    # Frobenius Regularization term
    frobreg_term = lambda_frobreg * frob_norm_squared

    # Total loss including FrobReg
    total_loss = loss + frobreg_term

    return total_loss

In [17]:
def spectral_reg_loss(model, criterion, inputs, targets, lambda_spectreg, k_dimensions):
    outputs = model(inputs)
    loss = criterion(outputs, targets)

    # Compute Jacobian of logits w.r.t. input
    inputs.requires_grad_(True)
    outputs = model(inputs)
    grad_outputs = torch.ones_like(outputs)
    grad_logits = torch.autograd.grad(outputs, inputs, grad_outputs=grad_outputs, retain_graph=True)[0]

    # Apply random projection
    random_proj = torch.randn(grad_logits.shape[2], k_dimensions).to(device)
    # print(grad_logits.shape,random_proj.shape)
    projected_grad_logits = torch.matmul(grad_logits, random_proj)

    # Compute squared L2 norm of the result
    spectral_reg_term = lambda_spectreg * torch.norm(projected_grad_logits, p=2)**2

    # Total loss including SpectReg
    total_loss = loss + spectral_reg_term

    return total_loss

In [18]:
def conf_penalty_loss(model,criterion,inputs,targets,alpha=0.1):
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    # Calculate probabilities from logits using softmax
    probs = torch.softmax(outputs, dim=1)

    # Calculate entropy
    entropy = torch.sum(-probs * torch.log(probs + 1e-10), dim=1).mean()

    # Apply confidence penalty regularization
    penalty = alpha * entropy

    total_loss = loss + penalty

    return total_loss

In [19]:
def Evaluate(test_loader,model):
    with torch.no_grad():
        val_correct=0
        val_total=0
        for i, data in enumerate(test_loader):
            inputs,label=data

            inputs=inputs.to(device)
            label=label.to(device)

            output=model(inputs)
            correct,total=calculate_accuracy(output,label)
            val_correct+=correct
            val_total+=total
    # print(val_correct,val_total)
    return round(100*(val_correct/val_total),2)

# Experiment C2: GRADIENT REGULARIZATION COMPARED WITH DROPOUT AND BATCH NORMALIZATION


In [None]:
batch_size=50
num_samples_per_class=200
seeds=[10,20,30,40,50,60,70,80,90,100]

In [None]:
def train(model,train_loader,test_loader,num_samples_per_class,regularizer,reg_parameter):
    criterion=nn.CrossEntropyLoss()
    opt=torch.optim.Adam(model.parameters(),lr=0.01,betas=(0.9,0.999),weight_decay=0.0005)

    num_epochs= int(10000/((num_samples_per_class*10)/batch_size))
    # num_epochs=1
    milestones = [int(0.5 * num_epochs), int(0.75 * num_epochs)]
    scheduler = MultiStepLR(opt, milestones=milestones, gamma=0.1)

    for epoch in tqdm(range(num_epochs),"Trained"):
        for i, data in enumerate(train_loader):
            inputs,label=data
            inputs=inputs.to(device)
            label=label.to(device)
            opt.zero_grad()

            if regularizer=="SpectReg":
                loss = spectral_reg_loss(model, criterion, inputs, label, reg_parameter, 10)
            elif regularizer=="NoGR":
                output=model(inputs)
                loss = criterion(output,label)
            elif regularizer=="DoubleBack":
                loss = double_backprop_loss(model, criterion, inputs, label,reg_parameter)

            loss.backward()
            opt.step()

        scheduler.step()

    return Evaluate(test_loader,model)

In [None]:
models=["Baseline","Batchnorm","Dropout"]
regularizers=["NoGR","DoubleBack","SpectReg"]

model_params={"Baseline":{"NoGR":0,"DoubleBack":50,"SpectReg":0.01},
              "Batchnorm":{"NoGR":0,"DoubleBack":0.001,"SpectReg":0.001},
              "Dropout":{"NoGR":0,"DoubleBack":50,"SpectReg":0.01}}

accuracy_dict={}
for i in models:
    k={}
    for j in regularizers:
        k[j]=[]
    accuracy_dict[i]=k
print(accuracy_dict)

{'Baseline': {'NoGR': [], 'DoubleBack': [], 'SpectReg': []}, 'Batchnorm': {'NoGR': [], 'DoubleBack': [], 'SpectReg': []}, 'Dropout': {'NoGR': [], 'DoubleBack': [], 'SpectReg': []}}


In [None]:
for model_name in models:
    for reg in regularizers:
            reg_parameter=model_params[model_name][reg]
            print(f"Model Name:{model_name}, Regularizer: {reg}, Parameter: {reg_parameter}")
            for i in range(10):
                print(f"Iteration:{i} seed:{seeds[i]}")

                if model_name=="Baseline":
                    model=BaseModel(include_dropout=False).to(device)
                elif model_name=="Batchnorm":
                    model=BatchNormModel().to(device)
                elif model_name== "Dropout":
                    model=DropoutModel().to(device)

                train_loader=get_subset_dataloader(data_train,seeds[i],num_samples_per_class,batch_size)
                accuracy=train(model,train_loader,test_loader,num_samples_per_class,reg,reg_parameter)
                accuracy_dict[model_name][reg].append(accuracy)
                print("Accuracy:",accuracy)

            accuracy_list=accuracy_dict[model_name][reg]
            print(f"Mean Accuracy:{np.mean(accuracy_list)}, STD: {np.std(accuracy_list)}")
            print("")

Model Name:Baseline, Regularizer: NoGR, Parameter: 0
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [02:10<00:00,  1.91it/s]


Accuracy: 96.09
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [02:12<00:00,  1.89it/s]


Accuracy: 97.23
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [02:07<00:00,  1.97it/s]


Accuracy: 96.91
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [02:08<00:00,  1.95it/s]


Accuracy: 96.42
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [02:07<00:00,  1.95it/s]


Accuracy: 97.06
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [02:07<00:00,  1.96it/s]


Accuracy: 96.63
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [02:08<00:00,  1.94it/s]


Accuracy: 96.93
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [02:07<00:00,  1.95it/s]


Accuracy: 96.83
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [02:06<00:00,  1.98it/s]


Accuracy: 96.88
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [02:07<00:00,  1.96it/s]


Accuracy: 96.75
Mean Accuracy:96.773, STD: 0.31006612198045763

Model Name:Baseline, Regularizer: DoubleBack, Parameter: 50
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.79it/s]


Accuracy: 94.32
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.79it/s]


Accuracy: 96.55
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [02:20<00:00,  1.78it/s]


Accuracy: 96.73
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [02:21<00:00,  1.76it/s]


Accuracy: 95.75
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.79it/s]


Accuracy: 96.85
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [02:20<00:00,  1.78it/s]


Accuracy: 96.46
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [02:22<00:00,  1.75it/s]


Accuracy: 96.66
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [02:21<00:00,  1.76it/s]


Accuracy: 96.83
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.79it/s]


Accuracy: 96.56
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.79it/s]


Accuracy: 96.5
Mean Accuracy:96.321, STD: 0.7288408605450177

Model Name:Baseline, Regularizer: SpectReg, Parameter: 0.01
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [02:22<00:00,  1.75it/s]


Accuracy: 94.75
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [02:21<00:00,  1.76it/s]


Accuracy: 97.07
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [02:22<00:00,  1.75it/s]


Accuracy: 96.87
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [02:23<00:00,  1.74it/s]


Accuracy: 96.92
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [02:22<00:00,  1.75it/s]


Accuracy: 96.95
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [02:23<00:00,  1.74it/s]


Accuracy: 96.22
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [02:23<00:00,  1.74it/s]


Accuracy: 97.1
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [02:21<00:00,  1.76it/s]


Accuracy: 97.04
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [02:22<00:00,  1.75it/s]


Accuracy: 96.52
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [02:24<00:00,  1.73it/s]


Accuracy: 96.67
Mean Accuracy:96.611, STD: 0.6737425324261487

Model Name:Batchnorm, Regularizer: NoGR, Parameter: 0
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [02:17<00:00,  1.82it/s]


Accuracy: 96.95
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.80it/s]


Accuracy: 97.16
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.79it/s]


Accuracy: 97.1
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.80it/s]


Accuracy: 97.41
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.79it/s]


Accuracy: 97.31
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.79it/s]


Accuracy: 97.03
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.79it/s]


Accuracy: 96.92
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.79it/s]


Accuracy: 97.12
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [02:19<00:00,  1.79it/s]


Accuracy: 96.97
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [02:20<00:00,  1.79it/s]


Accuracy: 97.37
Mean Accuracy:97.134, STD: 0.16799999999999984

Model Name:Batchnorm, Regularizer: DoubleBack, Parameter: 0.001
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [02:36<00:00,  1.60it/s]


Accuracy: 96.97
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [02:35<00:00,  1.60it/s]


Accuracy: 97.06
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [02:36<00:00,  1.59it/s]


Accuracy: 97.33
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [02:36<00:00,  1.60it/s]


Accuracy: 97.34
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [02:36<00:00,  1.60it/s]


Accuracy: 97.51
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [02:36<00:00,  1.60it/s]


Accuracy: 97.29
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [02:35<00:00,  1.60it/s]


Accuracy: 97.79
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [02:36<00:00,  1.60it/s]


Accuracy: 97.15
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [02:35<00:00,  1.60it/s]


Accuracy: 97.14
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [02:37<00:00,  1.58it/s]


Accuracy: 97.51
Mean Accuracy:97.309, STD: 0.23338594644922553

Model Name:Batchnorm, Regularizer: SpectReg, Parameter: 0.001
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [02:45<00:00,  1.51it/s]


Accuracy: 97.06
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [02:45<00:00,  1.51it/s]


Accuracy: 97.27
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [02:43<00:00,  1.53it/s]


Accuracy: 97.15
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [02:45<00:00,  1.51it/s]


Accuracy: 97.04
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [02:41<00:00,  1.55it/s]


Accuracy: 97.3
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [02:39<00:00,  1.56it/s]


Accuracy: 97.33
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [02:39<00:00,  1.57it/s]


Accuracy: 97.13
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [02:39<00:00,  1.57it/s]


Accuracy: 97.01
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [02:41<00:00,  1.54it/s]


Accuracy: 97.46
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [02:40<00:00,  1.56it/s]


Accuracy: 97.32
Mean Accuracy:97.207, STD: 0.1421302219797004

Model Name:Dropout, Regularizer: NoGR, Parameter: 0
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [02:13<00:00,  1.88it/s]


Accuracy: 91.39
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [02:13<00:00,  1.88it/s]


Accuracy: 91.94
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [02:14<00:00,  1.86it/s]


Accuracy: 91.69
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [02:14<00:00,  1.85it/s]


Accuracy: 92.24
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [02:12<00:00,  1.88it/s]


Accuracy: 91.85
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [02:11<00:00,  1.90it/s]


Accuracy: 91.9
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [02:11<00:00,  1.90it/s]


Accuracy: 92.2
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [02:11<00:00,  1.90it/s]


Accuracy: 92.37
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [02:11<00:00,  1.91it/s]


Accuracy: 91.76
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [02:09<00:00,  1.93it/s]


Accuracy: 91.68
Mean Accuracy:91.902, STD: 0.2833301960610621

Model Name:Dropout, Regularizer: DoubleBack, Parameter: 50
Iteration:0 seed:10


Trained:  59%|█████▉    | 147/250 [01:23<00:57,  1.80it/s]

In [None]:
models=["Dropout"]
regularizers=["DoubleBack","SpectReg"]

model_params={"Baseline":{"NoGR":0,"DoubleBack":50,"SpectReg":0.01},
              "Batchnorm":{"NoGR":0,"DoubleBack":0.001,"SpectReg":0.001},
              "Dropout":{"NoGR":0,"DoubleBack":50,"SpectReg":0.01}}

accuracy_dict={}
for i in models:
    k={}
    for j in regularizers:
        k[j]=[]
    accuracy_dict[i]=k
print(accuracy_dict)

{'Dropout': {'DoubleBack': [], 'SpectReg': []}}


In [None]:
for model_name in models:
    for reg in regularizers:
            reg_parameter=model_params[model_name][reg]
            print(f"Model Name:{model_name}, Regularizer: {reg}, Parameter: {reg_parameter}")
            for i in range(10):
                print(f"Iteration:{i} seed:{seeds[i]}")

                if model_name=="Baseline":
                    model=BaseModel(include_dropout=False).to(device)
                elif model_name=="Batchnorm":
                    model=BatchNormModel().to(device)
                elif model_name== "Dropout":
                    model=DropoutModel().to(device)

                train_loader=get_subset_dataloader(data_train,seeds[i],num_samples_per_class,batch_size)
                accuracy=train(model,train_loader,test_loader,num_samples_per_class,reg,reg_parameter)
                accuracy_dict[model_name][reg].append(accuracy)
                print("Accuracy:",accuracy)

            accuracy_list=accuracy_dict[model_name][reg]
            print(f"Mean Accuracy:{np.mean(accuracy_list)}, STD: {np.std(accuracy_list)}")
            print("")

Model Name:Dropout, Regularizer: DoubleBack, Parameter: 50
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [02:18<00:00,  1.81it/s]


Accuracy: 91.57
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [02:18<00:00,  1.81it/s]


Accuracy: 91.91
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [02:17<00:00,  1.82it/s]


Accuracy: 91.69
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [02:17<00:00,  1.81it/s]


Accuracy: 91.39
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [02:17<00:00,  1.82it/s]


Accuracy: 90.91
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [02:17<00:00,  1.82it/s]


Accuracy: 90.93
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [02:17<00:00,  1.82it/s]


Accuracy: 91.33
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [02:17<00:00,  1.82it/s]


Accuracy: 91.41
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [02:18<00:00,  1.81it/s]


Accuracy: 91.21
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [02:17<00:00,  1.82it/s]


Accuracy: 90.71
Mean Accuracy:91.30600000000001, STD: 0.35539274049985825

Model Name:Dropout, Regularizer: SpectReg, Parameter: 0.01
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [02:22<00:00,  1.75it/s]


Accuracy: 90.27
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [02:22<00:00,  1.76it/s]


Accuracy: 92.12
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [02:23<00:00,  1.74it/s]


Accuracy: 92.09
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [02:22<00:00,  1.76it/s]


Accuracy: 92.09
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [02:35<00:00,  1.61it/s]


Accuracy: 92.51
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [02:32<00:00,  1.64it/s]


Accuracy: 91.71
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [02:30<00:00,  1.66it/s]


Accuracy: 91.82
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [02:24<00:00,  1.74it/s]


Accuracy: 90.78
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [02:21<00:00,  1.76it/s]


Accuracy: 91.77
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [02:21<00:00,  1.77it/s]


Accuracy: 91.35
Mean Accuracy:91.651, STD: 0.6438858594502621



# Experiment C4: THE EFFECT OF TRAINING SET SIZE


In [20]:
batch_size=50
seeds=[10,20,30,40,50,60,70,80,90,100]

In [21]:
def train(model,train_loader,test_loader,num_samples_per_class,regularizer,reg_parameter):
    criterion=nn.CrossEntropyLoss()
    opt=torch.optim.Adam(model.parameters(),lr=0.001,betas=(0.9,0.999),weight_decay=0.0005)
    
    num_epochs= int(10000/((num_samples_per_class*10)/batch_size))
    # num_epochs=1
    milestones = [int(0.5 * num_epochs), int(0.75 * num_epochs)]  
    scheduler = MultiStepLR(opt, milestones=milestones, gamma=0.1)
    
    for epoch in tqdm(range(num_epochs),"Trained"):
        for i, data in enumerate(train_loader):
            inputs,label=data
            inputs=inputs.to(device)
            label=label.to(device)
            opt.zero_grad()

            if regularizer=="SpectReg":
                loss = spectral_reg_loss(model, criterion, inputs, label, reg_parameter, 10)
            elif regularizer=="JacReg":
                loss = jac_reg_loss(model, criterion, inputs, label, reg_parameter)
            elif regularizer=="DoubleBack":
                loss = double_backprop_loss(model, criterion, inputs, label,reg_parameter)
            elif regularizer=='ConfPenalty':
                loss = conf_penalty_loss(model, criterion, inputs, label, reg_parameter)

            loss.backward()
            opt.step()
    
        scheduler.step()
         
    return Evaluate(test_loader,model)

In [22]:
train_size=[500,1000,2000,3000,4000,5000,10000,15000,20000]
doubleback_par=[50,50,50,20,20,20,5,2,2]
spectreg_par=[0.03,0.03,0.03,0.03,0.03,0.003,0.01,0.01,0.001]
jacreg_par=[0.3,0.03,1,1,1,1,1,1,0.3]
cp_par=[0.01,0.01,0.01,0.01,0.01,0.1,0.1,0.01,0.03]

regularizers=["DoubleBack","SpectReg","JacReg","ConfPenalty"]
params={"DoubleBack":doubleback_par,
        "SpectReg":spectreg_par,
        "JacReg":jacreg_par,
        "ConfPenalty":cp_par}

accuracy_dict={}
for i in train_size:
    k={}
    for j in regularizers:
        k[j]=[]
    accuracy_dict[i]=k


In [23]:
for index,size in enumerate(train_size):
    num_samples_per_class=size//10
    for reg in regularizers:
        reg_parameter=params[reg][index]
        print(f"Train Size: {size}, Regularizer: {reg}, Parameter: {reg_parameter}")
        for i in range(10):
            print(f"Iteration:{i} seed:{seeds[i]}")
            model=BaseModel(include_dropout=True).to(device)
            train_loader=get_subset_dataloader(data_train,seeds[i],num_samples_per_class,batch_size)
            accuracy=train(model,train_loader,test_loader,num_samples_per_class,reg,reg_parameter)
            accuracy_dict[size][reg].append(accuracy)
            print("Accuracy:",accuracy)
        accuracy_list=accuracy_dict[size][reg]
        print(f"Mean Accuracy:{np.mean(accuracy_list)}, STD: {np.std(accuracy_list)}")
        print("")

Train Size: 500, Regularizer: DoubleBack, Parameter: 50
Iteration:0 seed:10


Trained: 100%|██████████| 1000/1000 [03:04<00:00,  5.41it/s]


Accuracy: 95.28
Iteration:1 seed:20


Trained: 100%|██████████| 1000/1000 [03:28<00:00,  4.79it/s]


Accuracy: 94.44
Iteration:2 seed:30


Trained: 100%|██████████| 1000/1000 [03:30<00:00,  4.76it/s]


Accuracy: 93.93
Iteration:3 seed:40


Trained: 100%|██████████| 1000/1000 [03:30<00:00,  4.76it/s]


Accuracy: 93.71
Iteration:4 seed:50


Trained: 100%|██████████| 1000/1000 [03:26<00:00,  4.85it/s]


Accuracy: 94.04
Iteration:5 seed:60


Trained: 100%|██████████| 1000/1000 [03:26<00:00,  4.84it/s]


Accuracy: 94.21
Iteration:6 seed:70


Trained: 100%|██████████| 1000/1000 [03:30<00:00,  4.76it/s]


Accuracy: 94.04
Iteration:7 seed:80


Trained: 100%|██████████| 1000/1000 [03:29<00:00,  4.77it/s]


Accuracy: 94.06
Iteration:8 seed:90


Trained: 100%|██████████| 1000/1000 [03:30<00:00,  4.75it/s]


Accuracy: 94.1
Iteration:9 seed:100


Trained: 100%|██████████| 1000/1000 [03:25<00:00,  4.87it/s]


Accuracy: 93.91
Mean Accuracy:94.172, STD: 0.4116017492674201

Train Size: 500, Regularizer: SpectReg, Parameter: 0.03
Iteration:0 seed:10


Trained: 100%|██████████| 1000/1000 [04:17<00:00,  3.89it/s]


Accuracy: 94.27
Iteration:1 seed:20


Trained: 100%|██████████| 1000/1000 [04:36<00:00,  3.62it/s]


Accuracy: 94.04
Iteration:2 seed:30


Trained: 100%|██████████| 1000/1000 [04:20<00:00,  3.83it/s]


Accuracy: 93.5
Iteration:3 seed:40


Trained: 100%|██████████| 1000/1000 [04:16<00:00,  3.90it/s]


Accuracy: 93.92
Iteration:4 seed:50


Trained: 100%|██████████| 1000/1000 [04:04<00:00,  4.09it/s]


Accuracy: 93.5
Iteration:5 seed:60


Trained: 100%|██████████| 1000/1000 [04:04<00:00,  4.09it/s]


Accuracy: 94.74
Iteration:6 seed:70


Trained: 100%|██████████| 1000/1000 [04:03<00:00,  4.11it/s]


Accuracy: 94.61
Iteration:7 seed:80


Trained: 100%|██████████| 1000/1000 [04:16<00:00,  3.90it/s]


Accuracy: 94.16
Iteration:8 seed:90


Trained: 100%|██████████| 1000/1000 [04:08<00:00,  4.02it/s]


Accuracy: 93.94
Iteration:9 seed:100


Trained: 100%|██████████| 1000/1000 [04:34<00:00,  3.65it/s]


Accuracy: 93.96
Mean Accuracy:94.06400000000001, STD: 0.38606217115899755

Train Size: 500, Regularizer: JacReg, Parameter: 0.3
Iteration:0 seed:10


Trained: 100%|██████████| 1000/1000 [04:30<00:00,  3.70it/s]


Accuracy: 93.5
Iteration:1 seed:20


Trained: 100%|██████████| 1000/1000 [04:32<00:00,  3.67it/s]


Accuracy: 94.56
Iteration:2 seed:30


Trained: 100%|██████████| 1000/1000 [04:19<00:00,  3.86it/s]


Accuracy: 94.27
Iteration:3 seed:40


Trained: 100%|██████████| 1000/1000 [04:21<00:00,  3.82it/s]


Accuracy: 94.0
Iteration:4 seed:50


Trained: 100%|██████████| 1000/1000 [04:18<00:00,  3.87it/s]


Accuracy: 93.27
Iteration:5 seed:60


Trained: 100%|██████████| 1000/1000 [04:24<00:00,  3.78it/s]


Accuracy: 93.32
Iteration:6 seed:70


Trained: 100%|██████████| 1000/1000 [04:02<00:00,  4.12it/s]


Accuracy: 94.23
Iteration:7 seed:80


Trained: 100%|██████████| 1000/1000 [04:38<00:00,  3.59it/s]


Accuracy: 93.69
Iteration:8 seed:90


Trained: 100%|██████████| 1000/1000 [04:24<00:00,  3.78it/s]


Accuracy: 93.52
Iteration:9 seed:100


Trained: 100%|██████████| 1000/1000 [05:26<00:00,  3.06it/s]


Accuracy: 94.0
Mean Accuracy:93.83599999999998, STD: 0.41740148538308025

Train Size: 500, Regularizer: ConfPenalty, Parameter: 0.01
Iteration:0 seed:10


Trained: 100%|██████████| 1000/1000 [03:49<00:00,  4.35it/s]


Accuracy: 94.06
Iteration:1 seed:20


Trained: 100%|██████████| 1000/1000 [03:42<00:00,  4.50it/s]


Accuracy: 94.48
Iteration:2 seed:30


Trained: 100%|██████████| 1000/1000 [03:23<00:00,  4.90it/s]


Accuracy: 93.46
Iteration:3 seed:40


Trained: 100%|██████████| 1000/1000 [03:13<00:00,  5.17it/s]


Accuracy: 94.06
Iteration:4 seed:50


Trained: 100%|██████████| 1000/1000 [03:10<00:00,  5.25it/s]


Accuracy: 93.84
Iteration:5 seed:60


Trained: 100%|██████████| 1000/1000 [03:01<00:00,  5.52it/s]


Accuracy: 93.74
Iteration:6 seed:70


Trained: 100%|██████████| 1000/1000 [02:46<00:00,  6.00it/s]


Accuracy: 94.64
Iteration:7 seed:80


Trained: 100%|██████████| 1000/1000 [02:45<00:00,  6.04it/s]


Accuracy: 94.07
Iteration:8 seed:90


Trained: 100%|██████████| 1000/1000 [02:44<00:00,  6.10it/s]


Accuracy: 93.51
Iteration:9 seed:100


Trained: 100%|██████████| 1000/1000 [02:42<00:00,  6.17it/s]


Accuracy: 94.67
Mean Accuracy:94.05299999999998, STD: 0.41204490046595765

Train Size: 1000, Regularizer: DoubleBack, Parameter: 50
Iteration:0 seed:10


Trained: 100%|██████████| 500/500 [03:07<00:00,  2.66it/s]


Accuracy: 95.49
Iteration:1 seed:20


Trained: 100%|██████████| 500/500 [03:07<00:00,  2.67it/s]


Accuracy: 95.63
Iteration:2 seed:30


Trained: 100%|██████████| 500/500 [03:09<00:00,  2.64it/s]


Accuracy: 95.33
Iteration:3 seed:40


Trained: 100%|██████████| 500/500 [03:10<00:00,  2.63it/s]


Accuracy: 95.36
Iteration:4 seed:50


Trained: 100%|██████████| 500/500 [03:11<00:00,  2.61it/s]


Accuracy: 95.27
Iteration:5 seed:60


Trained: 100%|██████████| 500/500 [03:35<00:00,  2.32it/s]


Accuracy: 95.76
Iteration:6 seed:70


Trained: 100%|██████████| 500/500 [03:39<00:00,  2.28it/s]


Accuracy: 95.91
Iteration:7 seed:80


Trained: 100%|██████████| 500/500 [03:32<00:00,  2.35it/s]


Accuracy: 95.64
Iteration:8 seed:90


Trained: 100%|██████████| 500/500 [03:30<00:00,  2.37it/s]


Accuracy: 95.26
Iteration:9 seed:100


Trained: 100%|██████████| 500/500 [03:40<00:00,  2.26it/s]


Accuracy: 95.79
Mean Accuracy:95.54400000000001, STD: 0.22271955459725645

Train Size: 1000, Regularizer: SpectReg, Parameter: 0.03
Iteration:0 seed:10


Trained: 100%|██████████| 500/500 [04:39<00:00,  1.79it/s]


Accuracy: 94.88
Iteration:1 seed:20


Trained: 100%|██████████| 500/500 [04:24<00:00,  1.89it/s]


Accuracy: 95.25
Iteration:2 seed:30


Trained: 100%|██████████| 500/500 [04:40<00:00,  1.78it/s]


Accuracy: 95.24
Iteration:3 seed:40


Trained: 100%|██████████| 500/500 [04:22<00:00,  1.91it/s]


Accuracy: 95.48
Iteration:4 seed:50


Trained: 100%|██████████| 500/500 [04:07<00:00,  2.02it/s]


Accuracy: 95.47
Iteration:5 seed:60


Trained: 100%|██████████| 500/500 [04:03<00:00,  2.05it/s]


Accuracy: 95.88
Iteration:6 seed:70


Trained: 100%|██████████| 500/500 [03:46<00:00,  2.21it/s]


Accuracy: 95.03
Iteration:7 seed:80


Trained: 100%|██████████| 500/500 [03:56<00:00,  2.12it/s]


Accuracy: 95.95
Iteration:8 seed:90


Trained: 100%|██████████| 500/500 [04:13<00:00,  1.97it/s]


Accuracy: 95.78
Iteration:9 seed:100


Trained: 100%|██████████| 500/500 [04:13<00:00,  1.97it/s]


Accuracy: 95.7
Mean Accuracy:95.46600000000001, STD: 0.34502753513306844

Train Size: 1000, Regularizer: JacReg, Parameter: 0.03
Iteration:0 seed:10


Trained: 100%|██████████| 500/500 [04:09<00:00,  2.00it/s]


Accuracy: 95.18
Iteration:1 seed:20


Trained: 100%|██████████| 500/500 [04:06<00:00,  2.03it/s]


Accuracy: 94.94
Iteration:2 seed:30


Trained: 100%|██████████| 500/500 [04:29<00:00,  1.85it/s]


Accuracy: 95.87
Iteration:3 seed:40


Trained: 100%|██████████| 500/500 [04:36<00:00,  1.81it/s]


Accuracy: 95.26
Iteration:4 seed:50


Trained: 100%|██████████| 500/500 [04:38<00:00,  1.80it/s]


Accuracy: 95.71
Iteration:5 seed:60


Trained: 100%|██████████| 500/500 [04:37<00:00,  1.80it/s]


Accuracy: 95.88
Iteration:6 seed:70


Trained: 100%|██████████| 500/500 [05:09<00:00,  1.62it/s]


Accuracy: 94.84
Iteration:7 seed:80


Trained: 100%|██████████| 500/500 [04:30<00:00,  1.85it/s]


Accuracy: 95.34
Iteration:8 seed:90


Trained: 100%|██████████| 500/500 [04:29<00:00,  1.86it/s]


Accuracy: 95.15
Iteration:9 seed:100


Trained: 100%|██████████| 500/500 [04:47<00:00,  1.74it/s]


Accuracy: 95.46
Mean Accuracy:95.363, STD: 0.345920511100453

Train Size: 1000, Regularizer: ConfPenalty, Parameter: 0.01
Iteration:0 seed:10


Trained: 100%|██████████| 500/500 [03:21<00:00,  2.48it/s]


Accuracy: 95.36
Iteration:1 seed:20


Trained: 100%|██████████| 500/500 [03:05<00:00,  2.69it/s]


Accuracy: 95.57
Iteration:2 seed:30


Trained: 100%|██████████| 500/500 [03:16<00:00,  2.55it/s]


Accuracy: 95.92
Iteration:3 seed:40


Trained: 100%|██████████| 500/500 [03:00<00:00,  2.77it/s]


Accuracy: 96.11
Iteration:4 seed:50


Trained: 100%|██████████| 500/500 [03:22<00:00,  2.47it/s]


Accuracy: 94.91
Iteration:5 seed:60


Trained: 100%|██████████| 500/500 [03:23<00:00,  2.46it/s]


Accuracy: 95.53
Iteration:6 seed:70


Trained: 100%|██████████| 500/500 [03:21<00:00,  2.48it/s]


Accuracy: 95.56
Iteration:7 seed:80


Trained: 100%|██████████| 500/500 [03:33<00:00,  2.34it/s]


Accuracy: 95.78
Iteration:8 seed:90


Trained: 100%|██████████| 500/500 [03:24<00:00,  2.44it/s]


Accuracy: 95.6
Iteration:9 seed:100


Trained: 100%|██████████| 500/500 [03:10<00:00,  2.62it/s]


Accuracy: 95.49
Mean Accuracy:95.583, STD: 0.30744267758396954

Train Size: 2000, Regularizer: DoubleBack, Parameter: 50
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [03:48<00:00,  1.09it/s]


Accuracy: 96.17
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [03:46<00:00,  1.10it/s]


Accuracy: 96.77
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [03:55<00:00,  1.06it/s]


Accuracy: 96.35
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [03:48<00:00,  1.09it/s]


Accuracy: 96.48
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [03:45<00:00,  1.11it/s]


Accuracy: 96.8
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [03:54<00:00,  1.07it/s]


Accuracy: 96.64
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [04:05<00:00,  1.02it/s]


Accuracy: 96.56
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [04:05<00:00,  1.02it/s]


Accuracy: 96.65
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [03:57<00:00,  1.05it/s]


Accuracy: 96.64
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [04:00<00:00,  1.04it/s]


Accuracy: 96.56
Mean Accuracy:96.56199999999998, STD: 0.18032193432857763

Train Size: 2000, Regularizer: SpectReg, Parameter: 0.03
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [04:38<00:00,  1.11s/it]


Accuracy: 95.9
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [04:41<00:00,  1.13s/it]


Accuracy: 96.24
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [04:41<00:00,  1.13s/it]


Accuracy: 96.68
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [04:44<00:00,  1.14s/it]


Accuracy: 96.62
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [04:42<00:00,  1.13s/it]


Accuracy: 96.37
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [04:18<00:00,  1.03s/it]


Accuracy: 96.53
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [04:07<00:00,  1.01it/s]


Accuracy: 96.36
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [04:38<00:00,  1.11s/it]


Accuracy: 96.71
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [04:24<00:00,  1.06s/it]


Accuracy: 96.7
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [04:26<00:00,  1.06s/it]


Accuracy: 96.56
Mean Accuracy:96.46700000000001, STD: 0.24261286033514345

Train Size: 2000, Regularizer: JacReg, Parameter: 1
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [04:45<00:00,  1.14s/it]


Accuracy: 96.49
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [04:18<00:00,  1.03s/it]


Accuracy: 96.4
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [04:35<00:00,  1.10s/it]


Accuracy: 96.7
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [04:18<00:00,  1.03s/it]


Accuracy: 96.76
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [04:23<00:00,  1.06s/it]


Accuracy: 96.66
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [04:21<00:00,  1.05s/it]


Accuracy: 96.42
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [04:17<00:00,  1.03s/it]


Accuracy: 96.55
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [04:25<00:00,  1.06s/it]


Accuracy: 96.59
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [04:25<00:00,  1.06s/it]


Accuracy: 96.5
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [04:33<00:00,  1.10s/it]


Accuracy: 96.87
Mean Accuracy:96.594, STD: 0.14451297519600198

Train Size: 2000, Regularizer: ConfPenalty, Parameter: 0.01
Iteration:0 seed:10


Trained: 100%|██████████| 250/250 [03:08<00:00,  1.32it/s]


Accuracy: 96.48
Iteration:1 seed:20


Trained: 100%|██████████| 250/250 [03:01<00:00,  1.38it/s]


Accuracy: 96.62
Iteration:2 seed:30


Trained: 100%|██████████| 250/250 [03:04<00:00,  1.36it/s]


Accuracy: 96.63
Iteration:3 seed:40


Trained: 100%|██████████| 250/250 [03:01<00:00,  1.37it/s]


Accuracy: 96.64
Iteration:4 seed:50


Trained: 100%|██████████| 250/250 [03:06<00:00,  1.34it/s]


Accuracy: 96.79
Iteration:5 seed:60


Trained: 100%|██████████| 250/250 [02:59<00:00,  1.39it/s]


Accuracy: 96.53
Iteration:6 seed:70


Trained: 100%|██████████| 250/250 [02:57<00:00,  1.41it/s]


Accuracy: 96.4
Iteration:7 seed:80


Trained: 100%|██████████| 250/250 [02:53<00:00,  1.44it/s]


Accuracy: 96.82
Iteration:8 seed:90


Trained: 100%|██████████| 250/250 [02:50<00:00,  1.46it/s]


Accuracy: 96.71
Iteration:9 seed:100


Trained: 100%|██████████| 250/250 [03:01<00:00,  1.38it/s]


Accuracy: 96.4
Mean Accuracy:96.602, STD: 0.1409822683886141

Train Size: 3000, Regularizer: DoubleBack, Parameter: 20
Iteration:0 seed:10


Trained: 100%|██████████| 166/166 [03:28<00:00,  1.26s/it]


Accuracy: 96.94
Iteration:1 seed:20


Trained:  32%|███▏      | 53/166 [01:04<02:18,  1.22s/it]


KeyboardInterrupt: 

In [None]:
train_size=[3000,4000]
doubleback_par=[20,20]
spectreg_par=[0.03,0.03]
jacreg_par=[1,1]
cp_par=[0.01,0.01]

regularizers=["DoubleBack","SpectReg","JacReg","ConfPenalty"]
params={"DoubleBack":doubleback_par,
        "SpectReg":spectreg_par,
        "JacReg":jacreg_par,
        "ConfPenalty":cp_par}

accuracy_dict={}
for i in train_size:
    k={}
    for j in regularizers:
        k[j]=[]
    accuracy_dict[i]=k


In [None]:
for index,size in enumerate(train_size):
    num_samples_per_class=size//10
    for reg in regularizers:
        reg_parameter=params[reg][index]
        print(f"Train Size: {size}, Regularizer: {reg}, Parameter: {reg_parameter}")
        for i in range(10):
            print(f"Iteration:{i} seed:{seeds[i]}")
            model=BaseModel(include_dropout=True).to(device)
            train_loader=get_subset_dataloader(data_train,seeds[i],num_samples_per_class,batch_size)
            accuracy=train(model,train_loader,test_loader,num_samples_per_class,reg,reg_parameter)
            accuracy_dict[size][reg].append(accuracy)
            print("Accuracy:",accuracy)
        accuracy_list=accuracy_dict[size][reg]
        print(f"Mean Accuracy:{np.mean(accuracy_list)}, STD: {np.std(accuracy_list)}")
        print("")

Train Size: 3000, Regularizer: DoubleBack, Parameter: 20
Iteration:0 seed:10


Trained: 100%|██████████| 166/166 [02:18<00:00,  1.20it/s]


Accuracy: 96.93
Iteration:1 seed:20


Trained: 100%|██████████| 166/166 [02:20<00:00,  1.18it/s]


Accuracy: 97.16
Iteration:2 seed:30


Trained: 100%|██████████| 166/166 [02:17<00:00,  1.21it/s]


Accuracy: 97.27
Iteration:3 seed:40


Trained: 100%|██████████| 166/166 [02:16<00:00,  1.22it/s]


Accuracy: 96.77
Iteration:4 seed:50


Trained: 100%|██████████| 166/166 [02:17<00:00,  1.21it/s]


Accuracy: 97.55
Iteration:5 seed:60


Trained: 100%|██████████| 166/166 [02:17<00:00,  1.20it/s]


Accuracy: 97.15
Iteration:6 seed:70


Trained: 100%|██████████| 166/166 [02:17<00:00,  1.21it/s]


Accuracy: 96.93
Iteration:7 seed:80


Trained: 100%|██████████| 166/166 [02:16<00:00,  1.22it/s]


Accuracy: 97.1
Iteration:8 seed:90


Trained: 100%|██████████| 166/166 [02:19<00:00,  1.19it/s]


Accuracy: 97.32
Iteration:9 seed:100


Trained: 100%|██████████| 166/166 [02:17<00:00,  1.20it/s]


Accuracy: 96.96
Mean Accuracy:97.114, STD: 0.21685017869487525

Train Size: 3000, Regularizer: SpectReg, Parameter: 0.03
Iteration:0 seed:10


Trained: 100%|██████████| 166/166 [02:23<00:00,  1.16it/s]


Accuracy: 97.09
Iteration:1 seed:20


Trained: 100%|██████████| 166/166 [02:22<00:00,  1.16it/s]


Accuracy: 96.75
Iteration:2 seed:30


Trained: 100%|██████████| 166/166 [02:29<00:00,  1.11it/s]


Accuracy: 97.2
Iteration:3 seed:40


Trained: 100%|██████████| 166/166 [02:31<00:00,  1.10it/s]


Accuracy: 97.16
Iteration:4 seed:50


Trained: 100%|██████████| 166/166 [02:31<00:00,  1.10it/s]


Accuracy: 96.44
Iteration:5 seed:60


Trained: 100%|██████████| 166/166 [02:31<00:00,  1.10it/s]


Accuracy: 97.24
Iteration:6 seed:70


Trained: 100%|██████████| 166/166 [02:31<00:00,  1.10it/s]


Accuracy: 97.15
Iteration:7 seed:80


Trained: 100%|██████████| 166/166 [02:34<00:00,  1.08it/s]


Accuracy: 96.75
Iteration:8 seed:90


Trained: 100%|██████████| 166/166 [02:32<00:00,  1.09it/s]


Accuracy: 97.29
Iteration:9 seed:100


Trained: 100%|██████████| 166/166 [02:30<00:00,  1.10it/s]


Accuracy: 96.61
Mean Accuracy:96.968, STD: 0.28606992152269467

Train Size: 3000, Regularizer: JacReg, Parameter: 1
Iteration:0 seed:10


Trained: 100%|██████████| 166/166 [02:29<00:00,  1.11it/s]


Accuracy: 96.97
Iteration:1 seed:20


Trained: 100%|██████████| 166/166 [02:30<00:00,  1.10it/s]


Accuracy: 96.66
Iteration:2 seed:30


Trained: 100%|██████████| 166/166 [02:28<00:00,  1.12it/s]


Accuracy: 96.91
Iteration:3 seed:40


Trained: 100%|██████████| 166/166 [02:29<00:00,  1.11it/s]


Accuracy: 97.06
Iteration:4 seed:50


Trained: 100%|██████████| 166/166 [02:29<00:00,  1.11it/s]


Accuracy: 97.25
Iteration:5 seed:60


Trained: 100%|██████████| 166/166 [02:28<00:00,  1.12it/s]


Accuracy: 97.34
Iteration:6 seed:70


Trained: 100%|██████████| 166/166 [02:27<00:00,  1.12it/s]


Accuracy: 97.01
Iteration:7 seed:80


Trained: 100%|██████████| 166/166 [02:27<00:00,  1.12it/s]


Accuracy: 96.99
Iteration:8 seed:90


Trained: 100%|██████████| 166/166 [02:27<00:00,  1.12it/s]


Accuracy: 96.92
Iteration:9 seed:100


Trained: 100%|██████████| 166/166 [02:28<00:00,  1.11it/s]


Accuracy: 96.7
Mean Accuracy:96.98100000000001, STD: 0.19982242116439386

Train Size: 3000, Regularizer: ConfPenalty, Parameter: 0.01
Iteration:0 seed:10


Trained: 100%|██████████| 166/166 [02:16<00:00,  1.22it/s]


Accuracy: 97.16
Iteration:1 seed:20


Trained: 100%|██████████| 166/166 [02:15<00:00,  1.22it/s]


Accuracy: 97.05
Iteration:2 seed:30


Trained: 100%|██████████| 166/166 [02:16<00:00,  1.22it/s]


Accuracy: 96.89
Iteration:3 seed:40


Trained: 100%|██████████| 166/166 [02:16<00:00,  1.22it/s]


Accuracy: 97.06
Iteration:4 seed:50


Trained: 100%|██████████| 166/166 [02:16<00:00,  1.22it/s]


Accuracy: 97.3
Iteration:5 seed:60


Trained: 100%|██████████| 166/166 [02:15<00:00,  1.22it/s]


Accuracy: 96.95
Iteration:6 seed:70


Trained: 100%|██████████| 166/166 [02:16<00:00,  1.21it/s]


Accuracy: 96.9
Iteration:7 seed:80


Trained: 100%|██████████| 166/166 [02:15<00:00,  1.23it/s]


Accuracy: 97.17
Iteration:8 seed:90


Trained: 100%|██████████| 166/166 [02:15<00:00,  1.22it/s]


Accuracy: 96.74
Iteration:9 seed:100


Trained: 100%|██████████| 166/166 [02:08<00:00,  1.30it/s]


Accuracy: 97.32
Mean Accuracy:97.054, STD: 0.1777751388693018

Train Size: 4000, Regularizer: DoubleBack, Parameter: 20
Iteration:0 seed:10


Trained: 100%|██████████| 125/125 [02:16<00:00,  1.10s/it]


Accuracy: 97.23
Iteration:1 seed:20


Trained: 100%|██████████| 125/125 [02:17<00:00,  1.10s/it]


Accuracy: 97.45
Iteration:2 seed:30


Trained: 100%|██████████| 125/125 [02:15<00:00,  1.09s/it]


Accuracy: 97.1
Iteration:3 seed:40


Trained: 100%|██████████| 125/125 [02:16<00:00,  1.09s/it]


Accuracy: 97.36
Iteration:4 seed:50


Trained: 100%|██████████| 125/125 [02:16<00:00,  1.09s/it]


Accuracy: 97.28
Iteration:5 seed:60


Trained: 100%|██████████| 125/125 [02:16<00:00,  1.09s/it]


Accuracy: 97.25
Iteration:6 seed:70


Trained: 100%|██████████| 125/125 [02:16<00:00,  1.09s/it]


Accuracy: 96.96
Iteration:7 seed:80


Trained: 100%|██████████| 125/125 [02:17<00:00,  1.10s/it]


Accuracy: 97.37
Iteration:8 seed:90


Trained: 100%|██████████| 125/125 [02:16<00:00,  1.10s/it]


Accuracy: 97.52
Iteration:9 seed:100


Trained: 100%|██████████| 125/125 [02:15<00:00,  1.09s/it]


Accuracy: 97.54
Mean Accuracy:97.306, STD: 0.17321662737739948

Train Size: 4000, Regularizer: SpectReg, Parameter: 0.03
Iteration:0 seed:10


Trained: 100%|██████████| 125/125 [02:19<00:00,  1.12s/it]


Accuracy: 96.95
Iteration:1 seed:20


Trained: 100%|██████████| 125/125 [02:18<00:00,  1.11s/it]


Accuracy: 97.3
Iteration:2 seed:30


Trained: 100%|██████████| 125/125 [02:18<00:00,  1.11s/it]


Accuracy: 97.16
Iteration:3 seed:40


Trained: 100%|██████████| 125/125 [02:18<00:00,  1.11s/it]


Accuracy: 97.61
Iteration:4 seed:50


Trained: 100%|██████████| 125/125 [02:18<00:00,  1.11s/it]


Accuracy: 97.49
Iteration:5 seed:60


Trained: 100%|██████████| 125/125 [02:18<00:00,  1.11s/it]


Accuracy: 97.4
Iteration:6 seed:70


Trained: 100%|██████████| 125/125 [02:18<00:00,  1.11s/it]


Accuracy: 97.54
Iteration:7 seed:80


Trained: 100%|██████████| 125/125 [02:18<00:00,  1.11s/it]


Accuracy: 97.3
Iteration:8 seed:90


Trained: 100%|██████████| 125/125 [02:17<00:00,  1.10s/it]


Accuracy: 97.39
Iteration:9 seed:100


Trained: 100%|██████████| 125/125 [02:18<00:00,  1.11s/it]


Accuracy: 97.49
Mean Accuracy:97.363, STD: 0.18612092843095307

Train Size: 4000, Regularizer: JacReg, Parameter: 1
Iteration:0 seed:10


Trained: 100%|██████████| 125/125 [02:18<00:00,  1.11s/it]


Accuracy: 97.1
Iteration:1 seed:20


Trained: 100%|██████████| 125/125 [02:24<00:00,  1.15s/it]


Accuracy: 97.2
Iteration:2 seed:30


Trained: 100%|██████████| 125/125 [02:21<00:00,  1.13s/it]


Accuracy: 97.27
Iteration:3 seed:40


Trained: 100%|██████████| 125/125 [02:17<00:00,  1.10s/it]


Accuracy: 97.33
Iteration:4 seed:50


Trained: 100%|██████████| 125/125 [02:16<00:00,  1.09s/it]


Accuracy: 97.43
Iteration:5 seed:60


Trained: 100%|██████████| 125/125 [02:16<00:00,  1.10s/it]


Accuracy: 97.28
Iteration:6 seed:70


Trained: 100%|██████████| 125/125 [02:16<00:00,  1.09s/it]


Accuracy: 97.09
Iteration:7 seed:80


Trained: 100%|██████████| 125/125 [02:16<00:00,  1.09s/it]


Accuracy: 97.24
Iteration:8 seed:90


Trained: 100%|██████████| 125/125 [02:16<00:00,  1.09s/it]


Accuracy: 97.27
Iteration:9 seed:100


Trained: 100%|██████████| 125/125 [02:15<00:00,  1.09s/it]


Accuracy: 97.93
Mean Accuracy:97.314, STD: 0.22641554716936105

Train Size: 4000, Regularizer: ConfPenalty, Parameter: 0.01
Iteration:0 seed:10


Trained: 100%|██████████| 125/125 [02:05<00:00,  1.00s/it]


Accuracy: 97.27
Iteration:1 seed:20


Trained: 100%|██████████| 125/125 [02:06<00:00,  1.01s/it]


Accuracy: 97.71
Iteration:2 seed:30


Trained: 100%|██████████| 125/125 [02:05<00:00,  1.01s/it]


Accuracy: 97.46
Iteration:3 seed:40


Trained: 100%|██████████| 125/125 [02:07<00:00,  1.02s/it]


Accuracy: 97.48
Iteration:4 seed:50


Trained: 100%|██████████| 125/125 [02:05<00:00,  1.01s/it]


Accuracy: 97.37
Iteration:5 seed:60


Trained: 100%|██████████| 125/125 [02:06<00:00,  1.01s/it]


Accuracy: 97.01
Iteration:6 seed:70


Trained: 100%|██████████| 125/125 [02:05<00:00,  1.01s/it]


Accuracy: 96.9
Iteration:7 seed:80


Trained: 100%|██████████| 125/125 [02:05<00:00,  1.01s/it]


Accuracy: 96.99
Iteration:8 seed:90


Trained: 100%|██████████| 125/125 [02:06<00:00,  1.01s/it]


Accuracy: 97.25
Iteration:9 seed:100


Trained: 100%|██████████| 125/125 [02:05<00:00,  1.00s/it]


Accuracy: 97.9
Mean Accuracy:97.33399999999999, STD: 0.30381573362813147



In [None]:
train_size=[5000,10000]
doubleback_par=[20,5]
spectreg_par=[0.003,0.01]
jacreg_par=[1,1]
cp_par=[0.1,0.1]

regularizers=["DoubleBack","SpectReg","JacReg","ConfPenalty"]
params={"DoubleBack":doubleback_par,
        "SpectReg":spectreg_par,
        "JacReg":jacreg_par,
        "ConfPenalty":cp_par}

accuracy_dict={}
for i in train_size:
    k={}
    for j in regularizers:
        k[j]=[]
    accuracy_dict[i]=k


In [None]:
for index,size in enumerate(train_size):
    num_samples_per_class=size//10
    for reg in regularizers:
        reg_parameter=params[reg][index]
        print(f"Train Size: {size}, Regularizer: {reg}, Parameter: {reg_parameter}")
        for i in range(10):
            print(f"Iteration:{i} seed:{seeds[i]}")
            model=BaseModel(include_dropout=True).to(device)
            train_loader=get_subset_dataloader(data_train,seeds[i],num_samples_per_class,batch_size)
            accuracy=train(model,train_loader,test_loader,num_samples_per_class,reg,reg_parameter)
            accuracy_dict[size][reg].append(accuracy)
            print("Accuracy:",accuracy)
        accuracy_list=accuracy_dict[size][reg]
        print(f"Mean Accuracy:{np.mean(accuracy_list)}, STD: {np.std(accuracy_list)}")
        print("")

Train Size: 5000, Regularizer: DoubleBack, Parameter: 20
Iteration:0 seed:10


Trained: 100%|██████████| 100/100 [02:18<00:00,  1.38s/it]


Accuracy: 97.29
Iteration:1 seed:20


Trained: 100%|██████████| 100/100 [02:14<00:00,  1.35s/it]


Accuracy: 97.4
Iteration:2 seed:30


Trained: 100%|██████████| 100/100 [02:14<00:00,  1.34s/it]


Accuracy: 97.52
Iteration:3 seed:40


Trained: 100%|██████████| 100/100 [02:15<00:00,  1.35s/it]


Accuracy: 97.52
Iteration:4 seed:50


Trained: 100%|██████████| 100/100 [02:13<00:00,  1.34s/it]


Accuracy: 97.52
Iteration:5 seed:60


Trained: 100%|██████████| 100/100 [02:15<00:00,  1.35s/it]


Accuracy: 97.42
Iteration:6 seed:70


Trained: 100%|██████████| 100/100 [02:13<00:00,  1.34s/it]


Accuracy: 97.31
Iteration:7 seed:80


Trained: 100%|██████████| 100/100 [02:13<00:00,  1.34s/it]


Accuracy: 97.33
Iteration:8 seed:90


Trained: 100%|██████████| 100/100 [02:13<00:00,  1.34s/it]


Accuracy: 97.73
Iteration:9 seed:100


Trained: 100%|██████████| 100/100 [02:14<00:00,  1.34s/it]


Accuracy: 97.58
Mean Accuracy:97.462, STD: 0.13098091464026243

Train Size: 5000, Regularizer: SpectReg, Parameter: 0.003
Iteration:0 seed:10


Trained: 100%|██████████| 100/100 [02:16<00:00,  1.37s/it]


Accuracy: 97.62
Iteration:1 seed:20


Trained: 100%|██████████| 100/100 [02:17<00:00,  1.38s/it]


Accuracy: 97.37
Iteration:2 seed:30


Trained: 100%|██████████| 100/100 [02:17<00:00,  1.37s/it]


Accuracy: 97.57
Iteration:3 seed:40


Trained: 100%|██████████| 100/100 [02:17<00:00,  1.38s/it]


Accuracy: 97.54
Iteration:4 seed:50


Trained: 100%|██████████| 100/100 [02:16<00:00,  1.37s/it]


Accuracy: 97.5
Iteration:5 seed:60


Trained: 100%|██████████| 100/100 [02:17<00:00,  1.37s/it]


Accuracy: 97.71
Iteration:6 seed:70


Trained: 100%|██████████| 100/100 [02:17<00:00,  1.38s/it]


Accuracy: 97.53
Iteration:7 seed:80


Trained: 100%|██████████| 100/100 [02:17<00:00,  1.38s/it]


Accuracy: 97.39
Iteration:8 seed:90


Trained: 100%|██████████| 100/100 [02:18<00:00,  1.38s/it]


Accuracy: 97.28
Iteration:9 seed:100


Trained: 100%|██████████| 100/100 [02:17<00:00,  1.37s/it]


Accuracy: 97.18
Mean Accuracy:97.46900000000001, STD: 0.1536522046701552

Train Size: 5000, Regularizer: JacReg, Parameter: 1
Iteration:0 seed:10


Trained: 100%|██████████| 100/100 [02:15<00:00,  1.35s/it]


Accuracy: 97.67
Iteration:1 seed:20


Trained: 100%|██████████| 100/100 [02:15<00:00,  1.35s/it]


Accuracy: 97.55
Iteration:2 seed:30


Trained: 100%|██████████| 100/100 [02:14<00:00,  1.35s/it]


Accuracy: 97.81
Iteration:3 seed:40


Trained: 100%|██████████| 100/100 [02:14<00:00,  1.34s/it]


Accuracy: 97.65
Iteration:4 seed:50


Trained: 100%|██████████| 100/100 [02:15<00:00,  1.35s/it]


Accuracy: 97.45
Iteration:5 seed:60


Trained: 100%|██████████| 100/100 [02:14<00:00,  1.34s/it]


Accuracy: 97.53
Iteration:6 seed:70


Trained: 100%|██████████| 100/100 [02:14<00:00,  1.35s/it]


Accuracy: 97.23
Iteration:7 seed:80


Trained: 100%|██████████| 100/100 [02:15<00:00,  1.35s/it]


Accuracy: 97.32
Iteration:8 seed:90


Trained: 100%|██████████| 100/100 [02:14<00:00,  1.35s/it]


Accuracy: 97.82
Iteration:9 seed:100


Trained: 100%|██████████| 100/100 [02:15<00:00,  1.35s/it]


Accuracy: 97.76
Mean Accuracy:97.579, STD: 0.19138704240360704

Train Size: 5000, Regularizer: ConfPenalty, Parameter: 0.1
Iteration:0 seed:10


Trained: 100%|██████████| 100/100 [02:05<00:00,  1.25s/it]


Accuracy: 97.62
Iteration:1 seed:20


Trained: 100%|██████████| 100/100 [02:04<00:00,  1.24s/it]


Accuracy: 97.2
Iteration:2 seed:30


Trained: 100%|██████████| 100/100 [02:04<00:00,  1.25s/it]


Accuracy: 97.72
Iteration:3 seed:40


Trained: 100%|██████████| 100/100 [02:04<00:00,  1.24s/it]


Accuracy: 97.41
Iteration:4 seed:50


Trained: 100%|██████████| 100/100 [02:05<00:00,  1.25s/it]


Accuracy: 97.49
Iteration:5 seed:60


Trained: 100%|██████████| 100/100 [02:04<00:00,  1.25s/it]


Accuracy: 97.64
Iteration:6 seed:70


Trained: 100%|██████████| 100/100 [02:05<00:00,  1.25s/it]


Accuracy: 97.72
Iteration:7 seed:80


Trained: 100%|██████████| 100/100 [02:04<00:00,  1.25s/it]


Accuracy: 97.59
Iteration:8 seed:90


Trained: 100%|██████████| 100/100 [02:04<00:00,  1.25s/it]


Accuracy: 97.61
Iteration:9 seed:100


Trained: 100%|██████████| 100/100 [02:04<00:00,  1.24s/it]


Accuracy: 97.52
Mean Accuracy:97.55199999999999, STD: 0.14918444959177213

Train Size: 10000, Regularizer: DoubleBack, Parameter: 5
Iteration:0 seed:10


Trained: 100%|██████████| 50/50 [02:12<00:00,  2.66s/it]


Accuracy: 98.21
Iteration:1 seed:20


Trained: 100%|██████████| 50/50 [02:12<00:00,  2.65s/it]


Accuracy: 98.05
Iteration:2 seed:30


Trained: 100%|██████████| 50/50 [02:12<00:00,  2.65s/it]


Accuracy: 97.76
Iteration:3 seed:40


Trained: 100%|██████████| 50/50 [02:12<00:00,  2.66s/it]


Accuracy: 97.87
Iteration:4 seed:50


Trained: 100%|██████████| 50/50 [02:12<00:00,  2.65s/it]


Accuracy: 98.19
Iteration:5 seed:60


Trained: 100%|██████████| 50/50 [02:12<00:00,  2.64s/it]


Accuracy: 98.18
Iteration:6 seed:70


Trained: 100%|██████████| 50/50 [02:12<00:00,  2.64s/it]


Accuracy: 98.02
Iteration:7 seed:80


Trained: 100%|██████████| 50/50 [02:11<00:00,  2.64s/it]


Accuracy: 97.75
Iteration:8 seed:90


Trained: 100%|██████████| 50/50 [02:11<00:00,  2.64s/it]


Accuracy: 98.19
Iteration:9 seed:100


Trained: 100%|██████████| 50/50 [02:11<00:00,  2.63s/it]


Accuracy: 97.82
Mean Accuracy:98.00399999999999, STD: 0.17912007146045822

Train Size: 10000, Regularizer: SpectReg, Parameter: 0.01
Iteration:0 seed:10


Trained: 100%|██████████| 50/50 [02:16<00:00,  2.73s/it]


Accuracy: 97.79
Iteration:1 seed:20


Trained: 100%|██████████| 50/50 [02:16<00:00,  2.72s/it]


Accuracy: 98.01
Iteration:2 seed:30


Trained: 100%|██████████| 50/50 [02:17<00:00,  2.74s/it]


Accuracy: 97.94
Iteration:3 seed:40


Trained: 100%|██████████| 50/50 [02:16<00:00,  2.73s/it]


Accuracy: 98.07
Iteration:4 seed:50


Trained: 100%|██████████| 50/50 [02:16<00:00,  2.73s/it]


Accuracy: 97.98
Iteration:5 seed:60


Trained: 100%|██████████| 50/50 [02:15<00:00,  2.71s/it]


Accuracy: 98.14
Iteration:6 seed:70


Trained: 100%|██████████| 50/50 [02:15<00:00,  2.71s/it]


Accuracy: 97.93
Iteration:7 seed:80


Trained: 100%|██████████| 50/50 [02:15<00:00,  2.71s/it]


Accuracy: 97.98
Iteration:8 seed:90


Trained: 100%|██████████| 50/50 [02:15<00:00,  2.71s/it]


Accuracy: 97.89
Iteration:9 seed:100


Trained: 100%|██████████| 50/50 [02:15<00:00,  2.71s/it]


Accuracy: 97.69
Mean Accuracy:97.94200000000001, STD: 0.1235151812531556

Train Size: 10000, Regularizer: JacReg, Parameter: 1
Iteration:0 seed:10


Trained: 100%|██████████| 50/50 [02:13<00:00,  2.67s/it]


Accuracy: 98.26
Iteration:1 seed:20


Trained: 100%|██████████| 50/50 [02:12<00:00,  2.66s/it]


Accuracy: 98.07
Iteration:2 seed:30


Trained: 100%|██████████| 50/50 [02:13<00:00,  2.67s/it]


Accuracy: 98.01
Iteration:3 seed:40


Trained: 100%|██████████| 50/50 [02:13<00:00,  2.66s/it]


Accuracy: 97.8
Iteration:4 seed:50


Trained: 100%|██████████| 50/50 [02:13<00:00,  2.68s/it]


Accuracy: 98.16
Iteration:5 seed:60


Trained: 100%|██████████| 50/50 [02:13<00:00,  2.67s/it]


Accuracy: 98.17
Iteration:6 seed:70


Trained: 100%|██████████| 50/50 [02:13<00:00,  2.67s/it]


Accuracy: 98.05
Iteration:7 seed:80


Trained: 100%|██████████| 50/50 [02:13<00:00,  2.66s/it]


Accuracy: 97.75
Iteration:8 seed:90


Trained: 100%|██████████| 50/50 [02:14<00:00,  2.69s/it]


Accuracy: 98.1
Iteration:9 seed:100


Trained: 100%|██████████| 50/50 [02:15<00:00,  2.71s/it]


Accuracy: 97.84
Mean Accuracy:98.021, STD: 0.16226213359869276

Train Size: 10000, Regularizer: ConfPenalty, Parameter: 0.1
Iteration:0 seed:10


Trained: 100%|██████████| 50/50 [02:05<00:00,  2.52s/it]


Accuracy: 97.98
Iteration:1 seed:20


Trained: 100%|██████████| 50/50 [02:06<00:00,  2.52s/it]


Accuracy: 98.09
Iteration:2 seed:30


Trained: 100%|██████████| 50/50 [02:05<00:00,  2.51s/it]


Accuracy: 97.82
Iteration:3 seed:40


Trained: 100%|██████████| 50/50 [02:04<00:00,  2.48s/it]


Accuracy: 97.98
Iteration:4 seed:50


Trained: 100%|██████████| 50/50 [02:02<00:00,  2.46s/it]


Accuracy: 97.98
Iteration:5 seed:60


Trained: 100%|██████████| 50/50 [02:02<00:00,  2.45s/it]


Accuracy: 98.16
Iteration:6 seed:70


Trained: 100%|██████████| 50/50 [02:01<00:00,  2.44s/it]


Accuracy: 98.17
Iteration:7 seed:80


Trained: 100%|██████████| 50/50 [02:02<00:00,  2.46s/it]


Accuracy: 97.66
Iteration:8 seed:90


Trained: 100%|██████████| 50/50 [02:02<00:00,  2.45s/it]


Accuracy: 98.06
Iteration:9 seed:100


Trained: 100%|██████████| 50/50 [02:02<00:00,  2.44s/it]


Accuracy: 97.69
Mean Accuracy:97.95899999999999, STD: 0.17166537216340585



In [None]:
train_size=[15000,20000]
doubleback_par=[2,2]
spectreg_par=[0.01,0.001]
jacreg_par=[1,0.3]
cp_par=[0.01,0.03]

regularizers=["DoubleBack","SpectReg","JacReg","ConfPenalty"]
params={"DoubleBack":doubleback_par,
        "SpectReg":spectreg_par,
        "JacReg":jacreg_par,
        "ConfPenalty":cp_par}

accuracy_dict={}
for i in train_size:
    k={}
    for j in regularizers:
        k[j]=[]
    accuracy_dict[i]=k


In [None]:
for index,size in enumerate(train_size):
    num_samples_per_class=size//10
    for reg in regularizers:
        reg_parameter=params[reg][index]
        print(f"Train Size: {size}, Regularizer: {reg}, Parameter: {reg_parameter}")
        for i in range(10):
            print(f"Iteration:{i} seed:{seeds[i]}")
            model=BaseModel(include_dropout=True).to(device)
            train_loader=get_subset_dataloader(data_train,seeds[i],num_samples_per_class,batch_size)
            accuracy=train(model,train_loader,test_loader,num_samples_per_class,reg,reg_parameter)
            accuracy_dict[size][reg].append(accuracy)
            print("Accuracy:",accuracy)
        accuracy_list=accuracy_dict[size][reg]
        print(f"Mean Accuracy:{np.mean(accuracy_list)}, STD: {np.std(accuracy_list)}")
        print("")

Train Size: 15000, Regularizer: DoubleBack, Parameter: 2
Iteration:0 seed:10


Trained: 100%|██████████| 33/33 [02:19<00:00,  4.24s/it]


Accuracy: 98.12
Iteration:1 seed:20


Trained: 100%|██████████| 33/33 [02:15<00:00,  4.11s/it]


Accuracy: 98.43
Iteration:2 seed:30


Trained: 100%|██████████| 33/33 [02:14<00:00,  4.08s/it]


Accuracy: 98.05
Iteration:3 seed:40


Trained: 100%|██████████| 33/33 [02:13<00:00,  4.05s/it]


Accuracy: 98.19
Iteration:4 seed:50


Trained: 100%|██████████| 33/33 [02:12<00:00,  4.02s/it]


Accuracy: 98.22
Iteration:5 seed:60


Trained: 100%|██████████| 33/33 [02:12<00:00,  4.01s/it]


Accuracy: 97.72
Iteration:6 seed:70


Trained: 100%|██████████| 33/33 [02:11<00:00,  3.99s/it]


Accuracy: 98.12
Iteration:7 seed:80


Trained: 100%|██████████| 33/33 [02:12<00:00,  4.02s/it]


Accuracy: 98.18
Iteration:8 seed:90


Trained: 100%|██████████| 33/33 [02:11<00:00,  4.00s/it]


Accuracy: 97.98
Iteration:9 seed:100


Trained: 100%|██████████| 33/33 [02:12<00:00,  4.01s/it]


Accuracy: 98.02
Mean Accuracy:98.103, STD: 0.17498857105537094

Train Size: 15000, Regularizer: SpectReg, Parameter: 0.01
Iteration:0 seed:10


Trained: 100%|██████████| 33/33 [02:21<00:00,  4.27s/it]


Accuracy: 98.03
Iteration:1 seed:20


Trained: 100%|██████████| 33/33 [02:19<00:00,  4.23s/it]


Accuracy: 98.14
Iteration:2 seed:30


Trained: 100%|██████████| 33/33 [02:21<00:00,  4.29s/it]


Accuracy: 98.14
Iteration:3 seed:40


Trained: 100%|██████████| 33/33 [02:18<00:00,  4.20s/it]


Accuracy: 98.17
Iteration:4 seed:50


Trained: 100%|██████████| 33/33 [02:17<00:00,  4.17s/it]


Accuracy: 97.83
Iteration:5 seed:60


Trained: 100%|██████████| 33/33 [02:15<00:00,  4.11s/it]


Accuracy: 98.13
Iteration:6 seed:70


Trained: 100%|██████████| 33/33 [02:15<00:00,  4.09s/it]


Accuracy: 98.2
Iteration:7 seed:80


Trained: 100%|██████████| 33/33 [02:14<00:00,  4.08s/it]


Accuracy: 98.05
Iteration:8 seed:90


Trained: 100%|██████████| 33/33 [02:15<00:00,  4.10s/it]


Accuracy: 98.2
Iteration:9 seed:100


Trained: 100%|██████████| 33/33 [02:16<00:00,  4.14s/it]


Accuracy: 98.06
Mean Accuracy:98.095, STD: 0.10519030373565907

Train Size: 15000, Regularizer: JacReg, Parameter: 1
Iteration:0 seed:10


Trained: 100%|██████████| 33/33 [02:14<00:00,  4.06s/it]


Accuracy: 98.11
Iteration:1 seed:20


Trained: 100%|██████████| 33/33 [02:15<00:00,  4.10s/it]


Accuracy: 98.26
Iteration:2 seed:30


Trained: 100%|██████████| 33/33 [02:13<00:00,  4.05s/it]


Accuracy: 97.73
Iteration:3 seed:40


Trained: 100%|██████████| 33/33 [02:13<00:00,  4.05s/it]


Accuracy: 98.14
Iteration:4 seed:50


Trained: 100%|██████████| 33/33 [02:13<00:00,  4.05s/it]


Accuracy: 98.13
Iteration:5 seed:60


Trained: 100%|██████████| 33/33 [02:13<00:00,  4.06s/it]


Accuracy: 97.97
Iteration:6 seed:70


Trained: 100%|██████████| 33/33 [02:13<00:00,  4.06s/it]


Accuracy: 98.1
Iteration:7 seed:80


Trained: 100%|██████████| 33/33 [02:13<00:00,  4.05s/it]


Accuracy: 97.93
Iteration:8 seed:90


Trained: 100%|██████████| 33/33 [02:12<00:00,  4.02s/it]


Accuracy: 98.28
Iteration:9 seed:100


Trained: 100%|██████████| 33/33 [02:15<00:00,  4.11s/it]


Accuracy: 98.03
Mean Accuracy:98.068, STD: 0.15425952158618836

Train Size: 15000, Regularizer: ConfPenalty, Parameter: 0.01
Iteration:0 seed:10


Trained: 100%|██████████| 33/33 [02:03<00:00,  3.75s/it]


Accuracy: 98.01
Iteration:1 seed:20


Trained: 100%|██████████| 33/33 [02:02<00:00,  3.72s/it]


Accuracy: 98.22
Iteration:2 seed:30


Trained: 100%|██████████| 33/33 [02:03<00:00,  3.74s/it]


Accuracy: 98.18
Iteration:3 seed:40


Trained: 100%|██████████| 33/33 [02:03<00:00,  3.75s/it]


Accuracy: 97.94
Iteration:4 seed:50


Trained: 100%|██████████| 33/33 [02:03<00:00,  3.73s/it]


Accuracy: 98.13
Iteration:5 seed:60


Trained: 100%|██████████| 33/33 [02:04<00:00,  3.76s/it]


Accuracy: 98.08
Iteration:6 seed:70


Trained: 100%|██████████| 33/33 [02:04<00:00,  3.77s/it]


Accuracy: 98.07
Iteration:7 seed:80


Trained: 100%|██████████| 33/33 [02:06<00:00,  3.82s/it]


Accuracy: 97.97
Iteration:8 seed:90


Trained: 100%|██████████| 33/33 [02:06<00:00,  3.85s/it]


Accuracy: 98.24
Iteration:9 seed:100


Trained: 100%|██████████| 33/33 [02:06<00:00,  3.84s/it]


Accuracy: 97.93
Mean Accuracy:98.077, STD: 0.10807867504739184

Train Size: 20000, Regularizer: DoubleBack, Parameter: 2
Iteration:0 seed:10


Trained: 100%|██████████| 25/25 [02:14<00:00,  5.37s/it]


Accuracy: 98.08
Iteration:1 seed:20


Trained: 100%|██████████| 25/25 [02:13<00:00,  5.35s/it]


Accuracy: 98.36
Iteration:2 seed:30


Trained: 100%|██████████| 25/25 [02:13<00:00,  5.34s/it]


Accuracy: 98.44
Iteration:3 seed:40


Trained: 100%|██████████| 25/25 [02:14<00:00,  5.39s/it]


Accuracy: 98.21
Iteration:4 seed:50


Trained: 100%|██████████| 25/25 [02:12<00:00,  5.28s/it]


Accuracy: 98.16
Iteration:5 seed:60


Trained: 100%|██████████| 25/25 [02:13<00:00,  5.36s/it]


Accuracy: 98.25
Iteration:6 seed:70


Trained: 100%|██████████| 25/25 [02:12<00:00,  5.32s/it]


Accuracy: 97.82
Iteration:7 seed:80


Trained: 100%|██████████| 25/25 [02:13<00:00,  5.34s/it]


Accuracy: 98.29
Iteration:8 seed:90


Trained: 100%|██████████| 25/25 [02:15<00:00,  5.42s/it]


Accuracy: 98.34
Iteration:9 seed:100


Trained: 100%|██████████| 25/25 [02:17<00:00,  5.50s/it]


Accuracy: 98.3
Mean Accuracy:98.225, STD: 0.1666283289239878

Train Size: 20000, Regularizer: SpectReg, Parameter: 0.001
Iteration:0 seed:10


Trained: 100%|██████████| 25/25 [02:20<00:00,  5.62s/it]


Accuracy: 98.11
Iteration:1 seed:20


Trained: 100%|██████████| 25/25 [02:19<00:00,  5.59s/it]


Accuracy: 98.36
Iteration:2 seed:30


Trained: 100%|██████████| 25/25 [02:18<00:00,  5.56s/it]


Accuracy: 98.45
Iteration:3 seed:40


Trained: 100%|██████████| 25/25 [02:18<00:00,  5.54s/it]


Accuracy: 98.38
Iteration:4 seed:50


Trained: 100%|██████████| 25/25 [02:18<00:00,  5.53s/it]


Accuracy: 98.3
Iteration:5 seed:60


Trained: 100%|██████████| 25/25 [02:19<00:00,  5.56s/it]


Accuracy: 98.12
Iteration:6 seed:70


Trained: 100%|██████████| 25/25 [02:19<00:00,  5.57s/it]


Accuracy: 98.19
Iteration:7 seed:80


Trained: 100%|██████████| 25/25 [02:18<00:00,  5.53s/it]


Accuracy: 98.48
Iteration:8 seed:90


Trained: 100%|██████████| 25/25 [02:21<00:00,  5.65s/it]


Accuracy: 98.43
Iteration:9 seed:100


Trained: 100%|██████████| 25/25 [02:23<00:00,  5.74s/it]


Accuracy: 98.13
Mean Accuracy:98.29499999999999, STD: 0.13822083779228223

Train Size: 20000, Regularizer: JacReg, Parameter: 0.3
Iteration:0 seed:10


Trained: 100%|██████████| 25/25 [02:28<00:00,  5.94s/it]


Accuracy: 98.23
Iteration:1 seed:20


Trained: 100%|██████████| 25/25 [02:24<00:00,  5.77s/it]


Accuracy: 98.65
Iteration:2 seed:30


Trained: 100%|██████████| 25/25 [02:16<00:00,  5.48s/it]


Accuracy: 97.98
Iteration:3 seed:40


Trained: 100%|██████████| 25/25 [02:17<00:00,  5.52s/it]


Accuracy: 98.21
Iteration:4 seed:50


Trained: 100%|██████████| 25/25 [02:18<00:00,  5.53s/it]


Accuracy: 98.18
Iteration:5 seed:60


Trained: 100%|██████████| 25/25 [02:17<00:00,  5.49s/it]


Accuracy: 98.53
Iteration:6 seed:70


Trained: 100%|██████████| 25/25 [02:16<00:00,  5.45s/it]


Accuracy: 97.87
Iteration:7 seed:80


Trained: 100%|██████████| 25/25 [02:15<00:00,  5.44s/it]


Accuracy: 98.08
Iteration:8 seed:90


Trained: 100%|██████████| 25/25 [02:15<00:00,  5.43s/it]


Accuracy: 98.29
Iteration:9 seed:100


Trained: 100%|██████████| 25/25 [02:17<00:00,  5.52s/it]


Accuracy: 98.29
Mean Accuracy:98.231, STD: 0.22160550534677648

Train Size: 20000, Regularizer: ConfPenalty, Parameter: 0.03
Iteration:0 seed:10


Trained: 100%|██████████| 25/25 [02:03<00:00,  4.95s/it]


Accuracy: 98.04
Iteration:1 seed:20


Trained: 100%|██████████| 25/25 [02:04<00:00,  4.98s/it]


Accuracy: 98.33
Iteration:2 seed:30


Trained: 100%|██████████| 25/25 [02:04<00:00,  4.96s/it]


Accuracy: 98.35
Iteration:3 seed:40


Trained: 100%|██████████| 25/25 [02:03<00:00,  4.94s/it]


Accuracy: 98.23
Iteration:4 seed:50


Trained: 100%|██████████| 25/25 [02:03<00:00,  4.92s/it]


Accuracy: 98.45
Iteration:5 seed:60


Trained: 100%|██████████| 25/25 [02:05<00:00,  5.01s/it]


Accuracy: 98.36
Iteration:6 seed:70


Trained: 100%|██████████| 25/25 [02:04<00:00,  4.97s/it]


Accuracy: 98.15
Iteration:7 seed:80


Trained: 100%|██████████| 25/25 [02:03<00:00,  4.93s/it]


Accuracy: 97.98
Iteration:8 seed:90


Trained: 100%|██████████| 25/25 [02:04<00:00,  4.99s/it]


Accuracy: 98.4
Iteration:9 seed:100


Trained: 100%|██████████| 25/25 [02:03<00:00,  4.96s/it]


Accuracy: 98.35
Mean Accuracy:98.264, STD: 0.15047923444781133



# Experiment C6: APPROXIMATING THE FROBENIUS NORM OF THE JACOBIAN DOES NOT DECREASE ACCURACY


In [20]:
batch_size=50
num_samples_per_class=200
seeds=[10,20,30,40,50,60,70,80,90,100]

In [21]:
def train(model,train_loader,test_loader,num_samples_per_class,regularizer):
    criterion=nn.CrossEntropyLoss()
    opt=torch.optim.Adam(model.parameters(),lr=0.001,betas=(0.9,0.999),weight_decay=0.0005)
    
    num_epochs= int(10000/((num_samples_per_class*10)/batch_size))
    # num_epochs=10
    milestones = [int(0.5 * num_epochs), int(0.75 * num_epochs)]  
    scheduler = MultiStepLR(opt, milestones=milestones, gamma=0.1)
    
    for epoch in tqdm(range(num_epochs),"Trained"):
        for i, data in enumerate(train_loader):
            inputs,label=data
            inputs=inputs.to(device)
            label=label.to(device)
            opt.zero_grad()

            if regularizer=="SpectReg":
                loss = spectral_reg_loss(model, criterion, inputs, label, 0.03, 10)
            elif regularizer=="JacReg":
                loss = jac_reg_loss(model, criterion, inputs, label, 1)
            elif regularizer=="FrobReg":
                loss = frobreg_loss(model, criterion, inputs, label, 0.03)

            loss.backward()
            opt.step()
    
        scheduler.step()
         
    return Evaluate(test_loader,model)

### SpectReg


In [22]:
accuracy_list=[]
for i in range(10):
    print(f"Iteration:{i} seed:{seeds[i]}")
    model=BaseModel(include_dropout=True).to(device)
    train_loader=get_subset_dataloader(data_train,seeds[i],num_samples_per_class,batch_size)
    accuracy=train(model,train_loader,test_loader,num_samples_per_class,"SpectReg")
    accuracy_list.append(accuracy)
    print("Accuracy:",accuracy)

Iteration:0 seed:10


Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [04:28<00:00,  1.08s/it]


Accuracy: 96.37
Iteration:1 seed:20
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [04:31<00:00,  1.09s/it]


Accuracy: 96.39
Iteration:2 seed:30
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [04:34<00:00,  1.10s/it]


Accuracy: 96.72
Iteration:3 seed:40
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:57<00:00,  1.05it/s]


Accuracy: 96.74
Iteration:4 seed:50
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:49<00:00,  1.09it/s]


Accuracy: 96.13
Iteration:5 seed:60
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:46<00:00,  1.10it/s]


Accuracy: 96.32
Iteration:6 seed:70
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:45<00:00,  1.11it/s]


Accuracy: 96.52
Iteration:7 seed:80
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:50<00:00,  1.08it/s]


Accuracy: 96.4
Iteration:8 seed:90
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:40<00:00,  1.14it/s]


Accuracy: 96.74
Iteration:9 seed:100
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:47<00:00,  1.10it/s]


Accuracy: 96.54


In [23]:
print("Accuracy List",accuracy_list)
print("Mean Accuracy:",np.mean(accuracy_list))
print("SD:",round(np.std(accuracy_list),2))

Accuracy List [96.37, 96.39, 96.72, 96.74, 96.13, 96.32, 96.52, 96.4, 96.74, 96.54]
Mean Accuracy: 96.487
SD: 0.19


### JacReg


In [24]:
accuracy_list=[]
for i in range(10):
    print(f"Iteration:{i} seed:{seeds[i]}")
    model=BaseModel(include_dropout=True).to(device)
    train_loader=get_subset_dataloader(data_train,seeds[i],num_samples_per_class,batch_size)
    accuracy=train(model,train_loader,test_loader,num_samples_per_class,"JacReg")
    accuracy_list.append(accuracy)
    print("Accuracy:",accuracy)

Iteration:0 seed:10
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:55<00:00,  1.06it/s]


Accuracy: 96.09
Iteration:1 seed:20
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:51<00:00,  1.08it/s]


Accuracy: 96.46
Iteration:2 seed:30
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:48<00:00,  1.09it/s]


Accuracy: 96.65
Iteration:3 seed:40
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:48<00:00,  1.09it/s]


Accuracy: 96.72
Iteration:4 seed:50
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:48<00:00,  1.09it/s]


Accuracy: 96.46
Iteration:5 seed:60
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:46<00:00,  1.10it/s]


Accuracy: 96.61
Iteration:6 seed:70
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:51<00:00,  1.08it/s]


Accuracy: 96.81
Iteration:7 seed:80
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:45<00:00,  1.11it/s]


Accuracy: 96.84
Iteration:8 seed:90
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:53<00:00,  1.07it/s]


Accuracy: 96.78
Iteration:9 seed:100
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:42<00:00,  1.12it/s]


Accuracy: 96.39


In [25]:
print(accuracy_list)
print(np.mean(accuracy_list))
print(np.std(accuracy_list))

[96.09, 96.46, 96.65, 96.72, 96.46, 96.61, 96.81, 96.84, 96.78, 96.39]
96.581
0.22156037551872923


### FrobReg


In [26]:
accuracy_list=[]
for i in range(10):
    print(f"Iteration:{i} seed:{seeds[i]}")
    model=BaseModel(include_dropout=True).to(device)
    train_loader=get_subset_dataloader(data_train,seeds[i],num_samples_per_class,batch_size)
    accuracy=train(model,train_loader,test_loader,num_samples_per_class,"FrobReg")
    accuracy_list.append(accuracy)
    print("Accuracy:",accuracy)

Iteration:0 seed:10
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:58<00:00,  1.05it/s]


Accuracy: 96.91
Iteration:1 seed:20
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:50<00:00,  1.09it/s]


Accuracy: 96.46
Iteration:2 seed:30
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:54<00:00,  1.07it/s]


Accuracy: 96.71
Iteration:3 seed:40
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:54<00:00,  1.07it/s]


Accuracy: 96.69
Iteration:4 seed:50
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:49<00:00,  1.09it/s]


Accuracy: 96.38
Iteration:5 seed:60
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:53<00:00,  1.07it/s]


Accuracy: 96.76
Iteration:6 seed:70
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:48<00:00,  1.09it/s]


Accuracy: 96.34
Iteration:7 seed:80
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:49<00:00,  1.09it/s]


Accuracy: 96.91
Iteration:8 seed:90
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:48<00:00,  1.10it/s]


Accuracy: 96.75
Iteration:9 seed:100
Subset created with total lenght: 2000


Trained: 100%|██████████| 250/250 [03:50<00:00,  1.08it/s]


Accuracy: 96.29


In [27]:
print(accuracy_list)
print(np.mean(accuracy_list))
print(np.std(accuracy_list))

[96.91, 96.46, 96.71, 96.69, 96.38, 96.76, 96.34, 96.91, 96.75, 96.29]
96.61999999999999
0.2209524835796136
