In [1]:
import wandb
import torch
import torch.nn as nn
import numpy as np
from tqdm.auto import tqdm
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torchvision.datasets as ds
import matplotlib.pyplot as plt

In [2]:
def load_cifar(datadir='./data_cache', p=0.0, seed=None): # will download ~400MB of data into this dir. Change the dir if necessary. If using paperspace, you can make this /storage
    """Load CIFAR-10 as tensors, optionally corrupting the labels.

    Implements the random-label corruption used in "Understanding Deep Learning
    Requires Rethinking Generalization" (Zhang et al., 2017): every label is,
    independently and with probability ``p``, replaced by a class drawn uniformly
    from the 10 classes. The replacement may coincide with the true class, so
    ``p=1.0`` yields fully random labels rather than guaranteed-wrong ones.
    The same corruption rate is applied to both the train and the test split.

    Args:
        datadir: directory the dataset is downloaded to / read from.
        p: probability of re-drawing each label (0.0 keeps the true labels).
        seed: optional integer seed for the label corruption. When ``None``
            (default) the global NumPy random state is used, as before.

    Returns:
        ``(X_tr, Y_tr, X_te, Y_te)`` where the ``X`` tensors are float images
        scaled to [0, 1] with the channel axis moved to position 1, and the
        ``Y`` tensors are int64 class labels.
    """
    train_ds = ds.CIFAR10(root=datadir, train=True,
                           download=True, transform=None)
    test_ds = ds.CIFAR10(root=datadir, train=False,
                          download=True, transform=None)

    # A dedicated generator makes the corruption reproducible when a seed is
    # given; otherwise fall back to the module-level NumPy RNG.
    rng = np.random if seed is None else np.random.RandomState(seed)

    def to_xy(dataset, p):
        # Move the last (channel) axis to position 1 and rescale bytes to [0, 1].
        X = torch.Tensor(np.transpose(dataset.data, (0, 3, 1, 2))).float() / 255.0  # [0, 1]
        labels = np.array(dataset.targets)  # copy, so the dataset's own targets stay intact
        mask = rng.uniform(size=labels.shape[0]) < p  # True -> this label gets re-drawn
        labels[mask] = rng.randint(0, 10, size=np.sum(mask))
        Y = torch.as_tensor(labels, dtype=torch.long)
        return X, Y

    X_tr, Y_tr = to_xy(train_ds, p)
    X_te, Y_te = to_xy(test_ds, p)
    return X_tr, Y_tr, X_te, Y_te

def make_loader(dataset, batch_size=128, shuffle=True, num_workers=4, pin_memory=True):
    """Wrap ``dataset`` in a ``DataLoader``.

    Args:
        dataset: any dataset accepted by ``torch.utils.data.DataLoader``.
        batch_size: number of samples per batch.
        shuffle: reshuffle the data every epoch (default ``True``). Pass
            ``False`` for evaluation loaders if a stable order is wanted.
        num_workers: number of loader subprocesses (default 4). For datasets
            that already live in memory as tensors, ``0`` (load in the main
            process) avoids spawning worker processes on every epoch.
        pin_memory: whether batches are copied into pinned host memory.

    Returns:
        The configured ``DataLoader``.
    """
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size,
            shuffle=shuffle, num_workers=num_workers, pin_memory=pin_memory)

# Clean data (p=0): the single copy of the images plus the true labels.
X_tr, Y_tr, X_te, Y_te = load_cifar()

# load_cifar builds the image tensors independently of p, so the three variants
# differ only in their labels. Keep just the labels from the extra calls and
# point the *_noisy / *_randomized image names at the clean tensors instead of
# holding three identical copies of the dataset in memory.
Y_tr_noisy, Y_te_noisy = load_cifar(p=0.5)[1::2]            # elements 1 and 3: (Y_tr, Y_te)
X_tr_noisy, X_te_noisy = X_tr, X_te
Y_tr_randomized, Y_te_randomized = load_cifar(p=1.0)[1::2]  # elements 1 and 3: (Y_tr, Y_te)
X_tr_randomized, X_te_randomized = X_tr, X_te

# One train loader per label regime, in the order: true, 50% noisy, fully random.
train_dl = make_loader(TensorDataset(X_tr, Y_tr), batch_size=64)
train_dl_noisy = make_loader(TensorDataset(X_tr_noisy, Y_tr_noisy), batch_size=64)
train_dl_randomized = make_loader(TensorDataset(X_tr_randomized, Y_tr_randomized), batch_size=64)
train_dls = [train_dl, train_dl_noisy, train_dl_randomized]

# Matching test loaders (test labels are corrupted with the same p as their train split).
test_dl = make_loader(TensorDataset(X_te, Y_te), batch_size=64)
test_dl_noisy = make_loader(TensorDataset(X_te_noisy, Y_te_noisy), batch_size=64)
test_dl_randomized = make_loader(TensorDataset(X_te_randomized, Y_te_randomized), batch_size=64)
test_dls = [test_dl, test_dl_noisy, test_dl_randomized]

# Optional experiment (disabled): evaluate on test images flipped along both spatial axes.
# X_te_flipped = torch.flip(X_te, [2, 3])
# flipped_test_dl = make_loader(TensorDataset(X_te_flipped, Y_te), batch_size=64)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [8]:
def train_epoch(model, train_dl : DataLoader, opt, k = 50, device=None):
    ''' Trains model for one epoch on the provided dataloader, with optimizer opt.

    Args:
        model: network to train; it is switched to train mode and moved to ``device``.
        train_dl: loader yielding ``(inputs, integer class labels)`` batches.
        opt: optimizer built over ``model``'s parameters.
        k: the running loss/accuracy shown on the progress bar is refreshed
            every ``k`` batches; a falsy value disables the refresh.
        device: device to train on. Defaults to CUDA when it is available,
            otherwise CPU.

    Returns:
        ``(avg_loss, train_acc)`` as Python floats: the per-sample mean
        cross-entropy and the fraction of correct predictions, both accumulated
        over the epoch while the weights are being updated.
    '''
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    loss_func = nn.CrossEntropyLoss()
    model.train()
    model.to(device)

    net_loss = 0.0
    n_correct = 0
    n_total = 0
    progress = tqdm(train_dl)
    for i, (xB, yB) in enumerate(progress):
        xB, yB = xB.to(device), yB.to(device)
        opt.zero_grad()
        outputs = model(xB)
        loss = loss_func(outputs, yB)
        loss.backward()
        opt.step()

        batch_size = yB.size(0)
        # CrossEntropyLoss returns the batch *mean*; weighting it by the batch
        # size accumulates a sum, so the final division by n_total is a true
        # per-sample average even when the last batch is smaller.
        net_loss += loss.item() * batch_size
        # .item() keeps the counters as plain Python numbers rather than
        # accumulating (and returning) a tensor that lives on the device.
        n_correct += (outputs.detach().argmax(dim=1) == yB).sum().item()
        n_total += batch_size

        if k and (i + 1) % k == 0:
            progress.set_postfix(loss=net_loss / n_total, acc=n_correct / n_total)
        # Note: no torch.cuda.empty_cache() here -- calling it on every batch
        # only forces the caching allocator to re-request memory and slows training.

    avg_loss = net_loss / n_total
    train_acc = n_correct / n_total
    return avg_loss, train_acc


def evaluate(model, test_dl, loss_func=None, device=None):
    ''' Returns loss, acc

    Runs ``model`` over every batch of ``test_dl`` without tracking gradients.

    Args:
        model: network to evaluate; it is switched to eval mode and moved to
            ``device`` (and is left in eval mode afterwards).
        test_dl: loader yielding ``(inputs, integer class labels)`` batches.
        loss_func: callable returning the batch-mean loss; defaults to a fresh
            ``nn.CrossEntropyLoss()``.
        device: device to evaluate on. Defaults to CUDA when it is available,
            otherwise CPU.

    Returns:
        ``(loss, acc)`` as Python floats: the per-sample mean loss and the
        fraction of correctly classified samples.
    '''
    if loss_func is None:
        loss_func = nn.CrossEntropyLoss()
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()
    model.to(device)

    n_correct = 0
    n_total = 0
    net_loss = 0.0
    with torch.no_grad():
        for xb, yb in test_dl:
            xb, yb = xb.to(device), yb.to(device)
            outputs = model(xb)
            batch_size = yb.size(0)
            # Weight the batch-mean loss by the batch size so that a smaller
            # final batch does not skew the per-sample average.
            net_loss += loss_func(outputs, yb).item() * batch_size
            n_correct += (outputs.argmax(dim=1) == yb).sum().item()
            n_total += batch_size

    acc = n_correct / float(n_total)
    loss = net_loss / float(n_total)
    return loss, acc

In [9]:
class Flatten(nn.Module):
    """Collapse every dimension after the batch dimension into one.

    For the ``(N, C, 1, 1)`` feature maps produced at the end of ``make_cnn``
    this yields ``(N, C)``. Larger spatial maps and non-contiguous inputs are
    flattened as well instead of raising.
    """
    def forward(self, x): return x.flatten(start_dim=1)

def _conv_block(in_ch, out_ch, dropout, pool):
    ''' Returns the layers of one block: Conv3x3 -> BatchNorm -> [Dropout] -> ReLU -> [MaxPool2d(2)]. '''
    layers = [
        nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1,
                  padding=1, bias=True),
        nn.BatchNorm2d(out_ch),
    ]
    if dropout:
        layers.append(nn.Dropout())
    layers.append(nn.ReLU())
    if pool:
        layers.append(nn.MaxPool2d(2))
    return layers


def make_cnn(c=64, num_classes=10, regularized=False):
    ''' Returns a 5-layer CNN with width parameter c.

    The network has four 3x3 convolution blocks with c, 2c, 4c and 8c output
    channels (each followed by batch norm and ReLU; blocks 1-3 also by a 2x2
    max-pool), then a 4x4 max-pool, a flatten and a linear classifier.

    Args:
        c: width multiplier; the classifier receives ``8*c`` features.
        num_classes: number of output logits.
        regularized: when truthy, ``nn.Dropout()`` (default rate) is inserted
            between the batch norm and the ReLU of every convolution block.

    Returns:
        An ``nn.Sequential`` with the same layer order in both variants apart
        from the extra dropout layers. The pooling stack shrinks the spatial
        size by 2*2*2*4 = 32, so the linear layer fits inputs of 32x32 pixels.
    '''
    widths = [3, c, c*2, c*4, c*8]
    layers = []
    for idx, (in_ch, out_ch) in enumerate(zip(widths, widths[1:])):
        # Layer 0 keeps full resolution; layers 1-3 each halve it.
        layers += _conv_block(in_ch, out_ch, dropout=regularized, pool=idx > 0)

    # Layer 4: final pooling and the linear classifier head.
    layers += [
        nn.MaxPool2d(4),
        Flatten(),
        nn.Linear(c*8, num_classes, bias=True),
    ]
    return nn.Sequential(*layers)

## Under-Parametrized (c=4) 

In [10]:
epochs = 5000
runnames = ['True-Labels', 'Noisy-Labels', 'Randomized Labels']

# One independent wandb run per label regime (true / noisy / randomized).
# The loop variables are deliberately not named train_dl / test_dl so that the
# module-level loaders of the same name are not rebound by this cell.
for run_idx, (run_train_dl, run_test_dl) in enumerate(zip(train_dls, test_dls)):
    # Start every run from freshly initialised weights and a fresh optimizer;
    # otherwise the noisy/randomized runs would continue from a model that was
    # already trained on the previous label regime.
    model = make_cnn(c=4)  # default regularized=False, i.e. this model has no dropout
    # lr taken from the LTH paper, where Conv-4 architectures were trained
    # (c=4: 5k epochs, c=64: 15k epochs) with dropout.
    opt = torch.optim.SGD(model.parameters(), lr=3e-3)
    if run_idx == 0:
        print('Number of trainable parameters of the current model ',  sum(p.numel() for p in model.parameters() if p.requires_grad))

    run = wandb.init(project="CS229br_hw0_memorizing_NN_Underparametrized", reinit=True)
    wandb.run.name = runnames[run_idx]
    try:
        for epoch in range(epochs):
            print(f'Starting Epoch {epoch}')
            train_loss, train_acc = train_epoch(model, run_train_dl, opt)
            test_loss, test_acc = evaluate(model, run_test_dl)
            wandb.log({"epochs":epoch, "train_loss": train_loss, "train_accuracy": train_acc, "test_loss": test_loss, "test_accuracy": test_acc})
            print(f'Epoch {epoch}:\t Train Loss: {train_loss:.3f} \t Train Acc: {train_acc:.3f}\t Test Acc: {test_acc:.3f}')
    finally:
        # Close the wandb run even if training is interrupted or crashes.
        run.finish()

Number of trainable parameters of the current model  6666


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
epochs,28.0
train_loss,1.03369
train_accuracy,0.63634
test_loss,1.12723
test_accuracy,0.6028
_step,28.0
_runtime,170.0
_timestamp,1612451386.0


0,1
epochs,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██
train_loss,█▆▅▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
train_accuracy,▁▃▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇█████████
test_loss,█▆▆▅▅▄▄▄▄▃▃▃▃▃▂▃▃▂▂▂▂▁▁▃▂▂▂▁▁
test_accuracy,▁▂▃▄▄▄▅▅▅▆▆▆▆▆▇▆▆▇▇▇▇██▇▇▇▇██
_step,▁▁▁▂▂▂▃▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆▇▇▇▇▇██
_runtime,▁▁▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇██
_timestamp,▁▁▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇██


Starting Epoch 0


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 0:	 Train Loss: 2.020 	 Train Acc: 0.277	 Test Acc: 0.353
Starting Epoch 1


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 1:	 Train Loss: 1.738 	 Train Acc: 0.380	 Test Acc: 0.364
Starting Epoch 2


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 2:	 Train Loss: 1.621 	 Train Acc: 0.418	 Test Acc: 0.435
Starting Epoch 3


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 3:	 Train Loss: 1.545 	 Train Acc: 0.445	 Test Acc: 0.417
Starting Epoch 4


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 4:	 Train Loss: 1.486 	 Train Acc: 0.466	 Test Acc: 0.394
Starting Epoch 5


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 5:	 Train Loss: 1.438 	 Train Acc: 0.485	 Test Acc: 0.394
Starting Epoch 6


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 6:	 Train Loss: 1.394 	 Train Acc: 0.501	 Test Acc: 0.459
Starting Epoch 7


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 7:	 Train Loss: 1.357 	 Train Acc: 0.514	 Test Acc: 0.470
Starting Epoch 8


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 8:	 Train Loss: 1.323 	 Train Acc: 0.526	 Test Acc: 0.510
Starting Epoch 9


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 9:	 Train Loss: 1.292 	 Train Acc: 0.539	 Test Acc: 0.517
Starting Epoch 10


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 10:	 Train Loss: 1.264 	 Train Acc: 0.550	 Test Acc: 0.532
Starting Epoch 11


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 11:	 Train Loss: 1.239 	 Train Acc: 0.559	 Test Acc: 0.467
Starting Epoch 12


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 12:	 Train Loss: 1.216 	 Train Acc: 0.571	 Test Acc: 0.546
Starting Epoch 13


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 13:	 Train Loss: 1.195 	 Train Acc: 0.576	 Test Acc: 0.562
Starting Epoch 14


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 14:	 Train Loss: 1.176 	 Train Acc: 0.584	 Test Acc: 0.576
Starting Epoch 15


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 15:	 Train Loss: 1.157 	 Train Acc: 0.590	 Test Acc: 0.565
Starting Epoch 16


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 16:	 Train Loss: 1.141 	 Train Acc: 0.596	 Test Acc: 0.539
Starting Epoch 17


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 17:	 Train Loss: 1.126 	 Train Acc: 0.602	 Test Acc: 0.584
Starting Epoch 18


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 18:	 Train Loss: 1.112 	 Train Acc: 0.609	 Test Acc: 0.567
Starting Epoch 19


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 19:	 Train Loss: 1.100 	 Train Acc: 0.610	 Test Acc: 0.595
Starting Epoch 20


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 20:	 Train Loss: 1.087 	 Train Acc: 0.617	 Test Acc: 0.566
Starting Epoch 21


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 21:	 Train Loss: 1.078 	 Train Acc: 0.621	 Test Acc: 0.567
Starting Epoch 22


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 22:	 Train Loss: 1.065 	 Train Acc: 0.625	 Test Acc: 0.597
Starting Epoch 23


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 23:	 Train Loss: 1.055 	 Train Acc: 0.629	 Test Acc: 0.568
Starting Epoch 24


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 24:	 Train Loss: 1.045 	 Train Acc: 0.632	 Test Acc: 0.594
Starting Epoch 25


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 25:	 Train Loss: 1.037 	 Train Acc: 0.635	 Test Acc: 0.595
Starting Epoch 26


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 26:	 Train Loss: 1.028 	 Train Acc: 0.639	 Test Acc: 0.590
Starting Epoch 27


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 27:	 Train Loss: 1.019 	 Train Acc: 0.641	 Test Acc: 0.538
Starting Epoch 28


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 28:	 Train Loss: 1.010 	 Train Acc: 0.645	 Test Acc: 0.608
Starting Epoch 29


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 29:	 Train Loss: 1.004 	 Train Acc: 0.646	 Test Acc: 0.604
Starting Epoch 30


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 30:	 Train Loss: 0.997 	 Train Acc: 0.650	 Test Acc: 0.603
Starting Epoch 31


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 31:	 Train Loss: 0.989 	 Train Acc: 0.652	 Test Acc: 0.577
Starting Epoch 32


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 32:	 Train Loss: 0.982 	 Train Acc: 0.656	 Test Acc: 0.613
Starting Epoch 33


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 33:	 Train Loss: 0.977 	 Train Acc: 0.657	 Test Acc: 0.625
Starting Epoch 34


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 34:	 Train Loss: 0.970 	 Train Acc: 0.661	 Test Acc: 0.586
Starting Epoch 35


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 35:	 Train Loss: 0.963 	 Train Acc: 0.662	 Test Acc: 0.620
Starting Epoch 36


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 36:	 Train Loss: 0.958 	 Train Acc: 0.662	 Test Acc: 0.601
Starting Epoch 37


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 37:	 Train Loss: 0.952 	 Train Acc: 0.667	 Test Acc: 0.608
Starting Epoch 38


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 38:	 Train Loss: 0.947 	 Train Acc: 0.666	 Test Acc: 0.605
Starting Epoch 39


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 39:	 Train Loss: 0.943 	 Train Acc: 0.668	 Test Acc: 0.583
Starting Epoch 40


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 40:	 Train Loss: 0.940 	 Train Acc: 0.671	 Test Acc: 0.629
Starting Epoch 41


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 41:	 Train Loss: 0.933 	 Train Acc: 0.672	 Test Acc: 0.613
Starting Epoch 42


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 42:	 Train Loss: 0.930 	 Train Acc: 0.673	 Test Acc: 0.639
Starting Epoch 43


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 43:	 Train Loss: 0.924 	 Train Acc: 0.676	 Test Acc: 0.616
Starting Epoch 44


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 44:	 Train Loss: 0.921 	 Train Acc: 0.678	 Test Acc: 0.626
Starting Epoch 45


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 45:	 Train Loss: 0.917 	 Train Acc: 0.679	 Test Acc: 0.634
Starting Epoch 46


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 46:	 Train Loss: 0.914 	 Train Acc: 0.678	 Test Acc: 0.629
Starting Epoch 47


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 47:	 Train Loss: 0.910 	 Train Acc: 0.681	 Test Acc: 0.621
Starting Epoch 48


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 48:	 Train Loss: 0.905 	 Train Acc: 0.683	 Test Acc: 0.641
Starting Epoch 49


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 49:	 Train Loss: 0.903 	 Train Acc: 0.681	 Test Acc: 0.632
Starting Epoch 50


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 50:	 Train Loss: 0.899 	 Train Acc: 0.684	 Test Acc: 0.643
Starting Epoch 51


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 51:	 Train Loss: 0.896 	 Train Acc: 0.683	 Test Acc: 0.644
Starting Epoch 52


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 52:	 Train Loss: 0.892 	 Train Acc: 0.687	 Test Acc: 0.640
Starting Epoch 53


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 53:	 Train Loss: 0.889 	 Train Acc: 0.687	 Test Acc: 0.629
Starting Epoch 54


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 54:	 Train Loss: 0.887 	 Train Acc: 0.688	 Test Acc: 0.639
Starting Epoch 55


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 55:	 Train Loss: 0.884 	 Train Acc: 0.690	 Test Acc: 0.610
Starting Epoch 56


  0%|          | 0/782 [00:00<?, ?it/s]

RuntimeError: DataLoader worker (pid 30808) is killed by signal: Killed. 