In [30]:
import torch as tt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Every image is converted to a tensor by torchvision's ToTensor as it is read.
transform = transforms.ToTensor()

# Directory the CIFAR-10 files are downloaded to and read from.
path = "./CIFAR-10"

# Training split; download=True fetches the archive if it is not already
# present under `path`.
train_dataset = datasets.CIFAR10(
    root=path,
    train=True,
    download=True,
    transform=transform,
)

# Mini-batches of 64 samples, reshuffled on every pass over the data.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
)

print(len(train_dataset))

50000


In [31]:
def relu(x):
    """Element-wise rectifier: every negative entry of ``x`` becomes zero.

    Args:
        x: Input tensor.

    Returns:
        The element-wise maximum of ``x`` and a scalar zero tensor that is
        created on the same device as ``x``.
    """
    zero = tt.tensor(0.0, device=x.device)
    return tt.maximum(x, zero)

def softmax(x, dim=1):
    """Numerically stable softmax along one dimension of ``x``.

    The maximum along ``dim`` is subtracted before exponentiating, so the
    largest exponent evaluated is ``exp(0) = 1``. This does not change the
    mathematical result but prevents overflow for large scores.

    Args:
        x: Tensor of unnormalised scores.
        dim: Dimension to normalise over. Defaults to 1 (the second
            dimension, as used for the 2-D output of ``Net``); pass ``0`` for
            a 1-D score vector.

    Returns:
        Tensor with the same shape as ``x`` whose entries along ``dim`` are
        non-negative and sum to 1.
    """
    shifted = x - x.max(dim=dim, keepdim=True).values
    exp_x = tt.exp(shifted)
    return exp_x / exp_x.sum(dim=dim, keepdim=True)

def cross_entropy(pred, target, eps=1e-9):
    """Mean negative log-likelihood of the target classes.

    Args:
        pred: Tensor of class probabilities indexed as ``pred[i, target[i]]``,
            i.e. one row per sample and one column per class.
        target: Integer tensor of class indices; its first dimension gives
            the number of samples ``N``.
        eps: Small constant added to the selected probability before taking
            the logarithm so that a probability of exactly zero yields a
            finite loss. Defaults to ``1e-9``.

    Returns:
        Scalar tensor: the mean over the ``N`` samples of
        ``-log(pred[i, target[i]] + eps)``.
    """
    N = target.size(0)
    # Build the row index on pred's device so indexing an accelerator tensor
    # does not need a CPU index tensor to be transferred on every call.
    rows = tt.arange(N, device=pred.device)
    picked = pred[rows, target]
    log_likelihood = -tt.log(picked + eps)
    return log_likelihood.mean()

In [32]:
# Layer sizes for the fully connected network defined below.
# D_in is the flattened length of one input sample: 3*32*32 = 3072
# (presumably 3 colour channels x 32 x 32 pixels of a CIFAR-10 image — confirm).
D_in = 3*32*32
# Widths of the four hidden layers, in forward order.
h1, h2, h3, h4 = 1024, 512, 256, 128
# Number of output units of the final layer (one per predicted class).
D_out = 10

class Net(tt.nn.Module):
    """Fully connected classifier with four ReLU hidden layers.

    Layer widths are read from the module-level constants ``D_in``,
    ``h1``..``h4`` and ``D_out``. The activations are this notebook's own
    ``relu`` and ``softmax`` functions, so ``forward`` returns probabilities
    rather than raw scores.
    """

    def __init__(self):
        super().__init__()
        # Hidden stack: D_in -> h1 -> h2 -> h3 -> h4.
        self.fc1 = tt.nn.Linear(D_in, h1)
        self.fc2 = tt.nn.Linear(h1, h2)
        self.fc3 = tt.nn.Linear(h2, h3)
        self.fc4 = tt.nn.Linear(h3, h4)
        # Output layer: h4 -> D_out.
        self.fc5 = tt.nn.Linear(h4, D_out)

    def forward(self, x):
        """Map a batch of inputs to per-class probabilities.

        Args:
            x: Batch tensor whose first dimension is the batch size; all
                remaining dimensions are flattened into one feature vector
                per sample.

        Returns:
            The result of ``softmax`` applied to the output of ``fc5``.
        """
        batch_size = x.size(0)
        hidden = x.view(batch_size, -1)   # flatten
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = relu(layer(hidden))
        logits = self.fc5(hidden)
        return softmax(logits)

In [33]:
def l2_loss(model, lam):
    """L2 penalty: ``lam`` times the sum of squared magnitudes of all parameters.

    Args:
        model: Object exposing ``parameters()``; every tensor it yields
            (weights and biases alike) contributes to the penalty.
        lam: Regularisation strength that scales the summed penalty.

    Returns:
        ``lam * sum_p sum(|p|**2)``. When ``model`` has no parameters the sum
        stays at its float start value ``0.0``, so a Python float is returned.
    """
    squared_norms = (tt.sum(tt.abs(param) ** 2) for param in model.parameters())
    # Start from 0.0 so the terms are added in parameter order onto a float.
    penalty = sum(squared_norms, 0.0)
    return lam * penalty

def l1_loss(model, lam):
    """L1 penalty: ``lam`` times the sum of absolute values of all parameters.

    Args:
        model: Object exposing ``parameters()``; every tensor it yields
            (weights and biases alike) contributes to the penalty.
        lam: Regularisation strength that scales the summed penalty.

    Returns:
        ``lam * sum_p sum(|p|)``. When ``model`` has no parameters the sum
        stays at its float start value ``0.0``, so a Python float is returned.
    """
    abs_sums = (tt.sum(tt.abs(param)) for param in model.parameters())
    # Start from 0.0 so the terms are added in parameter order onto a float.
    penalty = sum(abs_sums, 0.0)
    return lam * penalty

In [34]:
# Device that the model and every batch are moved to via .to(device) in the
# evaluation and training code below; fixed to the CPU here.
device = "cpu"

def evaluate(model, loader):
    """Compute the average loss and accuracy of ``model`` over ``loader``.

    The model is put in eval mode and gradient tracking is disabled for the
    pass. On exit (including on error) the model is returned to the
    train/eval mode it was in when the function was called, so evaluating
    between training epochs does not silently leave it in eval mode.

    Args:
        model: ``tt.nn.Module`` whose output is treated as per-class
            probabilities (it is passed to ``cross_entropy`` and arg-maxed
            over dimension 1).
        loader: Iterable yielding ``(imgs, labels)`` batches; both are moved
            to the module-level ``device`` before use.

    Returns:
        Tuple ``(avg_loss, acc)``: the per-sample mean of the batch losses
        (each weighted by its batch size) and the fraction of samples whose
        arg-max prediction equals the label.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    was_training = model.training
    model.eval()
    total_loss, correct, total = 0, 0, 0
    try:
        with tt.no_grad():
            for imgs, labels in loader:
                imgs, labels = imgs.to(device), labels.to(device)
                probs = model(imgs)
                loss = cross_entropy(probs, labels)
                # cross_entropy is a batch mean; weight it by the batch size
                # so a smaller final batch does not skew the average.
                total_loss += loss.item() * imgs.size(0)
                preds = probs.argmax(dim=1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
    finally:
        # Undo the eval() switch so the caller's model mode is unchanged.
        model.train(was_training)
    avg_loss = total_loss / total
    acc = correct / total
    return avg_loss, acc

# Number of full passes over the training set for each configuration.
epoch = 2

# Seed applied before every configuration so that each regulariser starts
# from the same initial weights and, through the global RNG, sees the same
# shuffled batch order. Without it the L1/L2 comparison is confounded by
# different random starting points.
seed = 0

# (regulariser function, lambda) pairs to compare.
case = [(l2_loss, 1e-4), (l1_loss, 1e-4)]

# Held-out split, read from the same root directory (`path`) as the training
# split; evaluated in a fixed order.
test_dataset = datasets.CIFAR10(root=path, train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

for reguliser, lam in case:
    print(f"\nRunning {reguliser.__name__} with lambda={lam}")
    tt.manual_seed(seed)
    # A fresh model and optimiser per configuration so runs are independent.
    model = Net().to(device)
    model.train()
    opt = tt.optim.Adam(model.parameters(), lr=1e-4)

    for _ in range(epoch):
        for i, (imgs, labels) in enumerate(train_loader):
            imgs, labels = imgs.to(device), labels.to(device)

            probs = model(imgs)
            # Objective = data term + penalty; the value printed below
            # therefore includes the regularisation term.
            loss = cross_entropy(probs, labels) + reguliser(model, lam)

            opt.zero_grad()
            loss.backward()
            opt.step()

            if i % 100 == 0:
                print(f"Batch {i}, Loss: {loss.item():.4f}")

    # evaluate() reports plain cross-entropy (no penalty) and accuracy.
    test_loss, test_acc = evaluate(model, test_loader)
    print(f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc*100:.2f}%")



Running l2_loss with lambda=0.0001
Batch 0, Loss: 2.3674
Batch 100, Loss: 2.3578
Batch 200, Loss: 2.0001
Batch 300, Loss: 1.8889
Batch 400, Loss: 1.9382
Batch 500, Loss: 1.8697
Batch 600, Loss: 1.8773
Batch 700, Loss: 1.8942
Batch 0, Loss: 1.9129
Batch 100, Loss: 2.0236
Batch 200, Loss: 1.7124
Batch 300, Loss: 1.7897
Batch 400, Loss: 1.7683
Batch 500, Loss: 1.6548
Batch 600, Loss: 1.7223
Batch 700, Loss: 1.8107
Test Loss: 1.6747, Test Acc: 40.26%

Running l1_loss with lambda=0.0001
Batch 0, Loss: 6.3640
Batch 100, Loss: 3.7208
Batch 200, Loss: 2.8255
Batch 300, Loss: 2.5588
Batch 400, Loss: 2.5221
Batch 500, Loss: 2.4744
Batch 600, Loss: 2.2785
Batch 700, Loss: 2.3591
Batch 0, Loss: 2.4403
Batch 100, Loss: 2.2018
Batch 200, Loss: 2.2636
Batch 300, Loss: 2.2206
Batch 400, Loss: 2.1848
Batch 500, Loss: 2.3379
Batch 600, Loss: 2.1920
Batch 700, Loss: 2.2630
Test Loss: 1.9828, Test Acc: 25.42%


In [None]:
# Saved copy of the log printed by the experiment cell above, kept as a plain
# string literal for reference; this cell performs no computation.
'''
Running l2_loss with lambda=0.0001
Batch 0, Loss: 2.3674
Batch 100, Loss: 2.3578
Batch 200, Loss: 2.0001
Batch 300, Loss: 1.8889
Batch 400, Loss: 1.9382
Batch 500, Loss: 1.8697
Batch 600, Loss: 1.8773
Batch 700, Loss: 1.8942
Batch 0, Loss: 1.9129
Batch 100, Loss: 2.0236
Batch 200, Loss: 1.7124
Batch 300, Loss: 1.7897
Batch 400, Loss: 1.7683
Batch 500, Loss: 1.6548
Batch 600, Loss: 1.7223
Batch 700, Loss: 1.8107
Test Loss: 1.6747, Test Acc: 40.26%

Running l1_loss with lambda=0.0001
Batch 0, Loss: 6.3640
Batch 100, Loss: 3.7208
Batch 200, Loss: 2.8255
Batch 300, Loss: 2.5588
Batch 400, Loss: 2.5221
Batch 500, Loss: 2.4744
Batch 600, Loss: 2.2785
Batch 700, Loss: 2.3591
Batch 0, Loss: 2.4403
Batch 100, Loss: 2.2018
Batch 200, Loss: 2.2636
Batch 300, Loss: 2.2206
Batch 400, Loss: 2.1848
Batch 500, Loss: 2.3379
Batch 600, Loss: 2.1920
Batch 700, Loss: 2.2630
Test Loss: 1.9828, Test Acc: 25.42%'''