In [2]:
from torchfactor.factorization.svdnet import SVDNet

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data

import torchvision
import torchvision.transforms as transforms


import numpy as np
import tqdm

import matplotlib.pyplot as plt

In [9]:
class dense_net( nn.Module):
    def __init__(self):
        super(dense_net, self).__init__()

#         self.layers =  [nn.Linear(28 * 28, 256), nn.ReLU(),
#                         nn.Linear(256, 128), nn.ReLU(),
#                         nn.Linear(128, 64), nn.ReLU(),
#                         nn.Linear(64, 10), nn.Softmax(-1)]
        
#         self.layers = nn.Sequential(*self.layers)
        self.conv1 = nn.Conv2d(1, 16, 3)
        self.bn1 = nn.BatchNorm2d(16)

        self.conv2 = nn.Conv2d(16, 32, 5)
        self.bn2 = nn.BatchNorm2d(32)


        self.conv3 = nn.Conv2d(32, 32, 3)
        self.bn3 = nn.BatchNorm2d(32)


        self.conv4 = nn.Conv2d(32, 32, 5)
        self.bn4 = nn.BatchNorm2d(32)               
        
        self.pool = nn.MaxPool2d(2, 2)
        
        self.fc_net = nn.Sequential(
            nn.Linear(32 * 2 * 2, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(), 
            nn.Linear(64, 64),
            nn.ReLU(), 
            nn.Linear(64, 10),
        )

    def forward(self, x):
#         x = x.view(x.shape[0], -1)
#         return self.layers(x)
        x = F.relu(self.conv1(x))
        x = self.bn1(x)

        x = F.relu(self.conv2(x))
        x = self.bn2(x)

        x = self.pool(x)

        x = F.relu(self.conv3(x))
        x = self.bn3(x)

        x = F.relu(self.conv4(x))
        x = self.bn4(x)

        x = self.pool(x)

        x = x.view(-1, 32 * 2 * 2)
        return self.fc_net(x)

In [35]:
net = dense_net().cuda()

model_parameters = filter(lambda p: p.requires_grad, net.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('This model has {} params'.format(params))


This model has 77674 params


In [11]:
criterion = nn.CrossEntropyLoss()
opt = optim.Adam(net.parameters(), lr=4e-3)
n_epochs = 5
TOTAL_CLASSES = 10

train_transform = transforms.Compose(
    [transforms.ToTensor()])
test_transform = transforms.Compose(
    [transforms.ToTensor()])

In [12]:
mean_losses = []
for e in range(n_epochs):
    net.train()
    losses = []
    
    train_dataset = torchvision.datasets.FashionMNIST(root = "data", train=True, 
                                                      transform=train_transform, 
                                                      target_transform=None, download=True) 
    
    train_dataloader = data.DataLoader(train_dataset, batch_size=32, 
                                       shuffle=True, num_workers=4, drop_last=True)

    for i, batch in enumerate(tqdm.tqdm(train_dataloader)):
        img, label = batch
        img, label = img.cuda(), label.cuda()

        opt.zero_grad()

        pred = net(img)

        loss = criterion(pred, label)
        
        loss.backward()
        opt.step()

        losses.append(loss.item())
    mean_losses.append(np.array(losses).mean()) 

100%|██████████| 1875/1875 [00:29<00:00, 62.71it/s] 
100%|██████████| 1875/1875 [00:20<00:00, 91.54it/s] 
100%|██████████| 1875/1875 [00:19<00:00, 95.30it/s] 
100%|██████████| 1875/1875 [00:28<00:00, 65.16it/s] 
100%|██████████| 1875/1875 [00:36<00:00, 51.33it/s]


In [13]:
def calculate_accuracy(dataloader, is_gpu):
    """ Util function to calculate val set accuracy,
    both overall and per class accuracy
    Args:
        dataloader (torch.utils.data.DataLoader): val set 
        is_gpu (bool): whether to run on GPU
    Returns:
        tuple: (overall accuracy, class level accuracy)
    """    
    correct = 0.
    total = 0.
    predictions = []

    class_correct = list(0. for i in range(TOTAL_CLASSES))
    class_total = list(0. for i in range(TOTAL_CLASSES))

    for data in dataloader:
        images, labels = data
        if is_gpu:
            images = images.cuda()
            labels = labels.cuda()
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        predictions.extend(list(predicted.cpu().numpy()))
        total += labels.size(0)
        correct += (predicted == labels).sum()

        c = (predicted == labels).squeeze()
        for i in range(len(labels)):
            label = labels[i]
            class_correct[label] += c[i]
            class_total[label] += 1

    class_accuracy = 100 * np.divide(class_correct, class_total)
    return 100*correct/total, class_accuracy

test_dataset = torchvision.datasets.FashionMNIST(root = "data", train=False, 
                                                 transform=test_transform, target_transform=None, download=False)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=256, 
                                         shuffle=False, num_workers=2, drop_last=False) #DO NOT CHANGE


In [14]:
net.eval()

test_accuracy, test_classwise_accuracy = calculate_accuracy(testloader, True)
print(test_accuracy)
print(test_classwise_accuracy)

tensor(90.0500, device='cuda:0')
[tensor(90., device='cuda:0') tensor(97.9000, device='cuda:0')
 tensor(90., device='cuda:0') tensor(91.6000, device='cuda:0')
 tensor(83.4000, device='cuda:0') tensor(97.6000, device='cuda:0')
 tensor(59.1000, device='cuda:0') tensor(95.2000, device='cuda:0')
 tensor(98.3000, device='cuda:0') tensor(97.4000, device='cuda:0')]


In [25]:
class svd_layer( nn.Module):
    def __init__(self, n, num_hhrs):
        super(svd_layer, self).__init__()
        self.svd = SVDNet(n, num_hhrs)
        
    def forward(self, x):
        return self.svd(x.transpose(0,-1)).transpose(0,-1)
   
class svd_net( nn.Module):
    def __init__(self):
        super(svd_net, self).__init__()

        self.conv1 = nn.Conv2d(1, 16, 3)
        self.bn1 = nn.BatchNorm2d(16)

        self.conv2 = nn.Conv2d(16, 32, 5)
        self.bn2 = nn.BatchNorm2d(32)


        self.conv3 = nn.Conv2d(32, 32, 3)
        self.bn3 = nn.BatchNorm2d(32)


        self.conv4 = nn.Conv2d(32, 32, 5)
        self.bn4 = nn.BatchNorm2d(32)               
        
        self.pool = nn.MaxPool2d(2, 2)
        
        self.fc_net = nn.Sequential(
            svd_layer(128, 20),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(), 
            svd_layer(64, 20),
            nn.ReLU(), 
            nn.Linear(64, 10),
        )

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.bn1(x)

        x = F.relu(self.conv2(x))
        x = self.bn2(x)

        x = self.pool(x)

        x = F.relu(self.conv3(x))
        x = self.bn3(x)

        x = F.relu(self.conv4(x))
        x = self.bn4(x)

        x = self.pool(x)

        x = x.view(-1, 32 * 2 * 2)
        return self.fc_net(x)

In [33]:
net = svd_net().cuda()

model_parameters = filter(lambda p: p.requires_grad, net.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('This model has {} params'.format(params))

criterion = nn.CrossEntropyLoss()
opt = optim.Adam(net.parameters(), lr=4e-3)
n_epochs = 5
TOTAL_CLASSES = 10

train_transform = transforms.Compose(
    [transforms.ToTensor()])
test_transform = transforms.Compose(
    [transforms.ToTensor()])

This model has 64874 params


In [27]:
mean_losses = []
for e in range(n_epochs):
    net.train()
    losses = []
    
    train_dataset = torchvision.datasets.FashionMNIST(root = "data", train=True, 
                                                      transform=train_transform, 
                                                      target_transform=None, download=True) 
    
    train_dataloader = data.DataLoader(train_dataset, batch_size=32, 
                                       shuffle=True, num_workers=4, drop_last=True)

    for i, batch in enumerate(tqdm.tqdm(train_dataloader)):
        img, label = batch
        img, label = img.cuda(), label.cuda()

        opt.zero_grad()

        pred = net(img)

        loss = criterion(pred, label)
        
        loss.backward()
        opt.step()

        losses.append(loss.item())
    mean_losses.append(np.array(losses).mean()) 

100%|██████████| 1875/1875 [04:29<00:00,  6.95it/s]
100%|██████████| 1875/1875 [04:30<00:00,  6.94it/s]
100%|██████████| 1875/1875 [04:34<00:00,  6.82it/s]
100%|██████████| 1875/1875 [04:28<00:00,  6.99it/s]
100%|██████████| 1875/1875 [04:32<00:00,  6.87it/s]


In [28]:
net.eval()

test_accuracy, test_classwise_accuracy = calculate_accuracy(testloader, True)
print(test_accuracy)
print(test_classwise_accuracy)

tensor(90.8900, device='cuda:0')
[tensor(81.4000, device='cuda:0') tensor(97.7000, device='cuda:0')
 tensor(84.2000, device='cuda:0') tensor(93.0000, device='cuda:0')
 tensor(90.8000, device='cuda:0') tensor(98.4000, device='cuda:0')
 tensor(74.1000, device='cuda:0') tensor(96.3000, device='cuda:0')
 tensor(97.4000, device='cuda:0') tensor(95.6000, device='cuda:0')]
