In [3]:
from torchfactor.factorization.direct_tuckernet import DirectTuckerNet

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data

import torchvision
import torchvision.transforms as transforms


import numpy as np
import tqdm

import matplotlib.pyplot as plt

In [10]:
class dense_net( nn.Module):
    def __init__(self):
        super(dense_net, self).__init__()
        self.c1_weights = nn.Parameter(torch.Tensor(16, 1, 3, 3))
        self.c2_weights = nn.Parameter(torch.Tensor(32, 16, 5, 5))
        self.c3_weights = nn.Parameter(torch.Tensor(32, 32, 3, 3))
        self.c4_weights = nn.Parameter(torch.Tensor(32, 32, 5, 5))
        
        torch.nn.init.normal_(self.c1_weights)
        torch.nn.init.normal_(self.c2_weights)
        torch.nn.init.normal_(self.c3_weights)
        torch.nn.init.normal_(self.c4_weights)

        #self.conv1 = nn.Conv2d(1, 16, 3)
        self.bn1 = nn.BatchNorm2d(16)

        #self.conv2 = nn.Conv2d(16, 32, 5)
        self.bn2 = nn.BatchNorm2d(32)


        #self.conv3 = nn.Conv2d(32, 32, 3)
        self.bn3 = nn.BatchNorm2d(32)


        #self.conv4 = nn.Conv2d(32, 32, 5)
        self.bn4 = nn.BatchNorm2d(32)               
        
        self.pool = nn.MaxPool2d(2, 2)
        
        self.fc_net = nn.Sequential(
            nn.Linear(32 * 2 * 2, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(), 
            nn.Linear(64, 64),
            nn.ReLU(), 
            nn.Linear(64, 10),
        )

    def forward(self, x):
        x = F.relu(F.conv2d(x, self.c1_weights))
        x = self.bn1(x)

        x = F.relu(F.conv2d(x, self.c2_weights))
        x = self.bn2(x)

        x = self.pool(x)

        x = F.relu(F.conv2d(x, self.c3_weights))
        x = self.bn3(x)

        x = F.relu(F.conv2d(x, self.c4_weights))
        x = self.bn4(x)

        x = self.pool(x)

        x = x.view(-1, 32 * 2 * 2)
        return self.fc_net(x)

In [11]:
net = dense_net().cuda()

model_parameters = filter(lambda p: p.requires_grad, net.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('This model has {} params'.format(params))

This model has 77562 params


In [12]:
criterion = nn.CrossEntropyLoss()
opt = optim.Adam(net.parameters(), lr=4e-3)
n_epochs = 5
TOTAL_CLASSES = 10

train_transform = transforms.Compose(
    [transforms.ToTensor()])
test_transform = transforms.Compose(
    [transforms.ToTensor()])

In [13]:
mean_losses = []
for e in range(n_epochs):
    net.train()
    losses = []
    
    train_dataset = torchvision.datasets.FashionMNIST(root = "data", train=True, 
                                                      transform=train_transform, 
                                                      target_transform=None, download=True) 
    
    train_dataloader = data.DataLoader(train_dataset, batch_size=32, 
                                       shuffle=True, num_workers=4, drop_last=True)

    for i, batch in enumerate(tqdm.tqdm(train_dataloader)):
        img, label = batch
        img, label = img.cuda(), label.cuda()

        opt.zero_grad()

        pred = net(img)

        loss = criterion(pred, label)
        
        loss.backward()
        opt.step()

        losses.append(loss.item())
    mean_losses.append(np.array(losses).mean()) 

100%|██████████| 1875/1875 [00:27<00:00, 67.02it/s] 
100%|██████████| 1875/1875 [00:20<00:00, 90.83it/s] 
100%|██████████| 1875/1875 [00:23<00:00, 79.95it/s]
100%|██████████| 1875/1875 [00:23<00:00, 80.13it/s]
100%|██████████| 1875/1875 [00:24<00:00, 76.99it/s]


In [14]:
def calculate_accuracy(dataloader, is_gpu):
    """ Util function to calculate val set accuracy,
    both overall and per class accuracy
    Args:
        dataloader (torch.utils.data.DataLoader): val set 
        is_gpu (bool): whether to run on GPU
    Returns:
        tuple: (overall accuracy, class level accuracy)
    """    
    correct = 0.
    total = 0.
    predictions = []

    class_correct = list(0. for i in range(TOTAL_CLASSES))
    class_total = list(0. for i in range(TOTAL_CLASSES))

    for data in dataloader:
        images, labels = data
        if is_gpu:
            images = images.cuda()
            labels = labels.cuda()
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        predictions.extend(list(predicted.cpu().numpy()))
        total += labels.size(0)
        correct += (predicted == labels).sum()

        c = (predicted == labels).squeeze()
        for i in range(len(labels)):
            label = labels[i]
            class_correct[label] += c[i]
            class_total[label] += 1

    class_accuracy = 100 * np.divide(class_correct, class_total)
    return 100*correct/total, class_accuracy

test_dataset = torchvision.datasets.FashionMNIST(root = "data", train=False, 
                                                 transform=test_transform, target_transform=None, download=False)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size=256, 
                                         shuffle=False, num_workers=2, drop_last=False) #DO NOT CHANGE


In [15]:
net.eval()

test_accuracy, test_classwise_accuracy = calculate_accuracy(testloader, True)
print(test_accuracy)
print(test_classwise_accuracy)

tensor(88.3000, device='cuda:0')
[tensor(93.6000, device='cuda:0') tensor(96.4000, device='cuda:0')
 tensor(86.4000, device='cuda:0') tensor(93.8000, device='cuda:0')
 tensor(74.9000, device='cuda:0') tensor(98., device='cuda:0')
 tensor(54.1000, device='cuda:0') tensor(93.7000, device='cuda:0')
 tensor(97., device='cuda:0') tensor(95.1000, device='cuda:0')]


In [35]:
class tucker_net( nn.Module):
    def __init__(self):
        super(tucker_net, self).__init__()
#         self.c1_weights = nn.Parameter(torch.Tensor(16, 1, 3, 3))
#         self.c2_weights = nn.Parameter(torch.Tensor(32, 16, 5, 5))
#         self.c3_weights = nn.Parameter(torch.Tensor(32, 32, 3, 3))
#         self.c4_weights = nn.Parameter(torch.Tensor(32, 32, 5, 5))
        
        self.c1_tuck = DirectTuckerNet((16, 1, 3 * 3), (16, 1, 3 * 3))
        self.c2_tuck = DirectTuckerNet((32, 16, 5 * 5), (26, 16, 3 * 3))
        self.c3_tuck = DirectTuckerNet((32, 32, 3 * 3), (26, 26, 3 * 3))
        self.c4_tuck = DirectTuckerNet((32, 32, 5 * 5), (26, 26, 5 * 5))
        

        #self.conv1 = nn.Conv2d(1, 16, 3)
        self.bn1 = nn.BatchNorm2d(16)

        #self.conv2 = nn.Conv2d(16, 32, 5)
        self.bn2 = nn.BatchNorm2d(32)


        #self.conv3 = nn.Conv2d(32, 32, 3)
        self.bn3 = nn.BatchNorm2d(32)


        #self.conv4 = nn.Conv2d(32, 32, 5)
        self.bn4 = nn.BatchNorm2d(32)               
        
        self.pool = nn.MaxPool2d(2, 2)
        
        self.fc_net = nn.Sequential(
            nn.Linear(32 * 2 * 2, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(), 
            nn.Linear(64, 64),
            nn.ReLU(), 
            nn.Linear(64, 10),
        )

    def forward(self, x):
        c1_weights = self.c1_tuck().view(16, 1, 3, 3)
        c2_weights = self.c2_tuck().view(32, 16, 5, 5)
        c3_weights = self.c3_tuck().view(32, 32, 3, 3)
        c4_weights = self.c4_tuck().view(32, 32, 5, 5)
        
        x = F.relu(F.conv2d(x, c1_weights))
        x = self.bn1(x)

        x = F.relu(F.conv2d(x, c2_weights))
        x = self.bn2(x)

        x = self.pool(x)

        x = F.relu(F.conv2d(x, c3_weights))
        x = self.bn3(x)

        x = F.relu(F.conv2d(x, c4_weights))
        x = self.bn4(x)

        x = self.pool(x)

        x = x.view(-1, 32 * 2 * 2)
        return self.fc_net(x)

In [36]:
net = tucker_net().cuda()

model_parameters = filter(lambda p: p.requires_grad, net.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print('This model has {} params'.format(params))

criterion = nn.CrossEntropyLoss()
opt = optim.Adam(net.parameters(), lr=4e-3)
n_epochs = 5
TOTAL_CLASSES = 10

train_transform = transforms.Compose(
    [transforms.ToTensor()])
test_transform = transforms.Compose(
    [transforms.ToTensor()])

This model has 62359 params


In [37]:
mean_losses = []
for e in range(n_epochs):
    net.train()
    losses = []
    
    train_dataset = torchvision.datasets.FashionMNIST(root = "data", train=True, 
                                                      transform=train_transform, 
                                                      target_transform=None, download=True) 
    
    train_dataloader = data.DataLoader(train_dataset, batch_size=32, 
                                       shuffle=True, num_workers=4, drop_last=True)

    for i, batch in enumerate(tqdm.tqdm(train_dataloader)):
        img, label = batch
        img, label = img.cuda(), label.cuda()

        opt.zero_grad()

        pred = net(img)

        loss = criterion(pred, label)
        
        loss.backward()
        opt.step()

        losses.append(loss.item())
    mean_losses.append(np.array(losses).mean()) 

100%|██████████| 1875/1875 [00:32<00:00, 57.76it/s]
100%|██████████| 1875/1875 [00:31<00:00, 58.67it/s]
100%|██████████| 1875/1875 [00:31<00:00, 58.83it/s]
100%|██████████| 1875/1875 [00:31<00:00, 59.99it/s]
100%|██████████| 1875/1875 [00:32<00:00, 58.00it/s]


In [38]:
net.eval()

test_accuracy, test_classwise_accuracy = calculate_accuracy(testloader, True)
print(test_accuracy)
print(test_classwise_accuracy)

tensor(77.8600, device='cuda:0')
[tensor(74.8000, device='cuda:0') tensor(90., device='cuda:0')
 tensor(63.4000, device='cuda:0') tensor(87.6000, device='cuda:0')
 tensor(78.3000, device='cuda:0') tensor(78.2000, device='cuda:0')
 tensor(26.5000, device='cuda:0') tensor(92.8000, device='cuda:0')
 tensor(95.0000, device='cuda:0') tensor(92., device='cuda:0')]
