# **ResNet Pytorch implementation for FashionMNIST classification**
First we import the required packages.

In [1]:
%matplotlib inline
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
import numpy as np
import torchvision
import torchvision.datasets as datasets
import torchvision.models as models
from torchvision import transforms
import torch.optim as optim
import time
import tqdm as tqdm
from torch.autograd import Variable

## **Load Dataset**
We can load data from pytorch dataset and preprocess it using *transform* function.

Note that the ResNet implemented in torchvision take RGB images as inputs, which has three channels. So, here we repeat the single-channel grey scale digits image three times to fit the torchvision model.

In [2]:
transform = transforms.Compose([transforms.ToTensor(),
                                # expand chennel from 1 to 3 to fit 
                                # ResNet pretrained model
                                transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
                                ]) 
batch_size = 256

# download dataset
mnist_train = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
mnist_test = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)
print(len(mnist_train), len(mnist_test))

# Load dataset
train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size,
    shuffle=True, num_workers=0)
test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size,
    shuffle=True, num_workers=0)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw
Processing...
Done!
60000 10000


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


## **Building the model**


In [3]:
# print(models.resnet18())
class ResNetFeatrueExtractor18(nn.Module):
    def __init__(self, pretrained = True):
        super(ResNetFeatrueExtractor18, self).__init__()
        model_resnet18 = models.resnet18(pretrained=pretrained)
        self.conv1 = model_resnet18.conv1
        self.bn1 = model_resnet18.bn1
        self.relu = model_resnet18.relu
        self.maxpool = model_resnet18.maxpool
        self.layer1 = model_resnet18.layer1
        self.layer2 = model_resnet18.layer2
        self.layer3 = model_resnet18.layer3
        self.layer4 = model_resnet18.layer4
        self.avgpool = model_resnet18.avgpool

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)

        return x

class ResClassifier(nn.Module):
    def __init__(self, dropout_p=0.5): #in_features=512
        super(ResClassifier, self).__init__()        
        self.fc = nn.Linear(512, 10)
    def forward(self, x):       
        out = self.fc(x)
        return out

def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        torch.nn.init.xavier_uniform_(m.weight)
    elif classname.find('Linear') != -1:
        torch.nn.init.xavier_uniform_(m.weight)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.01)

# calculate test accuracy
def test_accuracy(data_iter, netG, netF):
    """Evaluate testset accuracy of a model."""
    acc_sum,n = 0,0
    for (imgs, labels) in data_iter:
        # send data to the GPU if cuda is availabel
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            labels = labels.cuda()
        netG.eval()
        netF.eval()
        with torch.no_grad():
            labels = labels.long()
            acc_sum += torch.sum((torch.argmax(netF(netG(imgs)), dim=1) == labels)).float()
            n += labels.shape[0]
    return acc_sum.item()/n





## **Training using Pre-trained model**

In [4]:
netG = ResNetFeatrueExtractor18(pretrained = True)
netF = ResClassifier()

if torch.cuda.is_available():
    netG = netG.cuda()
    netF = netF.cuda()

# setting up optimizer for both feature generator G and classifier F.
opt_g = optim.SGD(netG.parameters(), lr=0.01, weight_decay=0.0005)
opt_f = optim.SGD(netF.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)

# loss function
criterion = nn.CrossEntropyLoss()

for epoch in range(0, 10):
    n, start = 0, time.time()
    train_l_sum = torch.tensor([0.0], dtype=torch.float32)
    train_acc_sum = torch.tensor([0.0], dtype=torch.float32)
    for i, (imgs, labels) in tqdm.tqdm(enumerate(iter(train_loader))):
        netG.train()
        netF.train()
        imgs = Variable(imgs)
        labels = Variable(labels)
        # train on GPU if possible  
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            labels = labels.cuda()
            train_l_sum = train_l_sum.cuda()
            train_acc_sum = train_acc_sum.cuda()

        opt_g.zero_grad()
        opt_f.zero_grad()

        # extracted feature
        bottleneck = netG(imgs)     
        
        # predicted labels
        label_hat = netF(bottleneck)

        # loss function
        loss= criterion(label_hat, labels)
        loss.backward()
        opt_g.step()
        opt_f.step()
        
        # calcualte training error
        netG.eval()
        netF.eval()
        labels = labels.long()
        train_l_sum += loss.float()
        train_acc_sum += (torch.sum((torch.argmax(label_hat, dim=1) == labels))).float()
        n += labels.shape[0]
    test_acc = test_accuracy(iter(test_loader), netG, netF) 
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'\
        % (epoch + 1, train_l_sum/n, train_acc_sum/n, test_acc, time.time() - start))


Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


HBox(children=(FloatProgress(value=0.0, max=46827520.0), HTML(value='')))




235it [00:17, 13.36it/s]
2it [00:00, 18.67it/s]

epoch 1, loss 0.0020, train acc 0.821, test acc 0.869, time 18.5 sec


235it [00:17, 13.30it/s]
3it [00:00, 16.64it/s]

epoch 2, loss 0.0011, train acc 0.895, test acc 0.877, time 18.5 sec


235it [00:17, 13.12it/s]
2it [00:00, 19.11it/s]

epoch 3, loss 0.0009, train acc 0.913, test acc 0.888, time 18.9 sec


235it [00:18, 12.97it/s]
2it [00:00, 19.03it/s]

epoch 4, loss 0.0008, train acc 0.927, test acc 0.898, time 19.0 sec


235it [00:18, 12.84it/s]
2it [00:00, 18.66it/s]

epoch 5, loss 0.0006, train acc 0.940, test acc 0.896, time 19.2 sec


235it [00:18, 12.73it/s]
2it [00:00, 19.56it/s]

epoch 6, loss 0.0006, train acc 0.946, test acc 0.891, time 19.4 sec


235it [00:18, 12.62it/s]
2it [00:00, 19.80it/s]

epoch 7, loss 0.0005, train acc 0.956, test acc 0.892, time 19.5 sec


235it [00:18, 12.46it/s]
2it [00:00, 19.15it/s]

epoch 8, loss 0.0004, train acc 0.962, test acc 0.890, time 19.7 sec


235it [00:19, 12.37it/s]
2it [00:00, 19.67it/s]

epoch 9, loss 0.0003, train acc 0.969, test acc 0.898, time 19.9 sec


235it [00:19, 12.36it/s]


epoch 10, loss 0.0003, train acc 0.975, test acc 0.900, time 19.9 sec


## **Training without Pre-trained model**
The training with pre-trained model is around 2% better than the non-pre-trained model, as Fashion is a more complicated dataset.

In [5]:
netG = ResNetFeatrueExtractor18(pretrained = False)
netF = ResClassifier()

if torch.cuda.is_available():
    netG = netG.cuda()
    netF = netF.cuda()

# setting up optimizer for both feature generator G and classifier F.
opt_g = optim.SGD(netG.parameters(), lr=0.01, weight_decay=0.0005)
opt_f = optim.SGD(netF.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)

# loss function
criterion = nn.CrossEntropyLoss()

for epoch in range(0, 10):
    n, start = 0, time.time()
    train_l_sum = torch.tensor([0.0], dtype=torch.float32)
    train_acc_sum = torch.tensor([0.0], dtype=torch.float32)
    for i, (imgs, labels) in tqdm.tqdm(enumerate(iter(train_loader))):
        netG.train()
        netF.train()
        imgs = Variable(imgs)
        labels = Variable(labels)
        # train on GPU if possible  
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            labels = labels.cuda()
            train_l_sum = train_l_sum.cuda()
            train_acc_sum = train_acc_sum.cuda()

        opt_g.zero_grad()
        opt_f.zero_grad()

        # extracted feature
        bottleneck = netG(imgs)     
        
        # predicted labels
        label_hat = netF(bottleneck)

        # loss function
        loss= criterion(label_hat, labels)
        loss.backward()
        opt_g.step()
        opt_f.step()
        
        # calcualte training error
        netG.eval()
        netF.eval()
        labels = labels.long()
        train_l_sum += loss.float()
        train_acc_sum += (torch.sum((torch.argmax(label_hat, dim=1) == labels))).float()
        n += labels.shape[0]
    test_acc = test_accuracy(iter(test_loader), netG, netF) 
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'\
        % (epoch + 1, train_l_sum/n, train_acc_sum/n, test_acc, time.time() - start))

235it [00:18, 12.72it/s]
2it [00:00, 19.98it/s]

epoch 1, loss 0.0021, train acc 0.806, test acc 0.832, time 19.4 sec


235it [00:18, 12.59it/s]
2it [00:00, 18.55it/s]

epoch 2, loss 0.0013, train acc 0.876, test acc 0.855, time 19.6 sec


235it [00:18, 12.38it/s]
2it [00:00, 19.49it/s]

epoch 3, loss 0.0011, train acc 0.898, test acc 0.868, time 19.9 sec


235it [00:19, 12.37it/s]
2it [00:00, 18.21it/s]

epoch 4, loss 0.0009, train acc 0.913, test acc 0.862, time 20.0 sec


235it [00:19, 12.36it/s]
2it [00:00, 18.25it/s]

epoch 5, loss 0.0008, train acc 0.927, test acc 0.874, time 19.9 sec


235it [00:19, 12.36it/s]
2it [00:00, 18.78it/s]

epoch 6, loss 0.0007, train acc 0.937, test acc 0.877, time 19.9 sec


235it [00:19, 12.27it/s]
2it [00:00, 19.14it/s]

epoch 7, loss 0.0006, train acc 0.948, test acc 0.876, time 20.1 sec


235it [00:19, 12.19it/s]
2it [00:00, 17.91it/s]

epoch 8, loss 0.0005, train acc 0.956, test acc 0.876, time 20.2 sec


235it [00:19, 12.14it/s]
2it [00:00, 19.06it/s]

epoch 9, loss 0.0004, train acc 0.962, test acc 0.872, time 20.3 sec


235it [00:19, 12.17it/s]


epoch 10, loss 0.0003, train acc 0.970, test acc 0.877, time 20.2 sec
