In [1]:
%matplotlib inline
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
import numpy as np
import torchvision
import torchvision.datasets as datasets
from torchvision import transforms
import torch.optim as optim
import time

batch_size = 256
transform = transforms.Compose([transforms.ToTensor(),]) 

mnist_train = datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
mnist_test = datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size,
    shuffle=True, num_workers=0)
test_loader = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size,
    shuffle=True, num_workers=0)


# **The Model**

In [2]:
class Flatten(torch.nn.Module):
    def forward(self, x):
        return x.view(-1,784)

net = nn.Sequential(Flatten(),
                    nn.Linear(784,256),
                    nn.ReLU(),
                    nn.Linear(256,10),
                    )

def init_weights(m):
    if type(m) == nn.Linear:
        # Initialize weight parameter by a normal distribition 
        # with a mean of 0 and standard deviation of 0.01.
        nn.init.normal_(m.weight.data, std=0.01)
        # The bias parameter is initialized to zero by default.
        m.bias.data.fill_(0.0)

net.apply(init_weights)

Sequential(
  (0): Flatten()
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

# **Training**

In [3]:
loss = nn.CrossEntropyLoss()
lr, num_epochs = 0.5, 5
opt_n = optim.SGD(net.parameters(), lr=lr)

def evaluate_accuracy(data_iter, net):
    """Evaluate accuracy of a model on the given data set."""
    acc_sum,n = 0,0
    for (imgs, labels) in data_iter:
        net.eval()
        with torch.no_grad():
            labels = labels.float()
            acc_sum += torch.sum((torch.argmax(net(imgs), dim=1) == labels)).float()
            n += labels.shape[0]
    return acc_sum.item()/n

for epoch in range(1, num_epochs+1):
    train_loader_iter = iter(train_loader)
    train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
    from tqdm import tqdm
    for _, (imgs, labels) in tqdm(enumerate(train_loader_iter)):
        net.train()
        opt_n.zero_grad()
        if torch.cuda.is_available():
            imgs = imgs.cuda()
            labels = labels.cuda()
        # Label prediction from LeNet
        y_hat = net(imgs)
        l = loss(y_hat, labels)
        # Backprobagation
        l.backward()
        opt_n.step()

        # Calculate tarining error
        with torch.no_grad():
            labels = labels.long()
            train_l_sum += l.item()
            train_acc_sum += (torch.sum(torch.argmax(y_hat, dim=1) == labels)).float().item()
            n += labels.shape[0]
    # calculate testing error every epoch.
    test_acc = evaluate_accuracy(iter(test_loader), net)
    print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
          % (epoch, train_l_sum/n, train_acc_sum/n, test_acc,
            time.time() - start))

235it [00:05, 40.19it/s]
4it [00:00, 39.09it/s]

epoch 1, loss 0.0032, train acc 0.693, test acc 0.710, time 6.8 sec


235it [00:06, 37.39it/s]
4it [00:00, 37.77it/s]

epoch 2, loss 0.0020, train acc 0.809, test acc 0.829, time 7.1 sec


235it [00:05, 43.35it/s]
5it [00:00, 44.19it/s]

epoch 3, loss 0.0017, train acc 0.840, test acc 0.824, time 6.2 sec


235it [00:05, 42.53it/s]
5it [00:00, 42.51it/s]

epoch 4, loss 0.0015, train acc 0.855, test acc 0.830, time 6.3 sec


235it [00:05, 43.10it/s]


epoch 5, loss 0.0015, train acc 0.863, test acc 0.860, time 6.2 sec
