In [13]:
import torch
import torchvision
from torch import nn
from torch.utils import data
import matplotlib.pyplot as plt
import time

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [14]:
# Alexnet
net = nn.Sequential(
    nn.Conv2d(1, 96, kernel_size = 11, stride = 4, padding = 1), nn.ReLU(),
    nn.MaxPool2d(kernel_size = 3, stride = 2),
    nn.Conv2d(96, 256, kernel_size = 5, padding = 2), nn.ReLU(),
    nn.MaxPool2d(kernel_size = 3, stride = 2),
    nn.Conv2d(256, 384, kernel_size = 3, padding = 1), nn.ReLU(),
    nn.Conv2d(384, 384, kernel_size = 3, padding = 1), nn.ReLU(),
    nn.Conv2d(384, 256, kernel_size = 3, padding = 1), nn.ReLU(),
    nn.MaxPool2d(kernel_size = 3, stride = 2), nn.Flatten(),
    nn.Linear(6400, 4096), nn.ReLU(), nn.Dropout(p = 0.5),
    nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(p = 0.5),
    nn.Linear(4096, 10))

In [15]:
# print the output shape of the net
X = torch.randn(1, 1, 224, 224)
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'Output shape:\t', X.shape)

Conv2d Output shape:	 torch.Size([1, 96, 54, 54])
ReLU Output shape:	 torch.Size([1, 96, 54, 54])
MaxPool2d Output shape:	 torch.Size([1, 96, 26, 26])
Conv2d Output shape:	 torch.Size([1, 256, 26, 26])
ReLU Output shape:	 torch.Size([1, 256, 26, 26])
MaxPool2d Output shape:	 torch.Size([1, 256, 12, 12])
Conv2d Output shape:	 torch.Size([1, 384, 12, 12])
ReLU Output shape:	 torch.Size([1, 384, 12, 12])
Conv2d Output shape:	 torch.Size([1, 384, 12, 12])
ReLU Output shape:	 torch.Size([1, 384, 12, 12])
Conv2d Output shape:	 torch.Size([1, 256, 12, 12])
ReLU Output shape:	 torch.Size([1, 256, 12, 12])
MaxPool2d Output shape:	 torch.Size([1, 256, 5, 5])
Flatten Output shape:	 torch.Size([1, 6400])
Linear Output shape:	 torch.Size([1, 4096])
ReLU Output shape:	 torch.Size([1, 4096])
Dropout Output shape:	 torch.Size([1, 4096])
Linear Output shape:	 torch.Size([1, 4096])
ReLU Output shape:	 torch.Size([1, 4096])
Dropout Output shape:	 torch.Size([1, 4096])
Linear Output shape:	 torch.Size([1,

In [16]:
batch_size = 64
resize = 224
trans = torchvision.transforms.Compose([torchvision.transforms.Resize(resize),
                                        torchvision.transforms.ToTensor()])
# training dataset
train_dataset = torchvision.datasets.MNIST(root = '../data/', train = True, transform = trans, download = True)
train_loader = data.DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)
#testing dataset
test_dataset = torchvision.datasets.MNIST(root = '../data/', train = False, transform = trans, download = True)
test_loader = data.DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = True)

In [17]:
# define optimizer
learning_rate = 0.01
momentum = 0.5
optimizer = torch.optim.SGD(net.parameters(), lr = learning_rate, momentum = momentum)

In [18]:
def train(net, num_epochs, train_loader, test_loader):
    net.to(device)
    loss = nn.CrossEntropyLoss()
    best_acc = 0

    # training
    for epoch in range(num_epochs):
        net.train()
        train_l = 0.0
        time_s = time.perf_counter()
        for i, (X, y) in enumerate(train_loader):
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()

            train_l += l.item()

            '''
            rate = (i + 1) / len(train_loader)
            a = "*" * int(rate * 50)
            b = "." * int((1 - rate) * 50)
            print('\rtrain loss: {:^3.0f}%[{}->{}] {:.3f}'.format(int(rate * 100), a, b, l, end = ""))
            '''

        net.eval()
        test_acc = 0.0
        with torch.no_grad():
            #for X, y in enumerate(test_loader):
            for X, y in test_loader:
                #X, y = torch.tensor(X).to(device), torch.tensor(y).to(device)
                X, y = X.to(device), y.to(device)
                y_hat = torch.max(net(X), dim=1)[1]
                #y_hat = net(X)
                test_acc += torch.eq(y_hat, y).sum().item()
            test_acc /= len(test_loader)
            if best_acc < test_acc:
                best_acc = test_acc
        print('[epoch %d] train_loss: %.3f, test_accuracy: %.3f' % (epoch + 1, train_l / i, test_acc))
        print('time: {%d}\n' % (int(time.perf_counter() - time_s)))

    print('Finished training! Best_accuracy: %.3f' % (best_acc))

num_epochs = 15
train(net, num_epochs, train_loader, test_loader)



[epoch 1] train_loss: 1.836, test_accuracy: 59.522
time: {66}

[epoch 2] train_loss: 0.138, test_accuracy: 62.197
time: {65}

[epoch 3] train_loss: 0.074, test_accuracy: 62.924
time: {65}

[epoch 4] train_loss: 0.054, test_accuracy: 62.822
time: {65}

[epoch 5] train_loss: 0.045, test_accuracy: 63.102
time: {65}

[epoch 6] train_loss: 0.036, test_accuracy: 63.013
time: {65}

[epoch 7] train_loss: 0.032, test_accuracy: 63.159
time: {64}

[epoch 8] train_loss: 0.028, test_accuracy: 63.121
time: {65}

[epoch 9] train_loss: 0.024, test_accuracy: 63.127
time: {65}

[epoch 10] train_loss: 0.021, test_accuracy: 63.153
time: {65}

[epoch 11] train_loss: 0.018, test_accuracy: 63.287
time: {65}

[epoch 12] train_loss: 0.017, test_accuracy: 63.255
time: {65}

[epoch 13] train_loss: 0.015, test_accuracy: 63.210
time: {65}

[epoch 14] train_loss: 0.014, test_accuracy: 63.274
time: {65}

[epoch 15] train_loss: 0.012, test_accuracy: 63.268
time: {65}

Finished training! Best_accuracy: 63.287
