## The architecture of AlexNet

The AlexNet has 8 layers, consisting of **5 convolutional layers** and **3 fully-connected layers**. To make training easier, AlexNet uses the **ReLU** activation function instead of sigmoid.

AlexNet controls the model complexity of the fully-connected layer by **dropout**. 

In [None]:
import torch
from torch import nn
import torchvision

# we are using FashionMNIST dataset here
alexnet = nn.Sequential(
                        # 1. first conv2d layer, the output size is (96, 54,54)
                        nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1), nn.ReLU(), \
                        # 2. first maxpooling layer, the output size is (96, 26, 26)
                        nn.MaxPool2d(kernel_size=3, stride=2 ),
                        # 3. second conv2d layer, the output size is (256, 26, 26)
                        nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(), \
                        # 4. second max pooling layer, the output size is (256, 12, 12)
                        nn.MaxPool2d(kernel_size=3, stride=2), \
                        # 5. third conv2d layer, the output size is (384,12, 12)
                        nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),\
                        # 6. fourth conv2d layer, the output size is (384, 12, 12)
                        nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(), \
                        # 7. fifth conv2d layer, the output size is (256, 12,12)
                        nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(), \
                        # 8. third maxpooling layer, the output size is (256, 5, 5)
                        nn.MaxPool2d(kernel_size=3, stride=2), \
                        # after flattening the output size is now (6400,)
                        nn.Flatten(),\
                        # 9. first fully connected layer, the output size is (4096,)
                        nn.Linear(6400, 4096), nn.ReLU(), nn.Dropout(p=0.5), \
                        # 10. second fully connected layer, the output size is (4096,)
                        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(p=0.5), \
                        # 11. third fully connected layer, the output size is (10,)
                        nn.Linear(4096, 10)
                       )
                        

In [None]:
X = torch.randn(1, 1, 224, 224)
for layer in alexnet:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

## Reading the dataset
Still we use the FashionMNIST dataset to train and test the AlexNet model.

We need to adjust our dataset loader to resize the images to $224 \times 224$.

In [17]:
def load_fashion_mnist_dataset(batch_size, resize=None):
    trans = [torchvision.transforms.ToTensor()]
    if resize:
        trans.insert(0, torchvision.transforms.Resize(resize))
    trans = torchvision.transforms.Compose(trans)
    train_dataset = torchvision.datasets.FashionMNIST(root="/scratch/home/acct-hpc/hpchxj/models/data", \
                                                     train=True, transform=trans, download=True)
    test_dataset = torchvision.datasets.FashionMNIST(root="/scratch/home/acct-hpc/hpchxj/models/data", \
                                                     train=False, transform=trans, download=True)
    return iter(torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)), \
           iter(torch.utils.data.DataLoader(test_dataset, batch_size = batch_size, shuffle=True, num_workers=4))
    
    

## Model Evaluation

In [46]:
def accuracy(y_hat, y):
    """compare the number of correct predictions."""
    if len(y_hat.shape)>1 and y_hat.shape[1]>1:
        y_hat = y_hat.argmax(axis=1)
    cmp = (y_hat.type(y.dtype))==y
    # The tensor class rewrite the __float__() method.
    return float(cmp.type(torch.int).sum())


class Accumulator(object):
    """For accumulating sums over `n` variables."""
    def __init__(self, n):
        self.data = [0.0]*n
        
    def add(self, *args):
        self.data = [a+float(b) for a,b in zip(self.data, args)]
    
    def reset(self):
        self.data = [0.0]*len(self.data)
        
    def __getitem__(self, idx):
        return self.data[idx]
    
    
def evaluate_accuracy_gpu(net, data_iter, device=None):
    """coumpute the accuacy of model net on the dataset on GPU"""
    if isinstance(net, nn.Module):
        # Set model to evalustion mode. This only makes sense when there are BN/Dropout layers in the model.
        net.eval()
        if not device:
            device = next(iter(net.parameters())).device
            
        metric = Accumulator(2)
        
        with torch.no_grad():
            for X,y in data_iter:
                print(X, y)
                if isinstance(X, list):
                    X = [x.to(device) for x in X]
                else:
                    X = X.to(device)
                y = y.to(device)
                print(len(y))
                metric.add(accuracy(net(X), y), len(y))
        print(metric[0], metric[1])      
        return metric[0]/metric[1]
            

## Traning

In [47]:
# def train_ch6_on_gpu(net, train_iter, test_iter, num_epochs, lr, device=None):
#     def init_params(m):
#         if isinstance(m, torch.nn.Conv2d) or isinstance(m, nn.Linear):
#             torch.nn.init.xavier_uniform_(m.weight)
        
#     net.apply(init_params)
    
#     if not device:
#         device = next(iter(net.parameters())).device
    
#     net.to(device)
    
#     loss = torch.nn.CrossEntropyLoss()
#     optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    
#     for epoch in range(num_epochs):
#         net.train()
#         metric = Accumulator(3)
#         for X, y in train_iter:
#             optimizer.zero_grad()
#             if isinstance(X, list):
#                 X = [x.to(device) for x in X]
#             else:
#                 X = X.to(device)
#             y = y.to(device)
#             y_hat = net(X)
#             l = loss(y_hat, y)
#             l.backward()
#             optimizer.step()
        
#         with torch.no_grad():
#             metric.add(l * X.shape[0], accuracy(y_hat, y), X.shape[0])
#             train_l = metric[0] / metric[2]
#             train_acc = metric[1] / metric[2]
#         test_acc = evaluate_accuracy_gpu(net, test_iter)
#         print(epoch)
#         print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
#           f'test acc {test_acc:.3f}')
            
def train_ch6_on_gpu(net, train_iter, test_iter, num_epochs, lr, device):
    """Train a model with a GPU (defined in Chapter 6)."""
    def init_params(m):
        if type(m)==nn.Linear or type(m)==nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    
    net.apply(init_params)
    net.to(device)
    
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    
    for epoch in range(num_epochs):
        metric = Accumulator(3)
        net.train()
        for i, (X,y) in enumerate(train_iter):
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                metric.add(l * X.shape[0], accuracy(y_hat, y), X.shape[0])
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        print(f'loss {train_l:.3f}, train acc {train_acc:.3f}, '
          f'test acc {test_acc:.3f}')

In [51]:
lr, num_epochs = 0.01, 10
batch_size = 256
train_iter, test_iter = load_fashion_mnist_dataset(batch_size, resize=224)

Exception ignored in: Traceback (most recent call last):
  File "/lustre/share/conda_env/pytorch/lib/python3.8/multiprocessing/queues.py", line 235, in _feed
    close()
  File "/lustre/share/conda_env/pytorch/lib/python3.8/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/lustre/share/conda_env/pytorch/lib/python3.8/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor
Traceback (most recent call last):
  File "/lustre/share/conda_env/pytorch/lib/python3.8/multiprocessing/queues.py", line 235, in _feed
    close()
  File "/lustre/share/conda_env/pytorch/lib/python3.8/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/lustre/share/conda_env/pytorch/lib/python3.8/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor
<function _MultiProcessingDataLoaderIter.__del__ at 0x2ad5f37b05e0>Exception ignored in: 
Except

In [50]:
train_ch6_on_gpu(alexnet, train_iter, test_iter, num_epochs, lr, torch.device("cuda:0"))

0.0 0.0


ZeroDivisionError: float division by zero