## Alexnet

In [1]:
!nvidia-smi

Mon Mar  7 17:53:07 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.51.06    Driver Version: 450.51.06    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:09.0 Off |                    0 |
| N/A   54C    P0    42W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [13]:
import paddle
import matplotlib.pyplot as plt
import matplotlib.image as mping
import numpy as np

paddle.__version__, paddle.device.get_device()

('2.2.2', 'gpu:0')

### Data loading

In [3]:
import paddle.vision.transforms as T
from paddle.vision.datasets import FashionMNIST

# loading and normalization
# resize the shape to 224x224
transform = [T.Resize(size=224), T.Normalize(mean=[127.5], std=[127.5])]
transform = T.Compose(transform)

# constructing traning set and test set
fashionmnist_train = FashionMNIST(mode='train', transform=transform)
fashionmnist_test = FashionMNIST(mode='test', transform=transform)

In [4]:
# nums of train set and test set
len(fashionmnist_train), len(fashionmnist_test)

(60000, 10000)

In [5]:
# As for one sample, channel = 1, height, weight = 224
# for adapting to the input shape of Alexnet
fashionmnist_train[0][0].shape, fashionmnist_test[0][0].shape

((1, 224, 224), (1, 224, 224))

In [6]:
train_loader = paddle.io.DataLoader(fashionmnist_train, batch_size=256, shuffle=True)
test_loader = paddle.io.DataLoader(fashionmnist_test, batch_size=64, shuffle=False)

### Model

In [7]:
import paddle.nn as nn

alexnet = nn.Sequential(
    # Here, we use a larger 11 x 11 window to capture objects. At the same
    # time, we use a stride of 4 to greatly reduce the height and width of the
    # output. Here, the number of output channels is much larger than that in
    # LeNet
    nn.Conv2D(in_channels=1, out_channels=96, kernel_size=11, stride=4, padding=1), nn.ReLU(),
    nn.MaxPool2D(kernel_size=3, stride=2), 
    # Make the convolution window smaller, set padding to 2 for consistent
    # height and width across the input and output, and increase the number of
    # output channels
    nn.Conv2D(in_channels=96, out_channels=256, kernel_size=5, padding=2), nn.ReLU(),
    nn.MaxPool2D(kernel_size=3, stride=2),
    # Use three successive convolutional layers and a smaller convolution
    # window. Except for the final convolutional layer, the number of output
    # channels is further increased. Pooling layers are not used to reduce the
    # height and width of input after the first two convolutional layers
    nn.Conv2D(in_channels=256, out_channels=384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2D(in_channels=384, out_channels=384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2D(in_channels=384, out_channels=256, kernel_size=3, padding=1), nn.ReLU(),
    nn.MaxPool2D(kernel_size=3, stride=2),
    nn.Flatten(),
    # Here, the number of outputs of the fully-connected layer is several
    # times larger than that in LeNet. Use the dropout layer to mitigate
    # overfitting
    nn.Linear(in_features=6400, out_features=4096), nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(in_features=4096, out_features=4096), nn.ReLU(),
    nn.Dropout(p=0.5), 
    # Output layer. Since we are using Fashion-MNIST, the number of classes is
    # 10, instead of 1000 as in the paper
    nn.Linear(in_features=4096, out_features=10)) 

W0307 17:53:13.791188  3679 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.0, Runtime API Version: 10.1
W0307 17:53:13.796064  3679 device_context.cc:465] device: 0, cuDNN Version: 7.6.


In [8]:
print(alexnet)

Sequential(
  (0): Conv2D(1, 96, kernel_size=[11, 11], stride=[4, 4], padding=1, data_format=NCHW)
  (1): ReLU()
  (2): MaxPool2D(kernel_size=3, stride=2, padding=0)
  (3): Conv2D(96, 256, kernel_size=[5, 5], padding=2, data_format=NCHW)
  (4): ReLU()
  (5): MaxPool2D(kernel_size=3, stride=2, padding=0)
  (6): Conv2D(256, 384, kernel_size=[3, 3], padding=1, data_format=NCHW)
  (7): ReLU()
  (8): Conv2D(384, 384, kernel_size=[3, 3], padding=1, data_format=NCHW)
  (9): ReLU()
  (10): Conv2D(384, 256, kernel_size=[3, 3], padding=1, data_format=NCHW)
  (11): ReLU()
  (12): MaxPool2D(kernel_size=3, stride=2, padding=0)
  (13): Flatten()
  (14): Linear(in_features=6400, out_features=4096, dtype=float32)
  (15): ReLU()
  (16): Dropout(p=0.5, axis=None, mode=upscale_in_train)
  (17): Linear(in_features=4096, out_features=4096, dtype=float32)
  (18): ReLU()
  (19): Dropout(p=0.5, axis=None, mode=upscale_in_train)
  (20): Linear(in_features=4096, out_features=10, dtype=float32)
)


In [9]:
# instantiation
model = paddle.Model(alexnet)

# visualization of the model workflow
model.summary((-1, 1, 224, 224)) # [N C H W]

---------------------------------------------------------------------------
 Layer (type)       Input Shape          Output Shape         Param #    
   Conv2D-1      [[1, 1, 224, 224]]    [1, 96, 54, 54]        11,712     
    ReLU-1       [[1, 96, 54, 54]]     [1, 96, 54, 54]           0       
  MaxPool2D-1    [[1, 96, 54, 54]]     [1, 96, 26, 26]           0       
   Conv2D-2      [[1, 96, 26, 26]]     [1, 256, 26, 26]       614,656    
    ReLU-2       [[1, 256, 26, 26]]    [1, 256, 26, 26]          0       
  MaxPool2D-2    [[1, 256, 26, 26]]    [1, 256, 12, 12]          0       
   Conv2D-3      [[1, 256, 12, 12]]    [1, 384, 12, 12]       885,120    
    ReLU-3       [[1, 384, 12, 12]]    [1, 384, 12, 12]          0       
   Conv2D-4      [[1, 384, 12, 12]]    [1, 384, 12, 12]      1,327,488   
    ReLU-4       [[1, 384, 12, 12]]    [1, 384, 12, 12]          0       
   Conv2D-5      [[1, 384, 12, 12]]    [1, 256, 12, 12]       884,992    
    ReLU-5       [[1, 256, 12, 12]] 

{'total_params': 46764746, 'trainable_params': 46764746}

In [10]:
# X = paddle.to_tensor(np.random.rand(1, 1, 224, 224).astype('float32'))
# for layer in alexnet:
#     X = layer(X)
#     print(layer.__class__.__name__, 'output shape:\t', X.shape)

### Training

In [11]:
# optimizer and loss
model.prepare(optimizer=paddle.optimizer.Adam(parameters=model.parameters(), learning_rate=1e-3),
              loss=nn.CrossEntropyLoss(),
              metrics=paddle.metric.Accuracy())

# training
model.fit(train_loader,
        # eval_data = test_loader
        epochs=15,
        verbose=1,
        )

The loss value printed in the log is the current step, and the metric is the average value of previous steps.
Epoch 1/15


  return (isinstance(seq, collections.Sequence) and


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [12]:
model.evaluate(fashionmnist_test, verbose=1)

Eval begin...
Eval samples: 10000


{'loss': [2.6923413], 'acc': 0.9131}