## VGG

In [1]:
!nvidia-smi

Mon Mar  7 19:20:42 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.51.06    Driver Version: 450.51.06    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:09.0 Off |                    0 |
| N/A   56C    P0    56W / 300W |   3749MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import paddle
import matplotlib.pyplot as plt
import matplotlib.image as mping
import numpy as np

paddle.__version__, paddle.device.get_device()

  from collections import MutableMapping
  from collections import Iterable, Mapping
  from collections import Sized


('2.2.2', 'gpu:0')

### Data loading

In [3]:
import paddle.vision.transforms as T
from paddle.vision.datasets import FashionMNIST

# loading and normalization
# resize the shape to 224x224
transform = [T.Resize(size=224), T.Normalize(mean=[127.5], std=[127.5])]
transform = T.Compose(transform)

# constructing traning set and test set
fashionmnist_train = FashionMNIST(mode='train', transform=transform)
fashionmnist_test = FashionMNIST(mode='test', transform=transform)

In [4]:
# nums of train set and test set
len(fashionmnist_train), len(fashionmnist_test)

(60000, 10000)

In [5]:
# As for one sample, channel = 1, height, weight = 224
# for adapting to the input shape of VGG
fashionmnist_train[0][0].shape, fashionmnist_test[0][0].shape

((1, 224, 224), (1, 224, 224))

In [6]:
train_loader = paddle.io.DataLoader(fashionmnist_train, batch_size=256, shuffle=True)
test_loader = paddle.io.DataLoader(fashionmnist_test, batch_size=64, shuffle=False)

### Model

In [7]:
from paddle import nn

# define vgg block as vgg's builing blocks
def vgg_block(num_convs, in_channels, out_channels):
    
    layers = []

    for _ in range(num_convs):
        layers.append(nn.Conv2D(in_channels, out_channels,
                                kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        in_channels = out_channels

    layers.append(nn.MaxPool2D(kernel_size=2,stride=2))

    return nn.Sequential(*layers)

In [8]:
vgg_block(2, 3, 6)

W0307 19:20:48.284443  9736 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.0, Runtime API Version: 10.1
W0307 19:20:48.288749  9736 device_context.cc:465] device: 0, cuDNN Version: 7.6.


Sequential(
  (0): Conv2D(3, 6, kernel_size=[3, 3], padding=1, data_format=NCHW)
  (1): ReLU()
  (2): Conv2D(6, 6, kernel_size=[3, 3], padding=1, data_format=NCHW)
  (3): ReLU()
  (4): MaxPool2D(kernel_size=2, stride=2, padding=0)
)

In [9]:
conv_arch = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))

In [10]:
def vgg(conv_arch):
    conv_blks = []
    in_channels = 1
    # The convolutional part
    for (num_convs, out_channels) in conv_arch:
        conv_blks.append(vgg_block(num_convs, in_channels, out_channels))
        in_channels = out_channels

    return nn.Sequential(
        *conv_blks, nn.Flatten(),
        # The fully-connected part
        nn.Linear(out_channels * 7 * 7, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 10))

vgg_net = vgg(conv_arch)

In [11]:
print(vgg_net)

Sequential(
  (0): Sequential(
    (0): Conv2D(1, 64, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (1): ReLU()
    (2): MaxPool2D(kernel_size=2, stride=2, padding=0)
  )
  (1): Sequential(
    (0): Conv2D(64, 128, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (1): ReLU()
    (2): MaxPool2D(kernel_size=2, stride=2, padding=0)
  )
  (2): Sequential(
    (0): Conv2D(128, 256, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (1): ReLU()
    (2): Conv2D(256, 256, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (3): ReLU()
    (4): MaxPool2D(kernel_size=2, stride=2, padding=0)
  )
  (3): Sequential(
    (0): Conv2D(256, 512, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (1): ReLU()
    (2): Conv2D(512, 512, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (3): ReLU()
    (4): MaxPool2D(kernel_size=2, stride=2, padding=0)
  )
  (4): Sequential(
    (0): Conv2D(512, 512, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (1): ReLU()
    (2): Conv2D(512, 512

In [12]:
# a version with a smaller number of channels for sufficient training
ratio = 4
small_conv_arch = [(pair[0], pair[1] // ratio) for pair in conv_arch]
vgg_net = vgg(small_conv_arch)

print(vgg_net)

Sequential(
  (0): Sequential(
    (0): Conv2D(1, 16, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (1): ReLU()
    (2): MaxPool2D(kernel_size=2, stride=2, padding=0)
  )
  (1): Sequential(
    (0): Conv2D(16, 32, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (1): ReLU()
    (2): MaxPool2D(kernel_size=2, stride=2, padding=0)
  )
  (2): Sequential(
    (0): Conv2D(32, 64, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (1): ReLU()
    (2): Conv2D(64, 64, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (3): ReLU()
    (4): MaxPool2D(kernel_size=2, stride=2, padding=0)
  )
  (3): Sequential(
    (0): Conv2D(64, 128, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (1): ReLU()
    (2): Conv2D(128, 128, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (3): ReLU()
    (4): MaxPool2D(kernel_size=2, stride=2, padding=0)
  )
  (4): Sequential(
    (0): Conv2D(128, 128, kernel_size=[3, 3], padding=1, data_format=NCHW)
    (1): ReLU()
    (2): Conv2D(128, 128, kern

In [13]:
# instantiation
model = paddle.Model(vgg_net)

# visualization of the model workflow
model.summary((-1, 1, 224, 224)) # [N C H W]

---------------------------------------------------------------------------
 Layer (type)       Input Shape          Output Shape         Param #    
   Conv2D-11     [[1, 1, 224, 224]]   [1, 16, 224, 224]         160      
    ReLU-13     [[1, 16, 224, 224]]   [1, 16, 224, 224]          0       
  MaxPool2D-7   [[1, 16, 224, 224]]   [1, 16, 112, 112]          0       
   Conv2D-12    [[1, 16, 112, 112]]   [1, 32, 112, 112]        4,640     
    ReLU-14     [[1, 32, 112, 112]]   [1, 32, 112, 112]          0       
  MaxPool2D-8   [[1, 32, 112, 112]]    [1, 32, 56, 56]           0       
   Conv2D-13     [[1, 32, 56, 56]]     [1, 64, 56, 56]        18,496     
    ReLU-15      [[1, 64, 56, 56]]     [1, 64, 56, 56]           0       
   Conv2D-14     [[1, 64, 56, 56]]     [1, 64, 56, 56]        36,928     
    ReLU-16      [[1, 64, 56, 56]]     [1, 64, 56, 56]           0       
  MaxPool2D-9    [[1, 64, 56, 56]]     [1, 64, 28, 28]           0       
   Conv2D-15     [[1, 64, 28, 28]]  

{'total_params': 43093322, 'trainable_params': 43093322}

### Training

In [14]:
# optimizer and loss
model.prepare(optimizer = paddle.optimizer.Adam(parameters=model.parameters(), learning_rate=1e-3),
              loss=nn.CrossEntropyLoss(),
              metrics=paddle.metric.Accuracy())

# training
model.fit(train_loader,
        # eval_data = test_loader
        epochs=15,
        verbose=1,
        )

The loss value printed in the log is the current step, and the metric is the average value of previous steps.
Epoch 1/15


  return (isinstance(seq, collections.Sequence) and


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [15]:
model.evaluate(fashionmnist_test, verbose=1)

Eval begin...
Eval samples: 10000


{'loss': [3.0994368e-06], 'acc': 0.9242}