In [1]:
import paddle as P 
import paddle.nn as nn 
import paddle.nn.functional as F 
import numpy as np 
import matplotlib.pyplot as plt 
import os 
from PIL import Image 
import math

ModuleNotFoundError: No module named 'paddle'

In [2]:
! unzip -qo data/data129883/MNIST.zip -d data

# Global Variable

In [3]:
channel = 1
h = 28
w = 28
epoch = 10
batchsize = 64
path_train = 'data/MNIST/train/'
path_test = 'data/MNIST/test/'

# Data

In [4]:
def reader(path):
    folder_name = os.listdir(path)
    folder_num = len(folder_name)
    file_name_list = []
    label_list = []
    eye = np.eye(folder_num)
    category_dict = {}
    for i in range(folder_num):
        category_dict[i] = folder_name[i]
        file_name = os.listdir(path+folder_name[i])
        for j in range(len(file_name)):
            file_name_list += [[folder_name[i],file_name[j]]]
            label_list += [i]
    return file_name_list, label_list, category_dict, eye, folder_num


def data_generator(batchsize, channel=channel, h=h, w=w, path=path_train):
    #
    file_name_list, label_list, _, eye, folder_num = reader(path)
    #
    state = np.random.get_state()
    np.random.shuffle(file_name_list)
    np.random.set_state(state)
    np.random.shuffle(label_list)
    num = 0
    images = np.zeros([batchsize,channel,h,w])
    labels = np.zeros([batchsize,folder_num])
    for i in range(len(label_list)):
        image = Image.open(path+file_name_list[i][0]+'/'+file_name_list[i][1])
        image = image.resize((h,w),Image.BICUBIC)
        image = np.array(image) / 255 * 2 - 1
        images[num,:,:,:] = image.reshape([h,w,channel]).transpose([2, 0, 1])
        labels[num,:] = eye[label_list[i]]
        num += 1
        if num != 0 and num % batchsize == 0:
            yield images.astype("float32"), labels.astype("float32")
            num = 0

# Network Structure

In [5]:
class Net(nn.Layer):

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2D(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2D(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2D()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)
        # Spatial transformer localization-network
        self.localization = nn.Sequential(
            nn.Conv2D(1, 8, kernel_size=7),
            nn.MaxPool2D(2, stride=2),
            nn.ReLU(),
            nn.Conv2D(8, 10, kernel_size=5),
            nn.MaxPool2D(2, stride=2),
            nn.ReLU()
        )
        # Regressor for the 3 * 2 affine matrix
        self.fc_loc = nn.Sequential(
            nn.Linear(10 * 3 * 3, 32),
            nn.ReLU(),
            nn.Linear(32, 3 * 2)
        )
        # Initialize the weights/bias with identity transformation
        self.fc_loc[2].weight.set_value(P.zeros([32, 3*2], dtype='float32'))
        self.fc_loc[2].bias.set_value(P.to_tensor([1, 0, 0, 0, 1, 0], dtype='float32'))

    # Spatial transformer network forward function
    def stn(self, x):
        xs = self.localization(x)
        xs = xs.reshape([-1, 10 * 3 * 3])
        theta = self.fc_loc(xs)
        theta = theta.reshape([-1, 2, 3])
        #
        grid = F.affine_grid(theta, x.shape)
        x = F.grid_sample(x, grid)
        return x

    def forward(self, x):
        # transform the input
        x = self.stn(x)
        # Perform the usual forward pass
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.reshape([-1, 320])
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        x = F.softmax(x, axis=1)
        return x

# Metric

In [6]:
def metric(net, batchsize=500, path=path_test):
    net.eval()
    data = data_generator(batchsize=batchsize, path=path)
    file_name_list, _, _, _, _ = reader(path)
    N = len(file_name_list)
    n = 0
    for image, label in data:
        image = P.to_tensor(image)
        label = P.to_tensor(label)
        predict = net(image)
        n += P.sum(P.argmax(predict, 1) == P.argmax(label, 1)).numpy()
    return n/N

# Show

In [18]:
def show(net, batchsize=8, path=path_test):
    net.eval()
    data = data_generator(batchsize=batchsize, path=path)
    image, _ = next(data)
    image_transform = P.to_tensor(image)
    image_transform = (image_transform + 1) / 2
    image_transform = image_transform.transpose([0,2,3,1]).numpy()
    for i in range(2):
        for j in range(batchsize):
            plt.subplot(2,batchsize,j)
            plt.imshow(image[j])


In [19]:
show(net)

ValueError: num must be 1 <= num <= 16, not 0

<Figure size 432x288 with 0 Axes>

# Train

In [7]:
def train(epochs, batchsize):
    net = Net()
    optimizer = P.optimizer.Adam(learning_rate=1e-4, beta1=0.9, parameters=net.parameters())
    #
    for epoch in range(epochs):
        net.train()
        data = data_generator(batchsize)
        for image, label in data:      
            image = P.to_tensor(image)  
            label = P.to_tensor(label)  
            y = net(image)
            loss = P.mean(P.sum(-label*P.log(y+1e-8), axis=1)) 
            optimizer.clear_grad()
            loss.backward()
            optimizer.step()
        #  
        accuracy = metric(net)
        print('Epoch: {}. Loss: {:.4f}. Accuracy: {}'.format(epoch, loss.numpy().item(), accuracy.item()))

In [8]:
train(epoch, batchsize)

W0301 17:37:48.898911  1533 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.0, Runtime API Version: 10.1
W0301 17:37:48.904063  1533 device_context.cc:465] device: 0, cuDNN Version: 7.6.


Epoch: 0. Loss: 1.4784194231033325. Accuracy: 0.7847
Epoch: 1. Loss: 0.8772228956222534. Accuracy: 0.918
Epoch: 2. Loss: 0.5756063461303711. Accuracy: 0.9424
Epoch: 3. Loss: 0.40646690130233765. Accuracy: 0.9568
Epoch: 4. Loss: 0.3620942234992981. Accuracy: 0.9604
Epoch: 5. Loss: 0.30423325300216675. Accuracy: 0.9671
Epoch: 6. Loss: 0.2570858299732208. Accuracy: 0.9707
Epoch: 7. Loss: 0.1629200577735901. Accuracy: 0.9744
Epoch: 8. Loss: 0.2564988136291504. Accuracy: 0.9758
Epoch: 9. Loss: 0.1767169088125229. Accuracy: 0.9761
Epoch: 10. Loss: 0.2526986002922058. Accuracy: 0.9783
Epoch: 11. Loss: 0.2029482126235962. Accuracy: 0.976
Epoch: 12. Loss: 0.1438068449497223. Accuracy: 0.9792
Epoch: 13. Loss: 0.3689615726470947. Accuracy: 0.9788
Epoch: 14. Loss: 0.2632424235343933. Accuracy: 0.9813
Epoch: 15. Loss: 0.14257284998893738. Accuracy: 0.9816
Epoch: 16. Loss: 0.21213743090629578. Accuracy: 0.9827
Epoch: 17. Loss: 0.23665952682495117. Accuracy: 0.9843
Epoch: 18. Loss: 0.1502188146114349

In [9]:
net = Net()
x = P.randn([2,1,28,28])
y = net.stn(x)
print(y.shape)

[2, 1, 28, 28]
