In [6]:
import torch
import torch.nn as nn
import torch.utils.data as Data
import torchvision
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt

# torch.manual_seed(1)

EPOCH = 25
LR = 0.0005
DOWNLOAD_MNIST = False

train_data = torchvision.datasets.MNIST(root='./mnist/', train=True, transform=torchvision.transforms.ToTensor(),
                                        download=DOWNLOAD_MNIST, )
test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)

print(train_data.train_data.shape)

train_x = torch.unsqueeze(train_data.train_data, dim=1).type(torch.FloatTensor) / 255.
train_y = train_data.train_labels
print(train_x.shape)

test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor)[:] / 255.  # Tensor on GPU
test_y = test_data.test_labels[:]
device = torch.device("mps")

torch.Size([60000, 28, 28])
torch.Size([60000, 1, 28, 28])


In [7]:
class Classifer(nn.Module):
    def __init__(self):
        super(Classifer, self).__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(in_channels=1 , out_channels=16, kernel_size=5, stride=1, padding=2),
                                nn.Conv2d(in_channels=16 , out_channels=16, kernel_size=3, stride=1, padding=1),
                                nn.BatchNorm2d(16),
                                nn.ReLU(inplace=True),
                                nn.Dropout2d(p=0.1),
                                nn.MaxPool2d(kernel_size=2))
        self.conv2 = nn.Sequential(nn.Conv2d(in_channels=16 , out_channels=32, kernel_size=5, stride=1, padding=2),
                                nn.Conv2d(in_channels=32 , out_channels=32, kernel_size=3, stride=1, padding=1),
                                nn.BatchNorm2d(32),
                                nn.ReLU(inplace=True),
                                nn.Dropout2d(p=0.2),
                                nn.MaxPool2d(kernel_size=2))  
        self.conv3 = nn.Sequential(nn.Conv2d(in_channels=32 , out_channels=64, kernel_size=5, stride=1, padding=2),
                                nn.BatchNorm2d(64),
                                nn.ReLU(inplace=True),
                                nn.Dropout2d(p=0.25))
        self.fc = nn.Sequential(nn.Linear(64*7*7,100),
                                nn.BatchNorm1d(100),
                                nn.Dropout(0.2),
                                nn.Linear(100,10),
                                nn.ReLU())
    def forward(self, x):
        x = self.conv3(self.conv2(self.conv1(x)))
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        output = x
        return output




model = Classifer()
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=LR)
# loss_func = nn.MSELoss()
loss_func = nn.CrossEntropyLoss()

data_size = 20000
batch_size = 100

for epoch in range(EPOCH):
    random_indx = np.random.permutation(data_size)
    for batch_i in range(data_size // batch_size):
        indx = random_indx[batch_i * batch_size:(batch_i + 1) * batch_size]

        b_x = train_x[indx, :]
        b_y = train_y[indx]

        output = model(b_x.to(device))
    
        loss = loss_func(output, b_y.to(device))

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch_i % 50 == 0:
            model.eval()
            with torch.no_grad():
                test_output = model(test_x.to(device))
                pred_y = torch.max(test_output, 1)[1].data.squeeze()
                accuracy = torch.sum(pred_y == test_y.to(device)).type(torch.FloatTensor) / test_y.size(0)
                print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.cpu().numpy(), '| test accuracy: %.3f' % accuracy)


Epoch:  0 | train loss: 2.3090 | test accuracy: 0.097
Epoch:  0 | train loss: 1.0963 | test accuracy: 0.652
Epoch:  0 | train loss: 0.6590 | test accuracy: 0.759
Epoch:  0 | train loss: 0.7938 | test accuracy: 0.766
Epoch:  1 | train loss: 0.4127 | test accuracy: 0.780
Epoch:  1 | train loss: 0.6787 | test accuracy: 0.781
Epoch:  1 | train loss: 0.5146 | test accuracy: 0.785
Epoch:  1 | train loss: 0.4006 | test accuracy: 0.787
Epoch:  2 | train loss: 0.5713 | test accuracy: 0.786
Epoch:  2 | train loss: 0.5296 | test accuracy: 0.786
Epoch:  2 | train loss: 0.4961 | test accuracy: 0.790
Epoch:  2 | train loss: 0.6094 | test accuracy: 0.788
Epoch:  3 | train loss: 0.6353 | test accuracy: 0.789
Epoch:  3 | train loss: 0.3990 | test accuracy: 0.788
Epoch:  3 | train loss: 0.3992 | test accuracy: 0.786
Epoch:  3 | train loss: 0.3982 | test accuracy: 0.790
Epoch:  4 | train loss: 0.4220 | test accuracy: 0.790
Epoch:  4 | train loss: 0.4916 | test accuracy: 0.789
Epoch:  4 | train loss: 0.44

KeyboardInterrupt: 

In [7]:
test_output = fc(test_x)
pred_y = torch.max(test_output, 1)[1].data.squeeze()  # move the computation in GPU

print(pred_y, 'prediction number')
print(test_y, 'real number')

tensor([7, 2, 1,  ..., 3, 9, 5]) prediction number
tensor([7, 2, 1,  ..., 3, 9, 5]) real number


torch.Size([2000])

tensor([[ -2.5184,  -8.5824,  -0.6187,   3.0813,  -6.2037,  -1.7433, -11.3742,
           7.9952,  -2.0846,  -0.2455],
        [ -0.2027,  -0.6667,   6.3977,   2.8499, -13.8127,   1.6088,   0.5629,
         -11.7980,   0.1761,  -9.4924],
        [ -5.6566,   4.7353,  -0.8758,  -1.2021,  -2.0522,  -2.4119,  -2.2544,
          -0.0299,  -0.5935,  -2.6085],
        [  8.5143,  -9.4927,  -0.9003,  -2.4009,  -8.2609,  -1.1546,  -0.1482,
          -2.3034,  -3.1212,  -1.2488],
        [ -2.3296,  -6.8946,  -1.5348,  -4.9243,   6.0338,  -3.9376,   0.1403,
          -2.0302,  -0.9111,   1.8864],
        [ -7.3118,   6.0550,  -1.9936,  -1.3987,  -2.1926,  -4.3265,  -4.9954,
           1.3960,  -0.7662,  -2.0353],
        [ -6.3345,  -7.6327,  -7.6763,  -2.3350,   6.2307,   0.1029,  -4.3228,
          -1.4999,   2.7457,   1.2418],
        [ -7.2737,  -4.2829,  -1.8956,  -1.2960,   1.3093,  -1.6075,  -4.8665,
          -1.8817,  -1.3280,   5.1082],
        [  0.2546,  -6.3597,   1.0990,  -8.9196,