In [1]:
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms

In [2]:
transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.1307,),(0.3081,))])
batch_size=64

In [3]:
train_data=MNIST('./datas/mnist',download=False,train=True,transform=transform)
test_data=MNIST('./datas/mnist',download=False,train=False,transform=transform)

In [4]:
train_loader=DataLoader(dataset=train_data,shuffle=True,batch_size=batch_size,num_workers=8)
test_loader=DataLoader(dataset=test_data,shuffle=False,batch_size=batch_size,num_workers=8)

In [5]:
class ResidualBlock(torch.nn.Module):
    def __init__(self,channel):
        super(ResidualBlock,self).__init__()
        self.conv1=torch.nn.Conv2d(channel,channel,kernel_size=3,padding=1)
        self.conv2=torch.nn.Conv2d(channel,channel,kernel_size=3,padding=1)
    def forward(self,x):
        y=F.relu(self.conv1(x))
        y=self.conv2(y)
        return F.relu(x+y)

In [6]:
class Model(torch.nn.Module):
    def __init__(self):
        super(Model,self).__init__()
        self.conv1=torch.nn.Conv2d(1,16,kernel_size=5)
        self.conv2=torch.nn.Conv2d(16,32,kernel_size=5)
        self.pooling=torch.nn.MaxPool2d(kernel_size=2)
        self.rb1=ResidualBlock(16)
        self.rb2=ResidualBlock(32)
        self.linear=torch.nn.Linear(512,10)
    def forward(self,x):
        in_size=x.size(0)
        x=self.pooling(F.relu(self.conv1(x)))
        x=self.rb1(x)
        x=self.pooling(F.relu(self.conv2(x)))
        x=self.rb2(x)
        x=x.view(in_size,-1)
#         print(x.size(1))
        x=self.linear(x)
        return x
model=Model()

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

Model(
  (conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (pooling): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (rb1): ResidualBlock(
    (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (rb2): ResidualBlock(
    (conv1): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (linear): Linear(in_features=512, out_features=10, bias=True)
)

In [8]:
criterion=torch.nn.CrossEntropyLoss(size_average=True)
optimizer=torch.optim.SGD(params=model.parameters(),lr=0.01,momentum=0.5)



In [9]:
def train(epoch):
    running_loss=0.0
    for batch_idx,data in enumerate(train_loader):
        x,y=data
        x,y=x.to(device),y.to(device)
        optimizer.zero_grad()
        y_=model(x)
        loss=criterion(y_,y)
        loss.backward()
        optimizer.step()
        running_loss+=loss.item()
        if batch_idx%500==0:
            print('[%d, %5d] loss: %.3f'%(epoch+1,batch_idx+1,running_loss/500))
            running_loss=0.0
def test():
    correct=0
    total=0
    with torch.no_grad():
        for data in test_loader:
            x,y=data
            x,y=x.to(device),y.to(device)
            y_=model(x)
            _,predicted=torch.max(y_.data,dim=1)
            total+=y.size(0)
            correct+=(predicted==y).sum().item()
    print('Accuracy=',100*correct/total)

In [10]:
%%time
for epoch in range(10):
    train(epoch)
    test()

[1,     1] loss: 0.005
[1,   501] loss: 0.352
Accuracy= 96.52
[2,     1] loss: 0.000
[2,   501] loss: 0.082
Accuracy= 98.13
[3,     1] loss: 0.000
[3,   501] loss: 0.059
Accuracy= 98.7
[4,     1] loss: 0.000
[4,   501] loss: 0.049
Accuracy= 98.86
[5,     1] loss: 0.000
[5,   501] loss: 0.040
Accuracy= 98.73
[6,     1] loss: 0.000
[6,   501] loss: 0.036
Accuracy= 99.08
[7,     1] loss: 0.000
[7,   501] loss: 0.032
Accuracy= 98.76
[8,     1] loss: 0.000
[8,   501] loss: 0.028
Accuracy= 99.06
[9,     1] loss: 0.000
[9,   501] loss: 0.022
Accuracy= 98.89
[10,     1] loss: 0.000
[10,   501] loss: 0.021
Accuracy= 99.11
Wall time: 2min 8s
