In [9]:
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import sklearn
from sklearn.metrics import accuracy_score

In [7]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Four unpadded 3x3 conv -> batch-norm -> ReLU stages (the 2nd and 4th are
    followed by a 2x2 max-pool) feed a three-layer fully connected head that
    emits 10 raw class scores (logits).

    Spatial size per stage for a 28x28 input:
    28 -> 26 -> 24 -> (pool) 12 -> 10 -> 8 -> (pool) 4,
    which is why the first linear layer expects ``128 * 4 * 4`` features.
    """

    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3),  # (28 + 2*0 - 3)/1 + 1 = 26 -> b,16,26,26
            nn.BatchNorm2d(16),
            nn.ReLU(True)
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3),  # b,32,24,24
            nn.BatchNorm2d(32),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2)  # b,32,12,12
        )

        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),  # b,64,10,10
            nn.BatchNorm2d(64),
            nn.ReLU(True)
        )

        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3),  # b,128,8,8
            nn.BatchNorm2d(128),
            # ReLU's only argument is the boolean `inplace` flag, not a
            # channel count; pass True like the other stages.
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2)  # b,128,4,4
        )

        self.fc = nn.Sequential(
            nn.Linear(128 * 4 * 4, 1024),
            nn.ReLU(True),
            nn.Linear(1024, 128),
            nn.ReLU(True),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, 10)`` for ``x`` of shape ``(batch, 1, 28, 28)``."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # Flatten every per-sample feature map into one vector for the linear head.
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [22]:
# hyperparameters
batch_size = 64
learning_rate = 1e-2
num_epoches = 5

# data preprocessing: convert each image to a tensor, then normalize the
# single channel with mean 0.5 and std 0.5
data_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

# dataset: download=True fetches MNIST into ./data when it is missing and is
# a no-op when the files are already present, so the cell works on a fresh
# checkout as well as on a machine that already has the data
train_dataset = datasets.MNIST(
    root='./data', train=True, transform=data_tf, download=True
)
test_dataset = datasets.MNIST(
    root='./data', train=False, transform=data_tf, download=True
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# evaluation only accumulates totals, so sample order is irrelevant; keeping
# the test loader unshuffled makes evaluation runs deterministic
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [23]:
# define model: build it on the CPU, then move it to the GPU when CUDA is usable
model = CNN()
if torch.cuda.is_available():
    model = model.cuda()

In [24]:
# training
def train(net, train_data, valid_data, num_epoch, optimizer, criterion):
    """Train ``net`` and optionally evaluate it after every epoch.

    Progress is reported with ``print``; nothing is returned.

    Args:
        net: ``nn.Module`` to optimise; its parameters are updated in place.
        train_data: sized iterable (``len()`` is called on it) yielding
            ``(images, labels)`` batches.
        valid_data: iterable yielding ``(images, labels)`` batches that is
            evaluated after each epoch, or ``None`` to skip validation.
        num_epoch: number of passes over ``train_data``.
        optimizer: optimizer already bound to ``net``'s parameters.
        criterion: callable ``criterion(output, labels)`` returning a scalar
            loss tensor.

    Notes:
        The reported losses are the sum of the per-batch loss values over the
        epoch. The reported accuracies are correct predictions divided by the
        number of samples actually iterated.
    """
    print(net)
    length = len(train_data)

    # Put the batches wherever the model lives instead of guessing from
    # torch.cuda.is_available(): a CPU model on a CUDA-capable machine would
    # otherwise receive CUDA inputs and fail with a device mismatch.
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    for epoch in range(num_epoch):
        train_loss = 0.0
        train_correct = 0
        train_seen = 0
        net.train()
        for step, (im, label) in enumerate(train_data):
            im = im.to(device)
            label = label.to(device)

            output = net(im)
            loss = criterion(output, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            n_samples = label.size(0)
            n_correct = (output.argmax(dim=1) == label).sum().item()
            step_loss = loss.item()  # plain float; keeps no autograd graph alive

            train_loss += step_loss
            train_correct += n_correct  # running count of correct predictions
            train_seen += n_samples

            if step % 300 == 0 and step > 0:
                print('Epoch {}/{},Iter {}/{} Loss: {:.4f},ACC:{:.4f}' \
                      .format(epoch, num_epoch - 1, step, length,
                              step_loss, n_correct / max(n_samples, 1)))

        train_summary = 'Epoch {}/{},complete! train_loss: {:.4f},train_acc:{:.4f}' \
            .format(epoch, num_epoch - 1, train_loss, train_correct / max(train_seen, 1))

        if valid_data is None:
            # No validation set: still report the training metrics for the epoch.
            print(train_summary)
            continue

        valid_loss = 0.0
        valid_correct = 0
        valid_seen = 0
        net.eval()
        with torch.no_grad():
            for im, label in valid_data:
                im = im.to(device)
                label = label.to(device)
                output = net(im)

                valid_loss += criterion(output, label).item()
                valid_correct += (output.argmax(dim=1) == label).sum().item()
                valid_seen += label.size(0)

        print(train_summary,
              'valid_loss: {:.4f},valid_acc:{:.4f}'.format(valid_loss, valid_correct / max(valid_seen, 1))
              )

In [25]:
# Cross-entropy on the raw logits, optimised with plain SGD.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=model.parameters(), lr=learning_rate)
train(
    net=model,
    train_data=train_loader,
    valid_data=test_loader,
    num_epoch=num_epoches,
    optimizer=optimizer,
    criterion=criterion,
)

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (layer4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Se

In [27]:
# Persist the trained network twice: first the whole module object, then
# only its parameter/buffer dictionary.
torch.save(obj=model, f='./model.pth')
torch.save(obj=model.state_dict(), f='./model_state.pth')

  "type " + obj.__name__ + ". It won't be checked "
