In [1]:
import os
import sys
import time
import torch
from torch import nn, optim
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from torch import optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

In [2]:
# network
class Residual(nn.Module):
    """Residual unit built from two (BatchNorm -> ReLU -> 3x3 conv) stages plus a skip path.

    Args:
        in_channel: number of channels of the incoming tensor.
        num_channel: number of channels produced by both 3x3 convolutions.
        use_conv1x1: when True the skip path is a 1x1 convolution (using
            ``strides``); otherwise the input is added back unchanged.
        strides: stride of the first 3x3 convolution and of the 1x1 skip
            convolution.
    """

    def __init__(self, in_channel, num_channel, use_conv1x1=False, strides=1):
        super().__init__()
        self.relu = nn.ReLU()
        # First stage: normalises the raw input, may change channels / resolution.
        self.bn1 = nn.BatchNorm2d(in_channel, eps=1e-3)
        self.conv1 = nn.Conv2d(in_channel, num_channel, kernel_size=3, stride=strides, padding=1)
        # Second stage: keeps both channel count and resolution.
        self.bn2 = nn.BatchNorm2d(num_channel, eps=1e-3)
        self.conv2 = nn.Conv2d(num_channel, num_channel, kernel_size=3, padding=1)
        # Optional projection so the skip path matches the main path's shape.
        self.conv3 = (
            nn.Conv2d(in_channel, num_channel, kernel_size=1, stride=strides)
            if use_conv1x1
            else None
        )

    def forward(self, x):
        """Return the main-path output added to the (optionally projected) input."""
        main = self.bn1(x)
        main = self.relu(main)
        main = self.conv1(main)
        main = self.conv2(self.relu(self.bn2(main)))
        shortcut = x if self.conv3 is None else self.conv3(x)
        return main + shortcut

# ResNet block
def ResNet_block(in_channels,num_channels,num_residuals,first_block=False):
    """Stack ``num_residuals`` Residual units into one ``nn.Sequential`` stage.

    Args:
        in_channels: channels entering the stage.
        num_channels: channels produced by every unit of the stage.
        num_residuals: how many Residual units to stack.
        first_block: when False (default) the first unit halves the spatial
            resolution (stride 2) and projects the skip path with a 1x1
            convolution. When True the stage keeps the resolution.

    Returns:
        nn.Sequential holding the units in order.
    """
    layers = []
    for i in range(num_residuals):
        if i > 0:
            # Every unit after the first already receives num_channels.
            layers.append(Residual(num_channels, num_channels))
        elif first_block:
            # No down-sampling in the first stage. A 1x1 projection is only
            # needed when the channel count changes; without it the skip
            # addition would fail on mismatched channels.
            layers.append(Residual(in_channels, num_channels,
                                   use_conv1x1=(in_channels != num_channels)))
        else:
            layers.append(Residual(in_channels, num_channels, use_conv1x1=True, strides=2))
    return nn.Sequential(*layers)


class ResNet(nn.Module):
    """Residual image classifier: stem, four stages of two residual units, pooled linear head.

    Args:
        in_channel: number of channels of the input images.
        num_classes: number of output logits.
    """

    def __init__(self,in_channel,num_classes):
        super(ResNet,self).__init__()
        # Stem: 7x7 stride-2 convolution followed by a stride-2 max pool.
        self.block1=nn.Sequential(nn.Conv2d(in_channels=in_channel,out_channels=64,kernel_size=7,stride=2,padding=3),
                                  nn.BatchNorm2d(64),
                                  nn.ReLU(),
                                  nn.MaxPool2d(kernel_size=3,stride=2,padding=1))
        # Four stages; every stage except the first halves the resolution.
        self.block2=nn.Sequential(ResNet_block(64,64,2,True),
                                  ResNet_block(64,128,2),
                                  ResNet_block(128,256,2),
                                  ResNet_block(256,512,2))
        # Global average pooling: always yields a 1x1 map, whatever the input
        # resolution. A fixed 3x3 window is larger than the 1x1 map that the
        # strides above produce for e.g. 28x28 inputs, and would leave extra
        # spatial positions for larger inputs.
        self.block3=nn.Sequential(nn.AdaptiveAvgPool2d(1))
        # The head honours num_classes instead of a hard-coded 10.
        self.Dense=nn.Linear(512,num_classes)

    def forward(self,x):
        """Map a batch of images to a ``(batch, num_classes)`` tensor of logits."""
        y=self.block1(x)
        y=self.block2(y)
        y=self.block3(y)
        # (batch, 512, 1, 1) -> (batch, 512); keeps the batch dimension intact.
        y=y.flatten(1)
        y=self.Dense(y)
        return y

In [3]:
def load_data_fashion_mnist(batch_size, root='/home/kesci/input/FashionMNIST2065'):
    """Create the Fashion-MNIST training and test data loaders.

    Both splits are requested from ``root`` with ``download=True`` and use the
    same preprocessing: conversion to a tensor followed by normalisation with
    mean 0.286 and std 0.353.

    Args:
        batch_size: number of samples per batch for both loaders.
        root: directory handed to ``torchvision.datasets.FashionMNIST``.

    Returns:
        ``(train_iter, test_iter)``; only the training loader shuffles.
    """
    preprocessing = [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.286], std=[0.353]),
    ]

    # Build the training split first, then the test split.
    splits = {}
    for is_train in (True, False):
        splits[is_train] = torchvision.datasets.FashionMNIST(
            root=root,
            train=is_train,
            download=True,
            transform=transforms.Compose(preprocessing),
        )

    train_iter = DataLoader(splits[True], batch_size=batch_size, shuffle=True, num_workers=0)
    test_iter = DataLoader(splits[False], batch_size=batch_size, shuffle=False, num_workers=0)
    return train_iter, test_iter

In [4]:
# Mini-batch size used by both the training and the test loader.
batch_size = 64
train_iter, test_iter = load_data_fashion_mnist(
    batch_size=batch_size,
    root='/home/kesci/input/FashionMNIST2065',
)

In [5]:
def evaluate_accuracy(data_iter, net, device=None):
    """Return the fraction of samples in ``data_iter`` that ``net`` classifies correctly.

    Args:
        data_iter: iterable of ``(X, y)`` batches, where ``y`` holds the
            target class index of every sample.
        net: an ``nn.Module`` or any callable mapping a batch ``X`` to
            per-class scores of shape ``(batch, classes)``.
        device: device to run on. When omitted and ``net`` is a module, the
            device of its first parameter is used. If no device can be
            determined, batches are used where they already are.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` when ``data_iter`` is empty.
    """
    is_module = isinstance(net, torch.nn.Module)
    if device is None and is_module:
        # No device given: fall back to wherever the network's parameters live.
        first_param = next(net.parameters(), None)
        if first_param is not None:
            device = first_param.device

    # Remember the caller's mode so evaluation does not silently flip a model
    # that was deliberately put in eval mode back into training mode.
    was_training = net.training if is_module else False
    if is_module:
        net.eval()

    correct, n = 0.0, 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                if device is not None:
                    X, y = X.to(device), y.to(device)
                correct += (net(X).argmax(dim=1) == y).sum().item()
                n += y.shape[0]
    finally:
        # Restore the previous mode even if a batch raised.
        if is_module:
            net.train(was_training)

    return correct / n if n else 0.0

In [6]:
def train_model(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs,
                save_path='/home/kesci/work/best.pth'):
    """Train ``net`` with cross-entropy loss and checkpoint the best test accuracy.

    After every epoch the model is scored on ``test_iter``; whenever the test
    accuracy improves on the best seen so far, the state dict is written to
    ``save_path``.

    Args:
        net: model to train; moved to ``device``.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of ``(X, y)`` batches used for per-epoch evaluation.
        batch_size: unused; kept so existing callers keep working.
        optimizer: optimizer already bound to ``net``'s parameters.
        device: device on which training and evaluation run.
        num_epochs: number of passes over ``train_iter``.
        save_path: file that receives the best state dict.
    """
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    best_test_acc = 0
    for epoch in range(num_epochs):
        # Make sure BatchNorm layers use batch statistics, whatever mode the
        # caller (or a previous evaluation) left the model in.
        net.train()
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net, device)
        # max(..., 1) keeps the log line from dividing by zero on an empty loader.
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / max(batch_count, 1), train_acc_sum / max(n, 1),
                 test_acc, time.time() - start))
        if test_acc > best_test_acc:
            print('find best! save at %s' % save_path)
            best_test_acc = test_acc
            torch.save(net.state_dict(), save_path)

In [7]:
# Build the classifier for 1-channel images and 10 classes. Only move it to the
# GPU when one is present, so this cell also runs on CPU-only machines.
net = ResNet(1, 10)
if torch.cuda.is_available():
    net = net.cuda()
print (net)

ResNet(
  (block1): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (block2): Sequential(
    (0): Sequential(
      (0): Residual(
        (relu): ReLU()
        (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      )
      (1): Residual(
        (relu): ReLU()
        (bn1): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): BatchNorm2d

In [8]:
# Optimisation settings: SGD with momentum and weight decay.
lr = 0.01
num_epochs = 10
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
optimizer = optim.SGD(
    net.parameters(),
    lr=lr,
    momentum=0.9,
    weight_decay=5e-4,
)
train_model(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training on  cuda:0
epoch 1, loss 0.4046, train acc 0.855, test acc 0.872, time 68.2 sec
find best! save at ..work/best.pth
epoch 2, loss 0.2704, train acc 0.900, test acc 0.890, time 68.2 sec
find best! save at ..work/best.pth
epoch 3, loss 0.2349, train acc 0.911, test acc 0.891, time 68.2 sec
find best! save at ..work/best.pth
epoch 4, loss 0.2068, train acc 0.923, test acc 0.908, time 68.1 sec
find best! save at ..work/best.pth
epoch 5, loss 0.1850, train acc 0.930, test acc 0.901, time 68.2 sec
epoch 6, loss 0.1699, train acc 0.935, test acc 0.901, time 68.1 sec
epoch 7, loss 0.1533, train acc 0.941, test acc 0.905, time 68.2 sec
epoch 8, loss 0.1429, train acc 0.946, test acc 0.903, time 68.1 sec
epoch 9, loss 0.1309, train acc 0.951, test acc 0.900, time 68.2 sec
epoch 10, loss 0.1195, train acc 0.955, test acc 0.904, time 68.2 sec


In [9]:
# Load the best checkpoint. map_location remaps the stored tensors onto `device`,
# so a checkpoint written on a GPU can also be restored on a CPU-only machine.
net.load_state_dict(torch.load('/home/kesci/work/best.pth', map_location=device))
net = net.to(device)

In [10]:
# Run inference on the test set and pair every prediction with its running
# sample index. The index is produced by enumerate() rather than a counter
# named `id`, which would rebind the builtin id() for the rest of the notebook.
net.eval()
predicted_labels = []
with torch.no_grad():
    for X, _ in test_iter:
        predicted_labels.extend(net(X.to(device)).argmax(dim=1).cpu().tolist())
preds_list = list(enumerate(predicted_labels))

In [11]:
# Write the submission CSV: a header row, then one "ID,Prediction" line per sample.
# The loop variable is not called `id`, so the builtin id() is left intact.
with open('submission.csv', 'w') as f:
    f.write('ID,Prediction\n')
    f.writelines('{},{}\n'.format(sample_id, pred) for sample_id, pred in preds_list)