In [None]:
import os
# Limit CUDA to physical GPU 3; this runs before torch is imported in a later cell.
os.environ.update(CUDA_VISIBLE_DEVICES="3")

In [None]:
# Training hyper-parameters read by the data-loading, optimizer and epoch-loop cells.
batch_size = 256   # samples per mini-batch for both data loaders
num_workers = 4    # worker processes per DataLoader
# The experiment log at the end of this notebook records lr=0.01 as failing to
# learn (test_acc 0.01 after epoch 0) and lr=0.001 as the best run (0.920),
# so the working value is kept here instead of the last failed trial.
lr = 0.001
num_epochs = 50    # number of train + evaluate passes

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm

In [None]:
class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along channels.

    Args:
        in_c: number of input channels.
        c1: output channels of the plain 1x1 branch.
        c2: indexable pair ``(1x1 reduce channels, 3x3 output channels)``.
        c3: indexable pair ``(1x1 reduce channels, 5x5 output channels)``.
        c4: output channels of the 1x1 convolution after the max-pool branch.

    All branches use stride 1 with size-preserving padding, so the output
    keeps the input's spatial size and has ``c1 + c2[1] + c3[1] + c4``
    channels.
    """

    def __init__(self, in_c, c1, c2, c3, c4):
        super().__init__()
        c2_reduce, c2_out = c2[0], c2[1]
        c3_reduce, c3_out = c3[0], c3[1]

        # Branch 1: 1x1 convolution.
        self.p1_1 = nn.Conv2d(in_c, c1, kernel_size=1)

        # Branch 2: 1x1 reduction, then 3x3 convolution.
        self.p2_1 = nn.Conv2d(in_c, c2_reduce, kernel_size=1)
        self.p2_2 = nn.Conv2d(c2_reduce, c2_out, kernel_size=3, stride=1, padding=1)

        # Branch 3: 1x1 reduction, then 5x5 convolution.
        self.p3_1 = nn.Conv2d(in_c, c3_reduce, kernel_size=1)
        self.p3_2 = nn.Conv2d(c3_reduce, c3_out, kernel_size=5, stride=1, padding=2)

        # Branch 4: 3x3 max-pool, then 1x1 convolution.
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_c, c4, kernel_size=1)

    def forward(self, x):
        """Run the four branches on ``x`` and stack their outputs on dim 1."""
        branch1 = F.relu(self.p1_1(x))

        branch2 = F.relu(self.p2_1(x))
        branch2 = F.relu(self.p2_2(branch2))

        branch3 = F.relu(self.p3_1(x))
        branch3 = F.relu(self.p3_2(branch3))

        # No activation between the pooling and its 1x1 convolution.
        branch4 = self.p4_1(x)
        branch4 = F.relu(self.p4_2(branch4))

        return torch.cat((branch1, branch2, branch3, branch4), dim=1)

In [None]:
class GoogLeNet(nn.Module):
    """GoogLeNet-style classifier built from a convolutional stem and Inception stages.

    Args:
        in_channels: channels of the input image (default 1, as before).
        num_classes: number of output logits (default 10, as before).

    ``forward`` maps a ``(batch, in_channels, H, W)`` tensor to
    ``(batch, num_classes)`` logits; the adaptive average pool at the end of
    the last stage makes the classifier independent of the input resolution.

    Note: the ReLU layers inserted into the second stem block move its 3x3
    convolution from index 1 to index 2 of that ``nn.Sequential``, so a
    state dict saved from the earlier definition needs its ``bs.1.1.*`` keys
    renamed to ``bs.1.2.*`` before loading.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super(GoogLeNet, self).__init__()
        # Stem: 7x7 stride-2 convolution followed by a stride-2 max-pool.
        b1 = nn.Sequential(
            nn.Conv2d(in_channels, 64, 7, 2, 3),
            nn.ReLU(),
            nn.MaxPool2d(3, 2, 1))
        # Each convolution gets its own ReLU; without them the 1x1 and 3x3
        # convolutions compose into a single linear map.
        b2 = nn.Sequential(
            nn.Conv2d(64, 64, 1),
            nn.ReLU(),
            nn.Conv2d(64, 192, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(3, 2, 1))
        # Stage 3 ends with a stride-2 max-pool, like stage 4 below.
        b3 = nn.Sequential(
            Inception(192, 64, (96, 128), (16, 32), 32),
            Inception(256, 128, (128, 192), (32, 96), 64),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        b4 = nn.Sequential(
            Inception(480, 192, (96, 208), (16, 48), 64),
            Inception(512, 160, (112, 224), (24, 64), 64),
            Inception(512, 128, (128, 256), (24, 64), 64),
            Inception(512, 112, (144, 288), (32, 64), 64),
            Inception(528, 256, (160, 320), (32, 128), 128),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        # Final stage: 384 + 384 + 128 + 128 = 1024 channels, pooled to 1x1.
        b5 = nn.Sequential(
            Inception(832, 256, (160, 320), (32, 128), 128),
            Inception(832, 384, (192, 384), (48, 128), 128),
            nn.AdaptiveAvgPool2d((1, 1)))
        self.bs = nn.Sequential(b1, b2, b3, b4, b5)
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        b = x.shape[0]
        feature = self.bs(x)
        # (batch, 1024, 1, 1) -> (batch, 1024) for the linear classifier.
        return self.fc(feature.view(b, -1))

In [None]:
# Preprocessing: resize every image to 96x96, then convert it to a tensor.
trans = [
    transforms.Resize((96, 96)),
    transforms.ToTensor(),
]
transform = transforms.Compose(trans)

# Training split first, then the test split; both are downloaded if missing.
mnist_train, mnist_test = (
    torchvision.datasets.FashionMNIST(
        root='~/Datasets/FashionMNIST', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)
print(len(mnist_train), len(mnist_test))

# Only the training loader shuffles; batch size and worker count are shared.
train_iter, test_iter = (
    torch.utils.data.DataLoader(
        split, batch_size=batch_size, shuffle=reshuffle, num_workers=num_workers)
    for split, reshuffle in ((mnist_train, True), (mnist_test, False))
)

In [None]:
# Model on the GPU, Adam over all of its parameters, cross-entropy as the criterion.
net = GoogLeNet()
net = net.cuda()
optimizer = torch.optim.Adam(params=net.parameters(), lr=lr)
loss = nn.CrossEntropyLoss()

In [None]:
def train_FashionMNIST(net, train_iter, optimizer, loss_fn=None):
    """Train ``net`` for one pass over ``train_iter`` and report the metrics.

    Args:
        net: model to optimise. Batches are moved to the device of its first
            parameter (falling back to CUDA for a parameter-free module).
        train_iter: iterable yielding ``(X, y)`` batches of inputs and
            integer class labels.
        optimizer: optimizer already bound to ``net``'s parameters.
        loss_fn: criterion called as ``loss_fn(y_hat, y)`` that returns the
            mean loss of the batch. Defaults to the notebook-level ``loss``.

    Returns:
        ``(train_loss, train_acc)``: the per-sample mean loss and the
        accuracy over the whole pass. Both are also printed.
    """
    criterion = loss if loss_fn is None else loss_fn
    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cuda')

    train_loss = 0.0
    train_acc = 0.0
    train_num = 0

    net.train()
    for X, y in train_iter:
        X = X.to(device)
        y = y.to(device)
        y_hat = net(X)
        l = criterion(y_hat, y)
        optimizer.zero_grad()
        l.backward()
        optimizer.step()

        n = y.shape[0]
        # The criterion yields a batch mean; weight it by the batch size so
        # dividing by the sample count below gives a true per-sample average
        # (summing the means directly under-reports the loss by ~batch size).
        train_loss += l.item() * n
        train_acc += (y_hat.argmax(dim=1) == y).sum().item()
        train_num += n

    train_loss /= train_num
    train_acc /= train_num
    print('train loss: %.4f, train acc: %.3f' % (train_loss, train_acc))
    return train_loss, train_acc

In [None]:
def test_FashionMNIST(net, test_iter):
    test_acc = 0.0
    test_num = 0
    
    for X, y in test_iter:
        X = X.cuda()
        y = y.cuda()
        y_hat = net(X)
        test_acc += (y_hat.argmax(dim=1) == y).sum().item()
        test_num += y.shape[0]
    
    test_acc /= test_num
    print('test acc: %.3f' % (test_acc))

In [None]:
# Each epoch: one optimisation pass over the training set, then one evaluation pass.
for epoch in range(num_epochs):
    print(f'epoch: {epoch}')
    train_FashionMNIST(net, train_iter, optimizer)
    test_FashionMNIST(net, test_iter)
    print('----------------')

In [None]:
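# Experiment log. Each line: optimizer, learning rate, epoch at which the
# accuracy was read, test accuracy, batch size.
# Adam: lr=0.001, epoch 49, test_acc=0.920, batch_size=256
# Adam: lr=0.005, epoch 0, test_acc=0.100, batch_size=256  (0.100 is chance level for 10 classes)
# Adam: lr=0.0005, epoch 25, test_acc=0.918, batch_size=256
# Adam: lr=0.01, epoch 0, test_acc=0.01, batch_size=256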