In [1]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms

from tqdm import tqdm

from Model import *

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
# cfg = {
#     'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
#     'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
#     'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
#     'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
# }

# class VGG(nn.Module):
#     def __init__(self, vgg_name):
#         super(VGG, self).__init__()
#         self.features = self._make_layers(cfg[vgg_name])
#         self.classifier = nn.Linear(512, 10)

#     def forward(self, x):
#         out = self.features(x)
#         out = out.view(out.size(0), -1)
#         out = self.classifier(out)
#         return out

#     def _make_layers(self, cfg):
#         layers = []
#         in_channels = 3
#         for x in cfg:
#             if x == 'M':
#                 layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
#             else:
#                 layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
#                            nn.BatchNorm2d(x),
#                            nn.ReLU(inplace=True)]
#                 in_channels = x
#         layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
#         return nn.Sequential(*layers)

In [4]:
transform_train = transforms.Compose([transforms.RandomCrop(32, padding=4),
                                      transforms.RandomHorizontalFlip(),
                                      transforms.RandomVerticalFlip(),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

In [5]:
train_dataset = torchvision.datasets.CIFAR10(root='./Dataset', train=True, download=True, transform=transform_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=100, shuffle=True, num_workers=2)

Files already downloaded and verified


In [6]:
test_dataset = torchvision.datasets.CIFAR10(root='./Dataset', train=False, download=True, transform=transform_test)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2)

Files already downloaded and verified


In [7]:
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [8]:
# net = VGG('VGG19')
net = SimpleDLA()

In [9]:
net = net.to(device)
if device == 'cuda':
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

In [11]:
def valid(epoch):
    net.eval()
    loss_list, p_list, y_list = [], [], []
    for idx, (x, y) in enumerate(test_loader):
        x, y = x.to(device), y.to(device)
        p = net(x)

        loss = criterion(p, y)
        loss_list.append(loss.item())

        _, p = p.max(axis=-1)
        y_list.append(y.detach().cpu().numpy())
        p_list.append(p.detach().cpu().numpy())
    
    loss = np.array(loss_list).mean()
    acc = (np.array(p_list) == np.array(y_list)).astype(np.float32).mean()
    
    return loss, acc

In [12]:
def train(epoch):
    net.train()        
    loss_list, p_list, y_list = [], [], []
    with tqdm((enumerate(train_loader)), desc='epoch%3d'%epoch, total=len(train_loader), ncols=0) as t:
        for idx, (x, y) in t:
            x, y = x.to(device), y.to(device)
            p = net(x)
            
            optimizer.zero_grad()
            loss = criterion(p, y)
            loss.backward()
            optimizer.step()

            loss_list.append(loss.item())

            _, p = p.max(axis=-1)
            y_list.append(y.detach().cpu().numpy())
            p_list.append(p.detach().cpu().numpy())
            if idx+1 < len(train_loader):
                t.set_postfix({'loss':'%0.4f'%loss_list[-1]})
            else:
                loss = np.array(loss_list).mean()
                acc = (np.array(p_list) == np.array(y_list)).astype(np.float32).mean()
                
                valid_loss, valid_acc = valid(epoch)
                t.set_postfix({'loss':'%0.4f'%loss, 'acc':'%0.4f'%acc, 'valid_loss':'%0.4f'%valid_loss, 'valid_acc':'%0.4f'%valid_acc})
        return loss, acc

In [None]:
for epoch in range(200):
    train(epoch)

epoch  0: 100% 500/500 [00:56<00:00,  8.78it/s, loss=1.8834, acc=0.2844, valid_loss=1.6484, valid_acc=0.3800]
epoch  1: 100% 500/500 [00:51<00:00,  9.63it/s, loss=1.5177, acc=0.4334, valid_loss=1.4034, valid_acc=0.4793]
epoch  2: 100% 500/500 [00:50<00:00,  9.83it/s, loss=1.3066, acc=0.5235, valid_loss=1.3129, valid_acc=0.5320]
epoch  3: 100% 500/500 [00:51<00:00,  9.75it/s, loss=1.1528, acc=0.5799, valid_loss=1.1057, valid_acc=0.6052]
epoch  4: 100% 500/500 [00:51<00:00,  9.80it/s, loss=1.0390, acc=0.6267, valid_loss=0.9796, valid_acc=0.6493]
epoch  5: 100% 500/500 [00:51<00:00,  9.72it/s, loss=0.9571, acc=0.6551, valid_loss=0.9957, valid_acc=0.6407]
epoch  6: 100% 500/500 [00:52<00:00,  9.52it/s, loss=0.8963, acc=0.6780, valid_loss=0.9495, valid_acc=0.6620]
epoch  7: 100% 500/500 [00:55<00:00,  9.04it/s, loss=0.8397, acc=0.6991, valid_loss=0.8248, valid_acc=0.7065]
epoch  8: 100% 500/500 [00:55<00:00,  9.03it/s, loss=0.7924, acc=0.7173, valid_loss=0.8421, valid_acc=0.7059]
epoch  9: 

epoch 74: 100% 500/500 [00:54<00:00,  9.25it/s, loss=0.1051, acc=0.9630, valid_loss=0.4252, valid_acc=0.8823]
epoch 75: 100% 500/500 [00:53<00:00,  9.27it/s, loss=0.1034, acc=0.9632, valid_loss=0.5123, valid_acc=0.8692]
epoch 76: 100% 500/500 [00:53<00:00,  9.32it/s, loss=0.1012, acc=0.9640, valid_loss=0.4357, valid_acc=0.8782]
epoch 77:  25% 125/500 [00:12<00:34, 10.86it/s, loss=0.1448]