In [1]:
'''Train CIFAR10 with PyTorch.'''
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms

import os
import argparse
import matplotlib.pyplot as plt

import torchsummary

#from models import *
#from utils import progress_bar


In [2]:
os.environ['KMP_DUPLICATE_LIB_OK']='True'
parser = argparse.ArgumentParser(description='PyTorch CIFAR10 Training')
parser.add_argument('--lr', default=0.001, type=float, help='learning rate')
parser.add_argument('--resume', '-r', action='store_true',
                    help='resume from checkpoint')
args = parser.parse_args(args=[])

In [3]:
epoch_num = 25#300

device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
print(device)

cuda


In [4]:
# Data
print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.ColorJitter(0.1,0.1,0.1,0.1),
    #전이학습
    transforms.Resize(256),
    transforms.RandomCrop(224, padding=0),
    #transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    
    #과적합 방지 효과가 있다고 함
    transforms.RandomRotation(3), 
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.Resize(224),#전이학습
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', 
    train=True,
    download=True, 
    transform=transform_train)

trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=32,
    #batch_size=32,
    shuffle=True,
    num_workers=4)

testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False, 
    download=True,
    transform=transform_test)

testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=32, 
    #batch_size=32, 
    shuffle=False, 
    num_workers=4)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified


In [5]:
# Model
print('==> Building model..')
# net = VGG('VGG19')

# net = ResNet50()
# 전이학습
#net = torchvision.models.resnet50(pretrained=True)
net = torchvision.models.resnet50(pretrained=True)
num_fltrs = net.fc.in_features

net = net.to(device)
if device == 'cuda':
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True


==> Building model..


In [6]:
net

DataParallel(
  (module): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
      

In [7]:
if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    checkpoint = torch.load('./checkpoint/ckpt.pth')
    net.load_state_dict(checkpoint['net'])
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']

In [8]:
class MyNewNet(nn.Module):
    def __init__(self):
        super(MyNewNet, self).__init__()
        self.resnet50 = torchvision.models.resnet50(pretrained=True)
        self.linear1 = nn.Linear(512*4, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.drop1 = nn.Dropout(0.3)
        self.linear2 = nn.Linear(512, 10)

    # Forward Pass 정의 부분
    
    def forward(self, x):
        x = self.resnet50(x)
        #x = self.linear1(x)
        x = F.gelu(self.drop1(self.bn1(self.linear1(x))))
    
        return F.softmax(self.linear2(x), dim=1)
    

class MyEnsemble(nn.Module):

    def __init__(self):
        super(MyEnsemble, self).__init__()
        self.resnet50 = torchvision.models.resnet50(pretrained=True)
        self.mobilenet_v2 = torchvision.models.mobilenet_v2(pretrained=True)
        #self.modelC = modelC

        self.fc1 = nn.Linear(512*4+1280, 10)

    def forward(self, x):
        out1 = self.resnet50(x)
        out2 = self.mobilenet_v2(x)
        #out3 = self.modelC(x)

        out = torch.cat((out1 + out2),1)# + out3

        x = self.fc1(out)
        return F.softmax(x, dim=1)

In [9]:
#my_model = MyNewNet()
my_model = MyEnsemble()

In [10]:
my_model

MyEnsemble(
  (resnet50): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
      

In [11]:
for param in my_model.parameters():
  param.requires_grad = True
#for param in my_model.linear1.parameters():
#  param.requires_grad = True
#for param in my_model.linear2.parameters():
#  param.requires_grad = True
for param in my_model.fc1.parameters():
  param.requires_grad = True

In [12]:
from adabelief_pytorch import AdaBelief

criterion = nn.CrossEntropyLoss()
#optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
#optimizer = optim.Adam(net.parameters(), lr=args.lr)
optimizer=AdaBelief(net.parameters(), lr=0.0001, eps =1e-16, betas=(0.9, 0.999),
                    weight_decay = 5e-4, weight_decouple=False, rectify=False,
                    fixed_decay=False)


[31mPlease check your arguments if you have upgraded adabelief-pytorch from version 0.0.5.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    rectify
-----------------------  -----  -----------------  ---------
adabelief-pytorch=0.0.5  1e-08  False              False
>=0.1.0 (Current 0.2.0)  1e-16  True               True
[34mSGD better than Adam (e.g. CNN for Image Classification)    Adam better than SGD (e.g. Transformer, GAN)
----------------------------------------------------------  ----------------------------------------------
Recommended eps = 1e-8                                      Recommended eps = 1e-16
[34mFor a complete table of recommended hyperparameters, see
[34mhttps://github.com/juntang-zhuang/Adabelief-Optimizer
[32mYou can disable the log message by setting "print_change_log = False", though it is recommended to keep as a reminder.
[0m


In [13]:
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epoch_num, last_epoch=-1)

In [14]:
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        if batch_idx % 50 == 0:
            print('Epoch: {} | Batch_idx: {} |  Loss: ({:.4f}) | Acc: ({:.2f}%) ({}/{})'.format(epoch, batch_idx, train_loss / (batch_idx + 1), 100. * correct / total, correct, total))
    print('Epoch: {} | Batch_idx: {} |  Loss: ({:.4f}) | Acc: ({:.2f}%) ({}/{})'.format(epoch, batch_idx, train_loss / (batch_idx + 1), 100. * correct / total, correct, total))
    return (100. * correct / total)

def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        print('# TEST : Loss: ({:.4f}) | Acc: ({:.2f}%) ({}/{})'.format(test_loss / (batch_idx + 1), 100. * correct / total, correct, total))

    # Save checkpoint.
    acc = 100.*correct/total
    if acc > best_acc:
        #  print('Saving..')
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        #torch.save(state, './checkpoint/ckpt.pth')
        best_acc = acc
    return best_acc

In [15]:
train_error = []
test_error = []

lerningrate_temp = []

In [None]:
if __name__ == "__main__":
    for epoch in range(start_epoch, start_epoch+epoch_num): 
        train_error.append(train(epoch))    
        test_error.append(test(epoch))          
        scheduler.step()
        print(optimizer.param_groups[0]['lr'])
        lerningrate_temp.append(optimizer.param_groups[0]['lr'])
        
        #print(train_error)
        #print(test_error)
    
    plt.plot(train_error,label='train_acc')
    plt.plot(test_error,label='test_acc')
    plt.legend()
    plt.show()
    
    plt.plot(lerningrate_temp,label='lr')
    plt.legend()
    plt.show()


Epoch: 0


  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Epoch: 0 | Batch_idx: 0 |  Loss: (9.0679) | Acc: (0.00%) (0/32)
Epoch: 0 | Batch_idx: 50 |  Loss: (2.6272) | Acc: (48.16%) (786/1632)
Epoch: 0 | Batch_idx: 100 |  Loss: (1.7595) | Acc: (59.84%) (1934/3232)
Epoch: 0 | Batch_idx: 150 |  Loss: (1.4002) | Acc: (65.75%) (3177/4832)
Epoch: 0 | Batch_idx: 200 |  Loss: (1.1877) | Acc: (69.64%) (4479/6432)
Epoch: 0 | Batch_idx: 250 |  Loss: (1.0532) | Acc: (72.39%) (5814/8032)
Epoch: 0 | Batch_idx: 300 |  Loss: (0.9578) | Acc: (74.19%) (7146/9632)
Epoch: 0 | Batch_idx: 350 |  Loss: (0.8901) | Acc: (75.64%) (8496/11232)
Epoch: 0 | Batch_idx: 400 |  Loss: (0.8336) | Acc: (76.87%) (9864/12832)
Epoch: 0 | Batch_idx: 450 |  Loss: (0.7885) | Acc: (77.81%) (11229/14432)
Epoch: 0 | Batch_idx: 500 |  Loss: (0.7556) | Acc: (78.53%) (12590/16032)
Epoch: 0 | Batch_idx: 550 |  Loss: (0.7250) | Acc: (79.22%) (13968/17632)
Epoch: 0 | Batch_idx: 600 |  Loss: (0.6996) | Acc: (79.88%) (15363/19232)
Epoch: 0 | Batch_idx: 650 |  Loss: (0.6756) | Acc: (80.43%) (167