In [1]:
from __future__ import print_function

import sys
import argparse
import time
import math

import torch
import torch.backends.cudnn as cudnn

from main_ce import set_loader
from util import AverageMeter
from util import adjust_learning_rate, warmup_learning_rate, accuracy
from util import set_optimizer
# from networks.resnet_big import SupConResNet, LinearClassifier
from networks.EN import SupConEN, LinearClassifier

try:
    import apex
    from apex import amp, optimizers
except ImportError:
    pass

In [2]:
def set_model(opt):
#     model = SupConResNet(name=opt.model)
    model = SupConEN(name=opt.model)
    criterion = torch.nn.CrossEntropyLoss()

    classifier = LinearClassifier(name=opt.model, num_classes=opt.n_cls)

    ckpt = torch.load(opt.ckpt, map_location='cpu')
    state_dict = ckpt['model']

    if torch.cuda.is_available():
        if torch.cuda.device_count() > 1:
            model.encoder = torch.nn.DataParallel(model.encoder)
        else:
            new_state_dict = {}
            for k, v in state_dict.items():
                k = k.replace("module.", "")
                new_state_dict[k] = v
            state_dict = new_state_dict
        model = model.cuda()
        classifier = classifier.cuda()
        criterion = criterion.cuda()
        cudnn.benchmark = True

        model.load_state_dict(state_dict)

    return model, classifier, criterion

In [3]:
def train(train_loader, model, classifier, criterion, optimizer, epoch, opt):
    """one epoch training"""
    model.eval()
    classifier.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    end = time.time()
    for idx, (images, labels) in enumerate(train_loader):
        data_time.update(time.time() - end)

        images = images.cuda(non_blocking=True)
        labels = labels.cuda(non_blocking=True)
        bsz = labels.shape[0]

        # warm-up learning rate
        warmup_learning_rate(opt, epoch, idx, len(train_loader), optimizer)

        # compute loss
        with torch.no_grad():
            features = model.encoder(images)
        output = classifier(features.detach())
        loss = criterion(output, labels)

        # update metric
        losses.update(loss.item(), bsz)
        acc1, acc5 = accuracy(output, labels, topk=(1, 5))
        top1.update(acc1[0], bsz)

        # SGD
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # print info
        if (idx + 1) % opt.print_freq == 0:
            print('Train: [{0}][{1}/{2}]\t'
                  'BT {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'DT {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'loss {loss.val:.3f} ({loss.avg:.3f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                   epoch, idx + 1, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1))
            sys.stdout.flush()

    return losses.avg, top1.avg

In [4]:
def validate(val_loader, model, classifier, criterion, opt):
    """validation"""
    model.eval()
    classifier.eval()

    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    with torch.no_grad():
        end = time.time()
        for idx, (images, labels) in enumerate(val_loader):
            images = images.float().cuda()
            labels = labels.cuda()
            bsz = labels.shape[0]

            # forward
            output = classifier(model.encoder(images))
            loss = criterion(output, labels)

            # update metric
            losses.update(loss.item(), bsz)
            acc1, acc5 = accuracy(output, labels, topk=(1, 5))
            top1.update(acc1[0], bsz)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if idx % opt.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                       idx, len(val_loader), batch_time=batch_time,
                       loss=losses, top1=top1))

    print(' * Acc@1 {top1.avg:.3f}'.format(top1=top1))
    return losses.avg, top1.avg

In [5]:
best_acc = 0
parser = argparse.ArgumentParser('argument for training')

parser.add_argument('--print_freq', type=int, default=10,
                    help='print frequency')
parser.add_argument('--save_freq', type=int, default=50,
                    help='save frequency')
parser.add_argument('--batch_size', type=int, default=256,
                    help='batch_size')
parser.add_argument('--num_workers', type=int, default=16,
                    help='num of workers to use')
parser.add_argument('--epochs', type=int, default=100,
                    help='number of training epochs')
# optimization
parser.add_argument('--learning_rate', type=float, default=0.1,
                    help='learning rate')
parser.add_argument('--lr_decay_epochs', type=str, default='60,75,90',
                    help='where to decay lr, can be a list')
parser.add_argument('--lr_decay_rate', type=float, default=0.2,
                    help='decay rate for learning rate')
parser.add_argument('--weight_decay', type=float, default=0,
                    help='weight decay')
parser.add_argument('--momentum', type=float, default=0.9,
                    help='momentum')

# model dataset
parser.add_argument('--model', type=str, default='resnet50')
parser.add_argument('--dataset', type=str, default='cifar10',
                    choices=['cifar10', 'cifar100', 'path'], help='dataset')
parser.add_argument('--mean', type=str, help='mean of dataset in path in form of str tuple')
parser.add_argument('--std', type=str, help='std of dataset in path in form of str tuple')
parser.add_argument('--data_folder', type=str, default=None, help='path to custom dataset')
parser.add_argument('--n_cls', type=int, default=2)


# other setting
parser.add_argument('--cosine', action='store_true',
                    help='using cosine annealing')
parser.add_argument('--warm', action='store_true',
                    help='warm-up for large batch training')

parser.add_argument('--ckpt', type=str, default='',
                    help='path to pre-trained model')

_StoreAction(option_strings=['--ckpt'], dest='ckpt', nargs=None, const=None, default='', type=<class 'str'>, choices=None, help='path to pre-trained model', metavar=None)

In [6]:
opt = parser.parse_args(["--batch_size=2", "--learning_rate=5", "--dataset=path", "--data_folder=./datasets", "--mean=(0.4914, 0.4822, 0.4465)", \
                   "--std=(0.2675, 0.2565, 0.2761)", \
                         "--ckpt=last.pth"])

In [7]:
type(eval(opt.mean))

tuple

In [8]:
iterations = opt.lr_decay_epochs.split(',')
opt.lr_decay_epochs = list([])
for it in iterations:
    opt.lr_decay_epochs.append(int(it))
    
# set the path according to the environment
opt.data_folder = './datasets/'

opt.model_name = '{}_{}_lr_{}_decay_{}_bsz_{}'.\
    format(opt.dataset, opt.model, opt.learning_rate, opt.weight_decay,
           opt.batch_size)

if opt.cosine:
    opt.model_name = '{}_cosine'.format(opt.model_name)

# warm-up for large-batch training,
if opt.warm:
    opt.model_name = '{}_warm'.format(opt.model_name)
    opt.warmup_from = 0.01
    opt.warm_epochs = 10
    if opt.cosine:
        eta_min = opt.learning_rate * (opt.lr_decay_rate ** 3)
        opt.warmup_to = eta_min + (opt.learning_rate - eta_min) * (
                1 + math.cos(math.pi * opt.warm_epochs / opt.epochs)) / 2
    else:
        opt.warmup_to = opt.learning_rate

if opt.dataset == 'cifar10':
    opt.n_cls = 10
elif opt.dataset == 'cifar100':
    opt.n_cls = 100
elif opt.dataset == 'path':
    opt.n_cls = 2
else:
    raise ValueError('dataset not supported: {}'.format(opt.dataset))

In [9]:
# build data loader
train_loader, val_loader = set_loader(opt)

In [10]:
# build model and criterion
model, classifier, criterion = set_model(opt)

In [11]:
inputs = torch.rand(1, 3, 240, 240)

In [12]:
feat = model(inputs)

In [13]:
feat.shape

torch.Size([1, 1280])

In [14]:
classifier.forward(feat)

tensor([[-0.0102,  0.0023]], grad_fn=<AddmmBackward>)

In [15]:
# build optimizer
optimizer = set_optimizer(opt, classifier)

In [16]:
train_loader.dataset

Dataset ImageFolder
    Number of datapoints: 10
    Root location: ./datasets/
    StandardTransform
Transform: Compose(
               RandomResizedCrop(size=(32, 32), scale=(0.2, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.4914, 0.4822, 0.4465))
           )

In [17]:
# training routine
for epoch in range(1, opt.epochs + 1):
    adjust_learning_rate(opt, optimizer, epoch)

    # train for one epoch
    time1 = time.time()
    loss, acc = train(train_loader, model, classifier, criterion,
                      optimizer, epoch, opt)
    time2 = time.time()
    print('Train epoch {}, total time {:.2f}, accuracy:{:.2f}'.format(
        epoch, time2 - time1, acc))

    # eval for one epoch
    loss, val_acc = validate(val_loader, model, classifier, criterion, opt)
    if val_acc > best_acc:
        best_acc = val_acc

print('best accuracy: {:.2f}'.format(best_acc))

AssertionError: Torch not compiled with CUDA enabled

In [None]:
it = iter(train_loader)
images, labels = it.next()

In [None]:
float(80) > np.asarray('60,75,90')

In [None]:
# def adjust_learning_rate(args, optimizer, epoch):
# parser.add_argument('--learning_rate', type=float, default=0.1,
#                     help='learning rate')
# parser.add_argument('--lr_decay_epochs', type=str, default='60,75,90',
#                     help='where to decay lr, can be a list')
# parser.add_argument('--lr_decay_rate', type=float, default=0.2,
#                     help='decay rate for learning rate')
# parser.add_argument('--weight_decay', type=float, default=0,
#                     help='weight decay')
# parser.add_argument('--momentum', type=float, default=0.9,
#                     help='momentum')

lr = 0.1
epoch = 1
# if args.cosine:
#     eta_min = lr * (args.lr_decay_rate ** 3)
#     lr = eta_min + (lr - eta_min) * (
#             1 + math.cos(math.pi * epoch / args.epochs)) / 2
# else:
steps = np.sum(epoch > np.asarray('60,75,90'))
if steps > 0:
    lr = lr * (0.2 ** steps)

# for param_group in optimizer.param_groups:
#     param_group['lr'] = lr

In [None]:
lr_decay_epochs='60,75,90'
iterations = lr_decay_epochs.split(',')
lr_decay_epochs = list([])
for it in iterations:
    lr_decay_epochs.append(int(it))

In [None]:
np.asarray(lr_decay_epochs)

In [None]:
np.sum(epoch > np.asarray(lr_decay_epochs))