In [1]:
import argparse
import os
import random
import shutil
import time
import warnings

import torch
import numpy as np
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torch.optim
import torch.multiprocessing as mp
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
import torchvision.datasets as datasets

import matplotlib.pyplot as plt
from torchinfo import summary

from utils.utils import cutmix, cutmix_criterion
from utils.config import config
from model import MobileFormer
from display import AverageMeter,ProgressMeter

In [2]:
#load the model
global best_acc1


# create model
# assert args.name in ['mf52', 'mf294', 'mf508'] 
model_name = 'mf52'

print('create model {}'.format(model_name))
cfg = config[model_name]
model = MobileFormer(cfg)

create model mf52


In [3]:
params = sum(p.numel() for p in model.parameters())
print(params)
# print the overall architecture
summary(model, input_size=(1,3, 224, 224))

2562334


Layer (type:depth-idx)                                       Output Shape              Param #
MobileFormer                                                 [1, 10]                   384
├─Sequential: 1-1                                            [1, 8, 112, 112]          --
│    └─Conv2d: 2-1                                           [1, 8, 112, 112]          216
│    └─BatchNorm2d: 2-2                                      [1, 8, 112, 112]          16
│    └─hswish: 2-3                                           [1, 8, 112, 112]          --
├─Sequential: 1-2                                            [1, 12, 56, 56]           --
│    └─Conv2d: 2-4                                           [1, 24, 56, 56]           240
│    └─hswish: 2-5                                           [1, 24, 56, 56]           --
│    └─Conv2d: 2-6                                           [1, 12, 56, 56]           300
│    └─BatchNorm2d: 2-7                                      [1, 12, 56, 56]           24
├

In [4]:
# Prepare data
# Data loading code

path = './data'
traindir = os.path.join(path, 'train')
testdir = os.path.join(path, 'test')

cinic_mean = [0.47889522, 0.47227842, 0.43047404]
cinic_std = [0.24205776, 0.23828046, 0.25874835]

print('==> Preparing data..')
transform_train = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean = cinic_mean,std = cinic_std)
])

transform_test = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean = cinic_mean,std = cinic_std),
])

trainset = datasets.ImageFolder(traindir, transform=transform_train)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=256, shuffle=True, num_workers=1)

testset = datasets.ImageFolder(testdir, transform=transform_test)
test_loader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=1)


==> Preparing data..


In [5]:
print('trainset lens: ', trainset.__len__())
print('testset lens: ', testset.__len__())

# print the label
classes  = trainset.classes
class_to_idx = trainset.class_to_idx
print(class_to_idx)

# get image and display
# id = 35555
# img = trainset.__getitem__(id)[0]
# label = trainset.__getitem__(id)[1]
# print(img.shape)
# print(label)
# numpy_img = img.numpy().transpose((1, 2, 0))
# plt.figure()
# plt.imshow(numpy_img)
# plt.show()

trainset lens:  90002
testset lens:  90000
{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}


In [6]:
# define loss function (criterion) and optimizer

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = model.to(device)

criterion = nn.CrossEntropyLoss().to(device)

learnig_rate = 0.0008
wd = 0.1
optimizer = torch.optim.AdamW(model.parameters(), lr=learnig_rate, weight_decay=wd)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=3)

# optimizer = torch.optim.SGD(model.parameters(), learnig_rate,
#                             momentum=0.9,
#                             weight_decay=1e-4)

cudnn.benchmark = True

start_epoch=0

In [7]:
def train(train_loader, model, criterion, optimizer, epoch, args,f1,f2):

    # switch to train mode
    model.train()

    # end = time.time()
    
    correct = 0
    train_loss = 0
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        #data_time.update(time.time() - end)

        images = images.to(device)
        target = target.to(device)
        
        images, target_a, target_b, lam = cutmix(images, target, 1.0)
        output = model(images)
        loss = cutmix_criterion(criterion, output, target_a, target_b, lam)

        #output = model(images)
        #loss = criterion(output, target)
        train_loss +=loss.item()
        
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
        

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        lr_scheduler.step()

            
        if i % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.1f}%)]\tLoss: {:.6f}'.format(
                epoch, i * len(images), len(train_loader.dataset),
                100. * i / len(train_loader), loss.item()/len(images)))
            
    train_loss /= len(train_loader.dataset)
    train_acc = correct / len(train_loader.dataset)
    
    with open(f1,'a') as f:
        f.write(str(train_loss) +"\n")
    with open(f2,'a') as f:
        f.write(str(train_acc.item()) +"\n")
    
    print('\nTraining Set: Average loss: {:.4f},Accuracy: {}/{} ({:.1f}%)\n'.format(
            train_loss, correct, len(train_loader.dataset),100.*train_acc))


In [8]:
def validate(val_loader, model, criterion, args,f3,f4):

    # switch to evaluate mode
    model.eval()
    
    test_loss = 0
    correct = 0
    with torch.no_grad():
        #end = time.time()
        for i, (images, target) in enumerate(val_loader):
            
            
            images = images.to(device)
            target = target.to(device)

            # compute output
            output = model(images)
            loss = criterion(output, target)
            test_loss+=loss.item()
            
            
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).sum()
            
        
        test_loss /= len(test_loader.dataset)
        test_acc = correct / float(len(test_loader.dataset))
        
        with open(f3,'a') as f:
            f.write(str(test_loss) +"\n")
        with open(f4,'a') as f:
            f.write(str(test_acc.item()) +"\n")

        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.1f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset), 100. * test_acc))

    return test_acc


In [8]:
def adjust_learning_rate(optimizer, epoch, lr):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = lr * (0.1 ** (epoch // 20))
    print('lr', lr)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [9]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [10]:
def save_checkpoint(state, is_best, filepath,filename='checkpoint.pth.tar'):
    torch.save(state, os.path.join(filepath,filename))
    if is_best:
        print('\n!!!!!!!!!!!!!!!!! NEW BEST ACC !!!!!!!!!!!!! \n')
        shutil.copyfile(os.path.join(filepath,filename),  os.path.join(filepath,'model_best.pth.tar'))

In [11]:
checkpoint = torch.load('./checkpoint/model_best.pth.tar')
start_epoch = checkpoint['epoch']
best_acc1 = checkpoint['best_acc1']
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])

In [11]:
record_path="./exp_record"
train_loss_txt = os.path.join(record_path,"train_loss.txt")
train_acc_txt = os.path.join(record_path,"train_acc.txt")
test_loss_txt = os.path.join(record_path,"test_loss.txt")
test_acc_txt = os.path.join(record_path,"test_acc.txt")

In [None]:
#train
args = 0
best_acc1 = 0

f1 = train_loss_txt
f2 = train_acc_txt
f3 = test_loss_txt
f4 = test_acc_txt
                   
for epoch in range(start_epoch, start_epoch+200):

    start_time = time.time()

    # adjust_learning_rate(optimizer, epoch, learnig_rate)
    for param_group in optimizer.param_groups:
        print(param_group['lr'])
    
    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch, args,f1,f2)

    # evaluate on validation set
    acc1 = validate(test_loader, model, criterion, args,f3,f4)

    end_time = time.time()

    interval = end_time - start_time
    print('One epoch time cost: {:.3f} s \n'.format(interval))

    # remember best acc@1 and save checkpoint
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)


    save_checkpoint({
        'epoch': epoch + 1,
        #'arch': args.arch,
        'state_dict': model.state_dict(),
        'best_acc1': best_acc1,
        'optimizer': optimizer.state_dict(),
    }, is_best,"./checkpoint")

0.0008

Training Set: Average loss: 0.0079,Accuracy: 23526/90002 (26.1%)


Test set: Average loss: 0.0158, Accuracy: 37840/90000 (42.0%)

One epoch time cost: 404.781 s 


!!!!!!!!!!!!!!!!! NEW BEST ACC !!!!!!!!!!!!! 

0.0006000000000000001

Training Set: Average loss: 0.0072,Accuracy: 31597/90002 (35.1%)


Test set: Average loss: 0.0141, Accuracy: 43700/90000 (48.6%)

One epoch time cost: 368.276 s 


!!!!!!!!!!!!!!!!! NEW BEST ACC !!!!!!!!!!!!! 

0.0002000000000000001

Training Set: Average loss: 0.0069,Accuracy: 35072/90002 (39.0%)


Test set: Average loss: 0.0134, Accuracy: 47137/90000 (52.4%)

One epoch time cost: 366.465 s 


!!!!!!!!!!!!!!!!! NEW BEST ACC !!!!!!!!!!!!! 

0.0008

Training Set: Average loss: 0.0067,Accuracy: 36099/90002 (40.1%)


Test set: Average loss: 0.0127, Accuracy: 49888/90000 (55.4%)

One epoch time cost: 370.780 s 


!!!!!!!!!!!!!!!!! NEW BEST ACC !!!!!!!!!!!!! 

0.0006000000000000001

Training Set: Average loss: 0.0065,Accuracy: 39466/90002 (43.9%)


Test

In [None]:
parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--name', default='mf294', type=str,
                    help='model name')
parser.add_argument('--data', metavar='DIR',
                    help='path to dataset')
parser.add_argument('--num_cls', default=1000, type=int,
                    help='number of classes')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
                    help='number of total epochs to run')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('--batch-size', default=256, type=int,
                    metavar='N',
                    help='mini-batch size (default: 256), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum')
parser.add_argument('--wd', '--weight-decay', default=1e-4, type=float,
                    metavar='W', help='weight decay (default: 1e-4)',
                    dest='weight_decay')
parser.add_argument('-p', '--print-freq', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                    help='use pre-trained model')
parser.add_argument('--world-size', default=-1, type=int,
                    help='number of nodes for distributed training')
parser.add_argument('--rank', default=-1, type=int,
                    help='node rank for distributed training')
parser.add_argument('--dist-url', default='tcp://224.66.41.62:55554', type=str,
                    help='url used to set up distributed training')
parser.add_argument('--dist-backend', default='nccl', type=str,
                    help='distributed backend')
parser.add_argument('--seed', default=None, type=int,
                    help='seed for initializing training. ')
parser.add_argument('--gpu', default=None, type=int,
                    help='GPU id to use.')
parser.add_argument('--multiprocessing-distributed', action='store_true',
                    help='Use multi-processing distributed training to launch '
                         'N processes per node, which has N GPUs. This is the '
                         'fastest way to use PyTorch for either single node or '
                         'multi node data parallel training')
parser.add_argument('--cutmix', action='store_true',
                    help='Use cutmix data augument')
parser.add_argument('--cutmix-prob', default=0.5, type=float,
                    help='cutmix probility')
parser.add_argument('--beta', default=1.0, type=float)

In [None]:
best_acc1 = 0
gpu = 1

def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    print('n_per_node:', ngpus_per_node)
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        print('world_size:', args.world_size)
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)

In [None]:

    # device allocation
    
    
    # if not torch.cuda.is_available():
    #     print('using CPU, this will be slow')
    # elif args.distributed:
    #     print('ddp mode')
    #     model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    #     # For multiprocessing distributed, DistributedDataParallel constructor
    #     # should always set the single device scope, otherwise,
    #     # DistributedDataParallel will use all available devices.
    #     if args.gpu is not None:
    #         torch.cuda.set_device(args.gpu)
    #         model.cuda(args.gpu)
    #         # When using a single GPU per process and per
    #         # DistributedDataParallel, we need to divide the batch size
    #         # ourselves based on the total number of GPUs we have
    #         args.batch_size = int(args.batch_size / ngpus_per_node)
    #         args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
    #         model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
    #     else:
    #         model.cuda()
    #         # DistributedDataParallel will divide and allocate batch_size to all
    #         # available GPUs if device_ids are not set
    #         model = torch.nn.parallel.DistributedDataParallel(model)
    # elif args.gpu is not None:
    #     torch.cuda.set_device(args.gpu)
    #     model = model.cuda(args.gpu)
    # else:
    #     # DataParallel will divide and allocate batch_size to all available GPUs
    #     # if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
    #     #     model.features = torch.nn.DataParallel(model.features)
    #     #     model.cuda()
    #     # else:
    #     model = torch.nn.DataParallel(model).cuda()

  
    
    # optionally resume from a checkpoint
    
    # if args.resume:
    #     if os.path.isfile(args.resume):
    #         print("=> loading checkpoint '{}'".format(args.resume))
    #         if args.gpu is None:
    #             checkpoint = torch.load(args.resume)
    #         else:
    #             # Map model to be loaded to specified single gpu.
    #             loc = 'cuda:{}'.format(args.gpu)
    #             checkpoint = torch.load(args.resume, map_location=loc)
    #         args.start_epoch = checkpoint['epoch']
    #         best_acc1 = checkpoint['best_acc1']
    #         if args.gpu is not None:
    #             # best_acc1 may be from a checkpoint from a different GPU
    #             best_acc1 = best_acc1.to(args.gpu)
    #         model.load_state_dict(checkpoint['state_dict'])
    #         optimizer.load_state_dict(checkpoint['optimizer'])
    #         print("=> loaded checkpoint '{}' (epoch {})"
    #               .format(args.resume, checkpoint['epoch']))
    #     else:
    #         print("=> no checkpoint found at '{}'".format(args.resume))

   

   
    # if args.evaluate:
    #     validate(val_loader, model, criterion, args)
    #     return

    