In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import datasets as DS
import copy
import os
import time
import logging
print("PyTorch Version: ",torch.__version__)

PyTorch Version:  1.10.0a0+0aef44c


In [None]:
def getDataSet(mode, patch_size, batch_size, workers=8,
               dataset_path="/storage/data/classification_dataset_balanced/"):
    '''
    Build the train/val/test data loaders through DS.CODEBRIM.

    params:
      mode: stored as args.mode and forwarded unchanged to DS.CODEBRIM
            (presumably selects the augmentation mode -- confirm in datasets.py)
      patch_size: patch_size for dataLoader
      batch_size: batch number for each training epoch
      workers: cores for loading the data, default 8
      dataset_path: root folder of the dataset; defaults to the location the
                    notebook was originally run against
    return:
      dataLoader: a dict with key 'train' 'val' 'test' for each phase in training and evaluating model
    '''
    # build a class to satisfy the input of loader producer provided by the paper
    class Args:
        """Plain attribute holder handed to DS.CODEBRIM as its `args` object."""

        def __init__(self, mode, patch_size, batch_size, workers, dataset_path):
            self.dataset_path = dataset_path
            self.patch_size = patch_size
            self.batch_size = batch_size
            self.workers = workers
            self.mode = mode

    args = Args(mode, patch_size, batch_size, workers, dataset_path)
    # use the loader producer from the paper; the first argument tells it whether CUDA is available
    dataset = DS.CODEBRIM(torch.cuda.is_available(), args)
    return {
        'train': dataset.train_loader,
        'val': dataset.val_loader,
        'test': dataset.test_loader,
    }

In [None]:
class EfficientNet(nn.Module):
    """EfficientNet (widese-b0 from NVIDIA's torch.hub entry) with a sigmoid multi-label head.

    params:
        num_classes: width of the replacement output layer, default 6
    """

    def __init__(self, num_classes=6):
        super(EfficientNet, self).__init__()
        # load efficient net from torchhub
        self.model = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet', type='efficientnet-widese-b0')
        # modify the output layer: 1280 input features -> num_classes outputs
        self.model.classifier[3] = nn.Linear(1280, num_classes)

    def forward(self, x):
        """Return per-class probabilities: the backbone output passed through a sigmoid."""
        x = self.model(x)
        # the output from model should be fed into sigmoid to get the probability
        return torch.sigmoid(x)

    def _initialize_weights(self):
        """Re-initialise every Conv2d and Linear layer of the module in place.

        This overwrites whatever weights the hub model was loaded with.
        """
        print("initialize parameters")
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # using kaiming's method to initialize convolution layer parameters as requested in the paper
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # other parameters use normal distribution to initialize
                nn.init.normal_(m.weight, 0, 0.01)
                # a Linear built with bias=False has no bias tensor to zero
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

In [None]:
def log_creater(output_dir):
    '''
    print training log -loss -accuracy -learning rate
    params:
        output_dir: log output path (created if it does not exist)
    return:
        logger: logger.info() to print log into file

    The logger is the process-wide one named 'train_log'. Calling this function
    again (e.g. re-running the notebook cell) replaces the handlers installed by
    the previous call instead of stacking new ones, so each record is written
    once per handler.
    '''
    os.makedirs(output_dir, exist_ok=True)
    # one log file per minute of wall-clock time
    log_name = '{}.log'.format(time.strftime('%Y-%m-%d-%H-%M'))
    final_log_file = os.path.join(output_dir, log_name)

    # create a log
    log = logging.getLogger('train_log')
    log.setLevel(logging.DEBUG)

    # logging.getLogger returns the same object on every call, so handlers from
    # an earlier invocation are still attached: detach and close them first
    for stale_handler in list(log.handlers):
        log.removeHandler(stale_handler)
        stale_handler.close()

    # Formatter
    formatter = logging.Formatter(
        '[%(asctime)s][line: %(lineno)d] ==> %(message)s')

    # FileHandler first, then StreamHandler, both at DEBUG with the same format
    for handler in (logging.FileHandler(final_log_file), logging.StreamHandler()):
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(formatter)
        log.addHandler(handler)

    log.info('creating {}'.format(final_log_file))
    return log
# Notebook-wide logger handed to the training code; log files are written under ./train_log.
logger = log_creater(output_dir="./train_log")

[2021-12-07 07:59:03,627][line: 39] ==> creating ./train_log/2021-12-07-07-59.log


In [None]:
import torchvision.transforms as transforms
import datasets as DS
import random
import torch
import numpy as np
def _pil_pipeline(augmentation, resize=224):
    """Compose Resize(resize) -> augmentation -> ToTensor, the shape shared by every PIL-based pipeline."""
    return transforms.Compose([
        transforms.Resize(resize),  # mandate
        augmentation,
        transforms.ToTensor(),  # mandate
    ])


#geometric transform
#0->RandomHorizontalFlip
#1->RandomVerticalFlip
#2->RandomRotation
#3->RandomResizedCrop
#4->RandomPerspective
RandomHorizontalFlip = _pil_pipeline(transforms.RandomHorizontalFlip(p=1))
RandomVerticalFlip = _pil_pipeline(transforms.RandomVerticalFlip(p=1))
RandomRotation = _pil_pipeline(transforms.RandomRotation(360))
# the crop pipeline resizes to 256 first and then crops back to 224
RandomResizedCrop = _pil_pipeline(transforms.RandomResizedCrop(size=224), resize=256)
RandomPerspective = _pil_pipeline(transforms.RandomPerspective(distortion_scale=0.4, p=1))
geometric_transform_list = [
    RandomHorizontalFlip,
    RandomVerticalFlip,
    RandomRotation,
    RandomResizedCrop,
    RandomPerspective,
]

#photometric transform
#0->GaussianBlur
#1->RandomAdjustSharpness
#2->Normalize
GaussianBlur = _pil_pipeline(transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)))
RandomAdjustSharpness = _pil_pipeline(transforms.RandomAdjustSharpness(sharpness_factor=2, p=1))
# Normalize is built as a scripted nn.Sequential (no Resize/ToTensor), unlike the
# Compose pipelines above
Normalize = torch.jit.script(
    torch.nn.Sequential(
        transforms.Normalize((0.499, 0.559, 0.535), (0.021, 0.018, 0.019))))
photometric_transform_list = [GaussianBlur, RandomAdjustSharpness, Normalize]

#mode
#-2->no transform
#-1->randomly select a transform
#0->RandomHorizontalFlip
#1->RandomVerticalFlip
#2->RandomRotation
#3->RandomResizedCrop
#4->RandomPerspective
#5->GaussianBlur
#6->RandomAdjustSharpness
#7->Normalize
def random_transform(input_batch, mode):
    """Apply one augmentation to every sample of `input_batch`, in place.

    params:
        input_batch: indexable batch of samples; each slot is overwritten with
                     its transformed version
        mode: transform id from the table above; -1 draws a fresh id in [0, 8)
              for every sample, -2 returns the batch untouched. Any other value
              that matches no branch (e.g. 8) leaves the samples unchanged.
    return:
        the same `input_batch` object
    """
    to_pil = transforms.ToPILImage()
    if mode == -2:
        return input_batch

    for idx, sample in enumerate(input_batch):
        pil_image = to_pil(sample)
        # mix mode picks a new transform per sample
        selected = np.random.randint(0, high = 8) if mode == -1 else mode
        if selected < 5:
            input_batch[idx] = geometric_transform_list[selected](pil_image)
        elif selected < 7:
            input_batch[idx] = photometric_transform_list[selected - 5](pil_image)
        elif selected == 7:
            # Normalize is fed the original sample, not the PIL image
            input_batch[idx] = photometric_transform_list[2](sample)
    return input_batch



In [None]:
def train(root_dir, model, logger, lr_h, lr_l, dataLoaders, num_epochs = 300, resume=False, 
    checkpoint = None, device = "cpu"):
    '''
    train a model according to the given parameters, then evaluate the best weights
    params:
        root_dir: root_path; hard.pth, soft.pth and checkpoint/ are written below it
        model: model for training (expected to output probabilities, since BCELoss is used)
        logger: return from log_creater()
        lr_h: max(initial) learning rate
        lr_l: min(final) learing rate
        dataLoaders: return from getDataSet()
        num_epochs: training epochs
        resume: bool, save checkpoint every 10 epoch when the training interrupted, load checkpoint to resume
        checkpoint: path for checkpoints relative to root_dir (concatenated as
                    root_dir + checkpoint), valid when resume == True
        device: training device
    return:
        None. Results are reported through `logger` and the saved files.
    '''
    start_epoch = 1
    optimizer = optim.SGD(model.parameters(), lr=lr_h, momentum=0.9, weight_decay = 1e-5*0.1)
    # cosine decay from lr_h towards lr_l over num_epochs
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = num_epochs, eta_min=lr_l)
    best_acc_hard = 0.0
    best_acc_soft = 0.0
    criterion = torch.nn.BCELoss()
    save_path_hard = root_dir + '/hard.pth'
    save_path_soft = root_dir + '/soft.pth'
    # number of training batches per epoch, taken from the loaders passed in
    # (not from any notebook-level global)
    iters = len(dataLoaders['train'])
    if resume:
        path_checkpoint = root_dir + checkpoint  # checkpoint path
        # map_location lets a checkpoint written on another device be restored here
        saved_state = torch.load(path_checkpoint, map_location=device)  # load checkpoint
        model.load_state_dict(saved_state['net'])  # load model
        # NOTE: this also restores the scheduler settings stored in the checkpoint,
        # overriding the ones built above
        scheduler.load_state_dict(saved_state['scheduler'])
        optimizer.load_state_dict(saved_state['optimizer'])  # load optimizer
        start_epoch = saved_state['epoch']  # load epoch
        best_acc_soft = saved_state['best_acc_soft']
        best_acc_hard = saved_state['best_acc_hard']

    for epoch in range(start_epoch, num_epochs+1):  # loop over the dataset multiple times

        # the checkpoint is written BEFORE epoch `epoch` is trained, so resuming
        # from it re-runs that epoch
        if epoch % 10 == 0:
            snapshot = {
                "net": model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                "epoch": epoch,
                "best_acc_soft": best_acc_soft,
                "best_acc_hard": best_acc_hard
            }
            os.makedirs(root_dir + "/checkpoint", exist_ok=True)
            torch.save(snapshot, root_dir + '/checkpoint/ckpt_best_%s.pth' %(str(epoch)))

        #an epoch is divided into two phases train and evaluate
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects_hard = 0
            running_corrects_soft = 0

            for i, sample in enumerate(dataLoaders[phase]):
                inputs, labels = sample
                inputs = inputs.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; autograd bookkeeping is only needed while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                predictions = outputs >= 0.5  # binarizing sigmoid output by thresholding with 0.5
                equality_matrix = (predictions.float() == labels).float()
                # hard: samples whose every label matches; soft: fraction of matching labels
                hard = torch.sum(torch.prod(equality_matrix, dim=1))
                soft = torch.mean(equality_matrix)
                if is_train:
                    loss.backward()
                    optimizer.step()
                    #adjustment in scheduler, using a fractional epoch per batch
                    scheduler.step(epoch + i / iters)

                running_loss += loss.item() * inputs.size(0)
                running_corrects_hard += hard.item()
                running_corrects_soft += soft.item()

            #calculate loss and accuracy for the epoch
            epoch_loss = running_loss / len(dataLoaders[phase].dataset)
            epoch_acc_hard = running_corrects_hard / len(dataLoaders[phase].dataset)
            # NOTE: soft accuracy is the mean of per-batch means (divided by the
            # number of batches), so a smaller last batch weighs as much as a full one
            epoch_acc_soft = running_corrects_soft / len(dataLoaders[phase])
            logger.info('{} Epoch:[{}/{}]\t loss={:.5f}\t acc_hard={:.3f} acc_soft={:.3f} lr={:.7f}'.format(
                phase, epoch, num_epochs, epoch_loss, epoch_acc_hard, epoch_acc_soft,
                optimizer.state_dict()['param_groups'][0]['lr']))

            # keep the weights of the best validation epoch for each metric
            if not is_train and epoch_acc_hard > best_acc_hard:
                best_acc_hard = epoch_acc_hard
                torch.save(model.state_dict(), save_path_hard)

            if not is_train and epoch_acc_soft > best_acc_soft:
                best_acc_soft = epoch_acc_soft
                torch.save(model.state_dict(), save_path_soft)

    #evaluate
    # Evaluate on a copy with the same architecture as the model that was trained,
    # so the caller's model keeps its last-epoch weights and no architecture is
    # hard-coded here.
    eval_model = copy.deepcopy(model)
    for tag, weights_path in (("hard", save_path_hard), ("soft", save_path_soft)):
        if not os.path.isfile(weights_path):
            # happens when no validation epoch ever beat the starting best accuracy
            logger.warning("no saved weights at {}, skipping {} evaluation".format(weights_path, tag))
            continue
        eval_model.load_state_dict(torch.load(weights_path, map_location=device))
        eval_model.to(device)
        eval_model.eval()
        logger.info(tag + ":--------------------------------------")
        evaluation(dataLoaders, device, eval_model, logger)



def evaluation(dataLoaders, device, model, logger):
    '''
    evaluate a model on the 'train', 'val' and 'test' loaders and log the results
    params:
        dataLoaders: return from getDataSet()
        device: training device
        model: trained model to evaluate; its train/eval mode is not changed here,
               so the caller should call model.eval() beforehand
        logger: return from log_creater()
    return:
        None. One line per phase is written through logger.info().
    '''
    criterion = torch.nn.BCELoss()
    # calculate loss and accuracy for train data, evaluate data and test data
    for phase in ['train', 'val', 'test']:
        running_loss = 0.0
        running_corrects_hard = 0
        running_corrects_soft = 0

        # pure inference: no autograd graph is needed, which saves memory
        with torch.no_grad():
            for i, sample in enumerate(dataLoaders[phase]):
                inputs, labels = sample
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                predictions = outputs >= 0.5  # binarizing sigmoid output by thresholding with 0.5
                equality_matrix = (predictions.float() == labels).float()
                # hard: samples whose every label matches; soft: fraction of matching labels
                hard = torch.sum(torch.prod(equality_matrix, dim=1))
                soft = torch.mean(equality_matrix)
                running_loss += loss.item() * inputs.size(0)
                running_corrects_hard += hard.item()
                running_corrects_soft += soft.item()

        epoch_loss = running_loss / len(dataLoaders[phase].dataset)
        epoch_acc_hard = running_corrects_hard / len(dataLoaders[phase].dataset)
        # soft accuracy is averaged over batches, not over samples
        epoch_acc_soft = running_corrects_soft / len(dataLoaders[phase])
        logger.info("{}: loss:{:.5f} acc_soft:{:.3f} acc_hard:{:.3f}".format(phase, epoch_loss, epoch_acc_soft, epoch_acc_hard))



In [None]:
# Experiment driver: one training run per (batch_size, mode, learning-rate pair).
n = 1
# resume=True makes train() restore the checkpoint passed below before training
resume = True
#0->RandomHorizontalFlip
#1->RandomVerticalFlip
#2->RandomRotation
#3->RandomResizedCrop
#4->RandomPerspective
#5->GaussianBlur
#6->RandomAdjustSharpness
#7->random_select
#8->normalize
# NOTE(review): this numbering differs from the table above random_transform
# (-1 random, 7 Normalize) -- confirm which one DS.CODEBRIM actually uses.
mode_list = [8]
#mode_list = [2]#random_rotation
#grid search
for batch_size in [16]:
    patch_size = 224
    for mode in mode_list:
        dataLoader = getDataSet(mode, patch_size, batch_size)
        for lr in [(1.5*(1e-2),1.5*(1e-5))]:
            # each entry is (max learning rate, min learning rate)
            lr_h, lr_l = lr
            logger.info("experiment on:{}".format(mode))
            # per-mode output folder, e.g. ./8-8-8
            root_dir = './{0}-{0}-{0}'.format(mode)
            if not os.path.isdir(root_dir):
                os.mkdir(root_dir)
            for label, value in (
                ("batch_size:", batch_size),
                ("patch_size:", patch_size),
                ("learning rate high:", lr_h),
                ("learning rate low:", lr_l),
            ):
                logger.info(label + str(value))
            model = EfficientNet()
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model._initialize_weights()
            # without CUDA the device is "cpu" and this leaves the model where it is
            model = model.to(device)
            # do a step in grid search
            train(
                root_dir,
                model,
                logger,
                lr_h,
                lr_l,
                dataLoader,
                num_epochs = 150,
                resume=resume,
                checkpoint = "/checkpoint/ckpt_best_50.pth",
                device = device,
            )




parameters:
5
224
5


[2021-12-07 07:59:05,797][line: 20] ==> experiment on:5


[2021-12-07 07:59:05,798][line: 24] ==> batch_size:16


[2021-12-07 07:59:05,798][line: 25] ==> patch_size:224


[2021-12-07 07:59:05,799][line: 26] ==> learning rate high:0.015


[2021-12-07 07:59:05,799][line: 27] ==> learning rate low:1.5000000000000002e-05


Downloading: "https://github.com/NVIDIA/DeepLearningExamples/archive/torchhub.zip" to /root/.cache/torch/hub/torchhub.zip


Downloading: "https://api.ngc.nvidia.com/v2/models/nvidia/efficientnet_widese_b0_pyt_amp/versions/20.12.0/files/nvidia_efficientnet-widese-b0_210412.pth" to /root/.cache/torch/hub/checkpoints/nvidia_efficientnet-widese-b0_210412.pth


  0%|          | 0.00/32.4M [00:00<?, ?B/s]

initialize parameters




[2021-12-07 08:07:29,377][line: 93] ==> train Epoch:[50/150]	 loss=0.06034	 acc_hard=0.908 acc_soft=0.979 lr=0.0000198


I1207 08:07:29.377522 139766070904640 3841376463.py:93] train Epoch:[50/150]	 loss=0.06034	 acc_hard=0.908 acc_soft=0.979 lr=0.0000198


[2021-12-07 08:08:00,579][line: 93] ==> val Epoch:[50/150]	 loss=0.37260	 acc_hard=0.596 acc_soft=0.890 lr=0.0000198


I1207 08:08:00.579540 139766070904640 3841376463.py:93] val Epoch:[50/150]	 loss=0.37260	 acc_hard=0.596 acc_soft=0.890 lr=0.0000198


[2021-12-07 08:08:59,833][line: 93] ==> train Epoch:[51/150]	 loss=0.06135	 acc_hard=0.905 acc_soft=0.978 lr=0.0000493


I1207 08:08:59.833852 139766070904640 3841376463.py:93] train Epoch:[51/150]	 loss=0.06135	 acc_hard=0.905 acc_soft=0.978 lr=0.0000493


[2021-12-07 08:09:03,723][line: 93] ==> val Epoch:[51/150]	 loss=0.37218	 acc_hard=0.591 acc_soft=0.887 lr=0.0000493


I1207 08:09:03.723223 139766070904640 3841376463.py:93] val Epoch:[51/150]	 loss=0.37218	 acc_hard=0.591 acc_soft=0.887 lr=0.0000493


[2021-12-07 08:10:02,549][line: 93] ==> train Epoch:[52/150]	 loss=0.06348	 acc_hard=0.899 acc_soft=0.977 lr=0.0000984


I1207 08:10:02.549632 139766070904640 3841376463.py:93] train Epoch:[52/150]	 loss=0.06348	 acc_hard=0.899 acc_soft=0.977 lr=0.0000984


[2021-12-07 08:10:06,457][line: 93] ==> val Epoch:[52/150]	 loss=0.37453	 acc_hard=0.599 acc_soft=0.890 lr=0.0000984


I1207 08:10:06.457987 139766070904640 3841376463.py:93] val Epoch:[52/150]	 loss=0.37453	 acc_hard=0.599 acc_soft=0.890 lr=0.0000984


[2021-12-07 08:11:06,012][line: 93] ==> train Epoch:[53/150]	 loss=0.06319	 acc_hard=0.899 acc_soft=0.977 lr=0.0001668


I1207 08:11:06.012797 139766070904640 3841376463.py:93] train Epoch:[53/150]	 loss=0.06319	 acc_hard=0.899 acc_soft=0.977 lr=0.0001668


[2021-12-07 08:11:09,920][line: 93] ==> val Epoch:[53/150]	 loss=0.37531	 acc_hard=0.596 acc_soft=0.889 lr=0.0001668


I1207 08:11:09.920840 139766070904640 3841376463.py:93] val Epoch:[53/150]	 loss=0.37531	 acc_hard=0.596 acc_soft=0.889 lr=0.0001668


[2021-12-07 08:12:09,666][line: 93] ==> train Epoch:[54/150]	 loss=0.06297	 acc_hard=0.903 acc_soft=0.978 lr=0.0002543


I1207 08:12:09.666120 139766070904640 3841376463.py:93] train Epoch:[54/150]	 loss=0.06297	 acc_hard=0.903 acc_soft=0.978 lr=0.0002543


[2021-12-07 08:12:13,584][line: 93] ==> val Epoch:[54/150]	 loss=0.37348	 acc_hard=0.593 acc_soft=0.891 lr=0.0002543


I1207 08:12:13.584882 139766070904640 3841376463.py:93] val Epoch:[54/150]	 loss=0.37348	 acc_hard=0.593 acc_soft=0.891 lr=0.0002543


[2021-12-07 08:13:13,331][line: 93] ==> train Epoch:[55/150]	 loss=0.06134	 acc_hard=0.906 acc_soft=0.979 lr=0.0003606


I1207 08:13:13.331681 139766070904640 3841376463.py:93] train Epoch:[55/150]	 loss=0.06134	 acc_hard=0.906 acc_soft=0.979 lr=0.0003606


[2021-12-07 08:13:17,213][line: 93] ==> val Epoch:[55/150]	 loss=0.37014	 acc_hard=0.594 acc_soft=0.889 lr=0.0003606


I1207 08:13:17.213491 139766070904640 3841376463.py:93] val Epoch:[55/150]	 loss=0.37014	 acc_hard=0.594 acc_soft=0.889 lr=0.0003606


[2021-12-07 08:14:16,449][line: 93] ==> train Epoch:[56/150]	 loss=0.06050	 acc_hard=0.905 acc_soft=0.978 lr=0.0004852


I1207 08:14:16.449522 139766070904640 3841376463.py:93] train Epoch:[56/150]	 loss=0.06050	 acc_hard=0.905 acc_soft=0.978 lr=0.0004852


[2021-12-07 08:14:20,345][line: 93] ==> val Epoch:[56/150]	 loss=0.38124	 acc_hard=0.581 acc_soft=0.887 lr=0.0004852


I1207 08:14:20.345672 139766070904640 3841376463.py:93] val Epoch:[56/150]	 loss=0.38124	 acc_hard=0.581 acc_soft=0.887 lr=0.0004852


[2021-12-07 08:15:19,957][line: 93] ==> train Epoch:[57/150]	 loss=0.06039	 acc_hard=0.902 acc_soft=0.978 lr=0.0006276


I1207 08:15:19.957569 139766070904640 3841376463.py:93] train Epoch:[57/150]	 loss=0.06039	 acc_hard=0.902 acc_soft=0.978 lr=0.0006276


[2021-12-07 08:15:23,921][line: 93] ==> val Epoch:[57/150]	 loss=0.38560	 acc_hard=0.583 acc_soft=0.886 lr=0.0006276


I1207 08:15:23.921538 139766070904640 3841376463.py:93] val Epoch:[57/150]	 loss=0.38560	 acc_hard=0.583 acc_soft=0.886 lr=0.0006276
