In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
import crack_dataset as DS
import copy
import os
import time
import logging
# Show which PyTorch build executed this notebook (useful when comparing runs).
print("PyTorch Version: ", torch.__version__)

PyTorch Version:  1.10.0


In [8]:
def getDataSet(patch_size, batch_size, workers=8,
               dataset_path="../p2_data/data/classification_dataset_balanced/"):
    """
    Build the CODEBRIM dataset wrapper and return its three data loaders.

    Args:
        patch_size: value exposed to the dataset as ``args.patch_size``
            (presumably the side length of the input patches -- confirm in crack_dataset).
        batch_size: value exposed to the dataset as ``args.batch_size``.
        workers: value exposed to the dataset as ``args.workers``.
        dataset_path: root folder of the balanced classification dataset. Defaults to
            the relative path this notebook has always used; pass another location
            (e.g. "/storage/data/classification_dataset_balanced/") to run elsewhere.

    Returns:
        dict with the keys 'train', 'val' and 'test' mapping to the dataset's
        ``train_loader``, ``val_loader`` and ``test_loader`` attributes.
    """
    class Args:
        """Plain attribute container in the shape DS.CODEBRIM reads its settings from."""

        def __init__(self, dataset_path, patch_size, batch_size, workers):
            self.dataset_path = dataset_path
            self.patch_size = patch_size
            self.batch_size = batch_size
            self.workers = workers

    args = Args(dataset_path, patch_size, batch_size, workers)
    # The first positional argument is the CUDA-availability flag, passed exactly as before.
    dataset = DS.CODEBRIM(torch.cuda.is_available(), args)
    dataLoaders = {
        'train': dataset.train_loader,
        'val': dataset.val_loader,
        'test': dataset.test_loader,
    }
    return dataLoaders

In [9]:
class Resnet(nn.Module):
    """
    ResNet-50 wrapper for the multi-target crack dataset.

    The backbone is fetched through ``torch.hub`` and its final fully connected
    layer is replaced by a ``num_class``-way linear head. ``forward`` already
    applies a sigmoid, so the module returns per-label probabilities: pair it
    with ``BCELoss`` and do not apply a second sigmoid to its output.

    Args:
        num_class: number of output labels (default 6, as before).
        pretrained: forwarded to ``torch.hub.load`` (default False, as before).
    """
    def __init__(self, num_class=6, pretrained=False):
        super().__init__()
        self.model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=pretrained)
        # Derive the head's input width from the backbone instead of hard-coding 2048.
        self.model.fc = nn.Linear(self.model.fc.in_features, num_class)

    def forward(self, x):
        """
        Run the backbone and squash each output to (0, 1) with a sigmoid.
        """
        x = self.model(x)
        x = torch.sigmoid(x)  # independent per-label probabilities
        return x

    def _initialize_weights(self):
        """
        Re-initialise every Conv2d (Kaiming normal) and Linear (N(0, 0.01)) layer.

        Biases are zeroed when present; layers built with ``bias=False`` are
        skipped instead of raising.
        """
        print("initialize parameters")
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)  # small-variance normal init
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)


def get_resnet50(num_class=6, pretrained=False):
    """
    Build a torchvision ResNet-50 whose classifier head emits ``num_class`` values.

    Args:
        num_class: width of the replacement fully connected layer.
        pretrained: forwarded to torchvision's ``resnet50`` constructor.

    Returns:
        The ResNet-50 module with ``fc`` swapped for a fresh ``nn.Linear``.
        This function adds no activation after the new layer.
    """
    from torchvision.models import resnet50

    # ResNet-50 backbone from torchvision
    net = resnet50(pretrained=pretrained)
    # Swap the stock head for one sized to this task.
    feature_dim = net.fc.in_features
    net.fc = nn.Linear(in_features=feature_dim, out_features=num_class, bias=True)

    return net

In [10]:
def log_creater(output_dir):
    """
    Create (or re-configure) the shared 'train_log' logger.

    Records are written both to ``<output_dir>/<YYYY-mm-dd-HH-MM>.log`` and to
    the console, using the same format. ``logging.getLogger`` returns the same
    object on every call, so handlers left over from an earlier call (for
    example when a notebook cell is re-run) are removed and closed first;
    otherwise every record would be emitted once per previous call.

    Args:
        output_dir: directory for the log file; created if missing.

    Returns:
        The configured ``logging.Logger``.
    """
    os.makedirs(output_dir, exist_ok=True)
    log_name = '{}.log'.format(time.strftime('%Y-%m-%d-%H-%M'))
    final_log_file = os.path.join(output_dir, log_name)

    # create the logger
    log = logging.getLogger('train_log')
    log.setLevel(logging.DEBUG)

    # Drop handlers from previous calls so messages are not duplicated.
    for stale in list(log.handlers):
        log.removeHandler(stale)
        stale.close()

    # Formatter shared by both handlers
    formatter = logging.Formatter(
        '[%(asctime)s][line: %(lineno)d] ==> %(message)s')

    # FileHandler first, then StreamHandler (same order as before)
    for handler in (logging.FileHandler(final_log_file), logging.StreamHandler()):
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(formatter)
        log.addHandler(handler)

    log.info('creating {}'.format(final_log_file))
    return log

In [11]:
def train(root_dir, model, logger, lr_h, lr_l, dataLoaders, num_epochs = 300, resume=False,
    checkpoint = None, device = "cpu", min_save_epoch = 150):
    """
    Train ``model`` with SGD + cosine annealing warm restarts, then evaluate the best weights.

    Each epoch runs a 'train' and a 'val' pass. The model is expected to return
    raw logits: a sigmoid is applied here before ``BCELoss`` and before
    thresholding at 0.5. Two accuracies are tracked:
      * hard -- fraction of samples whose labels are ALL predicted correctly;
      * soft -- fraction of individual labels predicted correctly.
    Both are averaged over the samples actually seen, so a short final batch is
    weighted correctly.

    Files written below ``root_dir``:
      * ``resnetWpretrain_checkpoint/ckpt_best_<epoch>.pth`` at the START of every
        20th epoch (model / optimizer / scheduler / epoch / best accuracies), so
        resuming from one re-runs that epoch;
      * ``hard.pth`` / ``soft.pth`` whenever the validation hard / soft accuracy
        improves at an epoch >= ``min_save_epoch``.

    After the last epoch each existing best-weights file is loaded into a copy of
    ``model`` and passed to ``evaluation``; the caller's ``model`` keeps its
    last-epoch weights. A missing file is reported with a warning rather than an error.

    Args:
        root_dir: output directory (must already exist for hard.pth / soft.pth).
        model: network to optimise; must already live on ``device``.
        logger: object with ``info`` / ``warning`` methods.
        lr_h: initial (maximum) learning rate.
        lr_l: minimum learning rate (``eta_min`` of the scheduler).
        dataLoaders: dict with 'train', 'val' (and 'test', used by ``evaluation``) loaders
            yielding ``(inputs, labels)`` batches.
        num_epochs: last epoch number (epochs are counted from 1).
        resume: if True, restore state from ``root_dir + checkpoint``.
        checkpoint: checkpoint path relative to ``root_dir`` (plain string
            concatenation, so include the leading separator).
        device: device the batches and loaded weights are moved to.
        min_save_epoch: first epoch at which best weights may be saved.
    """
    start_epoch = 1
    optimizer = optim.SGD(model.parameters(), lr=lr_h, momentum=0.9)
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=lr_l)
    best_acc_hard = 0.0
    best_acc_soft = 0.0
    criterion = torch.nn.BCELoss()
    save_path_hard = root_dir + '/hard.pth'
    save_path_soft = root_dir + '/soft.pth'
    checkpoint_dir = root_dir + "/resnetWpretrain_checkpoint"
    # Batches per epoch, taken from the loaders passed in (not from a notebook global).
    iters = len(dataLoaders['train'])

    if resume:
        path_checkpoint = root_dir + checkpoint  # checkpoint path
        # map_location lets a checkpoint saved on GPU be resumed on another device.
        saved_state = torch.load(path_checkpoint, map_location=device)
        model.load_state_dict(saved_state['net'])  # learnable parameters
        scheduler.load_state_dict(saved_state['scheduler'])
        optimizer.load_state_dict(saved_state['optimizer'])
        start_epoch = saved_state['epoch']
        best_acc_soft = saved_state['best_acc_soft']
        best_acc_hard = saved_state['best_acc_hard']

    for epoch in range(start_epoch, num_epochs + 1):  # epochs are 1-based

        if epoch % 20 == 0:
            # Periodic snapshot, taken before this epoch's updates.
            snapshot = {
                "net": model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                "epoch": epoch,
                "best_acc_soft": best_acc_soft,
                "best_acc_hard": best_acc_hard
            }
            os.makedirs(checkpoint_dir, exist_ok=True)
            torch.save(snapshot, checkpoint_dir + '/ckpt_best_%s.pth' % (str(epoch)))

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # training mode for 'train', eval mode for 'val'

            running_loss = 0.0
            running_corrects_hard = 0.0
            running_corrects_soft = 0.0
            samples_seen = 0

            for i, sample in enumerate(dataLoaders[phase]):
                inputs, labels = sample
                inputs = inputs.to(device)
                labels = labels.to(device)
                batch_len = inputs.size(0)

                # Autograd is only needed while training; skip graph building for 'val'.
                with torch.set_grad_enabled(is_train):
                    outputs = torch.sigmoid(model(inputs))
                    loss = criterion(outputs, labels)
                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        # Fractional epoch so the cosine schedule advances every batch.
                        scheduler.step(epoch + i / iters)

                # Binarise the probabilities at 0.5 and compare with the targets.
                predictions = (outputs >= 0.5).float()
                equality_matrix = (predictions == labels).float()

                running_loss += loss.item() * batch_len
                running_corrects_hard += torch.sum(torch.prod(equality_matrix, dim=1)).item()
                # Weight the batch mean by batch size so a short last batch is not over-counted.
                running_corrects_soft += torch.mean(equality_matrix).item() * batch_len
                samples_seen += batch_len

            denominator = max(samples_seen, 1)  # guards an empty loader
            epoch_loss = running_loss / denominator
            epoch_acc_hard = running_corrects_hard / denominator
            epoch_acc_soft = running_corrects_soft / denominator
            logger.info('{} Epoch:[{}/{}]\t loss={:.5f}\t acc_hard={:.3f} acc_soft={:.3f} lr={:.7f}'.format(
                phase, epoch, num_epochs, epoch_loss, epoch_acc_hard, epoch_acc_soft,
                optimizer.param_groups[0]['lr']))

            # Keep the best validation weights once the warm-up epochs are over.
            if phase == 'val' and epoch >= min_save_epoch:
                if epoch_acc_hard > best_acc_hard:
                    best_acc_hard = epoch_acc_hard
                    torch.save(model.state_dict(), save_path_hard)
                if epoch_acc_soft > best_acc_soft:
                    best_acc_soft = epoch_acc_soft
                    torch.save(model.state_dict(), save_path_soft)

    # Final report: evaluate the best weights on a copy of the trained architecture,
    # so the caller's model is untouched and nothing has to be re-downloaded.
    eval_model = None
    for tag, weights_path in (("hard", save_path_hard), ("soft", save_path_soft)):
        if not os.path.isfile(weights_path):
            logger.warning("no best '{}' weights at {}; skipping final evaluation".format(tag, weights_path))
            continue
        if eval_model is None:
            eval_model = copy.deepcopy(model)
        eval_model.load_state_dict(torch.load(weights_path, map_location=device))
        eval_model.to(device)
        eval_model.eval()
        logger.info(tag + ":")
        evaluation(dataLoaders, device, eval_model, logger)



def evaluation(dataLoaders, device, model, logger):
    """
    Log loss, soft accuracy and hard accuracy of ``model`` on the train/val/test loaders.

    The model is expected to return raw logits: a sigmoid is applied here before
    ``BCELoss`` and before thresholding at 0.5. "Hard" accuracy counts a sample
    as correct only if all of its labels match; "soft" accuracy is the fraction
    of individual labels that match. All three metrics are averaged over the
    samples actually seen, so a short final batch is weighted correctly.

    The pass runs in eval mode with autograd disabled; the model's previous
    train/eval mode is restored afterwards.

    Args:
        dataLoaders: dict with 'train', 'val' and 'test' loaders yielding
            ``(inputs, labels)`` batches.
        device: device the batches are moved to (the model must already be there).
        model: network to evaluate.
        logger: object with an ``info`` method; one line is logged per phase.
    """
    criterion = torch.nn.BCELoss()
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for evaluation; this avoids building autograd graphs.
        with torch.no_grad():
            for phase in ['train', 'val', 'test']:
                running_loss = 0.0
                running_corrects_hard = 0.0
                running_corrects_soft = 0.0
                samples_seen = 0

                for i, sample in enumerate(dataLoaders[phase]):
                    inputs, labels = sample
                    inputs = inputs.to(device)
                    labels = labels.to(device)
                    batch_len = inputs.size(0)

                    outputs = torch.sigmoid(model(inputs))
                    loss = criterion(outputs, labels)

                    # Binarise the probabilities at 0.5 and compare with the targets.
                    predictions = (outputs >= 0.5).float()
                    equality_matrix = (predictions == labels).float()

                    running_loss += loss.item() * batch_len
                    running_corrects_hard += torch.sum(torch.prod(equality_matrix, dim=1)).item()
                    # Weight the batch mean by batch size so a short last batch is not over-counted.
                    running_corrects_soft += torch.mean(equality_matrix).item() * batch_len
                    samples_seen += batch_len

                denominator = max(samples_seen, 1)  # guards an empty loader
                epoch_loss = running_loss / denominator
                epoch_acc_hard = running_corrects_hard / denominator
                epoch_acc_soft = running_corrects_soft / denominator
                logger.info("{}: loss:{:.5f} acc_soft:{:.3f} acc_hard:{:.3f}".format(phase, epoch_loss, epoch_acc_soft, epoch_acc_hard))
    finally:
        model.train(was_training)



In [12]:
import numpy as np

# ---- run configuration ----
logger = log_creater("./resnetWpretrain_log")
batch_size = 16
patch_size = 224
lr = (1e-2, 1e-5)   # (highest, lowest) learning rate of the cosine schedule
lr_h, lr_l = lr

# ---- data ----
dataLoader = getDataSet(patch_size, batch_size)

# ---- output folder, named after the main hyper-parameters ----
root_dir = './{}-{}-{}'.format(batch_size, patch_size, lr[0])
if not os.path.isdir(root_dir):
    os.mkdir(root_dir)

logger.info("batch_size:{}".format(batch_size))
logger.info("patch_size:{}".format(patch_size))
logger.info("learning rate high:{}".format(lr[0]))
logger.info("learning rate low:{}".format(lr[1]))

# ---- model ----
model = get_resnet50(num_class=6, pretrained=True)

# Count the trainable parameters.
model_parameters = (p for p in model.parameters() if p.requires_grad)
params = sum(np.prod(p.size()) for p in model_parameters)
print(f'model parameter number is: {params}')

use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")
if use_cuda:
    model = model.to(device)

# ---- training ----
train(
    root_dir, model, logger, lr_h, lr_l, dataLoader,
    num_epochs=300, resume=False, checkpoint=None, device=device,
)




[2021-12-04 10:05:54,427][line: 35] ==> creating ./train_log/2021-12-04-10-05.log
[2021-12-04 10:05:54,427][line: 35] ==> creating ./train_log/2021-12-04-10-05.log
[2021-12-04 10:05:55,126][line: 12] ==> batch_size:4
[2021-12-04 10:05:55,126][line: 12] ==> batch_size:4
[2021-12-04 10:05:55,127][line: 13] ==> patch_size:224
[2021-12-04 10:05:55,127][line: 13] ==> patch_size:224
[2021-12-04 10:05:55,129][line: 14] ==> learning rate high:0.01
[2021-12-04 10:05:55,129][line: 14] ==> learning rate high:0.01
[2021-12-04 10:05:55,130][line: 15] ==> learning rate low:1e-05
[2021-12-04 10:05:55,130][line: 15] ==> learning rate low:1e-05
Using cache found in /Users/elenath/.cache/torch/hub/pytorch_vision_v0.10.0


model parameter number is: 23520326


KeyboardInterrupt: 