In [None]:
######## required downloads and useful instructions ##############
import warnings
warnings.filterwarnings('ignore')
!pip install ray torch torchvision
!pip install 'ray[tune]'
!pip install biopython

In [129]:
######### imports #############
import os
import argparse
import sys
import time
import math

import torch
import torch.nn.functional as F
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
from skimage.transform import resize
import tabulate
from torch.utils.data import DataLoader, ConcatDataset

from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, auc, roc_auc_score, average_precision_score
from itertools import product
import numpy as np 
import pandas as pd
import random

from scipy.stats import friedmanchisquare
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

In [132]:
def get_args():
  """Parse the training options and return them as a namespace.

  ``parse_known_args`` is used, so unrecognised arguments (for example the
  ones a notebook kernel puts in ``sys.argv``) are ignored instead of
  aborting. Every help string uses argparse's ``%(default)s`` placeholder so
  the advertised default is always the one actually configured.

  Returns:
    argparse.Namespace: one attribute per option declared below. The
    improved-variant flag is stored as ``imrpoved`` (the historic spelling of
    the option, which the rest of the notebook reads); ``--improved`` is
    accepted as an alias that writes to the same attribute.
  """
  parser = argparse.ArgumentParser(description='SGD/SWA training')

  # --- locations, data and model ---
  parser.add_argument('--dir', type=str, default='.',
                      help='training directory (default: %(default)s)')
  parser.add_argument('--dataset', type=str, default='SEMEION',
                      help='dataset name (default: %(default)s)')
  parser.add_argument('--data_path', type=str, default='.', metavar='PATH',
                      help='path to datasets location (default: %(default)s)')
  parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                      help='input batch size (default: %(default)s)')
  parser.add_argument('--num_workers', type=int, default=4, metavar='N',
                      help='number of workers (default: %(default)s)')
  parser.add_argument('--model', type=str, default='VGG16', metavar='MODEL',
                      help='model name (default: %(default)s)')
  parser.add_argument('--resume', type=str, default=None, metavar='CKPT',
                      help='checkpoint to resume training from (default: %(default)s)')

  # --- optimisation ---
  parser.add_argument('--epochs', type=int, default=200, metavar='N',
                      help='number of epochs to train (default: %(default)s)')
  parser.add_argument('--save_freq', type=int, default=25, metavar='N',
                      help='save frequency (default: %(default)s)')
  parser.add_argument('--eval_freq', type=int, default=5, metavar='N',
                      help='evaluation frequency (default: %(default)s)')
  parser.add_argument('--lr_init', type=float, default=0.05, metavar='LR',
                      help='initial learning rate (default: %(default)s)')
  parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                      help='SGD momentum (default: %(default)s)')
  parser.add_argument('--wd', type=float, default=5e-4,
                      help='weight decay (default: %(default)s)')

  # --- stochastic weight averaging ---
  parser.add_argument('--swa', action='store_true', default=False,
                      help='swa usage flag (default: %(default)s)')
  parser.add_argument('--swa_start', type=float, default=100, metavar='N',
                      help='SWA start epoch number (default: %(default)s)')
  parser.add_argument('--swa_lr', type=float, default=0.07, metavar='LR',
                      help='SWA LR (default: %(default)s)')
  parser.add_argument('--swa_c_epochs', type=int, default=1, metavar='N',
                      help='SWA model collection frequency/cycle length in epochs '
                           '(default: %(default)s)')
  parser.add_argument('--seed', type=int, default=1, metavar='S',
                      help='random seed (default: %(default)s)')

  # NOTE(review): store_true combined with default=True means this flag is
  # always on and cannot be switched off from the command line. The default is
  # kept as-is so existing runs behave the same.
  parser.add_argument('--imrpoved', '--improved', dest='imrpoved', action='store_true',
                      default=True, help='improved usage flag (default: %(default)s)')

  args, _unknown = parser.parse_known_args()
  return args

In [130]:
"""
    VGG model definition
    ported from https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
"""

# Model configurations exported by this cell (currently only VGG16).
__all__ = ['VGG16']


# Build the convolutional feature extractor from a layer specification.
def make_layers(cfg, batch_norm=False):
    """Translate a VGG layer specification into an ``nn.Sequential``.

    Each entry of ``cfg`` is either the string ``'M'`` (a 2x2 max-pool with
    stride 2) or an integer giving the number of output channels of a 3x3
    convolution with padding 1. Every convolution is followed by a ReLU, with a
    ``BatchNorm2d`` in between when ``batch_norm`` is true. The first
    convolution takes 3 input channels.
    """
    modules = []
    channels_in = 3
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        stage = [nn.Conv2d(channels_in, spec, kernel_size=3, padding=1)]
        if batch_norm:
            stage.append(nn.BatchNorm2d(spec))
        stage.append(nn.ReLU(inplace=True))
        modules.extend(stage)
        # The next convolution consumes what this one produced.
        channels_in = spec
    return nn.Sequential(*modules)


# Layer specifications consumed by make_layers, keyed by the depth passed to VGG:
# an integer is the output-channel count of a convolution, 'M' is a max-pool.
cfg = {
    16: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    19: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M',
         512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    """VGG-style image classifier: convolutional features plus a 3-layer head.

    The feature extractor is built by ``make_layers`` from ``cfg[depth]``. The
    head's first linear layer takes 512 inputs, so the flattened feature map
    must have exactly 512 values per sample.
    """

    def __init__(self, num_classes=10, depth=16, batch_norm=False):
        super().__init__()
        self.features = make_layers(cfg[depth], batch_norm)

        head = [
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(512, 512),
            nn.ReLU(True),
            nn.Linear(512, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

        # Re-initialise every convolution: zero-mean normal weights with
        # std = sqrt(2 / (kernel_h * kernel_w * out_channels)), zero biases.
        for module in self.modules():
            if not isinstance(module, nn.Conv2d):
                continue
            kernel_h, kernel_w = module.kernel_size
            fan = kernel_h * kernel_w * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan))
            module.bias.data.zero_()

    def forward(self, x):
        """Return the class scores for a batch of images ``x``."""
        feature_maps = self.features(x)
        # Flatten everything except the batch dimension.
        flat = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flat)


class Base:
    # Model constructor plus the positional / keyword arguments passed to it.
    base = VGG
    args = []
    kwargs = {}

    # Training pipeline for 3-channel images: random flip, padded random
    # 32x32 crop, tensor conversion, per-channel normalisation.
    transform_train = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # Training pipeline for single-channel images: as above, but the image is
    # resized to 32 first and its channel is repeated three times afterwards.
    transform_train_gray_scale = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.Resize(32),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Lambda(lambda img: img.repeat(3, 1, 1)),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # Evaluation pipeline for 3-channel images: no augmentation.
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # Evaluation pipeline for single-channel images: resize, then repeat the
    # channel three times before normalising.
    transform_test_gray_scale = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Lambda(lambda img: img.repeat(3, 1, 1)),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])


class VGG16(Base):
    """Model configuration that inherits everything from ``Base`` unchanged."""


In [131]:
"""
    utils.py
"""

def adjust_learning_rate(optimizer, lr):
    """Write ``lr`` into every parameter group of ``optimizer`` and return it."""
    groups = optimizer.param_groups
    for position in range(len(groups)):
        groups[position]['lr'] = lr
    return lr

def train_epoch(loader, model, criterion, optimizer):
    """Run one optimisation pass over ``loader`` and report loss and accuracy.

    Batches are moved to whatever device the model's parameters live on, so
    the function works for a CUDA model (as used in this notebook) and for a
    CPU model alike.

    Args:
        loader: iterable of ``(inputs, targets)`` batches that also exposes
            ``loader.dataset`` (its length is the normalisation constant).
        model: ``nn.Module`` to train; it is switched to training mode.
        criterion: callable ``criterion(outputs, targets)`` returning the mean
            loss of the batch.
        optimizer: optimizer stepping the model's parameters.

    Returns:
        dict: ``'loss'`` (sample-weighted mean loss) and ``'accuracy'``
        (percentage of correctly classified samples).
    """
    device = next(model.parameters()).device
    loss_sum = 0.0
    correct = 0

    model.train()

    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # a smaller final batch does not skew the epoch average.
        loss_sum += loss.item() * inputs.size(0)
        predictions = outputs.detach().max(1)[1]
        correct += predictions.eq(targets).sum().item()

    num_samples = len(loader.dataset)
    return {
        'loss': loss_sum / num_samples,
        'accuracy': correct / num_samples * 100.0,
    }

def one_vs_rest(y_true, predicted, proba, num_classes):
  """Average the binary one-vs-rest metrics over all classes.

  For each class index the labels and predictions are binarised (1 for that
  class, 0 otherwise) and scored by ``get_metrics_one_vs_rest`` together with
  column ``class`` of ``proba``. The per-class values are then averaged.

  Returns:
    dict: mean value for each of 'ACC', 'TPR', 'FPR', 'Precision', 'AUC'
    and 'PR_CURVE'.
  """
  metric_names = ['ACC', 'TPR', 'FPR', 'Precision', 'AUC', 'PR_CURVE']
  per_class_rows = []
  for label in range(num_classes):
    truth_binary = [int(y == label) for y in y_true]
    predicted_binary = [int(pred == label) for pred in predicted]
    class_metrics = get_metrics_one_vs_rest(truth_binary, predicted_binary, proba[:, label])
    per_class_rows.append(list(class_metrics.values()))
  # Column-wise mean: one averaged value per metric, in metric_names order.
  class_average = np.mean(per_class_rows, axis=0)
  return {name: class_average[position] for position, name in enumerate(metric_names)}


def get_metrics_one_vs_rest( y_true, predicted, proba):
  """Compute binary classification metrics for one class against the rest.

  Args:
    y_true: binary ground-truth labels (1 = the class of interest).
    predicted: binary predicted labels, aligned with ``y_true``.
    proba: per-sample score for the class of interest, used for the
      ranking-based metrics (AUC and average precision).

  Returns:
    dict: 'ACC', 'TPR', 'FPR', 'Precision', 'AUC' and 'PR_CURVE', in that
    order. A ratio whose denominator is zero is reported as 0.0 instead of
    NaN, so one degenerate class cannot turn the class average into NaN.
  """
  def _ratio(numerator, denominator):
    # An empty denominator means the quantity is undefined for this class.
    if denominator == 0:
      return 0.0
    return float(numerator) / float(denominator)

  # Pin the label set: without it the matrix shrinks to 1x1 whenever only one
  # label value occurs in y_true and predicted, and the indexing below fails.
  cm = confusion_matrix(y_true, predicted, labels=[0, 1])
  TN, FP = cm[0][0], cm[0][1]
  FN, TP = cm[1][0], cm[1][1]

  # Overall accuracy for this class.
  # NOTE(review): the original author remarked that this is not computed the
  # same way as in the reference paper.
  ACC = _ratio(TP + TN, TP + FP + FN + TN)
  # true positive rate
  TPR = _ratio(TP, TP + FN)
  # false positive rate
  FPR = _ratio(FP, FP + TN)
  # Precision or positive predictive value
  Precision = _ratio(TP, TP + FP)
  # area under the ROC curve
  AUC = roc_auc_score(y_true, proba)
  # average precision (area under the precision-recall curve)
  PR_CURVE = average_precision_score(y_true, proba)
  return { 'ACC': ACC, 'TPR': TPR, 'FPR':FPR, 'Precision': Precision, 'AUC':AUC, 'PR_CURVE':PR_CURVE}

def eval(loader, model, criterion, num_classes, training_time=0):
    """Evaluate ``model`` on ``loader`` and collect loss, accuracy and metrics.

    The forward passes run under ``torch.no_grad()`` and on the device that
    holds the model's parameters. Inference time is measured per window of 8
    consecutive batches and averaged over the completed windows. When the
    loader yields fewer than 8 batches there is no completed window; the
    elapsed time is then scaled up to a full window so the value stays finite.

    Args:
        loader: iterable of ``(inputs, targets)`` batches exposing
            ``loader.dataset``.
        model: ``nn.Module`` to evaluate; it is switched to evaluation mode.
        criterion: callable ``criterion(outputs, targets)`` returning the mean
            loss of the batch.
        num_classes: number of classes, forwarded to ``one_vs_rest``.
        training_time: value stored unchanged under ``'Training Time'``.

    Returns:
        dict: ``'loss'``, ``'accuracy'`` (percent) and ``'metrics'`` (the
        ``one_vs_rest`` result extended with ``'Training Time'`` and
        ``'Inference Time'``).

    Raises:
        ValueError: if ``loader`` yields no batches.
    """
    timing_window = 8  # number of batches covered by one timing measurement
    device = next(model.parameters()).device

    loss_sum = 0.0
    correct = 0
    target_batches, prediction_batches, score_batches = [], [], []
    window_times = []

    model.eval()
    window_start = time.time()
    # Inference only: skipping autograd bookkeeping saves memory and time.
    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss_sum += loss.item() * inputs.size(0)

            batch_predictions = outputs.max(1)[1]
            correct += batch_predictions.eq(targets).sum().item()

            # Collect per-batch results and concatenate once after the loop.
            target_batches.append(targets.cpu())
            prediction_batches.append(batch_predictions.cpu())
            score_batches.append(outputs.cpu())

            if (i + 1) % timing_window == 0:
                window_times.append(time.time() - window_start)
                window_start = time.time()

    if not target_batches:
        raise ValueError('eval() needs a loader that yields at least one batch')

    if window_times:
        inference_time = float(np.mean(window_times))
    else:
        # No full window completed: extrapolate the partial one.
        partial_elapsed = time.time() - window_start
        inference_time = partial_elapsed * timing_window / len(target_batches)

    y_true = torch.cat(target_batches, 0).numpy()
    predicted = torch.cat(prediction_batches, 0).numpy()
    proba = torch.cat(score_batches, 0).numpy()

    metrics = one_vs_rest(y_true, predicted, proba, num_classes)
    metrics['Training Time'] = training_time
    metrics['Inference Time'] = inference_time
    num_samples = len(loader.dataset)
    return {
        'loss': loss_sum / num_samples,
        'accuracy': correct / num_samples * 100.0,
        'metrics': metrics,
    }


def moving_average(net1, net2, alpha=1):
    """Blend ``net2``'s parameters into ``net1`` in place.

    Each parameter of ``net1`` becomes ``(1 - alpha) * p1 + alpha * p2``, with
    parameters paired in ``parameters()`` order. ``alpha=1`` copies ``net2``.
    """
    keep = 1.0 - alpha
    for target, source in zip(net1.parameters(), net2.parameters()):
        target.data.mul_(keep)
        target.data.add_(source.data * alpha)


In [133]:
def find_accuracy_for_specific_hyperparameters(config , model_cfg, loaders, max_epochs=1):
  """Train a fresh model with one hyper-parameter candidate and score it.

  Args:
    config: sequence ``(lr_init, num_epoch)`` describing the candidate.
    model_cfg: model configuration providing ``base``, ``args`` and ``kwargs``.
    loaders: dict with a ``'train'`` and a ``'val'`` data loader.
    max_epochs: upper bound on the epochs actually trained. The default of 1
      reproduces the previous hard-coded ``[:1]`` truncation of the epoch
      loop; pass ``None`` to train for the candidate's full ``num_epoch``.
      NOTE(review): with the default, ``num_epoch`` does not influence the
      score, because only the first epoch is ever run.

  Returns:
    float: validation accuracy (percent) after the last epoch that was run.

  Raises:
    ValueError: if no epoch is run (``num_epoch`` or ``max_epochs`` below 1).

  Relies on the module-level ``num_classes`` and ``args`` set by the driver.
  """
  print('Find best parameters')
  lr_init, num_epoch = config[0], config[1]

  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  model = model_cfg.base(*model_cfg.args, num_classes=num_classes, **model_cfg.kwargs)
  model.to(device)

  criterion = F.cross_entropy
  optimizer = torch.optim.SGD(
      model.parameters(),
      lr=lr_init,
      momentum=args.momentum,
      weight_decay=args.wd
  )

  epochs_to_run = num_epoch if max_epochs is None else min(num_epoch, max_epochs)
  test_res = None
  for epoch in range(epochs_to_run):
    # The schedule still refers to the candidate's full epoch budget.
    lr = schedule(epoch, lr_init, num_epoch)
    adjust_learning_rate(optimizer, lr)
    train_res = train_epoch(loaders['train'], model, criterion, optimizer)
    test_res = eval(loaders['val'], model, criterion, num_classes)

    # one table row per epoch
    values = [epoch + 1, lr, train_res['loss'], train_res['accuracy'], test_res['loss'], test_res['accuracy']]
    table = tabulate.tabulate([values], ['ep', 'lr', 'tr_loss', 'tr_acc', 'te_loss', 'te_acc'], tablefmt='simple', floatfmt='8.4f')
    print(table)

  if test_res is None:
    raise ValueError('no epoch was run: num_epoch and max_epochs must both be at least 1')
  return test_res['accuracy']

In [134]:
def evaluate_model(lr_init, num_epoch, train_loader, test_loader, num_classes):
  """Train a fresh model and return the evaluation result of the final model.

  Without ``args.swa`` the model is trained with SGD and evaluated after every
  epoch; the last evaluation is returned. With ``args.swa`` a second model
  holds the running average of the weights: from epoch ``args.swa_start`` on,
  every ``args.swa_c_epochs`` epochs the current weights are folded into the
  average, the averaged model is evaluated, and the last such evaluation is
  returned.

  Args:
    lr_init: initial learning rate.
    num_epoch: number of training epochs.
    train_loader: loader for the training split.
    test_loader: loader for the evaluation split.
    num_classes: number of output classes of the model.

  Returns:
    dict: the ``eval`` result ('loss', 'accuracy', 'metrics').

  Raises:
    ValueError: if ``num_epoch`` is below 1, or if SWA is enabled but its
      averaging step can never be reached or never fired.

  Relies on the module-level ``model_cfg`` and ``args`` set by the driver.
  """
  print('Evaluate model')
  if num_epoch < 1:
    raise ValueError('num_epoch must be at least 1')
  if args.swa and args.swa_start > num_epoch:
    # Fail before training instead of after it: no epoch would ever reach the
    # averaging phase, so there would be no SWA result to return.
    raise ValueError(
        f'SWA start epoch {args.swa_start} is beyond the {num_epoch} training epochs')

  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  model = model_cfg.base(*model_cfg.args, num_classes=num_classes, **model_cfg.kwargs)
  model.to(device)

  swa_model = None
  swa_n = 0
  swa_res = None
  if args.swa:
    swa_model = model_cfg.base(*model_cfg.args, num_classes=num_classes, **model_cfg.kwargs)
    swa_model.to(device)

  criterion = F.cross_entropy
  optimizer = torch.optim.SGD(
      model.parameters(),
      lr=lr_init,
      momentum=args.momentum,
      weight_decay=args.wd
  )

  test_res = None
  for epoch in range(num_epoch):
    lr = schedule(epoch, lr_init, num_epoch)
    adjust_learning_rate(optimizer, lr)
    training_time = time.time()
    train_res = train_epoch(train_loader, model, criterion, optimizer)
    training_time = time.time() - training_time
    test_res = eval(test_loader, model, criterion, num_classes, training_time)

    if not args.swa:
      print(f'{test_res},')
      continue

    # SWA phase reached and this epoch closes an averaging cycle.
    in_swa_phase = (epoch + 1) >= args.swa_start
    if in_swa_phase and (epoch + 1 - args.swa_start) % args.swa_c_epochs == 0:
      # running mean of the weights collected so far
      moving_average(swa_model, model, 1.0 / (swa_n + 1))
      swa_n += 1
      # evaluate the averaged weights on the test split
      swa_res = eval(test_loader, swa_model, criterion, num_classes, training_time)
      print(f'{swa_res},')
    else:
      print(f'epoch = {epoch}/{num_epoch},')

  if not args.swa:
    return test_res
  if swa_res is None:
    raise ValueError('SWA was enabled but no averaging step was performed')
  return swa_res

In [135]:
def schedule(epoch, lr_init, num_epoch):
    """Return the learning rate for ``epoch`` under a piecewise-linear schedule.

    Progress ``t`` is ``epoch`` divided by ``args.swa_start`` when SWA is on and
    by ``num_epoch`` otherwise. The rate stays at ``lr_init`` while
    ``t <= 0.5``, decays linearly until ``t = 0.9`` and is constant afterwards.
    The final rate is ``args.swa_lr`` with SWA and ``0.01 * lr_init`` without.
    """
    if args.swa:
        horizon = args.swa_start
        lr_ratio = args.swa_lr / lr_init
    else:
        horizon = num_epoch
        lr_ratio = 0.01

    t = (epoch) / horizon
    if t <= 0.5:
        return lr_init * 1.0
    if t <= 0.9:
        return lr_init * (1.0 - (1.0 - lr_ratio) * (t - 0.5) / 0.4)
    return lr_init * lr_ratio


In [136]:
def hyper_parameters_optimization(dataset, num_trials=1):
  """Random search over (learning rate, epochs) with nested cross-validation.

  For every fold of a 10-fold outer split, ``num_trials`` candidates are drawn
  at random from the search grid. Each candidate is scored by its mean
  validation accuracy over a 3-fold split of the outer training part. The
  best-scoring candidate seen overall is returned.

  Args:
    dataset: dataset to split; must support ``len`` and integer indexing.
    num_trials: candidates drawn per outer fold. The default of 1 reproduces
      the previous behaviour (the loop was written as ``range(50)[:1]``).
      NOTE(review): the original comment announced 50 random draws - pass
      ``num_trials=50`` if that is what is intended.

  Returns:
    ``[args.lr_init, args.epochs]`` when ``args.swa`` is set (no search is
    run in that case), otherwise the best ``(lr_init, num_epochs)`` tuple.

  Relies on the module-level ``args`` and ``model_cfg`` set by the driver.
  """
  if args.swa:  # optimization is applied only on sgd
    return [args.lr_init, args.epochs]

  # The grid does not depend on the fold, so build it once.
  search_space = {
      'lr_init': [x / 100.0 for x in range(1, 11)],
      'num_epochs': list(range(50, 300, 25))
  }
  all_options = list(product(search_space['lr_init'], search_space['num_epochs']))
  inner_splits = 3

  best_parameters = None
  # -inf rather than 0 so that a candidate is always selected, even when
  # every candidate scores 0% accuracy.
  best_accuracy = float('-inf')

  # outer k-fold Cross Validation
  kfold = KFold(n_splits=10, shuffle=True)
  for train_ids, _test_ids in kfold.split(dataset):
    train_subs = torch.utils.data.dataset.Subset(dataset, train_ids)

    for _ in range(num_trials):
      curr_config = random.choice(all_options)
      print(f'choice {curr_config}')

      # inner k-fold for hyperparameters optimization
      fold_accuracies = []
      inner_kfold = KFold(n_splits=inner_splits, shuffle=True)
      for in_train_ids, in_val_ids in inner_kfold.split(train_subs):
        # The inner indices are positions within train_subs, so they must be
        # applied to train_subs; indexing the full dataset with them would
        # pick the wrong samples and ignore the outer split.
        inner_train = torch.utils.data.dataset.Subset(train_subs, in_train_ids)
        inner_val = torch.utils.data.dataset.Subset(train_subs, in_val_ids)

        # Define data loaders for training and validation data in this fold
        inner_train_loader = torch.utils.data.DataLoader(
            inner_train,
            batch_size=args.batch_size,
            num_workers=args.num_workers,
            shuffle=True)

        inner_val_loader = torch.utils.data.DataLoader(
            inner_val,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers)

        loaders = {'train': inner_train_loader, 'val': inner_val_loader}
        fold_accuracies.append(
            find_accuracy_for_specific_hyperparameters(curr_config, model_cfg, loaders))

      mean_acc = sum(fold_accuracies) / len(fold_accuracies)
      if best_accuracy < mean_acc:
        best_accuracy = mean_acc
        best_parameters = curr_config
      print(f'best parameters {best_parameters}')

  return best_parameters

In [137]:
def download_dataset():
  """Download the dataset named by ``args.dataset`` and return it with its class count.

  The dataset class ``ds``, the target ``path``, ``args`` and ``model_cfg`` are
  read from module level. Datasets loaded here as a single split (STL10, SVHN,
  SEMEION) are returned as they are; for all others the train and test splits
  are concatenated into one dataset.

  Returns:
    tuple: ``(dataset, num_classes)``; ``num_classes`` is 100 for CIFAR100 and
    10 for everything else.
  """
  name = args.dataset
  single_split = ('STL10', 'SVHN', 'SEMEION')

  if name == 'EMNIST':
    train_set = ds(path, train=True, download=True, transform=model_cfg.transform_train_gray_scale, split='digits')
    test_set = ds(path, train=False, download=True, transform=model_cfg.transform_test_gray_scale, split='digits')
  elif name in ('MNIST', 'USPS', 'FashionMNIST', 'KMNIST', 'QMNIST'):
    train_set = ds(path, train=True, download=True, transform=model_cfg.transform_train_gray_scale)
    test_set = ds(path, train=False, download=True, transform=model_cfg.transform_test_gray_scale)
  elif name in ('STL10', 'SVHN'):
    train_set = ds(path, download=True, transform=model_cfg.transform_train)
  elif name == 'SEMEION':
    train_set = ds(path, download=True, transform=model_cfg.transform_train_gray_scale)
  else:
    # CIFAR10, CIFAR100
    train_set = ds(path, train=True, download=True, transform=model_cfg.transform_train)
    test_set = ds(path, train=False, download=True, transform=model_cfg.transform_test)

  num_classes = 100 if name == 'CIFAR100' else 10

  if name in single_split:
    return train_set, num_classes
  return ConcatDataset([train_set, test_set]), num_classes

In [None]:
def friedman_test(sgd, swa, improved):
  """Run the Friedman chi-square test on the three score samples and return its result."""
  samples = (sgd, swa, improved)
  return friedmanchisquare(*samples)

def post_hoc_test(sgd, swa, improved):
  """Print Tukey's HSD pairwise comparison of the three algorithms' scores.

  Args:
    sgd, swa, improved: score sequences, one per algorithm. They are not
      modified, and they may have any (even differing) lengths - each score is
      labelled with the group it came from.
  """
  # Build a new list instead of extending `sgd`, which would silently grow the
  # caller's list.
  scores = list(sgd) + list(swa) + list(improved)
  groups = ['SGD'] * len(sgd) + ['SWA'] * len(swa) + ['Improved'] * len(improved)
  # create DataFrame to hold data
  df = pd.DataFrame({'score': scores, 'group': groups})
  # perform Tukey's test
  tukey = pairwise_tukeyhsd(endog=df['score'], groups=df['group'], alpha=0.05)
  # display results
  print(tukey)

def significant_test(file_name, dataset_name, metric):
  """Test whether the three algorithms differ significantly on one dataset.

  Reads the results workbook, selects the ``metric`` values of SGD, SWA and
  the improved variant for ``dataset_name``, and runs the Friedman test. If the
  null hypothesis is rejected at the 0.05 level, Tukey's post-hoc test is
  printed; otherwise a short message is printed.

  Args:
    file_name: path of the Excel workbook. It is read without a header row and
      its last column is dropped before the 12 column names are assigned.
      NOTE(review): this layout is assumed, confirm against the actual file.
    dataset_name: value to match in the 'Dataset Name' column.
    metric: name of the column holding the scores to compare.

  Algorithm names are matched case-insensitively, because the results are
  labelled 'Improved' elsewhere in this notebook while this function used to
  look for 'improved'.
  """
  df = pd.read_excel(file_name, header=None).iloc[:,:-1]
  df.columns= ['Dataset Name', 'Algorithm Name', 'Cross Validation [1-10]', 'HyperParamaters Values', 'ACC', 'TPR', 'FPR', 'Precision','AUC', 'PR-CURVE', 'Training Time', 'Inference Time']

  on_dataset = df['Dataset Name'] == dataset_name
  algorithm = df['Algorithm Name'].astype(str).str.lower()

  def scores_for(algorithm_name):
    # Metric values of one algorithm on the requested dataset.
    return df.loc[on_dataset & (algorithm == algorithm_name), metric].values.tolist()

  sgd = scores_for('sgd')
  swa = scores_for('swa')
  improved = scores_for('improved')

  # The Friedman test needs one score per algorithm for every fold.
  sizes = (len(sgd), len(swa), len(improved))
  if min(sizes) == 0 or len(set(sizes)) != 1:
    print(f'Skipping {dataset_name}: need the same non-zero number of scores per algorithm, got {sizes}')
    return

  stat, p = friedman_test(sgd, swa, improved)
  # interpret
  alpha = 0.05
  if p > alpha:
    print('Same distributions (fail to reject H0)')
  else:
    post_hoc_test(sgd, swa, improved)


In [None]:
"""
  MAIN
"""
# parse args
args = get_args()

# record the invoking command line next to the outputs
os.makedirs(args.dir, exist_ok=True)
with open(os.path.join(args.dir, 'command.sh'), 'w') as f:
    f.write(' '.join(sys.argv))
    f.write('\n')

torch.backends.cudnn.benchmark = True
# Seed every random generator in play, not only torch's: the hyper-parameter
# search draws candidates with `random.choice`, and KFold(shuffle=True) without
# a random_state shuffles with NumPy's global generator.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed(args.seed)

print('Using model %s' % args.model)
model_cfg = VGG16()

# NOTE(review): `args.imrpoved` defaults to True, so unless that default is
# changed the 'SGD' label below is never selected.
if args.swa:
    print('Using SWA')
    alg_name = 'SWA'
elif args.imrpoved:
    print('Using SWA Improved')
    alg_name = 'Improved'
else:
    print('Using SGD')
    alg_name = 'SGD'

# loading dataset from torchvision package
print('Loading dataset %s from %s' % (args.dataset, args.data_path))
ds = getattr(torchvision.datasets, args.dataset)
path = os.path.join(args.data_path, args.dataset.lower())

dataset, num_classes = download_dataset()


values = []
columns = ['Dataset Name', 'Algorithm Name', 'Cross Validation [1-10]', 'HyperParamaters Values', 'ACC', 'TPR', 'FPR', 'Precision','AUC', 'PR-CURVE', 'Training Time', 'Inference Time']

# hyper parameters optimization
best_parameters = hyper_parameters_optimization(dataset)

# outer k-fold Cross Validation
kfold = KFold(n_splits=10, shuffle=True)
for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
    train_subs = torch.utils.data.dataset.Subset(dataset, train_ids)
    test_subs = torch.utils.data.dataset.Subset(dataset, test_ids)

    train_loader = torch.utils.data.DataLoader(
        train_subs,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        shuffle=True)

    # Evaluation does not depend on sample order, so no shuffling is needed.
    test_loader = torch.utils.data.DataLoader(
        test_subs,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        shuffle=False)

    res = evaluate_model(best_parameters[0], best_parameters[1], train_loader, test_loader, num_classes)
    value = [args.dataset, alg_name, fold+1, best_parameters] + list(res['metrics'].values())
    values.append(value)

df_results = pd.DataFrame(data=values, columns = columns)
if os.path.isfile('results.xlsx'):
    # DataFrame.to_excel has no append mode: read the rows already stored
    # (the first column is the index written by an earlier run), add the new
    # ones and rewrite the file.
    previous_results = pd.read_excel('results.xlsx', index_col=0)
    all_results = pd.concat([previous_results, df_results], ignore_index=True)
    all_results.to_excel('results.xlsx')
else:
    df_results.to_excel('results.xlsx')
print(df_results.to_string())

# significance tests over the collected results of all datasets
if os.path.isfile('datasets.xlsx'):
    for name in ['CIFAR10', 'CIFAR100', 'EMNIST', 'FashionMNIST', 'KMNIST','MNIST', 'QMNIST', 'STL10', 'USPS', 'SVHN', 'SEMEION']:
        print(f'Dataset {name}')
        significant_test('datasets.xlsx',name, 'ACC')
else:
    print('datasets.xlsx not found - skipping the significance tests')