In [11]:
!pip install timm
!pip install monai

[0m

In [13]:
import os
from tqdm import tqdm
import pickle
import argparse
import time
import torch
import yaml
from torch.optim import SGD, Adam
from torch.nn import CrossEntropyLoss
from IPython.display import display

from utils import set_seed, load_model, save, get_model, update_optimizer, get_data
from epoch import train_epoch, val_epoch, test_epoch
import cli
import greenstand_utils as gu  # GREENSTAND
from monai.losses import FocalLoss # GREENSTAND

def load_config_file(hyperparameter_config_file='hyperparameters.yaml'):
    """Read a YAML hyperparameter file and return its parsed contents.

    Args:
        hyperparameter_config_file: Path of the YAML file to open. Defaults
            to ``'hyperparameters.yaml'``.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's contents.
    """
    with open(hyperparameter_config_file) as config_stream:
        return yaml.safe_load(config_stream)

def get_args(contents):
    """Flatten a config mapping into an argparse-style list of string tokens.

    Every key becomes a ``--key`` flag followed by its value rendered with
    ``str``. A list or tuple value is expanded into one token per element
    (``{'k': [1, 3]}`` -> ``['--k', '1', '3']``) so that multi-value options
    receive separate tokens instead of the single string ``'[1, 3]'``.

    Args:
        contents: Mapping of option name to value, e.g. the result of
            ``load_config_file``. ``None`` (what an empty YAML file parses
            to) is treated as an empty mapping.

    Returns:
        A flat list of strings suitable for ``parser.parse_args(args=...)``.
    """
    new_args = []
    for item in (contents or {}):
        value = contents[item]
        new_args.append('--' + item)
        if isinstance(value, (list, tuple)):
            # One token per element, matching `--k 1 3 5 10` on a command line.
            new_args.extend(str(element) for element in value)
        else:
            new_args.append(str(value))
    return new_args


def train(args):
    """Train a model, checkpoint it, and evaluate the best checkpoint on the test split.

    Steps performed:
      1. Seed the run and build the train/val/test loaders through
         ``gu.sync_split_get_dataloaders``.
      2. Load the model through ``gu.load_preloaded_model`` and pick the loss
         (``FocalLoss`` or ``CrossEntropyLoss``) and the optimizer (``Adam``
         or Nesterov ``SGD``) from the ``'y'`` flags on ``args``.
      3. For ``args.n_epochs`` epochs, run ``train_epoch`` and ``val_epoch``,
         save the weights after every epoch, and additionally save them as
         the "best" checkpoint whenever validation accuracy improves.
      4. Reload the best checkpoint, run ``test_epoch``, print and display the
         results, and pickle all collected metrics.

    Files are written under ``<cwd>/results/<save_name_xp>/``:
    ``<name>_weights.tar``, ``<name>_weights_best_acc.tar`` and ``<name>.pkl``.

    Args:
        args: Parsed argument namespace. Attributes read here: ``prefixes``
            (comma-separated string, or an already-split list),
            ``use_focal_loss``, ``use_adam_optimizer``, ``use_gpu``, ``lr``,
            ``mu``, ``k``, ``n_epochs`` and ``save_name_xp``.
            Note: ``args.prefixes`` is replaced in place by the split list.

    Returns:
        ``acc_test_ba``, the second value returned by ``test_epoch`` for the
        best-validation-accuracy checkpoint.
    """
    set_seed(args, use_gpu=torch.cuda.is_available())

    # Get Data  # GREENSTAND
    g_args = vars(args)  # GREENSTAND - same dict object as args.__dict__
    # Split only while 'prefixes' is still the raw comma-separated string, so
    # calling train() again with the same namespace does not fail on a list.
    if isinstance(g_args['prefixes'], str):
        g_args['prefixes'] = g_args['prefixes'].split(',')  # GREENSTAND
    train_loader, val_loader, test_loader, dataset_attributes = gu.sync_split_get_dataloaders(g_args, True)  # GREENSTAND
    print(f"Dataset Attributes: {dataset_attributes}")  # GREENSTAND

    model = gu.load_preloaded_model(args, dataset_attributes)  # GREENSTAND - Get pretrained model if specified

    # GREENSTAND - Choose loss
    if args.use_focal_loss == 'y':
        criteria = FocalLoss(to_onehot_y=True)
    else:
        criteria = CrossEntropyLoss()

    if args.use_gpu:
        torch.cuda.set_device(0)
        model.cuda()
        criteria.cuda()

    # GREENSTAND - Choose Optimizer
    if args.use_adam_optimizer == 'y':
        optimizer = Adam(model.parameters(), lr=args.lr, weight_decay=args.mu)
    else:
        optimizer = SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.mu, nesterov=True)

    # Containers for storing metrics over epochs; they are passed to
    # train_epoch / val_epoch and end up in the pickled results below.
    loss_train, acc_train, topk_acc_train = [], [], []
    loss_val, acc_val, topk_acc_val, avgk_acc_val, class_acc_val = [], [], [], [], []

    save_name = args.save_name_xp.strip()
    save_dir = os.path.join(os.getcwd(), 'results', save_name)
    # exist_ok avoids the check-then-create race of a separate os.path.exists().
    os.makedirs(save_dir, exist_ok=True)

    print('args.k : ', args.k)

    lmbda_best_acc = None
    best_val_acc = float('-inf')

    for epoch in tqdm(range(args.n_epochs), desc='epoch', position=0):
        t = time.time()
        optimizer = update_optimizer(optimizer, lr_schedule=dataset_attributes['lr_schedule'], epoch=epoch)

        loss_epoch_train, acc_epoch_train, topk_acc_epoch_train = train_epoch(model, optimizer, train_loader,
                                                                              criteria, loss_train, acc_train,
                                                                              topk_acc_train, args.k,
                                                                              dataset_attributes['n_train'],
                                                                              args.use_gpu)

        loss_epoch_val, acc_epoch_val, topk_acc_epoch_val, \
        avgk_acc_epoch_val, lmbda_val = val_epoch(model, val_loader, criteria,
                                                  loss_val, acc_val, topk_acc_val, avgk_acc_val,
                                                  class_acc_val, args.k, dataset_attributes, args.use_gpu)

        # save model at every epoch (the same file is overwritten each time)
        save(model, optimizer, epoch, os.path.join(save_dir, save_name + '_weights.tar'))

        # save model with best val accuracy, together with the lmbda value
        # val_epoch returned for that epoch (reused at test time)
        if acc_epoch_val > best_val_acc:
            best_val_acc = acc_epoch_val
            lmbda_best_acc = lmbda_val
            save(model, optimizer, epoch, os.path.join(save_dir, save_name + '_weights_best_acc.tar'))

        print()
        print(f'epoch {epoch} took {time.time()-t:.2f}')
        print(f'loss_train : {loss_epoch_train}')
        print(f'loss_val : {loss_epoch_val}')
        print(f'acc_train : {acc_epoch_train} / topk_acc_train : {topk_acc_epoch_train}')
        print(f'acc_val : {acc_epoch_val} / topk_acc_val : {topk_acc_epoch_val} / '
              f'avgk_acc_val : {avgk_acc_epoch_val}')

    # load weights corresponding to best val accuracy and evaluate on test
    load_model(model, os.path.join(save_dir, save_name + '_weights_best_acc.tar'), args.use_gpu)
    loss_test_ba, acc_test_ba, topk_acc_test_ba, \
    avgk_acc_test_ba, class_acc_test, confuse = test_epoch(model, test_loader, criteria, args.k,
                                                  lmbda_best_acc, args.use_gpu,
                                                  dataset_attributes)

    print("Average test accuracy: {}".format(avgk_acc_test_ba))
    print("Average class accuracies: {}".format(class_acc_test))
    display(confuse)

    # Collect every metric in one dictionary and pickle it next to the weights
    results = {'loss_train': loss_train, 'acc_train': acc_train, 'topk_acc_train': topk_acc_train,
               'loss_val': loss_val, 'acc_val': acc_val, 'topk_acc_val': topk_acc_val, 'class_acc_val': class_acc_val,
               'avgk_acc_val': avgk_acc_val,
               'test_results': {'loss': loss_test_ba,
                                'accuracy': acc_test_ba,
                                'topk_accuracy': topk_acc_test_ba,
                                'avgk_accuracy': avgk_acc_test_ba,
                                'class_acc_dict': class_acc_test},
               'params': args.__dict__}

    with open(os.path.join(save_dir, save_name + '.pkl'), 'wb') as f:
        pickle.dump(results, f)

    return acc_test_ba
        
def predict(args):
    """Evaluate a preloaded model on the test split without any training.

    Seeds the run, builds the data loaders through
    ``gu.sync_split_get_dataloaders``, loads the model through
    ``gu.load_preloaded_model_prediction``, runs ``test_epoch`` once, and
    prints/displays the outcome. No optimizer is created because no
    parameters are updated here.

    The collected metrics are pickled to
    ``<cwd>/results/<save_name_xp>/<save_name_xp>_predict.pkl`` when
    ``args.save_name_xp`` is set. The ``_predict`` suffix keeps the file
    separate from the ``<save_name_xp>.pkl`` written by ``train``.

    Args:
        args: Parsed argument namespace. Attributes read here: ``prefixes``
            (comma-separated string, or an already-split list),
            ``use_focal_loss``, ``use_gpu``, ``k`` and, optionally,
            ``save_name_xp``.
            Note: ``args.prefixes`` is replaced in place by the split list.

    Returns:
        ``acc_test_ba``, the second value returned by ``test_epoch``
        (the same value ``train`` returns).
    """
    set_seed(args, use_gpu=torch.cuda.is_available())

    # Get Data  # GREENSTAND
    g_args = vars(args)  # GREENSTAND - same dict object as args.__dict__
    # Split only while 'prefixes' is still the raw comma-separated string, so
    # calling predict() again with the same namespace does not fail on a list.
    if isinstance(g_args['prefixes'], str):
        g_args['prefixes'] = g_args['prefixes'].split(',')  # GREENSTAND
    train_loader, val_loader, test_loader, dataset_attributes = gu.sync_split_get_dataloaders(g_args, True)  # GREENSTAND
    print(f"Dataset Attributes: {dataset_attributes}")  # GREENSTAND

    model = gu.load_preloaded_model_prediction(args, dataset_attributes)  # GREENSTAND - Get pretrained model if specified

    # GREENSTAND - Choose loss
    if args.use_focal_loss == 'y':
        criteria = FocalLoss(to_onehot_y=True)
    else:
        criteria = CrossEntropyLoss()

    if args.use_gpu:
        torch.cuda.set_device(0)
        model.cuda()
        criteria.cuda()

    # These stay empty in prediction mode; they are kept so the results
    # dictionary has the same keys as the one produced by train().
    loss_train, acc_train, topk_acc_train = [], [], []
    loss_val, acc_val, topk_acc_val, avgk_acc_val, class_acc_val = [], [], [], [], []

    # No validation pass is run here, so there is no lmbda value to forward.
    lmbda_best_acc = None

    # evaluate the preloaded weights on the test split
    loss_test_ba, acc_test_ba, topk_acc_test_ba, \
    avgk_acc_test_ba, class_acc_test, confuse = test_epoch(model, test_loader, criteria, args.k,
                                                  lmbda_best_acc, args.use_gpu,
                                                  dataset_attributes)

    print("Average test accuracy: {}".format(avgk_acc_test_ba))
    print("Average class accuracies: {}".format(class_acc_test))
    display(confuse)

    # Collect the results in a dictionary shaped like the one train() pickles
    results = {'loss_train': loss_train, 'acc_train': acc_train, 'topk_acc_train': topk_acc_train,
               'loss_val': loss_val, 'acc_val': acc_val, 'topk_acc_val': topk_acc_val, 'class_acc_val': class_acc_val,
               'avgk_acc_val': avgk_acc_val,
               'test_results': {'loss': loss_test_ba,
                                'accuracy': acc_test_ba,
                                'topk_accuracy': topk_acc_test_ba,
                                'avgk_accuracy': avgk_acc_test_ba,
                                'class_acc_dict': class_acc_test},
               'params': args.__dict__}

    # Persist the results only when an experiment name is available.
    save_name = (getattr(args, 'save_name_xp', None) or '').strip()
    if save_name:
        save_dir = os.path.join(os.getcwd(), 'results', save_name)
        os.makedirs(save_dir, exist_ok=True)
        with open(os.path.join(save_dir, save_name + '_predict.pkl'), 'wb') as f:
            pickle.dump(results, f)

    return acc_test_ba

In [16]:
# Command-line equivalent quoted by the original note in this cell:
#   python main.py --lr=0.05 --n_epochs=80 --k 1 3 5 10 --model=resnet50 --root=path_to_data --save_name_xp=xp1
#
# In this notebook the arguments are not typed here: every key/value pair in
# hyperparameters.yaml is turned into a `--key value` token by get_args() and
# parsed with the project's CLI parser.
print(f"Use CUDA: {torch.cuda.is_available()}")

config = load_config_file(hyperparameter_config_file='hyperparameters.yaml')

# Parse the arguments before branching so that `args` is defined on every
# path; the grid-search branch also needs it, and on a fresh kernel it would
# otherwise raise NameError.
arg_list = get_args(config)
parser = argparse.ArgumentParser()
cli.add_all_parsers(parser)
args = parser.parse_args(args=arg_list)

if config['grid_search'] == 'y':
    gu.grid_search(args, {})
elif config['train_model'] == 'y':
    train(args)
else:
    predict(args)

Use CUDA: True
Seed:	 1
Creating metadata file with pre-determined train, val, test splits...
Loading datasets...
Writing metadata.json...
Creating data loaders...
Loading datasets...
Dataset Attributes: {'n_train': 38844, 'n_val': 9713, 'n_test': 12147, 'n_classes': 16, 'lr_schedule': [40, 50, 60], 'class2num_instances': {'train': {0: 138, 1: 6480, 2: 2549, 3: 6400, 4: 126, 5: 203, 6: 40, 7: 1002, 8: 6399, 9: 45, 10: 6890, 11: 1955, 12: 38, 13: 80, 14: 6400, 15: 99}, 'val': {0: 35, 1: 1620, 2: 637, 3: 1600, 4: 32, 5: 51, 6: 11, 7: 250, 8: 1600, 9: 11, 10: 1722, 11: 489, 12: 10, 13: 20, 14: 1600, 15: 25}, 'test': {0: 44, 1: 2025, 2: 797, 3: 2000, 4: 40, 5: 64, 6: 13, 7: 314, 8: 2000, 9: 15, 10: 2154, 11: 612, 12: 12, 13: 26, 14: 2000, 15: 31}}, 'class_to_idx': {'ACACAURI': 0, 'ANACOCCI': 1, 'ANNOMURI': 2, 'AZADINDI': 3, 'CATALONG': 4, 'CEDRODOR': 5, 'DOMBTORR': 6, 'GLIRSEPI': 7, 'GMELARBO': 8, 'GREVROBU': 9, 'MANGINDI': 10, 'MORIOLEI': 11, 'SENNSIAM': 12, 'SIMAGLAU': 13, 'TAMAINDI': 14

train: 100%|██████████| 2428/2428 [08:15<00:00,  4.90it/s]
  batch_proba = F.softmax(batch_output_val)
val: 100%|██████████| 608/608 [02:23<00:00,  4.24it/s]
epoch:  33%|███▎      | 1/3 [10:43<21:26, 643.27s/it]


epoch 0 took 643.27
loss_train : 0.04615113477682719
loss_val : 0.04434975903462833
acc_train : 0.38966120893831735 / topk_acc_train : {1: 0.38966120893831735}
acc_val : 0.419231957170802 / topk_acc_val : {1: 0.419231957170802} / avgk_acc_val : {1: 0.42540924534129515}


train: 100%|██████████| 2428/2428 [08:30<00:00,  4.75it/s]
val: 100%|██████████| 608/608 [02:11<00:00,  4.64it/s]
epoch:  67%|██████▋   | 2/3 [21:28<10:44, 644.51s/it]


epoch 1 took 645.38
loss_train : 0.04490549136283199
loss_val : 0.04525294318132385
acc_train : 0.4087375141592009 / topk_acc_train : {1: 0.4087375141592009}
acc_val : 0.4442499742612993 / topk_acc_val : {1: 0.4442499742612993} / avgk_acc_val : {1: 0.44445588386698237}


train:   1%|▏         | 31/2428 [00:06<08:52,  4.50it/s]
epoch:  67%|██████▋   | 2/3 [21:35<10:47, 647.78s/it]


KeyboardInterrupt: 