In [12]:
# Small constant; not referenced anywhere in the visible code.
# NOTE(review): presumably a numerical-stability term used elsewhere -- confirm or remove.
epsilon = 1e-7

# parser = argparse.ArgumentParser(description='PyTorch training for deep abstaining classifiers',
#     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

class argparser():
    """Plain settings container standing in for a parsed ``argparse`` namespace.

    Every constructor argument is stored unchanged as an attribute of the
    same name; the rest of the script reads its configuration from the
    module-level ``args`` instance of this class.

    The four trailing parameters (``parallel_device_count``,
    ``widen_factor``, ``log_file``, ``no_overwrite``) are read by the script
    (data-parallel GPU selection, the wide-resnet branch of ``getNetwork``
    and checkpoint naming in ``test``), so they are defined here with
    defaults to keep those paths from failing with ``AttributeError``.
    They are placed last so existing positional calls keep their meaning.
    """

    def __init__(self,k_p=0.1,k_i=0.1,k_d=0.05,abst_rate=None,output_path='./',expt_name='test_exp',
                 save_best_model=False,save_val_scores=False,
                 data_parallel=False,nesterov=False,resume=False,eval_model=None,use_gpu=False,
                 save_train_scores=False,exclude_train_indices=None,label_noise_info=None,
                 rand_labels=None,alpha_init_factor=64,alpha_final=1.0,loss_fn='dac_loss',depth=16,
                 learn_epochs=10,test_batch_size=128,batch_size=128,dataset='cifar10',dropout=0.2,net_type='vggnet',lr=0.001,
                 parallel_device_count=None,widen_factor=10,log_file=None,no_overwrite=False):
        # Optimisation / architecture settings.
        self.lr=lr
        self.net_type=net_type
        self.dropout=dropout
        self.dataset=dataset
        self.batch_size=batch_size
        self.test_batch_size=test_batch_size
        self.learn_epochs=learn_epochs
        self.depth=depth
        # Abstaining-loss settings (loss_fn=None selects plain cross entropy in the script).
        self.loss_fn=loss_fn
        self.alpha_final=alpha_final
        self.alpha_init_factor=alpha_init_factor
        # Label-noise settings; handed to label_noise.label_noise via `args`.
        self.rand_labels=rand_labels
        self.label_noise_info=label_noise_info
        self.exclude_train_indices=exclude_train_indices
        # Run-mode and output switches.
        self.save_train_scores=save_train_scores
        self.use_gpu=use_gpu
        self.eval_model=eval_model
        self.resume=resume
        self.nesterov=nesterov
        self.data_parallel=data_parallel
        self.save_val_scores=save_val_scores
        self.save_best_model=save_best_model
        self.expt_name=expt_name
        self.output_path=output_path
        # Target abstention rate and PID gains (only used when abst_rate is set).
        self.abst_rate=abst_rate
        self.k_p=k_p
        self.k_i=k_i
        self.k_d=k_d
        # Attributes the script reads but the original container never defined.
        self.parallel_device_count=parallel_device_count
        # NOTE(review): 10 is an assumed default for the wide-resnet widening factor -- confirm.
        self.widen_factor=widen_factor
        self.log_file=log_file
        self.no_overwrite=no_overwrite
        


# Module-level settings object read throughout the script; built with all defaults.
args = argparser()


import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.nn.modules.loss import _Loss
import os
import sys
import time
import datetime
from torch.autograd import Variable
from utils import gpu_utils,  label_noise
import pdb
import numpy as np
from networks import wide_resnet,lenet,vggnet, resnet, resnet2, cnn
from networks import config as cf

#import dac_loss_pid
#import dac_loss

from loss_functions import loss_fn_dict

try:
    import cPickle as cp
except ModuleNotFoundError: #no cPickle in python 3
    import pickle as cp

# Epoch dilation factor: both epoch counts below are scaled by `epdl`
# and truncated to int (time compression or dilation).
epdl = 1.0
epochs = int(10 * epdl)
learn_epochs = int(0 * epdl)

# Extra epoch at which test() also attempts a checkpoint save; None disables it.
save_epoch_model = None
if save_epoch_model is not None:
    save_epoch_model = int(save_epoch_model * epdl)

# Optional log file: when set, stdout and stderr are both redirected into it.
log_file = None
if log_file is not None:
    sys.stderr = sys.stdout = open(log_file, 'w')

# Seed the CPU random number generator for reproducibility.
seed = 1
torch.manual_seed(seed)

# Loader batch sizes and epoch bookkeeping used by the training loop.
batch_size = test_batch_size = 128
start_epoch = 1
num_epochs = epochs
best_acc = 0.

print('\n[Phase 1] : Data Preparation')
# get_CIFAR10 / get_SVHN are defined in other notebook cells and must have
# been executed before this point.
if args.dataset == 'cifar10':
    trainset, testset, num_classes = get_CIFAR10(root="./")
elif args.dataset == 'svhn':
    trainset, testset, num_classes = get_SVHN()
else:
    # Fail here with a clear message instead of a NameError on `num_classes`
    # / `trainset` further down.
    raise ValueError("Unsupported dataset %r: expected 'cifar10' or 'svhn'" % (args.dataset,))
sys.stdout.flush()
#abstain class id is the last class
abstain_class_id = num_classes
#simulate label noise if needed
trainset = label_noise.label_noise(args, trainset, num_classes)
#set data loaders
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=test_batch_size, shuffle=False, num_workers=2)

# Second, un-shuffled pass over the training set, used only for dumping
# per-sample softmax scores.
if args.save_train_scores:
    train_perf_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=False, num_workers=2)


# GPU specific stuff. 
# TODO: move to gpu_utils
# Use CUDA whenever a device is present. The original hard-coded True here,
# which made every later .cuda() call fail on CPU-only machines.
# NOTE(review): this keeps the original behaviour of using the GPU even when
# args.use_gpu is False -- confirm whether use_gpu=False should force CPU.
use_cuda = torch.cuda.is_available()
cuda_device = None
if args.use_gpu:
    if not args.data_parallel:
        #keep trying to get a GPU if use GPU is specified
        while cuda_device is None:
            cuda_device = gpu_utils.get_cuda_device(args)
        use_cuda = True
    else: #data parallel training
        # The settings object may not define parallel_device_count at all;
        # treat a missing attribute like None ("use every free GPU").
        parallel_device_count = getattr(args, 'parallel_device_count', None)
        cuda_devices = gpu_utils.get_free_gpu_list(torch.cuda.device_count())
        if parallel_device_count is None:
            num_devices = len(cuda_devices)
        else:
            num_devices = parallel_device_count
            cuda_devices = cuda_devices[0:num_devices]

        if len(cuda_devices) == 0:
            print("No free GPUs, exitting")
            # Non-zero status: this is a failure, not a clean exit.
            sys.exit(1)

        # The first free device is the primary one; later code moves the
        # model and batches onto it.
        cuda_device = cuda_devices[0]

        if len(cuda_devices) < num_devices:
            print("Warning: Specified number of GPus to use is %d but only %d available" %(num_devices,len(cuda_devices)))
        if len(cuda_devices) == 1:
            print("warning: data parallel requested, but only 1 free GPU available")
        use_cuda = True
        print("Using GPUs %s" %(cuda_devices))

if use_cuda:
    torch.cuda.manual_seed(seed)


#only evaluate model and output softmaxes on train and test set
if args.eval_model is not None:
    print('\n[Evaluation only] : Model setup')
    # NOTE(review): torch.load unpickles a complete model object here; only
    # point eval_model at checkpoint files you trust.
    net = torch.load(args.eval_model, map_location=lambda storage, loc: storage )['net']
    if use_cuda:
        net = net.cuda(cuda_device)
        cudnn.benchmark = True

    net.eval()
    expt_name = str(args.expt_name) if args.expt_name is not None else ""
    expt_name = "_"+expt_name
    # Common prefix of both output files (written to the working directory).
    scores_prefix = os.path.basename(args.eval_model)+expt_name

    # Build the un-shuffled train loader locally: `train_perf_loader` only
    # exists when args.save_train_scores is set, so relying on it here could
    # raise NameError.
    eval_train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=False, num_workers=2)

    train_softmax_scores = []
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(eval_train_loader):
            print("train batch %s" %(batch_idx))
            if use_cuda:
                inputs = inputs.cuda(cuda_device) # GPU settings
            outputs = net(inputs)               # Forward Propagation
            p_out = F.softmax(outputs,dim=1)
            train_softmax_scores.append(p_out.data)

    train_scores = torch.cat(train_softmax_scores).cpu().numpy()
    train_scores_file = scores_prefix+".train_scores_eval"
    # np.save appends ".npy"; report the name of the file actually written.
    print('Saving train softmax scores in evaluation mode to %s' %(train_scores_file+".npy"))
    np.save(train_scores_file, train_scores)

    test_softmax_scores=[]
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            print("test batch %s" %(batch_idx))
            if use_cuda:
                inputs = inputs.cuda(cuda_device) # GPU settings
            outputs = net(inputs)               # Forward Propagation
            p_out = F.softmax(outputs,dim=1)
            test_softmax_scores.append(p_out.data)

    test_scores = torch.cat(test_softmax_scores).cpu().numpy()
    val_scores_file = scores_prefix+".val_scores_eval"
    print('Saving validation softmax scores in evaluation mode to %s' %(val_scores_file+".npy"))
    np.save(val_scores_file, test_scores)
    sys.exit(0)



def getNetwork(args):
    """Build the network selected by ``args.net_type``.

    The output layer gets one extra unit (the abstention class) unless
    ``args.loss_fn`` is None. The class count comes from the module-level
    ``num_classes``. For 'vggnet' and 'resnet' the weights are loaded from
    hard-coded ``*.pt`` files in the working directory instead of being
    re-initialised.

    Args:
        args: settings object; reads ``net_type``, ``loss_fn``, ``depth``,
            ``dataset`` and, for 'wide-resnet', ``widen_factor`` and
            ``dropout``.

    Returns:
        Tuple ``(net, file_name)``: the model and a short name used when
        building checkpoint file names.

    Raises:
        SystemExit: (status 1) for an unknown ``net_type`` or an unsupported
            resnet2 depth.
    """
    # One extra output unit for the abstention class when an abstaining loss is used.
    if args.loss_fn is None:
        extra_class = 0
    else:
        extra_class = 1

    if (args.net_type == 'lenet'):
        net = lenet.LeNet(num_classes+extra_class)
        file_name = 'lenet'
        net.apply(lenet.conv_init)

    elif (args.net_type == 'vggnet'):
        net = vggnet.VGG(args.depth, num_classes+extra_class)
        file_name = 'vgg-'+str(args.depth)
        # Load onto the CPU: the model itself is still on the CPU here and is
        # moved to the GPU later, so a checkpoint saved from a GPU also loads
        # on machines without one.
        net.load_state_dict(torch.load("vgg16_cifar_model_9.pt", map_location='cpu'))

    elif (args.net_type == 'resnet'):
        net = resnet.ResNet(args.depth, num_classes+extra_class)
        file_name = 'resnet-'+str(args.depth)
        net.load_state_dict(torch.load("resnet18_cifar_model_9.pt", map_location='cpu'))

    elif (args.net_type == 'resnet2'):

        # Single-channel datasets need a 1-channel input layer.
        if args.dataset == 'mnist' or args.dataset == 'fashion':
            num_channels = 1
        else:
            num_channels = 3

        if args.depth == 34:
            net = resnet2.ResNet34(num_classes=num_classes+extra_class,num_input_channels=num_channels)
            file_name = 'resnet2-34'

        elif args.depth == 18:
            net = resnet2.ResNet18(num_classes=num_classes+extra_class,num_input_channels=num_channels)
            file_name = 'resnet2-18'

        else:
            print('Error : Resnet-2 Network depth should either be 18 or 34')
            # Non-zero status so callers/shells can tell this was a failure.
            sys.exit(1)

        net.apply(resnet2.conv_init)

    elif (args.net_type == 'wide-resnet'):
        net = wide_resnet.Wide_ResNet(args.depth, args.widen_factor, args.dropout, num_classes+extra_class)
        file_name = 'wide-resnet-'+str(args.depth)+'x'+str(args.widen_factor)
        net.apply(wide_resnet.conv_init)

    else:
        print('Error : Network should be either [LeNet / VGGNet / ResNet / ResNet2 / Wide_ResNet]')
        sys.exit(1)

    return net, file_name



print('\n[Phase 2] : Model setup')
if args.resume:
    # Load checkpoint
    print('| Resuming from checkpoint...')
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if not os.path.isdir('checkpoint'):
        raise FileNotFoundError('Error: No checkpoint directory found!')
    # Only the short architecture name is needed here to locate the file.
    if args.net_type is None:
        file_name = 'conv_net'
    else:
        _, file_name = getNetwork(args)
    checkpoint_dir = './checkpoint/'+args.dataset+os.sep
    checkpoint_path = checkpoint_dir+file_name+'.t7'
    if not os.path.isfile(checkpoint_path):
        # test() writes checkpoints as <file_name>_expt_name_<expt>.t7, so
        # fall back to that name when the plain one does not exist.
        named_path = checkpoint_dir+file_name+'_expt_name_'+str(args.expt_name)+'.t7'
        if os.path.isfile(named_path):
            checkpoint_path = named_path
    # Load onto the CPU first (same map_location as the evaluation-only
    # path); the model is moved to the GPU later if CUDA is in use.
    checkpoint = torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
    net = checkpoint['net']
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']

else:
    print('| Building net')
    if args.net_type is None:
        print("Using Default conv net")
        file_name = 'conv_net'
        if args.loss_fn is None: #no abstention. use the actual number of classes
            net = cnn.ConvNet(num_classes,args.dropout)
        else: #use extra class for abstention 
            net = cnn.ConvNet(num_classes+1,args.dropout)

    else:
        print('| Building net type [' + args.net_type + ']...')
        net, file_name = getNetwork(args)
sys.stdout.flush()


# Choose the training criterion, then move it to the GPU when CUDA is in use.
if args.loss_fn is None:
    # Plain cross entropy: no abstention class.
    print('Using regular  (non-abstaining) loss function during training')
    criterion = nn.CrossEntropyLoss()
elif args.loss_fn == 'dac_loss':
    # Keyword arguments common to both abstaining-loss variants.
    shared_loss_kwargs = dict(
        model=net,
        learn_epochs=learn_epochs,
        total_epochs=epochs,
        use_cuda=use_cuda,
        alpha_final=args.alpha_final,
        alpha_init_factor=args.alpha_init_factor,
    )
    if args.abst_rate is None:
        criterion = loss_fn_dict['dac_loss'](**shared_loss_kwargs)
    else:
        # A target abstention rate selects the PID-controlled variant.
        pid_tunings = (args.k_p, args.k_i, args.k_d)
        criterion = loss_fn_dict['dac_loss_pid'](
            cuda_device=cuda_device,
            abst_rate=args.abst_rate,
            pid_tunings=pid_tunings,
            **shared_loss_kwargs
        )
else:
    print("Unknown loss function")
    sys.exit(0)

if use_cuda:
    criterion = criterion.cuda(cuda_device)


# Move the network to the GPU, wrapping it for multi-GPU use first if requested.
if use_cuda:
    if args.data_parallel:
        net = torch.nn.DataParallel(net, device_ids=cuda_devices)
    net = net.cuda(cuda_device)
    cudnn.benchmark = True


def get_hms(seconds):
    """Split a duration in seconds into an ``(hours, minutes, seconds)`` tuple."""
    whole_minutes, secs = divmod(seconds, 60)
    return (*divmod(whole_minutes, 60), secs)

#pdb.set_trace()
def train(epoch):
    """Run one training epoch over ``trainloader`` and print running statistics.

    A new SGD optimizer (momentum 0.9, weight decay 5e-4) is created on every
    call. For 'mnist' the learning rate is stepped by hand through
    ``args.lr``; for every other dataset it comes from ``cf.learning_rate``.
    Works on the module-level ``net``, ``criterion`` and ``trainloader``.

    The progress line reports two accuracies: Acc@1 is correct / all samples
    seen, Acc@2 is correct / samples that were not abstained on.

    Args:
        epoch: current epoch number; forwarded to the abstaining loss and,
            divided by ``epdl``, used for the learning-rate schedule.
    """
    net.train()
    correct = 0
    total = 0
    abstain = 0

    if args.dataset == 'mnist':
        if int(epoch/epdl) > 5 and  int(epoch/epdl) <= 20:
            args.lr = 0.01
        if int(epoch/epdl) > 20 and int(epoch/epdl) <=50:
            args.lr = 0.001

        optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, 
            nesterov=args.nesterov, weight_decay=5e-4)
        print('\n=> Training Epoch #%d, LR=%.4f' %(epoch, args.lr))

    else: #cifar 10/100/stl-10/tin200/fashion
        scheduled_lr = cf.learning_rate(args.lr, int(epoch/epdl))
        optimizer = optim.SGD(net.parameters(), lr=scheduled_lr,
         momentum=0.9, weight_decay=5e-4,nesterov=args.nesterov)
        print('\n=> Training Epoch #%d, LR=%.4f' %(epoch, scheduled_lr))

    # Exact number of iterations; the previous (len(trainset)//batch_size)+1
    # over-counted by one whenever the dataset size was a multiple of the
    # batch size.
    num_batches = len(trainloader)

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(cuda_device), targets.cuda(cuda_device) # GPU settings
        optimizer.zero_grad()
        outputs = net(inputs)               # Forward Propagation
        # The abstaining losses additionally take the epoch number.
        if args.loss_fn is None:
            loss = criterion(outputs, targets)
        else:
            loss = criterion(outputs, targets, epoch)  # Loss

        loss.backward()  # Backward Propagation
        optimizer.step() # Optimizer update

        _, predicted = torch.max(outputs.data, 1)
        this_batch_size = targets.size(0)
        total += this_batch_size
        correct += predicted.eq(targets).sum().item()

        # Predictions equal to the extra (last) class id count as abstentions.
        abstained_now = predicted.eq(abstain_class_id).sum().item()
        abstain += abstained_now

        if total-abstain != 0:
            abst_acc = 100.*correct/(float(total-abstain))
        else:
            # Everything abstained so far: report 100%, on the same percent
            # scale and with the same convention as test().
            abst_acc = 100.

        sys.stdout.write('\r')
        sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tAbstained %d Abstention rate %.4f Cumulative Abstention Rate: %.4f Loss: %.4f Acc@1: %.3f%% Acc@2: %.3f%%'
                %(epoch, num_epochs, batch_idx+1,
                    num_batches, abstain, float(abstained_now)/this_batch_size, float(abstain)/total, loss.item(), 100.*correct/float(total), abst_acc))


        sys.stdout.flush()

    #if args.loss_fn == 'dac_loss_pid':
    #criterion.print_abst_stats(epoch)



def save_train_scores(epoch):
    """Dump softmax scores for the whole training set and print the abstention rate.

    Iterates the un-shuffled ``train_perf_loader``, runs the module-level
    ``net`` on every batch and saves the concatenated softmax outputs with
    ``np.save`` to ``<args.output_path><expt_name>.train_scores.epoch_<epoch>``
    (``np.save`` appends ``.npy``).

    The network mode is not changed here, so the model stays in whatever mode
    the caller left it in.
    NOTE(review): a ``net.eval()`` call was commented out in the original;
    confirm that scoring in training mode is intended.

    Args:
        epoch: epoch number, used in the output file name and log lines.
    """
    train_softmax_scores = []
    total = 0
    abstained = 0

    # Scores only: no gradients are needed, so skip building autograd graphs.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(train_perf_loader):
            if use_cuda:
                inputs = inputs.cuda(cuda_device) # GPU settings
            outputs = net(inputs)               # Forward Propagation
            p_out = F.softmax(outputs,dim=1)
            total += p_out.size(0)
            _,predicted = torch.max(p_out.data,1)
            # Predictions equal to the extra (last) class id count as abstentions.
            abstained += predicted.eq(abstain_class_id).sum().item()
            train_softmax_scores.append(p_out.data)

    train_scores = torch.cat(train_softmax_scores).cpu().numpy()
    print('Saving train softmax scores at  Epoch %d' %(epoch))
    fn = args.expt_name if args.expt_name else 'test'
    np.save(args.output_path+fn+".train_scores.epoch_"+str(epoch), train_scores)
    print("\n##### Epoch %d Train Abstention Rate at end of epoch %.4f" 
            %(epoch, float(abstained)/total))


def test(epoch):
    """Evaluate ``net`` on ``testloader``, print metrics and optionally checkpoint.

    Puts the network in eval mode, accumulates loss, accuracy and abstention
    counts over the test loader, and optionally saves the softmax scores
    (``args.save_val_scores``). When the accuracy beats the module-level
    ``best_acc`` or ``epoch`` equals ``save_epoch_model``, and
    ``args.save_best_model`` is set, the model is written under
    ``./checkpoint/<dataset>/``. ``best_acc`` is updated on improvement.

    Acc@1 is correct / all samples; Acc@2 is correct / non-abstained samples.

    Args:
        epoch: current epoch number; forwarded to the abstaining loss and
            used in log lines and output file names.
    """
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    abstain = 0

    if args.save_val_scores:
        val_softmax_scores = []

    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            if use_cuda:
                inputs, targets = inputs.cuda(cuda_device), targets.cuda(cuda_device)
            outputs = net(inputs)
            # The abstaining losses additionally take the epoch number.
            if args.loss_fn is None:
                loss = criterion(outputs, targets)
            else:
                loss = criterion(outputs, targets, epoch)

            if args.save_val_scores:
                val_softmax_scores.append(F.softmax(outputs,dim=1))

            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            # Predictions equal to the extra (last) class id count as abstentions.
            abstain += predicted.eq(abstain_class_id).sum().item()

    if args.save_val_scores:
        val_scores = torch.cat(val_softmax_scores).cpu().numpy()

        print('Saving softmax scores at Validation Epoch %d' %(epoch))
        fn = args.expt_name if args.expt_name else 'test'
        np.save(args.output_path+fn+".val_scores.epoch_"+str(epoch), val_scores)

    acc = 100.*correct/float(total)
    if total-abstain != 0:
        abst_acc = 100.*correct/(float(total-abstain))
    else:
        abst_acc = 100.

    print("\n| Validation Epoch #%d\t\t\tAbstained: %d Loss: %.4f Acc@1: %.2f%% Acc@2: %.2f%% " %(epoch, abstain, test_loss/(batch_idx+1), acc,abst_acc))

    # Save checkpoint when best model (or at the explicitly requested epoch)
    if (acc > best_acc or epoch == save_epoch_model) and args.save_best_model:
        print('| Saving Best model...\t\t\tTop1 = %.2f%%' %(acc))
        state = {
                'net':net,
                'acc':acc,
                'epoch':epoch,
        }
        save_point = './checkpoint/'+args.dataset+os.sep
        # Creates ./checkpoint and the dataset sub-directory in one call and
        # tolerates them already existing.
        os.makedirs(save_point, exist_ok=True)

        if args.expt_name:
            expt_name = args.expt_name
        else:
            # No experiment name (empty or None): derive one from the log
            # file if the settings object has one. getattr guards against
            # settings objects that never defined `log_file`.
            args_log_file = getattr(args, 'log_file', None)
            if args_log_file is not None:
                expt_name = os.path.basename(args_log_file).replace(".log","")
            else:
                expt_name = 'test' #assuming that if a log file has not been specified this is a test run.

        # `no_overwrite` keeps one file per epoch instead of replacing the
        # previous best; a missing attribute is treated as False.
        if getattr(args, 'no_overwrite', False):
            torch.save(state, save_point+file_name+'_expt_name_'+str(expt_name)+'_epoch_'+str(epoch)+'.t7')
        else:
            torch.save(state, save_point+file_name+'_expt_name_'+str(expt_name)+'.t7')
    if acc > best_acc:
        best_acc = acc



print('\n[Phase 3] : Training model')
print('| Training Epochs = ' + str(num_epochs))
print('| Initial Learning Rate = ' + str(args.lr))
sys.stdout.flush()

# Train / (optionally dump train scores) / validate once per epoch and keep a
# running total of the wall-clock time spent.
elapsed_time = 0
for epoch in range(start_epoch, start_epoch + num_epochs):
    epoch_started_at = time.time()

    train(epoch)
    if args.save_train_scores:
        save_train_scores(epoch)
    test(epoch)

    elapsed_time += time.time() - epoch_started_at
    print('| Elapsed time : %d:%02d:%02d' % get_hms(elapsed_time))
    sys.stdout.flush()

# best_acc is the best validation Acc@1 recorded by test().
print('\n[Phase 4] : Testing model')
print('* Test results : Acc@1 = %.2f%%' %(best_acc))


[Phase 1] : Data Preparation
Files already downloaded and verified
Files already downloaded and verified

[Phase 2] : Model setup
| Building net
| Building net type [vggnet]...
using dac loss function


[Phase 3] : Training model
| Training Epochs = 10
| Initial Learning Rate = 0.001

=> Training Epoch #1, LR=0.0010
| Epoch [  1/ 10] Iter[391/391]		Abstained 0 Abstention rate 0.0000 Cumulative Abstention Rate: 0.0000 Loss: 0.0167 Acc@1: 99.540% Acc@2: 99.540%
| Validation Epoch #1			Abstained: 0 Loss: 0.3308 Acc@1: 92.04% Acc@2: 92.04% 
| Elapsed time : 0:00:20

=> Training Epoch #2, LR=0.0010
| Epoch [  2/ 10] Iter[391/391]		Abstained 0 Abstention rate 0.0000 Cumulative Abstention Rate: 0.0000 Loss: 0.0026 Acc@1: 99.604% Acc@2: 99.604%0%
| Validation Epoch #2			Abstained: 0 Loss: 0.3325 Acc@1: 92.03% Acc@2: 92.03% 
| Elapsed time : 0:00:41

=> Training Epoch #3, LR=0.0010
| Epoch [  3/ 10] Iter[391/391]		Abstained 0 Abstention rate 0.0000 Cumulative Abstention Rate: 0.0000 Loss: 0.02

In [13]:
# Persist the trained network to the working directory. This saves the `net`
# object itself (not just a state_dict).
torch.save(net,'DAC_vgg16_cifar_9.pt')

In [78]:
import torchvision.datasets as datasets
import torchvision.transforms as transforms
def get_SVHN():
    """Build the SVHN train and test datasets (downloaded to ``./data`` if absent).

    Training images get a padded random 32x32 crop; both splits are converted
    to tensors and normalised with mean 0.5 / std 0.5 per channel.

    Returns:
        Tuple ``(train_dataset, test_dataset, num_classes)`` with
        ``num_classes == 10``.
    """
    num_classes = 10

    def tensor_and_normalize():
        # Fresh transform objects for each pipeline.
        return [
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]

    transform_train = transforms.Compose(
        [transforms.RandomCrop(32, padding=4)] + tensor_and_normalize()
    )
    transform_test = transforms.Compose(tensor_and_normalize())

    # The generator is consumed in order: the train split is built first,
    # then the test split.
    train_dataset, test_dataset = (
        datasets.SVHN(root='./data', split=split_name, transform=split_transform,
                      target_transform=None, download=True)
        for split_name, split_transform in (('train', transform_train),
                                            ('test', transform_test))
    )
    return train_dataset, test_dataset, num_classes

In [3]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets
def get_CIFAR10(root="./"):
    """Build the CIFAR-10 train and test datasets (downloaded if absent).

    Training images get a padded random 32x32 crop and a random horizontal
    flip; both splits are converted to tensors and normalised with the same
    per-channel mean/std constants.

    Args:
        root: base directory; the data lives in ``<root>/data/CIFAR10``.
            A trailing path separator is optional.

    Returns:
        Tuple ``(train_dataset, test_dataset, num_classes)`` with
        ``num_classes == 10``.
    """
    import os  # local import: this cell may be run before the main script cell

    num_classes = 10
    # Join path components properly; plain string concatenation produced
    # e.g. "/tmpdata/CIFAR10" for root="/tmp".
    data_dir = os.path.join(root, "data", "CIFAR10")
    normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

    train_transform = transforms.Compose(
        [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]
    )
    train_dataset = datasets.CIFAR10(
        data_dir, train=True, transform=train_transform, download=True
    )

    test_transform = transforms.Compose(
        [
            transforms.ToTensor(),
            normalize,
        ]
    )
    test_dataset = datasets.CIFAR10(
        data_dir, train=False, transform=test_transform, download=True
    )

    return train_dataset, test_dataset, num_classes

In [None]:
# Collect per-sample softmax scores for the whole test set.
output=[]
# Inference: use eval mode and skip autograd bookkeeping, so the stored
# tensors do not keep computation graphs alive.
net.eval()
with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(testloader):
        if use_cuda:
            # Same device as the network (matches the rest of the script).
            inputs, targets = inputs.cuda(cuda_device), targets.cuda(cuda_device) # GPU settings

        outputs = net(inputs)               # Forward Propagation
        p_out = F.softmax(outputs,dim=1)
        # extend() iterates over dim 0, so one score row is appended per sample.
        output.extend(p_out)

    

In [None]:
# Notebook display of `p_out`; in file order this is the softmax output of the
# last test batch from the loop in the preceding cell.
p_out