In [1]:
import time
import torch
import torch.autograd as autograd

def eigen_variance(net, criterion, dataloader, n_iters=10, tol=1e-2, verbose=False):
    """
    Run power iteration on the operator implemented by `variance_vec_prod`.

    A standard-normal start vector with one entry per parameter of `net` is
    drawn on the CPU, and `power_method` is applied to the map
    v -> variance_vec_prod(net, criterion, dataloader, v).

    Args:
        net: model whose parameter count fixes the vector length.
        criterion: loss, forwarded to `variance_vec_prod`.
        dataloader: data source, forwarded to `variance_vec_prod`.
        n_iters: maximum number of power iterations.
        tol: stopping tolerance, forwarded to `power_method`.
        verbose: forwarded to `power_method`.

    Returns:
        Whatever `power_method` returns (its final eigenvalue estimate).
    """
    def apply_operator(vec):
        return variance_vec_prod(net, criterion, dataloader, vec)

    start_vec = torch.randn(num_parameters(net))
    return power_method(start_vec, apply_operator, n_iters, tol, verbose)


def eigen_hessian(net, criterion, dataloader, n_iters=10, tol=1e-2, verbose=False):
    """
    Run power iteration on the operator implemented by `hessian_vec_prod`.

    A standard-normal start vector with one entry per parameter of `net` is
    drawn on the CPU, and `power_method` is applied to the map
    v -> hessian_vec_prod(net, criterion, dataloader, v).

    Args:
        net: model whose parameter count fixes the vector length.
        criterion: loss, forwarded to `hessian_vec_prod`.
        dataloader: data source, forwarded to `hessian_vec_prod`.
        n_iters: maximum number of power iterations.
        tol: stopping tolerance, forwarded to `power_method`.
        verbose: forwarded to `power_method`.

    Returns:
        Whatever `power_method` returns (its final eigenvalue estimate).
    """
    def apply_operator(vec):
        return hessian_vec_prod(net, criterion, dataloader, vec)

    start_vec = torch.randn(num_parameters(net))
    return power_method(start_vec, apply_operator, n_iters, tol, verbose)


def variance_vec_prod(net, criterion, dataloader, v):
    """
    Apply the per-sample Hessian variance operator to `v`.

    Every sample i is fed to `Hv_batch` as a batch of one to obtain H_i v and
    then H_i (H_i v). Both are averaged over the samples, and the function
    returns mean_i(H_i H_i v) - H(mean_i(H_i v)), where the second term is
    produced by `hessian_vec_prod` on the whole dataloader.

    Args:
        net: model passed to `Hv_batch` / `hessian_vec_prod`.
        criterion: loss passed to `Hv_batch` / `hessian_vec_prod`.
        dataloader: object exposing the full data as `.X` and `.y`.
        v: flat vector with one entry per model parameter.

    Returns:
        Flat tensor of the same length as `v`.
    """
    inputs, labels = dataloader.X, dataloader.y
    n_samples = len(labels)
    sum_HHv, sum_Hv = 0, 0

    for start in range(n_samples):
        stop = start + 1
        # slicing (rather than indexing) keeps the leading batch dimension
        sample_x = inputs[start:stop].cuda()
        sample_y = labels[start:stop].cuda()

        Hv_sample = Hv_batch(net, criterion, sample_x, sample_y, v)
        HHv_sample = Hv_batch(net, criterion, sample_x, sample_y, Hv_sample)
        sum_HHv += HHv_sample
        sum_Hv += Hv_sample

    mean_HHv = sum_HHv / n_samples
    mean_Hv = sum_Hv / n_samples
    return mean_HHv - hessian_vec_prod(net, criterion, dataloader, mean_Hv)


def hessian_vec_prod(net, criterion, dataloader, v):
    """
    Hessian-vector product averaged over one pass of `dataloader`.

    The loader is rewound to its first sample and `len(dataloader)` batches
    are drawn. Each batch result from `Hv_batch` is weighted by the number of
    samples in that batch, so a smaller final batch no longer counts as much
    as a full one. When every batch has the same size the result equals the
    plain mean over batches.

    Assumes `criterion` averages over the batch (as the default
    `torch.nn.MSELoss` does) -- TODO confirm for other criteria.

    Args:
        net: model passed to `Hv_batch`.
        criterion: loss passed to `Hv_batch`.
        dataloader: iterator yielding `(inputs, targets)`; must support
            `len()` and a writable `idx` attribute.
        v: flat vector with one entry per model parameter.

    Returns:
        Flat CPU tensor with the sample-weighted mean of the batch products.
    """
    n_batchs = len(dataloader)
    # rewind so the pass starts at the first sample and covers each one once
    dataloader.idx = 0

    weighted_sum, n_seen = 0, 0
    for _ in range(n_batchs):
        bx, by = next(dataloader)
        n_batch = len(by)
        weighted_sum += n_batch * Hv_batch(net, criterion, bx.cuda(), by.cuda(), v)
        n_seen += n_batch

    return weighted_sum / n_seen


def Hv_batch(net, criterion, batch_x, batch_y, v):
    """
    Hessian vector multiplication for a single batch.

    Computes d/dw [ grad_w(loss) . v ] by differentiating twice, which yields
    H v without forming the Hessian.

    Args:
        net: model; it is switched to eval mode and left that way.
        criterion: callable `criterion(logits, batch_y)` returning a scalar loss.
        batch_x, batch_y: inputs and targets, already on the model's device.
        v: flat vector with one entry per model parameter, laid out in the
            order of `net.parameters()`. It may live on any device; each slice
            is moved to the device of the gradient it is paired with.

    Returns:
        Flat CPU tensor holding H v, in the same parameter order as `v`.
    """
    net.eval()
    logits = net(batch_x)
    loss = criterion(logits, batch_y)

    # create_graph=True keeps the graph of the gradient so it can be
    # differentiated a second time below.
    grads = autograd.grad(loss, net.parameters(), create_graph=True, retain_graph=True)
    idx, res = 0, 0
    for grad_i in grads:
        ng = torch.numel(grad_i)
        # follow the gradient's device instead of assuming the default GPU
        v_i = v[idx:idx+ng].to(grad_i.device)
        res += torch.dot(v_i, grad_i.view(-1))
        idx += ng

    Hv = autograd.grad(res, net.parameters())
    Hv = [t.data.cpu().view(-1) for t in Hv]
    return torch.cat(Hv)


def power_method(v0, Av_func, n_iters=10, tol=1e-3, verbose=False):
    """
    Power iteration for the dominant eigenvalue of a linear operator.

    Args:
        v0: non-zero start vector (1-D tensor); it is normalised internally.
        Av_func: callable mapping a 1-D tensor v to A v (same shape).
        n_iters: maximum number of applications of `Av_func`.
        tol: stop once the Rayleigh quotient changes by less than
            `tol * |mu|` between two consecutive steps.
        verbose: print the step time and current estimate after every step,
            including the step on which the iteration stops.

    Returns:
        The last Rayleigh quotient v^T A v as a Python float, or the int 0
        when `n_iters` is 0.
    """
    mu = 0
    v = v0/v0.norm()
    for i in range(n_iters):
        time_start = time.time()

        Av = Av_func(v)
        mu_pre = mu
        mu = torch.dot(Av,v).item()

        # report before any early exit so the final step is logged too
        if verbose:
            print('%d-th step takes %.0f seconds, \t %.2e'%(i+1,time.time()-time_start,mu))

        Av_norm = Av.norm()
        if Av_norm.item() == 0:
            # A v vanished: v lies in the null space, normalising would give NaN
            break
        v = Av/Av_norm

        # multiplied-out relative test: avoids dividing by mu when it is 0
        if abs(mu-mu_pre) < tol*abs(mu):
            break
    return mu


def num_parameters(net):
    """
    count the scalar entries summed over every parameter tensor of the model
    """
    return sum(para.data.numel() for para in net.parameters())


In [2]:
import os
import torch
import torchvision.datasets as dsets


class DataLoader:
    """
    Endless mini-batch iterator over two in-memory, index-aligned tensors.

    `next()` never raises StopIteration: once the cursor `idx` has reached the
    end of the data, the next call reshuffles X and y with one shared
    permutation and starts again from the front. `len()` is the number of
    batches needed to cover the data once (the last one may be smaller).
    """

    def __init__(self,X,y,batch_size):
        self.X = X
        self.y = y
        self.batch_size = batch_size
        self.n_samples = len(y)
        self.idx = 0  # position of the next unread sample

    def __len__(self):
        full_batches = self.n_samples // self.batch_size
        has_partial = self.n_samples > full_batches * self.batch_size
        return full_batches + 1 if has_partial else full_batches

    def __iter__(self):
        return self

    def __next__(self):
        if self.idx >= self.n_samples:
            # pass finished: rewind and permute inputs and labels together
            order = torch.randperm(self.n_samples)
            self.X, self.y = self.X[order], self.y[order]
            self.idx = 0

        start = self.idx
        stop = min(start + self.batch_size, self.n_samples)
        self.idx = stop
        return self.X[start:stop], self.y[start:stop]


def load_fmnist(training_size, batch_size=100):
    """
    Build train/test loaders for FashionMNIST (downloaded to
    'data/fashionmnist' if missing).

    Pixels are converted to float and divided by 255; labels are one-hot
    encoded with `to_one_hot`. The training loader keeps only the first
    `training_size` samples, the test loader keeps the whole test split.

    Returns:
        (train_loader, test_loader), both `DataLoader` instances.
    """
    train_set = dsets.FashionMNIST('data/fashionmnist', train=True, download=True)
    train_X = train_set.data[0:training_size].float()/255
    train_y = to_one_hot(train_set.targets[0:training_size])
    train_loader = DataLoader(train_X, train_y, batch_size)

    test_set = dsets.FashionMNIST('data/fashionmnist', train=False,download=True)
    test_X = test_set.data.float()/255
    test_y = to_one_hot(test_set.targets)
    test_loader = DataLoader(test_X, test_y, batch_size)

    return train_loader, test_loader


def load_cifar10(training_size, batch_size=100):
    """
    Build train/test loaders for a two-class CIFAR-10 problem (downloaded to
    'data/cifar10' if missing).

    Only the examples with label 0 or 1 are kept (see `modify_cifar_data`),
    so the training_size is at most 10000. The test loader keeps every
    label-0/1 example of the test split.

    Returns:
        (train_loader, test_loader), both `DataLoader` instances.
    """
    root = 'data/cifar10'

    train_set = dsets.CIFAR10(root, train=True, download=True)
    train_pair = modify_cifar_data(train_set.data, train_set.targets, training_size)
    train_loader = DataLoader(train_pair[0], train_pair[1], batch_size)

    test_set = dsets.CIFAR10(root, train=False, download=True)
    test_pair = modify_cifar_data(test_set.data, test_set.targets)
    test_loader = DataLoader(test_pair[0], test_pair[1], batch_size)

    return train_loader, test_loader


def modify_cifar_data(X, y, n_samples=-1):
    """
    Reduce CIFAR-style data to the two classes with label 0 and 1.

    Args:
        X: numpy image array laid out as (N, H, W, C); it is transposed to
            (N, C, H, W) before use.
        y: sequence of integer labels, one per image.
        n_samples: keep only the first `n_samples` selected examples when
            positive; any value <= 0 keeps all of them.

    Returns:
        (X, y): float images scaled by 1/255 and the one-hot labels produced
        by `to_one_hot`, in the original sample order.
    """
    X = torch.from_numpy(X.transpose([0,3,1,2]))
    y = torch.LongTensor(y)

    # Boolean mask instead of a per-sample copy into a fixed 50000-row buffer:
    # works for any dataset size and never exposes uninitialised memory.
    keep = (y == 0) | (y == 1)
    X = X[keep]
    y = y[keep]

    # `> 0` (not `> 1`) so that asking for a single sample is honoured
    if n_samples > 0:
        X = X[0:n_samples]
        y = y[0:n_samples]

    # preprocess the data
    X = X.float()/255.0
    y = to_one_hot(y)

    return X, y


def to_one_hot(labels, n_classes=None):
    """
    One-hot encode integer class labels.

    Args:
        labels: integer (int64) tensor of shape (n,) or (n, 1). It is not
            modified; a 1-D input is reshaped on a new view.
        n_classes: number of columns of the result. Defaults to
            `labels.max() + 1`, which under-counts when the highest class is
            absent from `labels`; pass it explicitly in that case.

    Returns:
        float32 tensor of shape (n, n_classes) on the same device as
        `labels`, with a 1 in the column given by each label.
    """
    if labels.ndimension()==1:
        # out-of-place: the in-place variant changed the caller's tensor shape
        labels = labels.unsqueeze(1)
    n_samples = labels.shape[0]
    if n_classes is None:
        n_classes = int(labels.max())+1

    one_hot_labels = torch.zeros(n_samples, n_classes,
                                 dtype=torch.float32, device=labels.device)
    one_hot_labels.scatter_(1, labels, 1)

    return one_hot_labels


if __name__ == '__main__':
    # Smoke test: draw 30 batches from the CIFAR-10 training loader and 4 from
    # the test loader, printing the shape of every batch.
    train_loader, test_loader = load_cifar10(training_size=10000,batch_size=500)
    for loader, n_draws in ((train_loader, 30), (test_loader, 4)):
        for i in range(n_draws):
            batch_x, batch_y = next(loader)
            print(i, batch_x.shape, batch_y.shape)




Files already downloaded and verified
Files already downloaded and verified
0 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
1 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
2 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
3 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
4 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
5 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
6 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
7 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
8 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
9 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
10 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
11 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
12 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
13 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
14 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
15 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
16 torch.Size([500, 3, 32, 32]) torch.Size([500, 2])
17 torch.Size([500, 3, 32, 32]) t

In [3]:
import time
import torch

def train(model, criterion, optimizer, dataloader, batch_size, n_iters=50000, verbose=True):
    """
    Run `n_iters` optimisation steps on `model`.

    Each step clears the gradients, lets `compute_minibatch_gradient`
    accumulate the gradient of one mini-batch of `batch_size` samples, and
    calls `optimizer.step()`. Exponential moving averages (decay 0.9) of the
    loss and accuracy are kept for logging; they are seeded with the values
    of the first step, so a loss or accuracy of zero (or a negative loss)
    no longer restarts the average.

    Args:
        model: network to train; switched to train mode.
        criterion: loss forwarded to `compute_minibatch_gradient`.
        optimizer: object with `zero_grad()` and `step()`.
        dataloader: batch source forwarded to `compute_minibatch_gradient`.
        batch_size: number of samples per optimisation step.
        n_iters: number of optimisation steps.
        verbose: print a progress line every 200 steps.

    Returns:
        None.
    """
    model.train()
    acc_avg, loss_avg = 0, 0

    since = time.time()
    for iter_now in range(n_iters):
        optimizer.zero_grad()
        loss,acc = compute_minibatch_gradient(model, criterion, dataloader, batch_size)
        optimizer.step()

        if iter_now == 0:
            # seed the running averages with the first observation
            acc_avg, loss_avg = acc, loss
        else:
            acc_avg = 0.9 * acc_avg + 0.1 * acc
            loss_avg = 0.9 * loss_avg + 0.1 * loss

        if iter_now%200 == 0 and verbose:
            now = time.time()
            print('%d/%d, took %.0f seconds, train_loss: %.1e, train_acc: %.2f'%(
                    iter_now+1, n_iters, now-since, loss_avg, acc_avg))
            # the reported time covers the steps since the previous report
            since = time.time()


def compute_minibatch_gradient(model, criterion, dataloader, batch_size):
    """
    Accumulate into `p.grad` the gradient of one mini-batch of `batch_size`
    samples, assembled from `batch_size // dataloader.batch_size` loads.

    Gradients of the individual loads are summed by `backward()` and then
    divided by the number of loads. The caller is responsible for zeroing
    the gradients beforehand.

    Args:
        model: network to differentiate.
        criterion: callable `criterion(logits, targets)` returning a scalar loss.
        dataloader: batch source with a `batch_size` attribute (the load size).
        batch_size: mini-batch size; must be at least `dataloader.batch_size`.

    Returns:
        (loss, acc): loss and accuracy averaged over the loads.

    Raises:
        ValueError: if `batch_size` is smaller than the load size, i.e. no
            load would be drawn.
    """
    n_loads = batch_size // dataloader.batch_size
    if n_loads < 1:
        # previously surfaced as an AttributeError / ZeroDivisionError further down
        raise ValueError('load size should not be larger than batch size')

    loss,acc = 0,0
    for _ in range(n_loads):
        inputs,targets = next(dataloader)
        inputs, targets = inputs.cuda(), targets.cuda()

        logits = model(inputs)
        E = criterion(logits,targets)
        E.backward()

        loss += E.item()
        acc += accuracy(logits.data,targets)

    for p in model.parameters():
        # parameters that received no gradient have p.grad set to None
        if p.grad is not None:
            p.grad.data /= n_loads

    return loss/n_loads, acc/n_loads


def accuracy(logits, targets):
    """
    Percentage of rows whose arg-max logit matches the target class.

    `targets` may be 2-D (the arg-max of each row is taken as the class, as
    for one-hot labels) or 1-D class indices. Returns a 0-dim tensor.
    """
    y_preds = torch.max(logits,1)[1]
    if targets.ndimension() == 2:
        y_trues = torch.max(targets,1)[1]
    else:
        y_trues = targets

    n_correct = (y_trues==y_preds).float().sum()
    return n_correct*100.0/logits.shape[0]


In [4]:
'''
Modified from https://github.com/pytorch/vision.git
'''
import math

import torch.nn as nn
import torch.nn.init as init

# Names nominally exported by this cell.
# NOTE(review): vgg11_big is defined below but is not listed here -- confirm
# whether that omission is intentional.
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]


class VGG(nn.Module):
    '''
    VGG-style network: a convolutional feature extractor followed by a
    two-layer classifier (feature_size -> 128 -> num_classes).

    `feature_size` must equal the number of values per sample that
    `features` produces once flattened.
    '''
    def __init__(self, features,feature_size=512,num_classes=10):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(feature_size, 128),
            nn.ReLU(True),
            nn.Linear(128,num_classes),
        )
        self._init_conv_weights()

    def _init_conv_weights(self):
        '''
        Re-initialise every Conv2d: weights ~ N(0, 2/n) with
        n = kernel_h * kernel_w * out_channels, biases zero.
        '''
        conv_layers = (m for m in self.modules() if isinstance(m, nn.Conv2d))
        for conv in conv_layers:
            fan = conv.kernel_size[0] * conv.kernel_size[1] * conv.out_channels
            conv.weight.data.normal_(0, math.sqrt(2. / fan))
            conv.bias.data.zero_()

    def forward(self, x):
        feats = self.features(x)
        # flatten everything except the batch dimension
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)


def make_layers(cfg, batch_norm=False):
    """
    Turn a layer layout into an nn.Sequential feature extractor.

    Each entry of `cfg` is either 'M' (a 2x2 max-pool with stride 2) or an
    integer (a 3x3 convolution with padding 1 and that many output channels,
    followed by an optional BatchNorm2d and an in-place ReLU). The first
    convolution expects 3 input channels.
    """
    modules = []
    channels_in = 3
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        modules.append(nn.Conv2d(channels_in, spec, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(spec))
        modules.append(nn.ReLU(inplace=True))
        channels_in = spec
    return nn.Sequential(*modules)


# Layer layouts consumed by make_layers: an integer adds a 3x3 convolution
# with that many output channels, 'M' adds a 2x2 max-pool with stride 2.
# Every layout ends with 'M', so layout[-2] is the channel count of the last
# convolution.
cfg = {
    'A': [16, 'M', 16, 'M', 32, 'M',  64, 'M', 64, 'M'],
    'A1': [16, 'M', 32, 'M', 32, 32, 'M', 64, 64, 'M', 128, 128, 'M'],
    'A2': [32, 'M', 64, 'M', 64, 64, 'M', 128, 128, 'M', 256, 256, 'M'],
    'A3': [64, 'M', 128, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M'],
    'A4': [128, 'M', 256, 'M', 256, 256, 'M', 512, 512, 'M', 1024, 1024, 'M'],
    'B': [16, 16, 'M', 32, 32, 'M', 64, 64, 'M', 128, 128, 'M', 128, 128, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M',
          512, 512, 512, 512, 'M'],
}


def vgg11(num_classes=10):
    """VGG built from configuration "A" without batch normalization; the
    classifier expects 64 flattened features."""
    features = make_layers(cfg['A'])
    return VGG(features, feature_size=64, num_classes=num_classes)


def vgg11_big(num_classes=10):
    """VGG built from configuration "A3" without batch normalization; the
    classifier input size is the channel count of the last convolution."""
    layout = cfg['A3']
    return VGG(make_layers(layout), layout[-2], num_classes)

def vgg11_bn(num_classes=10):
    """VGG built from configuration "A" with batch normalization.

    `num_classes` is now forwarded to the classifier (it used to be accepted
    but ignored), and the classifier input size is taken from the last
    convolution of the layout (64), matching `vgg11`, instead of the VGG
    default of 512.
    """
    layout = cfg['A']
    return VGG(make_layers(layout, batch_norm=True),
               feature_size=layout[-2], num_classes=num_classes)


def vgg13(num_classes=10):
    """VGG built from configuration "B" without batch normalization.

    `num_classes` is passed by keyword: positionally it landed in VGG's
    `feature_size` slot. The classifier input size is the channel count of
    the last convolution of the layout (128), the same convention as
    `vgg11_big`; this presumes a 1x1 final feature map -- TODO confirm for
    inputs other than 32x32.
    """
    layout = cfg['B']
    return VGG(make_layers(layout),
               feature_size=layout[-2], num_classes=num_classes)


def vgg13_bn(num_classes=10):
    """VGG built from configuration "B" with batch normalization.

    The classifier input size is the channel count of the last convolution
    of the layout (128) rather than the VGG default of 512; this presumes a
    1x1 final feature map -- TODO confirm for inputs other than 32x32.
    `num_classes` is optional and defaults to the previous fixed value of 10.
    """
    layout = cfg['B']
    return VGG(make_layers(layout, batch_norm=True),
               feature_size=layout[-2], num_classes=num_classes)


def vgg16(num_classes=10):
    """VGG built from configuration "D" without batch normalization.

    `num_classes` is optional and defaults to the previous fixed value of 10,
    so existing zero-argument calls are unaffected.
    """
    return VGG(make_layers(cfg['D']), num_classes=num_classes)


def vgg16_bn(num_classes=10):
    """VGG built from configuration "D" with batch normalization.

    `num_classes` is optional and defaults to the previous fixed value of 10,
    so existing zero-argument calls are unaffected.
    """
    return VGG(make_layers(cfg['D'], batch_norm=True), num_classes=num_classes)


def vgg19(num_classes=10):
    """VGG built from configuration "E" without batch normalization.

    `num_classes` is optional and defaults to the previous fixed value of 10,
    so existing zero-argument calls are unaffected.
    """
    return VGG(make_layers(cfg['E']), num_classes=num_classes)


def vgg19_bn(num_classes=10):
    """VGG built from configuration 'E' with batch normalization.

    `num_classes` is optional and defaults to the previous fixed value of 10,
    so existing zero-argument calls are unaffected.
    """
    return VGG(make_layers(cfg['E'], batch_norm=True), num_classes=num_classes)


In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class LeNet(nn.Module):
    """
    LeNet-style CNN for single-channel 28x28 images with 10 output logits.

    Two 5x5 convolutions, each followed by ReLU and 2x2 average pooling, feed
    a 256 -> 200 -> 10 fully connected head.
    """
    def __init__(self):
        super(LeNet,self).__init__()
        self.conv1 = nn.Conv2d(1,6,5,stride=1) # 28-5+1=24
        self.conv2 = nn.Conv2d(6,16,5,stride=1) # 12-5+1=8
        self.fc1 = nn.Linear(4*4*16,200)
        self.fc2 = nn.Linear(200,10)

    def forward(self,x):
        """
        Args:
            x: images of shape (B, 1, 28, 28), or (B, 28, 28) without the
               channel axis.

        Returns:
            Logits of shape (B, 10).
        """
        if x.ndimension()==3:
            # Insert the missing channel axis. For a single (1, H, W) image
            # this equals the former unsqueeze(0); it additionally accepts a
            # (B, H, W) batch, which previously failed in conv1.
            x = x.unsqueeze(1)
        o = F.relu(self.conv1(x))
        o = F.avg_pool2d(o,2,2)

        o = F.relu(self.conv2(o))
        o = F.avg_pool2d(o,2,2)

        # flatten everything except the batch dimension
        o = o.view(o.shape[0],-1)
        o = self.fc1(o)
        o = F.relu(o)
        o = self.fc2(o)
        return o

class FNN(nn.Module):
    """
    Fully connected network 784 -> 500 -> 500 -> 500 -> 10 with a ReLU after
    every layer except the last. Inputs are flattened per sample.
    """
    def __init__(self):
        super(FNN,self).__init__()
        widths = [784, 500, 500, 500, 10]
        layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(n_in, n_out))
            layers.append(nn.ReLU())
        layers.pop()  # the output layer is not followed by an activation
        self.net = nn.Sequential(*layers)

    def forward(self,x):
        flat = x.view(x.shape[0],-1)
        return self.net(flat)


def lenet():
    """Return a freshly initialised LeNet."""
    model = LeNet()
    return model

def fnn():
    """Return a freshly initialised FNN."""
    model = FNN()
    return model



In [6]:
import math
# from .models.vgg import vgg11
# from .models.mnist import fnn
# from .data import load_fmnist,load_cifar10
# from .trainer import accuracy
# from .linalg import eigen_variance, eigen_hessian



# def load_net(dataset):
#     if dataset == 'fashionmnist':
#             return fnn().cuda()
#     elif dataset == 'cifar10':
#             return vgg11(num_classes=2).cuda()
#     else:
#         raise ValueError('Dataset %s is not supported'%(dataset))


# def load_data(dataset, training_size, batch_size):
#     if dataset == 'fashionmnist':
#             return load_fmnist(training_size, batch_size)
#     elif dataset == 'cifar10':
#             return load_cifar10(training_size, batch_size)
#     else:
#         raise ValueError('Dataset %s is not supported'%(dataset))


# def get_sharpness(net, criterion, dataloader, n_iters=10, tol=1e-2, verbose=False):
#     v = eigen_hessian(net, criterion, dataloader, \
#                       n_iters=n_iters, tol=tol, verbose=verbose)
#     return v


# def get_nonuniformity(net, criterion, dataloader, n_iters=10, tol=1e-2, verbose=False):
#     v = eigen_variance(net, criterion, dataloader, \
#                       n_iters=n_iters, tol=tol, verbose=verbose)
#     return math.sqrt(v)


def eval_accuracy(model, criterion, dataloader):
    """
    Evaluate loss and accuracy over one full pass of `dataloader`.

    The model is switched to eval mode and the loader is rewound to its first
    sample. Per-batch loss and accuracy are weighted by the number of samples
    in the batch, so a smaller final batch does not skew the averages; with
    equally sized batches this equals the plain mean over batches. Gradient
    tracking is disabled during the pass.

    Assumes `criterion` averages over the batch (as the default
    `torch.nn.MSELoss` does) -- TODO confirm for other criteria.

    Args:
        model: network to evaluate.
        criterion: callable `criterion(logits, targets)` returning a scalar loss.
        dataloader: iterator yielding `(inputs, targets)`; must support
            `len()` and a writable `idx` attribute.

    Returns:
        (loss, acc): sample-weighted mean loss (float) and accuracy in
        percent (as produced by `accuracy`).
    """
    model.eval()
    n_batchs = len(dataloader)
    # rewind so the pass starts at the first sample and covers each one once
    dataloader.idx = 0

    loss_t, acc_t, n_seen = 0.0, 0.0, 0
    with torch.no_grad():
        for _ in range(n_batchs):
            inputs,targets = next(dataloader)
            inputs, targets = inputs.cuda(), targets.cuda()
            n_batch = len(targets)

            logits = model(inputs)
            loss_t += criterion(logits,targets).item() * n_batch
            acc_t += accuracy(logits.data,targets) * n_batch
            n_seen += n_batch

    return loss_t/n_seen, acc_t/n_seen


In [7]:
import os
import argparse
import json
import torch

# from src.trainer import train
# from src.utils import load_net, load_data, eval_accuracy



# def get_args():
#     argparser = argparse.ArgumentParser(description=__doc__)
#     argparser.add_argument('--gpuid',
#                           default='0,', help='gpu id, [0] ')
#     argparser.add_argument('--dataset', 
#                           default='fashionmnist', help='dataset, [fashionmnist] | cifar10')
#     argparser.add_argument('--n_samples', type=int,
#                            default=1000, help='training set size, [1000]')
#     argparser.add_argument('--load_size', type=int,
#                            default=1000, help='load size for dataset, [1000]')
#     argparser.add_argument('--optimizer', 
#                            default='sgd', help='optimizer, [sgd]')
#     argparser.add_argument('--n_iters', type=int,
#                            default=10000, help='number of iteration used to train nets, [10000]')
#     argparser.add_argument('--batch_size', type=int,
#                            default=1000, help='batch size, [1000]')
#     argparser.add_argument('--learning_rate', type=float,
#                            default=1e-1, help='learning rate')
#     argparser.add_argument('--momentum', type=float,
#                            default='0.0', help='momentum, [0.0]')
#     argparser.add_argument('--model_file', 
#                            default='fnn.pkl', help='filename to save the net, fnn.pkl')

#     args = argparser.parse_args()
#     if args.load_size > args.batch_size:
#         raise ValueError('load size should not be larger than batch size')
#     os.environ["CUDA_VISIBLE_DEVICES"] = args.gpuid

#     print('===> Config:')
#     print(json.dumps(vars(args), indent=2))
#     return args

# def get_optimizer(net, optimizer):
#     if optimizer == 'sgd':
#         return torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
#     elif optimizer == 'adam':
#         return torch.optim.Adam(net.parameters(), lr=learning_rate)
#     else:
#         raise ValueError('optimizer %s has not been supported'%(optimizer))

# args = get_args()

# Mean-squared-error loss moved to the GPU; the targets built by the data
# loaders in this notebook are one-hot float tensors. This object is passed to
# train / eval_accuracy below and reused by the later analysis cell.
criterion = torch.nn.MSELoss().cuda()


# train_loader, test_loader = load_data(args.dataset,
#                                       training_size=args.n_samples,
#                                       batch_size=args.load_size)
# net = load_net(args.dataset)
# optimizer = get_optimizer(net, args)
# print(optimizer)

# print('===> Architecture:')
# print(net)

# print('===> Start training')
# train(net, criterion, optimizer, train_loader, args.batch_size, args.n_iters, verbose=True)

# train_loss, train_accuracy = eval_accuracy(net, criterion, train_loader)
# test_loss, test_accuracy = eval_accuracy(net, criterion, test_loader)
# print('===> Solution: ')
# print('\t train loss: %.2e, acc: %.2f' % (train_loss, train_accuracy))
# print('\t test loss: %.2e, acc: %.2f' % (test_loss, test_accuracy))

# torch.save(net.state_dict(), args.model_file)


# Training configuration (replaces the commented-out argparse options above).
# gpuid and model_file are not read by the live code in this cell.
gpuid = '0,'
# selects both the data set and the architecture below
dataset= 'fashionmnist'
# number of training examples kept
n_samples = 1000
# batch size of the data loaders, i.e. samples per forward/backward pass
load_size = 1000
# optimizer name; NOTE: this variable is rebound to the optimizer object below
optimizer = 'sgd'
# number of optimisation steps
n_iters = 10000
# samples per optimisation step; train() draws batch_size // load_size loads
batch_size = 1000
learning_rate = 1e-1
# only used by the 'sgd' branch
momentum = 0.0
model_file = 'fnn.pkl'


# criterion = torch.nn.MSELoss().cuda()
# train_loader, test_loader = load_data(dataset,
#                                       training_size=n_samples,
#                                       batch_size=load_size)

# net = load_net(dataset)

# Build the data loaders and the matching network for the chosen dataset.
if dataset == 'fashionmnist':
    train_loader, test_loader = load_fmnist(training_size=n_samples, batch_size=load_size)
    net = fnn().cuda()
elif dataset == 'cifar10':
    train_loader, test_loader = load_cifar10(training_size=n_samples, batch_size=load_size)
    net = vgg11(num_classes=2).cuda()
else:
    # fail here rather than with a NameError on `net` further down
    raise ValueError('Dataset %s is not supported'%(dataset))

# optimizer = get_optimizer(net, optimizer)

# Replace the optimizer name by the optimizer object of the same variable.
if optimizer == 'sgd':
    optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
elif optimizer == 'adam':
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
else:
    # otherwise `optimizer` would stay a string and train() would fail later
    raise ValueError('optimizer %s has not been supported'%(optimizer))

# Show the optimizer settings and the architecture before training starts.
print(optimizer)

print('===> Architecture:')
print(net)

# Train in place; progress is printed every 200 steps by train().
print('===> Start training')
train(net, criterion, optimizer, train_loader, batch_size, n_iters, verbose=True)

# Final loss/accuracy on both splits. These four names (and net, criterion,
# train_loader) are read again by the later analysis cell.
train_loss, train_accuracy = eval_accuracy(net, criterion, train_loader)
test_loss, test_accuracy = eval_accuracy(net, criterion, test_loader)
print('===> Solution: ')
print('\t train loss: %.2e, acc: %.2f' % (train_loss, train_accuracy))
print('\t test loss: %.2e, acc: %.2f' % (test_loss, test_accuracy))

# torch.save(net.state_dict(), model_file)


SGD (
Parameter Group 0
    dampening: 0
    lr: 0.1
    momentum: 0.0
    nesterov: False
    weight_decay: 0
)
===> Architecture:
FNN(
  (net): Sequential(
    (0): Linear(in_features=784, out_features=500, bias=True)
    (1): ReLU()
    (2): Linear(in_features=500, out_features=500, bias=True)
    (3): ReLU()
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Linear(in_features=500, out_features=10, bias=True)
  )
)
===> Start training
1/10000, took 0 seconds, train_loss: 9.9e-02, train_acc: 4.70
201/10000, took 2 seconds, train_loss: 6.7e-02, train_acc: 59.92
401/10000, took 2 seconds, train_loss: 5.2e-02, train_acc: 69.31
601/10000, took 2 seconds, train_loss: 4.3e-02, train_acc: 73.48
801/10000, took 2 seconds, train_loss: 3.9e-02, train_acc: 76.45
1001/10000, took 2 seconds, train_loss: 3.5e-02, train_acc: 79.87
1201/10000, took 2 seconds, train_loss: 3.3e-02, train_acc: 81.22
1401/10000, took 2 seconds, train_loss: 3.0e-02, train_acc: 83.86
1

In [9]:
import os
import time
import argparse
import json
import torch

# from src.utils import load_net, load_data, \
#                       get_sharpness, get_nonuniformity, \
#                       eval_accuracy



# Configuration of the analysis cell (replaces the commented-out argparse
# options below). dataset, n_samples, batch_size and model_file are only
# referenced by commented-out code in this cell.
gpuid = '0,'
dataset= 'fashionmnist'
n_samples = 1000
batch_size = 1000
model_file = 'fnn.pkl'
# NOTE(review): earlier cells have already moved tensors to the GPU before
# this assignment runs; presumably it no longer affects device selection in
# the running kernel -- confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = gpuid
# Only the header is printed; the print of the settings themselves is part of
# the commented-out get_args below.
print('===> Config:')

# def get_args():
#     argparser = argparse.ArgumentParser(description=__doc__)
#     argparser.add_argument('--gpuid',default='0,')
#     argparser.add_argument('--dataset',default='fashionmnist',
#                             help='dataset choosed, [fashionmnist] | cifar10')
#     argparser.add_argument('--n_samples',type=int,
#                             default=1000, help='training set size, [1000]')
#     argparser.add_argument('--batch_size', type=int,
#                             default=1000, help='batch size')
#     argparser.add_argument('--model_file', default='fnn.pkl',
#                             help='file name of the pretrained model')
#     args = argparser.parse_args()
#     os.environ["CUDA_VISIBLE_DEVICES"]=args.gpuid

#     print('===> Config:')
#     print(json.dumps(vars(args),indent=2))
#     return args




# load model
# criterion = torch.nn.MSELoss().cuda()
# train_loader,test_loader = load_data(dataset, 
#                                     training_size=n_samples, 
#                                     batch_size=batch_size)
# net = load_net(dataset)
# net.load_state_dict(torch.load(model_file))

# # Evaluate models
# train_loss, train_accuracy = eval_accuracy(net, criterion, train_loader)
# test_loss, test_accuracy = eval_accuracy(net, criterion, test_loader)

# This part reuses objects left in the kernel by the training cell: net,
# criterion, train_loader and the four loss/accuracy values printed here.
# The model is not reloaded from disk (that code is commented out above).
print('===> Basic information of the given model: ')
print('\t train loss: %.2e, acc: %.2f'%(train_loss, train_accuracy))
print('\t test loss: %.2e, acc: %.2f'%(test_loss, test_accuracy))

print('===> Compute sharpness:')
# sharpness = get_sharpness(net, criterion, train_loader, \
#                             n_iters=10, verbose=True, tol=1e-4)

# Sharpness: power-iteration estimate returned by eigen_hessian on the
# training data (at most 10 iterations).
sharpness = eigen_hessian(net, criterion, train_loader, n_iters=10, tol=1e-4, verbose=False)

print('Sharpness is %.2e\n'%(sharpness))

print('===> Compute non-uniformity:')
# non_uniformity = get_nonuniformity(net, criterion, train_loader, \
#                                     n_iters=10, verbose=True, tol=1e-4)

# Non-uniformity: square root of the estimate returned by eigen_variance.
# `math` is imported in an earlier cell, not in this one.
# NOTE(review): math.sqrt raises ValueError for a negative argument; presumably
# the estimate is non-negative here -- confirm.
non_uniformity = math.sqrt(eigen_variance(net, criterion, train_loader, n_iters=10, tol=1e-4, verbose=True))

print('Non-uniformity is %.2e\n'%(non_uniformity))


===> Config:
===> Basic information of the given model: 
	 train loss: 2.87e-03, acc: 100.00
	 test loss: 3.36e-02, acc: 80.47
===> Compute sharpness:
Sharpness is 1.98e+01

===> Compute non-uniformity:
1-th step takes 35 seconds, 	 9.02e-03
2-th step takes 36 seconds, 	 9.82e+02
3-th step takes 36 seconds, 	 1.31e+03


KeyboardInterrupt: 