In [None]:
import random
import torch
import numpy as np
from __future__ import division
from nats_bench import create
from scipy.optimize import minimize_scalar
from pprint import pprint
# Create the NATS-Bench API for the topology search space ('tss').
# NOTE(review): the benchmark path is relative to the notebook's working
# directory - confirm it exists before running this cell.
api = create("../fake_torch_dir/NATS-tss-v1_0-3ffb9-full", 'tss', fast_mode=True, verbose=False)

In [None]:
def EVBMF(Y, sigma2=None, H=None):
    """Low-rank factorisation of ``Y`` by thresholding/shrinking its SVD.

    (EVBMF presumably stands for empirical variational Bayes matrix
    factorisation - the reference list the comments cite is not part of
    this notebook.)

    Args:
        Y: 2-D torch tensor of shape (L, M). The formulas assume L <= M, so
            callers are expected to transpose taller matrices first.
        sigma2: noise variance. When ``None`` it is estimated by minimising
            ``EVBsigma2`` with scipy's bounded scalar minimiser.
        H: maximum number of singular triplets kept from the SVD; defaults
            to L.

    Returns:
        ``(U, S, V)`` where ``U`` is (L, r), ``S`` is an (r, r) diagonal
        matrix holding the shrunken singular values and ``V`` is (M, r);
        ``r`` is the number of singular values above the threshold, so
        ``U @ S @ V.T`` approximates ``Y``.
    """
    L, M = Y.shape  # has to be L<=M

    if H is None:
        H = L

    alpha = L/M
    tauubar = 2.5129*np.sqrt(alpha)

    # SVD of the input matrix, truncated to rank H.
    # torch.svd returns V itself (np.linalg.svd returns V^T), so the right
    # singular vectors are the COLUMNS of V and must be sliced column-wise.
    U, s, V = torch.svd(Y)
    U = U[:, :H]
    s = s[:H]
    V = V[:, :H]

    # Energy discarded by the rank-H truncation (zero when nothing is cut).
    residual = 0.
    if H < L:
        residual = torch.sum(Y**2) - torch.sum(s**2)

    # Estimation of the variance when sigma2 is unspecified
    if sigma2 is None:
        xubar = (1+tauubar)*(1+alpha/tauubar)
        eH_ub = int(np.min([np.ceil(L/(1+alpha))-1, H]))-1
        # NOTE(review): when H <= ceil(L/(1+alpha))-1 the index eH_ub+1
        # equals len(s) and the lookup below raises IndexError - confirm
        # whether such small H values are ever requested without sigma2.
        upper_bound = (torch.sum(s**2)+residual)/(L*M)
        lower_bound = torch.max(torch.stack(
            [s[eH_ub+1]**2/(M*xubar), torch.mean(s[eH_ub+1:]**2)/M], dim=0))

        # Hand scipy plain numpy / float values so the optimiser never mixes
        # torch tensors with numpy arithmetic.
        sigma2_opt = minimize_scalar(
            EVBsigma2,
            args=(L, M, s.detach().cpu().numpy(), float(residual), xubar),
            bounds=[float(lower_bound), float(upper_bound)],
            method='Bounded')
        sigma2 = float(sigma2_opt.x)

    # Threshold gamma term: singular values at or below it are dropped.
    threshold = float(np.sqrt(M*sigma2*(1+tauubar)*(1+alpha/tauubar)))
    pos = int(torch.sum(s > threshold))

    # Formula (15) from [2]: shrink the retained singular values.
    kept = s[:pos]
    ratio = (L+M)*sigma2/kept**2
    d = (kept/2)*(1 - ratio +
                  torch.sqrt((1 - ratio)**2 - 4*L*M*sigma2**2/kept**4))

    return U[:, :pos], torch.diag(d), V[:, :pos]


def EVBsigma2(sigma2, L, M, s, residual, xubar):
    """Objective that EVBMF minimises over ``sigma2`` to estimate the noise.

    Args:
        sigma2: candidate noise variance.
        L, M: dimensions of the factorised matrix.
        s: array of singular values (indexed with boolean masks below).
        residual: extra term added as ``residual/(M*sigma2)``.
        xubar: cut-off separating the two groups of scaled singular values.

    Returns:
        The scalar objective value for this ``sigma2``.
    """
    n_kept = len(s)
    aspect = L/M

    # Squared singular values scaled by the candidate noise level.
    scaled = s**2/(M*sigma2)
    above = scaled[scaled > xubar]
    below = scaled[scaled <= xubar]
    tau_above = tau(above, aspect)

    below_sum = np.sum(below - np.log(below))
    above_sum = np.sum(above - tau_above)
    log_ratio_sum = np.sum(np.log((tau_above+1)/above))
    aspect_sum = aspect*np.sum(np.log(tau_above/aspect+1))

    return (below_sum+above_sum+log_ratio_sum+aspect_sum
            + residual/(M*sigma2) + (L-n_kept)*np.log(sigma2))


def phi0(x):
    """Return ``x - log(x)``, evaluated element-wise by numpy."""
    log_x = np.log(x)
    return x - log_x


def phi1(x, alpha):
    """Return ``log(t+1) + alpha*log(t/alpha+1) - t`` with ``t = tau(x, alpha)``."""
    t = tau(x, alpha)  # tau has no side effects, so one evaluation suffices
    return np.log(t+1) + alpha*np.log(t/alpha + 1) - t


def tau(x, alpha):
    """Return ``0.5*(x-(1+alpha) + sqrt((x-(1+alpha))**2 - 4*alpha))``.

    Works element-wise on numpy arrays as well as on scalars.
    """
    shifted = x-(1+alpha)
    discriminant = shifted**2 - 4*alpha
    return 0.5 * (shifted + np.sqrt(discriminant))


In [None]:
    def compute_low_rank(tensor: torch.Tensor,
                         normalizer: float) -> tuple:
        """Summarise the EVBMF low-rank approximation of a 2-D tensor.

        Args:
            tensor: 2-D tensor; it is detached first if it requires grad and
                transposed when it has more rows than columns.
            normalizer: currently unused - the first dimension of ``tensor``
                (as passed in, before any transpose) is used instead. Kept
                so existing callers keep working.

        Returns:
            ``(rank, KG, condition)``:
            rank - number of retained singular values divided by the first
            dimension of the input;
            KG - sum of the retained singular values, each divided by the
            largest one, divided by the same dimension;
            condition - first retained singular value divided by the last.
            ``(None, None, None)`` is returned if a RuntimeError is raised
            while factorising; ``(0.0, 0.0, 0)`` if nothing is retained.
        """
        if tensor.requires_grad:
            tensor = tensor.detach()
        try:
            # Recorded BEFORE the optional transpose, so the normalisation
            # below always uses the caller's first dimension.
            tensor_size = tensor.shape
            if tensor_size[0] > tensor_size[1]:
                tensor = tensor.T  # EVBMF needs rows <= columns
            _, S_approx, _ = EVBMF(tensor)
        except RuntimeError:
            return None, None, None
        rank = S_approx.shape[0] / tensor_size[0]
        # S_approx is a diagonal matrix; pull the diagonal back out.
        low_rank_eigen = torch.diag(S_approx).detach().cpu().numpy()
        if len(low_rank_eigen) != 0:
            condition = low_rank_eigen[0] / low_rank_eigen[-1]
            sum_low_rank_eigen = np.sum(low_rank_eigen / low_rank_eigen.max())
        else:
            condition = 0
            sum_low_rank_eigen = 0
        KG = sum_low_rank_eigen / tensor_size[0]
        return rank, KG, condition

In [None]:
# Architecture index and dataset under inspection; both queries below read
# these variables so they cannot drift apart.
dataset = 'cifar10'
index = 531
# NOTE(review): hp='200' presumably selects the 200-epoch training record,
# while get_net_param is left on its default hp - confirm this is intended.
info = api.get_more_info(index, dataset, hp='200', is_random=False)
# The third argument is None; later cells index the result by key (e.g. 111)
# and iterate over it, so it is treated as a mapping of parameter dicts.
params = api.get_net_param(index, dataset, None)


In [None]:
# Inspect the entry stored under key 111: list its parameter names, then show
# the shape of one of its weights.
# NOTE(review): 111 is presumably a training seed returned by get_net_param -
# confirm against the keys of `params`.
print(params[111].keys())
print(params[111]['cells.0.layers.0.op.2.weight'].shape)

In [47]:
# Report metrics for every 4-D "weight" tensor of every entry in params.
for i in params:
    for k, v in params[i].items():
        # Skip anything that is not a weight or does not have four dimensions.
        if 'weight' not in k or len(v.size()) != 4:
            continue
        print(k)
        get_metrics(i, k)
        print("\n")

stem.0.weight
torch.Size([16, 3, 3, 3])
in: 0.6666666666666666 0.4399328629175822 3.1269677
out: 0.25 0.18781526386737823 2.4044902


cells.0.layers.0.op.1.weight
torch.Size([16, 16, 1, 1])
in: 0.0 0.0 0
out: 0.0 0.0 0


cells.0.layers.2.op.1.weight
torch.Size([16, 16, 1, 1])
in: 0.0 0.0 0
out: 0.0 0.0 0


cells.0.layers.3.op.1.weight
torch.Size([16, 16, 3, 3])
in: 0.4375 0.2394258826971054 3.2372427
out: 0.125 0.10987097024917603 1.3193736


cells.0.layers.4.op.1.weight
torch.Size([16, 16, 1, 1])
in: 0.0625 0.0625 1.0
out: 0.0625 0.0625 1.0


cells.0.layers.5.op.1.weight
torch.Size([16, 16, 3, 3])
in: 0.375 0.23981879651546478 3.5918655
out: 0.25 0.176105797290802 1.9296715


cells.1.layers.0.op.1.weight
torch.Size([16, 16, 1, 1])
in: 0.0 0.0 0
out: 0.0 0.0 0


cells.1.layers.2.op.1.weight
torch.Size([16, 16, 1, 1])
in: 0.0 0.0 0
out: 0.0 0.0 0


cells.1.layers.3.op.1.weight
torch.Size([16, 16, 3, 3])
in: 0.1875 0.1335415542125702 2.2323227
out: 0.25 0.15970441699028015 2.228719


cel

In [None]:
def get_metrics(key1,key2):
    """Print and return low-rank metrics for one 4-D tensor in ``params``.

    The tensor ``params[key1][key2]`` is flattened to 2-D in two ways -
    once with axis 1 leading ("in") and once with axis 0 leading ("out") -
    and each matrix is passed to ``compute_low_rank``.

    Returns:
        ``(in_rank, out_rank, in_KG, out_KG, in_condition, out_condition)``.
    """
    weight = params[key1][key2]
    dims = weight.shape
    print(dims)

    # "in" view: bring axis 1 to the front, then flatten the remaining axes.
    swapped = weight.permute(1, 0, 2, 3)
    in_matrix = swapped.reshape([dims[1], dims[0] * dims[2] * dims[3]])
    in_rank, in_KG, in_condition = compute_low_rank(in_matrix, dims[1])
    print("in:", in_rank, in_KG, in_condition)

    # "out" view: axis 0 already leads, so only the flattening is needed.
    out_matrix = weight.reshape([dims[0], dims[1] * dims[2] * dims[3]])
    out_rank, out_KG, out_condition = compute_low_rank(out_matrix, dims[0])
    print("out:", out_rank, out_KG, out_condition)

    return in_rank, out_rank, in_KG, out_KG, in_condition, out_condition