# BigGAN Hessian Computation
This repo tests whether we can compute the Hessian of BigGAN (with respect to an activation or an image-similarity objective) using the forward finite-difference method.

In [None]:
from pytorch_pretrained_biggan import BigGAN, truncated_noise_sample
import torch
import torch.optim as optim
import torch.nn.functional as F
from hessian_eigenthings.power_iter import Operator, deflated_power_iteration
from hessian_eigenthings.lanczos import lanczos
from lanczos_generalized import lanczos_generalized
from GAN_hvp_operator import GANHVPOperator, GANForwardHVPOperator, compute_hessian_eigenthings, get_full_hessian
#%
import numpy as np
import matplotlib.pylab as plt
from time import time
from os.path import join
from imageio import imwrite
from build_montages import build_montages, color_framed_montages
import torchvision.models as tv

In [None]:
def _freeze(net):
    """Turn off gradient tracking for every parameter of ``net`` and return it."""
    for weight in net.parameters():
        weight.requires_grad_(False)
    return net


# Pretrained BigGAN-deep generator; its weights are frozen because only the
# latent codes are differentiated in this notebook.
BGAN = BigGAN.from_pretrained("biggan-deep-256")
#%
_freeze(BGAN)
# First parameter of the embedding layer, grabbed before the model is moved to the GPU.
embed_mat = next(BGAN.embeddings.parameters()).data
BGAN.cuda()
#%
alexnet = tv.alexnet(pretrained=True).cuda()
# Eval mode matters: otherwise Dropout causes trial-to-trial variability.
alexnet.eval()
_freeze(alexnet)

In [None]:
from FeatLinModel import FeatLinModel, get_model_layers

In [None]:
from torchvision.transforms import Normalize, Compose
# Per-channel mean / std used to normalise images, shaped (1, 3, 1, 1) so they
# broadcast over a batch of images.
RGB_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).cuda()
RGB_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).cuda()


def _resize_224(img):
    """Bilinearly resample a batch of images to 224 x 224."""
    return F.interpolate(img, (224, 224), mode='bilinear', align_corners=True)


# `preprocess` resizes and then normalises; `preprocess_resize` only resizes.
preprocess = Compose([lambda img: (_resize_224(img) - RGB_mean) / RGB_std])
preprocess_resize = Compose([_resize_224])


In [None]:
from hessian import hessian
from IPython.display import clear_output
from hessian_eigenthings.utils import progress_bar
from GAN_hvp_operator import get_full_hessian
def _generate_in_batches(G, codes, batch_size, truncation=0.6):
    """Run ``G.generator`` over ``codes`` in chunks of ``batch_size`` rows, without autograd."""
    with torch.no_grad():
        chunks = [G.generator(codes[start:start + batch_size, :], truncation)
                  for start in range(0, codes.size(0), batch_size)]
        return torch.cat(chunks, dim=0)


def tuning_plot_BigGAN(G, preprocess, objective, feat, eigvals, eigvects, space="class",
        eig_id_arr=(0, 1, 5, 10, 15, 20, 40, 60, 80,99, 120, 127),
        save_indiv=False, save_row=False, summary_dir="", veclabel="eig", titlestr="", lim=(-1, 1), ticks=21,
        pad=24, cmap=plt.cm.viridis, RND=None):
    """Perturb a reference code along selected eigenvectors and plot the objective.

    For every index in ``eig_id_arr`` the row ``eigvects[eig_id, :]`` is used as a
    perturbation direction around ``feat``; the resulting images are scored with
    ``-objective(preprocess(imgs), scaler=False)``, montaged and written to
    ``summary_dir`` together with a score matrix figure.

    Args:
        G: generator object. For ``space`` "class"/"full" it must expose
            ``G.generator(codes, truncation)``; for "noise" it must expose
            ``G.visualize(codes)``.
        preprocess: callable applied to the generated image batch before scoring.
        objective: callable ``objective(imgs, scaler=False)`` returning one score per image.
        feat: reference latent code (torch tensor).
        eigvals: numpy array of eigenvalues, indexed by ``eig_id_arr`` for labels.
        eigvects: numpy array whose ROWS are the eigenvectors.
        space: "class" (perturb only the second half of the code; the first half
            is padded with zeros), "full" (perturb the whole code by linear steps
            in ``lim``) or "noise" (rotate the normalised code towards the
            eigenvector by angles from -90 to 90 degrees, keeping ``feat``'s norm).
        eig_id_arr: eigenvector indices to visualise, one montage row each.
        save_indiv: also write every individual image into ``summary_dir``.
        save_row: also write one montage per eigenvector.
        summary_dir, veclabel, titlestr: output directory, file-name tag and figure title.
        lim, ticks: perturbation range and number of steps per eigenvector.
        pad, cmap: frame width and colormap passed to ``color_framed_montages``.
        RND: integer tag for the output file names (random if None).

    Returns:
        (img_list, scores_col): the images of the LAST eigenvector row (list of
        H x W x C arrays) and the score array with one row per eigenvector.

    Raises:
        ValueError: if ``space`` is not one of "class", "full", "noise".
    """
    if space not in ("class", "full", "noise"):
        raise ValueError("space must be 'class', 'full' or 'noise', got %r" % (space,))
    if RND is None: RND = np.random.randint(100)
    vec_norm = feat.norm().item()

    if space == "noise":
        theta_arr_deg = np.linspace(-90, 90, ticks) # np.arange(-5, 6)
        theta_arr = theta_arr_deg / 180 * np.pi
        ref_vect = (feat.detach() / vec_norm).cpu().numpy()
    else:
        step_arr = torch.linspace(lim[0], lim[1], ticks)
        ref_vect = feat.detach().clone()
    img_list = []
    img_list_all = []
    scores_col = [] # array version of scores
    scores_all = [] # list version of scores
    batch = 6
    for eig_id in eig_id_arr:
        perturb_vect = eigvects[eig_id,:]
        if space == "noise":
            codes_arc = np.array([np.cos(theta_arr),
                              np.sin(theta_arr) ]).T @ np.array([ref_vect, perturb_vect])
            norms = np.linalg.norm(codes_arc, axis=1)
            codes_arc = codes_arc / norms[:, np.newaxis] * vec_norm
            imgs = G.visualize(torch.from_numpy(codes_arc).float().cuda())
        else:
            # Cast to the step dtype so float64 eigenvectors do not break the matmul.
            direction = torch.from_numpy(np.ascontiguousarray(perturb_vect)).to(step_arr.dtype)
            perturb_vecs = step_arr.unsqueeze(1) @ direction.unsqueeze(0)
            if space == "class":
                # Leave the first half of the code untouched.
                perturb_vecs = torch.cat((torch.zeros_like(perturb_vecs), perturb_vecs), dim=1)
            codes_arc = perturb_vecs.cuda() + ref_vect.cuda()
            imgs = _generate_in_batches(G, codes_arc, batch)
        with torch.no_grad():
            scores = - objective(preprocess(imgs), scaler=False)
        scores_col.append(scores.cpu().numpy())
        scores_all.extend(scores.cpu().reshape(-1).tolist())
        hwcn = imgs.detach().cpu().permute([2, 3, 1, 0]).numpy()
        if space == "noise":
            # The only `visualize` in this file already returns images in [0, 1]
            # (see BigGAN_wrapper.visualize), so they must not be rescaled again.
            npimgs = np.clip(hwcn, 0, 1)
        else:
            # `G.generator` output is mapped from [-1, 1] to [0, 1] for display.
            npimgs = np.clip((hwcn + 1) / 2, 0, 1)

        if save_indiv:
            # Previously wrote to an undefined `newimg_dir` and used the angle
            # array even when it did not exist; now writes to `summary_dir`.
            for i in range(npimgs.shape[3]):
                if space == "noise":
                    fname = "norm%d_%s%d_ang%d.jpg" % (vec_norm, veclabel, eig_id, theta_arr_deg[i])
                else:
                    fname = "norm%d_%s%d_step%.2f.jpg" % (vec_norm, veclabel, eig_id, step_arr[i].item())
                imwrite(join(summary_dir, fname), npimgs[:, :, :, i])

        img_list = [npimgs[:, :, :, i] for i in range(npimgs.shape[3])]
        img_list_all.extend(img_list)
        if save_row:
            mtg1 = build_montages(img_list, [256, 256], [ticks, 1])[0]
            imwrite(join(summary_dir, "norm%d_%s_%d.jpg" % (vec_norm, veclabel, eig_id)), mtg1)
    mtg_all = build_montages(img_list_all, [256, 256], [ticks, int(len(img_list_all) // ticks)])[0]
    imwrite(join(summary_dir, "norm%d_%s_%d.jpg" % (vec_norm, veclabel, RND)), mtg_all)
    print("Write to ", join(summary_dir, "norm%d_%s_%d.jpg" % (vec_norm, veclabel, RND)))

    mtg_frm = color_framed_montages(img_list_all, [256, 256], [ticks, int(len(img_list_all) // ticks)], scores_all, pad=pad, cmap=cmap)[0]
    imwrite(join(summary_dir, "norm%d_%s_framed_%d.jpg" % (vec_norm, veclabel, RND)), mtg_frm)
    print("Write to ", join(summary_dir, "norm%d_%s_framed_%d.jpg" % (vec_norm, veclabel, RND)))

    scores_col = np.array(scores_col)
    plt.figure(figsize=[8,10],dpi=100)
    plt.matshow(scores_col)
    plt.axis('image')
    plt.title("Neural Tuning Towards Different Eigen Vectors of Activation")
    plt.xlabel("Angle")
    plt.ylabel("Eigen Vector #")
    eiglabel = ["%d %.3f"%(eig_idx, eig) for eig_idx, eig in zip(eig_id_arr, eigvals[list(eig_id_arr)])]
    plt.yticks(range(len(eig_id_arr)), eiglabel) # eig_id_arr
    plt.ylim(top=-0.5, bottom=len(eig_id_arr) - 0.5)
    plt.colorbar()
    plt.suptitle(titlestr)
    plt.savefig(join(summary_dir, "norm%d_%s_score_mat_%02d.jpg" % (vec_norm, veclabel, RND)) , dpi=100, bbox_inches='tight')
    plt.show()
    print("Write to ", join(summary_dir, "norm%d_%s_score_mat_%02d.jpg" % (vec_norm, veclabel, RND)) )
    return img_list, scores_col

In [None]:
import torch.nn as nn
class BigGAN_wrapper():#nn.Module
    """Thin wrapper exposing a ``visualize`` method on top of a BigGAN model.

    ``BigGAN`` is any object with a ``generator(code, truncation)`` method;
    ``space`` is stored on the instance but not used by this class.
    """
    def __init__(self, BigGAN, space="class"):
        self.BigGAN = BigGAN
        self.space = space

    def visualize(self, code, scale=1.0, truncation=0.6):
        """Generate images for ``code`` and map them into ``[0, scale]``.

        The generator output is shifted and halved (``(imgs + 1) / 2``), clamped
        to [0, 1] and multiplied by ``scale``. ``truncation`` is forwarded to the
        generator; it defaults to 0.6, the value that used to be hard-coded.
        """
        imgs = self.BigGAN.generator(code, truncation)
        return torch.clamp((imgs + 1.0) / 2.0, 0, 1) * scale

# Wrapper instance used wherever a `visualize` method is needed.
G = BigGAN_wrapper(BGAN)

In [None]:
%%time
# Sanity check: push a random 256-d code through the generator and use the
# mean pixel value as a scalar objective.
feat = torch.randn(256).cuda().requires_grad_(True)
img = BGAN.generator(feat, 0.7)
obj = img.mean()
# obj.backward()

In [None]:
%%time
# Hessian of the scalar objective w.r.t. the code via the `hessian` helper
# (presumably double backprop -- confirm in the `hessian` module).
from hessian import hessian
H = hessian(obj, feat)

# Eigen-decomposition of the Hessian and a plot of its spectrum.
eigval, eigvec = np.linalg.eigh(H.cpu().numpy())
plt.plot(sorted(eigval))
plt.show()

In [None]:
# Objective built on one AlexNet unit (layer "features_10", channel 10, position (7, 7)).
objective = FeatLinModel(alexnet, layername="features_10", type="neuron", chan=10, pos=(7, 7))

In [None]:
# Score a random code: generator -> preprocess -> objective.
feat = torch.randn(256).cuda().requires_grad_(True)
act = objective(preprocess(BGAN.generator(feat, 0.7)))

In [None]:
%%time
# Hessian of the unit objective w.r.t. the latent code.
from hessian import hessian
H_act = hessian(act, feat)

In [None]:
%%time
# Spectrum of the activation Hessian.
eigval, eigvec = np.linalg.eigh(H_act.cpu().numpy())
plt.plot(sorted(eigval))
plt.show()

In [None]:
# Histogram of the eigenvalues with a log-scaled count axis.
plt.hist(eigval, bins=30, log=True)
plt.show()

In [None]:
# Drop the reference to the objective; a new one is built further down.
del objective

Above, we have shown that we can compute the Hessian for BigGAN by backpropagating twice, and that we can find its spectrum.

Next, I'd like to examine its spectrum in the noise space and in the code space.

In [None]:
# One-hot selector for class index 1 (out of 1000).
onehot = torch.zeros(1, 1000).requires_grad_(False)
onehot[0, 1] = 1

In [None]:
# Class embedding for the one-hot vector plus a truncated noise sample; the
# generator input is the concatenation [noise, class].
classvec = BGAN.embeddings(onehot.cuda())
noisevec = torch.from_numpy(truncated_noise_sample(1, 128, 0.6))
#%
img = BGAN.generator(torch.cat((noisevec.cuda(), classvec, ), dim=1), 0.6)
#%
# (img + 1) / 2 rescales the generator output for display.
plt.imshow((img.cpu().permute(2,3,1,0).squeeze() + 1.0) / 2.0)
plt.axis('off')
plt.show()

In [None]:
# Same as above, but the class embedding now tracks gradients and the image is
# scored with output unit 1 of AlexNet.
classvec = BGAN.embeddings(onehot.cuda())
noisevec = torch.from_numpy(truncated_noise_sample(1, 128, 0.6)).cuda()
classvec.requires_grad_(True)
img = BGAN.generator(torch.cat((noisevec, classvec, ), dim=1), 0.6)
act = alexnet(preprocess_resize(img))[0,1]
plt.imshow((img.detach().cpu().permute(2,3,1,0).squeeze() + 1.0) / 2.0)
plt.axis('off')
plt.show()

In [None]:
%%time
# Hessian of the AlexNet output w.r.t. the class embedding only.
H_class = hessian(act, classvec)

In [None]:
# Populate classvec.grad with the first-order gradient.
act.backward()

In [None]:
# Gradient-ascend AlexNet output unit 1 by optimising the class embedding only
# (the noise vector stays fixed), starting from the embedding of class 1.
onehot = torch.zeros(1, 1000).requires_grad_(False)
onehot[0, 1] = 1
classvec = BGAN.embeddings(onehot.cuda())
noisevec = torch.from_numpy(truncated_noise_sample(1, 128, 0.6)).cuda()
# Keep AlexNet in eval mode. The weights are frozen, so train mode would only
# switch Dropout on and make the objective stochastic -- both during the search
# and for the final `act` below -- whereas the Hessian computed afterwards is
# evaluated in eval mode.
alexnet.eval()
classvec.requires_grad_(True)
print(classvec.norm())
optimizer = optim.Adam([classvec], lr=1.5E-3)
for step in range(200):
    optimizer.zero_grad()
    img = BGAN.generator(torch.cat((noisevec, classvec, ), dim=1), 0.6)
    # Negated so that minimising `obj` maximises the unit's output.
    obj =  - alexnet(preprocess_resize(img))[0,1]
    obj.backward()
    optimizer.step()
    if (step + 1) % 10 == 0:
        print("step %d: %.2f"%(step, obj.item()))
        plt.imshow((img.detach().cpu().permute(2,3,1,0).squeeze() + 1.0) / 2.0)
        plt.axis('off')
        plt.show()
print(classvec.norm())
# Final image and activation at the optimised class vector.
img = BGAN.generator(torch.cat((noisevec, classvec, ), dim=1), 0.6)
act = alexnet(preprocess_resize(img))[0,1]
plt.imshow((img.detach().cpu().permute(2,3,1,0).squeeze() + 1.0) / 2.0)
plt.axis('off')
plt.show()
alexnet.eval()

In [None]:
# Gradient norm relative to the norm of the class vector.
classvec.grad.norm() / classvec.norm()

In [None]:
classvec.norm()

In [None]:
%%time
# Full Hessian of the (negated) AlexNet output w.r.t. the class vector, with the
# noise vector held fixed.
classvec.requires_grad_(True)
noisevec.requires_grad_(False)
img = BGAN.generator(torch.cat((noisevec, classvec, ), dim=1), 0.6)
act =  - alexnet(preprocess_resize(img))[0,1]
H_act_class = get_full_hessian(act, classvec)
#%
# NOTE: np.linalg.eigh returns eigenvalues in ascending order and the
# eigenvectors as COLUMNS of `eigvec_class` (eigvec_class[:, i] pairs with
# eigval_class[i]).
eigval_class, eigvec_class = np.linalg.eigh(H_act_class)
plt.plot(sorted(eigval_class))
plt.show()
plt.hist(eigval_class, bins=30, log=True)
plt.show()

In [None]:
# Output directory for all figures below (machine-specific absolute path).
savedir = r"E:\OneDrive - Washington University in St. Louis\HessTune\BigGAN"
# objective = lambda img, scaler: alexnet(img)[:,1].mean() if scaler else alexnet(img)[:,1]
alexnet.eval()
# Objective on unit 1 of layer "classifier_6".
objective = FeatLinModel(alexnet, layername="classifier_6", type="neuron", chan=1, pos=(1,1))
# Reference code: [noise, optimised class vector], detached from the graph.
ref_vect = torch.cat((noisevec.detach(), classvec.detach(), ), dim=1)

In [None]:
img_list, scores_col = tuning_plot_BigGAN(BGAN, preprocess_resize, objective, 
            ref_vect, eigval_class, eigvec_class, eig_id_arr=(1,2,3,4,5,6,7,8,9,10, 20, 30, 40, 50, 60, 70, 80, 100, 110, 120), 
            space="class", ticks=11, lim=(-2,2), summary_dir=savedir)

In [None]:
sort_idx = np.argsort(eigval_class)
img_list, scores_col = tuning_plot_BigGAN(BGAN, preprocess_resize, objective, 
            ref_vect, eigval_class, eigvec_class, eig_id_arr=sort_idx[-15:], 
            space="class", ticks=15, lim=(-1,1), summary_dir=savedir)

In [None]:
sort_idx = np.argsort(np.abs(eigval_class))
img_list, scores_col = tuning_plot_BigGAN(BGAN, preprocess_resize, objective, 
            ref_vect, eigval_class, eigvec_class, eig_id_arr=sort_idx[:15], 
            space="class", ticks=15, lim=(-1,1), summary_dir=savedir)

In [None]:
# NOTE(review): later cells in this notebook still reference `BGAN` and
# `alexnet`; running this cell and then continuing top-to-bottom raises
# NameError. `G` also keeps a reference to the generator, so its memory is
# presumably not released here -- confirm before relying on this cleanup.
del BGAN, alexnet, objective
torch.cuda.empty_cache()

In [None]:
del act
torch.cuda.empty_cache()

In [None]:
# Normalising preprocess (resize to 224 x 224, subtract mean, divide by std).
RGB_mean = torch.tensor([0.485, 0.456, 0.406]).view(1,-1,1,1).cuda()
RGB_std  = torch.tensor([0.229, 0.224, 0.225]).view(1,-1,1,1).cuda()
preprocess = Compose([lambda img: (F.interpolate(img, (224, 224), mode='bilinear', align_corners=True) - RGB_mean) / RGB_std])
objective = FeatLinModel(alexnet, layername="classifier_6", type="neuron", chan=1, pos=(1, 1))
# Full 256-d reference code: [noise, class].
ref_vect = torch.cat((noisevec, classvec, ), dim=1).detach().clone()
# Hessian-vector-product operator around `ref_vect` (forward finite-difference
# variant, judging by its name -- see GAN_hvp_operator for details).
BGANHVP = GANForwardHVPOperator(G, ref_vect, objective, preprocess=preprocess)

In [None]:
%%time
t0 = time()
# Lanczos iteration for 128 eigenpairs of the operator.
eigvals, eigvects = lanczos(BGANHVP, num_eigenthings=128, use_gpu=True)
print(time() - t0)  # 40 sec 146sec for 2000 eigens
# Reverse the order of the eigenvalues and of the rows of `eigvects`.
eigvals = eigvals[::-1]
eigvects = eigvects[::-1, :]

In [None]:
eigval_fullfor = eigvals
eigvec_fullfor = eigvects


In [None]:
# NOTE(review): the operator above was built with `G` and the normalising
# `preprocess`, while these plots use `BGAN` with `preprocess_resize`; confirm
# that scoring the images with a different preprocessing is intended.
# The 15 eigenvalues of largest magnitude.
sort_idx = np.argsort(np.abs(eigval_fullfor))
img_list, scores_col = tuning_plot_BigGAN(BGAN, preprocess_resize, objective, 
            ref_vect, eigval_fullfor, eigvec_fullfor, eig_id_arr=sort_idx[-15:], 
            space="full", ticks=15, lim=(-1,1), summary_dir=savedir, veclabel="eig_forward_full")

In [None]:
# The 15 eigenvalues of smallest magnitude.
sort_idx = np.argsort(np.abs(eigval_fullfor))
img_list, scores_col = tuning_plot_BigGAN(BGAN, preprocess_resize, objective, 
            ref_vect, eigval_fullfor, eigvec_fullfor, eig_id_arr=sort_idx[:15], 
            space="full", ticks=15, lim=(-1,1), summary_dir=savedir, veclabel="eig_forward_full")

In [None]:
# All computed eigenpairs, ordered by decreasing magnitude.
sort_idx = np.argsort(-np.abs(eigval_fullfor))
img_list, scores_col = tuning_plot_BigGAN(BGAN, preprocess_resize, objective, 
            ref_vect, eigval_fullfor, eigvec_fullfor, eig_id_arr=sort_idx, 
            space="full", ticks=15, lim=(-1,1), summary_dir=savedir, veclabel="eig_forward_full")

Finally, I'll run this analysis on multiple classes and look at the tuning and invariance. For this purpose I'll first extract
the embedding matrix `embed_mat` and then pick a set of classes to experiment with.

In [None]:
# Normalisation constants, broadcastable over a batch of RGB images.
RGB_mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).cuda()
RGB_std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).cuda()


def _resize_and_normalize(img):
    """Resize a batch to 224 x 224 and normalise it with RGB_mean / RGB_std."""
    resized = F.interpolate(img, (224, 224), mode='bilinear', align_corners=True)
    return (resized - RGB_mean) / RGB_std


preprocess = Compose([_resize_and_normalize])

# First parameter of the embedding layer; columns are indexed by class id.
embed_mat = next(BGAN.embeddings.parameters())
class_id = 1
# Embedding of one class as a row vector (displayed as the cell output).
embed_mat[:, class_id:class_id + 1].T


In [None]:
# Draw 20 random class ids (used to pick the list hard-coded in the next cell).
list(sorted(np.random.randint(1000, size=(20))))

In [None]:
%%time
class_id = 1
# For every class: build the reference code from its embedding and a fresh
# noise sample, compute 128 eigenpairs of the forward HVP operator and save the
# tuning plots with the class id in the file label.
for class_id in [17, 79, 95, 107, 224, 346, 493, 542, 579, 637, 667, 754, 761, 805, 814, 847, 856, 941, 954, 968]:#[2, 10, 113, 459, 517, 663, 754, 787, 857, 998]:
    t0 = time()
    classvec = embed_mat[:,class_id:class_id+1].T
    noisevec = torch.from_numpy(truncated_noise_sample(1, 128, 0.6)).cuda()
    ref_vect = torch.cat((noisevec, classvec, ), dim=1).detach().clone()
    # The objective unit index matches the class id.
    objective = FeatLinModel(alexnet, layername="classifier_6", type="neuron", chan=class_id, pos=(1, 1))

    BGANHVP = GANForwardHVPOperator(G, ref_vect, objective, preprocess=preprocess)
    
    eigval_fullfor, eigvec_fullfor = lanczos(BGANHVP, num_eigenthings=128, use_gpu=True)
    print(time() - t0, " Finish Forward Hessian Decompostion")# 40 sec 146sec for 2000 eigens
    eigval_fullfor = eigval_fullfor[::-1]
    eigvec_fullfor = eigvec_fullfor[::-1, :]

    # Plot all eigenpairs, ordered by decreasing magnitude.
    sort_idx = np.argsort(-np.abs(eigval_fullfor))
    img_list, scores_col = tuning_plot_BigGAN(BGAN, preprocess_resize, objective, 
                ref_vect, eigval_fullfor, eigvec_fullfor, eig_id_arr=sort_idx, 
                space="full", ticks=15, lim=(-1.5,1.5), summary_dir=savedir, veclabel="eig_forward_full_cls%d"%(class_id))
    print(time() - t0, " Finish output images Decompostion")


In [None]:
# These cells reuse `ref_vect`, `objective` and the eigenpairs left over from
# the last iteration of the class loop above.
# The 15 eigenvalues of largest magnitude.
sort_idx = np.argsort(np.abs(eigval_fullfor))
img_list, scores_col = tuning_plot_BigGAN(BGAN, preprocess_resize, objective, 
            ref_vect, eigval_fullfor, eigvec_fullfor, eig_id_arr=sort_idx[-15:], 
            space="full", ticks=15, lim=(-1,1), summary_dir=savedir, veclabel="eig_forward_full")

In [None]:
# The 15 eigenvalues of smallest magnitude.
sort_idx = np.argsort(np.abs(eigval_fullfor))
img_list, scores_col = tuning_plot_BigGAN(BGAN, preprocess_resize, objective, 
            ref_vect, eigval_fullfor, eigvec_fullfor, eig_id_arr=sort_idx[:15], 
            space="full", ticks=15, lim=(-1,1), summary_dir=savedir, veclabel="eig_forward_full")

In [None]:
# All eigenpairs, ordered by decreasing magnitude.
sort_idx = np.argsort(-np.abs(eigval_fullfor))
img_list, scores_col = tuning_plot_BigGAN(BGAN, preprocess_resize, objective, 
            ref_vect, eigval_fullfor, eigvec_fullfor, eig_id_arr=sort_idx, 
            space="full", ticks=15, lim=(-1,1), summary_dir=savedir, veclabel="eig_forward_full")

The next step is to see whether there is any invariance / tuning in the intermediate neurons.

In [None]:
# First parameter of the embedding layer.
embed_mat = BGAN.embeddings.parameters().__next__()

In [None]:
alexnet.eval()#train(True)
unit_id = 12
# Objective on one unit of layer "classifier_1".
objective = FeatLinModel(alexnet, layername='classifier_1', type="neuron", chan=unit_id, pos=(1, 1))

# Random class vector rescaled to norm 0.67, concatenated after a truncated
# noise sample to form a 256-d starting code that tracks gradients.
rndclassvec = torch.randn(1, 128)
rndclassvec = rndclassvec / rndclassvec.norm() * 0.67
noisevec = truncated_noise_sample(batch_size=1, dim_z=128, truncation=0.6)
noisevec = torch.from_numpy(noisevec)
ref_vect = torch.cat((noisevec, rndclassvec), dim=1).detach().clone()
ref_vect = ref_vect.cuda().requires_grad_(True)
# Norms of the noise half and of the class half.
print(ref_vect[0,:128].norm().item(), " ", ref_vect[0,128:].norm().item())

In [None]:
# rndclassvec.requires_grad_(True)
# noisevec.requires_grad_(True)
# Adam on the whole 256-d code. If the gradient vanishes, draw a new starting
# code, rebuild the optimizer and reset the step counter; give up after more
# than MAXTRY restarts.
optimizer = optim.Adam([ref_vect], lr=1.5E-3)
step = 0
trial = 0
MAXTRY = 100
while step <= 200:
    optimizer.zero_grad()
#     img = BGAN.generator(ref_vect, 0.6)
    img = G.visualize(ref_vect)
    obj = objective(preprocess(img))
    obj.backward()
    optimizer.step()
    if ref_vect.grad.norm() < 1E-6:
        # NOTE: this restart scales the class vector by 0.67 / sqrt(128), unlike
        # the initial draw, which normalises it to norm 0.67.
        rndclassvec =  torch.randn(1, 128) / np.sqrt(128) * 0.67
        noisevec = torch.from_numpy(truncated_noise_sample(batch_size=1, dim_z=128, truncation=0.6))
        ref_vect = torch.cat((noisevec, rndclassvec), dim=1).detach().clone()
        ref_vect = ref_vect.cuda().requires_grad_(True)
        optimizer = optim.Adam([ref_vect], lr=1.5E-3)
        print("No gradient, Restart optimizer from another spot.")
        step = 0
        trial += 1
        if trial > MAXTRY:
            break
    if np.mod((step + 1), 10) == 0:
        print("step %d: %.2f  Norm: Noise %.1f Class %.3f"%(step, obj.item(), 
                ref_vect[0,:128].norm().item(), ref_vect[0,128:].norm().item()))
    step += 1
#         plt.imshow((img.detach().cpu().permute(2,3,1,0).squeeze() + 1.0) / 2.0)
#         plt.axis('off')
#         plt.show()
# Final image and objective value at the optimised code.
with torch.no_grad():
    img = G.visualize(ref_vect)
    act = objective(preprocess(img))
# `G.visualize` output is shown directly, without rescaling.
plt.imshow(img.detach().cpu().permute(2,3,1,0).squeeze())
plt.axis('off')
plt.show()
alexnet.eval()

In [None]:
# NOTE(review): when the notebook is run top-to-bottom, `rndclassvec` and
# `noisevec` are plain tensors that never had backward() applied to them (only
# `ref_vect` is optimised above), so `.grad` is None here and both cells fail.
# They presumably were run after the two-optimizer cell further down -- confirm.
(rndclassvec.abs() / rndclassvec.grad.abs()).median()

In [None]:
(noisevec.abs() / noisevec.grad.abs()).median()

In [None]:
# Freeze AlexNet again and rebuild the objective on the same unit.
for param in alexnet.parameters():
    param.requires_grad_(False)
unit_id = 12
objective = FeatLinModel(alexnet, layername='classifier_1', type="neuron", chan=unit_id, pos=(1, 1))

In [None]:
# Embedding of a random class as a row vector.
embed_mat[:, np.random.randint(1000)].unsqueeze(0)

In [None]:
rndclassvec

In [None]:
# Optimise the class vector and the noise vector with separate Adam optimizers.
# A run counts as successful only if all 200 steps complete without the
# gradient vanishing; otherwise a new starting point is drawn, up to MAXTRY
# restarts.
success = False
trial = 0
MAXTRY = 100
while not success: #  or trial < MAXTRY
#     rndclassvec = torch.randn(1, 128).cuda() / np.sqrt(128) * 0.67
    # Start from the embedding of a random class and a fresh noise sample.
    rndclassvec = embed_mat[:, np.random.randint(1000)].unsqueeze(0).clone().cuda()
    noisevec = torch.from_numpy(truncated_noise_sample(batch_size=1, dim_z=128, truncation=0.6)).cuda()
#     ref_vect = torch.cat((noisevec, rndclassvec), dim=1).detach().clone()
    rndclassvec.requires_grad_(True)
    noisevec.requires_grad_(True)
    print(rndclassvec.norm().item(), " ", noisevec.norm().item())
    optimizer_cls = optim.Adam([rndclassvec], lr=1.5E-3, weight_decay=1E-4)
    optimizer_noise = optim.Adam([noisevec], lr=1.5E-3, weight_decay=1E-4)

    for step in range(200):
        # Both gradients must be cleared: previously only the class gradient was
        # zeroed, so the noise gradient accumulated over all earlier steps.
        optimizer_cls.zero_grad()
        optimizer_noise.zero_grad()
    #     img = BGAN.generator(ref_vect, 0.6)
        img = G.visualize(torch.cat((noisevec, rndclassvec), dim=1)) # ref_vect)
        obj = objective(preprocess(img))
        obj.backward()
        optimizer_cls.step()
        optimizer_noise.step()
        if noisevec.grad.norm() < 1E-6 and rndclassvec.grad.norm() < 1E-6:
            print("No gradient, Restart optimizer from another spot.")
            trial += 1
            break
        if (step + 1) % 10 == 0:
            print("step %d: %.2f  Norm: Noise %.1f Class %.3f"%(step, obj.item(), 
                    noisevec.norm().item(), rndclassvec.norm().item()))
            print("Grad Amp Ratio Class %.2E, Noise %.2E"% 
            ((rndclassvec.grad.abs() / rndclassvec.abs()).median().item(), (noisevec.grad.abs() / noisevec.abs()).median().item()))
    #         plt.imshow((img.detach().cpu().permute(2,3,1,0).squeeze() + 1.0) / 2.0)
    #         plt.axis('off')
    #         plt.show()
    else:
        # Reached only when the loop was not broken out of. Previously `success`
        # was set after the first good step, so a later vanishing gradient ended
        # the search as if it had succeeded.
        success = True
    if trial > MAXTRY:
        break

if not success: print("Optimization failed too many times, have a rest! ")

# Final image and objective value at the last codes.
with torch.no_grad():
    img = G.visualize(torch.cat((noisevec, rndclassvec), dim=1))
    act = objective(preprocess(img))
plt.imshow(img.detach().cpu().permute(2,3,1,0).squeeze())
plt.axis('off')
plt.show()
# alexnet.eval()

In [None]:

unit_id = 1
# Same eigen-analysis as the class loop, but iterating over objective units.
for unit_id in [17, ]:#[2, 10, 113, 459, 517, 663, 754, 787, 857, 998]:
    t0 = time()
    # NOTE(review): `class_id` is not set in this cell; it keeps whatever value
    # an earlier cell left behind. Confirm this is the intended class embedding.
    classvec = embed_mat[:,class_id:class_id+1].T
    noisevec = torch.from_numpy(truncated_noise_sample(1, 128, 0.6)).cuda()
    ref_vect = torch.cat((noisevec, classvec, ), dim=1).detach().clone()

    objective = FeatLinModel(alexnet, layername="classifier_6", type="neuron", chan=unit_id, pos=(1, 1))
    BGANHVP = GANForwardHVPOperator(G, ref_vect, objective, preprocess=preprocess)

    eigval_fullfor, eigvec_fullfor = lanczos(BGANHVP, num_eigenthings=128, use_gpu=True)
    print(time() - t0, " Finish Forward Hessian Decompostion")# 40 sec 146sec for 2000 eigens
    eigval_fullfor = eigval_fullfor[::-1]
    eigvec_fullfor = eigvec_fullfor[::-1, :]

    # Plot all eigenpairs, ordered by decreasing magnitude.
    sort_idx = np.argsort(-np.abs(eigval_fullfor))
    img_list, scores_col = tuning_plot_BigGAN(BGAN, preprocess_resize, objective,
                ref_vect, eigval_fullfor, eigvec_fullfor, eig_id_arr=sort_idx,
                space="full", ticks=15, lim=(-1.5,1.5), summary_dir=savedir, veclabel="eig_forward_fc6_%d"%(unit_id))
    print(time() - t0, " Finish output images Decompostion")

In [None]:
def BigGAN_optim_result(param):
    """Run one latent-code optimization and return the final objective value.

    Relies on the notebook globals ``embed_mat``, ``G``, ``objective`` and
    ``preprocess``.

    Args:
        param: 2-D array with one row holding the log10 of, in order, the class
            learning rate, class weight decay, noise learning rate and noise
            weight decay.

    Returns:
        The objective value (float) from the last step of a run that completed
        all 200 steps, or ``np.nan`` if more than MAXTRY runs were aborted
        because the gradient vanished.
    """
    lr_cls = 10**param[0, 0] # 1.5E-3
    wd_cls = 10**param[0, 1]
    lr_ns = 10**param[0, 2]
    wd_ns = 10**param[0, 3]
    success = False
    trial = 0
    MAXTRY = 100
    while not success:
        # Start from the embedding of a random class and a fresh noise sample.
        rndclassvec = embed_mat[:, np.random.randint(embed_mat.shape[1])].unsqueeze(0).detach().clone().cuda()
        noisevec = torch.from_numpy(truncated_noise_sample(batch_size=1, dim_z=128, truncation=0.6)).cuda()
        rndclassvec.requires_grad_(True)
        noisevec.requires_grad_(True)
        optimizer_cls = optim.Adam([rndclassvec], lr=lr_cls, weight_decay=wd_cls)
        optimizer_noise = optim.Adam([noisevec], lr=lr_ns, weight_decay=wd_ns)

        for step in range(200):
            # Both gradients must be cleared: previously only the class gradient
            # was zeroed, so the noise gradient accumulated over earlier steps.
            optimizer_cls.zero_grad()
            optimizer_noise.zero_grad()
            img = G.visualize(torch.cat((noisevec, rndclassvec), dim=1))
            obj = objective(preprocess(img))
            obj.backward()
            optimizer_cls.step()
            optimizer_noise.step()
            if noisevec.grad.norm() < 1E-6 and rndclassvec.grad.norm() < 1E-6:
                print("No gradient, Restart optimizer from another spot.")
                trial += 1
                break
            if (step + 1) % 20 == 0:
                print("step %d: %.2f  Norm: Noise %.1f Class %.3f"%(step, obj.item(),
                        noisevec.norm().item(), rndclassvec.norm().item()))
                print("Grad Amp Ratio Class %.2E, Noise %.2E"%
                ((rndclassvec.grad.abs() / rndclassvec.abs()).median().item(), (noisevec.grad.abs() / noisevec.abs()).median().item()))
        else:
            # Reached only when the loop was not broken out of. Previously
            # `success` was set after the first good step, so a run aborted later
            # by a vanishing gradient was still reported as successful.
            success = True
        if trial > MAXTRY:
            break
    if not success:
        print("Optimization failed too many times, have a rest! ")
        return np.nan
    return obj.detach().cpu().item()

In [None]:
BigGAN_optim_result(np.array([[-3.0,-4,-3,-4]]))

In [None]:
import GPy
import GPyOpt
from GPyOpt.methods import BayesianOptimization

In [None]:
import GPy
import GPyOpt
from GPyOpt.methods import BayesianOptimization
mixed_domain =[{'name': 'lr_class', 'type': 'continuous', 'domain': (-4, -1.5),'dimensionality': 1},
               {'name': 'wd_class', 'type': 'continuous', 'domain': (-6, -2),'dimensionality': 1},
               {'name': 'lr_noise', 'type': 'continuous', 'domain': (-4, -1.5),'dimensionality': 1},
               {'name': 'wd_noise', 'type': 'continuous', 'domain': (-6, -2),'dimensionality': 1},]
#                {'name': 'var3', 'type': 'discrete', 'domain': (3,8,10),'dimensionality': 1},]
#                {'name': 'var4', 'type': 'categorical', 'domain': (0,1,2),'dimensionality': 1},
#                {'name': 'var5', 'type': 'continuous', 'domain': (-1,2),'dimensionality': 1}]
myBopt = BayesianOptimization(f=optim_result,                     # Objective function       
                             domain=mixed_domain,          # Box-constraints of the problem
                             initial_design_numdata = 5,   # Number data initial design
                             acquisition_optimizer_type='lbfgs',
                             acquisition_type='EI',        # Expected Improvement
                             exact_feval = False,         # True evaluations, no sample noise
                             maximize=False)           

In [None]:
max_iter = 50       ## maximum number of iterations
max_time = 3600      ## maximum allowed time
eps      = 1e-6     ## tolerance, max distance between consicutive evaluations.
myBopt.run_optimization(max_iter,eps=0,ver)

In [None]:
max_iter = 50       ## maximum number of iterations
max_time = 3600      ## maximum allowed time
eps      = 1e-6     ## tolerance, max distance between consicutive evaluations.
myBopt.run_optimization(max_iter,eps=0,ver)

In [None]:
def BigGAN_optim_result(param):
    """Run one latent-code optimization and return the final objective value.

    Relies on the notebook globals ``embed_mat``, ``G``, ``objective`` and
    ``preprocess``.

    Args:
        param: 2-D array with one row holding the log10 of, in order, the class
            learning rate, class weight decay, noise learning rate and noise
            weight decay.

    Returns:
        The objective value (float) from the last step of a run that completed
        all 200 steps, or ``np.nan`` if more than MAXTRY runs were aborted
        because the gradient vanished.
    """
    lr_cls = 10**param[0, 0] # 1.5E-3
    wd_cls = 10**param[0, 1]
    lr_ns = 10**param[0, 2]
    wd_ns = 10**param[0, 3]
    success = False
    trial = 0
    MAXTRY = 100
    while not success:
        # Start from the embedding of a random class and a fresh noise sample.
        rndclassvec = embed_mat[:, np.random.randint(embed_mat.shape[1])].unsqueeze(0).detach().clone().cuda()
        noisevec = torch.from_numpy(truncated_noise_sample(batch_size=1, dim_z=128, truncation=0.6)).cuda()
        rndclassvec.requires_grad_(True)
        noisevec.requires_grad_(True)
        optimizer_cls = optim.Adam([rndclassvec], lr=lr_cls, weight_decay=wd_cls)
        optimizer_noise = optim.Adam([noisevec], lr=lr_ns, weight_decay=wd_ns)

        for step in range(200):
            # Both gradients must be cleared: previously only the class gradient
            # was zeroed, so the noise gradient accumulated over earlier steps.
            optimizer_cls.zero_grad()
            optimizer_noise.zero_grad()
            img = G.visualize(torch.cat((noisevec, rndclassvec), dim=1))
            obj = objective(preprocess(img))
            obj.backward()
            optimizer_cls.step()
            optimizer_noise.step()
            if noisevec.grad.norm() < 1E-6 and rndclassvec.grad.norm() < 1E-6:
                print("No gradient, Restart optimizer from another spot.")
                trial += 1
                break
            if (step + 1) % 20 == 0:
                print("step %d: %.2f  Norm: Noise %.1f Class %.3f"%(step, obj.item(),
                        noisevec.norm().item(), rndclassvec.norm().item()))
                print("Grad Amp Ratio Class %.2E, Noise %.2E"%
                ((rndclassvec.grad.abs() / rndclassvec.abs()).median().item(), (noisevec.grad.abs() / noisevec.abs()).median().item()))
        else:
            # Reached only when the loop was not broken out of. Previously
            # `success` was set after the first good step, so a run aborted later
            # by a vanishing gradient was still reported as successful.
            success = True
        if trial > MAXTRY:
            break
    if not success:
        print("Optimization failed too many times, have a rest! ")
        return np.nan
    return obj.detach().cpu().item()

In [None]:
BigGAN_optim_result(np.array([[-3.0,-4,-3,-4]]))

In [None]:
import GPy
import GPyOpt
from GPyOpt.methods import BayesianOptimization

In [None]:
import GPy
import GPyOpt
from GPyOpt.methods import BayesianOptimization
mixed_domain =[{'name': 'lr_class', 'type': 'continuous', 'domain': (-4, -1.5),'dimensionality': 1},
               {'name': 'wd_class', 'type': 'continuous', 'domain': (-6, -2),'dimensionality': 1},
               {'name': 'lr_noise', 'type': 'continuous', 'domain': (-4, -1.5),'dimensionality': 1},
               {'name': 'wd_noise', 'type': 'continuous', 'domain': (-6, -2),'dimensionality': 1},]
#                {'name': 'var3', 'type': 'discrete', 'domain': (3,8,10),'dimensionality': 1},]
#                {'name': 'var4', 'type': 'categorical', 'domain': (0,1,2),'dimensionality': 1},
#                {'name': 'var5', 'type': 'continuous', 'domain': (-1,2),'dimensionality': 1}]
myBopt = BayesianOptimization(f=optim_result,                     # Objective function       
                             domain=mixed_domain,          # Box-constraints of the problem
                             initial_design_numdata = 5,   # Number data initial design
                             acquisition_optimizer_type='lbfgs',
                             acquisition_type='EI',        # Expected Improvement
                             exact_feval = False,         # True evaluations, no sample noise
                             maximize=False)           

In [None]:
max_iter = 50       ## maximum number of iterations
max_time = 3600      ## maximum allowed time
eps      = 1e-6     ## tolerance, max distance between consicutive evaluations.
myBopt.run_optimization(max_iter,eps=0,ver)

In [None]:
max_iter = 50       ## maximum number of iterations
max_time = 3600      ## maximum allowed time
eps      = 1e-6     ## tolerance, max distance between consicutive evaluations.
myBopt.run_optimization(max_iter,eps=0,ver)

In [None]:
def BigGAN_optim_result(param):
    """Run one latent-code optimization and return the final objective value.

    Relies on the notebook globals ``embed_mat``, ``G``, ``objective`` and
    ``preprocess``.

    Args:
        param: 2-D array with one row holding the log10 of, in order, the class
            learning rate, class weight decay, noise learning rate and noise
            weight decay.

    Returns:
        The objective value (float) from the last step of a run that completed
        all 200 steps, or ``np.nan`` if more than MAXTRY runs were aborted
        because the gradient vanished.
    """
    lr_cls = 10**param[0, 0] # 1.5E-3
    wd_cls = 10**param[0, 1]
    lr_ns = 10**param[0, 2]
    wd_ns = 10**param[0, 3]
    success = False
    trial = 0
    MAXTRY = 100
    while not success:
        # Start from the embedding of a random class and a fresh noise sample.
        rndclassvec = embed_mat[:, np.random.randint(embed_mat.shape[1])].unsqueeze(0).detach().clone().cuda()
        noisevec = torch.from_numpy(truncated_noise_sample(batch_size=1, dim_z=128, truncation=0.6)).cuda()
        rndclassvec.requires_grad_(True)
        noisevec.requires_grad_(True)
        optimizer_cls = optim.Adam([rndclassvec], lr=lr_cls, weight_decay=wd_cls)
        optimizer_noise = optim.Adam([noisevec], lr=lr_ns, weight_decay=wd_ns)

        for step in range(200):
            # Both gradients must be cleared: previously only the class gradient
            # was zeroed, so the noise gradient accumulated over earlier steps.
            optimizer_cls.zero_grad()
            optimizer_noise.zero_grad()
            img = G.visualize(torch.cat((noisevec, rndclassvec), dim=1))
            obj = objective(preprocess(img))
            obj.backward()
            optimizer_cls.step()
            optimizer_noise.step()
            if noisevec.grad.norm() < 1E-6 and rndclassvec.grad.norm() < 1E-6:
                print("No gradient, Restart optimizer from another spot.")
                trial += 1
                break
            if (step + 1) % 20 == 0:
                print("step %d: %.2f  Norm: Noise %.1f Class %.3f"%(step, obj.item(),
                        noisevec.norm().item(), rndclassvec.norm().item()))
                print("Grad Amp Ratio Class %.2E, Noise %.2E"%
                ((rndclassvec.grad.abs() / rndclassvec.abs()).median().item(), (noisevec.grad.abs() / noisevec.abs()).median().item()))
        else:
            # Reached only when the loop was not broken out of. Previously
            # `success` was set after the first good step, so a run aborted later
            # by a vanishing gradient was still reported as successful.
            success = True
        if trial > MAXTRY:
            break
    if not success:
        print("Optimization failed too many times, have a rest! ")
        return np.nan
    return obj.detach().cpu().item()

In [None]:
BigGAN_optim_result(np.array([[-3.0,-4,-3,-4]]))

In [None]:
import GPy
import GPyOpt
from GPyOpt.methods import BayesianOptimization

In [None]:
import GPy
import GPyOpt
from GPyOpt.methods import BayesianOptimization
# Search space for the Bayesian optimization. Every entry is a log10 exponent;
# the four dimensions line up, in order, with the four columns that
# BigGAN_optim_result reads from its `param` argument.
mixed_domain = [
    {'name': 'lr_class', 'type': 'continuous', 'domain': (-4, -1.5), 'dimensionality': 1},
    {'name': 'wd_class', 'type': 'continuous', 'domain': (-6, -2), 'dimensionality': 1},
    {'name': 'lr_noise', 'type': 'continuous', 'domain': (-4, -1.5), 'dimensionality': 1},
    {'name': 'wd_noise', 'type': 'continuous', 'domain': (-6, -2), 'dimensionality': 1},
]
# The objective is BigGAN_optim_result: it is the function in this notebook
# that consumes a (1, 4) array of these exponents.
# NOTE(review): the cell previously passed `optim_result`; confirm no such
# function was intended instead.
myBopt = BayesianOptimization(f=BigGAN_optim_result,        # Objective function
                              domain=mixed_domain,          # Box-constraints of the problem
                              initial_design_numdata=5,     # Number data initial design
                              acquisition_optimizer_type='lbfgs',
                              acquisition_type='EI',        # Expected Improvement
                              exact_feval=False,            # Evaluations are noisy (random start points)
                              maximize=False)               # Minimize the returned objective

In [None]:
max_iter = 50       ## maximum number of iterations
max_time = 3600      ## maximum allowed time
eps      = 1e-6     ## tolerance, max distance between consecutive evaluations.
# Only max_iter is passed positionally; eps is pinned to 0 in the call, so the
# `eps` and `max_time` variables above are currently not used by it.
# `verbosity` has to be given by keyword: a positional argument may not follow
# a keyword argument (that is a SyntaxError).
# NOTE(review): verbosity=True is assumed to be the intent of the unfinished
# trailing argument; switch to False for a silent run.
myBopt.run_optimization(max_iter, eps=0, verbosity=True)

In [None]:
max_iter = 50       ## maximum number of iterations
max_time = 3600      ## maximum allowed time
eps      = 1e-6     ## tolerance, max distance between consecutive evaluations.
# Only max_iter is passed positionally; eps is pinned to 0 in the call, so the
# `eps` and `max_time` variables above are currently not used by it.
# `verbosity` has to be given by keyword: a positional argument may not follow
# a keyword argument (that is a SyntaxError).
# NOTE(review): verbosity=True is assumed to be the intent of the unfinished
# trailing argument; switch to False for a silent run.
myBopt.run_optimization(max_iter, eps=0, verbosity=True)

In [None]:
def BigGAN_optim_result(param):
    """Optimize a BigGAN latent code with Adam and return the final objective.

    The class vector and the noise vector are optimized by two separate Adam
    optimizers so each can have its own learning rate and weight decay.

    Relies on names from the enclosing notebook scope: ``G`` (generator with a
    ``visualize`` method), ``objective``, ``preprocess`` and ``embed_mat``.

    Args:
        param: 2-D array indexed as ``param[0, i]``; columns are the log10 of
            (class learning rate, class weight decay, noise learning rate,
            noise weight decay).

    Returns:
        The objective value of the last executed step as a Python float, or
        ``np.nan`` when no starting point ever produced a usable gradient.
    """
    lr_cls = 10**param[0, 0]  # 1.5E-3
    wd_cls = 10**param[0, 1]
    lr_ns = 10**param[0, 2]
    wd_ns = 10**param[0, 3]
    success = False
    trial = 0
    MAXTRY = 100
    while not success:
        # Fresh starting point: a random column of the embedding matrix as the
        # class vector plus a truncated-normal noise vector.
        rndclassvec = embed_mat[:, np.random.randint(1000)].unsqueeze(0).clone().cuda()
        noisevec = torch.from_numpy(truncated_noise_sample(batch_size=1, dim_z=128, truncation=0.6)).cuda()
        rndclassvec.requires_grad_(True)
        noisevec.requires_grad_(True)
        optimizer_cls = optim.Adam([rndclassvec], lr=lr_cls, weight_decay=wd_cls)
        optimizer_noise = optim.Adam([noisevec], lr=lr_ns, weight_decay=wd_ns)

        for step in range(200):
            # Both optimizers must be reset; otherwise noisevec.grad keeps
            # accumulating over steps, corrupting the noise update and the
            # vanishing-gradient test below.
            optimizer_cls.zero_grad()
            optimizer_noise.zero_grad()
            img = G.visualize(torch.cat((noisevec, rndclassvec), dim=1))
            obj = objective(preprocess(img))
            obj.backward()
            optimizer_cls.step()
            optimizer_noise.step()
            if noisevec.grad.norm() < 1E-6 and rndclassvec.grad.norm() < 1E-6:
                # Dead spot. If this is the very first step, `success` is still
                # False and the outer loop restarts from a new random point.
                print("No gradient, Restart optimizer from another spot.")
                trial += 1
                break
            else:
                success = True
            if np.mod((step + 1), 20) == 0:
                print("step %d: %.2f  Norm: Noise %.1f Class %.3f"%(step, obj.item(), 
                        noisevec.norm().item(), rndclassvec.norm().item()))
                print("Grad Amp Ratio Class %.2E, Noise %.2E"% 
                ((rndclassvec.grad.abs() / rndclassvec.abs()).median().item(), (noisevec.grad.abs() / noisevec.abs()).median().item()))
        if trial > MAXTRY:
            break
    if not success: 
        print("Optimization failed too many times, have a rest! ")
        return np.nan
    else:
        return obj.detach().cpu().item()

In [None]:
# Smoke test: a single evaluation with log10 hyperparameters, passed as one
# row because BigGAN_optim_result indexes its argument as param[0, i].
probe_params = np.array([[-3.0, -4, -3, -4]])
BigGAN_optim_result(probe_params)

In [None]:
import GPy
import GPyOpt
from GPyOpt.methods import BayesianOptimization

In [None]:
import GPy
import GPyOpt
from GPyOpt.methods import BayesianOptimization
# Search space for the Bayesian optimization. Every entry is a log10 exponent;
# the four dimensions line up, in order, with the four columns that
# BigGAN_optim_result reads from its `param` argument.
mixed_domain = [
    {'name': 'lr_class', 'type': 'continuous', 'domain': (-4, -1.5), 'dimensionality': 1},
    {'name': 'wd_class', 'type': 'continuous', 'domain': (-6, -2), 'dimensionality': 1},
    {'name': 'lr_noise', 'type': 'continuous', 'domain': (-4, -1.5), 'dimensionality': 1},
    {'name': 'wd_noise', 'type': 'continuous', 'domain': (-6, -2), 'dimensionality': 1},
]
# The objective is BigGAN_optim_result: it is the function in this notebook
# that consumes a (1, 4) array of these exponents.
# NOTE(review): the cell previously passed `optim_result`; confirm no such
# function was intended instead.
myBopt = BayesianOptimization(f=BigGAN_optim_result,        # Objective function
                              domain=mixed_domain,          # Box-constraints of the problem
                              initial_design_numdata=5,     # Number data initial design
                              acquisition_optimizer_type='lbfgs',
                              acquisition_type='EI',        # Expected Improvement
                              exact_feval=False,            # Evaluations are noisy (random start points)
                              maximize=False)               # Minimize the returned objective

In [None]:
max_iter = 50       ## maximum number of iterations
max_time = 3600      ## maximum allowed time
eps      = 1e-6     ## tolerance, max distance between consecutive evaluations.
# Only max_iter is passed positionally; eps is pinned to 0 in the call, so the
# `eps` and `max_time` variables above are currently not used by it.
# `verbosity` has to be given by keyword: a positional argument may not follow
# a keyword argument (that is a SyntaxError).
# NOTE(review): verbosity=True is assumed to be the intent of the unfinished
# trailing argument; switch to False for a silent run.
myBopt.run_optimization(max_iter, eps=0, verbosity=True)