In [None]:
%load_ext autoreload

%autoreload 2

In [None]:
import os
import sys
import random
import numpy as np
from tqdm import tqdm_notebook
from sklearn.gaussian_process import GaussianProcessRegressor as GPR
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel
from sklearn.ensemble import RandomForestRegressor
from scipy.stats import ttest_ind
from matplotlib import pyplot as plt

In [None]:
import pickle
import pickle
import gzip

In [None]:
import mfgpr

# Read benchmark data

In [None]:
# Files for each supported benchmark: (KD-based predictions, accuracy tables).
# Change DATASET to switch benchmarks instead of commenting lines in and out.
BENCHMARK_FILES = {
    'cifar10': ('lf_kd_ep1_c10.pickle', 'acc_nb201_c10.pickle'),                    # CIFAR10
    'cifar100': ('lf_kd_ep1.pickle', 'acc_nb201.pickle'),                           # CIFAR100
    'imagenet16-120': ('lf_kd_ep1_imagenet.pickle', 'acc_nb201_imagenet.pickle'),   # ImageNet16-120
}
DATASET = 'imagenet16-120'

kd_file, acc_file = BENCHMARK_FILES[DATASET]

# NOTE(security): unpickling can execute arbitrary code -- only load files from a trusted source.
# The context managers guarantee the file handles are closed once the data is read.
with open(kd_file, 'rb') as fh:
    rnd1k_ep1_nst_arcs = pickle.load(fh)
with open(acc_file, 'rb') as fh:
    (lf_val, lf_test, hf_val, hf_test) = pickle.load(fh)

In [None]:
# NOTE(security): unpickling can execute arbitrary code -- only load this archive from a trusted source.
# The context manager closes the gzip stream as soon as the object has been read.
with gzip.open('nasbench_val.pkl.gz', 'rb') as fh:
    obj = pickle.load(fh)

In [None]:
# Data-order fix: every sequence is rotated so that the entry at position `n`
# comes first and the first `n` entries move to the end.
# NOTE(review): presumably this lines the entries up with the accuracy tables
# loaded earlier -- confirm against the data source.
n = 1563


def _rotate_left(seq, shift):
    """Return `seq` with its first `shift` items moved to the end."""
    return seq[shift:] + seq[:shift]


ys2, X2, gs2 = (_rotate_left(obj[key], n) for key in ('ys', 'descriptors', 'gs'))

In [None]:
# Shorter alias for the KD-based predictions loaded earlier; the cells below refer to it as `kd_pred`.
kd_pred = rnd1k_ep1_nst_arcs

In [None]:
# One flat NumPy vector per entry of gs2; these vectors are the model inputs
# handed to run_MFKDGPR further down.
# (assumes every entry exposes .numpy(), e.g. a tensor -- TODO confirm)
X_adj = [graph.numpy().flatten() for graph in gs2]

In [None]:
# Visual check of how the KD-based predictions relate to hf_test.
fig, ax = plt.subplots()
ax.scatter(kd_pred, hf_test, s=1)
ax.set_xlabel('kd_pred')
ax.set_ylabel('hf_test')

# Run simulations of NAS methods

In [None]:
# Number of 200-epoch (high-fidelity) evaluations in the random-search budget;
# the sample counts of the cheaper random searches below are derived from it.
N = 150

## Random search by high-fidelity data (200 epochs for train stage)

In [None]:
trials = 100
sz = N

# Every sampled architecture costs 200 epochs, so the budget grows linearly.
costs_rs_hf = np.cumsum(np.full(sz, 200))
acc_rs_hf = np.zeros(N)   # sum over trials of the best-so-far test accuracy

candidate_ids = list(range(len(X2)))
for trial in range(trials):
    points = list(np.random.choice(candidate_ids, size=N, replace=False))
    y_val_points = [hf_val[p] for p in points]

    # After each evaluation, report the test accuracy of the architecture with
    # the best validation score seen so far (first one wins on ties).
    y_points = np.array(
        [hf_test[points[np.argmax(y_val_points[:step + 1])]] for step in range(N)],
        dtype=float,
    )

    acc_rs_hf += y_points

## Random search by low-fidelity data (12 epochs for train stage)

In [None]:
trials = 500

# Same total epoch budget as N runs of 200 epochs, spent 12 epochs at a time.
sz = int(N*200/12)
costs_rs_lf = np.cumsum(np.full(sz, 12))
acc_rs_lf = np.zeros(sz)        # sum over trials of the best-so-far test accuracy
acc_rs_lf_all_trials = []       # one best-so-far curve per trial

candidate_ids = list(range(len(X2)))
for trial in tqdm_notebook(range(trials)):
    points = list(np.random.choice(candidate_ids, size=sz, replace=False))
    y_val_points = [lf_val[p] for p in points]

    y_points = np.zeros(sz)
    best = 0  # position (within `points`) of the best validation score so far
    for step, score in enumerate(y_val_points):
        if score > y_val_points[best]:
            best = step
        y_points[step] = hf_test[points[best]]

    acc_rs_lf += y_points
    acc_rs_lf_all_trials.append(y_points.copy())

In [None]:
# Stack the per-trial curves into one array (one row per trial) so the plot can average over axis 0.
acc_rs_lf_all_trials = np.array(acc_rs_lf_all_trials)

## Random search with KD data (approximate cost per architecture is ~1.5 training epochs without KD)

In [None]:
trials = 100

# Number of architectures affordable at 1.5 epochs each, capped by the number
# of architectures available.
sz = min(int(N*200/1.5), len(X2))
costs_rs_kd = np.cumsum(np.full(sz, 1.5))
acc_rs_kd = np.zeros(sz)   # sum over trials of the best-so-far test accuracy

candidate_ids = list(range(len(X2)))
for trial in tqdm_notebook(range(trials)):
    points = list(np.random.choice(candidate_ids, size=sz, replace=False))
    y_val_points = [kd_pred[p] for p in points]

    y_points = np.zeros(sz)
    best = 0  # position (within `points`) of the highest KD prediction so far
    for step, score in enumerate(y_val_points):
        if score > y_val_points[best]:
            best = step
        y_points[step] = hf_test[points[best]]

    acc_rs_kd += y_points

In [None]:
def run_MFKDGPR(X_scaled = None, lf_data = None, hf_data = None, max_points = None, r=5,
                hf_cost=12, lf_cost=1.5, n_init=20):
    """Simulate a GP-guided architecture search with an optional low-fidelity source.

    A surrogate model is fitted on a random initial sample and then repeatedly
    used to pick the next architecture by an upper-confidence-bound (UCB) score
    (predicted mean + predicted std) over a random candidate subset.

    Parameters
    ----------
    X_scaled : indexable
        Feature vector per architecture; indexed with the same integer ids as
        `lf_data` and `hf_data`.
    lf_data : sequence
        Low-fidelity targets. `r * n_init` random entries are used for training;
        may be empty when `r == 0`.
    hf_data : sequence
        High-fidelity targets that the search queries one point at a time.
    max_points : int
        Total number of high-fidelity points to collect, including the
        `n_init` initial ones. Required.
    r : int
        Number of low-fidelity samples per initial high-fidelity sample.
        `r > 0` uses `mfgpr.GaussianProcessCoKriging`; otherwise a plain
        `GaussianProcessRegressor` is fitted on high-fidelity data only.
    hf_cost, lf_cost : number
        Cost charged for one high- / low-fidelity evaluation.
    n_init : int
        Size of the initial random high-fidelity sample.

    Returns
    -------
    (hf_checked_points, y_train_hf, costs)
        Ids of the evaluated architectures in evaluation order, their
        high-fidelity targets, and the cost charged at each step (the first
        entry also carries the cost of all low-fidelity samples).

    Raises
    ------
    ValueError
        If `max_points` exceeds `len(hf_data)`; the search could never collect
        that many distinct points and would otherwise loop forever.
    """
    refit_each = 5          # refit whenever the number of HF points is a multiple of this
    ucb_weight = 1          # multiplier of the predictive std in the UCB score
    max_candidates = 5000   # at most this many random candidates are scored per step

    n_total = len(hf_data)
    if max_points is not None and max_points > n_total:
        raise ValueError(
            'max_points (%d) exceeds the number of available architectures (%d)'
            % (max_points, n_total)
        )

    # Initial random designs (low-fidelity drawn first to keep the RNG call order stable).
    lf_checked_points = list(np.random.choice(list(range(len(lf_data))), size = r*n_init, replace = False))
    hf_checked_points = list(np.random.choice(list(range(n_total)), size = n_init, replace = False))
    hf_checked = set(hf_checked_points)   # O(1) membership test while scoring candidates

    # The first step pays for its own HF evaluation plus every low-fidelity sample.
    costs = [hf_cost + n_init*r*lf_cost] + [hf_cost] * (n_init - 1)

    X_train_lf = [X_scaled[idx] for idx in lf_checked_points]
    X_train_hf = [X_scaled[idx] for idx in hf_checked_points]
    y_train_lf = [lf_data[idx] for idx in lf_checked_points]
    y_train_hf = [hf_data[idx] for idx in hf_checked_points]

    composite_kernel = RBF(length_scale=1, length_scale_bounds=(0.001, 100))
    composite_kernel = ConstantKernel(1, constant_value_bounds=(0.001, 100)) * composite_kernel
    composite_kernel = WhiteKernel(noise_level=1, noise_level_bounds=(0.001, 100)) + composite_kernel

    if r > 0:
        model = mfgpr.GaussianProcessCoKriging(
                kernel=composite_kernel,
                n_restarts_optimizer=1
            )
    else:
        model = GPR(
                kernel=composite_kernel,
                n_restarts_optimizer=1
            )

    def _fit():
        """Fit the surrogate on the current training sets."""
        if r > 0:
            model.fit(np.array(X_train_lf), np.array(y_train_lf), np.array(X_train_hf), np.array(y_train_hf))
        else:
            model.fit(np.array(X_train_hf), np.array(y_train_hf))

    pbar = tqdm_notebook(total=max_points)
    try:
        pbar.update(n_init)

        # NOTE: when n_init is a multiple of refit_each the loop refits right away;
        # this initial fit is kept so the sequence of random draws is unchanged.
        _fit()

        while len(hf_checked_points) < max_points:
            if len(hf_checked_points) % refit_each == 0:
                _fit()

            # Score a random subset of architectures instead of the whole space.
            acq_points = list(np.random.choice(list(range(n_total)),
                                               size = min([max_candidates, n_total]), replace = False))
            X_scaled_part = [X_scaled[idx] for idx in acq_points]

            preds, std = model.predict(np.array(X_scaled_part), return_std=True)
            # UCB score; reshape so (n,) and (n, 1) outputs are handled alike.
            ucb = preds.reshape(-1) + ucb_weight * std.reshape(-1)

            max_score = -np.inf
            best_new_idx = None
            for idx, score in zip(acq_points, ucb):
                if idx in hf_checked:
                    continue
                if score > max_score:
                    max_score = score
                    best_new_idx = idx

            if best_new_idx is None:
                # Every sampled candidate was already evaluated: draw a new subset.
                continue

            hf_checked_points.append(best_new_idx)
            hf_checked.add(best_new_idx)
            costs.append(hf_cost)
            y_train_hf.append(hf_data[best_new_idx])
            X_train_hf.append(X_scaled[best_new_idx])
            pbar.update(1)
    finally:
        # Always release the progress bar, even if fitting or prediction fails.
        pbar.close()

    return hf_checked_points, y_train_hf, costs

## GPR single fidelity with 12 epochs for train stage

In [None]:
gpr_trials = 100
N_lf = 95

# Validation values divided by 100 are the only training target here (r=0, no
# low-fidelity source); the list is the same for every trial, so build it once.
lf_val_scaled = [x/100 for x in lf_val]

acc_gpr = np.zeros(N_lf)     # sum over trials of the best-so-far test accuracy
all_costs = np.zeros(N_lf)   # sum over trials of the per-step costs
acc_gpr_all_trials = []      # one best-so-far curve per trial

for trial in tqdm_notebook(range(gpr_trials)):
    points, y_train, costs = run_MFKDGPR(X_adj, [], lf_val_scaled, N_lf,
                                         hf_cost=12, r=0)

    # Test accuracy of the architecture with the best training target so far.
    y_points = np.array(
        [hf_test[points[np.argmax(y_train[:step + 1])]] for step in range(N_lf)],
        dtype=float,
    )

    acc_gpr += y_points
    all_costs += np.array(costs)
    acc_gpr_all_trials.append(y_points)

# Mean cost per step, accumulated into the total budget spent after each step.
all_costs_gpr = np.cumsum(all_costs/gpr_trials)

acc_gpr_all_trials = np.array(acc_gpr_all_trials)

## Multi-fidelity GPR with 12 epochs as high-fidelity source of data and KD as low-fidelity source

In [None]:
# Number of independent repetitions of the multi-fidelity run that are averaged below.
mfkdgpr_trials = 100

In [None]:
# Points collected per multi-fidelity run (passed as max_points, so it includes the
# initial random sample). This overrides the value 95 set in the single-fidelity GPR cell.
N_lf = 75

In [None]:
# Both fidelity sources are divided by 100 before being handed to the model;
# the lists are identical for every trial, so build them once.
kd_pred_scaled = [x/100 for x in kd_pred]
lf_val_scaled = [x/100 for x in lf_val]

acc_mfkdgpr = np.zeros(N_lf)   # sum over trials of the best-so-far test accuracy
all_costs = np.zeros(N_lf)     # sum over trials of the per-step costs
acc_mfkdgpr_all_trials = []    # one best-so-far curve per trial

for trial in tqdm_notebook(range(mfkdgpr_trials)):
    points, y_train, costs = run_MFKDGPR(X_adj, kd_pred_scaled, lf_val_scaled, N_lf,
                                         hf_cost=12)

    # Test accuracy of the architecture with the best training target so far.
    y_points = np.array(
        [hf_test[points[np.argmax(y_train[:step + 1])]] for step in range(N_lf)],
        dtype=float,
    )

    acc_mfkdgpr += y_points
    all_costs += np.array(costs)
    acc_mfkdgpr_all_trials.append(y_points)

In [None]:
# Stack the per-trial curves into one array (one row per trial) so the plot can average over axis 0.
acc_mfkdgpr_all_trials = np.array(acc_mfkdgpr_all_trials)

In [None]:
# Mean cost per step over the multi-fidelity trials, accumulated into the budget spent so far.
# NOTE: `all_costs` is also the name used by the single-fidelity GPR cell; run this right
# after the multi-fidelity loop so it holds that loop's sums.
all_costs_kd_lf = np.cumsum(all_costs/mfkdgpr_trials)

In [None]:
# Best-so-far test accuracy against the epoch budget for every search strategy.
fig, ax = plt.subplots(figsize=(10, 5), dpi=100)
greys = plt.cm.Greys

# NOTE: `trials` holds whatever value was assigned last (100, set in the KD
# random-search cell), which equals the trial count of the HF random search too.
# The other curves average their stored per-trial arrays instead.
ax.plot(costs_rs_hf, acc_rs_hf/trials, label='Random Search HF', color=greys(0.8))
ax.plot(costs_rs_lf, acc_rs_lf_all_trials.mean(axis=0), label='Random Search LF', color=greys(0.6))
ax.plot(costs_rs_kd, acc_rs_kd/trials, label='Random Search KD', color=greys(0.4))
ax.plot(all_costs_kd_lf, acc_mfkdgpr_all_trials.mean(axis=0), label='MF-KD-GPR (KD+LF r=5)')
ax.plot(all_costs_gpr, acc_gpr_all_trials.mean(axis=0), label='GPR', color='green')

ax.set_xlim([0, 1000])
ax.set_xlabel('num.epochs')
ax.set_ylabel('test accuracy')
ax.legend()
ax.grid()