In [4]:
import numpy as np 
import matplotlib.pyplot as plt
import pickle # for .pkl files
import h5py   # for .jld2 files
import os
import pandas as pd
import torch
from sklearn.decomposition import PCA


# Set matplotlib's default font size to 16 for every figure drawn after this cell runs.
plt.rcParams["font.size"] = 16

## Load Data

In [5]:
# Read every dataset while the file is open, then let the context manager
# close the handle. `[:]` copies each dataset into an in-memory numpy array,
# so the tensors below stay valid after the file is closed.
# NOTE(review): the transposes presumably undo the axis order the arrays were
# written with (.jld2 is a Julia format) -- confirm against the writer script.
with h5py.File("targets_and_normalized_features.jld2", "r") as file:
    # feature matrix
    X = torch.from_numpy(np.transpose(file["X"][:]))
    # simulation data
    y = torch.from_numpy(np.transpose(file["gcmc_y"][:]))
    # associated simulation costs
    cost = torch.from_numpy(np.transpose(file["gcmc_elapsed_time"][:]))

# total number of COFs in data set (length of the first axis of X)
nb_COFs = X.shape[0]

## Search Efficiency Curves

In [None]:
# for random search results plot 

# get y_max acquired up to iteration i for i = 1,2,...
def y_max(res, targets=None, n_iterations=None, n_runs=None):
    """
    Summarize, across runs, the best target value acquired up to each iteration.

    Parameters
    ----------
    res : mapping
        Search results. `res['ids_acquired'][r]` must hold the indices acquired
        in run `r`, in acquisition order.
    targets : array-like, optional
        Target value for every index that can be acquired. Defaults to the
        notebook-level `y`. Assumed to hold one value per index (it is
        flattened before use).
    n_iterations : int, optional
        Number of iterations to summarize. Defaults to the notebook-level
        `nb_iterations`.
    n_runs : int, optional
        Number of runs to aggregate. Defaults to the notebook-level `nb_runs`.

    Returns
    -------
    y_max_mu : np.ndarray, shape (n_iterations,)
        Mean over runs of the best value acquired within the first i+1
        acquisitions.
    y_max_sig_bot : np.ndarray, shape (n_iterations,)
        Standard deviation of the per-run best values lying strictly below
        that mean (0.0 when no run lies below it).
    y_max_sig_top : np.ndarray, shape (n_iterations,)
        Standard deviation of the per-run best values lying strictly above
        that mean (0.0 when no run lies above it).

    Raises
    ------
    ValueError
        If a run has no acquired ids while n_iterations > 0.
    """
    # Fall back to the notebook globals only for arguments the caller omitted.
    if targets is None:
        targets = y
    if n_iterations is None:
        n_iterations = nb_iterations
    if n_runs is None:
        n_runs = nb_runs

    y_max_mu      = np.zeros(n_iterations)
    y_max_sig_bot = np.zeros(n_iterations)
    y_max_sig_top = np.zeros(n_iterations)
    if n_iterations == 0:
        return y_max_mu, y_max_sig_bot, y_max_sig_top

    # np.asarray also accepts CPU torch tensors.
    targets = np.asarray(targets, dtype=float).ravel()

    # running_best[r, i] = max value acquired by run r within its first i+1 picks.
    running_best = np.empty((n_runs, n_iterations))
    for r in range(n_runs):
        ids = np.asarray(res['ids_acquired'][r]).ravel()[:n_iterations]
        if ids.size == 0:
            raise ValueError("run {} has no acquired ids".format(r))
        # One pass per run instead of recomputing the max for every iteration.
        best_so_far = np.maximum.accumulate(targets[ids])
        running_best[r, :ids.size] = best_so_far
        # A run shorter than n_iterations keeps its final best value.
        running_best[r, ids.size:] = best_so_far[-1]

    y_max_mu[:] = running_best.mean(axis=0)
    for i in range(n_iterations):
        column = running_best[:, i]
        below = column[column < y_max_mu[i]]
        above = column[column > y_max_mu[i]]
        # np.std of an empty selection is NaN (with a RuntimeWarning); this
        # happens once all runs share the same best value, so report 0 spread.
        y_max_sig_bot[i] = np.std(below) if below.size else 0.0
        y_max_sig_top[i] = np.std(above) if above.size else 0.0
    return y_max_mu, y_max_sig_bot, y_max_sig_top

# Mean best-so-far value, plus the spread below and above that mean, for the random-search results.
# NOTE(review): `rs_res` is not defined in the cells visible above -- confirm it is loaded before this cell runs.
y_rs_max_mu, y_rs_max_sig_bot, y_rs_max_sig_top = y_max(rs_res)

In [1]:
# put SFBO, MFBO, and Random Search on the same plot
# top panel should be the accumulated cost as a function of the number of iterations
# the bottom should be the max. S^{l=1}_{Xe/Kr}

## Principal Component Analysis

In [2]:
# color according to GCMC selectivity
# low-fidelity => X, black
# high-fidelity => +, red
# shuffle points before plotting to remove any bias

In [None]:
# Project the feature matrix onto its first two principal components.
# random_state pins the result when scikit-learn selects a stochastic SVD
# solver, so the embedding (and every figure built on it) is reproducible.
pca = PCA(n_components=2, random_state=0).fit(X)
# 2-D coordinates of every row of X, reused by all PCA figures below.
X_2D = pca.transform(X)

In [None]:
# Low-dimensional (PCA) visualization of the entire dataset, colored by y.
fig, ax = plt.subplots()
points = ax.scatter(X_2D[:, 0], X_2D[:, 1], c=y, s=10)
ax.set_xlabel('PC 1')
ax.set_ylabel('PC 2')
# Bind the colorbar to the scatter explicitly instead of the implicit
# "current mappable". The name `cb` is kept because the SFBO/MFBO panel
# cells read cb.vmin / cb.vmax to share this color scale.
cb = fig.colorbar(points, ax=ax, fraction=0.03, pad=0.04)
cb.set_label(label="$S_{Xe/Kr}$")
ax.set_aspect('equal', 'box')
fig.tight_layout()
# plt.savefig("./figs/PCA_feature_space_colored_by_Selectivity.pdf", dpi=600, format="pdf")
plt.show()

### SFBO PCA

In [None]:
# One panel per snapshot of the search: how many COFs had been acquired.
# NOTE(review): `nb_COFs_initialization` and `ids_acquired` are not defined in
# the cells visible above this one -- confirm they are loaded before it runs.
nb_acquired = [nb_COFs_initialization, 15, 30, 45]
# largest y among the COFs shown in each panel
max_y_seen = np.zeros(len(nb_acquired))
fig, ax = plt.subplots(1, len(nb_acquired), sharey=True, sharex=True, figsize=[3*6.4, 4.8])
# gray background: every COF in the data set, drawn in a uniform light gray
for a in ax:
    a.set_aspect('equal', 'box')
    a.scatter(X_2D[:, 0], X_2D[:, 1], s=10, c=0.3 * np.ones(nb_COFs), cmap="binary", vmin=0, vmax=1)

for i in range(len(nb_acquired)):
    ids = ids_acquired[:nb_acquired[i]]
    assert len(ids) == nb_acquired[i]
    # `y_max` is a function in this notebook, so it cannot be subscripted;
    # take the maximum directly from the targets of the acquired COFs.
    max_y_seen[i] = float(y[ids].max())
    # reuse the color scale of the full-dataset colorbar `cb`
    ax[i].scatter(X_2D[ids, 0], X_2D[ids, 1],
                  c=y[ids], marker="o", s=15, vmin=cb.vmin, vmax=cb.vmax)
    if i == 0:
        ax[i].set_title('(initialization)\n{} acquired COFs'.format(nb_acquired[i]))
    else:
        ax[i].set_title('{} acquired COFs'.format(nb_acquired[i]))
ax[0].set_ylabel('PC 2', fontsize=14)

ax[2].tick_params(axis='y', labelsize=0)


# single x-axis label shared by all panels
fig.text(0.5, 0.02, 'PC 1', ha='center')
plt.tight_layout()
# plt.savefig("./figs/SFBO_feature_space_acquired_COFs.pdf", bbox_inches="tight", dpi=600, format="pdf")
plt.show()

### MFBO PCA 

In [None]:
# Split the acquired set into its first and second components.
# NOTE(review): presumably (COF id, fidelity) pairs -- confirm against the
# code that builds `acquired_set`. Transpose once instead of once per column.
acquired_columns = list(zip(*acquired_set))
ids_acquired = torch.tensor(acquired_columns[0])
fid_acquired = torch.tensor(acquired_columns[1])


# One panel per snapshot of the search: how many acquisitions had been made.
nb_acquired = [2*nb_COFs_initialization, 20, 40, 60, 80]
# largest y among the COFs shown in each panel
max_y_seen = np.zeros(len(nb_acquired))
fig, ax = plt.subplots(1, len(nb_acquired), sharey=True, sharex=True, figsize=[3*6.4, 4.8])
# gray background: hexbin of every COF in the data set, uniform light gray
for a in ax:
    a.set_aspect('equal', 'box')
    a.hexbin(X_2D[:, 0], X_2D[:, 1], C=0.3 * np.ones(nb_COFs), cmap="binary", vmin=0, vmax=1)

for i in range(len(nb_acquired)):
    ids  = ids_acquired[:nb_acquired[i]].detach().numpy()
    fids = fid_acquired[:nb_acquired[i]].detach().numpy()
    assert len(ids) == nb_acquired[i]
    # `y_max` is a function in this notebook, so it cannot be subscripted;
    # take the maximum directly from the targets of the acquired COFs.
    # NOTE(review): this counts every acquired COF regardless of `fids`;
    # mask with `fids` if only one fidelity should count -- confirm.
    max_y_seen[i] = float(y[ids].max())
    # reuse the color scale of the full-dataset colorbar `cb`
    ax[i].scatter(X_2D[ids, 0], X_2D[ids, 1],
                  c=y[ids], marker="+", s=55, vmin=cb.vmin, vmax=cb.vmax)
    if i == 0:
        ax[i].set_title('(initialization)\n{} acquired COFs'.format(nb_acquired[i]))
    else:
        ax[i].set_title('{} acquired COFs'.format(nb_acquired[i]))
ax[0].set_ylabel('PC 2', fontsize=14)

ax[2].tick_params(axis='y', labelsize=0)


# single x-axis label shared by all panels
fig.text(0.5, 0.02, 'PC 1', ha='center')
plt.tight_layout()
# plt.savefig("feature_space_acquired_COFs.pdf", dpi=600, bbox_inches="tight", format="pdf")
plt.show()