In [1]:
# Importing libraries
import pandas as pd
import matplotlib.pyplot as plt
import spatial_efd
import math 
import signac
import numpy as np
import os.path
import os
import torch
import gpytorch
import subprocess
import gc
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from smt.sampling_methods import LHS
# Importing helper libraries for bayesian optimization
from dependencies.data_preprocessing_class import dataPreprocessing
from dependencies.gaussian_process_regression_class import gaussianProcessRegression
from dependencies.acquisition_functions_class import acqisitionFunctions
from dependencies.geometry_writer import geometryWriter
from dependencies.feature_extractor_4 import FeatureExtractor

In [2]:
"""
STEP 0:
Load the input and output data generated by the SE model for building a GPR model
"""
# Both cached arrays are loaded below, so check for both files rather than only one
required_data_files = ('master_parameter_input_n.npy', 'master_feature_output.npy')
missing_data_files = [name for name in required_data_files if not os.path.isfile(name)]
doesDataFileExist = not missing_data_files
if doesDataFileExist:
	# Input and output data of the SE model, as cached on disk
	master_parameter_input_n = np.load('master_parameter_input_n.npy')
	master_feature_output = np.load('master_feature_output.npy')
else:
	# No fallback (e.g. rebuilding the arrays from the signac workspace) is implemented
	# in this cell, so stop with a clear message instead of letting a later cell fail
	# with a NameError on the undefined arrays.
	raise FileNotFoundError(
		"Missing cached data file(s): " + ", ".join(missing_data_files)
	)

In [3]:
"""
STEP 2a: Data preprocessing
1. Selects the parameters sampled in LHS from the total of 35 parameters of the SE model
2. PCA on the output shape features.
"""
# Per-feature mean of the output shape features, taken over axis 0
data_efd_mean = np.mean(master_feature_output,axis = 0)
# NOTE: despite the name, this holds the standard deviation (np.std), not the variance
data_efd_variance = np.std(master_feature_output,axis = 0)
# Overwrite the entry at index 0 with a tiny positive number
# NOTE(review): presumably guards the later division by this array against a zero
# spread in the first feature - confirm; other zero entries are not handled here
data_efd_variance[0,] = 10**-33
# Loading in the data processing class
# NOTE(review): 133 presumably is the number of samples in the data set - confirm
dataPreprocess  = dataPreprocessing(master_parameter_input_n, master_feature_output, 133)
# Converting the input parameters to logscale
master_parameter_input_log = dataPreprocess.inputLogTransform()
# Selecting the parameters that were sampled in the latin hypercube sampling
num_parameters_LHS = 7
LHS_parameter_index = [1, 4, 7, 17, 18, 19, 33]
# Calling in the function to separate out the desired parameters
data_x = dataPreprocess.inputParameterSelection(num_parameters_LHS, LHS_parameter_index, master_parameter_input_log)
# PCA to reduce dimensionality of the output data
# NOTE(review): the argument 8 presumably is the number of principal components kept - confirm
total_variance_explained, principalComponents, weights, weights_af = dataPreprocess.pcaEfdFeatures(8)
# Storing mean and standard deviation of input training data for later use
# (data_x_variance is a standard deviation as well, despite its name)
data_x_mean = np.mean(data_x, axis=0)
data_x_variance = np.std(data_x, axis=0)
# Normalizing data
data_x = StandardScaler().fit_transform(data_x)
# Column-wise maximum and minimum of the normalized inputs
max_data_x = np.amax(data_x, axis=0) 
min_data_x = np.amin(data_x, axis=0) 
# The full principalComponents result of the PCA is used as the output data (not only PC1)
data_y = principalComponents

  master_parameter_input = np.log(self.master_parameter_input_n)


In [4]:
# Load the vertices written by a sample SE simulation whose parameters are known
fe_exp = FeatureExtractor('input_data/vertices_target_SE.txt', 'log_edges.xlsx')
# EFD coefficients of the target tissue shape
coeff_exp = fe_exp.tissue_efd_coeff(20)
# Inverse EFD (20 harmonics) gives the normalized tissue outline for plotting
xt_exp, yt_exp = spatial_efd.inverse_transform(coeff_exp, harmonic=20)
# Flatten the coefficients into a single 80-element vector
efd_coeff_exp_reshaped = np.reshape(coeff_exp, (80,))

# Standardize with the mean and spread of the simulated feature set, then store the
# result as an (80, 1) column vector
efd_coeff_exp_normalized = (
    (efd_coeff_exp_reshaped - data_efd_mean) / data_efd_variance
).reshape(80, 1)
# Project the standardized coefficients with the weights obtained from the PCA
efd_coeff_exp_normalized_pc = np.matmul(weights, efd_coeff_exp_normalized)
# Row-vector form (1, 8) so it can be appended to the original data array
y_exp = efd_coeff_exp_normalized_pc.reshape(1, 8)

In [5]:
"""
Step 3: class ExactGP Model 
        This cell needs to be executed before the
        gaussianProcessRegression class can be used
"""
class ExactGPModel(gpytorch.models.ExactGP):
	"""Exact GP model with a constant mean and a scaled Matern (nu=0.5) kernel."""

	def __init__(self, train_x, train_y, likelihood):
		"""Pass the training data and likelihood to ExactGP and create the mean and kernel modules."""
		super().__init__(train_x, train_y, likelihood)
		self.mean_module = gpytorch.means.ConstantMean()
		# Matern kernel wrapped in a ScaleKernel
		matern_kernel = gpytorch.kernels.MaternKernel(nu=0.5)
		self.covar_module = gpytorch.kernels.ScaleKernel(matern_kernel)

	def forward(self, x):
		"""Return the MultivariateNormal built from the mean and covariance modules evaluated at x."""
		return gpytorch.distributions.MultivariateNormal(
			self.mean_module(x),
			self.covar_module(x),
		)
# Calling in the gpr class with the normalized inputs (data_x) and the PCA outputs (data_y)
gpr  = gaussianProcessRegression(data_x, data_y)
# Splitting up the training and test data
# NOTE(review): 110 and 133 presumably are the training-set size and the total number of
# samples - confirm against gaussianProcessRegression.split_data
train_x, train_y, test_x, test_y = gpr.split_data(110, 133)

In [6]:
"""
Step 5: Executing the loop for bayesian optimization

"""
# Loop configuration
maxIter = 1
num_pc_components = 8
# Number of candidate points drawn per iteration; every per-sample array below is
# sized from this value so it can be changed in one place
num_samples = 100000
error_target_sampled = []
iter_counter = []
param_sampled = np.zeros((maxIter,7))

for i in range(maxIter):
    # Step 5a: Training the GP model (done per principal component in the inner loop)
    # Step 5b: Estimating acquisition function
    #   Sample random points in space
    #   Use the gpr model to estimate expected improvement
    #   find optimum x
    #   Transform to parameter space

    # Method I: latin hypercube sampling
    # One [min, max] row per input dimension, spanning the range of the normalized
    # training inputs
    xlimits = np.column_stack((min_data_x, max_data_x))
    sampling = LHS(xlimits = xlimits)
    # Implementing latin hypercube sampling
    x = sampling(num_samples)
    # Table of results: starts with the sampled points, then gains three columns
    # (model mean, model variance, expected improvement) per principal component
    master_acquisition = x

    # Method II: Random sampling
    #num_samples = 1000000
    #x = np.random.rand(num_samples, 7)

    # Weighted sum of the per-component expected improvement
    ei = np.zeros((num_samples,))

    for j in range(num_pc_components):
        # Getting the trained model and likelihood using the training data
        # NOTE(review): 100 presumably is the number of training iterations (the
        # training log prints "Iter k/100") - confirm against GP_Regressor
        model, likelihood = gpr.GP_Regressor(train_x, train_y, test_x, test_y, 100, i, ExactGPModel,j)
        # Target value of the j-th principal component
        y_target = efd_coeff_exp_normalized_pc[j]
        # Calling in the acquisition function class
        af = acqisitionFunctions(x, test_x, test_y[:,j])
        ei_comp, model_mean, model_var = af.expected_improvement_modified(model, likelihood, y_target)

        # Accumulating the weighted expected improvement
        ei = ei + weights_af[j]*ei_comp

        # Append the three new columns in a single stack so the large table is copied
        # once per component instead of three times
        master_acquisition = np.hstack((
            master_acquisition,
            np.reshape(model_mean, (num_samples, 1)),
            np.reshape(model_var, (num_samples, 1)),
            np.reshape(ei_comp, (num_samples, 1)),
        ))

        print(np.shape(master_acquisition))

        # Release the per-component model objects before training the next one
        del model
        del likelihood
        del af
        gc.collect()

Iter 1/100 - Loss: 6.599   lengthscale: 0.693   noise: 0.693
Iter 2/100 - Loss: 6.170   lengthscale: 0.744   noise: 0.744
Iter 3/100 - Loss: 5.798   lengthscale: 0.798   noise: 0.798
Iter 4/100 - Loss: 5.477   lengthscale: 0.853   noise: 0.853
Iter 5/100 - Loss: 5.200   lengthscale: 0.909   noise: 0.911
Iter 6/100 - Loss: 4.960   lengthscale: 0.965   noise: 0.969
Iter 7/100 - Loss: 4.752   lengthscale: 1.022   noise: 1.029
Iter 8/100 - Loss: 4.571   lengthscale: 1.077   noise: 1.090
Iter 9/100 - Loss: 4.413   lengthscale: 1.131   noise: 1.151
Iter 10/100 - Loss: 4.274   lengthscale: 1.183   noise: 1.213
Iter 11/100 - Loss: 4.151   lengthscale: 1.232   noise: 1.275
Iter 12/100 - Loss: 4.042   lengthscale: 1.279   noise: 1.337
Iter 13/100 - Loss: 3.946   lengthscale: 1.322   noise: 1.399
Iter 14/100 - Loss: 3.859   lengthscale: 1.362   noise: 1.460
Iter 15/100 - Loss: 3.782   lengthscale: 1.399   noise: 1.521
Iter 16/100 - Loss: 3.712   lengthscale: 1.433   noise: 1.581
Iter 17/100 - Los

Iter 52/100 - Loss: 2.827   lengthscale: 0.717   noise: 3.012
Iter 53/100 - Loss: 2.823   lengthscale: 0.717   noise: 3.035
Iter 54/100 - Loss: 2.819   lengthscale: 0.717   noise: 3.058
Iter 55/100 - Loss: 2.815   lengthscale: 0.717   noise: 3.080
Iter 56/100 - Loss: 2.812   lengthscale: 0.718   noise: 3.102
Iter 57/100 - Loss: 2.808   lengthscale: 0.720   noise: 3.123
Iter 58/100 - Loss: 2.805   lengthscale: 0.721   noise: 3.144
Iter 59/100 - Loss: 2.802   lengthscale: 0.724   noise: 3.164
Iter 60/100 - Loss: 2.799   lengthscale: 0.726   noise: 3.184
Iter 61/100 - Loss: 2.796   lengthscale: 0.729   noise: 3.204
Iter 62/100 - Loss: 2.793   lengthscale: 0.732   noise: 3.223
Iter 63/100 - Loss: 2.791   lengthscale: 0.734   noise: 3.242
Iter 64/100 - Loss: 2.788   lengthscale: 0.737   noise: 3.261
Iter 65/100 - Loss: 2.785   lengthscale: 0.739   noise: 3.279
Iter 66/100 - Loss: 2.783   lengthscale: 0.741   noise: 3.298
Iter 67/100 - Loss: 2.781   lengthscale: 0.743   noise: 3.316
Iter 68/

(100000, 16)
Iter 1/100 - Loss: 3.999   lengthscale: 0.693   noise: 0.693
Iter 2/100 - Loss: 3.827   lengthscale: 0.744   noise: 0.744
Iter 3/100 - Loss: 3.676   lengthscale: 0.797   noise: 0.798
Iter 4/100 - Loss: 3.542   lengthscale: 0.850   noise: 0.854
Iter 5/100 - Loss: 3.424   lengthscale: 0.900   noise: 0.911
Iter 6/100 - Loss: 3.320   lengthscale: 0.945   noise: 0.970
Iter 7/100 - Loss: 3.228   lengthscale: 0.980   noise: 1.030
Iter 8/100 - Loss: 3.147   lengthscale: 1.006   noise: 1.091
Iter 9/100 - Loss: 3.075   lengthscale: 1.022   noise: 1.153
Iter 10/100 - Loss: 3.011   lengthscale: 1.029   noise: 1.216
Iter 11/100 - Loss: 2.955   lengthscale: 1.030   noise: 1.279
Iter 12/100 - Loss: 2.904   lengthscale: 1.025   noise: 1.341
Iter 13/100 - Loss: 2.859   lengthscale: 1.017   noise: 1.403
Iter 14/100 - Loss: 2.820   lengthscale: 1.006   noise: 1.465
Iter 15/100 - Loss: 2.785   lengthscale: 0.995   noise: 1.526
Iter 16/100 - Loss: 2.754   lengthscale: 0.984   noise: 1.586
Iter

Iter 33/100 - Loss: 2.190   lengthscale: 1.422   noise: 2.211
Iter 34/100 - Loss: 2.190   lengthscale: 1.430   noise: 2.233
Iter 35/100 - Loss: 2.189   lengthscale: 1.439   noise: 2.253
Iter 36/100 - Loss: 2.188   lengthscale: 1.449   noise: 2.272
Iter 37/100 - Loss: 2.188   lengthscale: 1.460   noise: 2.289
Iter 38/100 - Loss: 2.187   lengthscale: 1.472   noise: 2.306
Iter 39/100 - Loss: 2.187   lengthscale: 1.484   noise: 2.320
Iter 40/100 - Loss: 2.186   lengthscale: 1.496   noise: 2.334
Iter 41/100 - Loss: 2.186   lengthscale: 1.510   noise: 2.347
Iter 42/100 - Loss: 2.185   lengthscale: 1.523   noise: 2.358
Iter 43/100 - Loss: 2.185   lengthscale: 1.537   noise: 2.369
Iter 44/100 - Loss: 2.185   lengthscale: 1.551   noise: 2.378
Iter 45/100 - Loss: 2.185   lengthscale: 1.566   noise: 2.387
Iter 46/100 - Loss: 2.184   lengthscale: 1.580   noise: 2.395
Iter 47/100 - Loss: 2.184   lengthscale: 1.595   noise: 2.402
Iter 48/100 - Loss: 2.184   lengthscale: 1.610   noise: 2.408
Iter 49/

(100000, 25)
Iter 1/100 - Loss: 2.076   lengthscale: 0.693   noise: 0.693
Iter 2/100 - Loss: 2.034   lengthscale: 0.744   noise: 0.744
Iter 3/100 - Loss: 1.999   lengthscale: 0.798   noise: 0.798
Iter 4/100 - Loss: 1.970   lengthscale: 0.853   noise: 0.852
Iter 5/100 - Loss: 1.948   lengthscale: 0.910   noise: 0.907
Iter 6/100 - Loss: 1.930   lengthscale: 0.967   noise: 0.962
Iter 7/100 - Loss: 1.915   lengthscale: 1.026   noise: 1.017
Iter 8/100 - Loss: 1.905   lengthscale: 1.084   noise: 1.069
Iter 9/100 - Loss: 1.896   lengthscale: 1.142   noise: 1.120
Iter 10/100 - Loss: 1.890   lengthscale: 1.199   noise: 1.167
Iter 11/100 - Loss: 1.885   lengthscale: 1.256   noise: 1.211
Iter 12/100 - Loss: 1.882   lengthscale: 1.311   noise: 1.252
Iter 13/100 - Loss: 1.879   lengthscale: 1.365   noise: 1.287
Iter 14/100 - Loss: 1.878   lengthscale: 1.418   noise: 1.319
Iter 15/100 - Loss: 1.876   lengthscale: 1.469   noise: 1.345
Iter 16/100 - Loss: 1.875   lengthscale: 1.518   noise: 1.367
Iter

Iter 35/100 - Loss: 1.696   lengthscale: 2.065   noise: 0.459
Iter 36/100 - Loss: 1.695   lengthscale: 2.057   noise: 0.446
Iter 37/100 - Loss: 1.695   lengthscale: 2.047   noise: 0.432
Iter 38/100 - Loss: 1.694   lengthscale: 2.036   noise: 0.418
Iter 39/100 - Loss: 1.693   lengthscale: 2.026   noise: 0.404
Iter 40/100 - Loss: 1.692   lengthscale: 2.015   noise: 0.389
Iter 41/100 - Loss: 1.692   lengthscale: 2.005   noise: 0.375
Iter 42/100 - Loss: 1.691   lengthscale: 1.996   noise: 0.360
Iter 43/100 - Loss: 1.690   lengthscale: 1.988   noise: 0.345
Iter 44/100 - Loss: 1.690   lengthscale: 1.981   noise: 0.330
Iter 45/100 - Loss: 1.689   lengthscale: 1.975   noise: 0.315
Iter 46/100 - Loss: 1.688   lengthscale: 1.970   noise: 0.301
Iter 47/100 - Loss: 1.688   lengthscale: 1.967   noise: 0.287
Iter 48/100 - Loss: 1.687   lengthscale: 1.964   noise: 0.273
Iter 49/100 - Loss: 1.686   lengthscale: 1.963   noise: 0.259
Iter 50/100 - Loss: 1.686   lengthscale: 1.962   noise: 0.246
Iter 51/

In [7]:
# Persist the sampled points together with the per-component GP mean, variance and EI columns
np.save('master_acquisition.npy',master_acquisition)

In [None]:
""" STructure of master_acquisition_array: 7 parameters, model_mean_PC1, model_var_PC1, ei_PC1, 
                                      model_mean_PC2, model_var_PC2, ei_PC2, .....
"""

# Reload the acquisition table from disk (the file is written by the np.save call earlier in the notebook)
master_acquisition = np.load("master_acquisition.npy",)