In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import spatial_efd
import math 
import signac
import numpy as np
import os.path
import os
import torch
import gpytorch
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from smt.sampling_methods import LHS
# Importing helper libraries for bayesian optimization
from dependencies.data_preprocessing_class import dataPreprocessing
from dependencies.gaussian_process_regression_class import gaussianProcessRegression
from dependencies.acquisition_functions_class import acqisitionFunctions
from dependencies.geometry_writer import geometryWriter
from dependencies.feature_extractor_4 import FeatureExtractor


In [2]:
# Cached arrays this notebook needs; both are loaded below
PARAMETER_INPUT_FILE = 'master_parameter_input_n.npy'
FEATURE_OUTPUT_FILE = 'master_feature_output.npy'
# Checking that every required data file exists (not just the feature file),
# since both arrays are consumed by the following cells
missing_data_files = [path for path in (PARAMETER_INPUT_FILE, FEATURE_OUTPUT_FILE) if not os.path.isfile(path)]
doesDataFileExist = not missing_data_files
if doesDataFileExist:
	# Loading datafiles if they exist
	master_parameter_input_n = np.load(PARAMETER_INPUT_FILE)
	master_feature_output = np.load(FEATURE_OUTPUT_FILE)
else:
	# No fallback that rebuilds the arrays (e.g. from the signac workspace) is
	# implemented in this cell, so stop here with a clear message instead of
	# letting a later cell fail with a NameError.
	raise FileNotFoundError("Missing cached data file(s): " + ", ".join(missing_data_files))
	

In [9]:
# Column-wise statistics of the feature matrix; reused later to normalize new shapes.
# NOTE: despite the "*_variance" names, these arrays hold standard deviations (std).
data_efd_mean = master_feature_output.mean(axis=0)
data_efd_variance = master_feature_output.std(axis=0)
# First entry is overwritten with a tiny positive constant
# (presumably to avoid dividing by zero during normalization — TODO confirm)
data_efd_variance[0] = 10**-33

# Instantiate the project's preprocessing helper on the raw inputs and features
dataPreprocess = dataPreprocessing(master_parameter_input_n, master_feature_output, 133)
# Inputs are moved to log scale
master_parameter_input_log = dataPreprocess.inputLogTransform()

# Indices of the parameters that were varied in the latin hypercube sampling
LHS_parameter_index = [1, 4, 7, 17, 18, 19, 33]
num_parameters_LHS = 7
# Keep only those parameters as model inputs
data_x = dataPreprocess.inputParameterSelection(num_parameters_LHS, LHS_parameter_index, master_parameter_input_log)

# PCA on the output features, requesting 8 components
total_variance_explained, principalComponents, weights = dataPreprocess.pcaEfdFeatures(8)

# Remember the input mean / standard deviation BEFORE scaling so that
# standardized samples can be mapped back later
data_x_mean = np.mean(data_x, axis=0)
data_x_variance = np.std(data_x, axis=0)

# Standardize the inputs
scaler = StandardScaler()
data_x = scaler.fit_transform(data_x)

# First principal component is the regression target
data_y = principalComponents[:, 0]

# Per-parameter range of the standardized inputs
max_data_x = data_x.max(axis=0)
min_data_x = data_x.min(axis=0)
print(max_data_x)
print(min_data_x)


[1.42079434 1.41813797 1.44579386 1.40938397 1.4086535  1.40883561
 1.3508899 ]
[-1.81937813 -1.93823379 -1.87646633 -1.78401882 -1.8260345  -1.91939962
 -1.8461721 ]


  master_parameter_input = np.log(self.master_parameter_input_n)


In [4]:
# Target shape: vertices file from a sample SE simulation with known parameters
fe_exp = FeatureExtractor('input_data/vertices_target_SE.txt', 'log_edges.xlsx')
# EFD coefficients with 20 harmonics
coeff_exp = fe_exp.tissue_efd_coeff(20)
# Inverse EFD gives the x/y contour used for plotting the target shape
xt_exp, yt_exp = spatial_efd.inverse_transform(coeff_exp, harmonic=20)
# Flatten the coefficients into a single 80-element vector
efd_coeff_exp_reshaped = np.reshape(coeff_exp, (80,))

# Normalize with the training-data mean / std, then shape as a column vector
efd_coeff_exp_normalized = np.reshape(
	(efd_coeff_exp_reshaped - data_efd_mean) / data_efd_variance,
	(80, 1),
)
# Project onto the principal-component weights obtained from the training data
efd_coeff_exp_normalized_pc = weights @ efd_coeff_exp_normalized
# Row-vector form, convenient for appending to the data array
y_exp = efd_coeff_exp_normalized_pc.reshape(1, 8)
# First row of the projection (same component index as data_y) is the target value
y_target = efd_coeff_exp_normalized_pc[0]
		

In [5]:
print(y_target)

[-2.27741185]


In [6]:
class ExactGPModel(gpytorch.models.ExactGP):
	"""Exact GP model with a constant mean and a scaled Matern (nu=0.5) kernel."""

	def __init__(self, train_x, train_y, likelihood):
		"""Forward the training data and likelihood to ExactGP and build the mean/kernel modules."""
		super().__init__(train_x, train_y, likelihood)
		self.mean_module = gpytorch.means.ConstantMean()
		matern_kernel = gpytorch.kernels.MaternKernel(nu=0.5)
		self.covar_module = gpytorch.kernels.ScaleKernel(matern_kernel)

	def forward(self, x):
		"""Return the MultivariateNormal built from the mean and covariance modules evaluated at x."""
		return gpytorch.distributions.MultivariateNormal(
			self.mean_module(x),
			self.covar_module(x),
		)
# Build the project's GP regression helper on the standardized inputs (data_x)
# and the first-principal-component targets (data_y)
gpr  = gaussianProcessRegression(data_x, data_y)
# Split into training and test sets.
# NOTE(review): 110 and 133 are presumably the training-set size and the total
# number of samples — confirm against gaussianProcessRegression.split_data
train_x, train_y, test_x, test_y = gpr.split_data(110, 133)

In [7]:
# Fit the GP using the ExactGPModel class and keep the model and likelihood.
# 1000 matches the "Iter n/1000" lines in the recorded output, so it appears to
# be the number of training iterations.
# NOTE(review): the meaning of the following positional argument (1) is not
# visible here — confirm against gaussianProcessRegression.GP_Regressor
model, likelihood = gpr.GP_Regressor(train_x, train_y, test_x, test_y, 1000, 1, ExactGPModel)


Iter 1/1000 - Loss: 6.599   lengthscale: 0.693   noise: 0.693
Iter 2/1000 - Loss: 6.170   lengthscale: 0.744   noise: 0.744
Iter 3/1000 - Loss: 5.798   lengthscale: 0.798   noise: 0.798
Iter 4/1000 - Loss: 5.477   lengthscale: 0.853   noise: 0.853
Iter 5/1000 - Loss: 5.200   lengthscale: 0.909   noise: 0.911
Iter 6/1000 - Loss: 4.960   lengthscale: 0.965   noise: 0.969
Iter 7/1000 - Loss: 4.752   lengthscale: 1.022   noise: 1.029
Iter 8/1000 - Loss: 4.571   lengthscale: 1.077   noise: 1.090
Iter 9/1000 - Loss: 4.413   lengthscale: 1.131   noise: 1.151
Iter 10/1000 - Loss: 4.274   lengthscale: 1.183   noise: 1.213
Iter 11/1000 - Loss: 4.151   lengthscale: 1.232   noise: 1.275
Iter 12/1000 - Loss: 4.042   lengthscale: 1.279   noise: 1.337
Iter 13/1000 - Loss: 3.946   lengthscale: 1.322   noise: 1.399
Iter 14/1000 - Loss: 3.859   lengthscale: 1.362   noise: 1.460
Iter 15/1000 - Loss: 3.782   lengthscale: 1.399   noise: 1.521
Iter 16/1000 - Loss: 3.712   lengthscale: 1.433   noise: 1.581
I

Iter 145/1000 - Loss: 2.761   lengthscale: 1.652   noise: 4.238
Iter 146/1000 - Loss: 2.760   lengthscale: 1.653   noise: 4.247
Iter 147/1000 - Loss: 2.759   lengthscale: 1.655   noise: 4.255
Iter 148/1000 - Loss: 2.759   lengthscale: 1.656   noise: 4.263
Iter 149/1000 - Loss: 2.758   lengthscale: 1.657   noise: 4.271
Iter 150/1000 - Loss: 2.757   lengthscale: 1.659   noise: 4.279
Iter 151/1000 - Loss: 2.756   lengthscale: 1.660   noise: 4.287
Iter 152/1000 - Loss: 2.756   lengthscale: 1.662   noise: 4.295
Iter 153/1000 - Loss: 2.755   lengthscale: 1.663   noise: 4.303
Iter 154/1000 - Loss: 2.754   lengthscale: 1.664   noise: 4.310
Iter 155/1000 - Loss: 2.753   lengthscale: 1.666   noise: 4.318
Iter 156/1000 - Loss: 2.753   lengthscale: 1.667   noise: 4.326
Iter 157/1000 - Loss: 2.752   lengthscale: 1.669   noise: 4.333
Iter 158/1000 - Loss: 2.751   lengthscale: 1.670   noise: 4.341
Iter 159/1000 - Loss: 2.751   lengthscale: 1.671   noise: 4.348
Iter 160/1000 - Loss: 2.750   lengthscal

Iter 292/1000 - Loss: 2.701   lengthscale: 1.830   noise: 4.970
Iter 293/1000 - Loss: 2.701   lengthscale: 1.831   noise: 4.972
Iter 294/1000 - Loss: 2.701   lengthscale: 1.832   noise: 4.975
Iter 295/1000 - Loss: 2.700   lengthscale: 1.833   noise: 4.977
Iter 296/1000 - Loss: 2.700   lengthscale: 1.834   noise: 4.980
Iter 297/1000 - Loss: 2.700   lengthscale: 1.835   noise: 4.982
Iter 298/1000 - Loss: 2.700   lengthscale: 1.836   noise: 4.984
Iter 299/1000 - Loss: 2.700   lengthscale: 1.837   noise: 4.987
Iter 300/1000 - Loss: 2.699   lengthscale: 1.838   noise: 4.989
Iter 301/1000 - Loss: 2.699   lengthscale: 1.838   noise: 4.991
Iter 302/1000 - Loss: 2.699   lengthscale: 1.839   noise: 4.994
Iter 303/1000 - Loss: 2.699   lengthscale: 1.840   noise: 4.996
Iter 304/1000 - Loss: 2.699   lengthscale: 1.841   noise: 4.998
Iter 305/1000 - Loss: 2.698   lengthscale: 1.842   noise: 5.000
Iter 306/1000 - Loss: 2.698   lengthscale: 1.843   noise: 5.002
Iter 307/1000 - Loss: 2.698   lengthscal

Iter 434/1000 - Loss: 2.681   lengthscale: 1.949   noise: 5.108
Iter 435/1000 - Loss: 2.680   lengthscale: 1.949   noise: 5.108
Iter 436/1000 - Loss: 2.680   lengthscale: 1.950   noise: 5.108
Iter 437/1000 - Loss: 2.680   lengthscale: 1.951   noise: 5.107
Iter 438/1000 - Loss: 2.680   lengthscale: 1.951   noise: 5.107
Iter 439/1000 - Loss: 2.680   lengthscale: 1.952   noise: 5.107
Iter 440/1000 - Loss: 2.680   lengthscale: 1.953   noise: 5.106
Iter 441/1000 - Loss: 2.680   lengthscale: 1.953   noise: 5.106
Iter 442/1000 - Loss: 2.680   lengthscale: 1.954   noise: 5.105
Iter 443/1000 - Loss: 2.680   lengthscale: 1.955   noise: 5.105
Iter 444/1000 - Loss: 2.680   lengthscale: 1.955   noise: 5.104
Iter 445/1000 - Loss: 2.679   lengthscale: 1.956   noise: 5.104
Iter 446/1000 - Loss: 2.679   lengthscale: 1.957   noise: 5.103
Iter 447/1000 - Loss: 2.679   lengthscale: 1.957   noise: 5.103
Iter 448/1000 - Loss: 2.679   lengthscale: 1.958   noise: 5.102
Iter 449/1000 - Loss: 2.679   lengthscal

Iter 580/1000 - Loss: 2.668   lengthscale: 2.026   noise: 4.905
Iter 581/1000 - Loss: 2.668   lengthscale: 2.026   noise: 4.903
Iter 582/1000 - Loss: 2.667   lengthscale: 2.027   noise: 4.901
Iter 583/1000 - Loss: 2.667   lengthscale: 2.027   noise: 4.898
Iter 584/1000 - Loss: 2.667   lengthscale: 2.027   noise: 4.896
Iter 585/1000 - Loss: 2.667   lengthscale: 2.028   noise: 4.893
Iter 586/1000 - Loss: 2.667   lengthscale: 2.028   noise: 4.891
Iter 587/1000 - Loss: 2.667   lengthscale: 2.029   noise: 4.889
Iter 588/1000 - Loss: 2.667   lengthscale: 2.029   noise: 4.886
Iter 589/1000 - Loss: 2.667   lengthscale: 2.029   noise: 4.884
Iter 590/1000 - Loss: 2.667   lengthscale: 2.030   noise: 4.881
Iter 591/1000 - Loss: 2.667   lengthscale: 2.030   noise: 4.879
Iter 592/1000 - Loss: 2.667   lengthscale: 2.030   noise: 4.876
Iter 593/1000 - Loss: 2.667   lengthscale: 2.031   noise: 4.874
Iter 594/1000 - Loss: 2.667   lengthscale: 2.031   noise: 4.871
Iter 595/1000 - Loss: 2.666   lengthscal

Iter 726/1000 - Loss: 2.657   lengthscale: 2.062   noise: 4.446
Iter 727/1000 - Loss: 2.656   lengthscale: 2.062   noise: 4.442
Iter 728/1000 - Loss: 2.656   lengthscale: 2.062   noise: 4.438
Iter 729/1000 - Loss: 2.656   lengthscale: 2.062   noise: 4.434
Iter 730/1000 - Loss: 2.656   lengthscale: 2.062   noise: 4.430
Iter 731/1000 - Loss: 2.656   lengthscale: 2.062   noise: 4.426
Iter 732/1000 - Loss: 2.656   lengthscale: 2.062   noise: 4.422
Iter 733/1000 - Loss: 2.656   lengthscale: 2.062   noise: 4.418
Iter 734/1000 - Loss: 2.656   lengthscale: 2.063   noise: 4.414
Iter 735/1000 - Loss: 2.656   lengthscale: 2.063   noise: 4.410
Iter 736/1000 - Loss: 2.656   lengthscale: 2.063   noise: 4.406
Iter 737/1000 - Loss: 2.656   lengthscale: 2.063   noise: 4.402
Iter 738/1000 - Loss: 2.656   lengthscale: 2.063   noise: 4.398
Iter 739/1000 - Loss: 2.656   lengthscale: 2.063   noise: 4.394
Iter 740/1000 - Loss: 2.656   lengthscale: 2.063   noise: 4.390
Iter 741/1000 - Loss: 2.655   lengthscal

Iter 872/1000 - Loss: 2.645   lengthscale: 2.061   noise: 3.779
Iter 873/1000 - Loss: 2.645   lengthscale: 2.061   noise: 3.774
Iter 874/1000 - Loss: 2.645   lengthscale: 2.061   noise: 3.768
Iter 875/1000 - Loss: 2.645   lengthscale: 2.060   noise: 3.763
Iter 876/1000 - Loss: 2.645   lengthscale: 2.060   noise: 3.758
Iter 877/1000 - Loss: 2.645   lengthscale: 2.060   noise: 3.753
Iter 878/1000 - Loss: 2.645   lengthscale: 2.060   noise: 3.747
Iter 879/1000 - Loss: 2.645   lengthscale: 2.060   noise: 3.742
Iter 880/1000 - Loss: 2.645   lengthscale: 2.060   noise: 3.737
Iter 881/1000 - Loss: 2.645   lengthscale: 2.060   noise: 3.732
Iter 882/1000 - Loss: 2.645   lengthscale: 2.060   noise: 3.726
Iter 883/1000 - Loss: 2.644   lengthscale: 2.059   noise: 3.721
Iter 884/1000 - Loss: 2.644   lengthscale: 2.059   noise: 3.716
Iter 885/1000 - Loss: 2.644   lengthscale: 2.059   noise: 3.710
Iter 886/1000 - Loss: 2.644   lengthscale: 2.059   noise: 3.705
Iter 887/1000 - Loss: 2.644   lengthscal

In [8]:
# Sampling bounds: one [min, max] row per standardized input parameter
xlimits = np.column_stack((min_data_x, max_data_x))
sampling = LHS(xlimits=xlimits)
# Number of candidate points to draw
num_samples = 1000000
# Latin hypercube sampling of candidate points within the bounds
x = sampling(num_samples)

# Expected improvement of every candidate with respect to the target value
af = acqisitionFunctions(x, test_x, test_y)
ei = af.expected_improvement_modified(model, likelihood, y_target)
# Index of the candidate that maximizes the acquisition function
x_sampled_index = np.argmax(ei)
# The chosen candidate, still in standardized log-scale coordinates
x_sampled_logscale_standardized = x[x_sampled_index, :]

# Back to parameter space: undo the standardization (multiply by the stored
# standard deviation, add the stored mean) and then undo the log transform
x_sampled = np.exp(x_sampled_logscale_standardized * data_x_variance + data_x_mean)
print(x_sampled)

[1.30449279e-08 1.86722243e-07 6.55471784e-03 1.37340124e-05
 1.74317734e-05 4.19454932e-01 1.85126808e-08]


In [None]:
# Baseline parameter vector passed to the geometry writer
paraminputs = [0,0.0001,0,0,0,0,0,0.001,0,0,0, 0.1,0.1,10,0.1,0.1,0.1,0.1,10,0.0001,0.001,0.001, 1,1,0.6,0.6,0.6,0.6,0.2,0.1,3,0.6,1.8, 0.001,0.001]
# Replacing the parameters with the newly sampled values.
# Slots of paraminputs that receive x_sampled[0], x_sampled[1], ... in order
# (same indices as LHS_parameter_index used when selecting the model inputs):
#    4 -> tension cuboidal basal
#    7 -> tension columnar basal
#   17 -> k columnar apical
#   18 -> k columnar basal
#   19 -> k columnar lateral
#   33 -> K_ECM
sampled_parameter_slots = [1, 4, 7, 17, 18, 19, 33]
for sample_position, parameter_slot in enumerate(sampled_parameter_slots):
	paraminputs[parameter_slot] = x_sampled[sample_position]
# Defining the set system pressure
param_pressure = 0.001
# Writing geometry file
geometryWriter(paraminputs, param_pressure, 'wingDisc')
# Running surface evolver simulations.
# The executable location can be overridden with the EVOLVER_EXECUTABLE
# environment variable; the default keeps the original installation path.
evolver_executable = os.environ.get("EVOLVER_EXECUTABLE", "/home/nkumar4/Desktop/evolver_installation/src/evolver")
evolver_exit_status = os.system(evolver_executable + " wingDisc.fe")
# Stop on a failed run so that later cells do not continue without a fresh simulation result
if evolver_exit_status != 0:
	raise RuntimeError("Surface Evolver run failed with exit status " + str(evolver_exit_status))

In [None]:
# Shape features of the newly simulated tissue
# (vertices.txt is presumably written by the Surface Evolver run — TODO confirm)
fe = FeatureExtractor('vertices.txt', 'log_edges.xlsx')
# EFD coefficients with 20 harmonics, plus a flattened 80-element copy
efd_coeff_sampled = fe.tissue_efd_coeff(20)
efd_coeff_sampled_reshaped = np.reshape(efd_coeff_sampled, (80,))
# Inverse EFD gives the x/y contour used for plotting the sampled shape
xt_sampled, yt_sampled = spatial_efd.inverse_transform(efd_coeff_sampled, harmonic=20)

"""Step 5e: Converting EFD to PC space
"""
# Normalize with the training-data mean / std, then shape as a column vector
efd_coeff_sampled_normalized = np.reshape(
	(efd_coeff_sampled_reshaped - data_efd_mean) / data_efd_variance,
	(80, 1),
)
# Project onto the principal-component weights obtained from the training data
efd_coeff_sampled_normalized_pc = weights @ efd_coeff_sampled_normalized
# Row-vector form, convenient for appending to the data array
y_sampled = efd_coeff_sampled_normalized_pc.reshape(1, 8)
print(y_sampled)

In [None]:
# Append the newly sampled point to the input data.
# data_x holds standardized log-scale inputs, so the sample must be appended in
# that same space (x_sampled_logscale_standardized), not as x_sampled, which has
# already been converted back to physical parameter values.
data_x = np.vstack((data_x, np.reshape(x_sampled_logscale_standardized,(1,7))))

In [None]:
# Append the new observation to the training set.
# train_x comes from the standardized log-scale data_x, so the new input row
# must be the standardized log-scale sample rather than x_sampled (which is in
# physical parameter space).
train_x = np.vstack((train_x, np.reshape(x_sampled_logscale_standardized,(1,7))))
# The matching target is the first principal component of the sampled shape
train_y = np.append(train_y,y_sampled[:,0],axis=0)
print(np.shape(train_x))
print(np.shape(train_y))

In [None]:
# Remove the vertices file of the finished simulation so it is not read again.
# Done with the standard library instead of shelling out to `rm`; a missing
# file is tolerated, as it was with the shell command.
if os.path.exists("vertices.txt"):
	os.remove("vertices.txt")

In [None]:
filename_shape_plot = str(1) + "_sapled_target_xy_plot.svg"
# Make sure the output directory exists before saving the figure
plot_output_dir = "contour_evolution_plots"
os.makedirs(plot_output_dir, exist_ok=True)

# Use one explicit Axes for everything: plt.axes() would add a NEW, empty Axes
# on top of the plotted one, so the aspect ratio, labels and legend would not
# apply to the curves.
fig, ax = plt.subplots()
# Plotting target data
ax.plot(xt_exp, yt_exp, 'black', label='Target')
# Plotting sampled data
# NOTE(review): the sampled contour is labelled 'Experimental' — confirm this is the intended legend text
ax.plot(xt_sampled, yt_sampled, 'blue', label='Experimental')
ax.set_aspect('equal', 'datalim')
# Labeling axes
ax.set_xlabel("x [nondimensional]")
ax.set_ylabel("y [nondimensional]")
# Plotting legends
ax.legend()
fig.savefig(os.path.join(plot_output_dir, filename_shape_plot))
plt.close(fig)

# Calculating error: Error is calculated as a norm of difference between target and sampled EFD coefficients
error_target_sampled_step =  np.linalg.norm(efd_coeff_exp_reshaped-efd_coeff_sampled_reshaped)