In [154]:
# import h5 reader
import h5py
import numpy as np
import pandas as pd
import time
from os import listdir
from os.path import isfile, join

work_path = "/home/shengduo/pylith-developer/build/debug/pylith-nonRegSlipLawWithVaryingB/examples/bar_shearwave/quad4/"
h5_path = work_path + "output/fault/"

In [166]:
# Class RunABatch
class RunABatch:
    # Constructor
    def __init__(self, input_set, work_path, FourierTerms = 16):
        # Initialize data paths and batch parameters
        self.work_path = work_path
        self.input_set = [tuple(i) for i in input_set]
        self.h5_path = work_path + "output/fault/"
        self.FourierTerms = FourierTerms
        
        # Store all cases in the h5_path
        self.existingCasesFile = self.h5_path + "CaseList.csv"
        
        # Flag for whether cases have been run
        self.casesExcuted = False
        
        # Get existing cases
        self.getExistingCasesOfInputSet()
        
        # Get input_set_toRun
        self.input_set_toRun = list(set(self.input_set) - self.existingCases)
        
        # Run cases
        self.runCases(self.input_set_toRun)
        
        # Get obsevations
        self.Observations = self.getObservations(self.input_set)
    
    # Inline function gets [A, B] list
    def getAAndB(self, fileName):
        A_idx = fileName.find('A')
        B_idx = fileName.find('B')
        slash_idx = fileName.find('-')
        
        # Change this part !! Before applying to new name convention
        A = float(fileName[A_idx + 1 : B_idx - 1])
        B = float(fileName[B_idx + 1 : slash_idx])
        return (A, B)
    
    
    # Function get_existing_files_set
    def getExistingCasesOfInputSet(self):
        # Get all .h5 file names as a list
        myPath = self.h5_path
        onlyFiles = [f for f in listdir(myPath) if (isfile(join(myPath, f)) and f[-3 : ] == '.h5')]
        self.existingCases = set([self.getAAndB(f) for f in onlyFiles])
        
    # Function runCases
    def runCases(self, input_set):
        shell_path = self.work_path + "RunJobsJP.sh"
        shellRead = open(shell_path, 'r')
        list_of_lines = shellRead.readlines()
        shellRead.close()

        AA = [ele[0] for ele in input_set]
        BB = [ele[1] for ele in input_set]

        list_of_lines[9] = "AA=" + str(tuple(AA)).replace(',', '') + "\n"
        list_of_lines[10] = "BB=" + str(tuple(BB)).replace(',', '') + "\n"

        shellWrite = open(shell_path, 'w')
        shellWrite.writelines(list_of_lines)
        shellWrite.close()

        # Run the cases
        !source $shell_path
        
        self.casesExcuted = True
        
        # Return from shell
        return
    
    
    # Function getObservations for the input_set
    def getObservations(self, input_set):
        # Initialize Observations
        Observations = []
        
        # Check if the cases have been excuted
        if self.casesExcuted == False:
            return Observations
        
        # Loop through all Inputs
        for input_ele in input_set:
            # Open the file
            h5_file = self.h5_path + "A" + str(input_ele[0]) + "_B" + str(input_ele[1]) + "-fault.h5"
            f = h5py.File(h5_file, 'r')

            # Get time
            time = np.array(f['time']).reshape([-1])
            time = time - np.min(time)
            nOfTSteps = time.shape[0]

            # Get Slip rates
            SlipRates = np.array(f['vertex_fields']['slip_rate'])
            Vx = SlipRates[:, :, 0].transpose()
            Vy = SlipRates[:, :, 1].transpose()
            nOfNodes = Vx.shape[0]

            # Find the Fourier coefficients
            FourierTerms = self.FourierTerms
            T = np.max(time)

            # Compute the Fourier terms
            Ks = np.array(range(FourierTerms))
            coskPiTt = np.cos(Ks.reshape([-1, 1]) * np.pi / T * time)
            VxcoskPiTt = np.concatenate([coskPiTt * Vxi.reshape([1, -1]) for Vxi in Vx], 0)

            # Compute the fourier coefficients
            # print('time.shape: ', time.shape)
            observation = np.trapz(VxcoskPiTt, x=time)

            # Append the result from this file
            Observations.append(observation)
        
        Observations = np.array(Observations)
        return Observations
    
    

In [258]:
# Input example of getting observations
AA = np.linspace(0.002, 0.014, 7)
BB = AA + 0.004

# Get input set
input_set = set([(AA[i], BB[i]) for i in range(len(AA))])

# Test class RunABatch
myBatch = RunABatch(input_set, work_path, FourierTerms = 32)

/home/shengduo/InverseProblems/GPRWorkingField


In [259]:
AA

array([0.002, 0.004, 0.006, 0.008, 0.01 , 0.012, 0.014])

In [260]:
np.array(list(myBatch.input_set))

array([[0.004, 0.008],
       [0.014, 0.018],
       [0.006, 0.01 ],
       [0.01 , 0.014],
       [0.002, 0.006],
       [0.008, 0.012],
       [0.012, 0.016]])

# Sampling process

In [305]:
# Import the sampling packages
import emcee
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
# Set up a GP
myGP = GP_predictor(myBatch.input_set, myBatch.Observations, GPkernel = DotProduct() + WhiteKernel())

# Define the log probability function
def log_prob(u, u_low, u_high, y, GP, si_eta = 1.):
    normal_idx = np.all(np.concatenate([u >= u_low, u <= u_high], axis = 1), axis = 1)
    res = - 0.5 / si_eta ** 2 * np.sum((y - GP.predict(u)) ** 2, axis = 1)
    res[~normal_idx] = -np.inf
    return res

# Use a sample y
y = myBatch.Observations[0]

# Sample from p(u|y)
uLow = np.array([0.002, 0.006])
uLength = np.array([0.01, 0.01])
uHigh = uLow + uLength

ndim, nwalkers = 2, 100
p0 = np.random.uniform(size = [nwalkers, ndim]) * uLength + uSt

sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, args=[uLow, uHigh, y, myGP], vectorize = True)
sampler.run_mcmc(p0, 1000)

samples = sampler.get_last_sample().coords
samples.shape



(100, 2)

In [307]:
np.mean(samples, axis=0)

array([0.0039599 , 0.00771252])

In [310]:

newGP = GP_predictor(np.append(np.array(myBatch.input_set), myInv.U[-2:], axis=0), 
                     np.append(np.array(myBatch.Observations), myInv.O[-2:], axis=0))

# Define the log probability function
def log_prob(u, u_low, u_high, y, GP, si_eta = 1.):
    normal_idx = np.all(np.concatenate([u >= u_low, u <= u_high], axis = 1), axis = 1)
    res = - 0.5 / si_eta ** 2 * np.sum((y - GP.predict(u)) ** 2, axis = 1)
    res[~normal_idx] = -np.inf
    return res

# Use a sample y
y = myBatch.Observations[0]

# Sample from p(u|y)
uLow = np.array([0.002, 0.006])
uLength = np.array([0.01, 0.01])
uHigh = uLow + uLength

ndim, nwalkers = 2, 100
p0 = np.random.uniform(size = [nwalkers, ndim]) * uLength + uSt

sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, args=[uLow, uHigh, y, newGP], vectorize = True)
sampler.run_mcmc(p0, 1000)

samples = sampler.get_last_sample().coords
samples.shape

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


(100, 2)

In [309]:
np.mean(samples, axis=0)

array([0.00334259, 0.00850793])

# Class BayersianInv

In [209]:
# Define Bayersian Inv that solves a Bayersian inversion problem
class BayersianInv:
    # Constructor
    def __init__(self, u_low, u_high, y, work_path, FourierTerms = 32, 
                 atol = 1.0e-6, si_eta = 0.1, n_samples = 20, MCMCsteps = 1000):
        self.u_low = u_low
        self.u_high = u_high
        self.input_dim = len(u_low)
        
        self.y = y
        self.output_dim = len(y)
        self.work_path = work_path
        self.FourierTerms = FourierTerms
        self.atol = atol
        self.si_eta = si_eta
        self.n_samples = n_samples
        self.MCMCsteps = MCMCsteps
        
        self.GPs = []
        self.U = np.empty([0, self.input_dim])
        self.O = np.empty([0, self.output_dim])
        self.iterations = 0
    
    # Get the accumulative probability function
    def log_prob(self, u, y):
        # Apply hard constraints
        normal_idx = np.all(np.concatenate([u >= self.u_low, u <= self.u_high], axis = 1), axis = 1)
       
        # First prior is uniform distribution
        res = np.ones(len(u))
        res[~normal_idx] = -np.inf
        
        # Add all posteriors in each iteration
        if (np.sum(normal_idx) > 0):
            for i in range(self.iterations):
                res[normal_idx] += -0.5 / self.si_eta ** 2 * (
                    np.sum(
                        (y - self.GPs[i].predict(u[normal_idx])) ** 2, 
                        axis = 1
                    )
                )
        
        # Return the log_probability at the current iteration
        return res
    
    # Initial_run: Explore the parameter space so that Gaussian process can always converge
    def initial_run(self, n_grid_pts = 3):
        
    
    # Run one iteration
    def runOneIteration(self, n_samples):
        # Sample from the current distribution for u and get the current observations
        sampler = emcee.EnsembleSampler(n_samples, 
                                        self.input_dim, 
                                        self.log_prob, args=[y], 
                                        vectorize = True)
        
        # Initialize uniformly as the starting point
        p0 = np.random.uniform(size = [n_samples, self.input_dim]) * (self.u_high - self.u_low) + self.u_low
        
        # Get the result
        sampler.run_mcmc(p0, self.MCMCsteps)
        self.samples = sampler.get_last_sample().coords
        
        # Check if the standard deviation of the samples is smaller than atol
        if np.std(self.samples) < self.atol:
            # Converging flag True
            return True
        
        else:
            # Run the forward map with the samples
            myBatch = RunABatch(self.samples, self.work_path, FourierTerms = self.FourierTerms)
            self.U = np.append(self.U, self.samples, axis = 0)
            self.O = np.append(self.O, myBatch.Observations, axis = 0)

            # Get a new Gaussian process
            myGP = GP_predictor(self.U, self.O)
            self.GPs.append(myGP)
            self.iterations += 1
            
            # Converging flag False
            return False
    
    # Run function
    def run(self, n_iter_max = 10):
        for i in range(n_iter_max):
            # In each iteration
            print("="*30, " Iteration ", str(self.iterations), " ", "="*30)
            
            # Run one iteration
            start_time = time.time()
            converged = self.runOneIteration(self.n_samples)
            end_time = time.time()
            print("Time cost: ", str(end_time - start_time), " s")
            print("Standard deviatiation of samples: ", np.std(self.samples))
            self.inversed_u = np.mean(self.samples, axis = 0)
            print("Inversed u: ", self.inversed_u)
                
            # Break for pre-achieved convergence
            if converged:
                self.inversed_u = np.mean(self.samples, axis = 0)
                print("Inversion exited with success!")
                print("Inversed u: ", self.inversed_u)
                break
        
        # Print non-convergent info
        self.inversed_u = np.mean(self.samples, axis = 0)
        print("Inversion failed to converge within ", str(self.iterations), " iterations!")
        print("Inversed u: ", self.inversed_u)

# Test an inversion

In [211]:
# Generate test batch
u_low = np.array([0.002, 0.004])
u_high = u_low + 0.01

# Generate input set
AA = np.linspace(0.002, 0.01, 5)
BB = np.linspace(0.004, 0.012, 5)[np.random.permutation(5)]
test_input_set = np.array([[AA[i], BB[i]] for i in range(len(AA))])
fTerms = 32

testBatch = RunABatch(test_input_set, work_path, FourierTerms = fTerms)
print("Test input set: \n", test_input_set)

/home/shengduo/InverseProblems/GPRWorkingField
Test input set: 
 [[0.002 0.004]
 [0.004 0.012]
 [0.006 0.006]
 [0.008 0.01 ]
 [0.01  0.008]]


In [212]:
# Run an inversion test:
testCase_idx = 1
y = testBatch.Observations[testCase_idx]
myInv = BayersianInv(u_low, u_high, y, work_path, FourierTerms = fTerms, n_samples = 10)
myInv.run(n_iter_max = 10)

Running case A0.011035327545878871_B0.012912861617791049
Finished in 12 s!

Running case A0.009420755931358889_B0.006784518957766042
Finished in 12 s!

Running case A0.008407727964533361_B0.012952696062411723
Finished in 12 s!

Running case A0.009425836751431164_B0.012716362106250967
Finished in 12 s!

Running case A0.003928347487011021_B0.009347837808840305
Finished in 12 s!

Running case A0.009975563433810952_B0.008319044096127843
Finished in 12 s!

Running case A0.010160954433504498_B0.009543855107796024
Finished in 13 s!

Running case A0.009352528076089928_B0.00927681436130323
Finished in 12 s!

Running case A0.0022873130080542795_B0.0068983222741910485
Finished in 12 s!

Running case A0.005138482542330063_B0.005940056145205135
Finished in 12 s!

/home/shengduo/InverseProblems/GPRWorkingField
Time cost:  122.45620965957642  s
Standard deviatiation of samples:  2.7879532717670357e-03
Inversed u:  [0.00791328 0.00946924]
Running case A3.928351570249735e-03_B0.009347682026470112
Finis

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Time cost:  123.2339596748352  s
Standard deviatiation of samples:  3.1115054174759987e-03
Inversed u:  [0.00463833 0.00818051]
Running case A5.149293967314822e-03_B0.006519521487947522
Finished in 12 s!

Running case A0.008254236407428025_B0.013144623288148296
Finished in 12 s!

Running case A0.005149893760535665_B0.006518623169201648
Finished in 12 s!

Running case A0.00825498523588103_B0.013143447493303347
Finished in 12 s!

Running case A0.003897549314159432_B0.00937864113393305
Finished in 13 s!

Running case A0.003897684666169116_B0.009378770036950491
Finished in 12 s!

Running case A0.005147219145951889_B0.006519230357227754
Finished in 12 s!

Running case A0.008255721749018362_B0.013145393510461822
Finished in 12 s!

Running case A0.005149912948968463_B0.006518721831804961
Finished in 12 s!

Running case A0.008254666490835226_B0.013145265477426126
Finished in 12 s!

/home/shengduo/InverseProblems/GPRWorkingField


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Time cost:  124.25774550437927  s
Standard deviatiation of samples:  3.0397911854332816e-03
Inversed u:  [0.00614112 0.00974122]
Running case A2.20825790502311e-03_B0.006968357519933166
Finished in 13 s!

Running case A0.003867956041365769_B0.009410304423095987
Finished in 12 s!

Running case A0.0038681488687398865_B0.009410339859910103
Finished in 12 s!

Running case A0.002208520276219475_B0.0069680083765410235
Finished in 12 s!

Running case A0.0022089308576428556_B0.006967779360452038
Finished in 13 s!

Running case A0.002208918015049044_B0.006967760930885536
Finished in 12 s!

Running case A0.0038675848956135506_B0.009410037952016344
Finished in 12 s!

Running case A0.0038677398169698037_B0.009409957281657567
Finished in 12 s!

Running case A0.008252007754143077_B0.01292012044432276
Finished in 12 s!

Running case A0.00825148123422028_B0.012919695143997108
Finished in 12 s!

/home/shengduo/InverseProblems/GPRWorkingField


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Time cost:  125.86408138275146  s
Standard deviatiation of samples:  3.350084287230303e-03
Inversed u:  [0.00408095 0.00913524]
Running case A3.8451386092789586e-03_B0.009434933156182025
Finished in 12 s!

Running case A0.0038449668456288022_B0.009435104302792676
Finished in 11 s!

Running case A0.0038454114910118275_B0.009435055984477721
Finished in 13 s!

Running case A0.0021756369494598127_B0.006997007331593645
Finished in 12 s!

Running case A0.003844951144392103_B0.009434947820016516
Finished in 12 s!

Running case A0.008229253535534482_B0.012609429268248772
Finished in 12 s!

Running case A0.0021761375969656304_B0.006997344446207321
Finished in 12 s!

Running case A0.0038452955110131514_B0.009434806236686008
Finished in 12 s!

Running case A0.00384505736852785_B0.009435000486360883
Finished in 12 s!

Running case A0.002176082956662419_B0.006997397712875951
Finished in 13 s!

/home/shengduo/InverseProblems/GPRWorkingField


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Time cost:  125.6452407836914  s
Standard deviatiation of samples:  3.0897519402801037e-03
Inversed u:  [0.00378279 0.0090211 ]
Running case A3.8264126558124213e-03_B0.009456130290753805
Finished in 12 s!

Running case A0.002149004744665428_B0.007021957471491819
Finished in 12 s!

Running case A0.003826683783379306_B0.009455672362994598
Finished in 13 s!

Running case A0.003826627022678164_B0.009456168393205311
Finished in 12 s!

Running case A0.0021485165777924632_B0.0070221634773524775
Finished in 12 s!

Running case A0.0021484910818325227_B0.007022038729059286
Finished in 12 s!

Running case A0.003826394094667758_B0.00945575198437559
Finished in 13 s!

Running case A0.003826400099187329_B0.009455783817755114
Finished in 12 s!

Running case A0.0021484126431159027_B0.007022646771286862
Finished in 12 s!

Running case A0.003826666119627178_B0.00945538312697562
Finished in 12 s!

/home/shengduo/InverseProblems/GPRWorkingField


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Time cost:  128.75390672683716  s
Standard deviatiation of samples:  2.8535599080852367e-03
Inversed u:  [0.00315536 0.00848237]
Running case A3.810813836357576e-03_B0.009473512520473102
Finished in 12 s!

Running case A0.0038107534255511725_B0.00947364664703179
Finished in 12 s!

Running case A0.003810681187293828_B0.009473780302620952
Finished in 12 s!

Running case A0.0038108075776575245_B0.00947360697787944
Finished in 12 s!

Running case A0.01057652084055444_B0.0040062202410432405
Finished in 13 s!

Running case A0.0038106782609361044_B0.009473577670904773
Finished in 12 s!

Running case A0.0038108264346081973_B0.009473634580179574
Finished in 12 s!

Running case A0.003810645027309389_B0.009473878278940953
Finished in 12 s!

Running case A0.007781587342183173_B0.013477854075851657
Finished in 13 s!

Running case A0.0038111003005244213_B0.009473757681662837
Finished in 12 s!

/home/shengduo/InverseProblems/GPRWorkingField


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Time cost:  128.47581911087036  s
Standard deviatiation of samples:  3.1178879834537878e-03
Inversed u:  [0.00488444 0.00932735]
Running case A3.797261983536072e-03_B0.00948959751850945
Finished in 12 s!

Running case A0.007552557860818556_B0.013193320304734234
Finished in 12 s!

Running case A0.003797092436706904_B0.009490085188560066
Finished in 12 s!

Running case A0.0037972089021691582_B0.009489536038675197
Finished in 12 s!

Running case A0.003797242021682069_B0.009489675469490406
Finished in 13 s!

Running case A0.003797396725979454_B0.009489712734667559
Finished in 12 s!

Running case A0.007552389910904716_B0.013193021576356363
Finished in 12 s!

Running case A0.003797507326906877_B0.009489689468950055
Finished in 13 s!

Running case A0.0037977655562067237_B0.009489054848708411
Finished in 12 s!

Running case A0.007552608944051454_B0.013193620207609162
Finished in 12 s!

/home/shengduo/InverseProblems/GPRWorkingField


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Time cost:  130.49903535842896  s
Standard deviatiation of samples:  3.3132390801139306e-03
Inversed u:  [0.0049239  0.01060073]
Running case A3.785896113523551e-03_B0.009503605100683411
Finished in 12 s!

Running case A0.0037856602024524504_B0.009503858598409246
Finished in 12 s!

Running case A0.0037858641719272046_B0.009503492064421467
Finished in 13 s!

Running case A0.0037854018311781817_B0.009503733273895752
Finished in 12 s!

Running case A0.0037859460035575662_B0.009503537579594296
Finished in 12 s!

Running case A0.0037859615266458073_B0.009503610646059722
Finished in 13 s!

Running case A0.002089078729708349_B0.007077765637473161
Finished in 12 s!

Running case A0.0020886654252857137_B0.007077585442361858
Finished in 12 s!

Running case A0.003785693176546796_B0.009503689057957062
Finished in 13 s!

Running case A0.002089139889158866_B0.007077875997354474
Finished in 13 s!

/home/shengduo/InverseProblems/GPRWorkingField


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Time cost:  133.89162611961365  s
Standard deviatiation of samples:  2.9121105048717593e-03
Inversed u:  [0.00327673 0.00877588]
Running case A3.775044428703866e-03_B0.009516250109257772
Finished in 12 s!

Running case A0.0037752702142198934_B0.009516471871077811
Finished in 12 s!

Running case A0.002068195667793198_B0.007084310552715156
Finished in 12 s!

Running case A0.0037752909914275726_B0.009516387423047338
Finished in 13 s!

Running case A0.0037753548184577233_B0.009516099252310445
Finished in 13 s!

Running case A0.003775712858866493_B0.009516142699243278
Finished in 12 s!

Running case A0.00377523543590516_B0.009516326468795024
Finished in 12 s!

Running case A0.0037751909973074978_B0.009516210925826078
Finished in 13 s!

Running case A0.0020681024617206552_B0.0070843601704143515
Finished in 12 s!

Running case A0.003775411988267146_B0.009516215207937119
Finished in 12 s!

/home/shengduo/InverseProblems/GPRWorkingField


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Time cost:  135.57113075256348  s
Standard deviatiation of samples:  2.9214874458675863e-03
Inversed u:  [0.00343388 0.00902988]
Inversion failed to converge within  10  iterations!
Inversed u:  [0.00343388 0.00902988]


In [264]:
myInv.GPs[1].GP.kernel_

1.05**2 * RBF(length_scale=0.169)

In [269]:
myInv.GPs[9].GP.kernel_

1.42**2 * RBF(length_scale=0.136)

In [221]:
myInv.U

array([[0.00228731, 0.00689832],
       [0.00392835, 0.00934784],
       [0.00840773, 0.0129527 ],
       [0.01103533, 0.01291286],
       [0.00997556, 0.00831904],
       [0.01016095, 0.00954386],
       [0.00513848, 0.00594006],
       [0.00942584, 0.01271636],
       [0.00942076, 0.00678452],
       [0.00935253, 0.00927681],
       [0.00228701, 0.00689931],
       [0.00513804, 0.00593274],
       [0.00228676, 0.0068981 ],
       [0.00831708, 0.01295774],
       [0.00228713, 0.00689779],
       [0.00392835, 0.00934768],
       [0.00392886, 0.00934767],
       [0.00228634, 0.00689834],
       [0.01199574, 0.00727853],
       [0.00392796, 0.00934718],
       [0.00825499, 0.01314345],
       [0.00514722, 0.00651923],
       [0.00825572, 0.01314539],
       [0.00825424, 0.01314462],
       [0.00389768, 0.00937877],
       [0.00389755, 0.00937864],
       [0.00514929, 0.00651952],
       [0.00825467, 0.01314527],
       [0.00514991, 0.00651872],
       [0.00514989, 0.00651862],
       [0.

In [216]:
myInv.O.shape

(100, 96)

In [366]:
# myKernel = 1 * RBF(length_scale=1.0, length_scale_bounds=(1e-4, 1e2))
# myKernel = DotProduct() + WhiteKernel()

from sklearn.gaussian_process.kernels import Matern

myKernel = 1.0 * Matern(length_scale=1.0e-6, length_scale_bounds=(1e-15, 1e3), nu=1.5)
# myKernel = 1 * RBF(length_scale=1.0e-10, length_scale_bounds=(1e-15, 1e3))


shit = GP_predictor(myInv.U, myInv.O, myKernel)

In [367]:
shit.GP.kernel_

1**2 * Matern(length_scale=1e-06, nu=1.5)

In [368]:
# Define the log probability function
def log_prob(u, u_low, u_high, y, GP, si_eta = 1.):
    normal_idx = np.all(np.concatenate([u >= u_low, u <= u_high], axis = 1), axis = 1)
    res = - 0.5 / si_eta ** 2 * np.sum((y - GP.predict(u)) ** 2, axis = 1)
    res[~normal_idx] = -np.inf
    return res

# Use a sample y
y = myBatch.Observations[0]

# Sample from p(u|y)
uLow = np.array([0.002, 0.006])
uLength = np.array([0.01, 0.01])
uHigh = uLow + uLength

ndim, nwalkers = 2, 100
p0 = np.random.uniform(size = [nwalkers, ndim]) * uLength + uSt

sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, args=[uLow, uHigh, y, shit], vectorize = True)
sampler.run_mcmc(p0, 1000)

samples = sampler.get_last_sample().coords
samples.shape

(100, 2)

In [369]:
np.mean(samples, axis = 0)

array([0.00674049, 0.0110458 ])

In [345]:
myInv.U[-10:], myInv.O[-10:]

(array([[0.00377571, 0.00951614],
        [0.0020682 , 0.00708431],
        [0.00377504, 0.00951625],
        [0.00377535, 0.0095161 ],
        [0.00377524, 0.00951633],
        [0.00377529, 0.00951639],
        [0.0020681 , 0.00708436],
        [0.00377541, 0.00951622],
        [0.00377519, 0.00951621],
        [0.00377527, 0.00951647]]),
 array([[-3.60085529e+02,  1.94958264e+01,  1.96437842e+01,
          1.88670169e+01,  1.87778725e+01,  1.80904601e+01,
          1.72596645e+01,  1.65674677e+01,  1.58606433e+01,
          1.41133612e+01,  1.40250724e+01,  1.18417479e+01,
          1.09369004e+01,  9.79981390e+00,  7.51695360e+00,
          6.69149831e+00,  4.79634815e+00,  2.31335324e+00,
          2.12109699e+00, -2.20792679e+00, -1.60127432e+00,
         -6.21755860e+00, -6.34622174e+00, -9.87690222e+00,
         -1.09404858e+01, -1.25180701e+01, -1.26927984e+01,
         -1.07445739e+01, -7.65990549e+00, -2.75989855e+00,
          7.61442397e-01,  3.87137296e+00, -3.59436818e+02

In [236]:
import scipy
from sklearn.utils.optimize import _check_optimize_result
class MyGPR(GaussianProcessRegressor):
    def __init__(self, *args, max_iter=2e06, gtol=1e-06, **kwargs):
        super().__init__(*args, **kwargs)
        self._max_iter = max_iter
        self._gtol = gtol

    def _constrained_optimization(self, obj_func, initial_theta, bounds):
        if self.optimizer == "fmin_l_bfgs_b":
            opt_res = scipy.optimize.minimize(obj_func, initial_theta, method="L-BFGS-B", jac=True, bounds=bounds, options={'maxiter':self._max_iter, 'gtol': self._gtol})
            _check_optimize_result("lbfgs", opt_res)
            theta_opt, func_min = opt_res.x, opt_res.fun
        elif callable(self.optimizer):
            theta_opt, func_min = self.optimizer(obj_func, initial_theta, bounds=bounds)
        else:
            raise ValueError("Unknown optimizer %s." % self.optimizer)
        return theta_opt, func_min

# Gaussian predictor

In [256]:
# Import Gaussian-regression related functions
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

# Pre-process the data
from sklearn import preprocessing

# function train_GP
class GP_predictor:
    # Constructor
    def __init__(self, 
                 input_set, 
                 observation_set, 
                 GPkernel = 1 * RBF(length_scale=1.0, length_scale_bounds=(1e-1, 1e2)), 
                 n_optimizers = 9):
        # Scale input data
        self.input_set = [list(x) for x in input_set]
        self.observation_set = [list(x) for x in observation_set]
        self.input_dimension = len(self.input_set[0])
        self.observation_dimension = len(self.observation_set[0])
        self.trainset_length = len(self.input_set)
        
        self.input_scaler = preprocessing.StandardScaler()
        self.input_scaler.fit(np.array(self.input_set))
        
        # Scale output data
        self.observation_scaler = preprocessing.StandardScaler()
        self.observation_scaler.fit(np.array(observation_set))
        
        # Fit Gaussian process
        self.GP = GaussianProcessRegressor(kernel = GPkernel, n_restarts_optimizer = n_optimizers)
        # self.GP = MyGPR(kernel = GPkernel, n_restarts_optimizer = n_optimizers, max_iter = max_iterations)
        self.GP.fit(self.input_scaler.transform(np.array(self.input_set)), 
                    self.observation_scaler.transform(np.array(self.observation_set)))
        
    # Predict on a new input set
    def predict(self, new_input_set):
        # Predict new observation
        new_observation = self.observation_scaler.inverse_transform(
            self.GP.predict(
                self.input_scaler.transform(np.array(list(new_input_set)).reshape([-1, self.input_dimension]))
            )
        )
        
        return new_observation
        
    

In [69]:
# Generate a set, run cases, get observations

In [None]:
myGP = GP_predictor(input_set, )