In [1]:
import numpy as np
import random
from numpy.random import default_rng
import warnings
import math
from scipy.stats import norm, multivariate_normal
from scipy import integrate
import scipy.optimize as optimize
import os
import time
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, WhiteKernel, ConstantKernel, DotProduct
from sklearn.preprocessing import StandardScaler, PowerTransformer
from scipy.stats import qmc
import pandas as pd
from enum import Enum
import scipy
import matplotlib.pyplot as plt
import pickle
import gpflow

import itertools
from itertools import combinations_with_replacement
from itertools import combinations
from itertools import permutations

ModuleNotFoundError: No module named 'gpflow'

In [None]:
#To scale objective function to multiple GPs...
#Create fxn for analyzing a single gp
def eval_gp_new_theta(theta_guess, t_matrix, gp_object, Xexp):
    """Evaluate one GP at every row of ``Xexp`` for a single parameter guess.

    Parameters
    ----------
    theta_guess : array-like
        Candidate parameter vector. It is reshaped to a single row, so any
        1-D (or already row-shaped) input with ``p`` entries is accepted.
    t_matrix : ndarray
        Transformation matrix with ``p`` columns; the GP-specific parameters
        are computed as ``theta_guess @ t_matrix.T``.
    gp_object : object
        Model exposing ``predict(X, return_cov=True)`` that returns
        ``(mean, covariance)``.
    Xexp : ndarray, 2-D
        One row per point to evaluate. Must be 2-D because it is concatenated
        column-wise with the (repeated) transformed parameters.

    Returns
    -------
    gp_mean
        The mean exactly as returned by ``gp_object.predict``.
    gp_std : ndarray, 1-D
        Square root of the diagonal of the predicted covariance, with
        negative variances clamped to zero first.
    """
    # Get theta into the form this GP expects using its transformation matrix
    theta_row = np.asarray(theta_guess).reshape(1, -1)
    gp_theta = theta_row @ t_matrix.T
    # Repeat the transformed theta once per point and append the x data
    gp_theta = np.repeat(gp_theta, len(Xexp), axis=0)
    gp_input = np.concatenate((gp_theta, Xexp), axis=1)
    # Get mean and covariance from the gp
    gp_mean, gp_covar = gp_object.predict(gp_input, return_cov=True)
    # Round-off can leave tiny negative values on the covariance diagonal;
    # clamp them so the square root yields 0 instead of NaN.
    gp_var = np.clip(np.diag(gp_covar), 0.0, None)
    gp_std = np.sqrt(gp_var)

    return gp_mean, gp_std

#define the scipy function for minimizing
def scipy_min_fxn(theta_guess, t_matricies, gps, Xexp_list, Yexp_list):
    """Uncertainty-weighted squared-error objective pooled over several GPs.

    For each GP the prediction at its experimental points is obtained via
    ``eval_gp_new_theta``; every point gets the weight ``1 / gp_std**2``.
    The weights of all points from all GPs are normalised to sum to one and
    the objective is the weighted sum of squared residuals.

    Parameters
    ----------
    theta_guess : array-like
        Candidate parameter vector passed on to ``eval_gp_new_theta``.
    t_matricies : sequence
        One transformation matrix per GP.
    gps : sequence
        One GP model per data set.
    Xexp_list, Yexp_list : sequence of array-like
        Inputs and observed outputs for each GP; outputs are flattened.

    Returns
    -------
    float
        The weighted squared-error objective (``0.0`` when no GP is given).

    Raises
    ------
    AssertionError
        If the four sequences do not all have the same length.
    """
    # Assert you have a gp, data and a transformation matrix for each entry.
    # gps is part of the check because the loop below walks all four lists in step.
    assert len(t_matricies) == len(gps) == len(Xexp_list) == len(Yexp_list), "lists must be the same length"

    # Floor for the predictive variance: a zero std would give an infinite
    # weight and turn the normalised objective into NaN.
    var_floor = np.finfo(float).eps

    sqerr_parts = []
    weight_parts = []
    # loop over the gps (per the original note: one gp for every molecule and property)
    for t_matrix, gp, Xexp, Yexp in zip(t_matricies, gps, Xexp_list, Yexp_list):
        # Evaluate GP
        gp_mean, gp_std = eval_gp_new_theta(theta_guess, t_matrix, gp, Xexp)
        # Calculate weight from uncertainty
        gp_var = np.maximum(np.ravel(gp_std) ** 2, var_floor)
        weight_parts.append(1.0 / gp_var)
        # Flatten both sides so a column-vector mean cannot broadcast the
        # residual into an (n, n) matrix.
        residual = np.ravel(Yexp) - np.ravel(gp_mean)
        sqerr_parts.append(residual ** 2)

    # Nothing to fit: keep the value the empty sums would have produced
    if not sqerr_parts:
        return 0.0

    sqerr_array = np.concatenate(sqerr_parts)
    weight_array = np.concatenate(weight_parts)
    # Normalize weights to add up to 1
    scaled_weights = weight_array / np.sum(weight_array)
    # Define objective function
    obj = np.sum(scaled_weights * sqerr_array)
    return obj

In [None]:
# Read the pickled object stored at R14-vlegp/vle-gps.pkl into `data`.
# NOTE(review): pickle.load can execute arbitrary code from the file -- only
# load pickles from a trusted source.
# NOTE(review): the recorded output of this cell is
# "ModuleNotFoundError: No module named 'gpflow'", so presumably the pickle
# contains gpflow objects and gpflow must be installed to load it -- confirm.
# NOTE(review): `data` is not referenced by any of the visible cells below.
with open("R14-vlegp/vle-gps.pkl", 'rb') as pickle_file:
    data = pickle.load(pickle_file)

ModuleNotFoundError: No module named 'gpflow'

In [None]:
## Multistart L-BFGS-B minimisation of scipy_min_fxn over new_theta
# NOTE(review): t_matrix, t_matrix2, fit_gp_model, fit_gp_model2, Xexp, Xexp2,
# Yexp and Yexp2 are not defined in any visible cell; they must already exist
# in the kernel for this cell to run.

## specify initial guesses bounds for new_theta
repeats = 50
bounds_theta_new_l = [-5, -5, -5, -5, -5]
bounds_theta_new_u = [ 2,  2,  2,  2,  2]
bounds_theta_new = np.array([bounds_theta_new_l, bounds_theta_new_u]).T
# Draw the starting points from a seeded generator so that a
# Restart-and-Run-All reproduces the same restarts (and the same result).
restart_seed = 1
restart_rng = default_rng(restart_seed)
theta_guesses = restart_rng.uniform(low=bounds_theta_new_l, high=bounds_theta_new_u,
                                    size=(repeats, len(bounds_theta_new_l)))

#Define matrices, gps, and data to look at
t_matricies = [t_matrix, t_matrix2]
gps = [fit_gp_model, fit_gp_model2]
Xexp_list = [Xexp, Xexp2]
Yexp_list = [Yexp, Yexp2]

#Initialize data storage matrices
theta_vals = np.zeros((repeats, len(bounds_theta_new_l)))
# NOTE(review): l2_norms is never filled in below, so nlr_l2_norm is always 0.0.
l2_norms = np.zeros(repeats)
costs = np.zeros(repeats)
fxn_evals = np.zeros(repeats)

#Optimize w/ restarts
for i in range(repeats):
    #Get guess and find scipy.optimize solution
    t_guess = theta_guesses[i]
    Solution = optimize.minimize(scipy_min_fxn, t_guess, bounds=bounds_theta_new, method='L-BFGS-B',
                                 args=(t_matricies, gps, Xexp_list, Yexp_list),
                                 options={"disp": True})
    #Back out results
    theta_min_obj = Solution.x
    costs[i] = Solution.fun
    theta_vals[i] = theta_min_obj
    #Note counting Jacobian and function evaluations as function evaluations
    fxn_evals[i] = Solution.nfev + Solution.njev

#Pick the best restart once. nanargmin skips restarts whose cost came back
#as NaN (plain argmin would select them) and raises if every cost is NaN.
best_restart = int(np.nanargmin(costs))

#Print Results
nlr_theta = theta_vals[best_restart]
nlr_l2_norm = l2_norms[best_restart]
nlr_evals = fxn_evals[best_restart]
print("GP New Theta", nlr_theta)
#Report the transformed theta for every GP in t_matricies, numbered from 1
for gp_number, gp_t_matrix in enumerate(t_matricies, start=1):
    print(f"GP Theta {gp_number} = ", nlr_theta@gp_t_matrix.T)
print("Evaluations = ", nlr_evals)
print("Func Val", costs[best_restart])