In [2]:
import numpy as np
import random
from numpy.random import default_rng
import warnings
import math
from scipy.stats import norm, multivariate_normal
from scipy import integrate
import scipy.optimize as optimize
import os
import time
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, WhiteKernel, ConstantKernel, DotProduct
from sklearn.preprocessing import StandardScaler, PowerTransformer
from scipy.stats import qmc
import pandas as pd
from enum import Enum
import scipy
import matplotlib.pyplot as plt
import pickle
import gpflow

import itertools
from itertools import combinations_with_replacement
from itertools import combinations
from itertools import permutations
import utils
from utils import r14, r32, r50, r125, r134a, r143a, r170

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
2024-02-08 12:36:55.249712: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-08 12:36:55.307434: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-08 12:36:55.307497: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-08 12:3

In [3]:
#Create fxn for analyzing a single gp w/ gpflow
def eval_gp_new_theta(theta_guess, t_matrix, gp_object, Xexp):
    """
    Evaluate one GP at every experimental state point for a single parameter guess.

    Parameters
    ----------
    theta_guess: array-like, parameter guess. It is flattened into one row of shape (1, n_theta)
    t_matrix: ndarray, matrix whose transpose is right-multiplied onto theta_guess to
        build the parameter part of the GP input
    gp_object: model exposing predict_f(X, full_cov=True) -> (mean, covariance), e.g. a gpflow model
    Xexp: ndarray, shape (n_exp, n_x), state points appended as the last GP input columns

    Returns
    -------
    gp_mean: predictive mean, passed through exactly as returned by gp_object.predict_f
    gp_std: ndarray, square root of the diagonal of the predictive covariance.
        Shape (n_exp,) for a 2-D covariance or a single-output (1, n_exp, n_exp) covariance
    """
    #Get theta into correct form using t_matrix (np.asarray lets lists/tuples through as well)
    theta_row = np.asarray(theta_guess).reshape(1,-1)
    gp_theta = theta_row@t_matrix.T
    #Repeat the parameter row once per state point and append the x data
    gp_theta = np.repeat(gp_theta, len(Xexp), axis = 0)
    gp_input = np.concatenate((gp_theta, Xexp), axis=1)
    #Get mean and covariance from gp
    gp_mean, gp_covar = gp_object.predict_f(gp_input, full_cov=True)
    #With full_cov=True gpflow puts the output dimension first, (P, n_exp, n_exp), and
    #np.diag rejects 3-D input. Taking the diagonal over the last two axes handles both layouts
    gp_var = np.diagonal(np.asarray(gp_covar), axis1=-2, axis2=-1)
    if gp_var.ndim > 1 and gp_var.shape[0] == 1:
        #Single-output GP: drop the singleton output axis so gp_std is 1-D
        gp_var = gp_var[0]
    gp_std = np.sqrt(gp_var)

    return gp_mean, gp_std

In [4]:
#Load the pickled dict of R14 VLE GP models
#NOTE: pickle.load can execute arbitrary code, so only open files from a trusted source
r14_gp_path = "R14-vlegp/vle-gps.pkl"
with open(r14_gp_path, mode='rb') as pickle_file:
    data = pickle.load(pickle_file)

#Show what was loaded
print(data)
#Optional: print a gpflow summary for every loaded model
# gpflow.utilities.print_summary(data)
# import tensorflow as tf
# for i, model in enumerate(list(data.values())):
#     print(list(data.keys())[i])
#     gpflow.utilities.print_summary(model)

{'sim_liq_density': <gpflow.models.gpr.GPR object at 0x7f5d46308310>, 'sim_vap_density': <gpflow.models.gpr.GPR object at 0x7f5d459842d0>, 'sim_Pvap': <gpflow.models.gpr.GPR object at 0x7f5d45990a90>, 'sim_Hvap': <gpflow.models.gpr.GPR object at 0x7f5d45999390>}


In [19]:
#Number of restarts to use for the optimization
repeats = 50

#TODO:
#Add a file in utils for atom types
    #class transform_matrices
        #__init__(self, param_bounds, param_names, dict_of_matrices)
        #Takes a dictionary of transformation matrices, one per molecule class in utils.py
        #Method set_transform_matrix(R14Constantsclass)

#Lower and upper bounds for the atom typing scheme (from NW for now)
at_param_bounds_l = [2, 1.5, 2, 10, 2, 15]
at_param_bounds_u = [4,   3, 4, 75,10, 50]
#One row per parameter: column 0 is the lower bound, column 1 the upper bound
at_param_bounds = np.transpose(np.array([at_param_bounds_l, at_param_bounds_u]))

#Instantiate the constants class of each molecule
#NOTE: each assignment rebinds a name brought in by "from utils import ..." from the module to an instance
r14 = utils.r14.R14Constants()
r32 = utils.r32.R32Constants()
r50 = utils.r50.R50Constants()
r125 = utils.r125.R125Constants()
r134a = utils.r134a.R134aConstants()
r143a = utils.r143a.R143aConstants()
r170 = utils.r170.R170Constants()


{130: 142.0, 150: 131.72, 170: 118.77, 190: 101.5, 210: 75.63}


In [26]:
#Get dict of refrigerants to consider
molec_data_dict = {"R14":r14,
                "R32":r32,
                "R50":r50,
                "R125":r125,
                "R134a":r134a,
                "R143a":r143a,
                "R170":r170}

#Make a dict of these gp dictionaries for each molecule
all_gp_dict = {}
#loop over molecules
for key in molec_data_dict:
    #Get dict of vle gps, stored in "<molecule>-vlegp/vle-gps.pkl"
    #OPTIONAL append the MD density gp to the VLE density gp dictionary w/ key "MD Density"
    #Join directory and file name as separate components (a single-argument join does nothing)
    file = os.path.join(key + "-vlegp", "vle-gps.pkl")
    #NOTE: pickle.load can execute arbitrary code, so only load files from a trusted source
    with open(file, 'rb') as pickle_file:
        all_gp_dict[key] = pickle.load(pickle_file)

#define the scipy function for minimizing
def scipy_min_fxn(theta_guess, molec_data_dict, all_gp_dict, verbose=True):
    """
    Objective function for scipy.optimize: weighted sum of squared errors over all molecules and properties.

    Parameters
    ----------
    theta_guess: parameter guess supplied by the optimizer. Currently unused because the GP
        evaluation below is still commented out
    molec_data_dict: dict, molecule name -> object with the attributes expt_vap_density,
        expt_liq_density, expt_Pvap and expt_Hvap (each a dict of x value -> y value)
    all_gp_dict: dict, molecule name -> dict of property key -> GP model
    verbose: bool, default True. When True, prints the molecule name, the GP key, the gpflow
        parameter summary and the experimental data for every GP. Pass False when the function
        is driven by an optimizer, so that each evaluation does not flood the output

    Returns
    -------
    obj: float, sum of the normalized-weight squared errors. While the GP evaluation is
        commented out, no errors/weights are collected and this is always 0.0

    Raises
    ------
    ValueError: if a GP key matches none of the known properties
    """
    #Initialize weight and squared error arrays
    sqerr_array  = []
    weight_array = []

    #Substring of the GP key -> attribute holding the matching experimental data.
    #Order matters: "sim_Pvap" and "sim_Hvap" also contain "vap", so the specific tags
    #must be tested before the generic density tags
    exp_attr_by_tag = (("Pvap", "expt_Pvap"),
                       ("Hvap", "expt_Hvap"),
                       ("vap", "expt_vap_density"),
                       ("liq", "expt_liq_density"))

    #Loop over molecules
    for molec, molec_data in molec_data_dict.items():
        if verbose:
            print(molec)
        #Get theta associated with each gp
        # param_matrix = AT(7, molec)
        #Get GPs associated with each molecule
        molec_gps_dict = all_gp_dict[molec]
        #Loop over gps (1 per property)
        for key, gp_model in molec_gps_dict.items():
            if verbose:
                print(key)
                gpflow.utilities.print_summary(gp_model)
            #Get X and Y data associated with the GP
            exp_attr = next((attr for tag, attr in exp_attr_by_tag if tag in key), None)
            if exp_attr is None:
                raise ValueError("all_gp_dict must contain a dict with keys sim_vap_density, sim_liq_density, sim_Hvap, or, sim_Pvap")
            exp_data = getattr(molec_data, exp_attr)
            #Get x and y data
            x_exp = np.array(list(exp_data.keys())).reshape(-1,1)
            y_exp = np.array(list(exp_data.values()))
            if verbose:
                print(x_exp, y_exp)
            #NOTE(review): when re-enabling the lines below, confirm gp_mean is 1-D; an (n, 1) mean
            #would broadcast against y_exp.flatten() into an (n, n) array
            # #Evaluate GP
            # gp_mean, gp_std = eval_gp_new_theta(theta_guess, param_matrix, gp_model, x_exp)
            # #Calculate weight from uncertainty
            # weight_mpi = (1/(gp_std**2)).tolist()
            # weight_array += weight_mpi
            # #Calculate sse
            # sq_err = ((y_exp.flatten() - gp_mean)**2).tolist()
            # sqerr_array += sq_err

    #List to array
    sqerr_array = np.array(sqerr_array)
    weight_array = np.array(weight_array)
    #Normalize weights to add up to 1
    scaled_weights = weight_array / np.sum(weight_array)
    #Define objective function
    obj = np.sum(scaled_weights*sqerr_array)
    return obj
#Check output: evaluate the objective once with a placeholder theta and show the value
obj_check = scipy_min_fxn(1, molec_data_dict, all_gp_dict)
print(obj_check)

R14
sim_liq_density
╒═════════════════════════╤═══════════╤══════════════════╤═════════╤═════════════╤═════════╤═════════╤═══════════════════════════════╕
│ name                    │ class     │ transform        │ prior   │ trainable   │ shape   │ dtype   │ value                         │
╞═════════════════════════╪═══════════╪══════════════════╪═════════╪═════════════╪═════════╪═════════╪═══════════════════════════════╡
│ GPR.mean_function.A     │ Parameter │ Identity         │         │ True        │ (5, 1)  │ float64 │ [[0.71898...                  │
├─────────────────────────┼───────────┼──────────────────┼─────────┼─────────────┼─────────┼─────────┼───────────────────────────────┤
│ GPR.mean_function.b     │ Parameter │ Identity         │         │ True        │ (1,)    │ float64 │ [-3.42204]                    │
├─────────────────────────┼───────────┼──────────────────┼─────────┼─────────────┼─────────┼─────────┼───────────────────────────────┤
│ GPR.kernel.variance     │ Paramet

In [None]:
## specify initial guesses bounds for new_theta
repeats = 50
bounds_theta_new_l = [-5, -5, -5, -5, -5]
bounds_theta_new_u = [ 2,  2,  2,  2,  2]
bounds_theta_new = np.array([bounds_theta_new_l, bounds_theta_new_u]).T
#Draw the starting points from a seeded generator so the restarts are reproducible
guess_seed = 1
guess_rng = np.random.default_rng(guess_seed)
theta_guesses = guess_rng.uniform(low=bounds_theta_new_l, high=bounds_theta_new_u,
                                  size=(repeats, len(bounds_theta_new_l)))

#Define matricies, gps, and data to look at
#NOTE(review): t_matrix, t_matrix2, fit_gp_model, fit_gp_model2, Xexp, Xexp2, Yexp and Yexp2 are not
#defined in any cell shown here, and the four extra args passed to optimize.minimize below do not match
#scipy_min_fxn(theta_guess, molec_data_dict, all_gp_dict) as defined in this notebook - confirm before running
t_matricies = [t_matrix, t_matrix2]
gps = [fit_gp_model, fit_gp_model2]
Xexp_list = [Xexp, Xexp2]
Yexp_list = [Yexp, Yexp2]

#Column names of the results dataframe
column_names = ['Theta Guess', 'Min Obj', 'Param at Min Obj', 'Min Obj Cum.', 'Param at Min Obj Cum.',
                "func evals", "jac evals", "Termination", "Total Run Time"]

#Collect one record per restart and build the dataframe once at the end
#(growing a DataFrame with pd.concat inside the loop copies every earlier row on each pass)
ls_records = []
#Best objective found so far and the parameters that produced it
obj_cum = None
theta_obj_cum = None

#Optimize w/ restarts
for i in range(repeats):
    #Start timer
    time_start = time.time()
    #Get guess and find scipy.optimize solution
    Solution = optimize.minimize(scipy_min_fxn, theta_guesses[i] , bounds=bounds_theta_new, method='L-BFGS-B',
                                 args=(t_matricies, gps, Xexp_list, Yexp_list), options = {"disp":False})
    #End timer and calculate total run time
    time_end = time.time()
    time_per_run = time_end-time_start
    #Back out results
    param_min_obj = Solution.x
    min_obj = Solution.fun

    #The first restart always sets the running minimum; later ones replace it only on a strict improvement
    if obj_cum is None or min_obj < obj_cum:
        obj_cum = min_obj
        theta_obj_cum = param_min_obj

    ls_iter_res = [theta_guesses[i], min_obj, param_min_obj, obj_cum, theta_obj_cum, Solution.nfev,
                   Solution.njev, Solution.status, time_per_run]
    ls_records.append(dict(zip(column_names, ls_iter_res)))

#Passing columns keeps the column order, and the headers when there are no restarts
ls_results = pd.DataFrame(ls_records, columns=column_names)

print(ls_results)