In [26]:
import numpy as np
import random
from numpy.random import default_rng
import warnings
import math
from scipy.stats import norm, multivariate_normal
from scipy import integrate
import scipy.optimize as optimize
import os
import time
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, WhiteKernel, ConstantKernel, DotProduct
from sklearn.preprocessing import StandardScaler, PowerTransformer
from scipy.stats import qmc
import pandas as pd
from enum import Enum
import scipy
import matplotlib.pyplot as plt
import pickle
import gpflow

import itertools
from itertools import combinations_with_replacement
from itertools import combinations
from itertools import permutations
import utils
from utils import r14, r32, r50, r125, r134a, r143a, r170

In [23]:
#Create fxn for analyzing a single gp w/ gpflow
def eval_gp_new_theta(theta_guess, t_matrix, gp_object, Xexp):
    """Evaluate a single GP at one trial parameter vector for every row of ``Xexp``.

    Parameters
    ----------
    theta_guess : array-like
        Trial parameter vector; it is reshaped to a single row before use.
    t_matrix : np.ndarray
        2-D matrix with one column per entry of ``theta_guess``. The GP's
        parameter inputs are computed as ``theta_guess @ t_matrix.T``.
    gp_object : object
        Model exposing ``predict_f(X, full_cov=True)`` and returning a
        ``(mean, covariance)`` pair (e.g. a GPflow model).
    Xexp : np.ndarray
        2-D array with one row per point to evaluate; its columns are appended
        after the transformed parameters to build the GP input.

    Returns
    -------
    gp_mean
        Predictive mean exactly as returned by ``gp_object.predict_f``.
    gp_std : np.ndarray
        Predictive standard deviation at each row of ``Xexp``. It is 1-D when
        the covariance is ``(N, N)`` or has a single leading output axis
        ``(1, N, N)``; with ``P > 1`` outputs it has shape ``(P, N)``.
    """
    #Get theta into correct form using t_matrix
    theta_row = np.asarray(theta_guess).reshape(1, -1)
    gp_theta = theta_row @ t_matrix.T
    #Repeat the transformed parameters once per x point and append the x data
    gp_theta = np.repeat(gp_theta, len(Xexp), axis=0)
    gp_input = np.concatenate((gp_theta, Xexp), axis=1)
    #Get mean and full covariance from gp
    gp_mean, gp_covar = gp_object.predict_f(gp_input, full_cov=True)
    #The covariance may carry a leading output axis ([P, N, N]); np.diag only
    #accepts 1-D/2-D input, so take the diagonal over the last two axes instead.
    gp_var = np.diagonal(np.asarray(gp_covar), axis1=-2, axis2=-1)
    gp_var = gp_var.reshape(-1, gp_var.shape[-1])
    if gp_var.shape[0] == 1:
        #Single-output GP: return a flat vector with one entry per x point
        gp_var = gp_var[0]
    gp_std = np.sqrt(gp_var)

    return gp_mean, gp_std

In [34]:
# Load the pickled GP models from the relative path R14-vlegp/vle-gps.pkl.
# The printed output below shows a dict keyed by property name
# ('sim_liq_density', 'sim_vap_density', 'sim_Pvap', 'sim_Hvap'), each value
# being a gpflow GPR model object.
# NOTE: pickle.load can execute arbitrary code while unpickling; only load
# files that come from a trusted source.
with open("R14-vlegp/vle-gps.pkl", 'rb') as pickle_file:
    data = pickle.load(pickle_file)

# Print the loaded dict to check which models are available
print(data)
# gpflow.utilities.print_summary(data)
# import tensorflow as tf
# for i, model in enumerate(list(data.values())):
#     print(list(data.keys())[i])
#     gpflow.utilities.print_summary(model)

{'sim_liq_density': <gpflow.models.gpr.GPR object at 0x7f7a5ccdc650>, 'sim_vap_density': <gpflow.models.gpr.GPR object at 0x7f79d0648310>, 'sim_Pvap': <gpflow.models.gpr.GPR object at 0x7f79d0631dd0>, 'sim_Hvap': <gpflow.models.gpr.GPR object at 0x7f79d0646650>}


In [31]:
#Specify initial guesses
#Number of optimization restarts
repeats = 50

#TODO:
#Make file in utils for atom types
    #class transform_matrices
        #__init__(self, param_bounds, param_names, dict_of_matrices)
        #Give it a dictionary of transformation matrices for each molecule class in utils.py
        #Method set_transform_matrix(R14Constants class)

#Bounds for the atom typing scheme (from NW for now)
#Stored as one [lower, upper] row per parameter
at_param_bounds_l = [2, 1.5, 2, 10, 2, 15]
at_param_bounds_u = [4,   3, 4, 75,10, 50]
at_param_bounds = np.transpose(np.array([at_param_bounds_l, at_param_bounds_u]))

#Load class properties for each molecule
#NOTE: these names rebind the module names imported from utils to the
#constants objects; the right-hand side goes through utils.<module> so the
#cell can still be re-run.
r14, r32, r50, r125, r134a, r143a, r170 = (
    utils.r14.R14Constants,
    utils.r32.R32Constants,
    utils.r50.R50Constants,
    utils.r125.R125Constants,
    utils.r134a.R134aConstants,
    utils.r143a.R143aConstants,
    utils.r170.R170Constants,
)

#Dict of refrigerants to consider, keyed by molecule name
molec_data_dict = dict(
    r14=r14,
    r32=r32,
    r50=r50,
    r125=r125,
    r134a=r134a,
    r143a=r143a,
    r170=r170,
)
#TODO: For each molecule, append the MD density gp to the VLE density gp dictionary w/ key "MD Density"
#TODO: Make a dict of these gp dictionaries for each molecule

#define the scipy function for minimizing
def scipy_min_fxn(theta_guess, molec_data_dict, molec_gp_dict):
    """Weighted squared-error objective over every GP of every molecule.

    For each molecule key in ``molec_data_dict`` and each GP model stored in
    ``molec_gp_dict[molec]``, the GP is evaluated at ``theta_guess`` through
    ``eval_gp_new_theta``. Squared errors against ``yexp`` are weighted by the
    inverse predictive variance, and the weights are normalised to sum to 1.

    Parameters
    ----------
    theta_guess : np.ndarray
        Trial parameter vector passed on to ``eval_gp_new_theta``.
    molec_data_dict : dict
        Mapping whose keys are the molecule names to loop over.
    molec_gp_dict : dict
        Mapping from molecule name to a dict of GP models for that molecule.

    Returns
    -------
    float
        Sum of the normalised weights times the squared errors.
    """
    #Initialize weight and squared error lists
    sqerr_array  = []
    weight_array = []

    #Loop over molecules
    for molec in molec_data_dict.keys():
        #Get theta transformation associated with this molecule
        #NOTE(review): AT is not defined in the visible part of the notebook;
        #presumably it returns the transformation matrix for this molecule -- confirm.
        param_matrix = AT(7, molec)
        #Loop over the GP models of this molecule. molec_gp_dict[molec] is a
        #dict, so take its values once (a dict view has no .values() method).
        for gp_model in molec_gp_dict[molec].values():
            #Get X and Y data associated with each GP
            #NOTE(review): xexp / yexp are never assigned inside this function,
            #so they must exist as globals; the per-GP lookup is still TODO.

            #Evaluate GP
            gp_mean, gp_std = eval_gp_new_theta(theta_guess, param_matrix, gp_model, xexp)
            #Flatten both so a column-shaped (N, 1) mean cannot broadcast
            #against the flat yexp into an (N, N) matrix
            gp_mean = np.asarray(gp_mean).reshape(-1)
            gp_std = np.asarray(gp_std).reshape(-1)
            #Calculate weight from uncertainty
            weight_mpi = (1/(gp_std**2)).tolist()
            weight_array += weight_mpi
            #Calculate squared errors
            sq_err = ((yexp.flatten() - gp_mean)**2).tolist()
            sqerr_array += sq_err
    #List to array
    sqerr_array = np.array(sqerr_array)
    weight_array = np.array(weight_array)
    #Normalize weights to add up to 1
    scaled_weights = weight_array / np.sum(weight_array)
    #Define objective function
    obj = np.sum(scaled_weights*sqerr_array)
    return obj

In [None]:
## specify initial guesses bounds for new_theta
repeats = 50
bounds_theta_new_l = [-5, -5, -5, -5, -5]
bounds_theta_new_u = [ 2,  2,  2,  2,  2]
#One [lower, upper] row per parameter, as expected by optimize.minimize(bounds=...)
bounds_theta_new = np.array([bounds_theta_new_l, bounds_theta_new_u]).T
#One uniformly sampled starting point per restart
#(no seed is set in this cell, so the guesses differ between runs)
theta_guesses = np.random.uniform(low=bounds_theta_new_l, high=bounds_theta_new_u, size=(repeats, len(bounds_theta_new_l)) )

#Define matrices, gps, and data to look at
t_matricies = [t_matrix, t_matrix2]
gps = [fit_gp_model, fit_gp_model2]
Xexp_list = [Xexp, Xexp2]
Yexp_list = [Yexp, Yexp2]

#Column layout of the results dataframe
column_names = ['Theta Guess', 'Min Obj', 'Param at Min Obj', 'Min Obj Cum.', 'Param at Min Obj Cum.',
                "func evals", "jac evals", "Termination", "Total Run Time"]

#Collect one record per restart and build the dataframe once after the loop
#(growing a DataFrame with pd.concat inside the loop is quadratic)
ls_records = []
#Best objective and its parameters seen so far across restarts
obj_cum = None
theta_obj_cum = None

#Optimize w/ restarts
for i in range(repeats):
    #Start timer
    time_start = time.time()
    #Get guess and find scipy.optimize solution
    #NOTE(review): the scipy_min_fxn defined earlier in this notebook takes
    #(theta_guess, molec_data_dict, molec_gp_dict); the four-element args tuple
    #below does not match that signature -- confirm which version is intended.
    Solution = optimize.minimize(scipy_min_fxn, theta_guesses[i] , bounds=bounds_theta_new, method='L-BFGS-B', 
                                 args=(t_matricies, gps, Xexp_list, Yexp_list), options = {"disp":False})
    #End timer and calculate total run time
    time_end = time.time()
    time_per_run = time_end-time_start
    #Back out results
    param_min_obj = Solution.x
    min_obj = Solution.fun

    #The first restart always sets the running best; afterwards only a
    #strictly smaller objective replaces it
    if i == 0 or min_obj < obj_cum:
        obj_cum = min_obj
        theta_obj_cum = param_min_obj

    ls_iter_res = [theta_guesses[i], min_obj, param_min_obj, obj_cum, theta_obj_cum, Solution.nfev,
                   Solution.njev, Solution.status, time_per_run]
    ls_records.append(dict(zip(column_names, ls_iter_res)))

#Build the results dataframe (one row per restart, index 0..repeats-1)
ls_results = pd.DataFrame(ls_records, columns=column_names)

print(ls_results)