# PhaseObjC - Pure (Stoichiometric) Phase Calibration Example
## Calibration of Aluminosilicates from Berman(1988) database of pure solid phases (standard state properties)
### Concepts Demonstrated
* Loading a thermodynamic database
* Imposing experimental priors

## Import libraries ...

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
pd.set_option('display.max_columns', None)
from scipy import stats

import os
import sys
sys.path.insert(0, '..')
sys.path.insert(0, '/Users/msghiorso/ENKI/PhaseObjC')
import phasepy

import importlib
importlib.reload(phasepy)

## Define reusable functions for analysis and visualization
* improve code readability
* reduce coding errors
* speed up debugging

In [None]:
def get_data_path(filenm):
    """Return the path of `filenm` inside the `data` folder of the phasepy package.

    The folder is located relative to the directory that holds
    ``phasepy.__file__``; the path is built but not checked for existence.
    """
    package_dir = os.path.dirname(phasepy.__file__)
    return os.path.join(package_dir, 'data', filenm)

def plot_calib_fitness(costdata_df, log_space=True, legend_loc='upper right'):
    """Scatter-plot the fitness of every datum against its row position.

    Parameters
    ----------
    costdata_df : DataFrame
        Must provide the columns 'rxn_dir' and 'log_lk'.
    log_space : bool, optional
        If True plot ``-log_lk`` (the cost); otherwise plot ``exp(log_lk)``.
    legend_loc : str, optional
        Passed to ``plt.legend`` as ``loc``.

    Notes
    -----
    Opens a new figure and calls ``plt.show()``; nothing is returned.
    """
    rxn_dir = costdata_df['rxn_dir']
    is_fwd = (rxn_dir == 'FWD').values
    is_rev = (rxn_dir == 'REV').values
    is_nc = (rxn_dir == 'NC').values
    # Anything that is not exactly FWD, REV or NC falls in the last group.
    is_other = ~(is_fwd | is_rev | is_nc)

    log_lk = costdata_df['log_lk']
    fitness = -log_lk if log_space else np.exp(log_lk)
    positions = np.arange(costdata_df.shape[0])

    plt.figure()
    # (mask, format string, extra keyword arguments), in drawing order.
    groups = (
        (is_fwd, 'ro', {'label': 'FWD'}),
        (is_rev, 'bo', {'label': 'REV'}),
        (is_nc, 'ko', {'label': 'NC'}),
        (is_other, 'o', {'label': 'FWD/REV?', 'color': [.7, .7, .7]}),
    )
    for mask, fmt, style in groups:
        plt.plot(positions[mask], fitness[mask], fmt, **style)

    plt.xlabel('Data Index')
    plt.ylabel(r'Cost = $-\log_e {\rm Prob}$' if log_space else 'Prob')

    plt.legend(loc=legend_loc)
    plt.show()

def fit_printout(fit_output_d,thermoDB_mod):
    """Print a text summary of a fit result together with the model keys.

    Parameters
    ----------
    fit_output_d : dict
        Must contain 'costval0', 'param0_tbl', 'costval', 'param_tbl',
        'corr_a' (rounded to 2 decimals for display) and 'prior_df'
        (an object with a ``to_string()`` method).
    thermoDB_mod : object
        Must expose the attributes ``rxn_key`` and ``phs_key``.
    """
    separator = '------'

    # Starting point of the fit.
    print('costval0 = ' ,fit_output_d['costval0'])
    print('param0_tbl:\n' ,fit_output_d['param0_tbl'])
    print(separator)
    # Result of the fit.
    print('costval = ',fit_output_d['costval'])
    print('param_tbl:\n' ,fit_output_d['param_tbl'])
    print(separator)
    print('param_corr:\n' ,np.round(fit_output_d['corr_a'],decimals=2))
    print(separator)
    print('prior:')
    print(fit_output_d['prior_df'].to_string())
    print(separator)
    print('rxn key:')
    print(thermoDB_mod.rxn_key)
    print('phs key:')
    # Fixed: this section used to print rxn_key a second time.
    print(thermoDB_mod.phs_key)


def get_nice_colors(Ncolor,whitefrac=0.0):
    """Return `Ncolor` colors sampled evenly from the 'viridis' colormap.

    Parameters
    ----------
    Ncolor : int
        Number of colors to return.
    whitefrac : float, optional
        Blend weight towards white: each color becomes
        ``(1 - whitefrac) * color + whitefrac``.

    Returns
    -------
    ndarray
        One row per color, taken from the colormap's ``colors`` table.
    """
    viridis = plt.get_cmap('viridis')
    # Evenly spaced positions over indices 0..255, rounded to whole indices.
    lut_idx = np.round(np.linspace(0, 255, Ncolor)).astype(np.int_)
    rgb = np.array([viridis.colors[i] for i in lut_idx])
    return (1 - whitefrac) * rgb + whitefrac * np.ones_like(rgb)

def plot_phase_diagram_with_data(Tlims,costdata_df, thermoDB_mod, savefig_path=''):
    """Plot reversal data and traced reaction boundaries for every model reaction.

    Parameters
    ----------
    Tlims : array-like
        Passed unchanged to each reaction's ``trace_rxn_bound``. Traced
        temperatures are shifted by -273.15 for display, so kelvin is
        presumably expected — confirm against phasepy.
    costdata_df : DataFrame
        Must provide the columns 'rxn', 'rxn_dir', 'T' and 'P'. 'T' is
        shifted by -273.15 and 'P' divided by 1e3 to match the axis labels
        ([C] and [kbar]).
    thermoDB_mod : object
        Must expose ``rxn_key`` (reaction labels, also used as legend
        entries) and ``rxn_l`` (reaction objects with ``trace_rxn_bound``
        and ``get_rxn_stability``), iterated in lockstep.
    savefig_path : str, optional
        If non-empty, the figure is written there before being shown.

    Notes
    -----
    Draws into the current matplotlib figure and calls ``plt.show()``.
    Rows whose 'rxn_dir' is not exactly 'FWD', 'REV' or 'NC' are not drawn.
    """
    col_l = get_nice_colors(len(thermoDB_mod.rxn_l))

    # The direction masks do not depend on the reaction: build them once.
    rxn_dir = costdata_df['rxn_dir']
    msk_fwd = rxn_dir == 'FWD'
    msk_rev = rxn_dir == 'REV'
    msk_nc = rxn_dir == 'NC'

    rxn_plt_l = []
    for rxn_eqn,rxn_obj,col in zip(thermoDB_mod.rxn_key,thermoDB_mod.rxn_l,col_l):
        msk_rxn = costdata_df['rxn'] == rxn_eqn

        dat_fwd_rxn_df = costdata_df[msk_rxn&msk_fwd]
        dat_rev_rxn_df = costdata_df[msk_rxn&msk_rev]
        dat_nc_rxn_df  = costdata_df[msk_rxn&msk_nc]

        # FWD: filled circles, NC: crosses, REV: larger open circles.
        pfwd, = plt.plot(dat_fwd_rxn_df['T']-273.15,
                         dat_fwd_rxn_df['P']/1e3,'o',
                         markersize=6,color=col)
        plt.plot(dat_nc_rxn_df['T']-273.15,
                 dat_nc_rxn_df['P']/1e3,'x',
                 markersize=6,color=col)
        prev, = plt.plot(dat_rev_rxn_df['T']-273.15,
                         dat_rev_rxn_df['P']/1e3,'o',
                         markersize=10,color=col)
        prev.set_markerfacecolor('none')

        TP_bound_a = rxn_obj.trace_rxn_bound(Tlims=Tlims,Nsamp=100)
        stable_a = rxn_obj.get_rxn_stability(TP_bound_a,thermoDB_mod.rxn_l)

        # Solid where the boundary is flagged stable, dashed elsewhere.
        plt.plot(TP_bound_a[0][stable_a]-273.15,
                 TP_bound_a[1][stable_a]/1e3,'-',
                 lw=2,color=col)
        plt.plot(TP_bound_a[0][~stable_a]-273.15,
                 TP_bound_a[1][~stable_a]/1e3,'--',
                 lw=2,color=col)

        # The FWD marker handle represents this reaction in the legend.
        rxn_plt_l.append(pfwd)

    plt.xlabel('Temperature  [C]')
    plt.ylabel('Pressure  [kbar]')

    plt.legend(rxn_plt_l, thermoDB_mod.rxn_key)
    # Truthiness test so that None, like '', means "do not save".
    if savefig_path:
        plt.savefig(savefig_path)

    plt.show()


from scipy.stats import norm, chi2

def cov_ellipse(cov, q=None, nsig=None, **kwargs):
    """
    Axis lengths and orientation of the confidence ellipse of a 2-D covariance.

    Parameters
    ----------
    cov : (2, 2) array
        Covariance matrix.
    q : float, optional
        Confidence level, should be in (0, 1). Takes precedence over `nsig`.
    nsig : int, optional
        Confidence level in units of standard deviations.
        E.g. 1 stands for 68.3% and 2 stands for 95.4%.
    **kwargs
        Accepted but not used.

    Returns
    -------
    width, height, rotation :
        Full lengths of the two ellipse axes and the rotation angle in
        degrees. `width` belongs to the first eigenpair returned by
        ``np.linalg.eigh`` and `rotation` is the direction of that
        eigenvector.

    Raises
    ------
    ValueError
        If neither `q` nor `nsig` is given.
    """
    if q is None and nsig is None:
        raise ValueError('One of `q` and `nsig` should be specified.')

    if q is not None:
        conf = np.asarray(q)
    else:
        # Two-sided probability mass within +/- nsig standard deviations.
        conf = 2 * norm.cdf(nsig) - 1
    # Squared radius from the chi-square quantile with 2 degrees of freedom.
    radius_sq = chi2.ppf(conf, 2)

    eigval, eigvec = np.linalg.eigh(cov)
    first_axis, second_axis = 2 * np.sqrt(eigval[:, None] * radius_sq)
    angle = np.degrees(np.arctan2(*eigvec[::-1, 0]))

    # Only the first entry of each axis array is returned.
    return first_axis[0], second_axis[0], angle

## Load data into new database model

In [None]:
# Reload phasepy so that edits to the package source are picked up without
# restarting the kernel.
importlib.reload(phasepy)
# Read the Al2SiO5 dataset from the package's data folder (see get_data_path).
raw_df = phasepy.ThermoDBModel.read_phase_rev_data(get_data_path('Al2SiO5_thermo_data.xlsx'))
# Build the database model from the raw data.
# NOTE(review): mask_phs_l=['fSil'] presumably excludes the 'fSil' phase — confirm against ThermoDBModel.
thermoDB_mod = phasepy.ThermoDBModel(raw_df,mask_phs_l=['fSil'])

## Phases and Reactions
* Thermodynamic Database Models are characterized by reactions and phases
* **ID numbers identify each rxn and phs (see key below)**
### Phases
* Each phase is described by standard state properties
* enthalpy, entropy, and volume: $(dH_0, S_0, V_0)$
### Reactions
* phase properties control the position of the reaction boundaries, where Gibbs energies are equal, $\Delta G_{\rm rxn}=0$
* The reactions are modeled using the set of energy threshold parameters $\alpha_i$ (see Bayes Logistic Uncertain Reaction Model notebook)

In [None]:
# Print the phase key and the reaction key of the model in full.
print('phs key:')
print(thermoDB_mod.phs_key.to_string())
print('------')
print('rxn key:')
print(thermoDB_mod.rxn_key.to_string())

## Experimental Priors
* experimental constraints are automatically loaded upon initializing the database model
* priors help control the behavior of variables not well-constrained by phase reversal data
* published experimental constraints are often statistically inconsistent with one another
* **robust probability models** are used for priors to allow for outliers 
    * Student's T-distribution has much fatter tails than the normal distribution, allowing for large outliers

In [None]:
# Trailing expression: the notebook renders the model's experimental-prior table.
thermoDB_mod.exp_prior_df

### Examine all possible phase and reaction parameters using get_param_table
* Each parameter belongs to a particular phase (phsN) or particular reaction (rxnN)
* numbering scheme matches phase and reaction keys above
* Note that not all of these will be fit

In [None]:
# param0_d = thermoDB_mod.param_d
# Snapshot of every parameter (typ='all'), taken before any fitting.
param0_df = thermoDB_mod.get_param_table(typ='all')
# NOTE(review): dat_trust_d is not used anywhere else in the visible notebook — confirm it is still needed.
dat_trust_d = thermoDB_mod.extract_trust_data()
print('Initial Full Parameter Set')
# Trailing expression: rendered as the cell output.
param0_df

## Initial Model Fitness
* Prior to fitting, the model is assessed to determine the quality of the initial guess
* Initial phase parameters are reasonable (from Berman 1988)
* Initial reaction parameters are completely arbitrary
* **initial data likelihoods are badly estimated** due to poor reaction parameter values
* **highly improbable reversal observations reflect a poor initial reaction model**

In [None]:
# Evaluate the cost function at the initial parameter values. With
# full_output=True the result is indexed by 'cost_data_df' and 'cost_val' below.
cost_d = thermoDB_mod.eval_model_costfun(param0_df['value'],
                                         param0_df['name'],full_output=True)
costdata_df = cost_d['cost_data_df']

# Show the per-datum fitness both as cost (-log) and as probability.
print('Initial Model Fitness:')
plot_calib_fitness(costdata_df,log_space=True)
plot_calib_fitness(costdata_df,log_space=False)

print('Initial Cost Fun Value = ',cost_d['cost_val'])
# Trailing expression: first rows of the per-datum table as cell output.
costdata_df.head()

## Fitting Rxn Parameters
* Start with rxn params since we know the phase parameter set from Berman 1988 is reasonable
* Setup free parameters for fitting
* add or delete parameters from free parameter list
* use either parameter types or individual parameter names

In [None]:
# Setup Free parameters for fitting rxn parameters
# Statement order matters: clear the free list, add every 'rxn'-type
# parameter, then remove the 'rxnadj' type again.
# NOTE(review): 'rxnadj' is presumably a subset of 'rxn' — confirm against phasepy.
thermoDB_mod.del_free_params([],typ='all')
thermoDB_mod.add_free_params([],typ='rxn')
thermoDB_mod.del_free_params([],typ='rxnadj')

# Keep the table of free rxn parameters; its 'name' column is reused later
# to re-select the same parameters.
param0_rxn_df = thermoDB_mod.get_param_table(typ='free')
print('Initial Rxn Parameter Set')
param0_rxn_df

In [None]:
# First fit pass: only the cost before and after is reported.
fit_rxn_output_d = thermoDB_mod.fit_model(full_output=True)
print('costval0 = ' ,fit_rxn_output_d['costval0'])
print('costval = ' ,fit_rxn_output_d['costval'])
# Second fit pass: overwrites the first result and gets the full printout.
# NOTE(review): presumably this call starts from the parameters left by the first pass — confirm against fit_model.
fit_rxn_output_d = thermoDB_mod.fit_model(full_output=True)
fit_printout(fit_rxn_output_d,thermoDB_mod)

## Fit Phase parameters (fixing reaction parameters at optimal values)

In [None]:
# Setup Free parameters for fitting phs parameters
# Statement order matters: clear the free list first, then add every
# 'phs'-type parameter.
thermoDB_mod.del_free_params([],typ='all')
thermoDB_mod.add_free_params([],typ='phs')
# dH0_phs0 is fixed since absolute enthalpy is not constrained by phase reversal data
thermoDB_mod.del_free_params(['dH0_phs0'])
# Fix V0's since they are not well constrained by phase reversal data alone
# (currently disabled: the V0 parameters stay free)
# thermoDB_mod.del_free_params(['V0_phs0','V0_phs1','V0_phs2'])

# Keep the table of free phs parameters; its 'name' column is reused later
# to re-select the same parameters.
param0_phs_all_df = thermoDB_mod.get_param_table(typ='free')
param0_phs_all_df

In [None]:
# Fit the currently free (phs) parameters and print the full summary.
fit_phs_output_d = thermoDB_mod.fit_model(full_output=True)
# Optional second pass, left disabled:
# fit_phs_output_d = thermoDB_mod.fit_model(full_output=True)
fit_printout(fit_phs_output_d,thermoDB_mod)

In [None]:
# fit_output_d always names the most recent fit result used for plotting.
fit_output_d = fit_phs_output_d
# Per-datum fitness after the fit, as cost (-log) and as probability.
plot_calib_fitness(fit_output_d['costdata_df'],log_space=True)
plot_calib_fitness(fit_output_d['costdata_df'],log_space=False)

In [None]:
# Alternate back to the rxn parameters: clear the free list, then free
# exactly the names recorded in param0_rxn_df and fit again.
thermoDB_mod.del_free_params([],typ='all')
thermoDB_mod.add_free_params(param0_rxn_df['name'])
fit_rxn_output_d = thermoDB_mod.fit_model(full_output=True)
fit_printout(fit_rxn_output_d,thermoDB_mod)
# Track the most recent fit result.
fit_output_d = fit_rxn_output_d

In [None]:
# Alternate back to the phs parameters: clear the free list, then free
# exactly the names recorded in param0_phs_all_df and fit again.
thermoDB_mod.del_free_params([],typ='all')
thermoDB_mod.add_free_params(param0_phs_all_df['name'])
fit_phs_output_d = thermoDB_mod.fit_model(full_output=True)
fit_printout(fit_phs_output_d,thermoDB_mod)
# Track the most recent fit result.
fit_output_d = fit_phs_output_d

In [None]:
# Per-datum fitness of the most recent fit, as cost (-log) and as probability.
plot_calib_fitness(fit_output_d['costdata_df'],log_space=True)
plot_calib_fitness(fit_output_d['costdata_df'],log_space=False)

## Examine resulting Phase Diagram
* Note small shifts in reaction boundaries do a better job separating forward and reverse reaction boundaries

In [None]:
# Temperature limits: 400 and 900 offset by 273.15 (the plotting function
# subtracts 273.15 again and labels the axis in [C]).
Tlims = 273.15+np.array([400,900])
fig_path = 'figs/Al2SiO5-phase-diagram.eps'
# savefig does not create missing folders; make sure the target exists so the
# cell does not fail after all the fitting work has been done.
os.makedirs(os.path.dirname(fig_path), exist_ok=True)
plot_phase_diagram_with_data(Tlims,fit_output_d['costdata_df'], thermoDB_mod,savefig_path=fig_path)

## Model Uncertainties explored using the posterior
* The family of acceptable models given the data is represented as the posterior
* Approximate posterior is given by the curvature of log-probability or cost-function space
* Can be summarized by the covariance matrix, which is given by $\Sigma = - H^{-1}$, where $H$ is the Hessian (curvature matrix) of the log-probability with respect to the model parameters
* **Phase Diagram Uncertainty is determined by drawing from posterior**

In [None]:
# Draw Uncertainties
# Same temperature limits as used for the phase diagram (400 and 900 offset by 273.15).
Tlims = 273.15+np.array([400,900])
# The returned dict is indexed by 'T_a', 'rxn_conf_bnd_a', 'PT_triple_mean_a'
# and 'PT_triple_cov_a' in the plotting cell that follows.
# NOTE(review): Ndraw is presumably the number of posterior draws and Nsamp the
# number of temperature samples — confirm against posterior_rxn_bound.
posterior_rxn_d = thermoDB_mod.posterior_rxn_bound(Tlims,Nsamp=10,Ndraw=600,convert_units=True)


In [None]:
conf_lvl=.68
# conf_lvl=.95
# conf_lvl=.995
# Called for its side effect only; the return value is not used.
# NOTE(review): presumably this stores 'rxn_conf_bnd_a' in posterior_rxn_d — confirm.
thermoDB_mod.calc_rxn_bound_conf_lvl(posterior_rxn_d,conf_lvl=conf_lvl)

# Confidence ellipse at the same confidence level, drawn on top (high zorder).
# NOTE(review): the keys are named 'PT_...' while the axes are T (x) vs P (y) —
# confirm that the component order matches the plot.
width, height, rot =cov_ellipse(posterior_rxn_d['PT_triple_cov_a'],q=conf_lvl)
ell = mpl.patches.Ellipse(xy=posterior_rxn_d['PT_triple_mean_a'], width=width, height=height,
                          angle=rot,color='r',zorder=1e3)


# Reference reactions built from a freshly constructed phasepy.ThermoDB
# (as opposed to the fitted thermoDB_mod), for comparison.
reac_set_l = [['And'],['And'],['Ky']]
prod_set_l = [['Ky'],['Sil'],['Sil']]

thermoDB = phasepy.ThermoDB()
rxn0_obj_l = []
rxn0_eqn_l = []
for reac_l, prod_l in zip(reac_set_l,prod_set_l):
    rxn0_obj = thermoDB.new_rxn(reac_l,prod_l)
    rxn0_obj_l.append(rxn0_obj)
    rxn0_eqn_l.append(rxn0_obj.rxn_eqn_str)


plt.figure()
ax = plt.gca()
# Colors blended halfway to white for the filled bands.
col_a = get_nice_colors(len(rxn0_obj_l),whitefrac=.5)

# One filled band per reaction between rows 0 and 1 of the confidence bounds.
for irxn,rxnnm in enumerate(thermoDB_mod.rxn_key):
    plt.fill_between(posterior_rxn_d['T_a'],posterior_rxn_d['rxn_conf_bnd_a'][0,irxn,:],
                     posterior_rxn_d['rxn_conf_bnd_a'][1,irxn,:],color =col_a[irxn],
                     label=rxnnm )

# Legend is created here so that it lists only the bands.
plt.legend()
ax.add_patch(ell)
plt.xlabel('Temperature [C]')
plt.ylabel('Pressure [kbar]')

# Reference boundaries in gray: solid where flagged stable, dashed elsewhere.
# The format strings carry only the line style; the color comes from the
# keyword alone, which avoids matplotlib's redundant-color warning.
col_gray = [.5,.5,.5]
for rxn0_obj,col in zip(rxn0_obj_l,col_a):
    TP_bound_a = rxn0_obj.trace_rxn_bound(Tlims=Tlims,Nsamp=100)
    stable_a = rxn0_obj.get_rxn_stability(TP_bound_a,rxn0_obj_l)

    rxn0_bnd, = plt.plot(TP_bound_a[0][stable_a]-273.15,
                        TP_bound_a[1][stable_a]/1e3,'-',color=col_gray,
                        lw=1)
    rxn0_bnd_meta, = plt.plot(TP_bound_a[0][~stable_a]-273.15,
                             TP_bound_a[1][~stable_a]/1e3,'--',color=col_gray,
                             lw=1)

# plt.savefig('figs/Al2SiO5-conf-bounds.eps')
plt.show()





## Further refinement possible, but slow and often unnecessary
* Refitting all parameters simultaneously slightly improves model
* differences are quite small 
* simultaneously fitting all parameters is much slower, however, and often unnecessary


In [None]:
# Setup free parameters for fitting phs AND rxn parameters simultaneously
# Statement order matters: clear the free list, add both parameter types,
# then remove the 'rxnadj' type again.
thermoDB_mod.del_free_params([],typ='all')
thermoDB_mod.add_free_params([],typ='phs')
thermoDB_mod.add_free_params([],typ='rxn')
thermoDB_mod.del_free_params([],typ='rxnadj')
# dH0_phs0 is fixed since absolute enthalpy is not constrained by phase reversal data
thermoDB_mod.del_free_params(['dH0_phs0'])
# Fix V0's since they are not well constrained by phase reversal data alone
# (currently disabled: the V0 parameters stay free)
# thermoDB_mod.del_free_params(['V0_phs0','V0_phs1','V0_phs2'])

# NOTE: this rebinds param0_phs_all_df; despite its name the table now lists
# the free rxn parameters as well as the phs parameters.
param0_phs_all_df = thermoDB_mod.get_param_table(typ='free')
param0_phs_all_df

In [None]:
# Fit all currently free parameters (phs and rxn) and print the full summary.
fit_all_output_d = thermoDB_mod.fit_model(full_output=True)
fit_printout(fit_all_output_d,thermoDB_mod)

## Further refinement possible, but unnecessary
* Continued fitting slightly improves model
* differences are quite small however 
* Refitting all parameters simultaneously slightly improves model
* simultaneously fitting all parameters is much slower, however, and often unnecessary

In [None]:
# Draw Uncertainties
# Same temperature limits as before (400 and 900 offset by 273.15).
Tlims = 273.15+np.array([400,900])
# Replaces the earlier posterior_rxn_d; this draw uses Ndraw=100 rather than 600.
posterior_rxn_d = thermoDB_mod.posterior_rxn_bound(Tlims,Nsamp=10,Ndraw=100,convert_units=True)
# Confidence level used by the plotting cell that follows.
conf_lvl=.68


In [None]:
# posterior_rxn_d was redrawn in the previous cell, so recompute the
# confidence bounds before reading them, mirroring the earlier uncertainty plot.
# NOTE(review): assumes calc_rxn_bound_conf_lvl is what populates
# 'rxn_conf_bnd_a' in posterior_rxn_d — confirm against phasepy.
thermoDB_mod.calc_rxn_bound_conf_lvl(posterior_rxn_d,conf_lvl=conf_lvl)

# Confidence ellipse at the same confidence level, drawn on top (high zorder).
# NOTE(review): the keys are named 'PT_...' while the axes are T (x) vs P (y) —
# confirm that the component order matches the plot.
width, height, rot =cov_ellipse(posterior_rxn_d['PT_triple_cov_a'],q=conf_lvl)
ell = mpl.patches.Ellipse(xy=posterior_rxn_d['PT_triple_mean_a'], width=width, height=height,
                          angle=rot,color='r',zorder=1e3)


# Reference reactions built from a freshly constructed phasepy.ThermoDB
# (as opposed to the fitted thermoDB_mod), for comparison.
reac_set_l = [['And'],['And'],['Ky']]
prod_set_l = [['Ky'],['Sil'],['Sil']]

thermoDB = phasepy.ThermoDB()
rxn0_obj_l = []
rxn0_eqn_l = []
for reac_l, prod_l in zip(reac_set_l,prod_set_l):
    rxn0_obj = thermoDB.new_rxn(reac_l,prod_l)
    rxn0_obj_l.append(rxn0_obj)
    rxn0_eqn_l.append(rxn0_obj.rxn_eqn_str)


plt.figure()
ax = plt.gca()
# Colors blended halfway to white for the filled bands.
col_a = get_nice_colors(len(rxn0_obj_l),whitefrac=.5)

# One filled band per reaction between rows 0 and 1 of the confidence bounds.
for irxn,rxnnm in enumerate(thermoDB_mod.rxn_key):
    plt.fill_between(posterior_rxn_d['T_a'],posterior_rxn_d['rxn_conf_bnd_a'][0,irxn,:],
                     posterior_rxn_d['rxn_conf_bnd_a'][1,irxn,:],color =col_a[irxn],
                     label=rxnnm )

# Legend is created here so that it lists only the bands.
plt.legend()
ax.add_patch(ell)
plt.xlabel('T [C]')
plt.ylabel('P [kbar]')

# Reference boundaries in black: solid where flagged stable, dashed elsewhere.
for rxn0_obj,col in zip(rxn0_obj_l,col_a):
    TP_bound_a = rxn0_obj.trace_rxn_bound(Tlims=Tlims,Nsamp=100)
    stable_a = rxn0_obj.get_rxn_stability(TP_bound_a,rxn0_obj_l)

    rxn0_bnd, = plt.plot(TP_bound_a[0][stable_a]-273.15,
                        TP_bound_a[1][stable_a]/1e3,'k-',
                        lw=1)
    rxn0_bnd_meta, = plt.plot(TP_bound_a[0][~stable_a]-273.15,
                             TP_bound_a[1][~stable_a]/1e3,'k--',
                             lw=1)

plt.show()

