# Estimating Correlated Uncertainty from Fitted Blowers-Masel Trees


For each node in the kinetics rate rule family tree, we want to know how it depends on each training reaction.

<img src="bm_tree_derivative_fig.png" width="500">

Sensitivity S is the relative change in output $y$ over a given relative change in input $x$:
$$S = \frac{\frac{dy}{y}}{\frac{dx}{x}} \approx \frac{\frac{y_{perturbed} - y}{y}}{\frac{x_{perturbed} - x}{x}} $$

$$S = \frac{dy}{dx}\cdot \frac{x}{y} $$

Define the scale factor $a$:
$$x_{perturbed} = ax$$

$$x_{perturbed} = (a-1)x + x$$

$$x_{perturbed} - x = (a-1)x$$

$$\frac{x_{perturbed} - x}{x} = a - 1$$

$$\frac{dx}{x} = a - 1$$

$$S = \frac{\frac{dy}{y}}{a - 1}$$

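Because $\frac{dy}{y} = d\ln(y)$, the sensitivity for a finite perturbation is evaluated as a difference of logarithms:

$$S = \frac{d\ln(y)}{a - 1} \approx \frac{\ln(y_{perturbed}) - \ln(y)}{a - 1}$$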

In [1]:
import json

from rmgpy.molecule.molecule import *
from rmgpy.species import *
from rmgpy.data.rmg import RMGDatabase
from rmgpy.species import Species
from rmgpy import settings

import rmgpy
from rmgpy import chemkin
import numpy as np
from copy import deepcopy
import csv
from rmgpy.molecule.molecule import *
from rmgpy.species import *
from rmgpy.chemkin import *
import rmgpy.kinetics as _kinetics
from rmgpy.data.rmg import RMGDatabase
from IPython.display import display
from rmgpy.data.thermo import ThermoLibrary
from rmgpy.rmg.react import react
from rmgpy.species import Species
from rmgpy.reaction import Reaction
from rmgpy.data.rmg import get_db
from rmgpy.exceptions import UndeterminableKineticsError, ActionError
from rmgpy.data.base import LogicOr
from itertools import product
from rmgpy.molecule.group import Group
from rmgpy.kinetics.arrhenius import ArrheniusBM
import time
import scipy.special as special
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('TkAgg')
%matplotlib inline

In [2]:
## Load the RMG database, restricted to the one kinetics family studied here
thermo_library_names = [
    'Klippenstein_Glarborg2016', 'BurkeH2O2', 'thermo_DFT_CCSDTF12_BAC',
    'DFT_QCI_thermo', 'primaryThermoLibrary', 'primaryNS', 'NitrogenCurran',
    'NOx2018', 'FFCM1(-)', 'SulfurLibrary', 'SulfurGlarborgH2S', 'SABIC_aromatics',
]
kinetics_family_names = ['Singlet_Carbene_Intra_Disproportionation']  # alternative: 'all'

database = RMGDatabase()
database.load(
    path=settings['database.directory'],
    thermo_libraries=thermo_library_names,
    transport_libraries=[],
    reaction_libraries=[],
    seed_mechanisms=[],  # e.g. ['BurkeH2O2inN2', 'ERC-FoundationFuelv0.9']
    kinetics_families=kinetics_family_names,
    kinetics_depositories=['training'],
    depository=False,  # the depository information is not used in this notebook
)

In [3]:
## Make the kinetics rate rule tree
# The calls below mutate `family` in place and are order-dependent; keep the sequence as is.

# Select the single family that was loaded with the database.
family = database.kinetics.families["Singlet_Carbene_Intra_Disproportionation"]
# presumably discards any tree already stored on the family before regenerating it -- TODO confirm
family.clean_tree()

# Build the tree. NOTE(review): the tuning values below are passed through as-is;
# their meaning is defined by `generate_tree` and is not visible in this notebook.
family.generate_tree(thermo_database=database.thermo,
                     nprocs=1,
                     new_fraction_threshold_to_reopt_node=0.25,
                     max_batch_size=800,
                     extension_iter_max=2,
                     extension_iter_item_cap=100)

# Report how many group entries the family now holds.
print(f"Group Entries: {len(family.groups.entries)}")
family.check_tree()
family.regularize(thermo_database=database.thermo)
# `templateRxnMap` is indexed by node name elsewhere in this notebook and yields that
# node's training reactions.
templateRxnMap = family.get_reaction_matches(thermo_database=database.thermo, remove_degeneracy=True,
                                             get_reverse=True, exact_matches_only=False, fix_labels=True)

family.clean_tree_rules()
# Fit the rules from the matched training reactions. The next cell reads a
# 'sensitivities = ...' record from each rule's `long_desc`; presumably
# `compute_derivatives=True` is what writes it -- TODO confirm.
family.make_bm_rules_from_template_rxn_map(templateRxnMap, compute_derivatives=True)

ERROR:root:4.0
ERROR:root:iter_max achieved terminating early
ERROR:root:iter_max achieved terminating early
ERROR:root:iter_max achieved terminating early


Group Entries: 7


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  sensitivity_E0_only = (np.log(kin_perturbed_onlyE0.E0.value_si) - np.log(kin.E0.value_si)) / (SCALE_FACTOR - 1)


In [4]:
## Unpack the sensitivities
import re  # local to this cell: only needed for the `nan` substitution below

SENSITIVITIES_MARKER = 'sensitivities = '
# JSON string substituted for bare `nan` values, which `json.loads` does not accept.
NAN_SENTINEL = '"-9999999"'


def _sensitivities_json(long_desc):
    """Return the JSON text of the sensitivities record embedded in ``long_desc``.

    Everything after the ``'sensitivities = '`` marker is taken as the record.
    Single quotes are turned into double quotes, and ``nan`` is replaced by the
    string sentinel ``"-9999999"`` only where it appears as a value (after a
    colon), so a reaction name that merely contains the letters "nan" is left
    untouched.

    Args:
        long_desc: Text of a rule entry's ``long_desc``.

    Returns:
        A string suitable for ``json.loads``.

    Raises:
        ValueError: If ``long_desc`` does not contain the marker.
    """
    marker_index = long_desc.find(SENSITIVITIES_MARKER)
    if marker_index < 0:
        # str.find returns -1 here; slicing from that offset would parse unrelated text.
        raise ValueError(f"no {SENSITIVITIES_MARKER!r} record found in long_desc")
    record = long_desc[marker_index + len(SENSITIVITIES_MARKER):].replace("'", '"')
    return re.sub(r'(:\s*)nan(?=\s*[,}\]])', r'\g<1>' + NAN_SENTINEL, record)


derivatives = {}
for key in family.rules.entries.keys():
    entry = family.rules.entries[key]

    # Nodes without a rule get an empty sensitivity list.
    if len(entry) == 0:
        derivatives[key] = []
        continue
    entry = entry[0]
    sensitivities_str = _sensitivities_json(entry.long_desc)
    print(sensitivities_str)
    derivatives[key] = json.loads(sensitivities_str)

[{"dA": 1.844369109093894, "dA_only": 0.2498761670608749, "dE0": 0.016316141432657146, "dE0_only": -0.01801340076213108, "dn": 0.10408387868395573, "name": "C6H6 <=> C6H6-2"}, {"dA": 10.10616532170983, "dA_only": 0.24987618618157992, "dE0": 0.10086970787159137, "dE0_only": -0.010901484785109751, "dn": 0.643379006838068, "name": "C6H6-3 <=> C6H6-4"}, {"dA": -6.721413412500284, "dA_only": 0.24987623185526697, "dE0": -0.07136528364484665, "dE0_only": -0.025400653285603834, "dn": -0.4550446952624657, "name": "[C]1C=CC=CC1 <=> C6H6-5"}, {"dA": -4.178834830505201, "dA_only": 0.24987558918357333, "dE0": -0.04533679390488211, "dE0_only": -0.02320924949295289, "dn": -0.2890799235278226, "name": "[C]1C=CCC=C1 <=> C6H6-7"}]
[{"dA": 0.5006463776720534, "dA_only": 0.4997529958875031, "dE0": 0.0, "dE0_only": 0.0, "dn": -4.4158704501678525e-05, "name": "C6H6 <=> C6H6-2"}, {"dA": 0.4999330041402512, "dA_only": 0.49975182833877874, "dE0": 0.0, "dE0_only": 0.0, "dn": -8.955285163305893e-06, "name": "C6H

In [7]:
# For every node that carries a rate rule, print the rule and then one line per
# training reaction showing that reaction's dA, dE0 and dn sensitivities.
for key in family.rules.entries.keys():
    entry = family.rules.entries[key]
    if len(entry) > 0:
        print(entry[0])
        for i, rxn in enumerate(templateRxnMap[key]):
            # Printed values are the ones for which all Blowers-Masel parameters
            # were allowed to change freely. The 'dA_only' / 'dE0_only' keys hold
            # the variant in which the other parameters are kept fixed.
            sens = derivatives[key][i]
            print('\t', rxn, '\tdA_node/dA_train=', sens['dA'], sens['dE0'], sens['dn'])

Root
	 C6H6 <=> C6H6-2 	dA_node/dA_train= 1.844369109093894 0.016316141432657146 0.10408387868395573
	 C6H6-3 <=> C6H6-4 	dA_node/dA_train= 10.10616532170983 0.10086970787159137 0.643379006838068
	 [C]1C=CC=CC1 <=> C6H6-5 	dA_node/dA_train= -6.721413412500284 -0.07136528364484665 -0.4550446952624657
	 [C]1C=CCC=C1 <=> C6H6-7 	dA_node/dA_train= -4.178834830505201 -0.04533679390488211 -0.2890799235278226
Root_Ext-3C-R_Ext-4R!H-R_Sp-4R!H-1C
	 C6H6 <=> C6H6-2 	dA_node/dA_train= 0.5006463776720534 0.0 -4.4158704501678525e-05
	 C6H6-3 <=> C6H6-4 	dA_node/dA_train= 0.4999330041402512 0.0 -8.955285163305893e-06
Root_Ext-3C-R_Ext-4R!H-R_N-Sp-4R!H-1C
	 [C]1C=CC=CC1 <=> C6H6-5 	dA_node/dA_train= 0.49971930693720695 -0.0 6.259888071237414e-07
	 [C]1C=CCC=C1 <=> C6H6-7 	dA_node/dA_train= 0.5001937212796843 -0.0 -8.907492007564604e-06
Root_Ext-3C-R_Ext-4R!H-R_Sp-4R!H-1C_Ext-4R!H-R_Ext-4R!H-R
	 C6H6 <=> C6H6-2 	dA_node/dA_train= 0.9995003330837129 0.0 0.0
Root_Ext-3C-R_Ext-4R!H-R_N-Sp-4R!H-1C_Ext-4R!

## Try to visualize the fitting

In [None]:
# Configuration for the ln(k) vs 1/T comparison of one tree node against its
# training reactions.
import matplotlib.pyplot as plt

# Node to inspect -- keep exactly one assignment active.
# 'Root_Ext-3C-R_Ext-4R!H-R_Sp-4R!H-1C' (training reactions C6H6 <=> C6H6-2 and
# C6H6-3 <=> C6H6-4) was the first node examined because it made the most sense.
# node_name = 'Root'  # four training reactions
# node_name = 'Root_Ext-3C-R_Ext-4R!H-R_Sp-4R!H-1C'  # two training reactions
node_name = 'Root_Ext-3C-R_Ext-4R!H-R_N-Sp-4R!H-1C_Ext-4R!H-R_Ext-5R!H-R_Ext-5R!H-R_N-Sp-6R!H-5R!H'  # one training reaction

# Temperature window for the plot, and the list that collects legend labels.
Tmin, Tmax = 300, 1500
legend = list()
    
def estimate_delta_H(T, node=None):
    """Return the mean enthalpy of reaction at ``T`` over a node's training reactions.

    Args:
        T: Temperature, passed unchanged to each training reaction's
            ``get_enthalpy_of_reaction``.
        node: Key into ``templateRxnMap`` naming the tree node to average over.
            Defaults to the notebook-level ``node_name``, so existing
            ``estimate_delta_H(T)`` calls behave as before.

    Returns:
        The arithmetic mean of ``get_enthalpy_of_reaction(T)`` over the node's
        training reactions.

    Raises:
        ZeroDivisionError: If the node has no training reactions.
    """
    if node is None:
        node = node_name
    training_rxns = templateRxnMap[node]
    total_dH = sum(rxn.get_enthalpy_of_reaction(T) for rxn in training_rxns)
    return total_dH / float(len(training_rxns))


# Plot the node's ln(k) against 1/T.
legend.append(node_name)
T = np.linspace(Tmin, Tmax, 100)
T_inv = 1.0 / T
# Look the node's rule data up once instead of on every temperature step.
node_kinetics = family.rules.entries[node_name][0].data
# The node's rule is evaluated with a reaction enthalpy (`dHrxn`); here that is the
# mean training-reaction enthalpy at each temperature. A single reference
# temperature, e.g. estimate_delta_H(1110), is the alternative.
lnk = np.log([node_kinetics.get_rate_coefficient(Temp, dHrxn=estimate_delta_H(Temp)) for Temp in T])
plt.plot(T_inv, lnk)
print(node_kinetics)


# Overlay each training reaction's own ln(k). A new array is built for every curve,
# so no array that was already handed to matplotlib is modified afterwards.
# The loop variable keeps the name `training_rxn` because a later cell reads it.
for training_rxn in templateRxnMap[node_name]:
    legend.append(str(training_rxn))
    lnk = np.log([training_rxn.kinetics.get_rate_coefficient(Temp) for Temp in T])
    print(training_rxn.kinetics)
    plt.plot(T_inv, lnk)


plt.ylabel('ln(k)')
plt.xlabel('1/T')
plt.legend(legend)
plt.show()

In [None]:
# Spot-check: rate coefficient of a single training reaction at T = 300.
# NOTE(review): `training_rxn` is not defined in this cell. It is whatever the last
# iteration of the plotting cell's `for training_rxn in ...` loop left behind, so
# this cell only works after that cell has been run.

# dir(family.rules.entries[node_name][0].item)

# NOTE(review): this rebinds `T`, which the plotting cell uses for its temperature array.
T = 300
# NOTE(review): `P` is not used in this cell. 1013250 is ten times 101325 (1 atm in Pa);
# confirm the intended value and units.
P = 1013250
training_rxn.kinetics.get_rate_coefficient(T)


In [None]:
# Show the kinetics data of the first rate-rule entry on the two-training-reaction node.
family.rules.entries['Root_Ext-3C-R_Ext-4R!H-R_Sp-4R!H-1C'][0].data

In [None]:
# Inspect the node selected by `node_name`: the class of its rule data, then the
# attribute names available on its group item.
print(type(family.rules.entries[node_name][0].data))
dir(family.groups.entries[node_name].item)

In [None]:
# Show the second training reaction (index 1) matched to the Root node.
templateRxnMap['Root'][1]

In [None]:
# Show all rate-rule entries; the keys are the tree node names used throughout this notebook.
family.rules.entries

In [None]:
# Show the group item stored for the two-training-reaction node.
family.groups.entries['Root_Ext-3C-R_Ext-4R!H-R_Sp-4R!H-1C'].item

In [None]:
# Hierarchy of the generated tree nodes, written as an indented L1/L2/L3 listing.
# NOTE(review): no `tree` callable is defined or explicitly imported in the visible
# part of this notebook; unless one of the star imports provides it, running this
# cell raises NameError. Presumably pasted from the family's groups file -- confirm.
tree(
"""
L1: Root
    L2: Root_Ext-3C-R_Ext-4R!H-R_Sp-4R!H-1C
        L3: Root_Ext-3C-R_Ext-4R!H-R_Sp-4R!H-1C_Ext-4R!H-R_Ext-4R!H-R
    L2: Root_Ext-3C-R_Ext-4R!H-R_N-Sp-4R!H-1C
        L3: Root_Ext-3C-R_Ext-4R!H-R_N-Sp-4R!H-1C_Ext-4R!H-R_Ext-5R!H-R_Ext-5R!H-R_Sp-6R!H-5R!H
        L3: Root_Ext-3C-R_Ext-4R!H-R_N-Sp-4R!H-1C_Ext-4R!H-R_Ext-5R!H-R_Ext-5R!H-R_N-Sp-6R!H-5R!H
"""
)