In [1]:
import os
import numpy as np

from rmgpy.tools.uncertainty import Uncertainty, process_local_results
from rmgpy.tools.canteramodel import get_rmg_species_from_user_species
from rmgpy.species import Species

## Step 1: Define mechanism files

In [2]:
# Input mechanism: the chemkin file has to be the *annotated* version,
# paired with its species dictionary.
chemkin_file, dict_file = (
    './data/parse_source/chem_annotated.inp',
    './data/parse_source/species_dictionary.txt',
)

# Create the Uncertainty instance (given its output directory) and
# load the model from the two files above.
uncertainty = Uncertainty(output_directory='.temp/uncertainty')
uncertainty.load_model(chemkin_file, dict_file)


## Kinetics Uncertainty for Rate Rules


The rate $\ln k_\mathrm{site}$ can be written as a sum of other random variables, as shown in the block diagram above.

$$\ln k_\mathrm{site} = \left[\sum_{rule_i}\omega_i \ln(k_\mathrm{rule,i})\right] + \ln k_\mathrm{family}+\log_{10}(N+1)\ln k_\mathrm{non-exact}$$


If we consider each of $\ln(k_\mathrm{rule,i})$, $\ln(k_\mathrm{family})$, and $\ln(k_\mathrm{non-exact})$ to be independent random variables, then we can derive the total variance $\sigma^2(\ln k_\mathrm{site})$ using the following [property](https://en.wikipedia.org/wiki/Variance#Basic_properties):

$$\sigma^2(aX+bY)=a^2\sigma^2(X)+b^2\sigma^2(Y)+2ab\,cov(X,Y)$$

$\ln(k_\mathrm{rule,i})$, $\ln(k_\mathrm{family})$, and $\ln(k_\mathrm{non-exact})$ are all mutually independent, so the $2ab\,cov(X,Y)$ term drops out in every case.

$$\sigma^2(\ln k_\mathrm{site}) = \left[\sum_{rule_i}\omega_i^2 \sigma^2(\ln(k_\mathrm{rule,i}))\right] + \sigma^2(\ln k_\mathrm{family})+\left[\log_{10}(N+1)\right]^2\sigma^2(\ln k_\mathrm{non-exact})$$



What is currently implemented:
$$\sigma(\ln k_\mathrm{site}) = \left[\sum_{rule_i}\omega_i \sigma(\ln(k_\mathrm{rule,i}))\right] + \sigma(\ln k_\mathrm{family})+\left[\log_{10}(N+1)\right]\sigma(\ln k_\mathrm{non-exact})$$


### Covariance Matrix
We want to compute elements of the covariance matrix $\Sigma_{ab}=cov(\ln(k_{rate\ rules,a}), \ln(k_{rate\ rules,b}))$

$$\Sigma_{ab}=cov(\ln(k_{site,a}), \ln(k_{site,b}))$$

$$\Sigma_{ab}=cov\left(\left[\sum_{rule_i}\omega_{a,i} \ln(k_\mathrm{rule,i})\right] + \ln k_\mathrm{family,a}+\log_{10}(N_a+1)\ln k_\mathrm{non-exact,a}, \left[\sum_{rule_j}\omega_{b,j} \ln(k_\mathrm{rule,j})\right] + \ln k_\mathrm{family,b}+\log_{10}(N_b+1)\ln k_\mathrm{non-exact,b}\right)$$

The [covariance of linear combinations](https://en.wikipedia.org/wiki/Covariance#Covariance_of_linear_combinations) is the following:
$$cov(aX+bY, cV+dW)=ac\cdot cov(X,V)+ad\cdot cov(X,W)+bc\cdot cov(Y,V)+bd\cdot cov(Y,W)$$

Or, more generally:
$$cov(\sum_ia_iX_i, \sum_jb_jY_j)=\sum_{i}\sum_{j}a_ib_j cov(X_i, Y_j)$$


Before trying to write this all out, we know that the $\ln(k_\mathrm{rule,i})$, $\ln(k_\mathrm{family})$, and  $\ln(k_\mathrm{non-exact})$ are independent, so we have the following:


$$cov(\ln(k_\mathrm{rule,i}), \ln(k_\mathrm{family}))=0$$
$$cov(\ln(k_\mathrm{rule,i}), \ln(k_\mathrm{non-exact}))=0$$
$$cov(\ln(k_\mathrm{family}), \ln(k_\mathrm{non-exact}))=0$$


$$\Sigma_{ab}=\left[\sum_{rule_i}\sum_{rule_j}\omega_{a,i}\omega_{b,j} cov(\ln(k_\mathrm{rule,i}),\ln(k_\mathrm{rule,j}))\right]+cov(\ln k_\mathrm{family,a},\ln k_\mathrm{family,b})+\log_{10}(N_a+1)\log_{10}(N_b+1)cov(\ln k_\mathrm{non-exact,a},\ln k_\mathrm{non-exact,b})$$


### Check the diagonal to see if this matches the variance
$$\Sigma_{aa}=\left[\sum_{rule_i}\sum_{rule_j}\omega_{a,i}\omega_{a,j} cov(\ln(k_\mathrm{rule,i}),\ln(k_\mathrm{rule,j}))\right]+cov(\ln k_\mathrm{family,a},\ln k_\mathrm{family,a})+\log_{10}(N_a+1)\log_{10}(N_a+1)cov(\ln k_\mathrm{non-exact,a},\ln k_\mathrm{non-exact,a})$$


$$\Sigma_{aa}=\left[\sum_{rule_i}\sum_{rule_j}\omega_{a,i}\omega_{a,j} cov(\ln(k_\mathrm{rule,i}),\ln(k_\mathrm{rule,j}))\right]+\sigma^2(\ln k_\mathrm{family,a})+\left[\log_{10}(N_a+1)\right]^2\sigma^2(\ln k_\mathrm{non-exact,a})$$


Different rate rules are independent from each other, so $cov(\ln(k_\mathrm{rule,i}),\ln(k_\mathrm{rule,j}))=0$ for $i \neq j$


$$\Sigma_{aa}=\left[\sum_{rule_i}\omega_{a,i}^2 \sigma^2(\ln(k_\mathrm{rule,i}))\right]+\sigma^2(\ln k_\mathrm{family,a})+\left[\log_{10}(N_a+1)\right]^2\sigma^2(\ln k_\mathrm{non-exact,a})$$

In [3]:
# The database has to be loaded with exactly the settings that generated the
# model, including every thermo and kinetics library that was used.
# Expect this call to take a while.
uncertainty.load_database(thermo_libraries=['primaryThermoLibrary'], kinetics_families='default', reaction_libraries=[])


In [4]:
# Build the full covariance matrix with correlated parameter uncertainties.
uncertainty.extract_sources_from_model()
uncertainty.assign_parameter_uncertainties(correlated=True)
Sigma, labels = uncertainty.get_uncertainty_covariance_matrix()

# Split the matrix into its two diagonal blocks: the leading N x N block
# (N = number of species) holds the thermo parameters, and the trailing block
# holds the kinetic parameters.
N = len(uncertainty.species_list)
Sigma_G = Sigma[:N, :N]
Sigma_k = Sigma[N:, N:]

## Example: Compute variance of reaction 21

In [5]:
# Variance walk-through for the diagonal entry (21, 21)

# Show where reaction 21's rate estimate comes from
a = 21
reaction_a = uncertainty.reaction_list[a]
sources_a = uncertainty.reaction_sources_dict[reaction_a]

print(f'Reaction {a}:\t', reaction_a)
print()
print('Source Dictionary')
print(sources_a)
print()
print(sources_a['Rate Rules'][0])
training_rules = sources_a['Rate Rules'][1]['training']
for training_entry in training_rules:
    print(training_entry)

print()

print(f'Covariance Uncertainty [{a},{a}]:', Sigma_k[a,a])

# Hand calculation, term by term, following the variance formula in the
# markdown above: sum(w_i^2 * var_rule) + var_family + log10(N+1)^2 * var_nonexact.
# Two training entries with weight 0.5 each, so N = 2.
# NOTE(review): the variances 0.5, 1.0 and 3.5 are hard-coded here; presumably
# they mirror the uncertainty engine's defaults -- confirm.
rule_term = 2.0 * (np.float_power(0.5, 2.0) * 0.5)
family_term = 1.0
nonexact_term = np.float_power(np.log10(3), 2.0) * 3.5
variance_by_hand = rule_term + family_term + nonexact_term
print(f'Hand calculated variance:\t{variance_by_hand}')
print()

# Cross-check against the existing (modified) uncertainty code with the
# correlated option: add up every per-source contribution for this reaction.
input_contributions = uncertainty.kinetic_input_uncertainties[a]
total_uncertainty = sum(input_contributions.values())
print(f'Existing uncertainty code:\t{total_uncertainty}')
print()
print(input_contributions)

Reaction 21:	 H(6) + [CH]=C(14) <=> H2(13) + C#C(28)

Source Dictionary
{'Rate Rules': ['Disproportionation', {'template': [<Entry index=22 label="H_rad">, <Entry index=208 label="Cds/H2_d_Crad">], 'degeneracy': 2.0, 'exact': False, 'rules': [], 'training': [(<Entry index=673 label="H_rad;Cds/H2_d_N3rad">, <Entry index=116 label="H + CH2N <=> H2 + CHN">, 0.5), (<Entry index=688 label="H_rad;Cds/H2_d_N5dcrad/O">, <Entry index=131 label="H + CH2NO <=> H2 + CHNO">, 0.5)]}]}

Disproportionation
(<Entry index=673 label="H_rad;Cds/H2_d_N3rad">, <Entry index=116 label="H + CH2N <=> H2 + CHN">, 0.5)
(<Entry index=688 label="H_rad;Cds/H2_d_N5dcrad/O">, <Entry index=131 label="H + CH2NO <=> H2 + CHNO">, 0.5)

Covariance Uncertainty [21,21]: 2.0467564209684275
Hand calculated variance:	2.0467564209684275

Existing uncertainty code:	2.0467564209684275

{'Disproportionation H_rad;Cds/H2_d_N3rad': 0.125, 'Disproportionation H_rad;Cds/H2_d_N5dcrad/O': 0.125, 'Estimation H(6)+C2H3(14)=H2(13)+C#C(28)':

## Example: variance of species 11 thermo

In [6]:
# Thermo variance walk-through for the diagonal entry (11, 11)
i = 11
species_i = uncertainty.species_list[i]
print(f'Species:\t{species_i}')
print()
print('Uncertainty sources dictionary:')
print(uncertainty.species_sources_dict[species_i])


print()
# Add up every per-source contribution reported for this species
thermo_contributions = uncertainty.thermo_input_uncertainties[i]
total_uncertainty = sum(thermo_contributions.values())
print(f'Existing uncertainty code:\t{total_uncertainty}')
print(thermo_contributions)
print()

# Hand calculation, term by term: each group contributes count^2 * 0.1
# (the group is counted twice, the radical correction once), plus 1.5.
# NOTE(review): 0.1 and 1.5 are hard-coded; presumably they mirror the
# uncertainty engine's group and estimation variances -- confirm.
group_term = np.float_power(2.0, 2.0)*0.1
radical_term = np.float_power(1.0, 2.0)*0.1
estimation_term = 1.5
variance_by_hand = group_term + radical_term + estimation_term
print(f'Hand calculated variance:\t{variance_by_hand}')

# Same quantity read off the diagonal of the thermo covariance block
print(f'Covariance Uncertainty [{i},{i}]:', Sigma_G[i,i])


Species:	[CH]=C(14)

Uncertainty sources dictionary:
{'GAV': {'group': [(<Entry index=296 label="Cds-CdsHH">, 2)], 'radical': [(<Entry index=435 label="Cds_P">, 1)]}}

Existing uncertainty code:	2.0
{'Group(group) Cds-CdsHH': 0.4, 'Group(radical) Cds_P': 0.1, 'Estimation C2H3(14)': 1.5}

Hand calculated variance:	2.0
Covariance Uncertainty [11,11]: 2.0


## Example: covariance between reaction 19 and 21

In [7]:
# Covariance walk-through for the off-diagonal entry (19, 21)

# Show both reactions together with the sources their rates were estimated from
a = 19
reaction_a = uncertainty.reaction_list[a]
print(f'Reaction {a}:\t', reaction_a)

print(uncertainty.reaction_sources_dict[reaction_a])

print()
b = 21
reaction_b = uncertainty.reaction_list[b]
print(f'Reaction {b}:\t', str(reaction_b))
print(uncertainty.reaction_sources_dict[reaction_b])


print(f'Covariance Uncertainty [{a},{b}]: {Sigma_k[a, b]}')

# Hand calculation: one term per training entry that the two reactions share,
# each of the form (weight in one reaction) * (0.5 * weight in the other).
# NOTE(review): the 0.5 factor is presumably the per-rule variance used in the
# reaction-21 variance example, and the family / non-exact covariance terms
# are left out here -- confirm both against the covariance formula above.
covariance_by_hand = 0.5*(0.5*0.1)+0.5*(0.5*0.1625)
print(f'Hand calculated covariance:\t{covariance_by_hand}')

Reaction 19:	 [CH3](4) + [CH]=C(14) <=> CH4(3) + C#C(28)
{'Rate Rules': ['Disproportionation', {'template': [<Entry index=59 label="C_methyl">, <Entry index=208 label="Cds/H2_d_Crad">], 'degeneracy': 2.0, 'exact': False, 'rules': [], 'training': [(<Entry index=673 label="H_rad;Cds/H2_d_N3rad">, <Entry index=116 label="H + CH2N <=> H2 + CHN">, 0.1), (<Entry index=688 label="H_rad;Cds/H2_d_N5dcrad/O">, <Entry index=131 label="H + CH2NO <=> H2 + CHNO">, 0.16250000000000003), (<Entry index=672 label="O_pri_rad;Cds/H2_d_N3rad">, <Entry index=115 label="HO + CH2N <=> H2O + CHN">, 0.05), (<Entry index=689 label="O_pri_rad;Cds/H2_d_N5dcrad/O">, <Entry index=132 label="HO + CH2NO <=> H2O + CHNO">, 0.1375), (<Entry index=669 label="O_rad/NonDeO;Cds/H2_d_N3rad">, <Entry index=112 label="HO2-2 + CH2N <=> H2O2 + CHN">, 0.07500000000000001), (<Entry index=674 label="NH2_rad;Cds/H2_d_N3rad">, <Entry index=117 label="H2N + CH2N <=> H3N + CHN">, 0.1), (<Entry index=691 label="NH2_rad;Cds/H2_d_N5dcrad/O

## Property checks to verify covariance matrix

In [8]:
## Basic sanity checks on the covariance matrix

# 1) The diagonal should reproduce the uncorrelated variances.
#    Floats are compared with a tolerance (np.isclose) instead of exact
#    equality, so harmless round-off does not trigger a false mismatch.
#    Note that this call re-assigns the uncertainties with correlated=False.
uncorrelated_matches_diagonal = True
uncertainty.assign_parameter_uncertainties(correlated=False)
for i, uncorrelated_variance in enumerate(uncertainty.kinetic_input_uncertainties):
    if not np.isclose(uncorrelated_variance, Sigma_k[i, i]):
        print(uncorrelated_variance, Sigma_k[i, i])
        uncorrelated_matches_diagonal = False

if uncorrelated_matches_diagonal:
    print("The diagonal of the covariance matrix matches the uncorrelated variances")


# 2) Every lower-triangle entry must not exceed the diagonal entry of its own
#    row nor the diagonal entry of its own column.
# NOTE(review): a general covariance matrix only guarantees
# |cov(a, b)| <= sqrt(var(a) * var(b)); this stricter test is kept as written.
diagonal_is_max = True
n_kinetic = Sigma_k.shape[0]
for row in range(n_kinetic):
    for col in range(row):
        off_diagonal = Sigma_k[row, col]
        if off_diagonal > Sigma_k[row, row] or off_diagonal > Sigma_k[col, col]:
            diagonal_is_max = False
            print("Off-diagonal is too large!")

if diagonal_is_max:
    print("All off-diagonals are less than diagonal")


# 3) Symmetry, again with a floating-point tolerance; report failure too.
is_symmetric = np.allclose(Sigma_k, Sigma_k.transpose())
if is_symmetric:
    print("Covariance matrix is symmetric")
else:
    print("Covariance matrix is not symmetric")


# 4) Positive semi-definiteness, spot-checked with random vectors: x^T Sigma x
#    must be >= 0 for every x. This is a necessary test only, not a proof.
#    The generator is seeded so the check is reproducible on re-run.
rng = np.random.default_rng(0)
pos_semidef = True
for trial in range(100):
    x = rng.random(Sigma_k.shape[0]) - 0.5
    prod = np.dot(np.dot(Sigma_k, x), x)
    if prod < 0:
        print("Covariance matrix is not positive semi-definite")
        pos_semidef = False
        break  # one counter-example is enough
if pos_semidef:
    print("Covariance matrix is positive semi-definite")


The diagonal of the covariance matrix matches the uncorrelated variances
All off-diagonals are less than diagonal
Covariance matrix is symmetric
Covariance matrix is positive semi-definite


## Save the covariance matrix using RMG objects

In [19]:
import pickle
from scipy.sparse import coo_matrix


# The full matrix is big, so keep it in sparse (COO) form
sparse_cov_mat = coo_matrix(Sigma)

# Bundle the sparse matrix with the parameter labels returned alongside it
# and write the bundle to disk as a pickle
rmg_data = dict(
    covariance_matrix=sparse_cov_mat,
    parameter_labels=labels,
)

with open('cov_mat_rmg.pickle', 'wb') as pickle_file:
    pickle.dump(rmg_data, pickle_file)

## Save the covariance matrix without using RMG objects

In [18]:
# Save the thermo and kinetic blocks separately, labelled with plain strings only
import pickle
from scipy.sparse import coo_matrix


# String form of every species and reaction, in model order
flat_species_list = [str(sp) for sp in uncertainty.species_list]
flat_reaction_list = [str(rxn) for rxn in uncertainty.reaction_list]


# Split Sigma into its thermo (leading) and kinetic (trailing) diagonal blocks
N = len(uncertainty.species_list)
Sigma_G = Sigma[:N, :N]
Sigma_k = Sigma[N:, N:]

# The blocks are big, so store them in sparse (COO) form
sparse_G = coo_matrix(Sigma_G)
sparse_k = coo_matrix(Sigma_k)


data = dict(
    cov_thermo=sparse_G,
    species_labels=flat_species_list,
    cov_kinetic=sparse_k,
    reaction_labels=flat_reaction_list,
)

with open('cov_mat_flat.pickle', 'wb') as pickle_file:
    pickle.dump(data, pickle_file)

## Read covariance matrix

In [None]:
from scipy.sparse import coo_matrix
import numpy as np
import pickle

# Read back the flat pickle holding the two sparse covariance blocks and
# their string labels.
# SECURITY: pickle.load can execute arbitrary code -- only open pickle files
# that you created yourself or otherwise trust.
with open('cov_mat_flat.pickle', 'rb') as f:
    data = pickle.load(f)

sparse_G = data['cov_thermo']
species_list = data['species_labels']
sparse_k = data['cov_kinetic']
reaction_list = data['reaction_labels']

# Convert the sparse blocks to dense arrays so they can be indexed as [a, b]
Sigma_G = sparse_G.toarray()
Sigma_k = sparse_k.toarray()

# Example lookup: covariance between reactions 19 and 21
a=19
b=21
print(f'Reaction {a}: {reaction_list[a]}')
print(f'Reaction {b}: {reaction_list[b]}')
# Fixed label: the closing parenthesis of "cov(...)" was missing
print(f'cov(Reaction {a}, Reaction {b}): {Sigma_k[a,b]}')

## List species and reaction info

In [None]:
# Print each species followed by its uncertainty-source dictionary
for spc in uncertainty.species_list:
    print(spc, uncertainty.species_sources_dict[spc], sep='\n')

In [None]:
# For every reaction print: index, reaction, its uncertainty under the
# uncorrelated assignment, then its source dictionary and a blank separator.
uncertainty.assign_parameter_uncertainties(correlated=False)
for i, rxn in enumerate(uncertainty.reaction_list):
    print(i, rxn, uncertainty.kinetic_input_uncertainties[i])
    print(uncertainty.reaction_sources_dict[rxn])
    print()
    