# Fit Arrhenius for reaction filtering in RMG from fastest training reactions per reaction family.

This script iterates through all RMG families besides surface reaction families. For each reaction family the fastest 
training reaction for a temperature range between 300 and 2500 K are fitted into an Arrhenius fit for unimolecular
and bimolecular reactions seperately. 

The Arrhenius fits are stored in a YAML file called `FilterArrheniusFits` that is read once at the beginning of an RMG run and used at each iteration to identify family specific filter criteria for reaction generation. 

For verification, the Arrhenius fits are stored as .png files in the folder `ArrheniusFits`.

Currently, the highest rate from forward or reverse is used for the Arrhenius fit.

In [1]:
import os
import numpy
import operator
import yaml
import matplotlib.pyplot as plt
%matplotlib inline

from rmgpy import settings
from rmgpy.data.rmg import RMGDatabase, getDB
from rmgpy.kinetics.arrhenius import Arrhenius
from rmgpy.thermo.thermoengine import submit
from rmgpy.rmgobject import RMGObject

Using Theano backend.


## Load the database with RMG reaction families

In [2]:
database = RMGDatabase()
database.load(
    settings['database.directory'], 
    thermoLibraries = [
                'primaryThermoLibrary',
                'Klippenstein_Glarborg2016',
                'BurkeH2O2',
                'thermo_DFT_CCSDTF12_BAC',
                'CBS_QB3_1dHR', 
                'DFT_QCI_thermo',
                'Narayanaswamy',
                'Lai_Hexylbenzene',
                'SABIC_aromatics',
                'vinylCPD_H'],
    transportLibraries = [],
    reactionLibraries = [],
    seedMechanisms = [],
    kineticsFamilies = 'all',
    kineticsDepositories = ['training'],
    depository = False,  
)

In [3]:
# Get Arrhenius fits for all RMG families
families = getDB('kinetics').families.keys()
#families = ['H_Abstraction', 'R_Recombination', 'Surface_Dissociation_vdW']

In [4]:
# Temperature range to fit Arrhenius
Ts = numpy.linspace(300,2500,50)

## Generate the Arrhenius fits and print .png figures

In [5]:
# Helper function
def analyze_reactions(fam_name, molecularity=1):
    print fam_name
    
    fam = database.kinetics.families[fam_name]
    dep = fam.getTrainingDepository()
    rxns = []
    list_indices = []

    # Extract all training reactions for selected family
    for entry in dep.entries.values():
        r = entry.item
        r.kinetics = entry.data
        r.index = entry.index
        for spc in r.reactants+r.products:
            if spc.thermo is None:
                submit(spc)
        rxns.append(r)

    # Only proceed if at least one training reaction is available
    if rxns:         
        # Get kinetic rates for unimolecular reactions
        k_list = []
        index_list = []
        for rxn in rxns:
            if len(rxn.reactants) == molecularity:
                k_list.append(rxn.kinetics)
                index_list.append(rxn.index)
            if len(rxn.products) == molecularity:
                k_list.append(rxn.generateReverseRateCoefficient())
                index_list.append(rxn.index)

        # Get max. kinetic rates at each discrete temperature
        if k_list:
            k_max_list = []
            max_rxn_list = set()
            for T in Ts:
                mydict = {}
                kvals = [k.getRateCoefficient(T) for k in k_list]
                mydict = dict(zip(index_list, kvals))

                # Find key and value of max rate coefficient
                key_max_rate = max(mydict.iteritems(), key=operator.itemgetter(1))[0]
                
                max_entry = dep.entries.get(key_max_rate)
                max_rxn = max_entry.item
                max_rxn_list.add(max_rxn)
                
                kval = mydict[key_max_rate]
                k_max_list.append(kval)
                
                #print """For {0} 1/K training reaction {1} with index {2} has the highest rate 
                #of {3}.""".format(1000.0/T, max_entry, key_max_rate, kval)
                
                #if molecularity == 2:
                #    display(max_rxn)
                #    print "collision limit:  {}".format(max_rxn.calculate_coll_limit(T))
                #    print "collision limit agrees?"
                #    print [] == max_rxn.check_collision_limit_violation(300.0, 2000.0, 0.1, 100.0)

            units = 's^-1' if molecularity == 1 else 'm^3/(mol*s)'
                
            arr = Arrhenius().fitToData(Ts,numpy.array(k_max_list), units)
            
            fig = plt.figure()
            fig_name = fam_name
            fig_name += ' Unimolecular' if molecularity == 1 else ' Bimolecular'
            save_path = 'ArrheniusFits/'
            plt.semilogy(1000.0/Ts, k_max_list, label=fig_name)
            plt.xlabel("1000/T (1/K)")
            plt.ylabel("k ({0})".format(units))
            plt.legend(loc='upper left')
            if molecularity == 1:
                fig.savefig((save_path + fig_name + '_Unimolecular' + '.png'), bbox_inches='tight')
            elif molecularity == 2:
                fig.savefig((save_path + fig_name + '_Unimolecular' + '.png'), bbox_inches='tight')

            plt.close("all")
            
            return arr
        
    else:
        arr = None
        return arr

            

In [6]:
# Main - Arrhenius fitting
families_unimol = []
fits_unimol = []
families_bimol = []
fits_bimol = []

for family in families:
    # Update this script once training reaction generation for surface families works better
    if 'Surface' not in family:
        # Unimolecular reactions
        arr_uni = analyze_reactions(family, molecularity=1)
        families_unimol.append(family)
        fits_unimol.append(arr_uni)
        print "*uni_mol {0}".format(arr_uni)
        
        # Bimolecular reactions
        arr_bi = analyze_reactions(family, molecularity=2)
        families_bimol.append(family)
        fits_bimol.append(arr_bi)
        print "*bi_mol {0}".format(arr_bi)
    else:
        print family
        families_unimol.append(family)
        fits_unimol.append(None)
        print "*uni_mol {0}".format(None)
        
        families_bimol.append(family)
        fits_bimol.append(None)
        print "*bi_mol {0}".format(None)
        
# Generate a dictionary for unimolecular and bimolecular Arrhenius fits and the corresponding reaction family name
dict_unimol = dict(zip(families_unimol, fits_unimol))
dict_bimol = dict(zip(families_bimol, fits_bimol))

H_Abstraction


This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "/Users/agnes/Documents/Software/Miniconda3/envs/rmg_env/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/Users/agnes/Documents/Software/Miniconda3/envs/rmg_env/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/Users/agnes/Documents/Software/Miniconda3/envs/rmg_env/lib/python2.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/agnes/Documents/Software/Miniconda3/envs/rmg_env/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/agnes/Documents/Software/Minico

*uni_mol None
H_Abstraction




*bi_mol Arrhenius(A=(5.64568e-55,'m^3/(mol*s)'), n=17.7991, Ea=(-341.909,'kJ/mol'), T0=(1,'K'), Tmin=(300,'K'), Tmax=(2500,'K'), comment="""Fitted to 50 data points; dA = *|/ 73695.3, dn = +|- 1.41862, dEa = +|- 9.88989 kJ/mol""")
R_Recombination
*uni_mol Arrhenius(A=(2.78461e-43,'s^-1'), n=16.6777, Ea=(-123.705,'kJ/mol'), T0=(1,'K'), Tmin=(300,'K'), Tmax=(2500,'K'), comment="""Fitted to 50 data points; dA = *|/ 217409, dn = +|- 1.55555, dEa = +|- 10.8445 kJ/mol""")
R_Recombination
*bi_mol Arrhenius(A=(5.5302e+10,'m^3/(mol*s)'), n=-0.0163042, Ea=(-191.114,'kJ/mol'), T0=(1,'K'), Tmin=(300,'K'), Tmax=(2500,'K'), comment="""Fitted to 50 data points; dA = *|/ 1, dn = +|- 3.79035e-15, dEa = +|- 2.64244e-14 kJ/mol""")
Surface_Dissociation_vdW
*uni_mol None
*bi_mol None


## Save Arrhenius fits in YAML file

In [7]:
# Helper functions for ArrheniusRMGObject
class ArrheniusRMGObject(RMGObject):
    """
    Child class of RMG Object for storing filter Arrhenius fits.
    """

    def __init__(self, unimol=None, bimol=None):
        if unimol:
            self.unimol = unimol
        else:
            self.unimol = {}
        if bimol:
            self.bimol = bimol
        else:
            self.bimol = {}
        
    def save_yaml(self, path):
        """
        Save the data to a .yml file
        """
        full_path = os.path.join(path)
        with open(full_path, 'a+') as f:
            yaml.dump(data=self.as_dict(), stream=f)
          

In [8]:
# Generate empty ArrheniusRMGObject
obj = ArrheniusRMGObject(unimol=dict_unimol, bimol=dict_bimol)

In [9]:
# Path to YAML file with stored Arrhenius fits
path = '../input/FilterArrheniusFits'
f = open(path, 'w')

obj.save_yaml(path)