# Show "best fit" high-pressure limit kinetics 
Run this notebook to list, for each reaction family, the training reactions (with their indices) 
that give the highest rate at each temperature, separately for unimolecular and bimolecular reactions. 
Based on the result, improve the database and run the notebook again. Repeat this process until the 
"best fit" high-pressure limit kinetics show "reasonable" rates.

In [None]:
import os
import time
import operator
import unittest 
import numpy as np
from copy import deepcopy

from rmgpy import settings
from rmgpy.data.kinetics.database import KineticsDatabase
from rmgpy.data.base import DatabaseError
from rmgpy.molecule.molecule import Molecule
from rmgpy.species import Species
from rmgpy.data.rmg import RMGDatabase
from rmgpy.data.kinetics.common import KineticsError, saveEntry
from rmgpy.kinetics.arrhenius import Arrhenius
from rmgpy.data.rmg import getDB
from rmgpy.thermo.thermoengine import submit
from rmgpy.reaction import Reaction
import matplotlib.pyplot as plt
%matplotlib inline

## Step 1: 
Load the RMG database, then select the families to work on and the discrete temperatures at which rates are compared.

In [None]:
# Load the RMG database from the configured database directory: the thermo
# libraries listed below, all kinetics families and the 'training' kinetics
# depository. No transport libraries, reaction libraries or seed mechanisms
# are requested.
#path = os.path.join(settings['database.directory'], 'kinetics','families')
database = RMGDatabase()
database.load(
    settings['database.directory'],
    thermoLibraries=[
        'primaryThermoLibrary',
        'Klippenstein_Glarborg2016',
        'BurkeH2O2',
        'thermo_DFT_CCSDTF12_BAC',
        'CBS_QB3_1dHR',
        'DFT_QCI_thermo',
    ],
    transportLibraries=[],
    reactionLibraries=[],
    seedMechanisms=[],
    kineticsFamilies='all',
    kineticsDepositories=['training'],
    # NOTE(review): presumably turns off loading of the non-kinetics
    # depositories - confirm against RMGDatabase.load
    depository=False,
)

In [None]:
# Families analysed by the cells below. Swap in the commented-out line to
# process every family known to the loaded kinetics database instead.
#all_families = getDB('kinetics').families.keys()
all_families = ['H_Abstraction']

# Temperature grid in K: 30 evenly spaced points from 300 to 2000 (inclusive)
Ts = np.linspace(300, 2000, num=30)

## Step 2a: unimolecular reactions - forward
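For each selected family, find the unimolecular training reaction (exactly one reactant) with the highest forward rate at each temperature, report it together with its index, and plot the resulting maximum rate against 1000/T.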

In [None]:
# For every selected family, report which unimolecular training reaction has the
# highest forward rate at each temperature in Ts, then fit an Arrhenius
# expression to that maximum-rate envelope and plot it.
for fam_name in all_families:
    print(fam_name)
    # Generate training reactions only for non-surface families
    if 'Surface' in fam_name:
        continue

    fam = database.kinetics.families[fam_name]
    dep = fam.getTrainingDepository()

    # Extract all training reactions for the selected family. Every depository
    # entry is kept at the same list position as its reaction: the keys of
    # dep.entries are not list positions, so looking an entry up by position
    # would report the wrong training reaction (or None).
    entries = []
    rxns = []
    for entry in dep.entries.values():
        r = entry.item
        r.kinetics = entry.data
        for spc in r.reactants + r.products:
            # Request thermo for species that do not carry any yet
            if spc.thermo is None:
                submit(spc)
        entries.append(entry)
        rxns.append(r)

    # Only proceed if at least one training reaction is available
    if not rxns:
        print("No training reaction available for family {0}".format(fam_name))
        continue

    # Forward kinetics of the unimolecular reactions, tagged with their
    # position in rxns / entries
    candidates = [(pos, rxn.kinetics) for pos, rxn in enumerate(rxns)
                  if len(rxn.reactants) == 1]
    if not candidates:
        print("No unimolecular training reaction available for family {0}".format(fam_name))
        continue

    # Get max. kinetic rate at each discrete temperature
    kunimaxes = []
    for T in Ts:
        kvals = [kin.getRateCoefficient(T) for pos, kin in candidates]
        best = int(np.argmax(kvals))
        key_max_rate = candidates[best][0]
        kval = kvals[best]
        kunimaxes.append(kval)

        # Report the entry that actually owns the maximum rate, with its own index
        top_entry = entries[key_max_rate]
        message = ("For 1000/T = {0} 1/K, training reaction {1} with index {2} "
                   "has the highest rate of {3}.")
        print(message.format(1000.0 / T, top_entry, top_entry.index, kval))

    # Arrhenius fit of the envelope; kept in a variable for later inspection
    arruni = Arrhenius().fitToData(Ts, np.array(kunimaxes), 's^-1')

    fig = plt.figure()
    fig_name = fam_name
    plt.semilogy(1000.0 / Ts, kunimaxes, label=(fig_name + ' Unimolecular'))
    plt.xlabel("1000/T (1/K)")
    plt.ylabel("k 1/s")
    plt.legend(loc='upper left')
    # Uncomment to write the figure to disk instead of only showing it inline:
    #save_path = 'ArrheniusFig/'
    #fig.savefig((save_path + fig_name + '_Unimolecular' + '.png'), bbox_inches='tight')
    #plt.close("all")

## Step 2a: unimolecular reactions - backward
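For each selected family, take the training reactions with exactly one product and evaluate their reverse rate coefficients. Report the reaction with the highest reverse rate at each temperature together with its index, plot the resulting maximum rate against 1000/T, and print the label and thermo comment of every species involved in those reactions.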

In [None]:
# For every selected family, report which training reaction with a single
# product has the highest reverse rate at each temperature in Ts, fit and plot
# that maximum-rate envelope, and list the thermo source of the species involved.
for fam_name in all_families:
    print(fam_name)
    # Generate training reactions only for non-surface families
    if 'Surface' in fam_name:
        continue

    fam = database.kinetics.families[fam_name]
    dep = fam.getTrainingDepository()

    # Extract all training reactions for the selected family. Every depository
    # entry is kept at the same list position as its reaction: the keys of
    # dep.entries are not list positions, so looking an entry up by position
    # would report the wrong training reaction (or None).
    entries = []
    rxns = []
    for entry in dep.entries.values():
        r = entry.item
        r.kinetics = entry.data
        for spc in r.reactants + r.products:
            # Request thermo for species that do not carry any yet
            if spc.thermo is None:
                submit(spc)
        entries.append(entry)
        rxns.append(r)

    # Only proceed if at least one training reaction is available
    if not rxns:
        print("No training reaction available for family {0}".format(fam_name))
        continue

    # Reverse kinetics of the reactions that are unimolecular in the reverse
    # direction, tagged with their position in rxns / entries
    candidates = [(pos, rxn.generateReverseRateCoefficient())
                  for pos, rxn in enumerate(rxns) if len(rxn.products) == 1]
    if not candidates:
        print("No unimolecular training reaction available for family {0}".format(fam_name))
        continue

    # Get max. kinetic rate at each discrete temperature
    kunimaxes = []
    rxn_indices = set()  # list positions of every reaction that is fastest at some T
    for T in Ts:
        kvals = [kin.getRateCoefficient(T) for pos, kin in candidates]
        best = int(np.argmax(kvals))
        key_max_rate = candidates[best][0]
        kval = kvals[best]
        rxn_indices.add(key_max_rate)
        kunimaxes.append(kval)

        # Report the entry that actually owns the maximum rate, with its own index
        top_entry = entries[key_max_rate]
        message = ("For 1000/T = {0} 1/K, training reaction {1} with index {2} "
                   "has the highest rate of {3}.")
        print(message.format(1000.0 / T, top_entry, top_entry.index, kval))

    # Arrhenius fit of the envelope; kept in a variable for later inspection
    arruni = Arrhenius().fitToData(Ts, np.array(kunimaxes), 's^-1')

    fig = plt.figure()
    fig_name = fam_name
    plt.semilogy(1000.0 / Ts, kunimaxes, label=(fig_name + ' Unimolecular'))
    plt.xlabel("1000/T (1/K)")
    plt.ylabel("k 1/s")
    plt.legend(loc='upper left')
    # Uncomment to write the figure to disk instead of only showing it inline:
    #save_path = 'ArrheniusFig/'
    #fig.savefig((save_path + fig_name + '_Unimolecular' + '.png'), bbox_inches='tight')
    #plt.close("all")

    # Reverse rates are derived from thermo, so show where the thermo of every
    # species in the fastest reactions comes from
    spc_set = set()
    for rxn_ind in rxn_indices:
        spc_set.update(rxns[rxn_ind].reactants)
        spc_set.update(rxns[rxn_ind].products)
    for spcs in spc_set:
        print(spcs.label)
        print(spcs.thermo.comment)

## Step 2b: bimolecular reactions - forward 
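For each selected family, find the bimolecular training reaction (exactly two reactants) with the highest forward rate at each temperature, report it together with its index and whether it respects the collision limit, and plot the resulting maximum rate against 1000/T.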

In [None]:
# For every selected family, report which bimolecular training reaction has the
# highest forward rate at each temperature in Ts, check it against the
# collision limit, then fit and plot the maximum-rate envelope.
for fam_name in all_families:
    # Same header as the unimolecular cells, so the output of a multi-family
    # run can be attributed to a family
    print(fam_name)

    # Generate training reactions only for non-surface families
    if 'Surface' in fam_name:
        continue

    fam = database.kinetics.families[fam_name]
    dep = fam.getTrainingDepository()

    # Extract all training reactions for the selected family. Every depository
    # entry is kept at the same list position as its reaction: the keys of
    # dep.entries are not list positions, so looking an entry up by position
    # would report the wrong training reaction (or None).
    entries = []
    rxns = []
    for entry in dep.entries.values():
        r = entry.item
        r.kinetics = entry.data
        for spc in r.reactants + r.products:
            # Request thermo for species that do not carry any yet
            if spc.thermo is None:
                submit(spc)
        entries.append(entry)
        rxns.append(r)

    # Only proceed if at least one training reaction is available
    if not rxns:
        print("No training reaction available for family {0}".format(fam_name))
        continue

    # Forward kinetics of the bimolecular reactions, tagged with their
    # position in rxns / entries
    candidates = [(pos, rxn.kinetics) for pos, rxn in enumerate(rxns)
                  if len(rxn.reactants) == 2]
    if not candidates:
        print("No bimolecular training reaction available for family {0}".format(fam_name))
        continue

    # Get max. kinetic rate at each discrete temperature
    kbimaxes = []
    for T in Ts:
        kvals = [kin.getRateCoefficient(T) for pos, kin in candidates]
        best = int(np.argmax(kvals))
        key_max_rate = candidates[best][0]
        kval = kvals[best]
        kbimaxes.append(kval)

        # Report the entry that actually owns the maximum rate, with its own index
        top_entry = entries[key_max_rate]
        message = ("For 1000/T = {0} 1/K, training reaction {1} with index {2} "
                   "has the highest rate of {3}.")
        print(message.format(1000.0 / T, top_entry, top_entry.index, kval))
        display(rxns[key_max_rate])

        # An empty list of violations means the reaction respects the collision limit
        print("collision limit agrees?")
        print([] == rxns[key_max_rate].check_collision_limit_violation(300.0, 2000.0, 0.1, 100.0))

    # Arrhenius fit of the envelope; kept in a variable for later inspection
    arrbi = Arrhenius().fitToData(Ts, np.array(kbimaxes), 'm^3/(mol*s)')

    fig = plt.figure()
    fig_name = fam_name
    plt.semilogy(1000.0 / Ts, kbimaxes, label=(fig_name + ' Bimolecular'))
    plt.xlabel("1000/T (1/K)")
    plt.ylabel("k m3/(mol*s)")
    plt.legend(loc='upper left')
    # Uncomment to write the figure to disk instead of only showing it inline:
    #save_path = 'ArrheniusFig/'
    #fig.savefig((save_path + fig_name + '_Bimolecular' + '.png'), bbox_inches='tight')
    #plt.close("all")

## Step 2b: bimolecular reactions - backward
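For each selected family, take the training reactions with exactly two products and evaluate their reverse rate coefficients. Report the reaction with the highest reverse rate at each temperature together with its index and whether it respects the collision limit, plot the resulting maximum rate against 1000/T, and print the thermo comment of every species involved in those reactions.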

In [None]:
# For every selected family, report which training reaction with two products
# has the highest reverse rate at each temperature in Ts, check it against the
# collision limit, fit and plot the maximum-rate envelope, and list the thermo
# source of the species involved.
for fam_name in all_families:
    # Same header as the unimolecular cells, so the output of a multi-family
    # run can be attributed to a family
    print(fam_name)

    # Generate training reactions only for non-surface families
    if 'Surface' in fam_name:
        continue

    fam = database.kinetics.families[fam_name]
    dep = fam.getTrainingDepository()

    # Extract all training reactions for the selected family. Every depository
    # entry is kept at the same list position as its reaction: the keys of
    # dep.entries are not list positions, so looking an entry up by position
    # would report the wrong training reaction (or None).
    entries = []
    rxns = []
    for entry in dep.entries.values():
        r = entry.item
        r.kinetics = entry.data
        for spc in r.reactants + r.products:
            # Request thermo for species that do not carry any yet
            if spc.thermo is None:
                submit(spc)
        entries.append(entry)
        rxns.append(r)

    # Only proceed if at least one training reaction is available
    if not rxns:
        print("No training reaction available for family {0}".format(fam_name))
        continue

    # Reverse kinetics of the reactions that are bimolecular in the reverse
    # direction, tagged with their position in rxns / entries
    candidates = [(pos, rxn.generateReverseRateCoefficient())
                  for pos, rxn in enumerate(rxns) if len(rxn.products) == 2]
    if not candidates:
        print("No bimolecular training reaction available for family {0}".format(fam_name))
        continue

    # Get max. kinetic rate at each discrete temperature
    kbimaxes = []
    rxn_indices = set()  # list positions of every reaction that is fastest at some T
    for T in Ts:
        kvals = [kin.getRateCoefficient(T) for pos, kin in candidates]
        best = int(np.argmax(kvals))
        key_max_rate = candidates[best][0]
        kval = kvals[best]
        rxn_indices.add(key_max_rate)
        kbimaxes.append(kval)

        # Report the entry that actually owns the maximum rate, with its own index
        top_entry = entries[key_max_rate]
        message = ("For 1000/T = {0} 1/K, training reaction {1} with index {2} "
                   "has the highest rate of {3}.")
        print(message.format(1000.0 / T, top_entry, top_entry.index, kval))
        display(rxns[key_max_rate])

        # An empty list of violations means the reaction respects the collision limit
        print("collision limit agrees?")
        print([] == rxns[key_max_rate].check_collision_limit_violation(300.0, 2000.0, 0.1, 100.0))

    # Arrhenius fit of the envelope; kept in a variable for later inspection
    arrbi = Arrhenius().fitToData(Ts, np.array(kbimaxes), 'm^3/(mol*s)')

    fig = plt.figure()
    fig_name = fam_name
    plt.semilogy(1000.0 / Ts, kbimaxes, label=(fig_name + ' Bimolecular'))
    plt.xlabel("1000/T (1/K)")
    plt.ylabel("k m3/(mol*s)")
    plt.legend(loc='upper left')
    # Uncomment to write the figure to disk instead of only showing it inline:
    #save_path = 'ArrheniusFig/'
    #fig.savefig((save_path + fig_name + '_Bimolecular' + '.png'), bbox_inches='tight')
    #plt.close("all")

    # Reverse rates are derived from thermo, so show where the thermo of every
    # species in the fastest reactions comes from. The label is printed first,
    # as in the unimolecular backward cell, so each comment can be attributed.
    spc_set = set()
    for rxn_ind in rxn_indices:
        spc_set.update(rxns[rxn_ind].reactants)
        spc_set.update(rxns[rxn_ind].products)
    for spcs in spc_set:
        print(spcs.label)
        print(spcs.thermo.comment)

In [None]:
# Stand-alone test reaction built from SMILES: two separate H-atom species as
# reactants and molecular hydrogen as the only product.
rxn = Reaction(
    reactants=[
        Species().fromSMILES("[H]"),
        Species().fromSMILES("[H]"),
    ],
    products=[Species().fromSMILES("[H][H]")],
)

In [None]:
# Collision limit of the H + H reaction built above, evaluated for the argument 1000.0
# NOTE(review): presumably a temperature in K, inside the 300-2000 grid used above - confirm
rxn.calculate_coll_limit(1000.0)

In [None]:
# Collision limit of the H + H reaction built above, evaluated for the argument 2000.0
# NOTE(review): presumably a temperature in K, the upper end of the grid used above - confirm
rxn.calculate_coll_limit(2000.0)

In [None]:
# Collision limit of the H + H reaction built above, evaluated for the argument 300.0
# NOTE(review): presumably a temperature in K, the lower end of the grid used above - confirm
rxn.calculate_coll_limit(300.0)