# Fit Arrhenius expressions for reaction filtering in RMG from the fastest training reactions of each reaction family

This script iterates through all RMG reaction families except the surface reaction families. For each reaction family,
the highest training-reaction rate coefficient at each temperature between 298 and 2500 K is collected, and these maxima
are fitted to an Arrhenius expression, separately for unimolecular and bimolecular reactions.

The Arrhenius fits are stored in a YAML file called `FilterArrheniusFits.yml`, which is read once at the beginning of an RMG run and used at each iteration to identify family-specific filter criteria for reaction generation.

For verification, a plot of the maximum rate coefficients used for each fit is saved as a .png file in the folder `ArrheniusFits`.

Currently, both the forward and the reverse direction of each training reaction are considered, and the highest rate coefficient is used for the Arrhenius fit.

In [None]:
import os
import operator

%matplotlib inline
import matplotlib.pyplot as plt
import numpy

from rmgpy import settings
from rmgpy.data.kinetics.database import filter_limit_fits
from rmgpy.data.rmg import RMGDatabase
from rmgpy.kinetics.arrhenius import Arrhenius
from rmgpy.thermo.thermoengine import submit

## Load the database with RMG reaction families

In [None]:
# Thermo libraries handed to the database loader, in the order listed here.
thermo_libraries = [
    'primaryThermoLibrary',
    'Klippenstein_Glarborg2016',
    'BurkeH2O2',
    'thermo_DFT_CCSDTF12_BAC',
    'CBS_QB3_1dHR',
    'DFT_QCI_thermo',
    'Narayanaswamy',
    'Lai_Hexylbenzene',
    'SABIC_aromatics',
    'vinylCPD_H',
]

# Load the RMG database from the configured database directory. All kinetics
# families are requested together with their 'training' depository; no
# transport libraries, reaction libraries or seed mechanisms are loaded.
database = RMGDatabase()
database.load(
    settings['database.directory'],
    thermo_libraries=thermo_libraries,
    transport_libraries=[],
    reaction_libraries=[],
    seed_mechanisms=[],
    kinetics_families='all',
    kinetics_depositories=['training'],
    depository=False,
)

In [None]:
# Names of all loaded RMG kinetics families; each one is a candidate for an Arrhenius fit
families = list(database.kinetics.families)

In [None]:
# Temperature grid for the Arrhenius fits: T_count points between T_min and
# T_max (in K). The points are spaced evenly in 1/T, not in T, and are ordered
# from T_max down to T_min.
T_min = 298.0
T_max = 2500.0
T_count = 50
inverse_Ts = numpy.linspace(1 / T_max, 1 / T_min, num=T_count)
Ts = 1 / inverse_Ts

## Generate the Arrhenius fits and print .png figures

In [None]:
# Helper function
def analyze_reactions(fam_name, molecularity=1):
    """
    Fit an Arrhenius expression to the fastest training rates of one family.

    For every temperature in the module-level grid ``Ts`` the largest rate
    coefficient among the family's training reactions is selected, and the
    resulting maxima are fitted with ``Arrhenius().fit_to_data``. Both
    directions of a training reaction are candidates: the forward kinetics
    when the number of reactants equals ``molecularity``, and the reverse
    kinetics when the number of products equals ``molecularity``.

    Args:
        fam_name: Key of the family in ``database.kinetics.families``.
        molecularity: Number of reactants (or products, for the reverse
            direction) a candidate must have. 1 selects units of ``s^-1``;
            any other value selects ``m^3/(mol*s)``.

    Returns:
        The object returned by ``Arrhenius().fit_to_data``, or ``None`` when
        the family has no training reaction with the requested molecularity.

    Side effects:
        Prints the family name; sets ``kinetics`` and ``index`` on each
        training reaction object; submits species without thermo to the
        thermo engine; for ``molecularity`` 1 or 2, saves a plot of the maxima
        under ``ArrheniusFits/`` (the directory is expected to exist already);
        for ``molecularity`` 2, reports collision-limit violations.
    """
    print(fam_name)

    fam = database.kinetics.families[fam_name]
    dep = fam.get_training_depository()

    # Extract all training reactions for the selected family and attach the
    # entry's kinetics and index to the reaction object itself.
    rxns = []
    for entry in dep.entries.values():
        r = entry.item
        r.kinetics = entry.data
        r.index = entry.index
        for spc in r.reactants + r.products:
            # Presumably thermo is needed to compute reverse rate coefficients
            # -- confirm against generate_reverse_rate_coefficient.
            if spc.thermo is None:
                submit(spc)
        rxns.append(r)

    # Collect the candidate kinetics for the requested molecularity. The two
    # lists are parallel: index_list[i] is the training index of k_list[i].
    k_list = []
    index_list = []
    for rxn in rxns:
        if len(rxn.reactants) == molecularity:
            k_list.append(rxn.kinetics)
            index_list.append(rxn.index)
        if len(rxn.products) == molecularity:
            k_list.append(rxn.generate_reverse_rate_coefficient())
            index_list.append(rxn.index)

    # Nothing to fit: no training reactions at all, or none that match.
    if not k_list:
        return None

    # Get max. kinetic rate at each discrete temperature
    k_max_list = []
    for T in Ts:
        kvals = [k.get_rate_coefficient(T) for k in k_list]

        # Select the maximum by position rather than through a dict keyed by
        # training index: a reaction whose forward AND reverse directions both
        # match contributes two rates under the same index, and a dict would
        # silently keep only the last of them.
        pos = max(range(len(kvals)), key=kvals.__getitem__)
        key_max_rate = index_list[pos]
        k_val = kvals[pos]
        k_max_list.append(k_val)

        max_entry = dep.entries.get(key_max_rate)
        max_rxn = max_entry.item

        # NOTE(review): 10000.0 and 1.0e7 are presumably the pressure bounds
        # (in Pa) of the collision-limit check -- confirm against the
        # signature of check_collision_limit_violation.
        if molecularity == 2 and max_rxn.check_collision_limit_violation(T_min, T_max, 10000.0, 1.0e7):
            display(max_rxn)
            print("""The collision limit of {0} m^3/(mol*s) at {1} (K) is violated by 
                    training reaction {2} with index {3}.
                    
                    The rate of training reaction {2} 
                    is {4} m^3/(mol*s).""".format(max_rxn.calculate_coll_limit(T), T, max_entry, key_max_rate, k_val))

    units = 's^-1' if molecularity == 1 else 'm^3/(mol*s)'

    arr = Arrhenius().fit_to_data(Ts, numpy.array(k_max_list), units)

    # Plot the maxima that went into the fit (not the fitted curve itself).
    fig = plt.figure()
    fig_name = fam_name
    fig_name += ' Unimolecular' if molecularity == 1 else ' Bimolecular'
    save_path = 'ArrheniusFits/'
    plt.semilogy(1000.0 / Ts, k_max_list, label=fig_name)
    plt.xlabel("1000/T (1/K)")
    plt.ylabel("k ({0})".format(units))
    plt.legend(loc='upper left')
    # Only the two supported molecularities produce a saved figure.
    if molecularity in (1, 2):
        fig.savefig((save_path + fig_name + '.png'), bbox_inches='tight')

    plt.close("all")

    return arr

In [None]:
# Main - Arrhenius fitting
# Parallel lists: entry i of each fits list belongs to entry i of the matching
# families list.
families_unimol, fits_unimol = [], []
families_bimol, fits_bimol = [], []

# The .png files are written to the directory 'ArrheniusFits';
# create it if it does not exist yet.
if not os.path.exists('ArrheniusFits'):
    os.mkdir('ArrheniusFits')

for family in families:
    # Surface families are not analyzed; they are recorded with a fit of None.
    # Update this script once training reaction generation for surface families works better
    is_surface = 'Surface' in family
    if is_surface:
        print(family)

    # Unimolecular reactions
    arr_uni = None if is_surface else analyze_reactions(family, molecularity=1)
    families_unimol.append(family)
    fits_unimol.append(arr_uni)
    print("*uni_mol {0}".format(arr_uni))

    # Bimolecular reactions
    arr_bi = None if is_surface else analyze_reactions(family, molecularity=2)
    families_bimol.append(family)
    fits_bimol.append(arr_bi)
    print("*bi_mol {0}".format(arr_bi))

# Map each reaction family name to its unimolecular / bimolecular Arrhenius fit
dict_unimol = dict(zip(families_unimol, fits_unimol))
dict_bimol = dict(zip(families_bimol, fits_bimol))

## Save Arrhenius fits in YAML file

In [None]:
# Bundle the per-family unimolecular and bimolecular fits (None where no fit
# was made) into the object returned by filter_limit_fits
filter_fits = filter_limit_fits(unimol=dict_unimol, bimol=dict_bimol)

In [None]:
# Path to YAML file with stored Arrhenius fits (a relative path)
path = '../input/FilterArrheniusFits.yml'
# Write the collected fits to that file through the object's save_yaml method
filter_fits.save_yaml(path)