In [1]:
# Wildcard imports stay first so that every explicit import below takes
# precedence over any name they happen to export.
from rmgpy.molecule.molecule import *
from rmgpy.species import *
from rmgpy.chemkin import *

import os
import time

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display

import rmgpy
from rmgpy import settings
from rmgpy.data.rmg import RMGDatabase
from rmgpy.data.rmg import get_db
from rmgpy.data.thermo import ThermoLibrary
from rmgpy.kinetics.arrhenius import ArrheniusBM
from rmgpy.molecule.group import Group
from rmgpy.reaction import Reaction
from rmgpy.rmg.react import react
from rmgpy.species import Species

In [2]:
# Show the active RMG settings; the 'database.directory' entry is the path handed to database.load() further down.
settings

{'database.directory': '/home/khalil.nor/Code/RMG-database/input',
 'test_data.directory': '/home/khalil.nor/Code/RMG-Py/rmgpy/test_data'}

In [3]:
# Thermo libraries handed to database.load(); the order below is kept exactly as given.
# NOTE(review): RMG presumably gives earlier libraries priority over later ones — confirm before reordering.
thermo_libs = [
    'C1_C2_Fluorine',
    'primaryThermoLibrary',
    'Fluorine',
    'FFCM1(-)',
    'halogens',
    'CHOF_G4',
    'CHOCl_G4',
    'CHOBr_G4',
    'CHOFCl_G4',
    'CHOFBr_G4',
    'CHOFClBr_G4',
    'DFT_QCI_thermo',
    '2-BTP_G4',
    'thermo_DFT_CCSDTF12_BAC',
    'SulfurHaynes',
]


In [4]:
# Load the database with the thermo libraries from `thermo_libs` and the
# F_Abstraction kinetics family together with its 'training' depository.
# Transport libraries, reaction libraries and seed mechanisms are left empty.
database = RMGDatabase()
database.load(
    path=settings['database.directory'],
    thermo_libraries=thermo_libs,
    transport_libraries=[],
    reaction_libraries=[],
    seed_mechanisms=[],  # e.g. ['BurkeH2O2inN2', 'ERC-FoundationFuelv0.9']
    kinetics_families='F_Abstraction',
    kinetics_depositories=['training'],
    depository=False,  # depository information is not used here, so it is not loaded
)


In [5]:
# Confirm which kinetics families were actually loaded.
database.kinetics.families

{'F_Abstraction': <ReactionFamily "F_Abstraction">}

In [6]:
# Keep a handle on the loaded family object; every later cell operates on `family`.
family = database.kinetics.families["F_Abstraction"]


In [7]:
# NOTE(review): presumably discards the existing tree so that generate_tree() starts from a clean state — confirm against the RMG-Py docs.
family.clean_tree()

In [8]:
# Generate the group tree for the family and print the elapsed time in seconds.
# perf_counter() is monotonic, so the measurement cannot be skewed by a
# system clock adjustment during this long-running call.
# NOTE(review): the "iter_max achieved terminating early" messages in the
# recorded output are presumably a consequence of extension_iter_max=2 — confirm.
start = time.perf_counter()
family.generate_tree(
    thermo_database=database.thermo,
    nprocs=1,
    new_fraction_threshold_to_reopt_node=0.25,
    max_batch_size=800,
    extension_iter_max=2,
    extension_iter_item_cap=100,
)
end = time.perf_counter()
print(end - start)

ERROR:root:234.0
ERROR:root:iter_max achieved terminating early
ERROR:root:iter_max achieved terminating early


214.17257642745972


In [9]:
# Number of group entries in the family once the tree has been generated.
len(family.groups.entries)

366

In [10]:
# Preview only the first few group entries instead of dumping the whole
# mapping (366 entries in the recorded run), which floods the notebook output.
dict(list(family.groups.entries.items())[:14])

{'Root': <Entry index=0 label="Root">,
 'Root_1R->O': <Entry index=1 label="Root_1R->O">,
 'Root_N-1R->O': <Entry index=2 label="Root_N-1R->O">,
 'Root_1R->O_3R->O': <Entry index=3 label="Root_1R->O_3R->O">,
 'Root_1R->O_N-3R->O': <Entry index=4 label="Root_1R->O_N-3R->O">,
 'Root_N-1R->O_3R->O': <Entry index=5 label="Root_N-1R->O_3R->O">,
 'Root_N-1R->O_N-3R->O': <Entry index=6 label="Root_N-1R->O_N-3R->O">,
 'Root_1R->O_3R->O_Ext-1O-R': <Entry index=7 label="Root_1R->O_3R->O_Ext-1O-R">,
 'Root_1R->O_3R->O_Ext-3O-R_Ext-4R!H-R': <Entry index=8 label="Root_1R->O_3R->O_Ext-3O-R_Ext-4R!H-R">,
 'Root_1R->O_3R->O_1O-u0': <Entry index=9 label="Root_1R->O_3R->O_1O-u0">,
 'Root_1R->O_3R->O_N-1O-u0': <Entry index=10 label="Root_1R->O_3R->O_N-1O-u0">,
 'Root_1R->O_N-3R->O_1O-u0': <Entry index=11 label="Root_1R->O_N-3R->O_1O-u0">,
 'Root_1R->O_N-3R->O_N-1O-u0': <Entry index=12 label="Root_1R->O_N-3R->O_N-1O-u0">,
 'Root_N-1R->O_3R->O_Ext-3O-R': <Entry index=13 label="Root_N-1R->O_3R->O_Ext-3O-R">

In [11]:
# Run the family's tree check on the freshly generated tree and print the
# elapsed time in seconds (monotonic clock, unaffected by system clock changes).
start = time.perf_counter()
family.check_tree()
end = time.perf_counter()
print(end - start)

0.17127060890197754


In [12]:
# Regularize the tree using the loaded thermo database and print the elapsed
# time in seconds (monotonic clock, unaffected by system clock changes).
start = time.perf_counter()
family.regularize(thermo_database=database.thermo)
end = time.perf_counter()
print(end - start)

1.1211936473846436


In [13]:
# Collect the reaction matches for the family into `templateRxnMap`, which the
# rule-fitting cell consumes later, and print the elapsed time in seconds
# (monotonic clock, unaffected by system clock changes).
start = time.perf_counter()
templateRxnMap = family.get_reaction_matches(
    thermo_database=database.thermo,
    remove_degeneracy=True,
    get_reverse=True,
    exact_matches_only=False,
    fix_labels=True,
)
end = time.perf_counter()
print(end - start)

0.9733355045318604


In [14]:
# Number of keys in the map returned by get_reaction_matches(); the recorded run
# gives 366, the same as the number of group entries reported earlier.
len(templateRxnMap)

366

In [15]:
# NOTE(review): presumably clears any previously fitted rules before they are rebuilt from `templateRxnMap` — confirm against the RMG-Py docs.
family.clean_tree_rules()

In [16]:
# Fit the rules from the template/reaction map and print the elapsed time in
# seconds (monotonic clock, unaffected by system clock changes).
# An `nprocs=6` argument was left disabled in the original call.
start = time.perf_counter()
family.make_bm_rules_from_template_rxn_map(templateRxnMap)
end = time.perf_counter()
print(end - start)

  for rxns, label in rxnlists])


41.019097089767456


In [18]:
# Re-run the tree check now that the rules have been rebuilt and print the
# elapsed time in seconds (monotonic clock, unaffected by system clock changes).
start = time.perf_counter()
family.check_tree()
end = time.perf_counter()
print(end - start)

0.17059063911437988


In [None]:
# Cross-validate the fitted tree, keeping the per-item errors and uncertainties,
# and print the elapsed time in seconds (monotonic clock).
# NOTE(review): folds=0 / iters=0 presumably select leave-one-out validation —
# confirm against ReactionFamily.cross_validate.
start = time.perf_counter()
errors, uncertainties = family.cross_validate(iters=0, random_state=5, folds=0, ascend=False)
end = time.perf_counter()
print(end - start)

In [None]:
# Inspect the raw error values returned by cross_validate(); note that this displays every value.
errors.values()

In [None]:
# Normalised histogram of the absolute cross-validation errors; the x axis is
# clipped to [0, 15] and labelled as |Ln(k_est/k_rxn)|.
label_size = 18

plt.figure(figsize=(10, 8))
abs_errors = np.abs(list(errors.values()))
plt.hist(abs_errors, bins=30, density=True)
plt.title('Decision Tree Estimator', fontsize=label_size)
# plt.ylim(0, .4)  # optional fixed y range, left disabled
plt.xlim(0, 15)
plt.ylabel('Probability density', fontsize=label_size)
plt.xlabel(r'$|Ln(k_{est}/k_{rxn})|$', fontsize=label_size)

In [None]:
# Legacy save call with a hard-coded, machine-specific path; left disabled.
# The family is written through `save_path` in the cells that follow instead.
#family.save('/Users/mattjohnson/RMGCODE/RMG-database/input/kinetics/families/')

In [19]:
# Destination directory for the regenerated family, built from the configured
# database directory rather than a hard-coded path. Needs `os` from the imports
# cell; it must not depend on a wildcard import happening to export it.
save_path = os.path.join(
    settings['database.directory'],
    'kinetics',
    'families',
    family.name,
)
print(save_path)

/home/khalil.nor/Code/RMG-database/input/kinetics/families/F_Abstraction


In [20]:
# Write the regenerated family to `save_path`.
# NOTE(review): save_path lies inside settings['database.directory'], so this presumably overwrites the existing family files in place — confirm before running.
family.save(save_path)