# Convert Kinetics Library to Training Reactions Script

Specify the kinetics library name below and run the script.  It automatically overwrites whichever training reaction files need to be updated; afterwards, you should commit those files.

This script only trains safely.  In other words, a training reaction is created only when exactly one match from an RMG family is found.  Sometimes there are no matches from the RMG reaction families, or there are multiple matches.  Either case indicates an error that requires manual fixing, and the script prints out the affected reactions.

In [1]:
# Kinetics libraries whose reactions are loaded; use None to load every library.
libraries = ['PFAS_HPL']

# Families that receive training reactions: either the string 'default' or an
# explicit list of family names, e.g. ['R_Addition_MultipleBond'].
families = [
    'Perfluoroalkene_Formation',
    'CF_Radical_Formation_3',
    'CF_Radical_Formation_2',
    'CF_Radical_Formation_1',
    'OF_Radical_Formation',
    'Lactone_to_Perfluoroalkene',
    'CO2_Elimination_From_Lactone',
    'CO2_Elimination_From_Carboxylic_Acid',
    'CO2_Elimination_From_Lactone_Ether',
    'CO_Elimination_From_Lactone',
    'CO_CF_bond_dissociation',
    'PFAS_Hydrolysis',
    'Lactone_Formation',
    'CO2_Elimination_From_PFCA_Radical',
]

# Plot kinetics comparisons?
compare_kinetics = False

# Print library reactions that fit none of the selected families?
# With only a few families selected this produces a lot of unnecessary output.
show_all = False

# Prefer aromatic resonance structures, which reduces multiple-match cases.
filter_aromatic = True

# Use verbose comments when the tree is filled by averaging.
verbose_comments = True

In [2]:
from rmgpy import settings
from rmgpy.data.rmg import RMGDatabase
from kinetics_library_to_training_tools_modified import *
import kinetics_library_to_training_tools_modified# import process_reactions
from base64 import b64encode
from IPython.display import display, HTML
import os
import re

In [3]:
# Optionally add the pre-existing (not newly created) reaction families: every
# '<family>_matched_rxns.html' file in the directory below contributes the
# '<family>' part of its name to `families`.
matched_families_path = '/work/westgroup/nora/Code/projects/PFAS/AIChE_2024/organized_into_rxn_families/HPL_matches/'
matched_htmls = [html for html in os.listdir(matched_families_path) if '_matched_rxns.html' in html]
# Raw string: '\S' inside a normal string literal is an invalid escape sequence
# and triggers a DeprecationWarning/SyntaxWarning on recent Python versions.
matched_name_pattern = re.compile(r'(\S+)_matched_rxns')
previously_matched_families = [matched_name_pattern.search(html).group(1) for html in matched_htmls]
# Append only names that are not already listed, so that re-running this cell
# (or a family appearing in both places) does not create duplicate entries.
new_families = [name for name in dict.fromkeys(previously_matched_families) if name not in families]
families.extend(new_families)

In [None]:
from importlib import reload

In [4]:
# Thermo libraries handed to the database loader, in the order listed here.
thermolibs = [
    'C1_C2_Fluorine',  # Siddha's library, most trusted: Caroline used this thermo for calcs
    'PFCA_thermo',
    'NCSU_C2_C8_PFAS',  # second most trusted: good estimates for higher-carbon PFAS
    'Fluorine',
    'primaryThermoLibrary',
    'FFCM1(-)',
    'halogens',
    'CHOF_G4',
    'CHOCl_G4',
    'CHOBr_G4',
    'CHOFCl_G4',
    'CHOFBr_G4',
    'CHOFClBr_G4',
    'DFT_QCI_thermo',
    '2-BTP_G4',
    'thermo_DFT_CCSDTF12_BAC',
    'SulfurHaynes',
]


## Step 1: Load RMG-database with specified libraries and families

In [5]:
# Build the RMG database from the configured thermo libraries, kinetics
# families and reaction libraries, together with the 'training' depositories.
database = RMGDatabase()
database.load(
    path=settings['database.directory'],
    thermo_libraries=thermolibs,  # extend `thermolibs` if more are needed
    kinetics_families=families,
    reaction_libraries=libraries,
    kinetics_depositories=['training'],
)

# For an accurate kinetics comparison, add the existing training reactions as
# rules and fill the tree by averaging. Auto-generated families are skipped.
if compare_kinetics:
    for family in database.kinetics.families.values():
        if family.auto_generated:
            continue
        family.add_rules_from_training(thermo_database=database.thermo)
        family.fill_rules_by_averaging_up(verbose=verbose_comments)

In [6]:
# Show the names of all kinetics families that were actually loaded.
database.kinetics.families.keys()

dict_keys(['1+2_Cycloaddition', '1,2_Insertion_CO', '1,2_Insertion_carbene', '1,3_Insertion_CO2', '1,3_sigmatropic_rearrangement', 'CF_Radical_Formation_1', 'CF_Radical_Formation_2', 'CF_Radical_Formation_3', 'CO2_Elimination_From_Carboxylic_Acid', 'CO2_Elimination_From_Lactone', 'CO2_Elimination_From_Lactone_Ether', 'CO2_Elimination_From_PFCA_Radical', 'CO_CF_bond_dissociation', 'CO_Elimination_From_Lactone', 'F_Abstraction', 'Intra_R_Add_Endocyclic', 'Lactone_Formation', 'Lactone_to_Perfluoroalkene', 'OF_Radical_Formation', 'PFAS_Hydrolysis', 'Perfluoroalkene_Formation', 'R_Addition_COm', 'R_Addition_MultipleBond', 'R_Recombination', 'Singlet_Carbene_Intra_Disproportionation', 'XY_Addition_MultipleBond'])

## Step 2a: Generate library reactions from families to get proper labels

In [None]:
# Re-import the tools module so edits to it take effect without a kernel restart.
# NOTE(review): names bound earlier via `from kinetics_library_to_training_tools_modified import *`
# (e.g. analyze_per_family, review_reactions, manual_selection) are NOT refreshed by
# reload(); only module-qualified calls see the reloaded code.
reload(kinetics_library_to_training_tools_modified)

In [7]:
# Run every reaction of the selected libraries through all loaded families.
# Called via the module (not the star-imported name) so a reload() is honoured.
# master_dict is indexed by library name and then by family name further down;
# multiple_dict is later passed to manual_selection (the multiple-match step).
# NOTE(review): the captured output of the last run of this cell ends in a
# ResonanceError, and the following cells show no execution count.
master_dict, multiple_dict, lib_fam_rxn_dict = kinetics_library_to_training_tools_modified.process_reactions(
    database,
    libraries,
    list(database.kinetics.families),
    compare_kinetics=compare_kinetics,
    show_all=show_all,
    filter_aromatic=filter_aromatic,
)

[Species(label="CF2H", molecule=[Molecule(smiles="F[CH]F")], molecular_weight=(51.0154,'amu'))] [Species(label="CF", molecule=[Molecule(smiles="[C]F")], molecular_weight=(31.009,'amu')), Species(label="HF", molecule=[Molecule(smiles="F")], molecular_weight=(20.0064,'amu'))]
F[CH]F <=> F + [C]F CF_Radical_Formation_1


[Species(label="CF3H", molecule=[Molecule(smiles="FC(F)F")], molecular_weight=(70.0138,'amu'))] [Species(label="CF3", molecule=[Molecule(smiles="F[C](F)F")], molecular_weight=(69.0059,'amu')), Species(label="H", molecule=[Molecule(smiles="[H]")], molecular_weight=(1.00797,'amu'))]
[H] + F[C](F)F <=> FC(F)F R_Recombination


[Species(label="CF3H", molecule=[Molecule(smiles="FC(F)F")], molecular_weight=(70.0138,'amu'))] [Species(label="F", molecule=[Molecule(smiles="[F]")], molecular_weight=(18.9984,'amu')), Species(label="CF2H", molecule=[Molecule(smiles="F[CH]F")], molecular_weight=(51.0154,'amu'))]
F[CH]F + [F] <=> FC(F)F R_Recombination


[Species(label="CF3H", molecule=[Molecule(smiles="FC(F)F")], molecular_weight=(70.0138,'amu'))] [Species(label="CF2", molecule=[Molecule(smiles="F[C]F")], molecular_weight=(50.0075,'amu')), Species(label="HF", molecule=[Molecule(smiles="F")], molecular_weight=(20.0064,'amu'))]
F[C]F + F <=> FC(F)F 1,2_Insertion_carbene


[Species(label="CF4", molecule=[Molecule(smiles="FC(F)(F)F")], molecular_weight=(88.0043,'amu')), Species(label="H", molecule=[Molecule(smiles="[H]")], molecular_weight=(1.00797,'amu'))] [Species(label="CF3", molecule=[Molecule(smiles="F[C](F)F")], molecular_weight=(69.0059,'amu')), Species(label="HF", molecule=[Molecule(smiles="F")], molecular_weight=(20.0064,'amu'))]
FC(F)(F)F + [H] <=> F + F[C](F)F F_Abstraction


[Species(label="CF4", molecule=[Molecule(smiles="FC(F)(F)F")], molecular_weight=(88.0043,'amu')), Species(label="H2O", molecule=[Molecule(smiles="O")], molecular_weight=(18.0153,'amu'))] [Species(label="CF3OH", molecule=[Molecule(smiles="OC(F)(F)F")], molecular_weight=(86.0132,'amu')), Species(label="HF", molecule=[Molecule(smiles="F")], molecular_weight=(20.0064,'amu'))]
FC(F)(F)F + O <=> F + OC(F)(F)F PFAS_Hydrolysis


ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {4,S}
2    F u0 p3 c0 {4,S}
3 *1 O u0 p2 c0 {4,S} {5,S}
4 *2 C u0 p0 c0 {1,S} {2,S} {3,S} {5,S}
5 *3 O u0 p1 c0 {3,S} {4,S} {6,S} {7,S}
6    H u0 p0 c0 {5,S}
7    H u0 p0 c0 {5,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {4,S}
2    F u0 p3 c0 {4,S}
3 *2 O u0 p2 c0 {4,S} {5,S}
4 *1 C u0 p0 c0 {1,S} {2,S} {3,S} {5,S}
5 *3 O u0 p1 c0 {3,S} {4,S} {6,S} {7,S}
6    H u0 p0 c0 {5,S}
7    H u0 p0 c0 {5,S}



[Species(label="CF2O", molecule=[Molecule(smiles="O=C(F)F")], molecular_weight=(66.0069,'amu')), Species(label="H2O", molecule=[Molecule(smiles="O")], molecular_weight=(18.0153,'amu'))] [Species(label="FC(O)OH", molecule=[Molecule(smiles="O=C(O)F")], molecular_weight=(64.0158,'amu')), Species(label="HF", molecule=[Molecule(smiles="F")], molecular_weight=(20.0064,'amu'))]
O=C(F)F + O <=> F + O=C(O)F PFAS_Hydrolysis


[Species(label="CF3O", molecule=[Molecule(smiles="[O]C(F)(F)F")], molecular_weight=(85.0053,'amu'))] [Species(label="CF2O", molecule=[Molecule(smiles="O=C(F)F")], molecular_weight=(66.0069,'amu')), Species(label="F", molecule=[Molecule(smiles="[F]")], molecular_weight=(18.9984,'amu'))]
O=C(F)F + [F] <=> [O]C(F)(F)F R_Addition_MultipleBond


[Species(label="CF3OH", molecule=[Molecule(smiles="OC(F)(F)F")], molecular_weight=(86.0132,'amu'))] [Species(label="CF2O", molecule=[Molecule(smiles="O=C(F)F")], molecular_weight=(66.0069,'amu')), Species(label="HF", molecule=[Molecule(smiles="F")], molecular_weight=(20.0064,'amu'))]
O=C(F)F + F <=> OC(F)(F)F XY_Addition_MultipleBond


[Species(label="FCO2", molecule=[Molecule(smiles="[O]C(=O)F"), Molecule(smiles="[O]C(=O)F")], molecular_weight=(63.0078,'amu'))] [Species(label="CO2", molecule=[Molecule(smiles="O=C=O")], molecular_weight=(44.0094,'amu')), Species(label="F", molecule=[Molecule(smiles="[F]")], molecular_weight=(18.9984,'amu'))]
O=C=O + [F] <=> [O]C(=O)F R_Addition_MultipleBond


[Species(label="FC(O)OH", molecule=[Molecule(smiles="O=C(O)F")], molecular_weight=(64.0158,'amu'))] [Species(label="CO2", molecule=[Molecule(smiles="O=C=O")], molecular_weight=(44.0094,'amu')), Species(label="HF", molecule=[Molecule(smiles="F")], molecular_weight=(20.0064,'amu'))]
O=C=O + F <=> O=C(O)F 1,3_Insertion_CO2


[Species(label="CF3OO", molecule=[Molecule(smiles="[O]OC(F)(F)F")], molecular_weight=(101.005,'amu'))] [Species(label="CF3", molecule=[Molecule(smiles="F[C](F)F")], molecular_weight=(69.0059,'amu')), Species(label="O2", molecule=[Molecule(smiles="[O][O]")], molecular_weight=(31.9988,'amu'))]
F[C](F)F + [O][O] <=> [O]OC(F)(F)F R_Recombination


[Species(label="CF3OO", molecule=[Molecule(smiles="[O]OC(F)(F)F")], molecular_weight=(101.005,'amu'))] [Species(label="CF2O", molecule=[Molecule(smiles="O=C(F)F")], molecular_weight=(66.0069,'amu')), Species(label="OF", molecule=[Molecule(smiles="[O]F")], molecular_weight=(34.9978,'amu'))]
[O]OC(F)(F)F <=> [O]F + O=C(F)F OF_Radical_Formation


[Species(label="CF3CF", molecule=[Molecule(smiles="F[C]C(F)(F)F")], molecular_weight=(100.015,'amu'))] [Species(label="CF2CF2", molecule=[Molecule(smiles="FC(F)=C(F)F")], molecular_weight=(100.015,'amu'))]
F[C]C(F)(F)F <=> FC(F)=C(F)F Singlet_Carbene_Intra_Disproportionation


[Species(label="CF3CF", molecule=[Molecule(smiles="F[C]C(F)(F)F")], molecular_weight=(100.015,'amu'))] [Species(label="CF3", molecule=[Molecule(smiles="F[C](F)F")], molecular_weight=(69.0059,'amu')), Species(label="CF", molecule=[Molecule(smiles="[C]F")], molecular_weight=(31.009,'amu'))]
F[C]C(F)(F)F <=> [C]F + F[C](F)F CF_Radical_Formation_2


[Species(label="CF3CF2", molecule=[Molecule(smiles="F[C](F)C(F)(F)F")], molecular_weight=(119.013,'amu'))] [Species(label="CF4", molecule=[Molecule(smiles="FC(F)(F)F")], molecular_weight=(88.0043,'amu')), Species(label="CF", molecule=[Molecule(smiles="[C]F")], molecular_weight=(31.009,'amu'))]
F[C](F)C(F)(F)F <=> FC(F)(F)F + [C]F CF_Radical_Formation_1


[Species(label="CF3CF2H", molecule=[Molecule(smiles="FC(F)C(F)(F)F")], molecular_weight=(120.021,'amu'))] [Species(label="CF3", molecule=[Molecule(smiles="F[C](F)F")], molecular_weight=(69.0059,'amu')), Species(label="CF2H", molecule=[Molecule(smiles="F[CH]F")], molecular_weight=(51.0154,'amu'))]
F[CH]F + F[C](F)F <=> FC(F)C(F)(F)F R_Recombination


[Species(label="CF3CF2H", molecule=[Molecule(smiles="FC(F)C(F)(F)F")], molecular_weight=(120.021,'amu'))] [Species(label="CF2CF2", molecule=[Molecule(smiles="FC(F)=C(F)F")], molecular_weight=(100.015,'amu')), Species(label="HF", molecule=[Molecule(smiles="F")], molecular_weight=(20.0064,'amu'))]
FC(F)=C(F)F + F <=> FC(F)C(F)(F)F XY_Addition_MultipleBond


[Species(label="CF3CF2H", molecule=[Molecule(smiles="FC(F)C(F)(F)F")], molecular_weight=(120.021,'amu'))] [Species(label="CF3CF2", molecule=[Molecule(smiles="F[C](F)C(F)(F)F")], molecular_weight=(119.013,'amu')), Species(label="H", molecule=[Molecule(smiles="[H]")], molecular_weight=(1.00797,'amu'))]
[H] + F[C](F)C(F)(F)F <=> FC(F)C(F)(F)F R_Recombination


[Species(label="CF3CF2H", molecule=[Molecule(smiles="FC(F)C(F)(F)F")], molecular_weight=(120.021,'amu'))] [Species(label="F", molecule=[Molecule(smiles="[F]")], molecular_weight=(18.9984,'amu')), Species(label="CF2CF2H", molecule=[Molecule(smiles="F[C](F)C(F)F")], molecular_weight=(101.023,'amu'))]
F[C](F)C(F)F + [F] <=> FC(F)C(F)(F)F R_Recombination


[Species(label="C2F6", molecule=[Molecule(smiles="FC(F)(F)C(F)(F)F")], molecular_weight=(138.012,'amu'))] [Species(label="CF3", molecule=[Molecule(smiles="F[C](F)F")], molecular_weight=(69.0059,'amu')), Species(label="CF3", molecule=[Molecule(smiles="F[C](F)F")], molecular_weight=(69.0059,'amu'))]
F[C](F)F + F[C](F)F <=> FC(F)(F)C(F)(F)F R_Recombination


[Species(label="C2F6", molecule=[Molecule(smiles="FC(F)(F)C(F)(F)F")], molecular_weight=(138.012,'amu'))] [Species(label="CF3CF2", molecule=[Molecule(smiles="F[C](F)C(F)(F)F")], molecular_weight=(119.013,'amu')), Species(label="F", molecule=[Molecule(smiles="[F]")], molecular_weight=(18.9984,'amu'))]
F[C](F)C(F)(F)F + [F] <=> FC(F)(F)C(F)(F)F R_Recombination


[Species(label="C2F6", molecule=[Molecule(smiles="FC(F)(F)C(F)(F)F")], molecular_weight=(138.012,'amu'))] [Species(label="CF4", molecule=[Molecule(smiles="FC(F)(F)F")], molecular_weight=(88.0043,'amu')), Species(label="CF2", molecule=[Molecule(smiles="F[C]F")], molecular_weight=(50.0075,'amu'))]
F[C]F + FC(F)(F)F <=> FC(F)(F)C(F)(F)F 1,2_Insertion_carbene


[Species(label="C2F6", molecule=[Molecule(smiles="FC(F)(F)C(F)(F)F")], molecular_weight=(138.012,'amu')), Species(label="H", molecule=[Molecule(smiles="[H]")], molecular_weight=(1.00797,'amu'))] [Species(label="CF3CF2", molecule=[Molecule(smiles="F[C](F)C(F)(F)F")], molecular_weight=(119.013,'amu')), Species(label="HF", molecule=[Molecule(smiles="F")], molecular_weight=(20.0064,'amu'))]
FC(F)(F)C(F)(F)F + [H] <=> F + F[C](F)C(F)(F)F F_Abstraction


[Species(label="C2F6", molecule=[Molecule(smiles="FC(F)(F)C(F)(F)F")], molecular_weight=(138.012,'amu')), Species(label="H2O", molecule=[Molecule(smiles="O")], molecular_weight=(18.0153,'amu'))] [Species(label="C2F5OH", molecule=[Molecule(smiles="OC(F)(F)C(F)(F)F")], molecular_weight=(136.021,'amu')), Species(label="HF", molecule=[Molecule(smiles="F")], molecular_weight=(20.0064,'amu'))]
FC(F)(F)C(F)(F)F + O <=> F + OC(F)(F)C(F)(F)F PFAS_Hydrolysis


ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {6,S}
2 *3 F u0 p2 c0 {4,S} {5,S} {6,S}
3    F u0 p3 c0 {5,S}
4 *1 O u0 p2 c0 {2,S} {6,S}
5    C u1 p0 c0 {2,S} {3,S} {6,S}
6 *2 C u0 p0 c0 {1,S} {2,S} {4,S} {5,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {6,S}
2    F u0 p3 c0 {5,S}
3 *3 F u0 p2 c0 {4,S} {5,S} {6,S}
4 *1 O u0 p2 c0 {3,S} {6,S}
5    C u1 p0 c0 {2,S} {3,S} {6,S}
6 *2 C u0 p0 c0 {1,S} {3,S} {4,S} {5,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {6,S}
2 *3 F u0 p2 c0 {4,S} {5,S} {6,S}
3    F u0 p3 c0 {5,S}
4 *2 O u0 p2 c0 {2,S} {6,S}
5    C u1 p0 c0 {2,S} {3,S} {6,S}
6 *1 C u0 p0 c0 {1,S} {2,S} {4,S} {5,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {6,S}
2    F u0 p3 c0 {5,S}
3 *3 F u0 p2 c0 {4,S} {5,S} {6,S}
4 *2 O u0 p2 c0 {3,S} {6,S}
5    C u1 p0 c0 {2,S} {3,S} {6,S}
6 *1 C u0 p0 c0 {

[Species(label="CF2CFO", molecule=[Molecule(smiles="O=C(F)[C](F)F"), Molecule(smiles="[O]C(F)=C(F)F")], molecular_weight=(97.0159,'amu'))] [Species(label="CF2O", molecule=[Molecule(smiles="O=C(F)F")], molecular_weight=(66.0069,'amu')), Species(label="CF", molecule=[Molecule(smiles="[C]F")], molecular_weight=(31.009,'amu'))]
O=C(F)[C](F)F <=> O=C(F)F + [C]F CF_Radical_Formation_1


[Species(label="F2COCF", molecule=[Molecule(smiles="F[C]1OC1(F)F")], molecular_weight=(97.0159,'amu'))] [Species(label="CF2CFO", molecule=[Molecule(smiles="O=C(F)[C](F)F"), Molecule(smiles="[O]C(F)=C(F)F")], molecular_weight=(97.0159,'amu'))]
O=C(F)[C](F)F <=> F[C]1OC1(F)F Intra_R_Add_Endocyclic


[Species(label="F2COCF", molecule=[Molecule(smiles="F[C]1OC1(F)F")], molecular_weight=(97.0159,'amu'))] [Species(label="CF2O", molecule=[Molecule(smiles="O=C(F)F")], molecular_weight=(66.0069,'amu')), Species(label="CF", molecule=[Molecule(smiles="[C]F")], molecular_weight=(31.009,'amu'))]
F[C]1OC1(F)F <=> [C]F + O=C(F)F CF_Radical_Formation_3


[Species(label="CF3CO", molecule=[Molecule(smiles="O=[C]C(F)(F)F")], molecular_weight=(97.0159,'amu'))] [Species(label="CF3", molecule=[Molecule(smiles="F[C](F)F")], molecular_weight=(69.0059,'amu')), Species(label="CO", molecule=[Molecule(smiles="[C-]#[O+]")], molecular_weight=(28.01,'amu'))]
[C-]#[O+] + F[C](F)F <=> O=[C]C(F)(F)F R_Addition_COm


ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1 *3 F u0 p2 c0 {5,S} {6,S} {7,S}
2    F u0 p3 c0 {6,S}
3    F u0 p3 c0 {6,S}
4    F u0 p3 c0 {7,S}
5 *1 O u0 p2 c0 {1,S} {7,S}
6    C u0 p0 c0 {1,S} {2,S} {3,S} {7,S}
7 *2 C u0 p0 c0 {1,S} {4,S} {5,S} {6,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {6,S}
2 *3 F u0 p2 c0 {5,S} {6,S} {7,S}
3    F u0 p3 c0 {6,S}
4    F u0 p3 c0 {7,S}
5 *1 O u0 p2 c0 {2,S} {7,S}
6    C u0 p0 c0 {1,S} {2,S} {3,S} {7,S}
7 *2 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {6,S}
2    F u0 p3 c0 {6,S}
3 *3 F u0 p2 c0 {5,S} {6,S} {7,S}
4    F u0 p3 c0 {7,S}
5 *1 O u0 p2 c0 {3,S} {7,S}
6    C u0 p0 c0 {1,S} {2,S} {3,S} {7,S}
7 *2 C u0 p0 c0 {3,S} {4,S} {5,S} {6,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1 *3 F u0 p2 c0 {5,S} {6,S} {7,S}
2    F u0 p3 c0 {6,S}
3    F u0 p3 c0 {

[Species(label="CF3CFO", molecule=[Molecule(smiles="O=C(F)C(F)(F)F")], molecular_weight=(116.014,'amu'))] [Species(label="CF2CFO", molecule=[Molecule(smiles="O=C(F)[C](F)F"), Molecule(smiles="[O]C(F)=C(F)F")], molecular_weight=(97.0159,'amu')), Species(label="F", molecule=[Molecule(smiles="[F]")], molecular_weight=(18.9984,'amu'))]
O=C(F)[C](F)F + [F] <=> O=C(F)C(F)(F)F R_Recombination


ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1 *3 F u0 p2 c0 {5,S} {6,S} {7,S}
2    F u0 p3 c0 {6,S}
3    F u0 p3 c0 {6,S}
4    F u0 p3 c0 {7,S}
5 *1 O u0 p2 c0 {1,S} {7,S}
6    C u0 p0 c0 {1,S} {2,S} {3,S} {7,S}
7 *2 C u0 p0 c0 {1,S} {4,S} {5,S} {6,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {6,S}
2 *3 F u0 p2 c0 {5,S} {6,S} {7,S}
3    F u0 p3 c0 {6,S}
4    F u0 p3 c0 {7,S}
5 *1 O u0 p2 c0 {2,S} {7,S}
6    C u0 p0 c0 {1,S} {2,S} {3,S} {7,S}
7 *2 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {6,S}
2    F u0 p3 c0 {6,S}
3 *3 F u0 p2 c0 {5,S} {6,S} {7,S}
4    F u0 p3 c0 {7,S}
5 *1 O u0 p2 c0 {3,S} {7,S}
6    C u0 p0 c0 {1,S} {2,S} {3,S} {7,S}
7 *2 C u0 p0 c0 {3,S} {4,S} {5,S} {6,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1 *3 F u0 p2 c0 {5,S} {6,S} {7,S}
2    F u0 p3 c0 {6,S}
3    F u0 p3 c0 {

[Species(label="CF3CFO", molecule=[Molecule(smiles="O=C(F)C(F)(F)F")], molecular_weight=(116.014,'amu'))] [Species(label="CF3", molecule=[Molecule(smiles="F[C](F)F")], molecular_weight=(69.0059,'amu')), Species(label="CFO", molecule=[Molecule(smiles="O=[C]F")], molecular_weight=(47.0084,'amu'))]
O=[C]F + F[C](F)F <=> O=C(F)C(F)(F)F R_Recombination


ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1 *3 F u0 p2 c0 {5,S} {6,S} {7,S}
2    F u0 p3 c0 {6,S}
3    F u0 p3 c0 {6,S}
4    F u0 p3 c0 {7,S}
5 *1 O u0 p2 c0 {1,S} {7,S}
6    C u0 p0 c0 {1,S} {2,S} {3,S} {7,S}
7 *2 C u0 p0 c0 {1,S} {4,S} {5,S} {6,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {6,S}
2 *3 F u0 p2 c0 {5,S} {6,S} {7,S}
3    F u0 p3 c0 {6,S}
4    F u0 p3 c0 {7,S}
5 *1 O u0 p2 c0 {2,S} {7,S}
6    C u0 p0 c0 {1,S} {2,S} {3,S} {7,S}
7 *2 C u0 p0 c0 {2,S} {4,S} {5,S} {6,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {6,S}
2    F u0 p3 c0 {6,S}
3 *3 F u0 p2 c0 {5,S} {6,S} {7,S}
4    F u0 p3 c0 {7,S}
5 *1 O u0 p2 c0 {3,S} {7,S}
6    C u0 p0 c0 {1,S} {2,S} {3,S} {7,S}
7 *2 C u0 p0 c0 {3,S} {4,S} {5,S} {6,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1 *3 F u0 p2 c0 {5,S} {6,S} {7,S}
2    F u0 p3 c0 {6,S}
3    F u0 p3 c0 {

[Species(label="CF3CFO", molecule=[Molecule(smiles="O=C(F)C(F)(F)F")], molecular_weight=(116.014,'amu'))] [Species(label="CF4", molecule=[Molecule(smiles="FC(F)(F)F")], molecular_weight=(88.0043,'amu')), Species(label="CO", molecule=[Molecule(smiles="[C-]#[O+]")], molecular_weight=(28.01,'amu'))]
[C-]#[O+] + FC(F)(F)F <=> O=C(F)C(F)(F)F 1,2_Insertion_CO


ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1     F u0 p3 c0 {6,S}
2     F u0 p3 c0 {6,S}
3     F u0 p3 c0 {6,S}
4     F u0 p3 c0 {7,S}
5  *1 O u0 p2 c0 {7,S} {8,S}
6     C u0 p0 c0 {1,S} {2,S} {3,S} {7,S}
7  *2 C u0 p0 c0 {4,S} {5,S} {6,S} {8,S}
8  *3 O u0 p1 c0 {5,S} {7,S} {9,S} {10,S}
9     H u0 p0 c0 {8,S}
10    H u0 p0 c0 {8,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1     F u0 p3 c0 {6,S}
2     F u0 p3 c0 {6,S}
3     F u0 p3 c0 {6,S}
4     F u0 p3 c0 {7,S}
5  *2 O u0 p2 c0 {7,S} {8,S}
6     C u0 p0 c0 {1,S} {2,S} {3,S} {7,S}
7  *1 C u0 p0 c0 {4,S} {5,S} {6,S} {8,S}
8  *3 O u0 p1 c0 {5,S} {7,S} {9,S} {10,S}
9     H u0 p0 c0 {8,S}
10    H u0 p0 c0 {8,S}



[Species(label="CF3CFO", molecule=[Molecule(smiles="O=C(F)C(F)(F)F")], molecular_weight=(116.014,'amu')), Species(label="H2O", molecule=[Molecule(smiles="O")], molecular_weight=(18.0153,'amu'))] [Species(label="CF3C(O)OH", molecule=[Molecule(smiles="O=C(O)C(F)(F)F")], molecular_weight=(114.023,'amu')), Species(label="HF", molecule=[Molecule(smiles="F")], molecular_weight=(20.0064,'amu'))]
O=C(F)C(F)(F)F + O <=> F + O=C(O)C(F)(F)F PFAS_Hydrolysis


[Species(label="C2F5O", molecule=[Molecule(smiles="[O]C(F)(F)C(F)(F)F")], molecular_weight=(135.013,'amu'))] [Species(label="CF3", molecule=[Molecule(smiles="F[C](F)F")], molecular_weight=(69.0059,'amu')), Species(label="CF2O", molecule=[Molecule(smiles="O=C(F)F")], molecular_weight=(66.0069,'amu'))]
O=C(F)F + F[C](F)F <=> [O]C(F)(F)C(F)(F)F R_Addition_MultipleBond


[Species(label="C2F5OH", molecule=[Molecule(smiles="OC(F)(F)C(F)(F)F")], molecular_weight=(136.021,'amu'))] [Species(label="CF3", molecule=[Molecule(smiles="F[C](F)F")], molecular_weight=(69.0059,'amu')), Species(label="CF2OH", molecule=[Molecule(smiles="O[C](F)F")], molecular_weight=(67.0148,'amu'))]
O[C](F)F + F[C](F)F <=> OC(F)(F)C(F)(F)F R_Recombination


[Species(label="C2F5OH", molecule=[Molecule(smiles="OC(F)(F)C(F)(F)F")], molecular_weight=(136.021,'amu'))] [Species(label="CF3CFO", molecule=[Molecule(smiles="O=C(F)C(F)(F)F")], molecular_weight=(116.014,'amu')), Species(label="HF", molecule=[Molecule(smiles="F")], molecular_weight=(20.0064,'amu'))]
O=C(F)C(F)(F)F + F <=> OC(F)(F)C(F)(F)F XY_Addition_MultipleBond


ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {4,S}
2 *1 O u0 p2 c0 {3,S} {4,S}
3 *3 O u0 p1 c0 {2,S} {4,S} {5,D}
4 *2 C u0 p0 c0 {1,S} {2,S} {3,S} {5,S}
5    C u1 p0 c0 {3,D} {4,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {4,S}
2 *2 O u0 p2 c0 {3,S} {4,S}
3 *3 O u0 p1 c0 {2,S} {4,S} {5,D}
4 *1 C u0 p0 c0 {1,S} {2,S} {3,S} {5,S}
5    C u1 p0 c0 {3,D} {4,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1 *3 F u0 p2 c0 {3,S} {4,S} {5,S}
2    O u1 p2 c0 {4,S}
3 *1 O u0 p2 c0 {1,S} {5,S}
4    C u0 p0 c0 {1,S} {2,S} {5,D}
5 *2 C u0 p0 c0 {1,S} {3,S} {4,D}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1 *3 F u0 p2 c0 {3,S} {4,S} {5,S}
2    O u1 p2 c0 {4,S}
3 *2 O u0 p2 c0 {1,S} {5,S}
4    C u0 p0 c0 {1,S} {2,S} {5,D}
5 *1 C u0 p0 c0 {1,S} {3,S} {4,D}



[Species(label="CF(O)CO", molecule=[Molecule(smiles="O=[C]C(=O)F"), Molecule(smiles="[O]C(F)=C=O")], molecular_weight=(75.0185,'amu'))] [Species(label="CFO", molecule=[Molecule(smiles="O=[C]F")], molecular_weight=(47.0084,'amu')), Species(label="CO", molecule=[Molecule(smiles="[C-]#[O+]")], molecular_weight=(28.01,'amu'))]
[C-]#[O+] + O=[C]F <=> O=[C]C(=O)F R_Addition_COm


[Species(label="C2F5OO", molecule=[Molecule(smiles="[O]OC(F)(F)C(F)(F)F")], molecular_weight=(151.012,'amu'))] [Species(label="CF3CF2", molecule=[Molecule(smiles="F[C](F)C(F)(F)F")], molecular_weight=(119.013,'amu')), Species(label="O2", molecule=[Molecule(smiles="[O][O]")], molecular_weight=(31.9988,'amu'))]
F[C](F)C(F)(F)F + [O][O] <=> [O]OC(F)(F)C(F)(F)F R_Recombination


[Species(label="C2F5OO", molecule=[Molecule(smiles="[O]OC(F)(F)C(F)(F)F")], molecular_weight=(151.012,'amu'))] [Species(label="CF3CFO", molecule=[Molecule(smiles="O=C(F)C(F)(F)F")], molecular_weight=(116.014,'amu')), Species(label="OF", molecule=[Molecule(smiles="[O]F")], molecular_weight=(34.9978,'amu'))]
[O]OC(F)(F)C(F)(F)F <=> [O]F + O=C(F)C(F)(F)F OF_Radical_Formation


ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1 *3 F u0 p2 c0 {4,S} {5,S} {6,S}
2    F u0 p3 c0 {5,S}
3    O u0 p2 c0 {5,S} {6,S}
4 *1 O u0 p2 c0 {1,S} {6,S}
5    C u0 p0 c0 {1,S} {2,S} {3,S} {6,S}
6 *2 C u0 p0 c0 {1,S} {3,S} {4,S} {5,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {5,S}
2 *3 F u0 p2 c0 {4,S} {5,S} {6,S}
3    O u0 p2 c0 {5,S} {6,S}
4 *1 O u0 p2 c0 {2,S} {6,S}
5    C u0 p0 c0 {1,S} {2,S} {3,S} {6,S}
6 *2 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1 *3 F u0 p2 c0 {4,S} {5,S} {6,S}
2    F u0 p3 c0 {5,S}
3    O u0 p2 c0 {5,S} {6,S}
4 *2 O u0 p2 c0 {1,S} {6,S}
5    C u0 p0 c0 {1,S} {2,S} {3,S} {6,S}
6 *1 C u0 p0 c0 {1,S} {3,S} {4,S} {5,S}

ERROR:root:Could not update atomtypes for this molecule:
multiplicity -187
1    F u0 p3 c0 {5,S}
2 *3 F u0 p2 c0 {4,S} {5,S} {6,S}
3    O u0 p2 c0 {5,S} {6,S}
4 *2 O u0 p2 c0 {2,S} {6,S}
5    C u0

[Species(label="c_F2COC(O)", molecule=[Molecule(smiles="O=C1OC1(F)F")], molecular_weight=(94.0169,'amu'))] [Species(label="CF2O", molecule=[Molecule(smiles="O=C(F)F")], molecular_weight=(66.0069,'amu')), Species(label="CO", molecule=[Molecule(smiles="[C-]#[O+]")], molecular_weight=(28.01,'amu'))]
O=C(F)F + [C-]#[O+] <=> O=C1OC1(F)F 1+2_Cycloaddition


ResonanceError: Can only generate resonance structures for reactive molecules! Got the following unreactive structure:
1    O u0 p2 c0 {2,D}
2 *3 C u0 p1 c0 {1,D}
Reactive = False

In [None]:
# Show the keys stored under the PFAS_HPL library in master_dict
# (used as family names by the cells below).
master_dict['PFAS_HPL'].keys()

In [None]:
# Entry of master_dict for the PFAS_HPL library; passed to analyze_per_family below.
# NOTE: the library name is hard-coded here rather than taken from `libraries`.
reaction_dict = master_dict['PFAS_HPL']

In [None]:
# Families (keys of master_dict['PFAS_HPL']) for which library reactions were
# matched; run the per-family analysis on each of them in turn.
matched_families = list(master_dict['PFAS_HPL'].keys())

# Iterate directly with `family_name`; the former `fmly` alias was redundant.
for family_name in matched_families:
    analyze_per_family(family_name, lib_fam_rxn_dict, reaction_dict, database, compare_kinetics=False)
    

In [None]:
# Split the library reactions into those without a family match and those with one.
unmatched, matched = kinetics_library_to_training_tools_modified.pick_out_unmatched_rxns(
    database,
    libraries,
    list(database.kinetics.families),
)

In [None]:
# Report how many entries ended up in `matched` and in `unmatched` (in that order).
print(len(matched), len(unmatched))

In [None]:
# Run the per-family analysis for 'Perfluoroalkene_Formation' without kinetics comparison.
family_name = 'Perfluoroalkene_Formation'
analyze_per_family(family_name, lib_fam_rxn_dict, reaction_dict, database, compare_kinetics=False)

In [None]:
# Run the per-family analysis for 'Lactone_Formation' without kinetics comparison.
family_name = 'Lactone_Formation'
analyze_per_family(family_name, lib_fam_rxn_dict, reaction_dict, database, compare_kinetics=False)

In [None]:
# Run the per-family analysis for '1,2_Insertion_carbene' with kinetics comparison on.
# NOTE(review): the notebook-level compare_kinetics flag is False, so Step 1 did not call
# add_rules_from_training / fill_rules_by_averaging_up; confirm the comparison is still meaningful.
family_name = '1,2_Insertion_carbene'
analyze_per_family(family_name, lib_fam_rxn_dict, reaction_dict, database, compare_kinetics=True)


In [None]:
# Run the per-family analysis for 'R_Addition_MultipleBond' with kinetics comparison on.
# NOTE(review): the notebook-level compare_kinetics flag is False, so Step 1 did not call
# add_rules_from_training / fill_rules_by_averaging_up; confirm the comparison is still meaningful.
family_name = 'R_Addition_MultipleBond'
analyze_per_family(family_name, lib_fam_rxn_dict, reaction_dict, database, compare_kinetics=True)


In [None]:
# Run the per-family analysis for 'XY_Addition_MultipleBond' with kinetics comparison on.
# NOTE(review): the notebook-level compare_kinetics flag is False, so Step 1 did not call
# add_rules_from_training / fill_rules_by_averaging_up; confirm the comparison is still meaningful.
family_name = 'XY_Addition_MultipleBond'
analyze_per_family(family_name, lib_fam_rxn_dict, reaction_dict, database, compare_kinetics=True)


In [None]:
# Run the per-family analysis for 'Singlet_Carbene_Intra_Disproportionation' with kinetics comparison on.
# NOTE(review): the notebook-level compare_kinetics flag is False, so Step 1 did not call
# add_rules_from_training / fill_rules_by_averaging_up; confirm the comparison is still meaningful.
family_name = 'Singlet_Carbene_Intra_Disproportionation'
analyze_per_family(family_name, lib_fam_rxn_dict, reaction_dict, database, compare_kinetics=True)


In [None]:
# Run the per-family analysis for '1,2_Insertion_CO' with kinetics comparison on.
# NOTE(review): the notebook-level compare_kinetics flag is False, so Step 1 did not call
# add_rules_from_training / fill_rules_by_averaging_up; confirm the comparison is still meaningful.
family_name = '1,2_Insertion_CO'
analyze_per_family(family_name, lib_fam_rxn_dict, reaction_dict, database, compare_kinetics=True)


In [None]:
# Run the per-family analysis for '1+2_Cycloaddition' with kinetics comparison on.
# NOTE(review): the notebook-level compare_kinetics flag is False, so Step 1 did not call
# add_rules_from_training / fill_rules_by_averaging_up; confirm the comparison is still meaningful.
family_name = '1+2_Cycloaddition'
analyze_per_family(family_name, lib_fam_rxn_dict, reaction_dict, database, compare_kinetics=True)


In [None]:
# Run the per-family analysis for '1,3_Insertion_CO2' with kinetics comparison on.
# NOTE(review): the notebook-level compare_kinetics flag is False, so Step 1 did not call
# add_rules_from_training / fill_rules_by_averaging_up; confirm the comparison is still meaningful.
family_name = '1,3_Insertion_CO2'
analyze_per_family(family_name, lib_fam_rxn_dict, reaction_dict, database, compare_kinetics=True)


In [None]:
# Run the per-family analysis for '1,3_sigmatropic_rearrangement' with kinetics comparison on.
# NOTE(review): the notebook-level compare_kinetics flag is False, so Step 1 did not call
# add_rules_from_training / fill_rules_by_averaging_up; confirm the comparison is still meaningful.
family_name = '1,3_sigmatropic_rearrangement'
analyze_per_family(family_name, lib_fam_rxn_dict, reaction_dict, database, compare_kinetics=True)


In [None]:
#find the reactions without matches and save them to a file


## Step 2b (optional): Review and select reactions to be added

In [None]:
# Optional review pass over master_dict with prompt=True
# (presumably asks interactively which reactions to keep; verify in the tools module).
review_reactions(master_dict, prompt=True)

## Step 2c (optional): Manual processing for reactions with multiple matches

In [None]:
# Optional manual handling of the reactions collected in multiple_dict (multiple family matches).
# NOTE(review): presumably updates master_dict with the chosen matches; verify in the tools module.
manual_selection(master_dict, multiple_dict, database)

## Step 2d: Final review of reactions to be added

In [None]:
# Final review of the reactions in master_dict before they are written, with prompt=False.
review_reactions(master_dict, prompt=False)

## Step 3: Write the new training reactions to the database

In [None]:
# Write the selected reactions into the training depository of each family.
# master_dict maps library name -> {family name -> list of reactions}; for every
# reaction the metadata (reference, reference type, descriptions) is copied from
# the original library entry found via reaction.index.
for library_name, reaction_dict in master_dict.items():
    library = database.kinetics.libraries[library_name]

    for family_name, reaction_list in reaction_dict.items():
        print('Adding training reactions from {0} to {1}...'.format(library_name, family_name))

        family = database.kinetics.families[family_name]
        try:
            depository = family.get_training_depository()
        except Exception as err:
            # Catch Exception rather than using a bare `except:` so that
            # KeyboardInterrupt/SystemExit propagate, and chain the original
            # error so its cause is not lost.
            raise Exception('Unable to find training depository in {0}. Check that one exists.'.format(family_name)) from err

        print('Training depository previously had {} rxns. Now adding {} new rxn(s).'.format(len(depository.entries), len(reaction_list)))

        # Parallel lists: one item per reaction, in the same order as reaction_list.
        ref_list = []
        type_list = []
        short_list = []
        long_list = []

        for reaction in reaction_list:
            # Get the original entry to retrieve metadata
            orig_entry = library.entries[reaction.index]
            short_desc = orig_entry.short_desc
            long_desc = 'Training reaction from kinetics library: {0}\nOriginal entry: {1}'.format(library_name, orig_entry.label)
            if orig_entry.long_desc:
                # Keep the library's own long description below the provenance header.
                long_desc += '\n' + orig_entry.long_desc

            ref_list.append(orig_entry.reference)
            type_list.append(orig_entry.reference_type)
            short_list.append(short_desc)
            long_list.append(long_desc)

        family.save_training_reactions(
            reaction_list,
            reference=ref_list,
            reference_type=type_list,
            short_desc=short_list,
            long_desc=long_list,
        )