# Convert Kinetics Library to Training Reactions Script

Specify the kinetics library name below and run the script.  It automatically overwrites whichever training reaction files need to be updated.  Afterwards, commit those files.

This script only trains safely.  In other words, a training reaction is created only when exactly one match from an RMG family is found.  Sometimes there are no matches from the RMG reaction families, or there are multiple matches.  Either case indicates an error that requires manual fixing, and the script prints a message when it occurs.

In [None]:
# Kinetics libraries to pull reactions from (None loads every library)
libraries = ["vinylCPD_H"]

# Target families for the new training reactions: 'all', or a list such as
# ['R_Addition_MultipleBond']
families = ["Intra_R_Add_Endocyclic"]

# Plot kinetics comparisons when True
compareKinetics = True

# When True, also print library reactions that fit none of the chosen families.
# Expect a lot of unnecessary output if only a few families are selected.
showAll = False

# Prioritize aromatic resonance structures to reduce cases of multiple matches
filterAromatic = True

# Use verbose comments when averaging the tree
verboseComments = False

In [None]:
from rmgpy import settings
from rmgpy.data.rmg import RMGDatabase
from kinetics_library_to_training_tools import *


## Step 1: Load RMG-database with specified libraries and families

In [None]:
# Build the database from the configured families and libraries, together
# with the 'training' kinetics depositories.
database = RMGDatabase()
database.load(
    path=settings['database.directory'],
    thermoLibraries=['primaryThermoLibrary'],  # extend this list if other thermo libraries are needed
    kineticsFamilies=families,
    reactionLibraries=libraries,
    kineticsDepositories=['training'],
)

# An accurate kinetics comparison needs rate rules that already include the
# existing training reactions, with the tree then filled in by averaging.
if compareKinetics:
    for family in database.kinetics.families.values():
        family.addKineticsRulesFromTrainingSet(thermoDatabase=database.thermo)
        family.fillKineticsRulesByAveragingUp(verbose=verboseComments)

## Step 2a: Generate library reactions from families to get proper labels

In [None]:
# Run the library reactions through the selected families (per this step's
# heading, to obtain properly labeled reactions). Two results come back:
# master_dict and multiple_dict, both consumed by the later cells.
master_dict, multiple_dict = process_reactions(
    database,
    libraries,
    families,
    compareKinetics=compareKinetics,
    showAll=showAll,
    filterAromatic=filterAromatic,
)

## Step 2b (optional): Review and select reactions to be added

In [None]:
# Optional review pass with prompt=True; per this step's heading, this is where
# reactions are reviewed and selected for addition.
# NOTE(review): review_reactions is not defined in this notebook (presumably it
# comes from kinetics_library_to_training_tools) -- confirm its behavior there.
review_reactions(master_dict, prompt=True)

## Step 2c (optional): Manual processing for reactions with multiple matches

In [None]:
# Optional manual handling of reactions that had multiple matches (per this
# step's heading); takes master_dict, multiple_dict, and the loaded database.
# NOTE(review): manual_selection is not defined in this notebook; presumably it
# updates master_dict with the chosen matches -- confirm against the helper.
manual_selection(master_dict, multiple_dict, database)

## Step 2d: Final review of reactions to be added

In [None]:
# Final review of the reactions to be added, this time with prompt=False.
# NOTE(review): review_reactions is not defined in this notebook (presumably it
# comes from kinetics_library_to_training_tools) -- confirm its behavior there.
review_reactions(master_dict, prompt=False)

## Step 3: Write the new training reactions to the database

In [None]:
# Add the selected reactions to each family's training depository, copying the
# reference metadata and descriptions from the originating library entries.
# .items() is used instead of the Python-2-only .iteritems() so this cell runs
# under both Python 2 and Python 3.
for library_name, reaction_dict in master_dict.items():
    library = database.kinetics.libraries[library_name]

    for family_name, reaction_list in reaction_dict.items():
        print('Adding training reactions from {0} to {1}...'.format(library_name, family_name))

        family = database.kinetics.families[family_name]
        try:
            depository = family.getTrainingDepository()
        except Exception:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt/SystemExit are not re-labelled as a missing
            # training depository.
            raise Exception('Unable to find training depository in {0}. Check that one exists.'.format(family_name))

        print('Training depository previously had {} rxns. Now adding {} new rxn(s).'.format(len(depository.entries), len(reaction_list)))

        # Metadata lists passed alongside reaction_list; one element is
        # appended per reaction, in reaction_list order.
        ref_list = []
        type_list = []
        short_list = []
        long_list = []

        for reaction in reaction_list:
            # Look up the library entry this reaction came from to copy its metadata
            orig_entry = library.entries[reaction.index]
            shortDesc = orig_entry.shortDesc
            # Record the source library and entry label, then keep any
            # original long description after it.
            longDesc = 'Training reaction from kinetics library: {0}\nOriginal entry: {1}'.format(library_name, orig_entry.label)
            if orig_entry.longDesc:
                longDesc += '\n' + orig_entry.longDesc

            ref_list.append(orig_entry.reference)
            type_list.append(orig_entry.referenceType)
            short_list.append(shortDesc)
            long_list.append(longDesc)

        family.saveTrainingReactions(
            reaction_list,
            reference=ref_list,
            referenceType=type_list,
            shortDesc=short_list,
            longDesc=long_list,
        )