# Fit Polycyclic Thermo Groups From Thermo Library Script

This script takes thermo libraries and fits new polycyclic groups from them.  It saves all of the polycyclic groups to the file `new_polycyclic.py`.  This file can be used to overwrite the original polycyclic thermo groups file in `input/thermo/groups/polycyclic.py`.

**IMPORTANT:** It averages all data found within the libraries for each group, but it overwrites any existing thermo data for that group.  If the old data is trustworthy, this script must be modified to preserve it.
    
Uncertainties for the groups are calculated as 2s, where s is the sample standard deviation.

Please fill in the list of thermo libraries in the next block below

In [None]:
# Names of the thermo libraries whose entries are used to fit the polycyclic
# thermo groups. Edit this list before running the remaining cells.
thermo_libraries = [
    'vinylCPD_H',
    'C3',
    'C10H11',
    'Fulvene_H',
    'naphthalene_H',
]


In [None]:
import copy
from IPython.display import display
from rmgpy.data.thermo import *
from rmgpy.data.base import Entry
from rmgpy.data.rmg import RMGDatabase
from rmgpy import settings
from rmgpy.species import Species
from arkane.output import prettify

## Variety of helper functions

In [None]:
def extract_polycyclic_groups(molecule):
    """
    Extract polycyclic functional groups from a real molecule
    """
    struct = molecule.copy(deep=True)
    # Saturate the structure if it is a radical
    if struct.is_radical():
        struct.saturate()
    struct.delete_hydrogens()
    
    poly_rings = struct.get_polycycles()
    groups = [convert_cycle_to_group(ring) for ring in poly_rings]
    
    return groups
                
def convert_cycle_to_group(cycle):
    """
    Convert a list of atoms in a cycle to a functional Group object.

    Args:
        cycle: sequence of molecule atoms. Each atom must expose `atomtype`
            and an `edges` mapping of bonded atom -> bond (with an `order`).

    Returns:
        A Group holding one GroupAtom per atom of `cycle` (the first atom is
        labelled '*', all others have an empty label) and one GroupBond for
        every bond whose two ends are both in `cycle`. Bonds to atoms outside
        of the cycle are ignored.
    """
    from rmgpy.molecule.group import GroupAtom, GroupBond, Group

    # Create a GroupAtom for each atom in the cycle; label the first one with a *
    group_atoms = {}
    for position, atom in enumerate(cycle):
        group_atoms[atom] = GroupAtom(atomtype=[atom.atomtype],
                                      radical_electrons=[0],
                                      label='*' if position == 0 else '')

    # Pass a real list: dict.values() is only a view in Python 3
    group = Group(atoms=list(group_atoms.values()))

    # Create a GroupBond for each bond between atoms in the cycle, but not outside of it
    for atom in cycle:
        for bonded_atom, bond in atom.edges.items():
            if bonded_atom not in group_atoms:
                continue
            first, second = group_atoms[atom], group_atoms[bonded_atom]
            # Every bond is seen from both of its ends, so only add it once,
            # keeping the same bond order as in the original molecule
            if not group.has_bond(first, second):
                group.add_bond(GroupBond(first, second, order=[bond.order]))

    group.update()

    return group

## Thermo comparison and display functions

In [None]:
def display_thermo(thermoData):
    """
    Print the H298 and S298 values of `thermoData`.

    `H298.value_si` is divided by 4184 and reported as kcal/mol;
    `S298.value_si` is divided by 4.184 and reported as cal/mol*K.
    Nothing is returned.
    """
    enthalpy = thermoData.H298.value_si/4184
    print('H298 = {0} kcal/mol'.format(enthalpy))

    entropy = thermoData.S298.value_si/4.184
    print('S298 = {0} cal/mol*K'.format(entropy))
def compare_thermo_data(thermo_data1, thermo_data2):
    """
    Print how H298 and S298 of `thermo_data1` differ from `thermo_data2`.

    Each difference is taken as `thermo_data1 - thermo_data2` on the
    `value_si` attributes, then divided by 4184 (reported as kcal/mol) for
    H298 and by 4.184 (reported as cal/mol*K) for S298. Heat capacities are
    not compared. Nothing is returned.
    """
    report_lines = (
        ('Difference in H298 = {0} kcal/mol', 'H298', 4184),
        ('Difference S298 = {0} cal/mol*K', 'S298', 4.184),
    )
    for template, quantity, divisor in report_lines:
        first = getattr(thermo_data1, quantity).value_si
        second = getattr(thermo_data2, quantity).value_si
        print(template.format((first - second)/divisor))

## Load all the thermo libraries

In [None]:
# Load the RMG database with the selected thermo libraries. Kinetics families
# and depositories are set to 'none' and no reaction libraries are requested.
database = RMGDatabase()
database.load(
    settings['database.directory'],
    thermo_libraries=thermo_libraries,
    kinetics_families='none',
    kinetics_depositories='none',
    reaction_libraries=[],
)

# `thermo_database` is the live thermo database that the fitting cells modify;
# `thermo_database0` is a deep copy taken right after loading, which the
# benchmark cell uses to produce the "original" estimates.
thermo_database = database.thermo
thermo_database0 = copy.deepcopy(thermo_database)

## Extract polycyclic groups from thermo libraries and create new ones

In [None]:
# Maps each polycyclic group entry to a list of tuples
# (fitted group thermo, source library entry, source thermo library).
fitting_dictionary = {}
polycyclic_tree = thermo_database.groups['polycyclic']

for library_name in thermo_libraries:
    thermo_library = database.thermo.libraries[library_name]
    for label, entry in thermo_library.entries.items():
        molecule = entry.item
        library_thermo_data = entry.data
        if not molecule.get_all_polycyclic_vertices():
            # Only polycyclic species contribute to the fit
            continue

        print(label)
        species = Species(molecule=[molecule])
        species.generate_resonance_structures()
        print('Species has {0} resonance structures'.format(len(species.molecule)))

        # Estimate every resonance structure up front, before any new group
        # entries are added to the polycyclic tree further down
        estimated_thermos = [thermo_database.estimate_thermo_via_group_additivity(resonance_structure)
                             for resonance_structure in species.molecule]

        # Pair each structure with its own estimate instead of recovering the
        # position with list.index(), which returns the first equal element
        for resonance_structure, estimated_thermo in zip(species.molecule, estimated_thermos):
            ring_groups, polycyclic_groups = database.thermo.get_ring_groups_from_comments(estimated_thermo)

            if not polycyclic_groups:
                raise Exception('Species {0} detected as polycyclic but estimated thermo contained no '
                                'polycyclic groups: you need to create a new polycyclic group'.format(label))

            if len(polycyclic_groups) > 1:
                message = ('Species {0} has matched multiple polycyclic groups. This cannot be fitted '
                           'with a single molecule\'s thermo data.'.format(label))
                print(message)
                raise Exception(message)

            polycyclic_group = polycyclic_groups[0]
            smiles = resonance_structure.to_smiles()
            print('Molecule {0} has a single polycyclic group match in thermo estimate.'.format(smiles))
            # Draw the molecule in ipython notebook
            display(resonance_structure)
            print('Molecule SMILES: {0}'.format(smiles))
            print('Estimated thermo data:')
            print(prettify(repr(estimated_thermo)))
            display_thermo(estimated_thermo)

            # Estimate with the polycyclic contribution removed ...
            without_polycyclic_group_thermo = remove_thermo_data(copy.deepcopy(estimated_thermo),
                                                                 polycyclic_group.data)
            # ... so that what is left of the library value is the new group value
            new_polycyclic_group_thermo = remove_thermo_data(copy.deepcopy(library_thermo_data),
                                                             without_polycyclic_group_thermo)

            # Check to make sure that the polycyclic group is not generic
            # If it is, create a new polycyclic group as its child
            if polycyclic_group.label == 'PolycyclicRing':
                groups = extract_polycyclic_groups(resonance_structure)
                # Validate before groups[0] is used
                if len(groups) != 1:
                    raise Exception('Expected exactly one polycyclic group for species {0}, '
                                    'found {1}'.format(label, len(groups)))
                print(groups[0].to_adjacency_list())

                # Create a new entry in the polycyclic groups with the same name as the
                # thermo library entry, adding a numeric suffix if that name is taken
                entry_label = label
                counter = 0
                while entry_label in polycyclic_tree.entries:
                    counter += 1
                    entry_label = entry_label.split('-')[0]
                    entry_label += '-{0}'.format(counter)

                print('Polycyclic group was found to be generic "PolycyclicRing". '
                      'Creating new entry: {0}.'.format(entry_label))
                new_entry = Entry(
                    index=len(polycyclic_tree.entries) + 1,
                    label=entry_label,
                    item=groups[0],
                    data=polycyclic_group.data,  # Use dummy thermo here so other estimates can find this group
                    parent=polycyclic_group,
                )
                polycyclic_tree.entries[entry_label] = new_entry

                # Set the new entry as the polycyclic_group and make it a child of the generic group
                polycyclic_group = new_entry
                polycyclic_tree.entries['PolycyclicRing'].children.append(polycyclic_group)
            else:
                print('Matched polycyclic group "{0}"'.format(polycyclic_group.label))

            # Record the fitted group data, the original library entry, and the thermo library
            fitting_dictionary.setdefault(polycyclic_group, []).append(
                (new_polycyclic_group_thermo, entry, thermo_library))

        print('=====================')

## Fit the polycyclic groups and write to file

In [None]:
for polycyclic_group, fitting_groups in fitting_dictionary.items():
    # Report what the group held before it is overwritten below
    print('Original thermo data for polycyclic group: {0}'.format(polycyclic_group.label))
    if not polycyclic_group.data:
        print('No data found. Was created as a new group.')
    else:
        print(prettify(repr(polycyclic_group.data)))

    # Split the (fitted thermo, library entry, thermo library) tuples into columns
    thermo_dataset, labels, library_labels = [], [], []
    for fitted_thermo, source_entry, source_library in fitting_groups:
        thermo_dataset.append(fitted_thermo)
        labels.append(source_entry.label)
        library_labels.append(source_library.name)

    # Average the new group values to fit the original polycyclic group
    fitted_group_data = average_thermo_data(thermo_dataset)
    polycyclic_group.data = fitted_group_data
    polycyclic_group.short_desc = "Fitted from thermo library values"

    # One provenance line per contributing library species
    comment = ''.join(
        "Fitted from species {0} from {1} library.\n".format(species_label, library_label)
        for species_label, library_label in zip(labels, library_labels)
    )
    polycyclic_group.long_desc = comment.strip()

    print('Fitted thermo data for polycyclic group: {0}'.format(polycyclic_group.label))
    print(prettify(repr(polycyclic_group.data)))
    print(polycyclic_group.long_desc)
    print('====================================================================')

# At this point, save the entire set of polycyclic thermo groups to file
thermo_database.groups['polycyclic'].save('new_polycyclic.py')

## Benchmark the new groups by checking the estimates against library values

In [None]:
def _report_estimate(name, estimate, library_thermo_data):
    """
    Print one group additivity estimate and its deviation from the library value.

    Args:
        name: lowercase description of the estimate used in the printed
            headings, e.g. 'estimated' or 'original estimated'.
        estimate: thermo data estimated from group additivity; the polycyclic
            groups it used are read back from it via
            `get_ring_groups_from_comments`.
        library_thermo_data: the library thermo data to compare against.
    """
    print('{0} thermo data:'.format(name.capitalize()))
    _, polycyclic_groups = database.thermo.get_ring_groups_from_comments(estimate)
    print('Polycyclic groups: {0}'.format(' '.join(grp.label for grp in polycyclic_groups)))
    display_thermo(estimate)
    print('')

    print('Comparison of library thermo with {0} thermo'.format(name))
    compare_thermo_data(library_thermo_data, estimate)
    print('')


# Test that the new group additivity values can estimate the old library ones.
for library_name in thermo_libraries:
    thermo_library = database.thermo.libraries[library_name]
    for label, entry in thermo_library.entries.items():
        molecule = entry.item
        library_thermo_data = entry.data

        if not molecule.get_all_polycyclic_vertices():
            # Only polycyclic species were used in the fit
            continue

        species = Species(molecule=[molecule])
        species.generate_resonance_structures()
        print(label)
        display(species.molecule[0])
        print('Species has {0} resonance structures(s)'.format(len(species.molecule)))
        find_cp0_and_cpinf(species, library_thermo_data)

        # Estimate with the refitted groups and with the untouched copy of the database
        estimated_thermo = thermo_database.get_thermo_data_from_groups(species)
        original_estimated_thermo = thermo_database0.get_thermo_data_from_groups(species)

        matches = library_thermo_data.is_identical_to(estimated_thermo)
        if matches:
            print('Library thermo data for species {0} matches the estimate from group additivity.\n'.format(
                label))
        else:
            print('Library thermo data for species {0} does not match the estimate from group '
                  'additivity\n'.format(label))

        print('Library thermo data:')
        display_thermo(library_thermo_data)
        print('')

        # The new estimate is only reported when it differs from the library value
        if not matches:
            _report_estimate('estimated', estimated_thermo, library_thermo_data)

        # The original estimate, and the polycyclic groups that it used, are always reported
        _report_estimate('original estimated', original_estimated_thermo, library_thermo_data)

        print('=======================')