# Fit Polycyclic Thermo Groups From Thermo Library Script

This script takes thermo libraries and fits new polycyclic groups from them.  It saves all of the polycyclic groups to the file `new_polycyclic.py`.  This file can be used to overwrite the original polycyclic thermo groups file in `input/thermo/groups/polycyclic.py`.

**IMPORTANT:** It averages any data that is found within the libraries, but will overwrite any old thermo data.  If the old data is trustworthy, this script must be modified so that the old data is not overwritten.
    
Uncertainties for the groups are calculated as 2s, where s is the sample standard deviation.

Please fill in the list of thermo libraries in the next block below

In [None]:
# Names of the thermo libraries whose entries are used to fit the polycyclic thermo groups.
# Edit this list before running the cells below.
thermoLibraries = [
    'vinylCPD_H',
    'C3',
    'C10H11',
    'Fulvene_H',
    'naphthalene_H',
]


In [None]:
import os
import re
import copy
import numpy
from IPython.display import Image, display
from rmgpy.data.thermo import *
from rmgpy.data.base import Entry
from rmgpy.data.rmg import RMGDatabase
from rmgpy import settings
from rmgpy.species import Species
from rmgpy.molecule import Molecule
from rmgpy.cantherm.output import prettify

## Variety of helper functions

In [None]:
def extractPolycyclicGroups(molecule):
    """
    Return a list with one functional Group per polycyclic ring system of `molecule`.

    All work is done on a deep copy, so the molecule passed in is left untouched.
    The copy is saturated when it reports being a radical and has its hydrogens
    deleted before its polycyclic rings are handed to `convertCycleToGroup`.
    """
    workingCopy = molecule.copy(deep=True)

    # Radicals are saturated first so the rings are extracted from the closed-shell structure
    if workingCopy.isRadical():
        workingCopy.saturate()
    workingCopy.deleteHydrogens()

    extractedGroups = []
    for ringAtoms in workingCopy.getPolycyclicRings():
        extractedGroups.append(convertCycleToGroup(ringAtoms))
    return extractedGroups
                
def convertCycleToGroup(cycle):
    """
    Convert a list of atoms in a cycle to a functional Group object.

    Every atom in `cycle` becomes a GroupAtom that carries the atom's own atom type
    and zero radical electrons; the first atom of `cycle` is labelled '*'.  Only bonds
    whose two ends both belong to `cycle` are reproduced, each as a GroupBond with the
    bond order of the original bond.

    The GroupAtoms are handed to the Group in the same order as `cycle`, so the atom
    order of the resulting group is reproducible from run to run.

    Returns the new Group, after its `update()` method has been called.
    """
    from rmgpy.molecule.group import GroupAtom, GroupBond, Group

    # Map each original atom to its GroupAtom and, separately, keep the GroupAtoms in
    # cycle order.  enumerate() gives the position directly instead of a linear
    # cycle.index() search for every atom.
    groupAtoms = {}
    orderedGroupAtoms = []
    for position, atom in enumerate(cycle):
        groupAtom = GroupAtom(atomType=[atom.atomType],
                              radicalElectrons=[0],
                              label='*' if position == 0 else '')
        groupAtoms[atom] = groupAtom
        orderedGroupAtoms.append(groupAtom)

    group = Group(atoms=orderedGroupAtoms)

    # Create a GroupBond for each bond between atoms in the cycle, but not outside of the cycle
    for atom in cycle:
        for bondedAtom, bond in atom.edges.items():
            # Neighbours outside the cycle have no GroupAtom and are skipped
            if bondedAtom not in groupAtoms:
                continue
            # Each bond is visited from both of its ends; only add it the first time
            if not group.hasBond(groupAtoms[atom], groupAtoms[bondedAtom]):
                group.addBond(GroupBond(groupAtoms[atom], groupAtoms[bondedAtom], order=[bond.order]))

    group.update()

    return group

## Thermo comparison and display functions

In [None]:
def displayThermo(thermoData):
    """
    Print the H298 and S298 of `thermoData`.

    `thermoData.H298.value_si` is divided by 4184 and printed as kcal/mol, and
    `thermoData.S298.value_si` is divided by 4.184 and printed as cal/mol*K
    (presumably the SI values are in J/mol and J/(mol*K) -- confirm against rmgpy).

    The single-argument, parenthesised print form behaves the same under Python 2
    and Python 3; the float divisor avoids integer truncation under Python 2.
    """
    print('H298 = {0} kcal/mol'.format(thermoData.H298.value_si / 4184.0))
    print('S298 = {0} cal/mol*K'.format(thermoData.S298.value_si / 4.184))
def compareThermoData(thermoData1, thermoData2):
    """
    Print how `thermoData1` differs from `thermoData2` in H298 and S298.

    The differences are computed as `thermoData1 - thermoData2` on the `value_si`
    attributes, then divided by 4184 (printed as kcal/mol) and by 4.184 (printed as
    cal/mol*K) respectively.

    The single-argument, parenthesised print form behaves the same under Python 2
    and Python 3; the float divisor avoids integer truncation under Python 2.
    """
    delH = thermoData1.H298.value_si - thermoData2.H298.value_si
    print('Difference in H298 = {0} kcal/mol'.format(delH / 4184.0))
    delS = thermoData1.S298.value_si - thermoData2.S298.value_si
    print('Difference S298 = {0} cal/mol*K'.format(delS / 4.184))
    # Heat capacity comparison, currently disabled:
    #Tdata = [300,500,1000,2000]
    #for T in Tdata:
    #    delCp = thermoData1.getHeatCapacity(T) - thermoData2.getHeatCapacity(T)
    #    print 'Difference in Cp at {0} = {1} cal/mol*K'.format(T, delCp/4.184)

## Load all the thermo libraries

In [None]:
# Load the RMG database restricted to the thermo libraries listed in `thermoLibraries`;
# the kinetics families/depositories are given as 'none' and the reaction libraries as
# an empty list.
database = RMGDatabase()
database.load(
    settings['database.directory'],
    thermoLibraries=thermoLibraries,
    kineticsFamilies='none',
    kineticsDepositories='none',
    reactionLibraries=[],
)

# `thermoDatabase0` is an independent deep copy taken right after loading, while
# `thermoDatabase` is just another name for the live `database.thermo` object.
thermoDatabase0 = copy.deepcopy(database.thermo)
thermoDatabase = database.thermo

## Extract polycyclic groups from thermo libraries and create new ones

In [None]:
# Maps each polycyclic group entry to a list of tuples
# (fitted group thermo, original library entry, thermo library),
# one tuple per resonance structure whose estimate matched that group.
fittingDictionary = {}
for libraryName in thermoLibraries:
    thermoLibrary = database.thermo.libraries[libraryName]
    for label, entry in thermoLibrary.entries.items():
        molecule = entry.item
        libraryThermoData = entry.data
        # Only library species that contain polycyclic atoms are relevant here
        if not molecule.getAllPolycyclicVertices():
            continue

        print(label)
        species = Species(molecule=[molecule])
        species.generate_resonance_structures()
        print('Species has {0} resonance structures'.format(len(species.molecule)))

        # Estimate every resonance structure before any new group is added below.
        # A dedicated loop name is used because a Python 2 list comprehension leaks
        # its variable and would otherwise overwrite `molecule`.
        estimatedThermos = [thermoDatabase.estimateThermoViaGroupAdditivity(resonanceStructure)
                            for resonanceStructure in species.molecule]

        # zip() pairs each estimate with its own structure directly, instead of
        # recovering the position with estimatedThermos.index()
        for resonanceStructure, estimatedThermo in zip(species.molecule, estimatedThermos):
            ringGroups, polycyclicGroups = database.thermo.getRingGroupsFromComments(estimatedThermo)

            if len(polycyclicGroups) == 0:
                raise Exception('Species {0} detected as polycyclic but estimated thermo contained '
                                'no polycyclic groups: you need to create a new polycyclic group'.format(label))
            if len(polycyclicGroups) > 1:
                raise Exception('Species {0} has matched multiple polycyclic groups. '
                                'This cannot be fitted with a single molecule\'s thermo data.'.format(label))

            polycyclicGroup = polycyclicGroups[0]
            smiles = resonanceStructure.toSMILES()
            print('Molecule {0} has a single polycyclic group match in thermo estimate.'.format(smiles))
            # Draw the molecule in ipython notebook
            display(resonanceStructure)
            print('Molecule SMILES: {0}'.format(smiles))
            print('Estimated thermo data:')
            print(prettify(repr(estimatedThermo)))
            displayThermo(estimatedThermo)

            # Take the matched polycyclic group's data out of the estimate, then take that
            # remainder out of the library thermo: what is left is assigned to the group.
            withoutPolycyclicGroupThermo = removeThermoData(copy.deepcopy(estimatedThermo), polycyclicGroup.data)
            newPolycyclicGroupThermo = removeThermoData(copy.deepcopy(libraryThermoData), withoutPolycyclicGroupThermo)

            # Check to make sure that the polycyclic group is not generic
            # If it is, create a new polycyclicGroup as the child
            if polycyclicGroup.label == 'PolycyclicRing':
                groups = extractPolycyclicGroups(resonanceStructure)
                # Validate before indexing so an empty result gives a clear error
                if len(groups) != 1:
                    raise Exception('Expected exactly one polycyclic group for species {0} '
                                    'but extracted {1}'.format(label, len(groups)))
                print(groups[0].toAdjacencyList())

                # Create a new entry in the polycyclic groups with the same name as the thermo library entry.
                # If that name is taken, append '-1', '-2', ... to the full library label
                # (labels that themselves contain '-' are kept intact).
                polycyclicEntries = thermoDatabase.groups['polycyclic'].entries
                entryLabel = label
                counter = 0
                while entryLabel in polycyclicEntries:
                    counter += 1
                    entryLabel = '{0}-{1}'.format(label, counter)

                print('Polycyclic group was found to be generic "PolycyclicRing". Creating new entry: {0}.'.format(entryLabel))
                polycyclicEntries[entryLabel] = Entry(
                    index=len(polycyclicEntries) + 1,
                    label=entryLabel,
                    item=groups[0],
                    data=polycyclicGroup.data,  # Use dummy thermo here so other estimates can find this group
                    parent=polycyclicGroup,
                )

                # Set the new entry as the polycyclicGroup and make it a child of the generic group
                polycyclicGroup = polycyclicEntries[entryLabel]
                polycyclicEntries['PolycyclicRing'].children.append(polycyclicGroup)
            else:
                print('Matched polycyclic group "{0}"'.format(polycyclicGroup.label))

            # Add a tuple containing fitted group data, the original library entry, and thermo library
            fittingDictionary.setdefault(polycyclicGroup, []).append(
                (newPolycyclicGroupThermo, entry, thermoLibrary))

        print('=====================')

## Fit the polycyclic groups and write to file

In [None]:
# For every polycyclic group collected in `fittingDictionary`, replace its thermo data
# with the average of the values fitted from the library species, and record which
# species and libraries the fit came from.
for polycyclicGroup, fittingGroups in fittingDictionary.items():
    print('Original thermo data for polycyclic group: {0}'.format(polycyclicGroup.label))
    if polycyclicGroup.data:
        print(prettify(repr(polycyclicGroup.data)))
    else:
        print('No data found. Was created as a new group.')

    # Average the new group values to fit the original polycyclic group
    thermoDataset = [fitTuple[0] for fitTuple in fittingGroups]
    fittedGroupData = averageThermoData(thermoDataset)
    polycyclicGroup.data = fittedGroupData
    polycyclicGroup.shortDesc = "Fitted from thermo library values"

    # One provenance line per fitted value: (fitted thermo, library entry, thermo library)
    polycyclicGroup.longDesc = '\n'.join(
        "Fitted from species {0} from {1} library.".format(libraryEntry.label, library.name)
        for _, libraryEntry, library in fittingGroups
    )

    print('Fitted thermo data for polycyclic group: {0}'.format(polycyclicGroup.label))
    print(prettify(repr(polycyclicGroup.data)))
    print(polycyclicGroup.longDesc)
    print('====================================================================')

# At this point, save the entire set of polycyclic thermo groups to a new file
thermoDatabase.groups['polycyclic'].save('new_polycyclic.py')

## Benchmark the new groups by checking the estimates against library values

In [None]:
# Test that the new group additivity values can estimate the old library ones.

def _reportEstimate(heading, comparisonHeading, estimate, libraryThermoData):
    """Print an estimate's polycyclic groups and thermo, then its difference from the library thermo."""
    print(heading)
    ringGroups, polycyclicGroups = database.thermo.getRingGroupsFromComments(estimate)
    print('Polycyclic groups: {0}'.format(' '.join([grp.label for grp in polycyclicGroups])))
    displayThermo(estimate)
    print('')

    print(comparisonHeading)
    compareThermoData(libraryThermoData, estimate)
    print('')


for libraryName in thermoLibraries:
    thermoLibrary = database.thermo.libraries[libraryName]
    for label, entry in thermoLibrary.entries.items():
        molecule = entry.item
        libraryThermoData = entry.data

        # Only library species that contain polycyclic atoms are benchmarked
        if not molecule.getAllPolycyclicVertices():
            continue

        species = Species(molecule=[molecule])
        species.generate_resonance_structures()
        print(label)
        display(species.molecule[0])
        print('Species has {0} resonance structures(s)'.format(len(species.molecule)))
        thermoDatabase.findCp0andCpInf(species, libraryThermoData)

        # `thermoDatabase` holds the refitted groups; `thermoDatabase0` is the deep copy
        # made right after loading, so it still gives the original estimate.
        estimatedThermo = thermoDatabase.getThermoDataFromGroups(species)
        originalEstimatedThermo = thermoDatabase0.getThermoDataFromGroups(species)

        estimateMatchesLibrary = libraryThermoData.isIdenticalTo(estimatedThermo)
        if estimateMatchesLibrary:
            print('Library thermo data for species {0} matches the estimate from group additivity.\n'.format(label))
        else:
            print('Library thermo data for species {0} does not match the estimate from group additivity\n'.format(label))

        print('Library thermo data:')
        displayThermo(libraryThermoData)
        print('')

        # The new estimate is only reported separately when it differs from the library value
        if not estimateMatchesLibrary:
            _reportEstimate('Estimated thermo data:',
                            'Comparison of library thermo with estimated thermo',
                            estimatedThermo, libraryThermoData)

        # Always report the original estimate, listing the polycyclic groups that the
        # ORIGINAL estimate used (not those of the refitted estimate).
        _reportEstimate('Original estimated thermo data:',
                        'Comparison of library thermo with original estimated thermo',
                        originalEstimatedThermo, libraryThermoData)
        print('=======================')