# Progress Metering for OOH abstraction

This is a notebook designed to go through each output file in rwest-autotst-1 and determine the progress of each run.

In [1]:
import os, sys
rmg_path = os.getenv('RMGpy')
if rmg_path and rmg_path not in sys.path:
    sys.path.insert(1,rmg_path)
import os
import re
import numpy as np
from collections import defaultdict, OrderedDict
import pandas as pd
from rmgpy.molecule import Molecule
from rmgpy.reaction import Reaction
import itertools
import IPython
from IPython.display import display, Markdown
def mprint(s): display(Markdown(s))

In [2]:
def isReactantOrProduct(self, reactant):
        """
        Checks to see if a certiant rmgpy.molecule object is in 
        either the reactants side or products side of rmgpy.reaction
        
        Inputs: 
        self (rmgpy.reaction)
        reactant (rmgpy.molecule)
        
        Output:
        result (bool) - True if reactant is in the reaction, Fasle if not.
        """
        
        if reactant in self.reactants:
            gotOne = True
        elif reactant in self.products:
            gotOne = True
        else: 
            gotOne = False
        return gotOne
    
#self = df.iloc[0,1]
#reactant = Molecule(SMILES="OO")
#isReactantOrProduct(self, reactant)

In [3]:
directory = '/gss_gpfs_scratch/harms.n/comparerTST'
results = defaultdict(OrderedDict)
comparerFiles = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
k = 0
for fil in comparerFiles:
    if ".log" in fil and "updated" in fil:
        k += 1
results = defaultdict(OrderedDict)
not_ooh_abstraction = []
sucessKey = 'Normal termination of Gaussian'
failKey = "Error termination"
for i in range(1,k+1):
    r = results[i]
    filename = 'AutoTST-comparer.updated.{0:d}.combined.log'.format(i)
    filepath = os.path.join(directory,filename)
    if os.path.exists(filepath):
        r['1 log file exists'] = 1
    else:
        continue
    with open(filepath) as f:
        lines = f.readlines()
    for j,l in enumerate(lines):
        m = re.match('comparerTST.py:182 performCalcs INFO reaction: (.*)', l)
        if m:
            rxn = eval(m.group(1))
            if not (isReactantOrProduct(rxn, Molecule(SMILES="[O]O")) and isReactantOrProduct(rxn, Molecule(SMILES="OO"))): #Checking to make sure that 
                not_ooh_abstraction.append(i) 
                break
            r['0 reaction'] = rxn      
                
        if 'We have generated a H_Abstraction reaction that matches, and used it to label the atoms' in l:
            r['2 matched H-abstraction'] = 1
          
        if 'Reading existing kinetics file' in l:
            r['XX using existing kinetics data file'] = 1 # Reading in existing .kinetics file
             
        if 'Generating a TS geometry via the direct guess method' in l:
            r['3A started making TS geometry'] = 1
        if "Reading existing ts file" in l:
            r['3B using existing ts data file'] = 1 # If reading in existing .ts file, this bypasses 4, 5 and 6
        if 'optimizeTS INFO Output file' and 'exists and looks complete. Trying that.' in l:
            r['3C Previous TS optimization complete'] = 1 #If reading in existing .log, this then checks if there is an existing IRC clac   
            
        if 'Running loose optimization of TS with frozen center' in l:
            r['4A TS opt w frozen center'] = 1    
        if 'Optimization of TS reaction center distances' in l:
            r['4B TS opt of rxn center'] = 1  
        if 'Optimizing TS attempt' in l:
            r['4C TS optimization started'] = 1
        
        if 'verifyOutputFile INFO Verifying output file' in l:
            r['5A New TS optimization complete'] = 1
            
        if 'Creating IRC file' in l:
            r['6A IRC file created'] = 1
        if "Verifying the IRC output file" in l:
            r['6B New IRC calc complete'] = 1
        if "saveTSData INFO Saving TS result file" in l:
            r['6C New IRC calc successful'] = 1   
            
        if 'Symmetry input file written to' in l:
            r['7A starting Symmetry calculation'] = 1
        if 'Point group:' in l:
            r['7B Symmetry calc successful'] = 1
         
        if 'CanTherm execution initiated' in l:
            r['8 CanTherm started'] = 1
        if 'One or both of the barrier heights of' in l:
            r['8A CanTherm barrier height problem'] = -1
         
        if 'Yay, reaction kinetics calculated!!!' in l:
            r['ZZ overall success'] = 1
            r['ZZZ Complete'] = 1
            
        if "Boo," in l:
            r['ZZ overall fail'] = 1
            r['ZZZ Complete'] = 1
          
        if "gaussian.py:880 verifyIRCOutputFile ERROR Not all of the required keywords for success were found in the IRC output file!" in l:
            r['ZZZ IRC success keywords not found'] = 1

    
    if 'XX using existing TS data file' and 'ZZ overall success' in r.keys():
        r['YY successful prior calculation'] = 1
        
    if '5A New TS optimization complete' and '6A IRC file created' in r.keys():
        r['5B TS successfully optimized'] = 1
    
    if '6B New IRC calc complete' in r.keys() and "6A IRC file created" not in r.keys():
        r['6D IRC calc from previous calculation'] = 1
        del(r['6B New IRC calc complete'])
        
    if '5A New TS optimization complete' and '5C Previous TS optimization complete' in r.keys():
        del(r['5A New TS optimization complete'])
    
    """
    if '3C Previous TS optimization complete' in r.keys():
            
        qmScratchDir = '/gss_gpfs_scratch/harms.n/QMscratch/'

        fileNames = [f for f in os.listdir(qmScratchDir) if os.path.isfile(os.path.join(qmScratchDir, f))]

        r1, r2 = eval(rxn).reactants
        p1, p2 = eval(rxn).products
        

        r1SMILES = r1.toSMILES()
        
        r2SMILES = r2.toSMILES()
        p1SMILES = p1.toSMILES()
        p2SMILES = p2.toSMILES()
        
        r1Augmented = r1.toInChIKey()
        r2Augmented = r2.toInChIKey()
        p1Augmented = p1.toInChIKey()
        p2Augmented = p2.toInChIKey()
        
        for fileName in fileNames:
            if r1Augmented or r2Augmented or p1Augmented or p2Augmented in fileName:
                reactOrProdFilePath = os.path.join(qmScratchDir, fileName)
                f = open(reactOrProdFilePath, "r")
                lastLines = f.readlines()
                    
                if sucessKey or failKey in lastLines:
                    r['WW.0 Complete Reactants / Products Estimate'] = 1
                if failKey in lastLines:
                    r['WW.1 Fail Reactants / Products Estimate'] = 1 
                
            if r1SMILES and r2SMILES and p1SMILES and p2SMILES and ".log" in fileName:
                if "Est" in fileName:
                    est = os.path.join(qmScratchDir, fileName)
                    f = open(est, "r")
                    lastLines = f.readlines()
                    #f.closed
                    
                    if sucessKey or failKey in lastLines:
                        r['WW.A Complete TS Estimate'] = 1
                    if sucessKey in lastLines:
                        r['WW.B Successful TS Estimate'] = 1
                             
                    # check complete
                elif "RxnC" in fileName:
                    rxnC = os.path.join(qmScratchDir, fileName)
                    f = open(rxnC, "r")
                    lastLines = f.readlines()
                    #f.closed()
                    
                    if sucessKey or failKey in lastLines:
                        r['WW.C Complete Rxn Center'] = 1
                    if sucessKey in lastLines:
                        r['WW.D Successful Rxn Center'] = 1
                    # check complete
                    
                elif "IRC" in fileName:
                    IRC = os.path.join(qmScratchDir, fileName)
                    f = open(IRC, "r")
                    lastLines = f.readlines()
                    #f.closed()
                    
                    if sucessKey or failKey in lastLines:
                        r['WW.G Complete IRC log'] = 1
                    if sucessKey in lastLines:
                        r['WW.H Successful IRC log'] = 1
                    # check complete
                    
                else:
                    overall = os.path.join(qmScratchDir, fileName)
                    f = open(overall, "r")
                    lastLines = f.readlines()[-4:]
                    #f.closed()
                    
                    if sucessKey or failKey in lastLines:
                        r['WW.E Complete overall TS log'] = 1
                    if sucessKey in lastLines:
                        r['WW.F Successful overall TS log'] = 1
                    # check complete
                    """
                    
                    

        
        
          
        

for i in not_ooh_abstraction:
    del(results[i])
df = pd.DataFrame(results)
df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,650,651,652,653,654,655,656,657,658,659
0 reaction,"<Molecule ""[CH3]""> + <Molecule ""OO""> <=> <Mole...","<Molecule ""C""> + <Molecule ""[O]O""> <=> <Molecu...","<Molecule ""OO""> + <Molecule ""[O]""> <=> <Molecu...",,"<Molecule ""OO""> + <Molecule ""[OH]""> <=> <Molec...","<Molecule ""[O]O""> + <Molecule ""O""> <=> <Molecu...","<Molecule ""[O]O""> + <Molecule ""C#C""> <=> <Mole...","<Molecule ""[CH]=C""> + <Molecule ""[O]O""> <=> <M...","<Molecule ""[CH]=C""> + <Molecule ""OO""> <=> <Mol...","<Molecule ""[O]O""> + <Molecule ""C=C""> <=> <Mole...",...,"<Molecule ""CCCC1COC1""> + <Molecule ""[O]O""> <=>...","<Molecule ""[CH2]C(=O)CCCC""> + <Molecule ""OO""> ...","<Molecule ""CCCCC[C]=O""> + <Molecule ""OO""> <=> ...","<Molecule ""CCCC1(C)CO1""> + <Molecule ""[O]O""> <...","<Molecule ""CCC(=O)C(C)C""> + <Molecule ""[O]O""> ...","<Molecule ""CC1OCC1(C)C""> + <Molecule ""[O]O""> <...","<Molecule ""CC1OCC1(C)C""> + <Molecule ""[O]O""> <...","<Molecule ""CCCC(C)[C]=O""> + <Molecule ""OO""> <=...","<Molecule ""CC(C)CC1CO1""> + <Molecule ""[O]O""> <...","<Molecule ""[CH2]C(C=O)CCC""> + <Molecule ""OO""> ..."
1 log file exists,1,1,1,1.0,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2 matched H-abstraction,1,1,1,,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
3A started making TS geometry,1,1,1,,1,1,1,1,1,,...,1,1,1,1,1,1,1,1,1,1
3B using existing ts data file,,,,,,1,,,1,,...,,,,,,,,,1,
3C Previous TS optimization complete,1,1,1,,1,,1,1,,,...,,,,,1,,,,,
4A TS opt w frozen center,,,,,,,,,,,...,1,1,1,1,,1,1,1,,1
4B TS opt of rxn center,,,,,,,,,,,...,1,1,1,1,,1,1,1,,
4C TS optimization started,,,,,,,,,,,...,1,1,1,1,,1,1,,,
5A New TS optimization complete,1,1,1,,1,,1,1,,,...,1,1,1,1,1,1,1,,,


In [20]:
qmScratchDir = '/gss_gpfs_scratch/harms.n/QMscratch/'

successKey = 'Normal termination of Gaussian'
failKey = "Error termination"

"""path = os.path.join(qmScratchDir, "OO+[CH]=C=O_C=C=O+[O]OEst.log")
print path

f = open(path, "r")
for line in f.readlines()[-4:]:
    if successKey in line:
        print "YaY"""
    

fileNames = [f for f in os.listdir(qmScratchDir) if os.path.isfile(os.path.join(qmScratchDir, f))]
for column in df:
    if not np.isnan(df[column]["3C Previous TS optimization complete"]):
        print df[column]["ZZ overall success"]
        reaction = df[column]['0 reaction']
        print reaction
        
        possibleFileNames = []
        for x in itertools.permutations(reaction.reactants, 2):
            for y in itertools.permutations(reaction.products, 2):
                X = []
                Y = []
                for item in x:
                    X.append(item.toSMILES())
                for item in y:
                    Y.append(item.toSMILES())   
                l = [tuple(X), tuple(Y)]
                for z in itertools.permutations(l, 2):
                    possibleFileNames.append(z[0][0] + '+' + z[0][1] + '_' + z[1][0] + '+' + z[1][1])
        
        print possibleFileNames
        print
        
        print df[column]
        for possibleFileName in possibleFileNames:
            for fileName in fileNames:
                if fileName.startswith(possibleFileName) and ".log" in fileName:
                    print fileName
                    
                    f = open(os.path.join(qmScratchDir,fileName), 'r')
                    for line in f.readlines()[-4:]:
                        if failKey in line:
                            status = "Complete and failed" 
                            break
                        elif successKey in line:
                            status = "Complete and success"
                            break
                    if not status:
                        status = "Incomplete"
                    print status
                        #print "Boo"
                        #break
                    
                    
                
        print
        
        
        
        
        #qmScratchDir = '/gss_gpfs_scratch/harms.n/QMscratch/'

        #fileNames = [f for f in os.listdir(qmScratchDir) if os.path.isfile(os.path.join(qmScratchDir, f))]
        
        


nan
<Molecule "[CH3]"> + <Molecule "OO"> <=> <Molecule "C"> + <Molecule "[O]O">
['[CH3]+OO_C+[O]O', 'C+[O]O_[CH3]+OO', '[CH3]+OO_[O]O+C', '[O]O+C_[CH3]+OO', 'OO+[CH3]_C+[O]O', 'C+[O]O_OO+[CH3]', 'OO+[CH3]_[O]O+C', '[O]O+C_OO+[CH3]']

0 reaction                               <Molecule "[CH3]"> + <Molecule "OO"> <=> <Mole...
1 log file exists                                                                        1
2 matched H-abstraction                                                                  1
3A started making TS geometry                                                            1
3B using existing ts data file                                                         NaN
3C Previous TS optimization complete                                                     1
4A TS opt w frozen center                                                              NaN
4B TS opt of rxn center                                                                NaN
4C TS optimization started            

KeyboardInterrupt: 

In [44]:
for x in itertools.permutations(reaction.reactants, 2):
    for y in itertools.permutations(reaction.products, 2):
        X = []
        Y = []
        for item in x:
            X.append(item.toSMILES())
        for item in y:
            Y.append(item.toSMILES())   
        l = [tuple(X), tuple(Y)]
        for z in itertools.permutations(l, 2):
            print z[0][0] + '+' + z[0][1] + '_' + z[1][0] + '+' + z[1][1]

CC1C=CCCC1+[O]O_OO+C[C]1C=CCCC1
OO+C[C]1C=CCCC1_CC1C=CCCC1+[O]O
CC1C=CCCC1+[O]O_C[C]1C=CCCC1+OO
C[C]1C=CCCC1+OO_CC1C=CCCC1+[O]O
[O]O+CC1C=CCCC1_OO+C[C]1C=CCCC1
OO+C[C]1C=CCCC1_[O]O+CC1C=CCCC1
[O]O+CC1C=CCCC1_C[C]1C=CCCC1+OO
C[C]1C=CCCC1+OO_[O]O+CC1C=CCCC1


In [4]:
fileNames

['C=CC(=O)[CH]C+OO_C=CC(=O)CC+[O]OIRC.gjf',
 'COC(=O)C=O+[O]O_OO+[CH2]OC(=O)C=OIRC.log',
 'QQONPFPTGQHPMA-UHFFFAOYSA.refined.mol',
 'RAGFJZMALVRYSE-UHFFFAOYSA-u1.crude.mol',
 'O+[CH2]C(C)CC(C)(C)C_CC(C)CC(C)(C)C+[OH].crude.mol',
 'CC(=O)O[O]+OO_CC(=O)OO+[O]O.chk',
 'CC(C)CC(=O)CC(C)C+[OH]_C[C](C)CC(=O)CC(C)C+OEst.log',
 'YOWANVRNYSRJSI-UHFFFAOYSA-u7.crude.mol',
 'CCCCCC=O+[O]O_CCC[CH]CC=O+OOIRC.log',
 'C1CCCCC1+[O]O_OO+[CH]1CCCCC1.gjf',
 'CZOZVNOWUGEPAV-UHFFFAOYSA-u5,7.refined.mol',
 'QEJQAPYSVNHDJF-UHFFFAOYSA-u1.log',
 'C=CO+[O][O]_C=[C]O+[O]ORxnC.log',
 'C[CH]C+OO_CCC+[O]OEst.log',
 'C=CCC=O+[O]O_C=CC[C]=O+OO.py',
 'OC1[CH]C=CC=1+[O][O]_OC1[C]C=CC=1+[O]OIRC.gjf',
 'CC=CCCC(C)C+[OH]_CC=CCC[C](C)C+O.crude.mol',
 'CO[O]+OO_COO+[O]O.log.TS1.log',
 'C=C=C+[O][O]_[CH]=C=C+[O]O.gjf',
 '[CH2]CC(C)O+[O][O]_[CH2]C[C](C)O+[O]ORxnC.gjf',
 'LKXXOSZTMDULEY-UHFFFAOYSA-u1,5.log',
 '[CH2]CC+[O]O_OO+[CH2]C[CH2].crude.mol',
 'C=CC[CH]C=O+OO_C=CCCC=O+[O]O.gjf',
 'JKMPLJUAEDQBOK-UHFFFAOYSA.symm',
 'XTAWB

In [5]:

if '3C Previous TS optimization complete' in :
            
    qmScratchDir = '/gss_gpfs_scratch/harms.n/QMscratch/'

    fileNames = [f for f in os.listdir(qmScratchDir) if os.path.isfile(os.path.join(qmScratchDir, f))]

    r1, r2 = eval(rxn).reactants
    p1, p2 = eval(rxn).products


    r1SMILES = r1.toSMILES()

    r2SMILES = r2.toSMILES()
    p1SMILES = p1.toSMILES()
    p2SMILES = p2.toSMILES()

    r1Augmented = r1.toInChIKey()
    r2Augmented = r2.toInChIKey()
    p1Augmented = p1.toInChIKey()
    p2Augmented = p2.toInChIKey()

    for fileName in fileNames:
        if r1Augmented or r2Augmented or p1Augmented or p2Augmented in fileName:
            reactOrProdFilePath = os.path.join(qmScratchDir, fileName)
            f = open(reactOrProdFilePath, "r")
            lastLines = f.readlines()

            if sucessKey or failKey in lastLines:
                r['WW.0 Complete Reactants / Products Estimate'] = 1
            if failKey in lastLines:
                r['WW.1 Fail Reactants / Products Estimate'] = 1 

        if r1SMILES and r2SMILES and p1SMILES and p2SMILES and ".log" in fileName:
            if "Est" in fileName:
                est = os.path.join(qmScratchDir, fileName)
                f = open(est, "r")
                lastLines = f.readlines()
                #f.closed

                if sucessKey or failKey in lastLines:
                    r['WW.A Complete TS Estimate'] = 1
                if sucessKey in lastLines:
                    r['WW.B Successful TS Estimate'] = 1

                # check complete
            elif "RxnC" in fileName:
                rxnC = os.path.join(qmScratchDir, fileName)
                f = open(rxnC, "r")
                lastLines = f.readlines()
                #f.closed()

                if sucessKey or failKey in lastLines:
                    r['WW.C Complete Rxn Center'] = 1
                if sucessKey in lastLines:
                    r['WW.D Successful Rxn Center'] = 1
                # check complete

            elif "IRC" in fileName:
                IRC = os.path.join(qmScratchDir, fileName)
                f = open(IRC, "r")
                lastLines = f.readlines()
                #f.closed()

                if sucessKey or failKey in lastLines:
                    r['WW.G Complete IRC log'] = 1
                if sucessKey in lastLines:
                    r['WW.H Successful IRC log'] = 1
                # check complete

            else:
                overall = os.path.join(qmScratchDir, fileName)
                f = open(overall, "r")
                lastLines = f.readlines()[-4:]
                #f.closed()

                if sucessKey or failKey in lastLines:
                    r['WW.E Complete overall TS log'] = 1
                if sucessKey in lastLines:
                    r['WW.F Successful overall TS log'] = 1
                # check complete

SyntaxError: invalid syntax (<ipython-input-5-2f26a5f2887c>, line 2)

In [3]:
df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,1615,1616,1617,1618,1619,1620,1621,1622,1623,1624
0 reaction,"<Molecule ""[CH3]""> + <Molecule ""OO""> <=> <Mole...","<Molecule ""C""> + <Molecule ""[O]O""> <=> <Molecu...","<Molecule ""OO""> + <Molecule ""[O]""> <=> <Molecu...",,"<Molecule ""OO""> + <Molecule ""[OH]""> <=> <Molec...","<Molecule ""[O]O""> + <Molecule ""O""> <=> <Molecu...","<Molecule ""[O]O""> + <Molecule ""C#C""> <=> <Mole...","<Molecule ""[CH]=C""> + <Molecule ""[O]O""> <=> <M...","<Molecule ""[CH]=C""> + <Molecule ""OO""> <=> <Mol...","<Molecule ""[O]O""> + <Molecule ""C=C""> <=> <Mole...",...,,,,,,,,,,
1 log file exists,1,1,1,1.0,1,1,1,1,1,1,...,,,,,,,,,,
2 matched H-abstraction,1,1,1,,1,1,1,1,1,1,...,,,,,,,,,,
3A started making TS geometry,1,1,1,,1,1,1,1,1,,...,,,,,,,,,,
3B using existing ts data file,,,,,,1,,,1,,...,,,,,,,,,,
3C Previous TS optimization complete,1,1,1,,1,,1,1,,,...,,,,,,,,,,
4A TS opt w frozen center,,,,,,,,,,,...,,,,,,,,,,
4B TS opt of rxn center,,,,,,,,,,,...,,,,,,,,,,
4C TS optimization started,,,,,,,,,,,...,,,,,,,,,,
5A New TS optimization complete,1,1,1,,1,,1,1,,,...,,,,,,,,,,


In [5]:
df.count(axis=1)

0 reaction                               656
1 log file exists                        658
2 matched H-abstraction                  656
3A started making TS geometry            428
3B using existing ts data file            41
3C Previous TS optimization complete     149
4A TS opt w frozen center                237
4B TS opt of rxn center                  236
4C TS optimization started               233
5A New TS optimization complete          380
5B TS successfully optimized             211
6A IRC file created                      211
6B New IRC calc complete                 200
6C New IRC calc successful               151
6D IRC calc from previous calculation     73
7A starting Symmetry calculation         187
7B Symmetry calc successful               40
8 CanTherm started                        39
8A CanTherm barrier height problem        22
XX using existing kinetics data file     228
YY successful prior calculation          242
ZZ overall fail                          240
ZZ overall

# This portion of the code is for BioTST

In [3]:
directory = '/gss_gpfs_scratch/harms.n/bioTST'
results = defaultdict(OrderedDict)
comparerFiles = [f for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
k = 0
for fil in comparerFiles:
    if ".log" in fil:
        k += 1
results = defaultdict(OrderedDict)
not_ooh_abstraction = []
sucessKey = 'Normal termination of Gaussian'
failKey = "Error termination"
for i in range(1,k+1):
    r = results[i]
    filename = 'AutoTST-biofuels.{0:d}.combined.log'.format(i)
    filepath = os.path.join(directory,filename)
    if os.path.exists(filepath):
        r['1 log file exists'] = 1
    else:
        continue
    with open(filepath) as f:
        lines = f.readlines()
    for j,l in enumerate(lines):
        m = re.match('biofuelsTST.py:186 performCalcs INFO chemkinRxn: (.*)', l)
        if m:
            rxn = m.group(1)
            r['0 reaction'] = eval(rxn)       
                
  
        
        if 'We have generated a H_Abstraction reaction that matches, and used it to label the atoms' in l:
            r['2 matched H-abstraction'] = 1
          
        
        if 'Reading existing kinetics file' in l:
            r['XX using existing kinetics data file'] = 1 # Reading in existing .kinetics file
            
        
           
        
        if 'Generating a TS geometry via the direct guess method' in l:
            r['3A started making TS geometry'] = 1
        if "Reading existing ts file" in l:
            r['3B using existing ts data file'] = 1 # If reading in existing .ts file, this bypasses 4, 5 and 6
        if 'optimizeTS INFO Output file' and 'exists and looks complete. Trying that.' in l:
            r['3C Previous TS optimization complete'] = 1 #If reading in existing .log, this then checks if there is an existing IRC clac
            
            
            
        if 'Running loose optimization of TS with frozen center' in l:
            r['4A TS opt w frozen center'] = 1    
        if 'Optimization of TS reaction center distances' in l:
            r['4B TS opt of rxn center'] = 1  
        if 'Optimizing TS attempt' in l:
            r['4C TS optimization started'] = 1
        
        
        if 'verifyOutputFile INFO Verifying output file' in l:
            r['5A New TS optimization complete'] = 1
        
        
            
        if 'Creating IRC file' in l:
            r['6A IRC file created'] = 1
        if "Verifying the IRC output file" in l:
            r['6B New IRC calc complete'] = 1
        if "saveTSData INFO Saving TS result file" in l:
            r['6C New IRC calc successful'] = 1
            
            
            
        if 'Symmetry input file written to' in l:
            r['7A starting Symmetry calculation'] = 1
        if 'Point group:' in l:
            r['7B Symmetry calc successful'] = 1
         
        
        if 'CanTherm execution initiated' in l:
            r['8 CanTherm started'] = 1
        if 'One or both of the barrier heights of' in l:
            r['8A CanTherm barrier height problem'] = -1
         
        
        if 'Yay, reaction kinetics calculated!!!' in l:
            r['ZZ overall success'] = 1
            r['ZZZ Complete'] = 1
            
        if "Boo," in l:
            r['ZZ overall fail'] = 1
            r['ZZZ Complete'] = 1
            
        if "gaussian.py:880 verifyIRCOutputFile ERROR Not all of the required keywords for success were found in the IRC output file!" in l:
            r['ZZZ IRC success keywords not found'] = 1

    
    if 'XX using existing TS data file' and 'ZZ overall success' in r.keys():
        r['YY successful prior calculation'] = 1
        
    if '5A New TS optimization complete' and '6A IRC file created' in r.keys():
        r['5B TS successfully optimized'] = 1
    
    if '6B New IRC calc complete' in r.keys() and "6A IRC file created" not in r.keys():
        r['6D IRC calc from previous calculation'] = 1
        #print "deleting 6B"
        del(r['6B New IRC calc complete'])
        
    if '5A New TS optimization complete' and '5C Previous TS optimization complete' in r.keys():
        del(r['5A New TS optimization complete'])
    
    """
    if '3C Previous TS optimization complete' in r.keys():
            
        qmScratchDir = '/gss_gpfs_scratch/harms.n/QMscratch/'

        fileNames = [f for f in os.listdir(qmScratchDir) if os.path.isfile(os.path.join(qmScratchDir, f))]

        r1, r2 = rxn.reactants
        p1, p2 = rxn.products

        r1SMILES = r1[0].molecule.SMILES
        r2SMILES = r2[0].molecule.SMILES
        p1SMILES = p1[0].molecule.SMILES
        p2SMILES = p2[0].molecule.SMILES
        
        r1Augmented = r1[0].molecule.toInChiKey()
        r2Augmented = r2[0].molecule.toInChiKey()
        p1Augmented = p1[0].molecule.toInChiKey()
        p2Augmented = p2[0].molecule.toInChiKey()
        
        for fileName in fileNames:
            if r1Augmented or r2Augmented or p1Augmented or p2Augmented in fileName:
                f = open(qmScratchDir + fileName, "r")
                    lastLines = f.readlines()[-4:]
                    
                    if sucessKey or failKey in lastLines:
                        r['WW.0 Complete Reactants / Products Estimate'] = 1
                    if failKey in lastLine:
                        r['WW.1 Fail Reactants / Products Estimate'] = 1 
                
            if r1SMILES and r2SMILES and p1SMILES and p2SMILES and ".log" in fileName:
                if "Est" in fileName:
                    f = open(qmScratchDir + fileName, "r")
                    lastLines = f.readlines()[-4:]
                    
                    if sucessKey or failKey in lastLines:
                        r['WW.A Complete TS Estimate'] = 1
                    if sucessKey in lastLine:
                        r['WW.B Successful TS Estimate'] = 1
                             
                    # check complete
                elif "RxnC" in fileName:
                    f = open(qmScratchDir + fileName, "r")
                    lastLines = f.readlines()[-4:]
                    
                    if sucessKey or failKey in lastLines:
                        r['WW.C Complete Rxn Center'] = 1
                    if sucessKey in lastLine:
                        r['WW.D Successful Rxn Center'] = 1
                    # check complete
                    
                elif "IRC" in fileName:
                    f = open(qmScratchDir + fileName, "r")
                    lastLines = f.readlines()[-4:]
                    
                    if sucessKey or failKey in lastLines:
                        r['WW.G Complete IRC log'] = 1
                    if sucessKey in lastLine:
                        r['WW.H Successful IRC log'] = 1
                    # check complete
                    
                else:
                    f = open(qmScratchDir + fileName, "r")
                    lastLines = f.readlines()[-4:]
                    
                    if sucessKey or failKey in lastLines:
                        r['WW.E Complete overall TS log'] = 1
                    if sucessKey in lastLine:
                        r['WW.F Successful overall TS log'] = 1
                    # check complete
                    """        
        

dff = pd.DataFrame(results)
dff

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,774,775,776,777,778,779,780,781,782,783
0 reaction,,,,,,,,,,,...,,,,,,,,,,
1 log file exists,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,,,,,,,,,,
2 matched H-abstraction,,,,,,,,,,,...,,,,,,,,,,
3A started making TS geometry,,,,,,,,,,,...,,,,,,,,,,
3B using existing ts data file,,,,,,,,,,,...,,,,,,,,,,
3C Previous TS optimization complete,,,,,,,,,,,...,,,,,,,,,,
4A TS opt w frozen center,,,,,,,,,,,...,,,,,,,,,,
4B TS opt of rxn center,,,,,,,,,,,...,,,,,,,,,,
4C TS optimization started,,,,,,,,,,,...,,,,,,,,,,
5A New TS optimization complete,,,,,,,,,,,...,,,,,,,,,,


In [4]:
dff.count(axis=1)

0 reaction                               584
1 log file exists                        692
2 matched H-abstraction                  584
3A started making TS geometry            582
3B using existing ts data file             2
3C Previous TS optimization complete     253
4A TS opt w frozen center                327
4B TS opt of rxn center                  327
4C TS optimization started               314
5A New TS optimization complete          562
5B TS successfully optimized             216
6A IRC file created                      216
6B New IRC calc complete                 215
6C New IRC calc successful               165
6D IRC calc from previous calculation     54
7A starting Symmetry calculation         164
7B Symmetry calc successful               97
8 CanTherm started                        95
8A CanTherm barrier height problem         8
XX using existing kinetics data file       2
YY successful prior calculation           83
ZZ overall fail                          408
ZZ overall

In [None]:
100