# Progress Metering for OOH abstraction

This notebook scans each combined log file (`AutoTST.<n>.combined.log`) in the `rwest-autotst-1` directory and records which stages of the calculation each run reached.

In [4]:
import os
import re
from collections import defaultdict, OrderedDict
import pandas as pd

In [5]:
directory = '/gss_gpfs_scratch/westgroup/rwest-autotst-1'

# Log line that carries the reaction string. The dot in the script name is
# escaped so that it only matches a literal '.'.
REACTION_PATTERN = re.compile(r'autoTST-OOH\.py:62 <module> INFO (.*)')

# One entry per milestone looked for in a log line:
#   (text to find, key to record, value to record, print log excerpt?)
# Reached stages are recorded as 1 and problems as -1.
MILESTONES = [
    ('We have generated a H_Abstraction reaction that matches, and used it to label the atoms',
     '2 matched H-abstraction', 1, False),
    ('Generating a TS geometry via the direct guess method',
     '3 started making TS geometry', 1, False),
    ('Reading existing ts file',
     '4 using existing TS data file', 1, False),
    ('Symmetry input file written to',
     '5 starting Symmetry calculation', 1, False),
    ('Point group:',
     '6 Symmetry calc successful', 1, False),
    ('line 295, in saveCoordinatesFromRDMol',
     '7 saveCoordinatesFromRDMol bug', -1, True),
    ('CanTherm execution initiated',
     '9 CanTherm started', 1, False),
    ('One or both of the barrier heights of',
     '9a CanTherm barrier height problem', -1, False),
    ('Reading existing kinetics file',
     'A using prior calculation result', 1, False),
    ('Yay, reaction kinetics calculated!!!',
     'B overall success', 1, False),
]


def scan_log_lines(lines, record, context=30):
    """Record in `record` every milestone found in the lines of one log.

    Parameters
    ----------
    lines : list of str
        The lines of the log, in file order.
    record : dict-like
        Updated in place. Receives the reaction string under '0 reaction'
        and the value from MILESTONES under each milestone key that is found.
    context : int, optional
        How many lines before a flagged line to include when the log excerpt
        is printed (the excerpt always runs to the end of the log).

    Returns
    -------
    The same `record` object that was passed in.
    """
    for j, line in enumerate(lines):
        m = REACTION_PATTERN.match(line)
        if m:
            record['0 reaction'] = m.group(1)
        for marker, key, value, print_excerpt in MILESTONES:
            if marker not in line:
                continue
            record[key] = value
            if print_excerpt:
                # Clamp the start at 0: a negative start index would make the
                # slice count from the END of the log and print the wrong
                # lines when the marker is within the first `context` lines.
                print(''.join(lines[max(0, j - context):]))
    return record


results = defaultdict(OrderedDict)
for i in range(1, 408):
    # Looked up before the existence check, so a run number without a log
    # file still gets an (empty) entry in `results`.
    r = results[i]
    filename = 'AutoTST.{0:d}.combined.log'.format(i)
    filepath = os.path.join(directory, filename)
    if not os.path.exists(filepath):
        continue
    r['1 log file exists'] = 1
    with open(filepath) as f:
        lines = f.readlines()
    scan_log_lines(lines, r)


In [6]:
# Turn the per-run dictionaries into a table: one column per run number and
# one row per recorded key; keys a run never recorded show up as NaN.
df = pd.DataFrame.from_dict(results, orient='columns')
df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,190,191,192,193,194,195,196,197,198,199
0 reaction,H2O2 + H <=> H2 + HO2,CH3 + H2O2 <=> CH4 + HO2,CH4 + HO2 <=> CH3 + H2O2,H2O2 + O <=> OH + HO2,nh3 + ho2 <=> nh2 + h2o2,H2O2 + OH <=> H2O + HO2,HO2 + H2O => H2O2 + OH,HO2 + C2H2 => H2O2 + C2H,C2H3 + HO2 <=> C2H2 + H2O2,C2H3 + H2O2 <=> C2H4 + HO2,...,CY13C6H8 + HO2 <=> CYC6H7 + H2O2,CYC6H9 + HO2 <=> CY13C6H8 + H2O2,C5H5OH + HO2 <=> C5H4OH + H2O2,C5H5OH + HO2 <=> C5H5O + H2O2,THP-2-4-ene + HO2 <=> THP-5yl + H2O2,C6H101-3 + HO2 => C2H3 + C4H6 + H2O2,THP-234-enyl + HO2 <=> THP-2-4-ene + H2O2,THP-345-enyl + HO2 <=> THP-2-4-ene + H2O2,cC6H11 + HO2 <=> cC6H10 + H2O2,C2H5COC2H3 + HO2 <=> C2H5COC2H2 + H2O2
1 log file exists,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2 matched H-abstraction,1,1,1,1,,1,1,1,,1,...,1,,1,1,1,,,,,1
3 started making TS geometry,1,1,1,1,,1,1,1,,1,...,1,,1,1,1,,,,,1
4 using existing TS data file,1,,,,,,1,,,1,...,1,,1,1,1,,,,,1
5 starting Symmetry calculation,1,,,,,,1,,,1,...,1,,1,1,1,,,,,1
6 Symmetry calc successful,1,,,,,,1,,,1,...,1,,1,1,1,,,,,1
9 CanTherm started,1,,,,,,1,,,1,...,1,,1,1,1,,,,,1
9a CanTherm barrier height problem,,,,,,,-1,,,-1,...,,,,-1,,,,,,-1
A using prior calculation result,,,,,,,,,,,...,,,,,,,,,,


In [7]:
# Total of each row across all runs. Problem rows are recorded as -1, so they
# total negative; the '0 reaction' row holds text and is concatenated.
df.sum(axis='columns')

0 reaction                            H2O2 + H <=> H2 + HO2CH3 + H2O2 <=> CH4 + HO2C...
1 log file exists                                                                   199
2 matched H-abstraction                                                             153
3 started making TS geometry                                                        139
4 using existing TS data file                                                       103
5 starting Symmetry calculation                                                     103
6 Symmetry calc successful                                                          103
9 CanTherm started                                                                  103
9a CanTherm barrier height problem                                                  -23
A using prior calculation result                                                     14
B overall success                                                                    92
dtype: object

In [8]:
# Number of runs with a non-missing entry in each row.
df.count(axis='columns')

0 reaction                            199
1 log file exists                     199
2 matched H-abstraction               153
3 started making TS geometry          139
4 using existing TS data file         103
5 starting Symmetry calculation       103
6 Symmetry calc successful            103
9 CanTherm started                    103
9a CanTherm barrier height problem     23
A using prior calculation result       14
B overall success                      92
dtype: int64