# Progress Metering for OOH abstraction

This is a notebook designed to go through each output file in rwest-autotst-1 and determine the progress of each run.

In [1]:
import os
import re
from collections import defaultdict, OrderedDict
import pandas as pd

In [2]:
directory = '/gss_gpfs_scratch/westgroup/rwest-autotst-1'
results = defaultdict(OrderedDict)
for i in range(1,408):
    r = results[i]
    filename = 'AutoTST.{0:d}.combined.log'.format(i)
    filepath = os.path.join(directory,filename)
    if os.path.exists(filepath):
        r['1 log file exists'] = 1
    else:
        continue
    with open(filepath) as f:
        lines = f.readlines()
    for j,l in enumerate(lines):
        m = re.match('autoTST-OOH.py:62 <module> INFO (.*)', l)
        if m:
            r['0 reaction'] = m.group(1)
        if 'We have generated a H_Abstraction reaction that matches, and used it to label the atoms' in l:
            r['2 matched H-abstraction'] = 1
        if 'Generating a TS geometry via the direct guess method' in l:
            r['3 started making TS geometry'] = 1
        if 'Reading existing ts file' in l:
            r['4 using existing TS data file'] = 1
        if 'Symmetry input file written to' in l:
            r['5 starting Symmetry calculation'] = 1
        if 'Point group:' in l:
            r['6 Symmetry calc successful'] = 1
        if 'line 295, in saveCoordinatesFromRDMol' in l:
            r['7 saveCoordinatesFromRDMol bug'] = -1
            print ''.join(lines[j-30:])
        if 'CanTherm execution initiated' in l:
            r['9 CanTherm started'] = 1
        if 'One or both of the barrier heights of' in l:
            r['9a CanTherm barrier height problem'] = -1
        if "Reading existing kinetics file" in l:
            r['A using prior calculation result'] = 1
        if 'Yay, reaction kinetics calculated!!!' in l:
            r['B overall success'] = 1


In [3]:
df = pd.DataFrame(results)
df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,398,399,400,401,402,403,404,405,406,407
0 reaction,H2O2 + H <=> H2 + HO2,CH3 + H2O2 <=> CH4 + HO2,CH4 + HO2 <=> CH3 + H2O2,H2O2 + O <=> OH + HO2,nh3 + ho2 <=> nh2 + h2o2,H2O2 + OH <=> H2O + HO2,HO2 + H2O => H2O2 + OH,HO2 + C2H2 => H2O2 + C2H,C2H3 + HO2 <=> C2H2 + H2O2,C2H3 + H2O2 <=> C2H4 + HO2,...,,,,,,,,,,
1 log file exists,1,1,1,1,1,1,1,1,1,1,...,,,,,,,,,,
2 matched H-abstraction,1,1,1,1,,1,1,1,,1,...,,,,,,,,,,
3 started making TS geometry,1,1,1,1,,1,1,1,,1,...,,,,,,,,,,
4 using existing TS data file,1,,,,,,1,,,1,...,,,,,,,,,,
5 starting Symmetry calculation,1,,,,,,1,,,1,...,,,,,,,,,,
6 Symmetry calc successful,1,,,,,,1,,,1,...,,,,,,,,,,
9 CanTherm started,1,,,,,,1,,,1,...,,,,,,,,,,
9a CanTherm barrier height problem,,,,,,,-1,,,-1,...,,,,,,,,,,
A using prior calculation result,,,,,,,,,,,...,,,,,,,,,,


In [4]:
df.sum(axis=1)

0 reaction                           NaN
1 log file exists                    NaN
2 matched H-abstraction              NaN
3 started making TS geometry         NaN
4 using existing TS data file        NaN
5 starting Symmetry calculation      NaN
6 Symmetry calc successful           NaN
9 CanTherm started                   NaN
9a CanTherm barrier height problem   NaN
A using prior calculation result     NaN
B overall success                    NaN
dtype: float64

In [5]:
df.count(axis=1)

0 reaction                            331
1 log file exists                     331
2 matched H-abstraction               268
3 started making TS geometry          253
4 using existing TS data file         105
5 starting Symmetry calculation       178
6 Symmetry calc successful            178
9 CanTherm started                    176
9a CanTherm barrier height problem     28
A using prior calculation result       15
B overall success                     161
dtype: int64