In [1]:
import os
import time
import subprocess
import glob
from pprint import pprint
import numpy as np
from numpy.linalg import norm
import itertools 
import re
import pandas as pd
import shutil

root_dir = os.getcwd() + '\..'
sys.path.append(root_dir)

from orca_wrapper import run_orca, parse_orca_output, default_search_terms, get_imaginary_modes

os.chdir('Orca_Calculations/')

# Step 1 - Optimize Non-interacting Geometries

In [2]:
step_dir = 'Step1/'
input_files = glob.glob(f'{step_dir}/*.inp')
output_files = [i_file.replace('.inp', '.out') for i_file in input_files]
output_files

['Step1\\Br-.out', 'Step1\\CH3Br.out', 'Step1\\CH3Cl.out', 'Step1\\Cl-.out']

In [3]:
for i_file in input_files:
    run_orca(i_file)


Running "Step1\Br-.inp"
Writing to "Step1\Br-.out"

Running "Step1\CH3Br.inp"
Writing to "Step1\CH3Br.out"

Running "Step1\CH3Cl.inp"
Writing to "Step1\CH3Cl.out"

Running "Step1\Cl-.inp"
Writing to "Step1\Cl-.out"


# Step 2 - Optimize Weakly Interacting Geometries

In [5]:
# Use the optimized geometry for CH3Br as a starting point for our sweep
with open('Step1/CH3Br.xyz') as init_xyz:
    lines = init_xyz.readlines()
    nbr_atoms_before = lines[0].strip()
    nbr_atoms_after = 6
    lines[0] = f'{nbr_atoms_after}\n'
    lines.append('  Cl  -3.7                  0.0                    0.0\n')

for line in lines:
    print(line, end='')

with open('Step2/Cl-_CH3Br_init.xyz', 'w') as new_xyz:
    new_xyz.write(''.join(lines))

# Use the optimized geometry for CH3Cl as a starting point for our sweep
with open('Step1/CH3Cl.xyz') as init_xyz:
    lines = init_xyz.readlines()
    nbr_atoms_before = lines[0].strip()
    nbr_atoms_after = 6
    lines[0] = f'{nbr_atoms_after}\n'
    lines.append('  Br  -3.7                  0.0                    0.0\n')

for line in lines:
    print(line, end='')

with open('Step2/CH3Cl_Br-_init.xyz', 'w') as new_xyz:
    new_xyz.write(''.join(lines))

6
Coordinates from ORCA-job Step1\CH3Br
  C   -0.00128235936520     -0.00000008905151     -0.00000004332922
  Br  1.95252041558401     -0.00000089813715     -0.00000011893171
  H   -0.34394901558420      1.04129517652484      0.00477118112605
  H   -0.34394724171098     -0.52477881350138      0.89940066726025
  H   -0.34394698742363     -0.51651543223481     -0.90417168852537
  Cl  -3.7                  0.0                    0.0
6
Coordinates from ORCA-job Step1\CH3Cl
  C   -0.00625646205785      0.00000098945494     -0.00000010575368
  Cl  1.78918679893258     -0.00000273099634      0.00000009138449
  H   -0.36235932920563      1.03838548800012      0.00475773257704
  H   -0.36235393659441     -0.52331198586870      0.89688273308291
  H   -0.36235418257468     -0.51507161269001     -0.90164043359077
  Br  -3.7                  0.0                    0.0


In [6]:
input_files = glob.glob(f'Step2/*.inp')
output_files = [i_file.replace('.inp', '.out') for i_file in input_files]
output_files

['Step2\\CH3Cl_Br-.out', 'Step2\\Cl-_CH3Br.out']

In [7]:
for i_file in input_files:
    run_orca(i_file)


Running "Step2\CH3Cl_Br-.inp"
Writing to "Step2\CH3Cl_Br-.out"

Running "Step2\Cl-_CH3Br.inp"
Writing to "Step2\Cl-_CH3Br.out"


# Step 3 - Scan for Transition State

In [8]:
shutil.copy('Step2/Cl-_CH3Br.xyz', 'Step3/Cl-_CH3Br_init.xyz')

'Step3/Cl-_CH3Br_init.xyz'

In [9]:
step_dir = 'Step3'
input_files = glob.glob(f'{step_dir}/*.inp')
output_files = [i_file.replace('.inp', '.out') for i_file in input_files]


In [10]:
for i_file in input_files:
    run_orca(i_file)


Running "Step3\Cl-_CH3Br.inp"
Writing to "Step3\Cl-_CH3Br.out"


In [11]:
for o_file in output_files:
    # get the highest energy transition state
    with open(o_file, 'r') as of:
        content = of.read()
        split_str = "The Calculated Surface using the 'Actual Energy'\n"
        trans_state_values = content.split(split_str)[1].split('\n\n',1)[0].split('\n')
    print(trans_state_values)
    energies = [float(s.strip().split()[1]) for s in trans_state_values ]

pprint(energies)

ts_index = np.argmax(energies) + 1
print(f'\nTransition state supected at index {ts_index}')

['   2.90000000 -3073.63538623', '   2.70000000 -3073.63446451', '   2.50000000 -3073.63260691', '   2.30000000 -3073.63602085', '   2.10000000 -3073.64416383', '   1.90000000 -3073.65156792']
[-3073.63538623,
 -3073.63446451,
 -3073.63260691,
 -3073.63602085,
 -3073.64416383,
 -3073.65156792]

Transition state supected at index 3


# Step 4 - Find Imaginary Modes

In [12]:
shutil.copy(f'Step3/Cl-_CH3Br.{ts_index:03}.xyz', 'Step4/Cl-_CH3Br_TS_init.xyz')

'Step4/Cl-_CH3Br_TS_init.xyz'

In [13]:
input_files = glob.glob(f'Step4/*.inp')
output_files = [i_file.replace('.inp', '.out') for i_file in input_files]
output_files

['Step4\\[Cl_CH3_Br]-.out']

In [14]:
for i_file in input_files:
    run_orca(i_file)


Running "Step4\[Cl_CH3_Br]-.inp"
Writing to "Step4\[Cl_CH3_Br]-.out"


In [18]:
im_freq_lines = get_imaginary_modes(output_files[0])
im_freqs = []
im_freq_idx = []
for line in im_freq_lines:
    idx_str, freq_str, _ = line.split(None, 2)
    idx = int(idx_str[:-1])
    freq = float(freq_str)
    if idx not in im_freq_idx:
        im_freq_idx.append(idx)
        im_freqs.append(freq)

if len(im_freq_idx) > 1:
    print("More than one imginary frequency found! This is not a transition state!")
    print(im_freq_lines)
elif len(im_freq_idx) < 1:
    print("No imgaginary frequencies found!")
else:
    reaction_idx = im_freq_idx[0]
    print(f"Reaction coordinate given by mode {reaction_idx} with imaginary frequency {im_freqs[0]} cm**-1")

Reaction coordinate given by mode 6 with imaginary frequency -282.42 cm**-1


In [16]:
hess_file = [i_file.replace('.inp', '.hess') for i_file in input_files]
print(hess_file[0])
p_out = subprocess.run(['orca_pltvib', hess_file[0], str(reaction_idx)], capture_output=True)
if p_out.stderr:
    print(p_out.stderr.decode())
print(p_out.stdout.decode())

Step4\[Cl_CH3_Br]-.hess
The file is being recognized as a valid orca hessian file
Will plot 1 vibrations:
   6
creating: Step4\[Cl_CH3_Br]-.hess.v006.xyz



# Step 5 - Single Point Calculations with Bigger Basis

In [17]:
input_files = glob.glob('Step5/*.inp')
output_files = [i_file.replace('.inp', '.out') for i_file in input_files]
output_files

['Step5\\Br-.out',
 'Step5\\CH3Br.out',
 'Step5\\CH3Cl.out',
 'Step5\\CH3Cl_Br-.out',
 'Step5\\Cl-.out',
 'Step5\\Cl-_CH3Br.out',
 'Step5\\[Cl_CH3_Br]-.out']

In [18]:
for i_file in input_files:
    run_orca(i_file)


Running "Step5\Br-.inp"
Writing to "Step5\Br-.out"

Running "Step5\CH3Br.inp"
Writing to "Step5\CH3Br.out"

Running "Step5\CH3Cl.inp"
Writing to "Step5\CH3Cl.out"

Running "Step5\CH3Cl_Br-.inp"
Writing to "Step5\CH3Cl_Br-.out"

Running "Step5\Cl-.inp"
Writing to "Step5\Cl-.out"

Running "Step5\Cl-_CH3Br.inp"
Writing to "Step5\Cl-_CH3Br.out"

Running "Step5\[Cl_CH3_Br]-.inp"
Writing to "Step5\[Cl_CH3_Br]-.out"


# Step 6 - Solvation Effects

In [19]:
input_files = glob.glob('Step6/*.inp')
output_files = [i_file.replace('.inp', '.out') for i_file in input_files]
output_files

['Step6\\CH3Cl_Br-.out', 'Step6\\Cl-_CH3Br.out', 'Step6\\[Cl_CH3_Br]-.out']

In [20]:
for i_file in input_files:
    run_orca(i_file)


Running "Step6\CH3Cl_Br-.inp"
Writing to "Step6\CH3Cl_Br-.out"

Running "Step6\Cl-_CH3Br.inp"
Writing to "Step6\Cl-_CH3Br.out"

Running "Step6\[Cl_CH3_Br]-.inp"
Writing to "Step6\[Cl_CH3_Br]-.out"


# Putting Together the Results

In [21]:
output_files = glob.glob('Step*/*.out')
output_files

['Step1\\Br-.out',
 'Step1\\CH3Br.out',
 'Step1\\CH3Cl.out',
 'Step1\\Cl-.out',
 'Step2\\CH3Cl_Br-.out',
 'Step2\\Cl-_CH3Br.out',
 'Step3\\Cl-_CH3Br.out',
 'Step4\\[Cl_CH3_Br]-.out',
 'Step5\\Br-.out',
 'Step5\\CH3Br.out',
 'Step5\\CH3Cl.out',
 'Step5\\CH3Cl_Br-.out',
 'Step5\\Cl-.out',
 'Step5\\Cl-_CH3Br.out',
 'Step5\\[Cl_CH3_Br]-.out',
 'Step6\\CH3Cl_Br-.out',
 'Step6\\Cl-_CH3Br.out',
 'Step6\\[Cl_CH3_Br]-.out']

In [62]:
calcs = {}
for o_file in output_files:
    terms = parse_orca_output(o_file)
    if 'gibbs free energy' not in terms:
        print(f'No entropy in "{o_file}"')
        search_terms = {'TS_trans': 'Translational entropy'}
        trans_entr = parse_orca_output(o_file, search_terms=search_terms)
        if 'TS_trans' in trans_entr:
            print(f'Found translational entropy')
            terms['G'] = terms['H'] - trans_entr['TS_trans']
            terms['G-E(el)'] = terms['G'] - terms['E_sp']


    calcs[o_file[:-4]] = terms


raw_res_df = pd.DataFrame(calcs)
raw_res_df.T

No entropy in "Step1\Br-.out"
Found translational entropy
No entropy in "Step1\CH3Br.out"
Found translational entropy
No entropy in "Step1\CH3Cl.out"
Found translational entropy
No entropy in "Step1\Cl-.out"
Found translational entropy
No entropy in "Step2\CH3Cl_Br-.out"
Found translational entropy
No entropy in "Step2\Cl-_CH3Br.out"
Found translational entropy
No entropy in "Step3\Cl-_CH3Br.out"
No entropy in "Step4\[Cl_CH3_Br]-.out"
Found translational entropy
No entropy in "Step5\Br-.out"
No entropy in "Step5\CH3Br.out"
No entropy in "Step5\CH3Cl.out"
No entropy in "Step5\CH3Cl_Br-.out"
No entropy in "Step5\Cl-.out"
No entropy in "Step5\Cl-_CH3Br.out"
No entropy in "Step5\[Cl_CH3_Br]-.out"
No entropy in "Step6\CH3Cl_Br-.out"
No entropy in "Step6\Cl-_CH3Br.out"
No entropy in "Step6\[Cl_CH3_Br]-.out"


Unnamed: 0,E_sp,E_therm,H,G,G-E(el),TS
Step1\Br-,-2573.783655,-2573.779406,-2573.778462,-2573.797015,-0.01336,
Step1\CH3Br,-2613.57921,-2613.539257,-2613.538313,-2613.55711,0.0221,0.028975
Step1\CH3Cl,-499.848502,-499.808073,-499.807129,-499.825032,0.023469,0.027665
Step1\Cl-,-460.023549,-460.0193,-460.018356,-460.035759,-0.01221,
Step2\CH3Cl_Br-,-3073.652284,-3073.609444,-3073.6085,-3073.627747,0.024537,0.036575
Step2\Cl-_CH3Br,-3073.63552,-3073.593542,-3073.592597,-3073.611844,0.023675,0.036449
Step3\Cl-_CH3Br,-3073.651568,,,,,
Step4\[Cl_CH3_Br]-,-3073.632714,-3073.591893,-3073.590949,-3073.610196,0.022518,0.035104
Step5\Br-,-2574.12917,,,,,
Step5\CH3Br,-2613.942478,,,,,


In [66]:
step1_corr = ['Br-', 'Cl-', 'CH3Br', 'CH3Cl'] 
step2_corr = ['CH3Cl_Br-', 'Cl-_CH3Br'] 
step4_corr = ['[Cl_CH3_Br]-']

for stuff in step1_corr:
    raw_res_df[f'Step5\\{stuff}']['G'] = raw_res_df[f'Step5\\{stuff}']['E_sp'] - raw_res_df[f'Step1\\{stuff}']['G-E(el)']

KeyError: 'Step1\\CH3Cl_Br-'

In [58]:
res_df = raw_res_df.filter(regex='Step[1,2,4]') * 627.5094740631
res_df.T

Unnamed: 0,E_sp,E_therm,H,G,G-E(el),TS
Step1\Br-,-1615074.0,-1615071.0,-1615070.0,-1615082.0,-8.383735,
Step1\CH3Br,-1640046.0,-1640021.0,-1640020.0,-1640032.0,13.867678,18.181886
Step1\CH3Cl,-313659.7,-313634.3,-313633.7,-313644.9,14.727261,17.359912
Step1\Cl-,-288669.1,-288666.5,-288665.9,-288676.8,-7.661583,
Step2\CH3Cl_Br-,-1928746.0,-1928719.0,-1928718.0,-1928731.0,15.397211,22.950946
Step2\Cl-_CH3Br,-1928735.0,-1928709.0,-1928708.0,-1928721.0,14.856305,22.871923
Step4\[Cl_CH3_Br]-,-1928734.0,-1928708.0,-1928707.0,-1928720.0,14.13033,22.027948


In [53]:
res_df['Step1\\Cl- + CH3Br'] = res_df['Step1\Cl-'] + res_df['Step1\CH3Br']
res_df['Step1\\CH3Cl + Br-'] = res_df['Step1\Br-'] + res_df['Step1\CH3Cl']
res_df.T

Unnamed: 0,E_sp,E_therm,H,G,G-E(el),TS
Step1\Br-,-1615074.0,-1615071.0,-1615070.0,-1615082.0,-8.383735,
Step1\CH3Br,-1640046.0,-1640021.0,-1640020.0,-1640032.0,13.867678,18.181886
Step1\CH3Cl,-313659.7,-313634.3,-313633.7,-313644.9,14.727261,17.359912
Step1\Cl-,-288669.1,-288666.5,-288665.9,-288676.8,-7.661583,
Step2\CH3Cl_Br-,-1928746.0,-1928719.0,-1928718.0,-1928731.0,15.397211,22.950946
Step2\Cl-_CH3Br,-1928735.0,-1928709.0,-1928708.0,-1928721.0,14.856305,22.871923
Step4\[Cl_CH3_Br]-,-1928734.0,-1928708.0,-1928707.0,-1928720.0,14.13033,22.027948
Step1\Cl- + CH3Br,-1928715.0,-1928687.0,-1928686.0,-1928709.0,6.206094,
Step1\CH3Cl + Br-,-1928733.0,-1928705.0,-1928704.0,-1928727.0,6.343525,


In [54]:
res_df.filter(regex='Step[1,2]').T['G'].sort_values()

Step2\CH3Cl_Br-     -1.928731e+06
Step1\CH3Cl + Br-   -1.928727e+06
Step2\Cl-_CH3Br     -1.928721e+06
Step1\Cl- + CH3Br   -1.928709e+06
Step1\CH3Br         -1.640032e+06
Step1\Br-           -1.615082e+06
Step1\CH3Cl         -3.136449e+05
Step1\Cl-           -2.886768e+05
Name: G, dtype: float64

We see that the the lowest energy state before the transition state is $Cl^-\;-\; CH_3Br$ with -3073.611844 Ha

In [34]:
reference = res_df['Step2\\Cl-_CH3Br']
reference

E_sp      -1.928735e+06
E_therm   -1.928709e+06
H         -1.928708e+06
G         -1.928721e+06
G-E(el)    1.485630e+01
TS         2.287192e+01
Name: Step2\Cl-_CH3Br, dtype: float64

In [55]:
(res_df.T - reference)['G']

Step1\Br-             3.136385e+05
Step1\CH3Br           2.886887e+05
Step1\CH3Cl           1.615076e+06
Step1\Cl-             1.640044e+06
Step2\CH3Cl_Br-      -9.978894e+00
Step2\Cl-_CH3Br       0.000000e+00
Step4\[Cl_CH3_Br]-    1.034550e+00
Step1\Cl- + CH3Br     1.190734e+01
Step1\CH3Cl + Br-    -6.402661e+00
Name: G, dtype: float64