In [11]:
import sys, os
import subprocess as proc 
import textwrap
import inspect
import shutil
from datetime import datetime

In [12]:
# PDB entries the notebook works on.
pdb_list = [
    "2gsm",
    "3hb3",
]

# Two-character tags used as folder and file-name suffixes. The two digits match
# the "charge multiplicity" lines written into the Gaussian inputs further down
# ("0 1", "0 5", "1 2", "1 6").
spin_states = [
    "01",
    "05",
    "12",
    "16",
]

In [13]:
class automation():
    """
    Builds the per-PDB folder tree and writes the Gaussian ``.com`` input files.

    All helpers are static: they are called on the class itself
    (``automation.main()``) and keep no instance state. Every path is relative
    to the current working directory. The coordinate files
    ``pdb_coord/<pdb_id>.xyz`` are read, never created, so they must already exist.

    NOTE(review): Gaussian documents the literal line ``--Link1--`` as the
    separator between job steps. The numbered variants written by the templates
    below (``--Link2--`` ... ``--Link8--``), and the files that *start* with such
    a line, should be confirmed against the Gaussian manual before use.
    """

    def __init__(self):
        """
        Class that creates the necessary files for running jobs automatically except for the .xyz coordinate files.
        """

    @staticmethod
    def __write_text(path, text):
        """
        Writes ``text`` to ``path``, creating the parent folder if needed.

        The file is opened in a ``with`` block so the handle is closed (and the
        content flushed) before the function returns.
        """
        folder = os.path.dirname(path)
        if folder:
            os.makedirs(folder, exist_ok=True)
        with open(path, "w") as handle:
            handle.write(text)
        return

    @staticmethod
    def __create_runtime_folders(pdb_list, spin_states):
        """
        Creates ``<pdb_id>/<pdb_id><spin_state>/`` for every combination; existing folders are kept.
        """
        for pdb_id in pdb_list:
            for spin_state in spin_states:
                # makedirs also creates the parent "<pdb_id>" folder and is a
                # no-op when the folder already exists.
                os.makedirs(f"{pdb_id}/{pdb_id}{spin_state}/", exist_ok=True)
        return

    @staticmethod
    def __create_gaussian_scripts_01(pdb_id):
        """
        Writes ``<pdb_id>/<pdb_id>01/<pdb_id>01.com``: three job steps with "0 1" charge/multiplicity.

        The coordinates read from ``pdb_coord/<pdb_id>.xyz`` are inserted after
        the first step's charge/multiplicity line.
        Raises FileNotFoundError when the coordinate file is missing.
        """
        link0=f"""%nprocs=12
        %mem=16GB
        %chk={pdb_id}_II_S.chk
        # polar def2tzvp empiricaldispersion=gd3bj pbe1pbe

        {pdb_id}_II_S 

        0 1
        """

        link1=f"""

        --Link1--
        %nprocs=12
        %mem=16GB
        %chk={pdb_id}_II_S.chk
        # PBE1PBE/def2TZVP empiricaldispersion=GD3BJ int=(grid=ultrafine) geom=check guess=read pop=nbo scf=qc

        {pdb_id}_II_S_nbo

        0 1
        """

        link2=f""" 
        --Link2--
        %nprocs=12
        %mem=16GB
        %chk={pdb_id}_II_S.chk
        # PBE1PBE/def2TZVP empiricaldispersion=GD3BJ int=(grid=ultrafine) geom=check guess=read scrf=(smd,solvent=Chloroform) 

        {pdb_id}_II_S_solv 

        0 1 

        """
        with open(f"pdb_coord/{pdb_id}.xyz") as xyz_file:
            coords = xyz_file.read()
        # cleandoc strips the method-level indentation from the templates.
        com01=inspect.cleandoc(link0)+"\n"+coords+"\n\n"+inspect.cleandoc(link1)+"\n\n"+inspect.cleandoc(link2)+"\n\n\n"
        automation.__write_text(f"{pdb_id}/{pdb_id}01/{pdb_id}01.com", com01)
        return

    @staticmethod
    def __create_gaussian_scripts_05(pdb_id):
        """
        Writes ``<pdb_id>/<pdb_id>05/<pdb_id>05.com``: two job steps with "0 5" charge/multiplicity.

        The first step names ``<pdb_id>_II_S.chk`` as ``%oldchk``.
        """
        link3=f"""--Link3--
        %nprocs=12
        %mem=16GB
        %oldchk={pdb_id}_II_S.chk
        %chk={pdb_id}_II_Q.chk
        # PBE1PBE/def2TZVP empiricaldispersion=GD3BJ scf=maxcycle=999 geom=check pop=nbo

        {pdb_id}_II_Q

        0 5

        """

        link4=f"""--Link4--
        %nprocs=12
        %mem=16GB
        %chk={pdb_id}_II_Q.chk
        # PBE1PBE/def2TZVP empiricaldispersion=GD3BJ geom=check guess=read scrf=(smd,solvent=Chloroform)

        {pdb_id}_II_Q_solv

        0 5

        """

        com05=inspect.cleandoc(link3)+"\n\n"+inspect.cleandoc(link4)+"\n"
        automation.__write_text(f"{pdb_id}/{pdb_id}05/{pdb_id}05.com", com05)
        return

    @staticmethod
    def __create_gaussian_scripts_12(pdb_id):
        """
        Writes ``<pdb_id>/<pdb_id>12/<pdb_id>12.com``: two job steps with "1 2" charge/multiplicity.

        The first step names ``<pdb_id>_II_S.chk`` as ``%oldchk``.
        """
        link5=f"""--Link5--
        %nprocs=12
        %mem=16GB
        %oldchk={pdb_id}_II_S.chk
        %chk={pdb_id}_III_D.chk
        # PBE1PBE/def2TZVP empiricaldispersion=GD3BJ scf=maxcycle=999 geom=check pop=nbo

        {pdb_id}_III_D

        1 2

        """

        link6=f"""--Link6--
        %nprocs=12
        %mem=16GB
        %chk={pdb_id}_III_D.chk
        # PBE1PBE/def2TZVP empiricaldispersion=GD3BJ geom=check guess=read scrf=(smd,solvent=Chloroform)

        {pdb_id}_III_D_solv

        1 2

        """

        com12=inspect.cleandoc(link5)+"\n\n"+inspect.cleandoc(link6)+"\n"
        automation.__write_text(f"{pdb_id}/{pdb_id}12/{pdb_id}12.com", com12)
        return

    @staticmethod
    def __create_gaussian_scripts_16(pdb_id):
        """
        Writes ``<pdb_id>/<pdb_id>16/<pdb_id>16.com``: two job steps with "1 6" charge/multiplicity.

        The first step names ``<pdb_id>_III_D.chk`` as ``%oldchk``.
        """
        link7=f"""--Link7--
        %nprocs=12
        %mem=16GB
        %oldchk={pdb_id}_III_D.chk
        %chk={pdb_id}_III_H.chk
        # PBE1PBE/def2TZVP empiricaldispersion=GD3BJ scf=maxcycle=999 geom=check pop=nbo

        {pdb_id}_III_H

        1 6

        """

        link8=f"""--Link8--
        %nprocs=12
        %mem=16GB
        %chk={pdb_id}_III_H.chk
        # PBE1PBE/def2TZVP empiricaldispersion=GD3BJ geom=check guess=read scrf=(smd,solvent=Chloroform)

        {pdb_id}_III_H_solv

        1 6

        """

        com16=inspect.cleandoc(link7)+"\n\n"+inspect.cleandoc(link8)+"\n"
        automation.__write_text(f"{pdb_id}/{pdb_id}16/{pdb_id}16.com", com16)
        return

    @staticmethod
    def __create_rerun_gaussian_scripts_05(pdb_id):
        """
        Writes ``<pdb_id>/<pdb_id>05re/<pdb_id>05re.com``: a single "0 5" step using a
        ``gen`` basis (6-31G* on C/N/O/H, TZVP on Fe). The folder is created on demand.
        """
        link3re = f""" --Link3--
        %nprocs=12
        %mem=16GB
        %oldchk={pdb_id}_II_S.chk
        %chk={pdb_id}_II_Sre.chk
        # PBE1PBE/gen scf=maxcycle=999 geom=check

        {pdb_id}_II_Q

        0 5

        C N O H 0
        6-31G*
        ****
        Fe 0
        TZVP
        ****


        """
        com05re=inspect.cleandoc(link3re)+"\n"
        automation.__write_text(f"{pdb_id}/{pdb_id}05re/{pdb_id}05re.com", com05re)
        return

    @staticmethod
    def __create_rerun_gaussian_scripts_12(pdb_id):
        """
        Writes ``<pdb_id>/<pdb_id>12re/<pdb_id>12re.com``: a single "1 2" step using a
        ``gen`` basis (6-31G* on C/N/O/H, TZVP on Fe). The folder is created on demand.
        """
        link5re = f""" --Link5--
        %nprocs=12
        %mem=16GB
        %oldchk={pdb_id}_II_S.chk
        %chk={pdb_id}_II_Sre.chk
        # PBE1PBE/gen scf=maxcycle=999 geom=check

        {pdb_id}_III_D

        1 2

        C N O H 0
        6-31G*
        ****
        Fe 0
        TZVP
        ****


        """
        com12re=inspect.cleandoc(link5re)+"\n"
        automation.__write_text(f"{pdb_id}/{pdb_id}12re/{pdb_id}12re.com", com12re)
        return

    @staticmethod
    def __create_rerun_gaussian_scripts_16(pdb_id):
        """
        Writes ``<pdb_id>/<pdb_id>16re/<pdb_id>16re.com``: a single "1 6" step using a
        ``gen`` basis (6-31G* on C/N/O/H, TZVP on Fe). The folder is created on demand.
        """
        link7re = f""" --Link7--
        %nprocs=12
        %mem=16GB
        %oldchk={pdb_id}_III_D.chk
        %chk={pdb_id}_III_Dre.chk
        # PBE1PBE/gen scf=maxcycle=999 geom=check

        {pdb_id}_III_H

        1 6

        C N O H 0
        6-31G*
        ****
        Fe 0
        TZVP
        ****


        """
        com16re=inspect.cleandoc(link7re)+"\n"
        automation.__write_text(f"{pdb_id}/{pdb_id}16re/{pdb_id}16re.com", com16re)
        return

    @staticmethod
    def __create_gaussian_scripts_all(pdb_id):
        """
        Writes the four regular and the three re-run input files for one PDB entry.
        """
        automation.__create_gaussian_scripts_01(pdb_id=pdb_id)
        automation.__create_gaussian_scripts_05(pdb_id=pdb_id)
        automation.__create_gaussian_scripts_12(pdb_id=pdb_id)
        automation.__create_gaussian_scripts_16(pdb_id=pdb_id)
        automation.__create_rerun_gaussian_scripts_05(pdb_id=pdb_id)
        automation.__create_rerun_gaussian_scripts_12(pdb_id=pdb_id)
        automation.__create_rerun_gaussian_scripts_16(pdb_id=pdb_id)
        return

    @staticmethod
    def main(pdb_list=None, spin_states=None):
        """
        Creates the folders and writes every input file for each PDB entry.

        Parameters
        ----------
        pdb_list : list of str, optional
            PDB identifiers to process. Defaults to ``["3hb3", "2gsm"]``.
        spin_states : list of str, optional
            Folder suffixes to create up front. Defaults to
            ``["01", "05", "12", "16"]``. The full set of input files is written
            regardless of this list; missing folders are created while writing.
        """
        if spin_states is None:
            spin_states=["01", "05", "12", "16"]
        if pdb_list is None:
            pdb_list=["3hb3", "2gsm"]
        automation.__create_runtime_folders(pdb_list=pdb_list, spin_states=spin_states)
        for pdb_id in pdb_list:
            automation.__create_gaussian_scripts_all(pdb_id=pdb_id)
        return



In [14]:
# Create the run folders and write every Gaussian input file for the PDB entries
# that automation.main() hard-codes (paths are relative to the current directory).
automation.main()

In [15]:
# Create the shared history log with its header on first use; an existing
# history.txt is left untouched so earlier entries are never overwritten.
if not os.path.isfile("history.txt"):
    with open("history.txt", "w") as history_file:
        history_file.write("HISTORY OF CALCULATIONS FOR REDOX HEME PROJECT\nMROGISNKI, GENSCH, JONES, ZHARTOVSKA, LEICH, GUO\n\n")

In [21]:
# Separator line that closes every history.txt entry.
_HISTORY_RULE = "-----------------------------------------------------------------------------------------------------------------"

# (log fragments that must all occur in the log tail, history note) pairs,
# checked in this order after the convergence-failure case.
_ERROR_NOTES = (
    ((" Route card not found.",), "no input in {spin_state} - inspection necessary. "),
    ((" Input Error",), "cannot read input in {spin_state} - inspection necessary. "),
    (("Symbol not recognized in MSubst",), "unrecognized atomic symbol in {spin_state} - inspection necessary. "),
    ((" Error termination request processed by link 9999.",), "geometry not converged in {spin_state} - inspection necessary. "),
    ((" Old file ", " could not be opened."), "chk can not be read in {spin_state} - inspection necessary. "),
)

# spin_state -> (folder suffix holding the checkpoint, checkpoint file suffix)
# needed by the matching "<spin_state>re" re-run input.
_RERUN_CHECKPOINTS = {
    "05": ("01", "_II_S.chk"),
    "12": ("01", "_II_S.chk"),
    "16": ("12", "_III_D.chk"),
}


def _append_history(basedir, workdir, note):
    """Append one time-stamped entry to ``<basedir>/history.txt``."""
    now = datetime.now()
    entry = "\n".join([
        "Time, Date",
        f"{now.strftime('%H:%M:%S')}, {now.date()}",
        "Path",
        f"{workdir}",
        note,
        _HISTORY_RULE,
    ])
    with open(os.path.join(basedir, "history.txt"), "a") as history:
        history.write(entry + "\n\n")


def __error_capturing(logfile, basedir, workdir, spin_state, pdb_id=None):
    """
    Inspect the tail of a Gaussian log and record a known failure in history.txt.

    Only the last ten lines of ``logfile`` are searched, and each message is
    matched as a substring of a line (``readlines()`` keeps the trailing newline,
    so whole-element comparison can never match). The first matching case wins;
    a log without any known message leaves everything untouched.

    On a convergence failure for spin state "05", "12" or "16" the required
    checkpoint is copied into the ``<pdb_id><spin_state>re`` folder and
    ``qg16 <pdb_id><spin_state>re`` is started there.

    Parameters
    ----------
    logfile : list of str or str
        Log content, as lines (e.g. from ``readlines()``) or as one string.
    basedir : str
        Project root; ``history.txt`` and the ``<pdb_id>/...`` folders are
        resolved against it. The process working directory is not changed.
    workdir : str
        Job folder, written verbatim into the history entry.
    spin_state : str
        Two-character job tag ("01", "05", "12", "16").
    pdb_id : str, optional
        PDB identifier. When omitted it is taken from the last component of
        ``workdir`` with the ``spin_state`` suffix removed
        (presumably ``<pdb_id>/<pdb_id><spin_state>``; verify against caller).

    Raises
    ------
    FileNotFoundError
        If a checkpoint to copy, the re-run folder, or the ``qg16`` executable
        is missing.
    """
    if isinstance(logfile, str):
        logfile = logfile.splitlines()
    tail = logfile[-10:]

    def seen(fragments):
        # Every fragment has to appear somewhere in the tail.
        return all(any(fragment in line for line in tail) for fragment in fragments)

    if pdb_id is None:
        folder_name = os.path.basename(os.path.normpath(workdir))
        pdb_id = folder_name[:-len(spin_state)] if spin_state and folder_name.endswith(spin_state) else folder_name

    if seen((" Convergence failure -- run terminated.",)):
        _append_history(basedir, workdir, f"convergence failure in {spin_state} - inspection necessary. ")
        # A re-run input exists only for 05, 12 and 16; "01" is logged only.
        if spin_state in _RERUN_CHECKPOINTS:
            source_tag, chk_suffix = _RERUN_CHECKPOINTS[spin_state]
            chk_name = f"{pdb_id}{chk_suffix}"
            rerun_dir = os.path.join(basedir, pdb_id, f"{pdb_id}{spin_state}re")
            shutil.copyfile(
                os.path.join(basedir, pdb_id, f"{pdb_id}{source_tag}", chk_name),
                os.path.join(rerun_dir, chk_name),
            )
            proc.run(["qg16", f"{pdb_id}{spin_state}re"], cwd=rerun_dir)
        return

    for fragments, note in _ERROR_NOTES:
        if seen(fragments):
            _append_history(basedir, workdir, note.format(spin_state=spin_state))
            return
    #elif aborted due to time restraint: figure out which jobs were not done in time based on results not put into results csv.
    return

In [27]:
# Drives the job chain for one PDB entry: 01 -> (05, 12) -> 16.
# All paths are built from `basedir`, so the cell never changes the working
# directory and does not depend on where the kernel was started.
pdb_id="2gsm"
# NOTE(review): absolute, machine-specific path — consider moving it to a config cell.
basedir = "/home/pbuser/Desktop/PhD_WORK/automation_heme/"
workdirs=[f"{pdb_id}/{pdb_id}01", f"{pdb_id}/{pdb_id}05", f"{pdb_id}/{pdb_id}12", f"{pdb_id}/{pdb_id}16"]
# NOTE(review): the original concatenated workdir+"Erfolgreich.log" without a
# separator; this assumes the log lives inside the job folder — confirm the
# actual file name that qg16 produces.
LOG_NAME = "Erfolgreich.log"


def _job_dir(tag):
    """Absolute folder of the job with suffix ``tag`` (e.g. "05")."""
    return os.path.join(basedir, pdb_id, f"{pdb_id}{tag}")


def _submit(tag):
    """Run ``qg16 <pdb_id><tag>`` inside that job's folder and return the result."""
    return proc.run(["qg16", f"{pdb_id}{tag}"], capture_output=True, text=True, cwd=_job_dir(tag))


def _stage_chk(chk_name, source_tag, target_tag):
    """Copy a checkpoint file from one job folder into another."""
    shutil.copyfile(os.path.join(_job_dir(source_tag), chk_name), os.path.join(_job_dir(target_tag), chk_name))


def _record_done(workdir, label):
    """Append a time-stamped success entry to ``<basedir>/history.txt``."""
    now = datetime.now()
    entry = "\n".join([
        "Time, Date",
        f"{now.strftime('%H:%M:%S')}, {now.date()}",
        "Path",
        f"{workdir}",
        label,
        "-----------------------------------------------------------------------------------------------------------------",
    ])
    with open(os.path.join(basedir, "history.txt"), "a") as history:
        history.write(entry + "\n\n")


run01 = _submit("01")
#wait 2000000
# TODO: nothing below waits for a job to finish; the logs are read right away.
for workdir in workdirs:
    # The last two characters of the folder name are the spin-state tag.
    spin_state = workdir[-2:]
    with open(os.path.join(basedir, workdir, LOG_NAME), "r") as log_handle:
        logfile = log_handle.readlines()
    # An empty log counts as "not finished" instead of raising IndexError.
    finished = bool(logfile) and "Normal termination of Gaussian 16" in logfile[-1]

    if spin_state == "01":
        if finished:
            # Both follow-up jobs read the singlet checkpoint as %oldchk.
            _stage_chk(f"{pdb_id}_II_S.chk", "01", "05")
            _stage_chk(f"{pdb_id}_II_S.chk", "01", "12")
            run05 = _submit("05")
            run12 = _submit("12")
    elif spin_state == "05":
        if finished:
            _record_done(workdir, f"{spin_state} - Done! ")
    elif spin_state == "12":
        if finished:
            # The 16 job reads the doublet checkpoint as %oldchk.
            _stage_chk(f"{pdb_id}_III_D.chk", "12", "16")
            run16 = _submit("16")
            _record_done(workdir, f"{spin_state} - Done! ")
    elif spin_state == "16":
        if finished:
            _record_done(workdir, f"{pdb_id}{spin_state} - Done! ")

    __error_capturing(logfile=logfile, basedir=basedir, workdir=workdir, spin_state=spin_state, pdb_id=pdb_id)


FileNotFoundError: [Errno 2] No such file or directory: 'qg16'

Folder creation

In [56]:
# Make sure each PDB entry has its own folder plus one
# "<pdb_id>_<spin_state>" sub-folder per spin state; existing paths are skipped.
for pdb_id in pdb_list:
    for spin_state in spin_states:
        entry_dir = f"{pdb_id}"
        state_dir = f"{pdb_id}/{pdb_id}_{spin_state}/"
        # Parent first, then the spin-state folder inside it.
        for folder in (entry_dir, state_dir):
            if not os.path.exists(folder):
                os.mkdir(folder)

Regular Gaussian Scripts

In [12]:
# Build the three-step "0 1" input for one PDB entry and write it to
# <pdb_id>/<pdb_id>_01/<pdb_id>01.com. The coordinates come from
# pdb_coord/<pdb_id>.xyz and are inserted after the first step's "0 1" line.
pdb_id="2gsm"
link0=f"""%nprocs=12
%mem=16GB
%chk={pdb_id}_II_S.chk
# polar def2tzvp empiricaldispersion=gd3bj pbe1pbe

{pdb_id}_II_S 

0 1
"""

link1=f"""

--Link1--
%nprocs=12
%mem=16GB
%chk={pdb_id}_II_S.chk
# PBE1PBE/def2TZVP empiricaldispersion=GD3BJ int=(grid=ultrafine) geom=check guess=read pop=nbo scf=qc

{pdb_id}_II_S_nbo

0 1
"""

link2=f""" 
--Link2--
%nprocs=12
%mem=16GB
%chk={pdb_id}_II_S.chk
# PBE1PBE/def2TZVP empiricaldispersion=GD3BJ int=(grid=ultrafine) geom=check guess=read scrf=(smd,solvent=Chloroform) 

{pdb_id}_II_S_solv 

0 1 

"""
# Context managers close both files as soon as they have been read/written.
with open(f"pdb_coord/{pdb_id}.xyz") as xyz_file:
    coords = xyz_file.read()
com01=link0+coords+link1+link2
with open(f"{pdb_id}/{pdb_id}_01/{pdb_id}01.com", "w") as com_file:
    com_file.write(com01)

In [58]:
# Two-step "0 5" input, written to <pdb_id>/<pdb_id>_05/<pdb_id>05.com.
# The first step names <pdb_id>_II_S.chk as %oldchk. Relies on `pdb_id`
# from an earlier cell.
link3=f"""--Link3--
%nprocs=12
%mem=16GB
%oldchk={pdb_id}_II_S.chk
%chk={pdb_id}_II_Q.chk
# PBE1PBE/def2TZVP empiricaldispersion=GD3BJ scf=maxcycle=999 geom=check pop=nbo

{pdb_id}_II_Q

0 5

"""

link4=f"""--Link4--
%nprocs=12
%mem=16GB
%chk={pdb_id}_II_Q.chk
# PBE1PBE/def2TZVP empiricaldispersion=GD3BJ geom=check guess=read scrf=(smd,solvent=Chloroform)

{pdb_id}_II_Q_solv

0 5

"""

com05=link3+link4
# The context manager closes the file as soon as the input is written.
with open(f"{pdb_id}/{pdb_id}_05/{pdb_id}05.com", "w") as com_file:
    com_file.write(com05)

In [59]:
# Two-step "1 2" input, written to <pdb_id>/<pdb_id>_12/<pdb_id>12.com.
# The first step names <pdb_id>_II_S.chk as %oldchk. Relies on `pdb_id`
# from an earlier cell.
link5=f"""--Link5--
%nprocs=12
%mem=16GB
%oldchk={pdb_id}_II_S.chk
%chk={pdb_id}_III_D.chk
# PBE1PBE/def2TZVP empiricaldispersion=GD3BJ scf=maxcycle=999 geom=check pop=nbo

{pdb_id}_III_D

1 2

"""

link6=f"""--Link6--
%nprocs=12
%mem=16GB
%chk={pdb_id}_III_D.chk
# PBE1PBE/def2TZVP empiricaldispersion=GD3BJ geom=check guess=read scrf=(smd,solvent=Chloroform)

{pdb_id}_III_D_solv

1 2

"""

com12=link5+link6
# The context manager closes the file as soon as the input is written.
with open(f"{pdb_id}/{pdb_id}_12/{pdb_id}12.com", "w") as com_file:
    com_file.write(com12)

In [60]:
# Two-step "1 6" input, written to <pdb_id>/<pdb_id>_16/<pdb_id>16.com.
# The first step names <pdb_id>_III_D.chk as %oldchk. Relies on `pdb_id`
# from an earlier cell.
link7=f"""--Link7--
%nprocs=12
%mem=16GB
%oldchk={pdb_id}_III_D.chk
%chk={pdb_id}_III_H.chk
# PBE1PBE/def2TZVP empiricaldispersion=GD3BJ scf=maxcycle=999 geom=check pop=nbo

{pdb_id}_III_H

1 6

"""

link8=f"""--Link8--
%nprocs=12
%mem=16GB
%chk={pdb_id}_III_H.chk
# PBE1PBE/def2TZVP empiricaldispersion=GD3BJ geom=check guess=read scrf=(smd,solvent=Chloroform)

{pdb_id}_III_H_solv

1 6

"""

com16=link7+link8
# The context manager closes the file as soon as the input is written.
with open(f"{pdb_id}/{pdb_id}_16/{pdb_id}16.com", "w") as com_file:
    com_file.write(com16)

Re-run Gaussian Jobscripts

In [61]:
# Re-run input for the "0 5" state with a `gen` basis (6-31G* on C/N/O/H,
# TZVP on Fe), written to <pdb_id>/<pdb_id>_05re/<pdb_id>05re.com.
# Relies on `pdb_id` from an earlier cell.
link3re = f""" --Link3--
%nprocs=12
%mem=16GB
%oldchk={pdb_id}_II_S.chk
%chk={pdb_id}_II_Sre.chk
# PBE1PBE/gen scf=maxcycle=999 geom=check

{pdb_id}_II_Q

0 5

C N O H 0
6-31G*
****
Fe 0
TZVP
****


"""
com05re=link3re
# exist_ok makes the folder creation safe to re-run; the context manager
# closes the file as soon as the input is written.
os.makedirs(f"{pdb_id}/{pdb_id}_05re/", exist_ok=True)
with open(f"{pdb_id}/{pdb_id}_05re/{pdb_id}05re.com", "w") as com_file:
    com_file.write(com05re)

In [62]:
# Re-run input for the "1 2" state with a `gen` basis (6-31G* on C/N/O/H,
# TZVP on Fe), written to <pdb_id>/<pdb_id>_12re/<pdb_id>12re.com.
# Relies on `pdb_id` from an earlier cell.
link5re = f""" --Link5--
%nprocs=12
%mem=16GB
%oldchk={pdb_id}_II_S.chk
%chk={pdb_id}_II_Sre.chk
# PBE1PBE/gen scf=maxcycle=999 geom=check

{pdb_id}_III_D

1 2

C N O H 0
6-31G*
****
Fe 0
TZVP
****


"""
com12re=link5re
# exist_ok makes the folder creation safe to re-run; the context manager
# closes the file as soon as the input is written.
os.makedirs(f"{pdb_id}/{pdb_id}_12re/", exist_ok=True)
with open(f"{pdb_id}/{pdb_id}_12re/{pdb_id}12re.com", "w") as com_file:
    com_file.write(com12re)

In [63]:
# Re-run input for the "1 6" state with a `gen` basis (6-31G* on C/N/O/H,
# TZVP on Fe), written to <pdb_id>/<pdb_id>_16re/<pdb_id>16re.com.
# Relies on `pdb_id` from an earlier cell.
link7re = f""" --Link7--
%nprocs=12
%mem=16GB
%oldchk={pdb_id}_III_D.chk
%chk={pdb_id}_III_Dre.chk
# PBE1PBE/gen scf=maxcycle=999 geom=check

{pdb_id}_III_H

1 6

C N O H 0
6-31G*
****
Fe 0
TZVP
****


"""
com16re=link7re
# exist_ok makes the folder creation safe to re-run; the context manager
# closes the file as soon as the input is written.
os.makedirs(f"{pdb_id}/{pdb_id}_16re/", exist_ok=True)
with open(f"{pdb_id}/{pdb_id}_16re/{pdb_id}16re.com", "w") as com_file:
    com_file.write(com16re)

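Regular Gaussian scripts restarted from the re-run checkpoints (`%oldchk` set to `*_Sre.chk` / `*_Dre.chk`, with `guess=read`) — TODO: confirm this is the intended follow-up to the re-run jobs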

In [None]:
# Variant of the "0 5" input that takes %oldchk from the re-run checkpoint
# <pdb_id>_II_Sre.chk and adds guess=read to the first step. It overwrites
# <pdb_id>/<pdb_id>_05/<pdb_id>05.com. Relies on `pdb_id` from an earlier cell.
link3=f"""--Link3--
%nprocs=12
%mem=16GB
%oldchk={pdb_id}_II_Sre.chk
%chk={pdb_id}_II_Q.chk
# PBE1PBE/def2TZVP guess=read empiricaldispersion=GD3BJ scf=maxcycle=999 geom=check pop=nbo

{pdb_id}_II_Q

0 5

"""

link4=f"""--Link4--
%nprocs=12
%mem=16GB
%chk={pdb_id}_II_Q.chk
# PBE1PBE/def2TZVP empiricaldispersion=GD3BJ geom=check guess=read scrf=(smd,solvent=Chloroform)

{pdb_id}_II_Q_solv

0 5

"""

com05=link3+link4
# The context manager closes the file as soon as the input is written.
with open(f"{pdb_id}/{pdb_id}_05/{pdb_id}05.com", "w") as com_file:
    com_file.write(com05)

In [None]:
# Variant of the "1 2" input that takes %oldchk from the re-run checkpoint
# <pdb_id>_II_Sre.chk and adds guess=read to the first step. It overwrites
# <pdb_id>/<pdb_id>_12/<pdb_id>12.com. Relies on `pdb_id` from an earlier cell.
link5=f"""--Link5--
%nprocs=12
%mem=16GB
%oldchk={pdb_id}_II_Sre.chk
%chk={pdb_id}_III_D.chk
# PBE1PBE/def2TZVP guess=read empiricaldispersion=GD3BJ scf=maxcycle=999 geom=check pop=nbo

{pdb_id}_III_D

1 2

"""

link6=f"""--Link6--
%nprocs=12
%mem=16GB
%chk={pdb_id}_III_D.chk
# PBE1PBE/def2TZVP empiricaldispersion=GD3BJ geom=check guess=read scrf=(smd,solvent=Chloroform)

{pdb_id}_III_D_solv

1 2

"""

com12=link5+link6
# The context manager closes the file as soon as the input is written.
with open(f"{pdb_id}/{pdb_id}_12/{pdb_id}12.com", "w") as com_file:
    com_file.write(com12)

In [None]:
# Variant of the "1 6" input that takes %oldchk from the re-run checkpoint
# <pdb_id>_III_Dre.chk and adds guess=read to the first step. It overwrites
# <pdb_id>/<pdb_id>_16/<pdb_id>16.com. Relies on `pdb_id` from an earlier cell.
link7=f"""--Link7--
%nprocs=12
%mem=16GB
%oldchk={pdb_id}_III_Dre.chk
%chk={pdb_id}_III_H.chk
# PBE1PBE/def2TZVP guess=read empiricaldispersion=GD3BJ scf=maxcycle=999 geom=check pop=nbo

{pdb_id}_III_H

1 6

"""

link8=f"""--Link8--
%nprocs=12
%mem=16GB
%chk={pdb_id}_III_H.chk
# PBE1PBE/def2TZVP empiricaldispersion=GD3BJ geom=check guess=read scrf=(smd,solvent=Chloroform)

{pdb_id}_III_H_solv

1 6

"""

com16=link7+link8
# The context manager closes the file as soon as the input is written.
with open(f"{pdb_id}/{pdb_id}_16/{pdb_id}16.com", "w") as com_file:
    com_file.write(com16)