In [1]:
# Use the public IPython.display API: importing display/HTML from
# IPython.core.display is deprecated in newer IPython releases.
from IPython.display import display, HTML

# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

# Hindered Rotors Class

In [2]:
import math
import os

import arkane
import cclib

from autotst.calculators.gaussian import read_gaussian_out

#from rmgpy.molecule import Molecule as RMG_Molecule

#from autotst.species import Species as TS_Species
from autotst.species import Conformer

import matplotlib.pyplot as plt
import numpy as np

class Hindered_Rotors(object):
    """
    Bookkeeping for the hindered-rotor scans of a single conformer.

    Holds the conformer, tracks whether its geometry has been refreshed from a
    Gaussian geometry-optimization log, and owns one Scan instance per torsion.
    """

    def __init__(self, conformer, label=None, path=None):
        """
        Class for hindered rotor calculation methods

        conformer :: autotst.species Conformer object
        label     :: Base name used for default file names (optional)
        path      :: General path where files relevant to this conformer can be found or written to

        self.scans :: list of Scan instances
        """
        self.__conformer__ = conformer
        self.SMILES = conformer.smiles

        self.path = path

        self.AugInChIKey = conformer.rmg_molecule.toAugmentedInChIKey()
        self.label = label

        # Scans may only be created after update_Conformer() has succeeded.
        self.__geoIsOpt__ = False
        self.__torsions__ = None

        self.scans = []

    def get_Conformer(self):
        """Returns the conformer this instance was built around."""
        return self.__conformer__

    def get_GeoIsOpt(self):
        """Returns True once the geometry has been loaded by update_Conformer()."""
        return self.__geoIsOpt__

    def get_Torsions(self):
        """Returns the torsions found by update_Conformer(), or None before that."""
        return self.__torsions__

    def update_Conformer(self, filename=None, path=None):
        """
        Updates conformer geometry from Gaussian geometry optimization output.
        If no filename is specified, the name is built by naming convention.

        filename :: Geometry optimization log from Gaussian. Default is
                    self.label + '_GeoFreq.log', or AugInChIKey + '_GeoFreq.log'
                    when no label was given
        path     :: path of file, default to cwd

        Returns the value of get_GeoIsOpt() (True when the update completed).
        """
        # Invalidate the previous state first so a failed read cannot leave
        # stale torsions flagged as optimized.
        self.__geoIsOpt__ = False
        self.__torsions__ = None

        if filename is None:
            # self.label defaults to None; fall back to the InChI key instead
            # of failing on None + str.
            base = self.label if self.label is not None else self.AugInChIKey
            filename = base + '_GeoFreq.log'

        if path is None:
            path = os.getcwd()

        # Read by joined path rather than os.chdir(): an exception while
        # reading can then no longer leave the process in another directory.
        log_path = os.path.join(path, filename)

        self.__conformer__.ase_molecule = read_gaussian_out(log_path)
        self.__conformer__.update_coords()
        self.__torsions__ = self.__conformer__.get_torsions()
        self.__geoIsOpt__ = True

        return self.get_GeoIsOpt()

    def create_Scan(self, torsion, steps=None, stepsize_deg=None, path=None, label=None):
        """
        Creates Scan object and appends it to self.scans.
        Does nothing when the geometry has not been updated yet.

        torsion             :: Torsion object
        steps               :: Number of steps taken during scan (int)
        stepsize_deg        :: DEGREES between steps (float)
        path                :: General path where things like a geometry log, scan input, and scan log can be found
        label               :: Label forwarded to the Scan instance
        """
        if steps is not None:
            assert isinstance(steps, int)
        if stepsize_deg is not None:
            assert isinstance(stepsize_deg, float)

        if self.get_GeoIsOpt():
            conf = self.get_Conformer()
            # Forward the caller's label (it used to be dropped in favor of None).
            scan_inst = Scan(conf, torsion, steps, stepsize_deg, path=path, label=label)
            self.scans.append(scan_inst)

    def generate_Scans(self, steps=None, stepsize_deg=None, path=None, label=None):
        """
        Generates Scan instance for every torsion.
        Does nothing when the geometry has not been updated yet.

        steps               :: Number of steps taken during scan (int)
        stepsize_deg        :: DEGREES between steps (float)
        path                :: General path where things like geometry logs, scan inputs, and scan logs can be found
        label               :: Label forwarded to every Scan instance
        """
        if self.get_GeoIsOpt():
            for torsion in self.get_Torsions():
                self.create_Scan(torsion, steps=steps, stepsize_deg=stepsize_deg, path=path, label=label)

    def set_ScanData(self):
        """Calls set_Data() on every scan, provided the geometry has been updated."""
        if self.get_GeoIsOpt():
            for scan in self.scans:
                scan.set_Data()

    def check_AllScanSCFEnergies(self):
        """
        Returns a dict mapping each scan to the result of its
        get_SCFEnergyMinIdx() call.
        """
        scan_mins = {}
        for scan in self.scans:
            # Scan exposes get_SCFEnergyMinIdx(); there is no check_scfEnergyMinIdx().
            scan_mins[scan] = scan.get_SCFEnergyMinIdx()
        return scan_mins

Using Theano backend.


# Scan Class

In [18]:
class Scan(object):
    """
    One Gaussian relaxed torsion scan for a single torsion of a conformer.

    Stores the scan settings and file names, writes the Gaussian input, reads
    the finished scan log through cclib and Arkane, and offers sanity checks
    on the resulting energy profile.
    """

    def __init__(self,
                 conformer,
                 torsion,
                 steps=None,
                 stepsize_deg=None,
                 path=None,
                 label=None,
                 geo_log=None,
                 tor_com=None,
                 tor_log=None):
        """
        Scan Class

        conformer       :: autotst conformer object
        torsion         :: autotst torsion object
        steps           :: Number of steps taken during scan (int)
        stepsize_deg    :: DEGREES between steps (float)
        path            :: General path where things like geometry logs, scan inputs, and scan logs can be found
        label           :: Optional label stored on the instance

        geo_log         :: Name of gaussian geometry optimization log that the scan is based off of
        tor_com         :: Name of gaussian input for scan
        tor_log         :: Name of gaussian output log for scan

        self.i          :: index i for torsion indices: i,j,k,l
        self.j          :: index j for torsion indices: i,j,k,l
        self.k          :: index k for torsion indices: i,j,k,l
        self.l          :: index l for torsion indices: i,j,k,l

        self.stepsize_rad    :: Radians between steps (float)

        self.data            :: All data read in from cclib
        self.opt_indices     :: Indices relating to fully optimized geometries (at each scan step, multiple optimizations take place until a final optimized geometry is found)
        self.start_indices   :: Indices of the geometries whose cclib optstatus equals 1
        self.opt_SCFEnergies :: SCF Energies of optimized geometries
        self.atomCoords      :: atom coords of all geometries in scan

        self.ark_energies    :: Energy of optimized geometry at each theta (different units than from cclib)
        self.ark_thetas      :: Theta at each step
        """
        self.__conformer__ = conformer
        self.__torsion__ = torsion

        self.AugInChIKey = conformer.rmg_molecule.toAugmentedInChIKey()

        self.label = label

        self.path = path

        self.i = torsion.atom_indices[0]
        self.j = torsion.atom_indices[1]
        self.k = torsion.atom_indices[2]
        self.l = torsion.atom_indices[3]

        if steps is not None:
            assert isinstance(steps, int)
        if stepsize_deg is not None:
            assert isinstance(stepsize_deg, float)

        self.steps = steps
        self.stepsize_deg = stepsize_deg

        self.stepsize_rad = None

        self.geo_log = geo_log
        self.input_com = tor_com
        self.output_log = tor_log

        # Info from cclib
        self.data = None
        self.opt_indices = None
        self.start_indices = None
        self.opt_SCFEnergies = None
        self.atomCoords = None

        # Redundant info from Arkane
        self.ark_energies = None
        self.ark_thetas = None

    def get_Conformer(self):
        """Returns the conformer this scan belongs to."""
        return self.__conformer__

    def get_Torsion(self):
        """Returns the torsion being scanned."""
        return self.__torsion__

    def _resolve_path(self, path):
        """Returns the explicit path, else self.path, else the current working directory."""
        if path is None:
            path = self.path
        if path is None:
            path = os.getcwd()
        return path

    @staticmethod
    def _first_min_index(values):
        """Returns the index of the first smallest entry of a non-empty sequence."""
        best = 0
        for position, value in enumerate(values):
            if value < values[best]:
                best = position
        return best

    def set_DefaultFiles(self):
        """
        Sets geo_log, input_com, and output_log per naming convention around AugInChIKey

        geo_log         :: Name of gaussian geometry optimization log that the scan is based off of
        tor_com         :: Name of gaussian input for scan
        tor_log         :: Name of gaussian output log for scan
        """
        self.geo_log = self.AugInChIKey + '_Geo.log'

        # The scan files are named after the two pivot atoms, smaller index first.
        a = min([self.j, self.k])
        b = max([self.j, self.k])

        self.input_com = self.AugInChIKey + '_tor{}{}'.format(a, b) + '.com'
        self.output_log = self.AugInChIKey + '_tor{}{}'.format(a, b) + '.log'

    def write_TorInput(self,
                       filename=None,
                       path=None,
                       method=None,
                       basis=None,
                       job=None,
                       steps=None,
                       stepsize_deg=None):
        """
        Write Gaussian input file for torsion scan

        filename :: desired file name to be written to (default self.input_com)
        path     :: path of file to write (default self.path, then cwd)
        method   :: Gaussian method (default 'm062x')
        basis    :: Gaussian basis (default '6-311+g(2df,2p)')
        job      :: Gaussian job key word (default 'Opt=(CalcFC,ModRedun)')

        steps :: Number of steps in scan (default self.steps)
        stepsize_deg :: change in DEGREES between steps (default self.stepsize_deg)

        Side effect: every atom of the conformer's rmg_molecule gets its id
        set to its position in mol.atoms.
        """
        if filename is None:
            assert self.input_com is not None
            filename = self.input_com

        # Falls back to the cwd when neither argument nor attribute is set,
        # instead of failing inside os.path.join(None, ...).
        path = self._resolve_path(path)

        if method is None:
            method = 'm062x'

        if basis is None:
            basis = '6-311+g(2df,2p)'

        if job is None:
            job = 'Opt=(CalcFC,ModRedun)'

        if steps is None:
            steps = self.steps

        if stepsize_deg is None:
            stepsize_deg = self.stepsize_deg

        conf = self.get_Conformer()
        mol = conf.rmg_molecule

        mol.updateMultiplicity()

        output = '%nprocshared=20\n'
        output += '%mem=5GB\n'
        output += '#p {0}/{1} {2}\n'.format(method, basis, job)
        output += '\nGaussian Input Prepared from Scan Object\n'
        # The charge is written as 0; only the multiplicity comes from the molecule.
        output += '\n0 {}\n'.format(mol.multiplicity)

        # Atom IDs are stored 0-based here; Gaussian and Arkane count from 1,
        # which is corrected with +1 wherever an index is written out below.
        for idx, atom in enumerate(mol.atoms):
            atom.id = idx
            output += "{}     {}     {}     {}\n".format(atom.element, atom.coords[0], atom.coords[1], atom.coords[2])

        output += '\n'

        for bond in mol.getAllEdges():
            output += 'B {0} {1}\n'.format(bond.atom1.id+1, bond.atom2.id+1)

        output = output + 'D {0} {1} {2} {3} S {4} {5}'.format(self.i+1,
                                                               self.j+1,
                                                               self.k+1,
                                                               self.l+1,
                                                               steps,
                                                               stepsize_deg)
        output += '\n\n\n'

        # The with-block closes the file; no explicit close is needed.
        with open(os.path.join(path, filename), 'w') as F:
            F.write(output)

    def set_DataArkane(self, scan_log=None, path=None):
        """
        Reads in energies and thetas using arkane object

        scan_log :: name of scan log (default self.output_log)
        path     :: path to scan log (default self.path, then cwd)
        """
        path = self._resolve_path(path)

        if scan_log is None:
            assert self.output_log is not None
            scan_log = self.output_log

        # Hand Arkane the joined path instead of changing directory around the
        # constructor: the energies are loaded by the later
        # loadScanEnergies() call, after the old code had already changed back.
        ark = arkane.gaussian.GaussianLog(os.path.join(path, scan_log))

        scan_data = ark.loadScanEnergies()

        self.ark_energies = scan_data[0]
        self.ark_thetas = scan_data[1]

    def set_Data(self, scan_log=None, path=None):
        """
        Sets data attributes using cclib and Arkane

        scan_log :: name of scan log (default self.output_log)
        path     :: path to scan log (default self.path, then cwd)

        Returns True when every consistency assertion passed.
        """
        path = self._resolve_path(path)

        if scan_log is None:
            assert self.output_log is not None
            scan_log = self.output_log

        self.data = cclib.io.ccread(os.path.join(path, scan_log))

        self.set_DataArkane(scan_log=scan_log, path=path)

        # NOTE(review): optstatus is compared by equality (2 = optimized,
        # 1 = start of a new optimization) -- confirm this matches the value
        # scheme of the installed cclib version.
        self.opt_indices = [i for i, status in enumerate(self.data.optstatus) if status == 2]
        self.start_indices = [i for i, status in enumerate(self.data.optstatus) if status == 1]
        self.opt_SCFEnergies = [self.data.scfenergies[index] for index in self.opt_indices]

        assert len(self.opt_SCFEnergies) == len(self.ark_energies)

        if self.steps is None:
            self.steps = len(self.opt_SCFEnergies) - 1
        else:
            assert self.steps == len(self.opt_SCFEnergies) - 1

        # Getting stepsize via Arkane and comparing it with the given stepsize
        self.stepsize_rad = self.ark_thetas[1] - self.ark_thetas[0]

        if self.stepsize_deg is not None:
            # Both sides are fractions of a full turn.
            assert abs(self.stepsize_deg/360.0 - self.stepsize_rad/(2*math.pi)) < 0.01

        assert len(self.opt_indices) == self.steps + 1
        assert len(self.start_indices) == self.steps + 1

        return True

    def plot_Scan(self):
        """Plots ark_energies against ark_thetas on the current matplotlib axes."""
        plt.plot(self.ark_thetas, self.ark_energies)

    def check_ArkThetaContinuous(self, tol=None):
        """
        Returns true if ark_energies at the same theta are within tolerance

        Thetas are bucketed to hundredths of a full turn; within a bucket each
        later energy is compared (relative error) against the first one seen.

        tol :: allowed relative error (default 10**-8)
        """
        if tol is None:
            tol = 10**-8

        energy_by_theta = {}

        for theta, energy in zip(self.ark_thetas, self.ark_energies):
            key = int(theta*100/(2*math.pi))

            if key in energy_by_theta:
                previous = energy_by_theta[key]

                if energy == 0:
                    # A relative error against zero is undefined; only an
                    # equally zero earlier energy counts as continuous.
                    if previous != 0:
                        return False
                    continue

                error = 1.0 - previous/energy

                if abs(error) > tol:
                    return False
            else:
                energy_by_theta[key] = energy

        return True

    def check_ArkSlopeContinuous(self, tol=None):
        """
        Check if change in energy within given tolerance over all ark energies

        tol :: largest allowed |dE/dtheta| with theta in radians (default 10**10)
        """
        if tol is None:
            tol = 10**10

        for i in range(1, len(self.ark_energies)):

            slope = (self.ark_energies[i]-self.ark_energies[i-1]) / self.stepsize_rad

            if abs(slope) > tol:
                return False

        return True

    def check_SCFSlopeContinuous(self, tol=None):
        """
        Check if change in energy within given tolerance over all opt SCF energies

        tol :: largest allowed |dE/dtheta| with theta in degrees (default 10**-2.5)
        """
        if tol is None:
            tol = 10**-2.5

        for i in range(1, len(self.opt_SCFEnergies)):

            slope = (self.opt_SCFEnergies[i]-self.opt_SCFEnergies[i-1]) / self.stepsize_deg

            if abs(slope) > tol:
                return False

        return True

    def get_ArkEnergyMinIdx(self):
        """
        Returns index of minimum ark_energy

        For the scan to be valid, the very first geometry should have the most optimized geometry
        """
        return self._first_min_index(self.ark_energies)

    def get_SCFEnergyMinIdx(self):
        """
        Returns [opt_min_idx, min_idx] for the lowest optimized SCF energy

        opt_min_idx :: Position of the minimum within opt_SCFEnergies, should align with self.get_ArkEnergyMinIdx()
        min_idx     :: Matching entry of opt_indices, i.e. the position of that geometry among all geometries read by cclib
        """
        # Compare SCF energies with SCF energies only: the Arkane energies are
        # in different units and must not be mixed into this search.
        opt_min_idx = self._first_min_index(self.opt_SCFEnergies)
        min_idx = self.opt_indices[opt_min_idx]

        return [opt_min_idx, min_idx]
    

# Modified Statmech class

In [19]:
#Jfrom autotst.species import Species as TS_Species
from autotst.species import Conformer
#from rmgpy.species import Species as RMG_Species
#from rmgpy.molecule import Molecule as RMG_Molecule
#from rdkit import Chem
import os
import arkane

class fake_statmech(object):
    """
    Modified statmech helper that writes Arkane species input files,
    including a HinderedRotor entry for every torsion supplied.
    """

    # Bond labels this writer emits. Both atom orders of a bond are tried
    # against this list, so each pair needs to appear only once.
    _KNOWN_BOND_TYPES = (
        'C-C', 'H-H', 'C-H', 'O-O', 'C-O', 'O-H',
        'N-N', 'N-C', 'N-O', 'N-H', 'S-S', 'S-H',
        'C=C', 'O=O', 'C=O', 'N=N', 'N=C', 'N=O', 'S=O',
        'C#C', 'N#N', 'N#C',
    )

    def __init__(self):
        self.reactants = []
        self.products = []
        self.ts = None

        self.model_chemistry = 'M06-2X/cc-pVTZ'
        self.rotors = []

    # ---- BEGIN NEW FUNCTIONS ----

    def get_ArkaneRotorInfo(self, conformer, torsion):
        """
        Formats and returns info about torsion as it should appear in an Arkane species.py

        conformer :: autotst conformer object
        torsion :: autotst torsion object

        Needed for Arkane species file:
        scanLog :: Gaussian output log of the torsion scan, named
                   AugInChIKey + '_tor{a}{b}.log' with a < b the pivot atoms
        pivots :: torsion center (j,k), counted from 1
        top :: ID of all atoms in one top, starting from 1!

        Returns one "HinderedRotor(...)," line as a string.
        """
        a = min(torsion.atom_indices[1:3])
        b = max(torsion.atom_indices[1:3])

        # Adjusted since mol's IDs start from 0 while Arkane's start from 1
        tor_center_adj = [a+1, b+1]

        tor_log = conformer.rmg_molecule.toAugmentedInChIKey() + '_tor{0}{1}.log'.format(a, b)

        top_IDs = self.get_TopIDs(conformer, torsion)  # ID of all atoms in one of the tops
        top_IDs_adj = [ID+1 for ID in top_IDs]  # Adjusted to start from 1 instead of 0

        info = "     HinderedRotor(scanLog=Log('{0}'), pivots={1}, top={2}, fit='fourier'),".format(tor_log, tor_center_adj, top_IDs_adj)

        return info

    def get_TopIDs(self, conformer, torsion):
        """
        Given torsion, finds bond within conformer.rmg_molecule associated with torsion center
        Splits a copy of the molecule about this bond to create two molecules
        Returns IDs (0-based positions in rmg_molecule.atoms) of all atoms in one of the new molecules

        conformer :: autotst conformer object
        torsion :: autotst torsion object

        tor_center :: If given i,j,k,l torsion indices, center is j,k
        tor_bond :: bond corresponding to tor_center
        """
        # Work on a deep copy: removing the pivot bond from the conformer's own
        # molecule would leave it broken for every later torsion and caller.
        mol = conformer.rmg_molecule.copy(deep=True)

        # Number the copied atoms by list position so that the IDs are
        # guaranteed to line up with the torsion's atom indices.
        for index, atom in enumerate(mol.atoms):
            atom.id = index

        a = min(torsion.atom_indices[1:3])
        b = max(torsion.atom_indices[1:3])
        tor_center = [a, b]  # If given i,j,k,l torsion, center is j,k

        tor_bond = None
        for bond in mol.getAllEdges():
            if sorted([bond.atom1.id, bond.atom2.id]) == tor_center:
                tor_bond = bond
                break
        assert tor_bond is not None

        mol.removeBond(tor_bond)
        tops = mol.split()

        # Need only to identify the IDs in one of the tops
        top_IDs = [atom.id for atom in tops[0].atoms]

        return top_IDs

    # ---- END NEW FUNCTIONS ----

    # ---- MODIFIED FUNCTION: write_arkane... ----

    def write_arkane_for_reacts_and_prods(self, conformer, label=None, torsions = None, path=None):
        """
        a method to write species to an arkane input file.

        conformer :: autotst conformer object (its rmg_molecule is used)
        label     :: base name; the file is written to label + '.py' and the
                     frequency log is expected at label + '_Freq.log'
        torsions  :: iterable of torsions to emit as hindered rotors
                     (None is treated as "no rotors")
        path      :: directory to write into, default cwd

        Returns the text that was written.
        """
        if label is None:
            label = "you_forgot_a_label"
        if path is None:
            path = os.getcwd()
        if torsions is None:
            torsions = []

        mol = conformer.rmg_molecule
        freq_log = label + '_Freq'

        output = ['#!/usr/bin/env python',
                  '# -*- coding: utf-8 -*-', '', 'atoms = {']

        atom_dict = self.get_atoms(conformer)

        # .items() works on both Python 2 and 3, unlike .iteritems().
        for atom, count in atom_dict.items():
            output.append("    '{0}': {1},".format(atom, count))
        output = output + ['}', '']

        bond_dict = self.get_bonds(conformer)
        if bond_dict != {}:
            output.append('bonds = {')
            for bond_type, num in bond_dict.items():
                output.append("    '{0}': {1},".format(bond_type, num))
            output.append("}")
        else:
            output.append('bonds = {}')

        external_symmetry = mol.getSymmetryNumber()

        output += ["", "linear = False", "", "externalSymmetry = {}".format(external_symmetry), "",
                   "spinMultiplicity = {}".format(mol.multiplicity), "", "opticalIsomers = 1", ""]

        output += ["energy = {", "    '{0}': Log('{1}.log'),".format(
            self.model_chemistry, freq_log), "}", ""]

        output += ["geometry = Log('{0}.log')".format(freq_log), ""]

        output += ["frequencies = Log('{0}.log')".format(freq_log), ""]

        # New relative to the unmodified writer: one HinderedRotor line per torsion.
        output += ["rotors = ["]
        for torsion in torsions:
            # Must be called through self; a bare function of this name does not exist.
            rotor_info = self.get_ArkaneRotorInfo(conformer, torsion)
            output += [rotor_info]
        output += ["]"]

        input_string = '\n'.join(output)

        filename = label + '.py'
        with open(os.path.join(path, filename), "w") as f:
            f.write(input_string)

        return input_string

    def get_atoms(self, conf):
        """
        A method to create an atom dictionary for an rmg molecule

        Returns a dict mapping element symbol to the number of such atoms.
        """
        atom_dict = {}

        rmg_mol = conf.rmg_molecule

        for atom in rmg_mol.atoms:
            # Count by the atom's own symbol so that elements other than
            # C/H/O are no longer booked under the previous atom's type.
            atom_dict[atom.symbol] = atom_dict.get(atom.symbol, 0) + 1

        return atom_dict

    def get_bonds(self, conf):
        """
        Counts the bonds of conf.rmg_molecule by type.

        Returns a dict mapping labels such as 'C-H', 'C=O' or 'N#C' to counts.
        Bonds whose order is not single/double/triple, or whose element pair
        has no entry in _KNOWN_BOND_TYPES, are left out.
        """
        rmg_mol = conf.rmg_molecule

        # Every bond is reachable from both of its atoms; the set removes the duplicates.
        bondList = []
        for atom in rmg_mol.atoms:
            for bond in atom.bonds.values():
                bondList.append(bond)
        bonds = list(set(bondList))

        bondDict = {}
        for bond in bonds:
            # isDouble/isTriple must be *called*; the bare method object is always truthy.
            if bond.isSingle():
                connector = '-'
            elif bond.isDouble():
                connector = '='
            elif bond.isTriple():
                connector = '#'
            else:
                continue

            first = bond.atom1.symbol
            second = bond.atom2.symbol

            bondType = None
            for candidate in (first + connector + second, second + connector + first):
                if candidate in self._KNOWN_BOND_TYPES:
                    bondType = candidate
                    break

            if bondType is None:
                # Unknown pair: skip it rather than re-counting the previous bond's type.
                continue

            bondDict[bondType] = bondDict.get(bondType, 0) + 1

        return bondDict

# Additional Functions without a Home

In [20]:
from autotst.calculators.gaussian import Gaussian as AutoTST_Gaussian
def exists_and_complete(path):
    """
    Tells whether a Gaussian output file is present and passes verification.

    path :: path of the Gaussian log to inspect

    Returns True only when `path` is an existing file and the result of
    AutoTST's verify_output_file(path) contains no False entry; otherwise
    returns False.
    """
    verifier = AutoTST_Gaussian()

    # Guard clause: a missing file can never be complete.
    if not os.path.isfile(path):
        return False

    checks = verifier.verify_output_file(path)
    return False not in checks




from ase.calculators.gaussian import Gaussian as ASE_Gaussian
def write_GeoFreq(conf, filename, path=None):
    """
    Writes the Gaussian geometry-optimization + frequency input for a conformer.

    conf     :: autotst conformer object; its rmg_molecule supplies the
                augmented InChI key and its ase_molecule the geometry
    filename :: currently unused, kept for interface compatibility
                (NOTE(review): the file name is derived from the job label --
                confirm whether callers expect `filename` to be honored)
    path     :: directory to write into; when None the job label carries no
                directory part, so the file is written relative to the cwd

    The job label is AugInChIKey + '_GeoFreq'.
    """
    gaus_job = ASE_Gaussian()

    # Build the label from the conformer itself instead of relying on a
    # notebook-global `augInChI` that only exists while the workflow loop runs.
    base_label = conf.rmg_molecule.toAugmentedInChIKey() + '_GeoFreq'
    if path is None:
        gaus_job.label = base_label
    else:
        gaus_job.label = os.path.join(path, base_label)

    gaus_job.parameters['method'] = 'm062x'
    gaus_job.parameters['basis'] = '6-311+g(2df,2p)'
    gaus_job.extra = 'opt=(calcfc,maxcycle=1000) freq iop(7/33=1)'

    # Drop the 'force' parameter if present; pop() tolerates its absence
    # where `del` would raise KeyError.
    gaus_job.parameters.pop('force', None)

    # Write geo input file
    gaus_job.write_input(conf.ase_molecule)

In [21]:
import os
def write_ArkaneThermoInput(filename, modelChemistry, spec_name, spec_file, path=None):
    """
    Creates the Arkane input script for a thermo job on one species.

    filename       :: file name the script is written to
    modelChemistry :: model chemistry string placed in the script
    spec_name      :: species label used in the species/statmech/thermo calls
    spec_file      :: species file referenced by the species(...) call
    path           :: directory to write into; the current working directory
                      when None

    The script enables hindered rotors and bond corrections and requests a
    'NASA' thermo fit. Nothing is returned.
    """
    target_dir = os.getcwd() if path is None else path

    script_lines = []
    script_lines.append('#!/usr/bin/env python')
    script_lines.append('# -*- coding: utf-8 -*-')
    script_lines.append('')
    script_lines.append('modelChemistry = "{0}"'.format(modelChemistry))
    script_lines.append('useHinderedRotors = True')
    script_lines.append('useBondCorrections = True')
    script_lines.append('')
    script_lines.append("species('{0}', '{1}')\n\nstatmech('{0}')".format(spec_name, spec_file))
    script_lines.append("thermo('{0}', '{1}')".format(spec_name, 'NASA'))

    script_text = '\n'.join(script_lines)

    # The context manager closes the file on exit.
    with open(os.path.join(target_dir, filename), 'w') as handle:
        handle.write(script_text)

In [22]:
# Build a conformer from the same SMILES twice and collect the torsions found
# each time, so the two results can be compared side by side in the output.
SML = 'CC(CO)(C)OC'
torsions = []
for attempt in range(2):
    conf = Conformer(SML)
    torsions.append(conf.torsions)
torsions

[[<Torsion "(2, 0, 6, 16)">,
  <Torsion "(6, 0, 2, 5)">,
  <Torsion "(18, 1, 3, 7)">,
  <Torsion "(5, 2, 3, 7)">,
  <Torsion "(5, 2, 4, 10)">,
  <Torsion "(4, 2, 5, 14)">],
 [<Torsion "(2, 0, 6, 17)">,
  <Torsion "(6, 0, 2, 3)">,
  <Torsion "(18, 1, 3, 7)">,
  <Torsion "(3, 2, 4, 10)">,
  <Torsion "(3, 2, 5, 12)">,
  <Torsion "(5, 2, 3, 7)">]]

# Sample Iterative Workflow

The general premise is that the workflow creates the input files for Gaussian/Arkane and then executes them. The workflow stops at the first job it launches, so the user must re-run it after each calculation has finished.

In [23]:
import os
import subprocess
import shlex

from ase.calculators.gaussian import Gaussian
from autotst.species import Conformer

# Inputs: everything a user is expected to tune before running the workflow.
# Alternative test molecules, kept for reference:
#SMILES_list = ['CC(CO)(C)OC']
#SMILES_list = ['CCCCCC(=O)O']
# SMILES strings of the species to process; the workflow loops over this list.
SMILES_list = ['CC']
# Model chemistry label (same string as fake_statmech.model_chemistry).
modelChemistry = 'M06-2X/cc-pVTZ'
#home = os.path.join('home', 'underkoffler.c','Code','Hindered_Rotors')
#path = os.path.join(home, 'test_folder')
#home = os.getcwd()
# Directory in which the Gaussian/Arkane files are looked up and written.
path = os.getcwd()
# Scan settings: 36 steps of 10.0 degrees cover a full 360-degree rotation.
# NOTE(review): presumably forwarded as steps / stepsize_deg when the scans
# are created -- their use lies beyond the visible part of the workflow.
given_steps = 36
given_stpsize = 10.0
# NOTE(review): presumably the name of the file the collected log is written
# to -- confirm against the end of the workflow.
log_name = 'Rotors_Log'
# Per the plan below, enables writing and running the Arkane thermo job.
ThermoJob = True




#def fake_workflow(kwargs are all that good stuff above)
"""
General premise:


Iterate list of smiles:
    If GeoFreq log does not exist or is incomplete:
    
        Writes GeoFreq Input if none found
        
        Executes GeoFreq Input
        Continues to next smiles in iteration
    
    Else:
    
        Updates geometry from GeoFreq log
        Generates scan object for every torsion
        
        Iterates through each scan object:
        
            If scan log does not exist or is incomplete:
                
                Writes scan input if none found
                
                Executes scan
                Continues to next scan in iteration
            
            Else:
                Updates scan with scan log
                
        If all scans have been updated from scan log:
            
            Writes species.py for SMILES if not found
            
            If specified as thermo calc:
                
                Writes thermo input for Arkane if none found
                Executes thermo input
                
                Adds arkane class instance to dictionary by smile
            
            
"""

def _submit_gaussian_job(job_base):
    """
    Submit a Gaussian job to the queue via the run-script template.

    job_base :: name of the Gaussian input file without the '.com' extension

    Returns the exit code of the sbatch command.
    """
    return subprocess.call(shlex.split('sbatch rotors_run_template.sh {0}'.format(job_base)))


# Messages accumulated during the run; written to `log_name` at the end.
log = []
# Arkane job instances keyed by SMILES, filled only when a thermo job is run.
ark_dict = {}
if path is None:
    path = os.getcwd()

banner = '\n\n============================================================================================='

for SMILES in SMILES_list:
    log += [banner]
    log += ['Beginning workflow for {0} at:\n\t{1}'.format(SMILES, path)]
    conf = Conformer(SMILES)
    augInChI = conf.rmg_molecule.toAugmentedInChIKey()

    # File naming convention: <AugInChIKey>_GeoFreq.{com,log}
    geoFreqBase = augInChI + '_GeoFreq'
    geoFreqCom = geoFreqBase + '.com'
    geoFreqLog = geoFreqBase + '.log'

    hind = Hindered_Rotors(conf)

    geoFreqCom_Path = os.path.join(path, geoFreqCom)
    geoFreqLog_Path = os.path.join(path, geoFreqLog)

    if not exists_and_complete(geoFreqLog_Path):
        # Stage 1: no usable geometry/frequency log yet -> (write and) submit the job,
        # then move on to the next SMILES.
        log += ['Geometry & Frequency log file NOT FOUND or NOT COMPLETE\n\t{0} for {1} NOT FOUND or NOT COMPLETE.\n\tLooking for {0}'.format(geoFreqCom, SMILES)]
        if not os.path.isfile(geoFreqCom_Path):
            write_GeoFreq(conf, geoFreqCom, path=path)
            log += ['Geometry & Frequencey input file NOT FOUND\n\t{0} for {1} NOT FOUND.\n\tGenerating one now...'.format(geoFreqCom, SMILES)]
        else:
            log += ['Using previous geometry & frequency input file']

        assert os.path.isfile(geoFreqCom_Path)
        log += ['EXECUTING {0}'.format(geoFreqCom)]
        _submit_gaussian_job(geoFreqBase)

    else:
        # Stage 2: geometry is available -> work through the torsion scans.
        log += ['Geometry & Frequency log file is complete!\n\t{0} for {1} is complete!'.format(geoFreqLog, SMILES)]
        # Keep the call outside the assert so it still runs under `python -O`.
        conformer_updated = hind.update_Conformer(filename=geoFreqLog, path=path)
        assert conformer_updated, 'Could not update conformer geometry from {0}'.format(geoFreqLog)
        hind.generate_Scans(path=path)

        allScansUpdated = True
        for scan in hind.scans:
            scan.set_DefaultFiles()

            (i, j, k, l) = scan.get_Torsion().atom_indices
            log += ['Looking at {} torsion in {}'.format((i,j,k,l), SMILES)]
            if exists_and_complete(os.path.join(scan.path, scan.output_log)):
                scan.set_data()
                log += ['\tTorsion log file is complete!\n\t\t{0} for {1} exists and is complete!'.format(scan.output_log, SMILES)]
                #TODO scan handling
            else:
                allScansUpdated = False
                log += ['\tTorsion log file NOT FOUND or NOT COMPLETE.\n\t\t{0} for {1} NOT FOUND or NOT COMPLETE.\n\t\tLooking for existing {2}'.format(scan.output_log, SMILES, scan.input_com)]
                # NOTE(review): the log is looked up under scan.path but the input under
                # `path`; presumably both are the same directory -- confirm.
                if not os.path.isfile(os.path.join(path, scan.input_com)):
                    log += ['\tTorsion input file NOT FOUND.\n\t\t{0} for {1} NOT FOUND.\n\t\tGenerating one now..'.format(scan.input_com, SMILES)]
                    scan.write_TorInput(steps=given_steps, stepsize_deg=given_stpsize)
                else:
                    log += ['\tUsing previous torsion input file.\n\t\t{0} for {1} exists!'.format(scan.input_com, SMILES)]

                log += ['\tEXECUTING {0}'.format(scan.input_com)]
                # splitext removes only the extension; str.strip('.com') would strip any
                # leading/trailing '.', 'c', 'o' or 'm' characters from the name itself.
                tor_base = os.path.splitext(scan.input_com)[0]

                _submit_gaussian_job(tor_base)

        if allScansUpdated:
            # Stage 3: every scan has data -> write the Arkane species file and,
            # if requested, run the thermo job.
            log += ['All scans for {0} have data!'.format(SMILES)]
            statmech_job = fake_statmech()
            statmech_job.model_chemistry = modelChemistry

            species_py = augInChI + '.py'
            if not os.path.isfile(os.path.join(path, species_py)):
                log += ['Arkane Species.py file NOT FOUND\n\t{0} NOT FOUND for {1}\n\tGenerating one now...'.format(species_py, SMILES)]

                torsions = hind.get_Torsions()

                statmech_job.write_arkane_for_reacts_and_prods(conf, label=augInChI, torsions=torsions)
            else:
                log += ['Species.py for {0} exists!\n\t{1}'.format(SMILES, species_py)]

            if ThermoJob:
                log += ['Beginning Thermo Calculations']
                thermo_filename = augInChI + '_Thermo.py'
                thermo_path = os.path.join(path, thermo_filename)
                if not os.path.isfile(thermo_path):
                    log += ['Thermo input file NOT FOUND\n\t{0} NOT FOUND for {1}\t\nGenerating one now...'.format(thermo_filename, SMILES)]

                    # The species is labelled with its augmented InChIKey and read from
                    # the species file handled above; the input is written into `path`,
                    # the same directory the existence check looks in.
                    write_ArkaneThermoInput(thermo_filename, modelChemistry, augInChI, species_py, path=path)

                log += ['Executing Arkane Thermo Job for {0}\n\tAKA {1}'.format(species_py, SMILES)]
                log += ['Arkane log named "arkane.log"\n\tat {0}'.format(path)]

                ark = arkane.Arkane(inputFile=thermo_path, outputDirectory=path)
                ark.execute()
                ark_dict[SMILES] = ark
    log += [banner]

output = '\n\n'.join(log)
with open(os.path.join(path, log_name), 'w') as f:
    f.write(output)


#os.chdir(home)

In [24]:
# Manual one-off resubmission of a single torsion scan.
# `a` is the safety switch: nothing is submitted while it is False.
a = False
b = 'VMPtest_tor17'
com_file_present = os.path.isfile('VMPtest_tor17.com')
if com_file_present and a:
    sbatch_command = 'sbatch rotors_run_template.sh {0}'.format(b)
    subprocess.call(shlex.split(sbatch_command))

# Things I just don't want to delete:

In [25]:
"""#Inputs to write thermo
SML = 'CC(CO)(C)OC'
conf = Conformer(SML)
label = conf.rmg_molecule.toAugmentedInChIKey()
spec_file = label + '.py'

thermo_filename = 'thermo_input.py'

modelChemistry = 'M06-2X/cc-pVTZ'

spec_name = 'my_species'


#Inputs to execute Arkane
out_path = os.getcwd()


write_ArkaneThermoInput(thermo_filename, modelChemistry, spec_name, spec_file)
ark = arkane.Arkane(inputFile=thermo_filename, outputDirectory=out_path)


ark.plot = False
#ark.execute()"""
x=1

In [26]:
"""
        #Keeping atomcoords organized with their respective atom nos for easy ID
        atom_id_coords = []
        atomids = self.data.atomnos
        for i, geo_coords in enumerate(self.data.atomcoords):
            #Setting up [atomno, x, y, z] for each atom in a geometry
            geo_id_coords = np.insert(geo_coords, 0, atomids, axis=1)
            #Adding all of individual geometries to master list
            atom_id_coords.append(geo_id_coords)
        
        self.atomcoords = atom_id_coords"""
x=1

In [27]:
# To create new template run-script

#rotors_runscript_template = "#!/bin/bash\n\n#SBATCH --job-name=$1\n#SBATCH --output=$1.log\n\n## number of nodes\n#SBATCH -N 1\n#SBATCH --exclusive\n#SBATCH --partition=general\n#SBATCH --mem=120000\n\n## set the gaussian scratch directory to a fast drive\n## note that /tmp/ may be even faster than /gss_gpfs_scratch/\n#export GAUSS_SCRDIR=/scratch/$USER/gaussian_scratch\n## make the directory if it doesn't exist already\n#mkdir -p $GAUSS_SCRDIR\n\n# run gaussian, with the desired input file\ng16 $1.com\n\n"

# Lines of the SLURM submission script 'rotors_run_template.sh'.
# The script takes the Gaussian job base name as its first argument ($1)
# and runs g16 on <base>.com.
# NOTE(review): sbatch does not shell-expand variables inside #SBATCH
# directives, so '--output=$1.log' is presumably taken literally -- confirm
# against the Slurm documentation before relying on that file name.
lst_template = ["#!/bin/bash",
                "",
                "#SBATCH --job-name=rotor",
                "#SBATCH --output=$1.log",
                "",
                "## number of nodes",
                "#SBATCH -N 1",
                "#SBATCH --exclusive",
                "#SBATCH --partition=general",
                "#SBATCH --mem=120000",
                "",
                "## export GAUSS_SCRDIR=/scratch/$USER/gaussian_scratch",
                "## make the directory if it doesn't exist already",
                "## mkdir -p $GAUSS_SCRDIR",
                "",
                "# run gaussian, with the desired input file",
                "g16 $1.com",
                "",
                ""]

# Uncomment the open/write/close lines to (re)create the script on disk;
# by default the template is only shown as the cell output.
#new_script_template = open('rotors_run_template.sh', 'w')
#new_script_template.write('\n'.join(lst_template))
# Single-argument print(...) gives the same output on Python 2 and Python 3.
print('\n'.join(lst_template))
#new_script_template.close()
#print(rotors_runscript_template)


#!/bin/bash

#SBATCH --job-name=rotor
#SBATCH --output=$1.log

## number of nodes
#SBATCH -N 1
#SBATCH --exclusive
#SBATCH --partition=general
#SBATCH --mem=120000

## export GAUSS_SCRDIR=/scratch/$USER/gaussian_scratch
## make the directory if it doesn't exist already
## mkdir -p $GAUSS_SCRDIR

# run gaussian, with the desired input file
g16 $1.com




In [30]:
"""
#!/bin/bash

#SBATCH --job-name={}
#SBATCH --output={}.log

## number of nodes
#SBATCH -N 1
#SBATCH --exclusive
#SBATCH --partition=general
#SBATCH --mem=120000

## set the gaussian scratch directory to a fast drive
## note that /tmp/ may be even faster than /gss_gpfs_scratch/
#export GAUSS_SCRDIR=/gss_gpfs_scratch/$USER/gaussian_scratch
## make the directory if it doesn't exist already
#mkdir -p $GAUSS_SCRDIR

# run gaussian, with the desired input file
g16 {}.com
"""
x=1

In [None]:
"""x = RMG_Molecule()
SML = 'CC(CO)(C)OC'
x.fromSMILES(SML)
piv_bond = None
i = 1
for atom in x.atoms:
    atom.id = i
    i+=1

for bond in x.getAllEdges():
    print bond.atom1.id, bond.atom2.id
    if (bond.atom1.id == 2 and bond.atom2.id == 4) or (bond.atom1.id == 2 and bond.atom2.id == 4):
        piv_bond = bond
print "Piv Bond: ", piv_bond"""
x=1

In [None]:
"""from autotst.calculators.gaussian import read_gaussian_out
geo_log = "VMPUAIZSESMILD-UHFFFAOYSA-N_Geo.log"
conf.ase_molecule = read_gaussian_out(geo_log)
conf.update_coords()"""
x=1

In [None]:
#subprocess.call(shlex.split('sbatch rotors_run_template.sh {0}'.format(name_base)))

In [None]:
    
"""    def write_Geo_Input(self):
        # Preparing gaussian input file for Geo Opt
        gaus_job = Gaussian()
        gaus_job.label = self.label + '_Geo'
        gaus_job.parameters['method'] = 'm062x'
        gaus_job.parameters['basis'] = '6-311+g(2df,2p)'
        gaus_job.extra = 'opt=(calcfc,maxcycle=1000)'
        del gaus_job.parameters['force']

        # Write geo input file
        gaus_job.write_input(.ase_molecule)"""
"""    
    def write_Freq_Input(self, freq_file=None,  path=None, options=None):
        # Preparing gaussian input file for Freq Calc
        if self.geoIsOpt:
            mol = self.getRMGMol()
            mol.updateMultiplicity()
            
            if freq_file is None:
                freq_file = self.label + '_Freq.com'
                
            if path is None:
                path = self.path
            
            if options is None:
                options = ['%nprocshared=20',
                           '%mem=5GB',
                           '#p m062x/6-311+g(2df,2p) freq iop(7/33=1)']

            output = '\n'.join(options)
            
            output += '\n'.join(['','','0 {}'.format(mol.multiplicity),''])

            n = 1
            for atom in mol.atoms:
                assert n == atom.id
                n += 1
                output += "{}     {}     {}     {}\n".format(atom.element, atom.coords[0], atom.coords[1], atom.coords[2])
            
            with open(os.path.join(path, freq_file), 'w') as f:
                f.write(output)
                f.close()
            return
    
    def getRMGMol(self):
        return self.__RMGMol__
"""            
"""
    def updateMol(self, geo_file=None, path=None):
        assert self.__RMGMol__ is None, "Highly recommended not to update molecule more than once"
        
        if geo_file is None:
            geo_file = self.label + '_Geo.log'
        if path is None:
            path = self.path
        
        atoms = None
        try:
            atoms = read_gaussian_out(geo_file)
        except:
            print "Failed to Find Valid Geo_file"
            return False
        
        #Update XYZ
        mol = RMG_Molecule()
        mol.fromXYZ(atoms.get_atomic_numbers(), atoms.get_positions())
        
        #Update IDs
        i = 1
        for atom in mol.atoms:
            atom.id = i
            i += 1
        
        #Update Multiplicity
        mol.updateMultiplicity()
        
        self.__RMGMol__ = mol
        self.geoIsOpt = True
        return True
    
    
            
    def set_torsions(self):
        Method setting torsions of simple chains to hindered rotors object

        NO RINGS
        NO RINGS
        NO RINGS

        A - Bond1 - B - Bond2 - C - Bond3 - D

        i = A.index
        j = B.index
        k = C.index
        l = D.index

        Torsion is unique if j,k & k,j pairing is unique

        """
"""
        if not self.geoIsOpt:
            self.updateMol()
        
        assert self.geoIsOpt
        mol = self.getRMGMol()
 
        # dict of (j, k) key with [i,j,k,l] value
        # Wipes existing torsions away to recalculate
        self.torsions = {}
            
        for bond2 in mol.getAllEdges():
            i, j, k, l = -1,-1,-1,-1
            B = bond2.atom1
            j = bond2.atom1.id

            C = bond2.atom2
            k = bond2.atom2.id

            assert (j,k) not in self.torsions.keys() or (k,j) in torsions.keys()
            # [i,j,k,l] and [i,j,k,m] torsion is the same to Gaussian as long as center is k&j or j&k
            #continue

            found_bond1 = False
            found_bond3 = False

            for Bbond in mol.getBonds(B).values():
                if (Bbond.atom1.id == B.id) and (Bbond.atom2.id != C.id):
                    #Bbond is not bond2
                    #Bbond must be bond1
                    found_bond1 = True
                    i = Bbond.atom2.id
                elif (Bbond.atom1.id != C.id) and (Bbond.atom2.id == B.id):
                    #Bbond is not bond2
                    #Bbond must be bond1
                    found_bond1 = True
                    i = Bbond.atom1.id
                else:
                    #Bbond is bond2
                    assert (Bbond.atom1 == C) or (Bbond.atom2 == C)

            for Cbond in mol.getBonds(C).values():
                if (Cbond.atom1.id == C.id) and (Cbond.atom2.id != B.id):
                    #Cbond is not bond2
                    #Cbond must be bond3
                    found_bond3 = True
                    l = Cbond.atom2.id
                elif (Cbond.atom1.id != B.id) and (Cbond.atom2.id == C.id):
                    #Cbond is not bond2
                    #Cbond must be bond3
                    found_bond3 = True
                    l = Cbond.atom1.id
                else:
                    #Cbond is bond2
                    assert (Cbond.atom1 == B) or (Cbond.atom2 == B)

            #print found_bond1, found_bond3
            #print i, j, k, l
            #print
            if found_bond1 and found_bond3:
                self.torsions[(j,k)] = [i,j,k,l]
        return
"""   
"""
    def Rote_write_geo_com(self, filename=None):
        
        #Writes Gaussian input file for a geometry optimization of self.__RMGMol__
        #with a basis and parameters similar to AutoTST
        
        auto_mol = None
        if filename is None:
            filename = self.AugInchiKey + '_Geo.com'

        # Preparing gaussian input file parameters for Geo Opt as AutoTST does it
        g_inst = Gaussian()
        g_inst.label = self.AugInchiKey + '_Geo'
        
        g_inst.parameters['method'] = 'm062x'
        g_inst.parameters['basis'] = '6-311+g(2df,2p)'
        g_inst.extra = 'opt=(calcfc,maxcycle=1000)'
        del g_inst.parameters['force']

        # Write Gaussian input file
        # TODO Fix
        test_g.write_input(auto_mol.ase_molecule)
        return False
    """
x=1