# Trajectory SAPT Input Generators
Generates psi4 input files from a trajectory loaded in MDAnalysis. Set parameters for generating input files based on the instructions below. Execute the cells in order to ensure that all values are correct prior to generating the input files.

In [4]:
from MDAnalysis.analysis.base import AnalysisBase
import MDAnalysis as mda
import os


def write_xyz(selection: str, universe: mda.Universe, pathname: str):
    """Write the atoms matched by ``selection`` to ``<pathname>.xyz``.

    Args:
        selection: MDAnalysis selection string evaluated on ``universe``.
        universe: Universe the atoms are selected from.
        pathname: Output path without the ``.xyz`` extension.
    """
    atoms = universe.select_atoms(selection)
    xyz_file = pathname + '.xyz'

    with mda.Writer(xyz_file, atoms.n_atoms) as writer:
        writer.write(atoms)


def read_xyz(xyz_path):
    """Return the coordinate lines of the file at ``xyz_path``.

    The first two lines of the file are skipped, and only the remaining
    lines that contain a ``.`` are returned (newline characters included).
    """
    with open(xyz_path, 'r') as handle:
        body = handle.readlines()[2:]

    return [row for row in body if '.' in row]


def _format_monomer(coord_lines: list) -> str:
    """Return the geometry lines of one monomer as a single string.

    Each input line is split on whitespace. Only the first character of the
    leading atom label is kept (e.g. ``CA`` becomes ``C``), so two-letter
    labels such as ``Cl`` are shortened as well. Blank lines are skipped.
    """
    rows = []
    for line in coord_lines:
        items = line.split()
        if not items:
            # A whitespace-only line carries no atom; indexing it would fail.
            continue
        rows.append(' '.join([items[0][0]] + items[1:]) + '\n')
    return ''.join(rows)


def save_sapt_in(coords0: list, coords1: list, memory: int, path: str, molecule_name: str,
                 *, charge_mult0: str = '0 1', charge_mult1: str = '-1 1'):
    """Write a Psi4 SAPT0 input file for a pair of monomers.

    Args:
        coords0: Coordinate lines (``label x y z``) of the first monomer.
        coords1: Coordinate lines (``label x y z``) of the second monomer.
        memory: Memory written to the input as ``memory <memory>GB``.
        path: Path of the input file to create (overwritten if present).
        molecule_name: Name used in the ``molecule <name> {`` header.
        charge_mult0: Charge/multiplicity line of the first monomer.
        charge_mult1: Charge/multiplicity line of the second monomer.
            The defaults reproduce the previously hard-coded values.
    """
    sections = [
        'molecule %s {\n' % molecule_name,
        charge_mult0 + '\n',
        _format_monomer(coords0),
        '--\n',
        charge_mult1 + '\n',
        _format_monomer(coords1),
        '\nunits angstrom\n'
        '\n'
        '}\n',
        '\nset {\n'
        'basis jun-cc-pVDZ\n'
        'scf_type df\n'
        'freeze_core true\n'
        '}\n',
        '\n' + 'memory ' + str(memory) + 'GB\n',
        "\nenergy('sapt0')\n",
    ]

    # Write the whole file in one call instead of one character at a time.
    with open(path, 'w') as input_file:
        input_file.write(''.join(sections))


def check_inputs(selection: list, start: int, stop: int, step: int, universe: mda.Universe):
    """Validate the user-supplied selections and frame interval.

    Args:
        selection: List whose first three items are the selection strings,
            the selection names, and the list of name pairs.
        start: First frame of the interval; must be smaller than ``stop``.
        stop: End of the interval; must not exceed the trajectory length.
        step: Frame stride; must be at least 1 and smaller than ``stop``.
        universe: Universe the selections and frame interval are checked against.

    Raises:
        InputError: On the first invalid value that is found.
    """
    ag_sel = selection[0]
    ag_names = selection[1]
    ag_pair = selection[2]

    # Every selection needs exactly one name.
    if len(ag_sel) > len(ag_names):
        raise InputError('Not all selections are named')
    if len(ag_sel) < len(ag_names):
        raise InputError('Too many selection names for number of selections')

    # Each selection string must be accepted by the universe.
    for sel in ag_sel:
        try:
            universe.select_atoms(sel)
        except mda.SelectionError as err:
            raise InputError('Error in selection: {}'.format(sel)) from err

    # Each pair must consist of two names listed in the selection names.
    for pair in ag_pair:
        if len(pair) != 2:
            raise InputError('Pairs must be a python list of string with only two items')
        for member in pair:
            if member not in ag_names:
                raise InputError(f'{member} in {pair} group_pair_selections is not in defined in atom_group_names')

    # The frame interval is validated once, outside the pair loop, so it is
    # still checked when no pairs were given.
    if start >= stop:
        raise InputError('Start is greater than or equal to stop')
    if step < 1:
        raise InputError('Step must be a positive integer')
    if step >= stop:
        raise InputError('Step is greater than or equal to stop')

    if len(universe.trajectory) < stop:
        raise InputError(f'Stop exceeds length of trajectory, trajectory is {len(universe.trajectory)} frames')

    print('Input Parameters Accepted')


class InputError(Exception):
    """Raised when a user-supplied input value fails validation."""


class Psi4SAPTGenerator(AnalysisBase):
    """Write one SAPT input file per interaction pair for every analysed frame.

    Args:
        universe: Universe whose trajectory is iterated.
        sapt_selections: List holding the selection strings, the selection
            names, and the list of name pairs, in that order.
        sapt_memory: Memory value forwarded to ``save_sapt_in``.
        input_directory: Directory the files are written to; an empty string
            resolves to paths relative to the current working directory.
        mol_name: Prefix of the molecule name written into each input file.
    """

    def __init__(self, universe: mda.Universe, sapt_selections: list, sapt_memory: int, input_directory: str, mol_name: str):
        super().__init__(universe.trajectory)
        self._unv = universe
        self._sel = sapt_selections
        self._mem = sapt_memory
        self._dir = input_directory
        self._mol = mol_name

    def _prepare(self):
        """Defining data structures, selection_coords contains MDAnalysis selection commands,
         selections names contains names of the atom groups selected, and interaction_pairs
         contains the atom group names in list pairs. selections are pre-verified by check_inputs"""
        self.selection_coords = self._sel[0]
        self.selection_names = self._sel[1]
        self.interaction_pairs = self._sel[2]

    def _single_frame(self):
        """Saves coordinates for selected atom groups as .xyz files at each frame. The .xyz files are
        then read, coordinates are saved as a list, and the lists for each pair are written into a sapt.in
        file. The sapt.in files are named based on the trajectory time and atom group names. After generating
        files for the current frame the .xyz files are removed, even if generating the files failed."""
        # NOTE(review): the label is the trajectory time truncated to an int, not the
        # frame index; frames whose times truncate to the same int would share a file
        # name - confirm this is intended.
        time = int(self._unv.trajectory.time)
        name = f'{self._mol}_{time}'

        # os.path.join keeps an empty directory relative instead of producing '/<name>'.
        xyz_stems = [os.path.join(self._dir, sel_name) for sel_name in self.selection_names]

        try:
            for selection, stem in zip(self.selection_coords, xyz_stems):
                write_xyz(selection, self._unv, stem)

            for pair in self.interaction_pairs:
                coords0 = read_xyz(os.path.join(self._dir, f'{pair[0]}.xyz'))
                coords1 = read_xyz(os.path.join(self._dir, f'{pair[1]}.xyz'))

                path = os.path.join(self._dir, f'frame{time}_{pair[0]}_{pair[1]}.in')
                save_sapt_in(coords0, coords1, self._mem, path, name)
        finally:
            # Remove the temporary coordinate files whether or not the frame succeeded.
            for stem in xyz_stems:
                xyz_path = stem + '.xyz'
                if os.path.exists(xyz_path):
                    os.remove(xyz_path)


Using the variables below, enter the name of the directory in which the input files will be stored, the name of the molecule, the path to the topology, and the trajectory or trajectories being analyzed.

In [None]:
if __name__ == '__main__':
    # User settings: replace each empty placeholder before running.
    return_file_dir = ''  # passed to Psi4SAPTGenerator as input_directory
    molecule_name = ''  # passed to Psi4SAPTGenerator as mol_name
    topology = ''  # first argument of mda.Universe
    trajectory = ['']  # second argument of mda.Universe

    # Saving simulation data as an MDAnalysis Universe object
    unv = mda.Universe(topology, trajectory)

Add the [MDAnalysis selections](https://docs.mdanalysis.org/stable/documentation_pages/selections.html) for the atom groups to the list as strings.

In [None]:
    # One MDAnalysis selection string per atom group.
    atom_group_selections = []

Add the names for the atom group selections above to the list below for use in generating file paths. Ensure that the names are listed in the same order as their corresponding selections above.

In [None]:
    # One name per selection, in the same order as atom_group_selections.
    atom_group_names = []

Add the names of each group for the SAPT energy calculation pairs in a list within the group pair selections list.

Ex:
```python
   group_pair_selections = [['ATP', 'Glu144'], ['ATP', 'Met146']]
```

In [None]:
    # Two-item lists of names taken from atom_group_names, one list per pair.
    group_pair_selections = []

Set the trajectory frame interval for generating input files and the memory in GB, all as integers.

In [None]:
    # Placeholders: check_inputs rejects these until real values are set.
    start = 0  # must be smaller than stop
    stop = 0  # must not exceed the number of trajectory frames
    step = 0  # must be smaller than stop
    memory = 0  # forwarded to Psi4SAPTGenerator as sapt_memory

Verify the inputs, ensuring that the user-supplied values are valid for the given trajectory.

In [None]:
    # Order matters: check_inputs and Psi4SAPTGenerator read these items by index.
    selections = [atom_group_selections, atom_group_names, group_pair_selections]
    check_inputs(selections, start, stop, step, unv)

Generating inputs. Ensure all parameters are correct and have been validated by the cell above.

In [None]:
    # Build the generator and run it over the frame interval given by start, stop and step.
    Psi4SAPTGenerator(unv, selections, memory, return_file_dir, molecule_name).run(start, stop, step)