In [1]:
import os
import numpy as np
import pandas as pd
import itertools
from datetime import datetime
#import ase

In [2]:
def move_molecules(xyz_df, box_size, coord):
    """Wrap molecules that lie outside the box back in along one axis.

    The system is expected to be centered around the QM molecule already
    (QM molecule coords near 0). A molecule counts as "outside" when the mean
    `coord` value of its heavy (non-H) atoms is farther than box_size/2 from 0.
    Every atom of such a molecule is shifted by one box length: +box_size if
    the atom's value is negative, -box_size otherwise.

    Input:  - xyz_df      : df containing molecule num ('mol_num'), atom symbol ('atom'),
                            atom num (ex 1,2,3,1,2,3 for water dimer) and xyz coords of system
            - box_size    : size of original box (given as min of last line of gro)
            - coord       : coordinate to be moved, either 'x', 'y', or 'z'
    Output: - new df with the same layout as xyz_df (the input df is not modified)
    """
    step_xyz = xyz_df.copy()
    heavy_atoms = step_xyz[step_xyz['atom'] != 'H']
    # Average only the requested numeric column: averaging the whole frame
    # trips over the string 'atom' column on pandas >= 2.0 (TypeError).
    mean_coord = heavy_atoms.groupby('mol_num')[coord].mean()
    outside_mols = mean_coord.index[mean_coord.abs() > box_size / 2]
    to_move = step_xyz['mol_num'].isin(outside_mols)
    values = step_xyz.loc[to_move, coord]
    # Per-atom shift (same rule as before): negative values go up by one box
    # length, all others go down by one box length.
    step_xyz.loc[to_move, coord] = (values + box_size).where(values < 0, values - box_size)
    return step_xyz

def fix_box(xyz_df, new_box_size):
    """Cut the box down to a cube of side new_box_size centered on 0.

    A molecule is kept only if the mean x, y and z of its heavy (non-H) atoms
    all lie strictly within new_box_size/2 of 0. Molecules without any heavy
    atom have no mean and are therefore dropped.

    Input:  - xyz_df       : df containing molecule num ('mol_num'), atom symbol ('atom'),
                             atom num (ex 1,2,3,1,2,3 for water dimer) and xyz coords of system
            - new_box_size : size of new box in Angstrom
    Output: - new df with the same columns as xyz_df, restricted to the kept molecules
              (the input df is not modified)
    """
    half_box = new_box_size / 2
    heavy_atoms = xyz_df[xyz_df['atom'] != 'H']
    # Average only the coordinate columns: averaging the whole frame trips
    # over the string 'atom' column on pandas >= 2.0 (TypeError).
    mean_coords = heavy_atoms.groupby('mol_num')[['x', 'y', 'z']].mean()
    inside = (mean_coords.abs() < half_box).all(axis=1)
    kept_mols = mean_coords.index[inside]
    return xyz_df[xyz_df['mol_num'].isin(kept_mols)].copy()

In [3]:
# This reads the gros created from the QChem output file
# and creates a libefp input file for each of them.
# The variables below might need to be modified.
main_path = os.getcwd()
path2gros = f'{main_path}'   # directory holding the .gro files
path2efp = f'{main_path}'    # directory where the libefp .in files are written
box_threshold = 1            # boxes whose size is not larger than this are skipped
########################################################################


def find_first_solvent_index(gro_lns):
    """Return how many atom lines precede the first solvent atom, or None.

    The first solvent atom is the first atom line (after the two gro header
    lines) containing both '2SOL' and 'OW'. Stopping at the first hit matters:
    later residues such as '12SOL' also contain the substring '2SOL'.
    """
    for atom_idx, ln in enumerate(gro_lns[2:]):
        if '2SOL' in ln and 'OW' in ln:
            return atom_idx
    return None


def has_distorted_molecule(xyz_df, max_std=2):
    """Return True if any molecule has a std of x, y or z larger than max_std.

    Presumably this flags molecules whose atoms are split across the periodic
    box -- TODO confirm. Only the coordinate columns are used: taking the std
    of the whole frame trips over the string 'atom' column on pandas >= 2.0.
    """
    spread = xyz_df.groupby('mol_num')[['x', 'y', 'z']].std()
    return bool((spread > max_std).any().any())


def write_fragment(finput, frag_name, coords):
    """Write one 'fragment <name>' header followed by one line per coordinate row."""
    finput.write(f'fragment {frag_name}\n')
    for row in coords:
        fcoords = ['{:10.5f}'.format(i) for i in row]
        finput.write('  '.join(fcoords))
        finput.write('\n')


# Sorted so that the processing order (and the step_xyz left over after the
# loop) does not depend on the arbitrary order returned by os.listdir.
gros = sorted(fl for fl in os.listdir(path2gros) if fl.endswith('.gro'))
molecules_dict = {'ammonia' : 4, 'methane' : 5, 'methanol' : 6, 'water' : 3}  # atoms per molecule
atoms_dict = {'H' : 1, 'C' : 6, 'N' : 7, 'O' : 8}                             # atomic numbers

# These are systems for which the makefp did not converge
bad_efps = []
if 'gromacs_solvated' in path2gros:
    bad_efps = list(pd.read_csv(f'{path2gros}/bad_logs.txt')['name'].values)

for gro in gros:
    if gro in bad_efps:
        continue
    with open(f'{path2gros}/{gro}') as fl:
        gro_lns = fl.readlines()

    # File names are expected to look like <qm>_<solvent>....gro
    qm_name = gro.split('_')[0]
    solv_name = gro.split('_')[1].split('.')[0]
    solv_name = solv_name if solv_name in molecules_dict else 'water'
    solv_atoms = molecules_dict[solv_name]

    if qm_name in molecules_dict:
        # Known QM molecule: its atom count comes straight from the table.
        # (Previously qm_atoms was never set on this path.)
        qm_atoms = molecules_dict[qm_name]
    else:
        # Unknown QM molecule: everything before the first solvent atom is QM.
        qm_atoms = find_first_solvent_index(gro_lns)
        if qm_atoms is None:
            print(f'Skipping {gro}: no "2SOL ... OW" line found, cannot size the QM molecule')
            continue
        if 'mobley' in path2gros:
            qm_name = gro.split('_')[1].replace('.gro', '')
            qm_name = f'mobley_{qm_name}_s'
        else:
            qm_name = gro.replace('.gro', '') + '_s'
            qm_name = qm_name.replace('_centered_solv', '')

    # Known bug (kept on purpose): only the first character of the atom name is
    # used, so e.g. Cl is read as C. This is accounted for in the Parse_files nb.
    atom_symbols = [ln.split()[1][0] for ln in gro_lns[2:qm_atoms+2]]
    if not set(atom_symbols).issubset({'H', 'C', 'N', 'O'}):
        continue

    total_atoms = int(gro_lns[1].split()[0])
    solv_mols = int((total_atoms - qm_atoms)/solv_atoms)
    # gro values are multiplied by 10 (fix_box documents its box size in Angstrom)
    box_size = min(float(i)*10 for i in gro_lns[-1].split())
    boxes = [i for i in (box_size, box_size-5, box_size-10) if i > box_threshold]

    # Molecule 1 is the QM molecule, solvent molecules are numbered from 2
    mol_numbers = [[1]*qm_atoms]+[[i]*solv_atoms for i in range(2, solv_mols+2)]
    mol_numbers = list(itertools.chain.from_iterable(mol_numbers))
    atom_fields = [ln.split() for ln in gro_lns[2:-1]]
    step_xyz = pd.DataFrame({'mol_num' : mol_numbers,
                             'atom' : [f[1][0] for f in atom_fields],
                             'atom_num' : [atoms_dict[f[1][0]] for f in atom_fields],
                             'x' : [float(f[3])*10 for f in atom_fields],
                             'y' : [float(f[4])*10 for f in atom_fields],
                             'z' : [float(f[5])*10 for f in atom_fields]})
    if has_distorted_molecule(step_xyz):
        continue

    # Center the system on the mean position of the QM atoms
    qm_center = step_xyz.iloc[0:qm_atoms][['x', 'y', 'z']].mean()
    for c in ['x', 'y', 'z']:
        step_xyz[c] = step_xyz[c] - qm_center[c]

    # Bring molecules that ended up outside the box back in, axis by axis
    new_x = move_molecules(step_xyz, box_size, 'x')
    new_xy = move_molecules(new_x, box_size, 'y')
    new_xyz = move_molecules(new_xy, box_size, 'z')

    for box in boxes:
        final_xyz = fix_box(new_xyz, box)
        # Only the first three atoms of every molecule are written, presumably
        # because 'coord points' places a fragment from three points -- TODO confirm.
        final_xyz_3A = final_xyz.groupby('mol_num').head(3)
        if has_distorted_molecule(final_xyz_3A):
            continue
        # Context manager so the file is closed even if writing fails
        with open(f'{path2efp}/{qm_name}_{solv_name}_{int(box)}.in', 'w') as finput:
            finput.write('run_type elpot\ncoord points\nfraglib_path .\n\n')
            is_qm = final_xyz_3A['mol_num'] == 1
            write_fragment(finput, qm_name, final_xyz_3A.loc[is_qm, ['x', 'y', 'z']].values)
            # One solvent fragment per remaining molecule, in order of appearance
            solvent_xyz = final_xyz_3A[~is_qm]
            for _, mol_xyz in solvent_xyz.groupby('mol_num', sort=False):
                write_fragment(finput, solv_name, mol_xyz[['x', 'y', 'z']].values)



In [4]:
# Preview of the frame built for the last processed .gro file
# (only defined if the loop above processed at least one file).
step_xyz.head(10)

Unnamed: 0,mol_num,atom,atom_num,x,y,z
0,1,C,6,0.02068,-0.0116,0.0138
1,1,H,1,-0.64972,-0.7445,-0.2807
2,1,H,1,-0.57672,0.9123,-0.1657
3,1,H,1,0.28778,-0.1344,1.0498
4,1,H,1,0.91798,-0.0218,-0.6172
5,2,O,8,2.25808,1.4555,-2.3565
6,2,H,1,2.35278,2.2045,-1.7748
7,2,H,1,2.56848,1.648,-3.2366
8,3,O,8,-1.47682,-2.6549,-0.1491
9,3,H,1,-1.74482,-2.2173,-0.9394
