# Module for Molecular Dynamics.

In [1]:
#!/bin/env python

import os
import sys
import time

from io import StringIO

import numpy as np
import pandas as pd
import re

from ase.io import read
from pathlib import Path


In [2]:
class LigParGen():
    """Drive the LigParGen web server with Selenium/Chrome to download OPLS files.

    Attributes:
        url: Address of the LigParGen web form.
        prefs: Chrome preferences; the download directory is the working
            directory at construction time.
        options: Chrome options of the most recent request (set by
            ``get_opls_from_smiles``).
        driver: WebDriver of the most recent request (set by
            ``get_opls_from_smiles``; it is quit before that method returns).
    """

    def __init__(self, is_headless=True):
        """Store the server URL and the Chrome download preferences.

        Args:
            is_headless: Accepted for backward compatibility. It is not
                consulted here; the ``is_headless`` argument of
                ``get_opls_from_smiles`` decides how the browser is started.
        """
        self.url = 'http://zarbi.chem.yale.edu/ligpargen/'
        self.prefs = {"download.default_directory" : os.getcwd()}

    def get_opls_from_smiles(self,
            smiles='CCC',
            format='lammps',
            to='filename',
            is_headless=True,
    ):
        """Submit a SMILES string to LigParGen and trigger the LAMMPS download.

        The page is driven with fixed 10 second waits between the steps, so a
        call takes at least 30 seconds.  The browser is always shut down
        before returning, even when a step fails.

        Args:
            smiles: SMILES string typed into the form.
            format: Currently unused; the ``LAMMPS`` button is always pressed.
            to: Currently unused.
            is_headless: Start Chrome with ``--headless`` when true.

        Returns:
            None.  The file is saved by the browser into the download
            directory configured in ``self.prefs``.
        """
        # Imported lazily so the module can be loaded without Selenium.
        from selenium import webdriver
        from selenium.webdriver.common.by import By

        self.options = webdriver.ChromeOptions()
        self.options.add_experimental_option('prefs', self.prefs)
        if is_headless:
            self.options.add_argument('--headless')
        self.driver = webdriver.Chrome(options=self.options)

        try:
            self.driver.get(self.url)

            print(self.driver.title)
            # The ``find_element(By.…)`` form works on Selenium 3 and 4; the
            # ``find_element_by_*`` helpers were removed in Selenium 4.
            box = self.driver.find_element(By.ID, 'smiles')
            print(box)
            box.send_keys(smiles)

            time.sleep(10)

            b = self.driver.find_element(By.CLASS_NAME, "btn")
            print(b)
            b.submit()

            time.sleep(10)

            p = self.driver.find_elements(By.CSS_SELECTOR, 'p')
            print(p)
            f = self.driver.find_element(By.CLASS_NAME, 'form-group')
            # Map each download button's label to its element.
            buttons = f.find_elements(By.NAME, 'go')
            d = {button.get_property('value'): button for button in buttons}
            d['LAMMPS'].submit()

            # Leave the browser time to finish the download before quitting.
            time.sleep(10)
        finally:
            # Always release the Chrome process, also after a failure.
            self.driver.quit()

    def format_lammpsdata(self):
        """Placeholder; not implemented yet."""
        pass

In [3]:
def test():
    """Return the cached LigParGen data file, or trigger a download.

    Returns:
        The text of ``../../ligpargen_data/0.data`` when the cache directory
        exists.  Otherwise a download for methane (``'C'``) is started and
        ``None`` is returned, because the browser saves the file on its own
        and nothing is read back here.
    """
    from pathlib import Path

    path = Path('../../ligpargen_data').resolve()
    if path.exists():
        with (path / '0.data').open() as f:
            return f.read()

    # No cache directory: fetch the file through the web form instead.
    LigParGen().get_opls_from_smiles('C')
    return None

In [34]:

def test():
    """Read the cached LigParGen data file through ASE and print its content.

    Loads ``../../ligpargen_data/0.data``, parses it with ASE's
    ``lammps-data`` reader, prints the resulting object and the keys of its
    ``arrays`` mapping.

    Raises:
        FileNotFoundError: If the cached data file does not exist.
        KeyError: If one of the expected per-atom arrays is missing.
    """
    path = Path('../../ligpargen_data').resolve()
    # Reading directly gives a clear FileNotFoundError when the cache is
    # absent, instead of failing later on an unbound variable.
    lines = (path / '0.data').read_text()

    with StringIO(lines) as f:
        a = read(f, format='lammps-data')
    print(a)

    # Exercise every accessor so a missing field fails loudly.
    a.get_initial_charges()
    a.get_masses()
    a.get_cell()
    a.get_positions()
    for key in ('bonds', 'angles', 'dihedrals', 'id', 'mol-id', 'type', 'mmcharges'):
        _ = a.arrays[key]
    print(a.arrays.keys())

def test_read_lammpsdata():
    """Print a short preview of every entry parsed from the sample data file.

    Scalars are printed whole, data frames by their first row, and any other
    value by its first element.
    """
    parsed, _remainder = read_lammpsdata('../../ligpargen_data/0.data')
    for name, value in parsed.items():
        if np.isscalar(value):
            print(name, value)
            continue
        if isinstance(value, pd.DataFrame):
            print(name)
            print(value.iloc[[0]])
            continue
        print(name, value[0])
def test_format_lammpsdata():
    """Parse the sample data file, print the unparsed remainder, then print
    the formatted result."""
    parsed, leftover = read_lammpsdata('../../ligpargen_data/0.data')
    print(leftover)
    # NOTE(review): no module-level `format_lammpsdata` is visible in this
    # file; confirm which function is meant to be called here.
    formatted = format_lammpsdata(parsed)
    print(formatted)

In [5]:
# Header keywords that follow a count, e.g. "<N> atoms".
header_tags = ["atoms", "bonds", "angles", "dihedrals", "impropers"]
# Matching "<N> ... types" keywords: singular form of each header tag.
header_tags_types = [f"{_name[:-1]} types" for _name in header_tags]

# Section titles of the per-atom blocks.
tags_atomic = ["Masses", "Atoms", "Velocities"]
# Section titles of the topology blocks.
tags_interactions = list(("Bonds", "Angles", "Dihedrals", "Impropers"))
# Section titles of the force-field coefficient blocks.
tags_coeffs = [
    f"{_prefix} Coeffs"
    for _prefix in (
        "Pair",
        "Nonbond",
        "Bond",
        "Angle",
        "Dihedral",
        "Improper",
        "BondBond",
        "BondAngle",
        "MiddleBondTorsion",
        "EndBondTorsion",
        "AngleTorsion",
        "AngleAngleTorsion",
        "BondBond13",
        "AngleAngle",
    )
]


In [118]:

def read_lammpsdata(filename):
    """Parse a LAMMPS data file into a dictionary.

    Args:
        filename: Path of a data file.  When no such path exists, the
            argument itself is treated as the file content.

    Returns:
        A ``(data, others)`` tuple: ``data`` holds both the tagged section
        blocks and the header entries; ``others`` is the text that neither
        parser consumed.
    """
    if os.path.exists(filename):
        with open(filename) as f:
            text = f.read()
    else:
        text = filename

    block_data, remainder = parse_block_data(text)
    header_data, remainder = parse_header(remainder)

    # Merge both results; previously the block data was overwritten by the
    # header parser's return value.
    data = dict(block_data)
    data.update(header_data)
    return data, remainder

def parse_block_data(lines):
    """Extract the tagged section blocks from the text of a data file.

    A section is a line holding one of the known tags, a blank line, and the
    body up to the next blank line or the end of the text.  Each section
    found exactly once is removed from the text and handed to
    ``_parse_data`` for conversion.

    Args:
        lines: Full text of the data file.

    Returns:
        A ``(data, others)`` tuple: ``data`` maps each tag found to its
        parsed body; ``others`` is the text left after removing the sections.
    """
    tags = tags_atomic + tags_interactions + tags_coeffs
    data = {}
    for _tag in tags:
        # Raw string keeps the regex escapes intact.  The body ends at the
        # first blank line, or at the end of the text for the last section.
        pattern = rf'(^{re.escape(_tag)}\s*\n\n)(.*?)(?:\n\n|\n*\Z)'
        pieces = re.split(pattern, lines, flags=re.MULTILINE | re.DOTALL)
        # One match yields [before, header, body, after]; anything else
        # means the tag is absent or ambiguous and is left alone.
        if len(pieces) == 4:
            before, _header, body, after = pieces
            body_lines = [_line for _line in body.splitlines() if _line.strip()]
            if body_lines:
                data[_tag] = body_lines
            lines = before + after

    return _parse_data(data), lines


def parse_header(lines, data=None):
    """Parse the title, the counts and the box bounds of a data-file header.

    Args:
        lines: Header text; its first line is taken as the title.
        data: Optional dictionary to fill.  A new one is created when
            omitted.

    Returns:
        A ``(data, others)`` tuple: ``data`` holds ``'title'``, one integer
        per count keyword found (e.g. ``'atoms'``, ``'atom types'``) and one
        float per box bound (e.g. ``'xlo'``, ``'xhi'``); ``others`` is the
        remaining text, joined by newlines.
    """
    if data is None:
        data = {}

    title, _, lines = lines.partition('\n')
    data['title'] = title

    # "... types" keywords are handled first, before the plain counts.
    for _tag in header_tags_types + header_tags:
        pattern = rf'^([-+.\d\s]+)({re.escape(_tag)}.*?)$'
        pieces = re.split(pattern, lines, flags=re.MULTILINE)
        # One match yields [before, value, keyword, after].
        if len(pieces) == 4:
            before, value, _keyword, after = pieces
            data[_tag] = int(value)
            lines = before + after

    remaining = []
    for _line in lines.strip().splitlines():
        fields = _line.split()
        # Box bounds look like "<low> <high> xlo xhi"; checking the two
        # keyword fields avoids misreading other lines that merely contain
        # the letters "lo" and "hi".
        if len(fields) >= 4 and fields[2].endswith('lo') and fields[3].endswith('hi'):
            data[fields[2]] = float(fields[0])
            data[fields[3]] = float(fields[1])
        else:
            remaining.append(_line)
    others = '\n'.join(remaining)

    return data, others

def _parse_data(data):
    """Convert the raw lines of every known section into a data frame.

    Each line is split into whitespace-separated fields and an optional
    ``#`` comment.  The columns depend on the section:

    * topology sections: ``id``, ``type``, ``i0``, ``i1``, ... (integers)
      plus ``comment``;
    * coefficient sections: ``type``, ``coeff0``, ``coeff1``, ...; a column
      is integer when all its entries are digits, float otherwise;
    * ``Masses``: ``type``, ``mass``, ``comment``;
    * ``Velocities``: ``atomid``, ``x``, ``y``, ``z``, ``comment``;
    * ``Atoms``: 6, 7 or 10 fields per line are recognised; other widths
      are left as lists of fields.

    Args:
        data: Mapping from section tag to its list of lines; modified in
            place.  Entries with unknown tags or no lines are left untouched.

    Returns:
        The same dictionary.
    """
    tags = tags_atomic + tags_interactions + tags_coeffs
    for _tag, _lines in data.items():
        if _tag not in tags or not _lines:
            continue

        _split = [_line.partition('#') for _line in _lines]
        _comments = [_comment for _, _, _comment in _split]
        _rows = [_fields.split() for _fields, _, _ in _split]
        _width = len(_rows[0])

        if _tag in tags_interactions:
            _columns = ['id', 'type', *[f'i{_}' for _ in range(_width - 2)]]
            _data = pd.DataFrame(_rows, columns=_columns).astype(int)
            _data['comment'] = _comments
        elif _tag in tags_coeffs:
            _columns = ['type', *[f'coeff{_}' for _ in range(_width - 1)]]
            _data = pd.DataFrame(_rows, columns=_columns)
            for _col in _columns:
                if _data[_col].apply(lambda x: x.lstrip('-').isdigit()).all():
                    _data[_col] = _data[_col].astype(int)
                else:
                    _data[_col] = _data[_col].astype(float)
        elif _tag == 'Masses':
            _data = pd.DataFrame(_rows, columns=['type', 'mass'])
            _data['type'] = _data['type'].astype(int)
            _data['mass'] = _data['mass'].astype(float)
            _data['comment'] = _comments
        elif _tag == 'Velocities':
            _data = pd.DataFrame(
                np.array(_rows, dtype=float), columns=['atomid', 'x', 'y', 'z']
            )
            _data['atomid'] = _data['atomid'].astype(int)
            _data['comment'] = _comments
        else:
            # Only 'Atoms' is left.
            # atom-tag molecule-tag atom-type q x y z nx ny nz
            # (q and nx,ny,nz are optional)
            _all_columns = ['id', 'mol-id', 'type', 'q', 'x', 'y', 'z', 'ix', 'iy', 'iz']
            _int_cols = ['id', 'mol-id', 'type', 'ix', 'iy', 'iz']
            _layouts = {
                6: ['id', 'mol-id', 'type', 'x', 'y', 'z'],
                7: _all_columns[:7],
                10: _all_columns,
            }
            _columns = _layouts.get(_width)
            if _columns is None:
                # Unknown layout: keep the plain field lists.
                _data = _rows
            else:
                _data = pd.DataFrame(_rows, columns=_columns).astype(float)
                for _col in _int_cols:
                    if _col in _columns:
                        _data[_col] = _data[_col].astype(int)

        data[_tag] = _data
    return data


In [413]:
def _format_lammpdata(data):
    """Collapse duplicated type definitions in parsed data-file sections.

    Two passes are made over ``data``, which is modified in place:

    1. When 'Pair Coeffs', 'Atoms' and 'Masses' have the same number of rows,
       rows that share (coeff0, coeff1, mol-id, mass) are given one common
       type; 'Atoms', 'Pair Coeffs' and 'Masses' are renumbered accordingly.
    2. For each (coefficient section, topology section) pair, starting with
       ('Bond Coeffs', 'Bonds'), when both have the same number of rows,
       coefficient rows with identical coefficient values are given one
       common type and both sections are renumbered.

    Progress information is printed along the way.

    NOTE(review): the renumbering uses row position + 1 as the old type, so
    it presumably expects row i of each table to describe type i + 1 —
    confirm against the files this is applied to.

    Returns:
        The same ``data`` dictionary.
    """
    
    if len(data['Pair Coeffs']) == len(data['Atoms']) == len(data['Masses']):
        _data = pd.merge(data['Pair Coeffs'], data['Atoms'], left_on='type', right_on='type') 
        # No join key is given here, so pandas joins on the columns the two
        # frames have in common.
        _data = pd.merge(_data, data['Masses'])
    
        _data = _data[[ 'coeff0', 'coeff1', 'mol-id', 'mass']]
        # indx: row of the first occurrence of each unique row;
        # inv: for every row, the index of its unique row.
        a, indx, inv, count = np.unique(_data.values, return_index=True, return_inverse=True, return_counts=True, axis=0)
        print(len(_data), len(indx))
        # Map the (1-based) position of each representative row to a new
        # consecutive type number.
        _map = dict(zip(sorted(indx+1), np.arange(1, len(indx)+1)))
        # New type of every row, looked up through its representative row.
        _types = [_map[_] for _ in indx[inv]+1]
        
        data['Atoms'].loc[:, ['type']] = _types
        
        # Keep one representative row per unique type and renumber it.
        data['Pair Coeffs'] = data['Pair Coeffs'].iloc[indx]
        data['Pair Coeffs'].loc[:, ['type']] = data['Pair Coeffs']['type'].apply(lambda x:_map[x])
        data['Pair Coeffs'] = data['Pair Coeffs'].sort_values(by='type').reset_index(drop=True)

        data['Masses'] = data['Masses'].iloc[indx]
        data['Masses'].loc[:, ['type']] = data['Masses']['type'].apply(lambda x:_map[x])
        data['Masses'] = data['Masses'].sort_values(by='type').reset_index(drop=True)

    # tags_coeffs[2:] starts at 'Bond Coeffs'; zip stops after the last
    # entry of tags_interactions.
    for _tag_coeffs, _tag_ints in zip(tags_coeffs[2:], tags_interactions):
        if _tag_coeffs in data and len(data[_tag_coeffs]) == len(data[_tag_ints]):
            print(_tag_coeffs, _tag_ints)
            _data = data[_tag_coeffs]
            # Select the 'coeffN' columns.  Note that str.lstrip removes any
            # leading run of the characters c, o, e, f, not the literal
            # prefix 'coeff'.
            _cols = [_ for _ in _data.columns.values if _.lstrip('coeff').isdigit()]
            a, indx, inv, count = np.unique(_data[_cols].values, return_index=True, return_inverse=True, return_counts=True, axis=0)
            print(len(_data), len(indx))
            # Same renumbering scheme as in the first pass.
            _map = dict(zip(sorted(indx+1), np.arange(1, len(indx)+1)))
            _types = [_map[_] for _ in indx[inv]+1]
            data[_tag_coeffs] = data[_tag_coeffs].iloc[indx]
            data[_tag_coeffs].loc[:, ['type']] = data[_tag_coeffs]['type'].apply(lambda x:_map[x])
            data[_tag_coeffs] = data[_tag_coeffs].sort_values(by='type').reset_index(drop=True)
            data[_tag_ints].loc[:, ['type']] = _types

    return data
    