In [4]:
def skip_line(x):
    # True for lines that carry no topology data: comment lines ('!'),
    # title lines ('*'), and lines that are empty or whitespace only.
    # The prefix test is done on x as given (no stripping here).
    is_comment_or_title = x.startswith(('!', '*'))
    return is_comment_or_title or not x.strip()

# Load the topology file, keeping only the lines that skip_line() does not
# reject. The filter sees the stripped text; the stored line is unmodified.
with open('../allosteric-miner/dock_probes/toppar/top_all36_prot.rtf','r') as f:
    lines = []
    for raw_line in f.readlines():
        if skip_line(raw_line.strip()):
            continue
        lines.append(raw_line)

In [5]:
def comment_parser(line):
    # Split a line at its first '!' into (fields string, comment text).
    # Without a '!' the line is returned untouched and the comment is None.
    # The fields string keeps its whitespace; the comment is stripped.
    field_str, marker, comment = line.partition('!')
    if not marker:
        return line, None
    return field_str, comment.strip()

def mass_parser(line):
    # Parse a MASS line into (atom_type, mass, description).
    # The description is the trailing '!' comment, or None when absent.
    # The mass is returned as the raw string token.
    field_str, desc = comment_parser(line)
    tokens = field_str.split()
    # Exactly four tokens: [keyword, deprecated entry, atom type, atomic mass]
    _keyword, _deprecated, atom_type, mass = tokens
    return atom_type, mass, desc

def decl_parser(line):
    # Parse a DECLare line: the declared atom is the last
    # whitespace-separated token before any '!' comment.
    tokens = comment_parser(line)[0].split()
    return tokens[-1]

def defa_parser(line):
    # Parse the default-patch line for residues and return
    # (first_patch, last_patch): the tokens that immediately follow the
    # 'FIRS' and 'LAST' keywords, wherever those appear on the line.
    tokens = comment_parser(line)[0].split()
    # Locate both keywords before reading either value
    first_at, last_at = tokens.index('FIRS'), tokens.index('LAST')
    return tokens[first_at + 1], tokens[last_at + 1]

def auto_parser(line):
    # Parse an AUTO line: return every token after the keyword
    # (comment removed) as a list of option strings.
    _keyword, *autogen_ops = comment_parser(line)[0].split()
    return autogen_ops

def resi_parser(line):
    # Parse a RESI/PRES header into (residue_name, total_charge, description).
    # The charge is returned as the raw string token; the description is the
    # trailing '!' comment or None.
    field_str, desc = comment_parser(line)
    tokens = field_str.split()
    # Exactly three tokens: [keyword, residue name, total charge]
    _keyword, res_name, total_charge = tokens
    return res_name, total_charge, desc

def atom_parser(line):
    # Parse an ATOM line into (atom_name, atom_type, charge).
    # All three values are returned as raw string tokens.
    tokens = comment_parser(line)[0].split()
    # Exactly four tokens: [keyword, atom name, atom type, partial charge]
    _keyword, atom_name, atom_type, charge = tokens
    return atom_name, atom_type, charge

def pairwise_parser(line):
    # Parse a line that lists atoms pairwise, separated by whitespace.
    # Returns a list of 2-tuples in the order the pairs appear.
    # Raises ValueError when the atoms cannot be split evenly into pairs.
    tokens = comment_parser(line)[0].split()
    # Drop the leading keyword; what remains are the atom names
    atoms = tokens[1:]
    if len(atoms) % 2:
        raise ValueError('Odd number of atoms! Cannot group into pairs: {}'.format(line))
    # Pulling twice from the same iterator per step yields consecutive pairs
    atom_iter = iter(atoms)
    return list(zip(atom_iter, atom_iter))
    
def quad_parser(line):
    # Parse an IMPR line into a list of 4-atom tuples, one tuple per
    # improper, in the order they appear on the line.
    # Raises ValueError when the number of atoms after the keyword is not a
    # multiple of 4.
    field_str = comment_parser(line)[0]
    # The first field is the keyword, followed by atoms in groups of four
    fields = field_str.split()[1:]
    if len(fields) % 4 != 0:
        raise ValueError('Invalid length of topology specification: {}\n \
        Multiples  of 4 atoms required'.format(line))
    # Slice in strides of 4 so that any number of impropers on one line
    # (1, 2, 3, ...) each becomes its own tuple. A fixed [:4] / [4:] split
    # would emit an empty tuple for a single improper and merge everything
    # past the first one when there are three or more.
    quad = [tuple(fields[i:i + 4]) for i in range(0, len(fields), 4)]
    return quad

def octa_parser(line):
    # Parse a CMAP line: exactly 8 atoms after the keyword, returned as a
    # tuple of two 4-atom tuples (shape (2, 4)).
    # Raises ValueError for any other atom count.
    atoms = comment_parser(line)[0].split()[1:]
    if len(atoms) != 8:
        raise ValueError('Invalid length of topology specification: {}\n \
        8 atoms required'.format(line))
    first_quad = tuple(atoms[:4])
    second_quad = tuple(atoms[4:])
    return (first_quad, second_quad)

def ic_parser(line):
    # Parse an internal coordinate (IC) entry: the keyword plus 9 fields.
    # fields: [key, I, J, K, L, R(I(J/K)), T(I(JK/KJ)), PHI, T(JKL), R(KL)]
    # I, J, K, L are atoms; a star (*) on atom K indicates an improper
    # structure. Structures are specified for both chain and improper
    # (branch) cases:
    #   R(I(J/K)):   distance of I-J / I-K
    #   T(I(JK/KJ)): angle of I-J-K / I-K-J
    #   PHI:         dihedral
    #   T(JKL):      angle of J-K-L
    #   R(KL):       distance of K-L
    # Returns a dict with the four atom names as strings and the five
    # geometric values as floats. Raises ValueError when the field count
    # is not 9.
    values = comment_parser(line)[0].split()[1:]
    if len(values) != 9:
        raise ValueError('Invalid length of topology specification: {}\n \
        9 fields required'.format(line))
    atom_keys = ('I', 'J', 'K', 'L')
    geometry_keys = (
        'R(I-J/I-K)', 'T(I-J-K/I-K-J)', 'Phi', 'T(J-K-L)', 'R(K-L)',
    )
    # First four fields are atom names, the remaining five are numbers
    entry = dict(zip(atom_keys, values[:4]))
    entry.update(zip(geometry_keys, map(float, values[4:])))
    return entry

def delete_parser(line):
    # Parse a DELETE line. The keyword must be followed by exactly two
    # fields: the type of the data to be deleted and its value.
    # Returns {'key': <type>, 'data': <value>}; raises ValueError otherwise.
    operands = comment_parser(line)[0].split()[1:]
    if len(operands) != 2:
        raise ValueError('Invalid length of topology specification: {}\n \
        2 fields required: key, data'.format(line))
    data_type, data_value = operands
    return {'key': data_type, 'data': data_value}

In [76]:
# Walk the filtered topology lines once and build:
#   rtf_ver            - tokens of the first retained line joined with '.'
#   mass_dict          - atom type -> (mass as float, description)
#   decl_peptide_atoms - atoms named on DECL lines
#   default_patchs     - default FIRST/LAST patches from the DEFA line
#   default_autogen    - options from the AUTO line
#   residue_topo_dict  - residue/patch name -> nested topology record
#   unparsed_lines     - lines whose keyword is not recognised below
# Parsing stops at the first END/end line.
rtf_ver = '.'.join(lines[0].strip().split())
mass_dict = dict()
decl_peptide_atoms = []
default_patchs = {'FIRST':None,'LAST':None}
default_autogen = None
residue_topo_dict = dict()
unparsed_lines = []
for l in lines[1:]:
    if l.startswith('MASS'):
        symbol, mass, desc = mass_parser(l)
        mass_dict[symbol] = (float(mass), desc)
    elif l.startswith('DECL'):
        atom = decl_parser(l)
        decl_peptide_atoms.append(atom)
    elif l.startswith('DEFA'):
        first_patch, last_patch = defa_parser(l)
        default_patchs['FIRST']=first_patch
        default_patchs['LAST']=last_patch
    elif l.startswith('AUTO'):
        default_autogen = auto_parser(l)
    elif l.startswith(('RESI', 'PRES')):
        res_name, total_charge, desc = resi_parser(l)
        # Every RESI/PRES header starts a fresh record and resets the group
        # counter. Previously a repeated name skipped this step, so the
        # record of the residue parsed just before it was re-registered
        # under the new name and kept receiving the following lines.
        cur_group_i = -1
        cur_res = {
            'param_desc': desc,
            'total_charge': float(total_charge),
            'atoms':{
            },
            'bonds':{
                'single':[],
                'double':[],
                'triple':[],
                'aromatic':[]
            },
            'impropers':[],
            'cmap':[],
            'ic':[],
            'is_patch': l.startswith('PRES')
        }
        residue_topo_dict[res_name] = cur_res
    elif l.startswith('GROUP'):
        # Update group number
        cur_group_i += 1
        cur_atom_group = {cur_group_i: {}}
        cur_res['atoms'].update(cur_atom_group)
    elif l.startswith('ATOM'):
        if cur_group_i == -1:
            # if no GROUP keyword exist for patch, create a single group
            cur_group_i = 0
            cur_atom_group = {cur_group_i: {}}
            cur_res['atoms'].update(cur_atom_group)
        atom_name, atom_type, atom_charge = atom_parser(l)
        # Mass and description are looked up from the MASS table by atom
        # type; an atom type without a MASS entry raises KeyError here.
        cur_atom_dict = {
            atom_name:
            {
                'atom_type': atom_type,
                'charge': float(atom_charge),
                'mass': mass_dict[atom_type][0],
                'desc': mass_dict[atom_type][1]
            }
        }
        cur_atom_group[cur_group_i].update(cur_atom_dict)
    elif l.startswith('DONO'):
        if 'HDonor' not in cur_res:
            cur_res['HDonor'] = []
        # Strip any trailing '!' comment first, as for every other keyword,
        # so comment words are not recorded as atoms
        donors = tuple(comment_parser(l)[0].split()[1:])
        cur_res['HDonor'].append(donors)
    elif l.startswith('ACCE'):
        if 'HAcceptor' not in cur_res:
            cur_res['HAcceptor'] = []
        acceptors = tuple(comment_parser(l)[0].split()[1:])
        cur_res['HAcceptor'].append(acceptors)
    elif l.startswith('BOND'):
        single_bonds = pairwise_parser(l)
        cur_res['bonds']['single'].extend(single_bonds)
    elif l.startswith('DOUBLE'):
        double_bonds = pairwise_parser(l)
        cur_res['bonds']['double'].extend(double_bonds)
    elif l.startswith('TRIPLE'):
        triple_bonds = pairwise_parser(l)
        cur_res['bonds']['triple'].extend(triple_bonds)
    elif l.startswith('AROMATIC'):
        aromatic_bonds = pairwise_parser(l)
        # Aromatic bonds go in their own list (they were previously
        # appended to the 'triple' list by mistake)
        cur_res['bonds']['aromatic'].extend(aromatic_bonds)
    elif l.startswith('IMPR'):
        # Improper (branching structures)
        impropers = quad_parser(l)
        cur_res['impropers'].extend(impropers)
    elif l.startswith('CMAP'):
        # Dihedral crossterm energy correction map
        cmap = octa_parser(l)
        cur_res['cmap'].append(cmap)
    elif l.startswith(('IC', 'ic')):
        # Internal Coordinates
        ic_dict = ic_parser(l)
        cur_res['ic'].append(ic_dict)
    elif l.startswith('DELETE'):
        if 'delete' not in cur_res:
            cur_res['delete'] = []
        delete_entry = delete_parser(l)
        cur_res['delete'].append(delete_entry)
    elif l.startswith(('DIHE', 'ANGLE', 'PATCH', 'patch')):
        # Recognised keywords that are intentionally not stored
        pass
    elif l.startswith(('END', 'end')):
        break
    else:
        unparsed_lines.append(l)

In [7]:
decl_peptide_atoms

['-CA', '-C', '-O', '+N', '+HN', '+CA']

In [55]:
class AtomDefinition:
    # Record describing one atom inside a residue definition.
    # Stores the constructor arguments as attributes of the same name and
    # starts with both hydrogen-bond flags (is_donor, is_acceptor) False.
    def __init__(
        self, parent_def, name, atom_type, charge, mass,
        desc = None
    ):
        # Identity of the atom and the definition it belongs to
        self.parent_def = parent_def
        self.name = name
        self.atom_type = atom_type
        # Flags start False; nothing in this class changes them
        self.is_donor = False
        self.is_acceptor = False
        # Physical properties and optional free-text description
        self.charge = charge
        self.mass = mass
        self.desc = desc

    def __repr__(self):
        return "<Atom Definition name={} type={}>".format(
            self.name, self.atom_type
        )

In [56]:
class ResidueDefinition:
    # Container for the topology of one residue (or patch).
    # file_source and resname are stored as given; every other attribute
    # starts as None and is expected to be filled in by the caller.
    def __init__(self, file_source, resname, topo_info_dict=None):
        # topo_info_dict is optional so the class can be constructed with
        # just (file_source, resname). It is accepted for interface
        # compatibility but is not consumed here.
        self.file_source = file_source
        self.resname = resname
        self.is_modified = None
        self.atom_groups = None
        self.total_charge = None
        self.bonds = None
        self.impropers = None
        self.cmap = None
        self.H_donors = None
        self.H_acceptors = None
        self.is_patch = None
        self.param_desc = None

    def __repr__(self):
        return f"<Residue Definition name={self.resname}>"

    def set_bonds(self, bond_dict):
        # Store the bond mapping on the instance.
        # NOTE(review): the original method had no body (a syntax error);
        # storing bond_dict unchanged is the minimal implementation -
        # confirm whether any validation or conversion was intended.
        self.bonds = bond_dict

In [56]:
class RTFParser:
    # Holds a parsed topology dictionary and can load one residue's
    # topology record onto itself as attributes.
    def __init__(self, topo_dict):
        self.topo_dict = topo_dict

    def load_topo_dict(self, res_topo_dict):
        # Copy every top-level entry of res_topo_dict onto this object as an
        # attribute of the same name, then build self.atom_groups from its
        # 'atoms' entry.
        for key, val in res_topo_dict.items():
            setattr(self, key, val)
        # Call the method on self: the bare name would resolve to a
        # module-level function with a different signature, or to nothing.
        # A stray trailing write to the globals all_res_def/resname/res_def
        # (none of which belong to this method) was removed.
        self.atom_groups = self.process_res_groups(res_topo_dict)

    def process_res_groups(self, res_topo_dict):
        # Turn res_topo_dict['atoms'] ({group index: {atom name: info}})
        # into {group index: [AtomDefinition, ...]}. Each info dict is
        # expanded as keyword arguments and this object is passed as the
        # atom's parent definition.
        all_groups = {}
        for i, group_def in res_topo_dict['atoms'].items():
            cur_group = []
            for atom_name, atom_info in group_def.items():
                atom_def = AtomDefinition(
                    self, atom_name, **atom_info
                )
                cur_group.append(atom_def)
            all_groups[i] = cur_group
        return all_groups

In [93]:
def process_res_groups(res_topo, res_def):
    # Convert res_topo['atoms'] ({group index: {atom name: info dict}}) into
    # {group index: [AtomDefinition, ...]}, preserving group and atom order.
    # Each info dict is expanded as keyword arguments and res_def is passed
    # as the parent definition of every atom created.
    return {
        group_index: [
            AtomDefinition(res_def, atom_name, **atom_info)
            for atom_name, atom_info in group_atoms.items()
        ]
        for group_index, group_atoms in res_topo['atoms'].items()
    }

In [98]:
# Build one ResidueDefinition per parsed residue/patch, keyed by name.
all_res_def = {}
for resname, res_topo in residue_topo_dict.items():
    # The constructor takes (file_source, resname, topo_info_dict); pass the
    # topology record so the call matches that signature.
    res_def = ResidueDefinition(rtf_ver, resname, res_topo)
    # Mirror every top-level entry of the record as an attribute
    for key, val in res_topo.items():
        setattr(res_def, key, val)
    # Replace the raw per-group atom dicts with AtomDefinition objects
    res_def.atom_groups = process_res_groups(res_topo, res_def)
    all_res_def[resname] = res_def
    

In [101]:
all_res_def['ALA'].bonds

{'single': [('CB', 'CA'),
  ('N', 'HN'),
  ('N', 'CA'),
  ('C', 'CA'),
  ('C', '+N'),
  ('CA', 'HA'),
  ('CB', 'HB1'),
  ('CB', 'HB2'),
  ('CB', 'HB3')],
 'double': [('O', 'C')],
 'triple': [],
 'aromatic': []}

In [77]:
residue_topo_dict['ALA']

{'param_desc': None,
 'total_charge': 0.0,
 'atoms': {0: {'N': {'atom_type': 'NH1',
    'charge': -0.47,
    'mass': 14.007,
    'desc': 'peptide nitrogen'},
   'HN': {'atom_type': 'H', 'charge': 0.31, 'mass': 1.008, 'desc': 'polar H'},
   'CA': {'atom_type': 'CT1',
    'charge': 0.07,
    'mass': 12.011,
    'desc': 'aliphatic sp3 C for CH'},
   'HA': {'atom_type': 'HB1',
    'charge': 0.09,
    'mass': 1.008,
    'desc': 'backbone H'}},
  1: {'CB': {'atom_type': 'CT3',
    'charge': -0.27,
    'mass': 12.011,
    'desc': 'aliphatic sp3 C for CH3'},
   'HB1': {'atom_type': 'HA3',
    'charge': 0.09,
    'mass': 1.008,
    'desc': 'alkane, CH3, new LJ params (see toppar_all22_prot_aliphatic_c27.str)'},
   'HB2': {'atom_type': 'HA3',
    'charge': 0.09,
    'mass': 1.008,
    'desc': 'alkane, CH3, new LJ params (see toppar_all22_prot_aliphatic_c27.str)'},
   'HB3': {'atom_type': 'HA3',
    'charge': 0.09,
    'mass': 1.008,
    'desc': 'alkane, CH3, new LJ params (see toppar_all22_prot_

In [8]:
unparsed_lines

[]