In [1]:
import os
from itertools import combinations, permutations
import numpy as np
from Bond import Bond
from types import MappingProxyType
from collections import namedtuple, OrderedDict
from Residue import Residue as Res
from ChMMCIFParser import ChMMCIFParser as ChmParser
from TopoDefinitions import ResidueDefinition
from Atom import Atom
from Residue import Residue
from RTFParser import RTFParser
from ICBuilder import ResidueFixer
from scipy.spatial.transform import Rotation as R
import numpy.linalg as LA

from matplotlib import pyplot as plt



In [2]:
# Re-execute the two local modules with importlib.reload and then re-import
# the class aliases from the reloaded modules.
# NOTE(review): `import Residue` binds the name `Residue` to the module; the
# import cell above binds the same name to the class via
# `from Residue import Residue`. After this cell `Residue` is the module.
import importlib
import Residue
import ChMMCIFParser
importlib.reload(Residue)
importlib.reload(ChMMCIFParser)
from Residue import Residue as Res
from ChMMCIFParser import ChMMCIFParser as ChmParser

In [3]:
def find_local_cif_path(pdb_id, entry_point='/mnt/backup/PDB/'):
    """Return the path of a PDB entry's mmCIF file in a local mirror.

    The file is looked up at ``<entry_point>/<id[1:3]>/<id>.cif`` where
    ``id`` is ``pdb_id`` in lower case.

    Parameters
    ----------
    pdb_id : str
        PDB identifier; matched case-insensitively.
    entry_point : str, optional
        Root directory of the mirror. Defaults to the location that was
        previously hard-coded, so existing one-argument calls are unchanged.

    Returns
    -------
    str or None
        The file path if a regular file exists there, otherwise ``None``.
    """
    pdb_id = pdb_id.lower()
    # The mirror is sharded by the two middle characters of the identifier.
    subdir = pdb_id[1:3]
    file_path = os.path.join(entry_point, subdir, pdb_id + '.cif')
    # isfile (rather than exists) so that a directory of that name is not
    # handed to the parser as if it were a structure file.
    if os.path.isfile(file_path):
        return file_path
    return None

In [4]:
# Parse the residue topology file (presumably the CHARMM36 protein RTF, going
# by the file name). Later cells read `rtf.residue_definitions`.
rtf = RTFParser('./data/toppar/top_all36_2_prot.rtf')



In [38]:
# Load one PDB entry from the local mmCIF mirror with include_solvent=False.
Parser = ChmParser(
    include_solvent=False
)
pdb_id = '1A8I'
file_path = find_local_cif_path(pdb_id)
if file_path is None:
    # find_local_cif_path returns None when the file is not in the mirror;
    # stop here with a clear message instead of passing None to the parser.
    raise FileNotFoundError(
        f'No local mmCIF file found for PDB entry {pdb_id}'
    )
structure = Parser.get_structure(file_path)

In [39]:
# Take chain 'A' from the element keyed 1 of the structure (presumably
# model 1 — TODO confirm) and attach the RTF residue definitions to it.
chainA = structure[1]['A']
chainA.load_topo_definition(rtf.residue_definitions)



In [43]:
# Quick truthiness check of the current residue's topology definition.
# NOTE(review): `res` is not assigned in any visible cell above this one in
# file order, so this would raise NameError on Restart & Run All — confirm
# whether this scratch cell is still needed.
if res.topo_definition:
    print(1)

In [44]:
# Walk over chain A with a single ResidueFixer. Residues that carry a topology
# definition are loaded into the fixer, get their missing atoms built (the
# residue is printed when it has any) and then get hydrogens built. Residues
# without a definition are only reported.
res_builder = ResidueFixer()
for res in chainA:
    if res.topo_definition is not None:
        res_builder.load_residue(res)
        if res.missing_atoms:
            print(res)
            res_builder.build_missing_atoms()
        res_builder.build_hydrogens()
    else:
        print(f'No topology definition on {res}')

<Residue GLN het=  resseq=7 icode= >
<Residue ASN het=  resseq=250 icode= >
<Residue GLY het=  resseq=261 icode= >
<Residue PHE het=  resseq=316 icode= >
<Residue ARG het=  resseq=323 icode= >
No topology definition <Residue LLP het=H_LLP resseq=680 icode= >
<Residue PRO het=  resseq=835 icode= >




In [45]:
# Display the chain (last expression of the cell).
chainA

NGLWidget()

<Polypeptide(L) id=A Residues/Molecules=813>

In [21]:
# Write chain A's PDB text followed by an END record to a file in the
# working directory.
# NOTE(review): the file name says 5IEV while the visible loading cell sets
# pdb_id = '1A8I' — confirm which entry this file is meant to hold.
with open('5IEV_chainA.pdb', 'w') as f:
    f.writelines((chainA.get_pdb_str(), 'END\n'))

In [13]:
# Run hydrogen building once more on the fixer (presumably for the residue
# most recently passed to load_residue — TODO confirm).
res_builder.build_hydrogens()

In [12]:
# Select the residue keyed 6 in chain 'A' for inspection. This rebinds `res`,
# which the residue-fixing loop also uses as its loop variable.
res = structure[1]['A'][6]

In [None]:
# Display the bonds of the selected residue.
res.bonds

[Bond(<Atom CB>, <Atom CA>, type=single, order=1, length=1.538493),
 Bond(<Atom OG1>, <Atom CB>, type=single, order=1, length=1.430227),
 Bond(<Atom CG2>, <Atom CB>, type=single, order=1, length=1.520044),
 Bond(<Atom N>, <Atom CA>, type=single, order=1, length=1.447242),
 Bond(<Atom C>, <Atom CA>, type=single, order=1, length=1.532867),
 Bond(<Atom O>, <Atom C>, type=double, order=2, length=1.258037)]

In [17]:
# Split the residue's missing atoms into two groups, bound here as
# non-hydrogen atoms and hydrogens.
# NOTE(review): sep_by_priorities is neither defined nor imported in the
# visible cells — confirm where it comes from.
missing_atoms, missing_hydrogens = sep_by_priorities(res.missing_atoms)

# IC table of the THR residue definition attached to chain A.
thr_ic = chainA.topo_definitions['THR'].ic

In [18]:
def reassign_ic_keys(keys, ic_dict_values):
    """Pair each key with the value at the same position.

    Parameters
    ----------
    keys : iterable
        Hashable keys for the new dictionary.
    ic_dict_values : iterable
        Values, matched to ``keys`` by position.

    Returns
    -------
    dict
        Mapping built from the paired items. Pairing stops at the shorter
        input, and a repeated key keeps its last value.
    """
    return dict(zip(keys, ic_dict_values))

def organize_ic_dict(ic_dicts):
    """Regroup IC entries into 'improper' and 'chain' lookup tables.

    Each entry must provide an ``'IJKL'`` item holding four atom names. The
    last five values of the entry (in the entry's own order) are re-keyed by
    atom-name tuples derived from I, J, K and L.

    An entry whose third name starts with ``'*'`` goes into ``'improper'``;
    all other entries go into ``'chain'``. For improper entries the two- and
    three-atom keys use the third name without its leading ``'*'``, while the
    four-atom key keeps the starred name.

    Parameters
    ----------
    ic_dicts : iterable of dict
        IC entries. Presumably the five trailing values are R(I-J), T(I-J-K),
        Phi, T(J-K-L) and R(K-L), in that order — TODO confirm against the
        parser that produces them.

    Returns
    -------
    dict
        ``{'improper': {...}, 'chain': {...}}``. Later entries overwrite
        earlier ones that produce the same key.
    """
    organized_ic = {'improper': {}, 'chain': {}}
    for entry in ic_dicts:
        i, j, k, l = entry['IJKL']
        trailing_values = list(entry.values())[-5:]

        is_improper = k.startswith('*')
        if is_improper:
            k_bare = k.lstrip('*')
            keys = [
                (i, k_bare),
                (i, k_bare, j),
                (i, j, k, l),
                (j, k_bare, l),
                (k_bare, l),
            ]
        else:
            keys = [(i, j), (i, j, k), (i, j, k, l), (j, k, l), (k, l)]

        struct_type = 'improper' if is_improper else 'chain'
        organized_ic[struct_type].update(
            reassign_ic_keys(keys, trailing_values)
        )

    return organized_ic

In [101]:
# The 22 residue names processed by this and later cells.
resnames = [
    'ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 
    'GLY', 'HSD', 'HSE', 'HSP', 'ILE', 'LEU', 'LYS', 
    'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'
]

# For each residue name, print the atom-by-atom build sequences returned by
# find_build_seq.
# NOTE(review): find_build_seq and sep_by_priorities are not defined or
# imported in the visible cells — confirm where they come from.
for resname in resnames:
    print(resname)
    cur_topo_def = chainA.topo_definitions[resname]
    lookup_dict = cur_topo_def.atom_lookup_dict
    # Every atom name except '-C', 'CA' and 'N' is handed to find_build_seq.
    all_atoms = [k for k in lookup_dict.keys() if k not in ('-C','CA','N')]
    # NOTE(review): all_heavy, all_hydrogen and running_dict are not read
    # again inside this cell.
    all_heavy, all_hydrogen = sep_by_priorities(all_atoms)
    running_dict = OrderedDict({k: lookup_dict[k] for k in all_atoms})

    build_seqs = find_build_seq(cur_topo_def, all_atoms)
    # Each build sequence is iterated as (atom name, IC key) pairs.
    for build_seq in build_seqs:
        for atom_name, ic in build_seq:
            print(atom_name, ic)
    print('\n')

ALA
C ('-C', 'N', 'CA', 'C')
+N ('N', 'CA', 'C', '+N')
O ('+N', 'CA', 'C', 'O')
+CA ('CA', 'C', '+N', '+CA')
CB ('N', 'C', 'CA', 'CB')
HA ('N', 'C', 'CA', 'HA')
HB1 ('C', 'CA', 'CB', 'HB1')
HB2 ('HB1', 'CA', 'CB', 'HB2')
HB3 ('HB1', 'CA', 'CB', 'HB3')
HN ('-C', 'CA', 'N', 'HN')


ARG
C ('-C', 'N', 'CA', 'C')
+N ('N', 'CA', 'C', '+N')
O ('+N', 'CA', 'C', 'O')
+CA ('CA', 'C', '+N', '+CA')
CB ('N', 'C', 'CA', 'CB')
CG ('N', 'CA', 'CB', 'CG')
CD ('CA', 'CB', 'CG', 'CD')
NE ('CB', 'CG', 'CD', 'NE')
CZ ('CG', 'CD', 'NE', 'CZ')
NH1 ('CD', 'NE', 'CZ', 'NH1')
NH2 ('NH1', 'NE', 'CZ', 'NH2')
HH21 ('NE', 'CZ', 'NH2', 'HH21')
HH22 ('HH21', 'CZ', 'NH2', 'HH22')
HN ('-C', 'CA', 'N', 'HN')
HA ('N', 'C', 'CA', 'HA')
HB1 ('CG', 'CA', 'CB', 'HB1')
HB2 ('CG', 'CA', 'CB', 'HB2')
HG1 ('CD', 'CB', 'CG', 'HG1')
HG2 ('CD', 'CB', 'CG', 'HG2')
HD1 ('NE', 'CG', 'CD', 'HD1')
HD2 ('NE', 'CG', 'CD', 'HD2')
HE ('CZ', 'CD', 'NE', 'HE')
HH11 ('NE', 'CZ', 'NH1', 'HH11')
HH12 ('HH11', 'CZ', 'NH1', 'HH12')


ASN
C ('-C', 

In [80]:
# Same steps as the per-residue loop body, run for PRO only so that the
# resulting build sequences stay bound to `build_seqs` for inspection.
# NOTE(review): find_build_seq and sep_by_priorities are not defined or
# imported in the visible cells.
cur_topo_def = chainA.topo_definitions['PRO']
lookup_dict = cur_topo_def.atom_lookup_dict
# Every atom name except '-C', 'CA' and 'N' is handed to find_build_seq.
all_atoms = [k for k in lookup_dict.keys() if k not in ('-C','CA','N')]
all_heavy, all_hydrogen = sep_by_priorities(all_atoms)
running_dict = OrderedDict({k: lookup_dict[k] for k in all_atoms})

build_seqs = find_build_seq(cur_topo_def, all_atoms)

In [83]:
# Display the first build sequence found for the residue.
build_seqs[0]

[('CD', ('-C', 'CA', 'N', 'CD')),
 ('C', ('-C', 'N', 'CA', 'C')),
 ('+N', ('N', 'CA', 'C', '+N')),
 ('O', ('+N', 'CA', 'C', 'O')),
 ('+CA', ('CA', 'C', '+N', '+CA')),
 ('CB', ('N', 'C', 'CA', 'CB')),
 ('CG', ('N', 'CA', 'CB', 'CG'))]

In [102]:
def save_ic_coord_pdb(resname, coord_dict, path):
    """Write the atoms of one residue to ``<path>/<resname>.pdb``.

    A residue with id ``(' ', 0, ' ')`` is assembled from ``coord_dict`` and
    its PDB text is written, followed by ``TER`` and ``END`` records.

    Parameters
    ----------
    resname : str
        Residue name; also used as the output file's base name.
    coord_dict : dict
        Maps atom name to coordinates. Names starting with ``'-'`` or ``'+'``
        are skipped and not written.
    path : str
        Output directory. It is created if it does not exist yet.

    Notes
    -----
    Serial numbers are assigned consecutively from 1 to the atoms actually
    written. Skipped ``-``/``+`` entries used to consume numbers, which left
    gaps in the numbering.
    The element is taken from the first character of the atom name, which
    assumes single-letter element symbols — TODO confirm for other residues.
    """
    residue = Res((' ', 0, ' '), resname, segid = " ")
    serial_number = 0
    for atom_name, coord in coord_dict.items():
        if atom_name.startswith(('-', '+')):
            continue
        serial_number += 1
        new_atom = Atom(
            name = atom_name,
            coord = coord,
            bfactor = 0.0,
            occupancy = 1.0,
            altloc = ' ',
            fullname = atom_name,
            serial_number = serial_number,
            element = atom_name[0]
        )
        residue.add(new_atom)

    # Make sure the target directory exists so a first run does not fail
    # with FileNotFoundError.
    if path:
        os.makedirs(path, exist_ok=True)
    with open(f"{path}/{resname}.pdb", 'w') as f:
        f.write(residue.get_pdb_str())
        f.write('TER\nEND')

In [109]:
# Build one coordinate dictionary per residue name from its RTF residue
# definition and collect them by residue name.
# NOTE(review): ab_initio_ic_build is not defined or imported in the visible
# cells — confirm where it comes from.
# `topo_def` and `coord_dict` stay bound to the last residue's values after
# the loop and are read again by later cells.
all_res_ic_coords = {}
for resname in resnames:
    topo_def = rtf.residue_definitions[resname]
    coord_dict = ab_initio_ic_build(topo_def)
    all_res_ic_coords[resname] = coord_dict

In [110]:
# Write one PDB file per residue name into ./ic_test_pdb.
for resname, coord_dict in all_res_ic_coords.items():
    save_ic_coord_pdb(resname, coord_dict, './ic_test_pdb')

In [1229]:
# NOTE(review): computed_heavy_coords is not assigned in any visible cell;
# this display depends on kernel state from elsewhere.
computed_heavy_coords

['C', '+N', 'O', '+CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ']

In [1230]:
# NOTE(review): computed_hydrogen_coords is not assigned in any visible cell;
# this display depends on kernel state from elsewhere.
computed_hydrogen_coords

['HE1', 'HE2', 'HZ', 'HN', 'HA', 'HB1', 'HB2', 'HD1', 'HD2']

In [1154]:
# Look up one THR IC entry by the key stored at heavy_seq[1][1].
# NOTE(review): heavy_seq is not assigned in any visible cell.
thr_ic[heavy_seq[1][1]]

{'improper': False,
 'R(I-J)': 1.4607,
 'T(I-J-K)': 106.09,
 'Phi': 180.0,
 'T(J-K-L)': 117.69,
 'R(K-L)': 1.3449}

In [99]:
# Overwrite the 'Phi' value of two PRO IC entries in place on the parsed RTF
# definitions. The change persists for the rest of the kernel session.
# NOTE(review): the replacement values are hard-coded with no stated source.
# NOTE(review): this cell's execution count (99) is lower than that of the
# cell building all_res_ic_coords (109), yet it sits below that cell in the
# file. On a top-to-bottom run the override would be applied only after the
# coordinates are built — confirm the intended order.
rtf.residue_definitions['PRO'].ic[('N','C','CA','CB')]['Phi'] = -122.4
rtf.residue_definitions['PRO'].ic[('N','C','CA','HA')]['Phi'] = 113.74

In [114]:
# Display the coordinate dictionary built for PRO.
all_res_ic_coords['PRO']

{'-C': array([1.3366, 0.    , 0.    ]),
 'N': array([0., 0., 0.]),
 'CA': array([-0.79307466,  1.2240322 ,  0.        ]),
 'CD': array([-0.82392864, -1.20769434, -0.03506763]),
 'C': array([-0.80153919,  1.87192008,  1.39694726]),
 '+N': array([-1.51913526,  3.02006958,  1.48631299]),
 'O': array([-0.14719197,  1.38019198,  2.31720361]),
 '+CA': array([-1.67517616,  3.80986932,  2.6943281 ]),
 'CB': array([-2.21828151,  0.91199157, -0.49265412]),
 'CG': array([-2.47668634, -0.52424134, -0.02564362]),
 'HB1': array([-2.97734816,  1.58467513, -0.03406252]),
 'HB2': array([-2.29681994,  0.95231909, -1.6022473 ]),
 'HG1': array([-2.95071979, -0.55407541,  0.97505689]),
 'HG2': array([-3.11878945, -1.10742391, -0.72511927]),
 'HD1': array([-0.66689579, -1.81347445, -0.95631615]),
 'HD2': array([-0.67625814, -1.86029877,  0.85610476]),
 'HA': array([-0.26640632,  1.87395457, -0.68893182])}

In [1155]:
# Coordinates taken from the residues keyed 5 and 7 in chain A, stored under
# the '-C', '+N' and '+CA' names (presumably the neighbours before and after
# the residue keyed 6 that `res` was set to — TODO confirm).
neighbor_atoms = {
    '-C': chainA[5]['C'].coord,
    '+N': chainA[7]['N'].coord,
    '+CA': chainA[7]['CA'].coord,
}

In [1156]:
# Map atom name -> coordinates for every atom of `res`. This rebinds
# `coord_dict`, which earlier loops also use as a loop variable.
coord_dict = {atom.name:atom.coord for atom in res}

In [976]:
# Add the '-C', '+N' and '+CA' entries to the residue's coordinate dictionary
# (in place).
coord_dict.update(neighbor_atoms)

In [1187]:
# Display the current coordinate dictionary.
coord_dict

{'-C': array([1.3482, 0.    , 0.    ]),
 'N': array([0., 0., 0.]),
 'CA': array([-0.80200645,  1.20848906,  0.        ]),
 'C': array([-2.25981145e+00,  7.82749376e-01, -1.77582852e-16]),
 '+N': array([-3.18842856e+00,  1.75959895e+00,  4.72544476e-16]),
 'O': array([-2.56741042e+00, -4.05791814e-01, -2.89454953e-16]),
 '+CA': array([-4.62068834e+00,  1.54196714e+00,  8.02873294e-16]),
 'CB': array([-0.47447809,  2.10968089,  1.22646023]),
 'CG': array([-1.27434363,  3.42613345,  1.32423562]),
 'CD': array([-0.90561725,  4.27210885,  2.55673007]),
 'CE': array([-1.71327804,  5.57524837,  2.63246945]),
 'NZ': array([-1.33193365,  6.34424351,  3.81399012]),
 'HE1': array([-2.80253559,  5.35586931,  2.69341757]),
 'HE2': array([-1.53038986,  6.20130866,  1.73146351]),
 'HZ1': array([-1.8821697 ,  7.2261444 ,  3.85784524]),
 'HZ2': array([-0.31792316,  6.57157123,  3.76793724]),
 'HZ3': array([-1.51694086,  5.78003039,  4.6679479 ]),
 'HN': array([-5.19193562e-01, -8.53252299e-01, -1.10758

In [1030]:
# Inspect the attributes of the 'CA' atom definition of `topo_def`.
# NOTE(review): in file order `topo_def` is last bound by the loop over
# `resnames`, while the recorded output belongs to a GLU definition — the
# output may be stale.
topo_def['CA'].__dict__

{'parent_def': <Residue Definition name=GLU atoms=15>,
 'name': 'CA',
 'atom_type': 'CT1',
 'is_donor': False,
 'is_acceptor': False,
 'charge': 0.07,
 'mass': 12.011,
 'desc': 'aliphatic sp3 C for CH'}