In [27]:
#!/usr/bin/env python
# coding: utf-8

from Bio.PDB import *
import glob
import math
import numpy as np

# Number of atoms every loop-3 structure is expected to contain; the driver
# loop compares count(structure) against this value.
loop3Size = 176
# Data files
CaMKII = glob.glob("input.loop3/CaMKII_loop3_*.pdb")
holoCaM = glob.glob("input.loop3/holoCaM_loop3_*.pdb")
Ng = glob.glob("input.loop3/Ng-*_loop3_*.pdb")

# Extract the charges
# `charge` maps (protein name, first field of a line) -> second field of that
# line. The value is stored exactly as read, i.e. as a string.
chgFiles = glob.glob("output.loop3/*.txt")
charge = dict()

for chgFile in chgFiles:
    # The protein name is the second dot-separated field of the file name.
    prot = chgFile.split('/')[1].split('.')[1]
    with open(chgFile,"r") as file1:
        for line in file1:
            line = line.strip().split()
            # Skip blank or incomplete lines instead of failing with an
            # IndexError on line[1].
            if len(line) < 2:
                continue
            charge[(prot,line[0])] = line[1]

def count(gen):
    """
    Return how many atoms ``gen.get_atoms()`` yields.
    """
    total = 0
    for _atom in gen.get_atoms():
        total += 1
    return total

def cutoff(r_ij, r_cut):
    """
    Cosine-shaped radial cutoff.

    Returns 0.5*(cos(pi*r_ij/r_cut) + 1) while r_ij does not exceed r_cut,
    and 0 once r_ij is larger than r_cut.
    """
    within_range = not (r_ij > r_cut)
    if within_range:
        phase = math.pi*r_ij/r_cut
        return 0.5*(math.cos(phase)+1)
    return 0


def Rad(atom, r_min, r_max, r_inc):
    """
    Radial distribution features for one selected atom.

    For every atom class in the module-level dict `ele` (in its iteration
    order) and for every cutoff radius r_cut from r_min to r_max (inclusive)
    in steps of r_inc, the feature is the sum over all atoms of that class of

        exp(-tau * r_ij**2) * cutoff(r_ij, r_cut)

    where r_ij is the distance between the class member and the selected
    atom. tau controls how fast the Gaussian term decays; a very small tau
    is used here.

    Parameters:
        atom:  index of the selected atom in the module-level list `atoms`.
        r_min: smallest cutoff radius.
        r_max: largest cutoff radius (included in the grid).
        r_inc: spacing between consecutive cutoff radii.

    Returns:
        A flat list with one value per (class, r_cut) pair, grouped by class
        and ordered by increasing r_cut inside each class.
    """
    feat = []
    tau = 0.0001
    at = atoms[atom]
    # Half an increment is added to the stop value so that r_max itself is
    # part of the grid even with floating-point rounding in np.arange.
    r_cuts = np.arange(r_min, r_max + 0.5*r_inc, r_inc)
    for element in ele:
        atomlist = ele[element]
        # Distances do not depend on r_cut, so compute them once per class.
        distances = [np.linalg.norm(atoms[i].coord - at.coord) for i in atomlist]
        for r_cut in r_cuts:
            g_rad = 0
            for r_ij in distances:
                g_rad += np.exp(-tau*r_ij*r_ij)*cutoff(r_ij, r_cut)
            feat.append(g_rad)
    return feat

def Ang(atom, r_cut):
    """
    Angular distribution features for one selected atom.

    For every ordered pair of atom classes (element, element2) taken from the
    module-level dict `ele`, sums over all atoms k of the first class and all
    atoms j of the second class the term 2*f1*f2*f3 with

        f1 = (0.5 - 0.5*cos(theta))**n      theta: angle at the selected atom
                                            between the directions to j and k
        f2 = exp(-tau*(r_ij**2 + r_ik**2 + r_kj**2))
        f3 = cutoff(r_ij)*cutoff(r_ik)*cutoff(r_kj)

    n controls the sensitivity in angle.

    Parameters:
        atom:  index of the selected atom in the module-level list `atoms`.
        r_cut: cutoff radius passed to cutoff() for all three distances.

    Returns:
        A flat list with len(ele)**2 values, ordered by first class and then
        by second class.

    Note: the selected atom itself must not be listed in `ele`; a zero
    distance would make the cosine (division by r_ij*r_ik) undefined.
    """
    feat = []
    tau = 0.0001
    n = 0.5
    at = atoms[atom]
    for element in ele:
        atomlist = ele[element]
        for element2 in ele:
            atomlist2 = ele[element2]
            g_ang = 0
            for k in atomlist:
                # Quantities that depend only on k are computed once per k.
                ak = atoms[k]
                vec_ik = at.coord - ak.coord
                r_ik = np.linalg.norm(vec_ik)
                for j in atomlist2:
                    aj = atoms[j]
                    vec_ij = at.coord - aj.coord
                    r_ij = np.linalg.norm(vec_ij)
                    # Distance between the two neighbours k and j (the
                    # original subtracted ak from itself, giving 0 always).
                    r_kj = np.linalg.norm(ak.coord - aj.coord)

                    cos_ikj = np.inner(vec_ij, vec_ik)/(r_ij*r_ik)
                    # Rounding keeps tiny negative values caused by
                    # floating-point error out of the fractional power.
                    f1 = np.power(round((0.5-cos_ikj*0.5), 6),n)
                    f2 = np.exp(-tau*(r_ij*r_ij+r_ik*r_ik+r_kj*r_kj))
                    f3 = cutoff(r_ij, r_cut)*cutoff(r_ik, r_cut)*cutoff(r_kj, r_cut)
                    g_ang += 2*f1*f2*f3
            feat.append(g_ang)
    return feat

def feature(struct):
    """
    Build the feature vector for the atom at module-level index `atomindex`.

    The result is the radial block Rad(atomindex, 1.0, 6.0, 0.5) followed by
    the angular block Ang(atomindex, 3.0). `struct` is accepted but not read
    by this function.

    NOTE(review): the earlier comment described an 80-element layout
    (0-54 radial and 55-79 angular for H, C, O, N, S). The actual length
    follows from the number of classes in `ele` -- confirm against the
    current class table.
    """
    radial_part = Rad(atomindex, 1.0, 6.0, 0.5)
    angular_part = Ang(atomindex, 3.0)
    return radial_part + angular_part

# Set up the state used when printing the symmetry functions:
# the output file, a counter and the PDB parser.
# NOTE(review): 'w+' truncates an existing sym.txt, and no close() or write
# on `output` is visible in this excerpt -- confirm it is handled elsewhere.
output = open("sym.txt", mode='w+')
cnt = 0
parser = PDBParser()

In [2]:
# Restrict CaMKII to the single structure file with index 0.0. This replaces
# the wildcard file list assigned to CaMKII in the first cell.
CaMKII = glob.glob("input.loop3/CaMKII_loop3_0.0.pdb")

In [5]:
# Parse every CaMKII structure that has an entry in the charge table and
# compute its feature vector.
for struct in CaMKII:
    # File name without the directory part; its '_'-separated fields carry
    # the protein name (field 0) and the structure index (field 2).
    pdb_name = struct.split('/')[1]
    name_fields = pdb_name.split('_')
    prot = name_fields[0]
    index = name_fields[2].split('.pdb')[0]
    idf = (prot, index)
    # Structures without a charge entry are skipped.
    if idf not in charge:
        continue
    cach = charge[idf]
    structure = parser.get_structure(pdb_name, struct)
    # Abort when the structure does not have the expected number of atoms.
    if count(structure) != loop3Size:
        print("Error in atom number!\n")
        raise SystemExit(1)

    feat = feature(structure)

In [7]:
    # Select the atom whose parent residue is named ' CA' (presumably the
    # calcium ion -- confirm). If several atoms match, the last one wins.
    for i in structure.get_atoms():
        if i.parent.get_resname() == ' CA':
            calAtom = i
            
    

In [8]:
# Display the atom currently bound to calAtom.
calAtom

<Atom CA>

In [37]:
# List every atom of the parsed structure in iteration order.
# The loop variable `i` stays bound to the last atom afterwards; the
# inspection cell that follows reads it.
for i in structure.get_atoms():
    print(i)

<Atom CA>
<Atom C>
<Atom O>
<Atom H01>
<Atom H02>
<Atom H03>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom OD1>
<Atom OD2>
<Atom H>
<Atom HA>
<Atom HB1>
<Atom HB2>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom CD>
<Atom CE>
<Atom NZ>
<Atom H>
<Atom HA>
<Atom HB1>
<Atom HB2>
<Atom HG1>
<Atom HG2>
<Atom HD1>
<Atom HD2>
<Atom HE1>
<Atom HE2>
<Atom HZ1>
<Atom HZ2>
<Atom HZ3>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom OD1>
<Atom OD2>
<Atom H>
<Atom HA>
<Atom HB1>
<Atom HB2>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom H>
<Atom HA1>
<Atom HA2>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom ND2>
<Atom OD1>
<Atom H>
<Atom HA>
<Atom HB1>
<Atom HB2>
<Atom HD21>
<Atom HD22>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom H>
<Atom HA1>
<Atom HA2>
<Atom N>
<Atom CA>
<Atom C>
<Atom O>
<Atom CB>
<Atom CG>
<Atom CD1>
<Atom CD2>
<Atom CE1>
<Atom CE2>
<Atom CZ>
<Atom OH>
<Atom H>
<Atom HA>
<Atom HB1>
<Atom HB2>
<Atom HD1>
<Atom HD2>
<Atom HE

In [39]:
# Name of the atom left in `i` by the most recently run loop.
i.get_name()

'HW2'

In [13]:
# List every residue of the parsed structure in iteration order.
# Afterwards `i` is bound to the last residue, not to an atom.
for i in structure.get_residues():
    print(i)

<Residue PHE het=  resseq=92 icode= >
<Residue ASP het=  resseq=93 icode= >
<Residue LYS het=  resseq=94 icode= >
<Residue ASP het=  resseq=95 icode= >
<Residue GLY het=  resseq=96 icode= >
<Residue ASN het=  resseq=97 icode= >
<Residue GLY het=  resseq=98 icode= >
<Residue TYR het=  resseq=99 icode= >
<Residue ILE het=  resseq=100 icode= >
<Residue SER het=  resseq=101 icode= >
<Residue ALA het=  resseq=102 icode= >
<Residue ALA het=  resseq=103 icode= >
<Residue GLU het=  resseq=104 icode= >
<Residue LEU het=  resseq=105 icode= >
<Residue  CA het=  resseq=176 icode= >
<Residue SOL het=  resseq=200 icode= >


In [33]:
# Collect the residues into a list so they can be addressed by position,
# then show the one at position 1.
res_ids = list(structure.get_residues())
res_ids[1]

<Residue ASP het=  resseq=93 icode= >

In [17]:
# Atoms of the entry at position 7 of `ress`.
# NOTE(review): `ress` is not defined anywhere in the visible notebook;
# presumably a residue list like `res_ids` from a deleted cell -- confirm,
# otherwise this cell fails on a fresh kernel.
atoms7 = [i for i in ress[7].get_atoms()]


In [49]:
# Element of the atom currently bound to `i`. The cell was saved as the
# incomplete expression `i.`, which is a syntax error; `element` is inferred
# from the recorded output 'H'.
i.element


'H'

In [51]:
# Flat list of all atoms. Rad() and Ang() address atoms by their position in
# this list, so every index stored below is a list position. A PDB serial
# number minus one is only the same thing when serials are gap-free and
# start at 1.
atoms = list(structure.get_atoms())

# Locate the atom in the residue named ' CA' and remember its position in
# `atoms`. If several atoms match, the last one wins.
calAtom = None
atomindex = None
for idx, candidate in enumerate(atoms):
    if candidate.parent.get_resname() == ' CA':
        calAtom = candidate
        atomindex = idx
if calAtom is None:
    # Fail loudly instead of silently reusing a calAtom left over from an
    # earlier cell.
    raise ValueError("no atom found in a residue named ' CA'")

residues = list(structure.get_residues())

# Atom classes, in the order used for the feature columns. Each list holds
# positions into `atoms`.
ele = {'C': [], 'N': [], 'OW': [], 'OCoor': [], 'O': [], 'H': []}

# Residues (by position) whose side-chain oxygens go into the 'OCoor' class,
# and the atom names that qualify. The backbone 'O' of residues[7] is also
# put into 'OCoor'.
coor_residues = [residues[1], residues[3], residues[5], residues[12]]
coor_names = ['OE1', 'OE2', 'OD1', 'OD2']

# The loop variable is kept as `i` because inspection cells further down
# read it after this loop has finished.
for idx, i in enumerate(atoms):
    name = i.get_name()
    is_coor = (
        (i.element == 'O' and i.parent is residues[7] and name == 'O')
        or (i.parent in coor_residues and name in coor_names)
    )
    if is_coor:
        # An 'OCoor' atom belongs to that class only. Previously the
        # side-chain ones were also appended to 'O', while the residues[7]
        # backbone O was not.
        ele['OCoor'].append(idx)
    elif i.element == 'O':
        if name == 'OW':
            ele['OW'].append(idx)
        else:
            ele['O'].append(idx)
    elif i.element in ['C', 'N', 'H']:
        # The selected atom itself is never part of its own environment.
        if i is not calAtom:
            ele[i.element].append(idx)


In [54]:
# Residue that the atom at list position 83 belongs to.
atoms[83].get_parent()

<Residue TYR het=  resseq=99 icode= >

In [55]:
# Show the object left in the loop variable `i` by the most recently run loop.
i

<Atom HW2>

In [59]:
# Full identifier tuple of the atom bound to `i`. The trailing empty
# subscript `[]` was a syntax error; the recorded output is the whole tuple.
i.get_full_id()

('CaMKII_loop3_0.0.pdb', 0, 'X', (' ', 200, ' '), ('HW2', ' '))

In [74]:
# Print the comma-separated column header for the feature table:
# one "<class><r_cut>" column per class and cutoff radius, then one
# "<class>:<class2>" column per ordered class pair, then "Cachg".
r_min=1.0
r_max=6.0
r_inc=0.5
header = []
cut_values = np.arange(r_min,r_max+0.5*r_inc,r_inc)
for element in ele:
    for cut in cut_values:
        header.append(element+str(cut))
for element in ele:
    for element2 in ele:
        header.append(element+":"+element2)
header.append("Cachg")
print(",".join(header))

C1.0,C1.5,C2.0,C2.5,C3.0,C3.5,C4.0,C4.5,C5.0,C5.5,C6.0,N1.0,N1.5,N2.0,N2.5,N3.0,N3.5,N4.0,N4.5,N5.0,N5.5,N6.0,OW1.0,OW1.5,OW2.0,OW2.5,OW3.0,OW3.5,OW4.0,OW4.5,OW5.0,OW5.5,OW6.0,OCoor1.0,OCoor1.5,OCoor2.0,OCoor2.5,OCoor3.0,OCoor3.5,OCoor4.0,OCoor4.5,OCoor5.0,OCoor5.5,OCoor6.0,O1.0,O1.5,O2.0,O2.5,O3.0,O3.5,O4.0,O4.5,O5.0,O5.5,O6.0,H1.0,H1.5,H2.0,H2.5,H3.0,H3.5,H4.0,H4.5,H5.0,H5.5,H6.0,C:C,C:N,C:OW,C:OCoor,C:O,C:H,N:C,N:N,N:OW,N:OCoor,N:O,N:H,OW:C,OW:N,OW:OW,OW:OCoor,OW:O,OW:H,OCoor:C,OCoor:N,OCoor:OW,OCoor:OCoor,OCoor:O,OCoor:H,O:C,O:N,O:OW,O:OCoor,O:O,O:H,H:C,H:N,H:OW,H:OCoor,H:O,H:H,Cachg
