In [21]:
from Bio.PDB import PDBParser,Selection
from Bio import BiopythonWarning
import numpy as np
from scipy.spatial.distance import pdist, squareform
from functions.importing.retrieve_chain import retrieve_chain
from functions.calculating.secondary_struc_cmap import secondary_struc_cmap
from Bio.PDB.DSSP import DSSP

In [3]:
chain = retrieve_chain('1kyv.pdb')

# One-letter secondary-structure codes that are treated as structural elements
# when the STR lines of the STRIDE file are scanned further down:
# H - Alpha-Helix
# E - Extended Configuration (Beta-sheet)
# B - Isolated Beta Bridge
# b - Isolated Beta Bridge
# G - 3-10 Helix
ss_elements = ['H','E','B','b','G']
cutoff_distance = 6       # atom pairs closer than this count as a contact
cutoff_numcontacts = 10   # minimum number of atom contacts for a segment-segment contact
plot_figs = 1             # not read anywhere in this cell
exclude_neighbour = 0     # widest off-diagonal band of the segment map that is zeroed

# Relative to the notebook's working directory (the same base the PDB path in
# the DSSP cell uses) instead of an absolute path inside one user's home folder.
stride_file = 'input_files/stride/1kyvstride.txt'

struc = []
seq = []

# Collect the sequence (SEQ) and secondary-structure (STR) records; the first
# LOC record ends the part of the file that is read.
with open(stride_file, "r") as f:
    for line in f:
        record = line[0:3]
        if record == "LOC":
            break
        if record == "SEQ":
            seq.append(line[10:60].strip())
        elif record == "STR":
            # Not stripped: blanks inside the STR field are kept as positions.
            struc.append(line[10:60])

struc = ''.join(struc)
seq = ''.join(seq)

# The last STR field can be longer than the sequence; trim it to match.
if len(struc) > len(seq):
    struc = struc[0:len(seq)]

struc_length = len(struc)

# Id of the entity two levels above the chain.
protid = chain.get_parent().get_parent().id

# Flatten the chain into its residues and into its atoms.
res_list = Selection.unfold_entities(chain, "R")
atom_list = Selection.unfold_entities(chain, "A")

# Per-residue names.
res_names = [res.get_resname() for res in res_list]

# Per-atom information: name, number of the parent residue, coordinates and
# a running index.
atom_names = [atom.get_name() for atom in atom_list]
residue_number = np.array(
    [atom.get_parent().get_id()[1] for atom in atom_list], dtype='int'
)
coords = np.zeros([len(atom_list), 3])
for num, atom in enumerate(atom_list):
    coords[num] = atom.get_coord()
y = np.arange(len(atom_list), dtype='int')

# Residue numbers from the first atom's residue to the last atom's residue;
# residue_number is then shifted so that the first residue becomes 0.
numbering = list(range(residue_number[0], residue_number[-1] + 1))
residue_number = residue_number - numbering[0]
natoms = len(residue_number)

# Label every position of the secondary-structure string with the number of
# the structural segment it belongs to (1-based). Positions whose code is not
# in ss_elements keep the label 0.
nseg = 1
segment = np.zeros([struc_length], dtype='int')

for i in range(struc_length):
    if struc[i] in ss_elements:
        segment[i] = nseg
        # A segment ends at the last position of the string or where the next
        # code differs. The end-of-string test must come first so struc[i + 1]
        # is never read past the end.
        if i == struc_length - 1 or struc[i + 1] != struc[i]:
            nseg = nseg + 1

# nseg was advanced past the last segment that was closed; step back so it
# holds the number of segments found.
nseg = nseg - 1

#atom-atom based contact map
# 1 where two atoms are closer than cutoff_distance, 0 otherwise.
cmap = squareform(pdist(coords))
cmap = (cmap < cutoff_distance) * 1

#segment-segment based contact map, counted from the atom-atom contacts
cmap2 = np.zeros([nseg,nseg],dtype='int')

# Contacting atom pairs with i < j (strict upper triangle of cmap).
atom_i, atom_j = np.nonzero(np.triu(cmap, k=1))

# Skip pairs whose residues are 1, 2 or 3 positions apart. Pairs inside the
# same residue (separation 0) are kept; they only ever add to the diagonal,
# which is cleared further down.
diff_res = np.abs(residue_number[atom_i] - residue_number[atom_j])
separated = ~np.isin(diff_res, [1, 2, 3])
seg_i = segment[residue_number[atom_i[separated]]]
seg_j = segment[residue_number[atom_j[separated]]]

# Count a pair only when both atoms sit in a labelled segment (label 0 means
# "no segment"). np.add.at accumulates repeated index pairs, which a plain
# fancy-indexed "+= 1" would not.
in_segment = (seg_i != 0) & (seg_j != 0)
np.add.at(cmap2, (seg_i[in_segment] - 1, seg_j[in_segment] - 1), 1)

# Only i < j pairs were counted, so mirror the counts.
cmap2 = cmap2 + cmap2.T

#set values close to diagonal to zero
for offset in range(0, exclude_neighbour + 1):
    idx = np.arange(len(cmap2) - offset)
    cmap2[idx, idx + offset] = 0
    cmap2[idx + offset, idx] = 0

# Segment pairs with at least cutoff_numcontacts atom contacts.
cmap3 = (cmap2 >= cutoff_numcontacts) * 1

# The results (cmap3, cmap, cmap2, segment, numbering) stay in the notebook
# namespace. The former top-level "return" was removed: it is a SyntaxError
# outside a function and it referenced the undefined name "s".


"\ncmap2 = np.zeros([nseg,nseg],dtype='int')\n\nfor i in range(0,natoms):\n    for j in range(i+1,natoms):\n        diff_res = abs(residue_number[i]-residue_number[j])\n        \n        if cmap[i][j] == 1  and diff_res != 1 and diff_res != 2 and diff_res != 3:\n            seg_i = segment[residue_number[i]]\n            seg_j = segment[residue_number[j]]\n        \n            if seg_i != 0 and seg_j != 0 :\n                cmap2[seg_i-1][seg_j-1] = cmap2[seg_i-1][seg_j-1]+1\n                    \ncmap2 = cmap2 + cmap2.T\n\n#set values close to diagonal to zero\nfor i in range(0,exclude_neighbour+1):\n    for j in range(0,len(cmap2)-i):\n        cmap2[j][j+i] = 0\n        cmap2[j+i][j] = 0\n\ncmap3 = (cmap2 >= cutoff_numcontacts) * 1\n\nreturn cmap3,cmap,cmap2,s,segment,numbering\n"

In [15]:
chain = retrieve_chain('1kyv.pdb')

# Settings for the residue-level contact map built in this cell.
ss_elements = ['H', 'E', 'B', 'b', 'G']
cutoff_distance = 4.5
cutoff_numcontacts = 5
exclude_neighbour = 4

# Flatten the chain into its residues and into its atoms.
res_list = Selection.unfold_entities(chain, "R")
atom_list = Selection.unfold_entities(chain, "A")

# Per-residue names.
res_names = [res.get_resname() for res in res_list]

# Per-atom information: name, number of the parent residue, coordinates and
# a running index.
atom_names = [atom.get_name() for atom in atom_list]
residue_number = np.array(
    [atom.get_parent().get_id()[1] for atom in atom_list], dtype='int'
)
coords = np.zeros([len(atom_list), 3])
for num, atom in enumerate(atom_list):
    coords[num] = atom.get_coord()
y = np.arange(len(atom_list), dtype='int')

# Residue numbers from the first atom's residue to the last atom's residue;
# residue_number is then shifted so that the first residue becomes 0.
numbering = list(range(residue_number[0], residue_number[-1] + 1))
residue_number = residue_number - numbering[0]

#divide into segments based on residue
# Every residue is its own segment: segment[k] == k.
nseg = len(numbering)
segment = list(range(nseg + 1))

#delete duplicate atoms due to multiple occupancy
# An atom is flagged when it repeats both the residue number and the atom name
# of the atom directly before it.
duplicate = np.zeros(len(atom_list),dtype='int')
for i in range(1,len(duplicate)):
    if residue_number[i] == residue_number[i-1] and atom_names[i] == atom_names[i-1]:
        duplicate[i] = 1

# Apply one mask to every per-atom container so they stay the same length.
# Previously only residue_number and coords were filtered, leaving atom_names
# and y out of step with them.
keep = duplicate != 1
residue_number = residue_number[keep]
coords = coords[keep]
atom_names = [name for name, kept in zip(atom_names, keep) if kept]
y = y[keep]  # values remain the atoms' positions in atom_list

#atom-atom based contact map
cmap = squareform(pdist(coords))
natoms = len(residue_number)

# 1 where two atoms are closer than cutoff_distance, 0 otherwise.
cmap = (cmap < cutoff_distance) * 1

#segment-segment based contact map, based on atom-atom based contact map
cmap2 = np.zeros([nseg, nseg], dtype='int')

# Visit the contacting atom pairs with i < j (strict upper triangle of cmap)
# and add one count to the entry of their two segments.
for i, j in np.argwhere(np.triu(cmap, k=1)):
    seg_i = segment[residue_number[i]]
    seg_j = segment[residue_number[j]]
    cmap2[seg_i, seg_j] += 1

# Only i < j pairs were counted, so mirror the counts.
cmap2 = cmap2 + cmap2.T

#set values close to diagonal to zero
for offset in range(exclude_neighbour + 1):
    idx = np.arange(len(cmap2) - offset)
    cmap2[idx, idx + offset] = 0
    cmap2[idx + offset, idx] = 0

# Segment pairs with at least cutoff_numcontacts atom contacts.
cmap3 = (cmap2 >= cutoff_numcontacts) * 1

#import secondary structure information
# Relative to the notebook's working directory (the same base the PDB path in
# the DSSP cell uses) instead of an absolute path inside one user's home folder.
stride_file = 'input_files/stride/1kyvstride.txt'

struc = []
seq = []

# Collect the sequence (SEQ) and secondary-structure (STR) records; the first
# LOC record ends the part of the file that is read.
with open(stride_file, "r") as f:
    for line in f:
        record = line[0:3]
        if record == "LOC":
            break
        if record == "SEQ":
            seq.append(line[10:60].strip())
        elif record == "STR":
            # Not stripped: blanks inside the STR field are kept as positions.
            struc.append(line[10:60])

struc = ''.join(struc)
seq = ''.join(seq)

# The last STR field can be longer than the sequence; trim it to match.
if len(struc) > len(seq):
    struc = struc[0:len(seq)]

struc_length = len(struc)

# Label every position of the secondary-structure string with the number of
# the structural element it belongs to (1-based). Positions whose code is not
# in ss_elements keep the label 0.
nstruc = 1
struc_id = np.zeros([struc_length], dtype='int')

for i in range(struc_length):
    if struc[i] in ss_elements:
        struc_id[i] = nstruc
        # An element ends at the last position of the string or where the next
        # code differs. The end-of-string test must come first so struc[i + 1]
        # is never read past the end.
        if i == struc_length - 1 or struc[i + 1] != struc[i]:
            nstruc = nstruc + 1

# nstruc was advanced past the last element that was closed; step back so it
# holds the number of elements found.
nstruc = nstruc - 1



In [19]:
# Copy of cmap3 with contacts removed when both positions carry the same
# structural-element label and the code at the first position is H or G.
cmap4 = np.array(cmap3, copy=True)
filtered_structures = ['H', 'G']

# Only entries that are 1 in cmap3 can change, so visit just those.
for i, j in np.argwhere(cmap3 == 1):
    same_element = struc_id[i] == struc_id[j]
    if same_element and struc[i] in filtered_structures:
        cmap4[i, j] = 0

In [52]:
# Load the chain for 1kyv.pdb with the project's retrieve_chain helper.
chain = retrieve_chain('1kyv.pdb')


In [53]:
# Parent entity of the chain; this is what gets passed to DSSP.
test = chain.get_parent()

In [55]:
# Run DSSP on the chain's parent entity and the PDB file. The path is relative
# to the working directory.
# NOTE(review): presumably needs the external DSSP executable to be installed - confirm.
sell = DSSP(test,'input_files/pdb/1kyv.pdb')

In [56]:
# First 147 DSSP entries, then element 1 and element 2 of each entry.
# NOTE(review): 147 equals the length of the STRIDE sequence printed in this
# notebook; presumably element 1 is the residue letter and element 2 the
# secondary-structure code - confirm against the DSSP documentation.
test1 = list(sell)[:147]
test12 = [entry[1] for entry in test1]
test123 = [entry[2] for entry in test1]

In [47]:
# Show the sequence assembled from the SEQ records of the STRIDE file.
print(seq)

DLKGPELRILIVHARWNLQAIEPLVKGAVETMIEKHDVKLENIDIESVPGSWELPQGIRASIARNTYDAVIGIGVLIKGSTMHFEYISEAVVHGLMRVGLDSGVPVILGLLTVLNEEQALYRAGLNGGHNHGNDWGSAAVEMGLKAL


In [60]:
# Print the codes taken from the DSSP entries (first line) above the STRIDE
# secondary-structure string (second line) for a side-by-side comparison.
print(''.join(test123))
print(struc)

----TT--EEEEE--TTHHHHHHHHHHHHHHHHHHH---GGGEEEEE-SSGGGHHHHHHHHHHHS--SEEEEEEEEE--SSSHHHHHHHHHHHHHHHHHHHH---EEEEEEEESSHHHHHHTBT-TT--BHHHHHHHHHHHHHHHT-
   TTTT EEEEE TTTHHHHHHHHHHHHHHHHHHH   GGGEEEEE   GGGHHHHHHHHHHH    EEEEEEEEE    HHHHHHHHHHHHHHHHHHHHH   EEEEEEEE  HHHHHHH TTTTT  HHHHHHHHHHHHHHHH 
