In [1]:
import pandas as pd
# Keep rendered tables compact: at most 10 rows and up to 100 columns are shown.
pd.options.display.max_rows = 10
pd.options.display.max_columns = 100

from Bio.PDB import *

In [14]:
# Read the tab-separated node table and discard its pdbFileName column.
temp_ThTh_nodes = pd.read_csv(
    '../Ring_output/1VY4_rRNA_Phases/1VY4_rRNA_phases_rProtein_nodes.txt',
    sep='\t',
).drop(columns=['pdbFileName'])
temp_ThTh_nodes

Unnamed: 0,NodeId,Chain,Position,Residue,Dssp,Degree,Bfactor_CA,x,y,z,Rapdf,Tap,Accessibility
0,a:2061:_:G,a,2061,G,,20,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
1,a:2062:_:A,a,2062,A,,14,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
2,a:2063:_:C,a,2063,C,,9,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
3,a:2064:_:C,a,2064,C,,12,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
4,a:2065:_:C,a,2065,C,,14,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6329,8:63:_:PRO,8,63,PRO,T,7,38.36,-56.248,181.615,167.673,-11.299,0.091,0.296
6330,8:64:_:TYR,8,64,TYR,T,11,42.67,-55.664,185.250,166.756,-33.017,-0.126,0.464
6331,8:65:_:GLU,8,65,GLU,,5,65.21,-58.987,186.424,165.338,48.469,0.000,0.655
6332,8:5001:_:MG,8,5001,MG,,15,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900


In [26]:
# Parse the PDB file into a structure object labelled 'ThTh_PDB'.
# QUIET=True is passed so the parser does not emit warnings while building it.
parser = PDBParser(QUIET=True)
ThTh_PDB = parser.get_structure(
    'ThTh_PDB',
    '../TT_Phases_PDB/1VY4_rRNA_phases_rProtein.pdb',
)

In [27]:
def make_PDB_df(parsed_structure):
    """
    Build a DataFrame of NodeIds and coordinates for selected atoms.

    The structure is unfolded to atom level and only atoms named ``CA``,
    ``C5'``, ``K``, ``MG`` or ``ZN`` are kept. For each kept atom a NodeId of
    the form ``chain:resnum:insertion_code:resname`` is composed from the
    atom's parent residue and that residue's parent chain; a blank insertion
    code (``' '``) is written as ``'_'``.

    Parameters
    ----------
    parsed_structure
        Entity accepted by ``Selection.unfold_entities(..., 'A')``; in this
        notebook it is the structure returned by ``PDBParser.get_structure``.

    Returns
    -------
    pandas.DataFrame
        One row per kept atom, in traversal order, with columns
        ``NodeId``, ``x_new``, ``y_new`` and ``z_new``.
    """
    # Atom name -> number of leading characters removed from the parent's
    # resname when composing the NodeId.
    # NOTE(review): the non-zero offsets presumably strip the left padding of
    # short residue names (e.g. '  G' -> 'G', ' MG' -> 'MG'). If the installed
    # BioPython already strips resname, these slices would truncate the names
    # instead -- confirm against the BioPython version in use.
    resname_offsets = {
        'CA': 0,
        "C5'": 2,
        'K': 2,
        'MG': 1,
        'ZN': 1,
    }

    structure_list = []
    for atom in Selection.unfold_entities(parsed_structure, 'A'):
        offset = resname_offsets.get(str(atom.id))
        if offset is None:
            # Not one of the representative atoms; nothing to record.
            continue

        residue = atom.get_parent()
        chain = str(residue.get_parent().id)
        resnum = str(residue.id[1])
        resname = str(residue.resname)[offset:]
        # The node table writes a blank insertion code as '_'.
        insertion_code = '_' if residue.id[2] == ' ' else residue.id[2]

        nodeid = ':'.join([chain, resnum, insertion_code, resname])
        structure_list.append([nodeid, atom.coord[0], atom.coord[1], atom.coord[2]])

    return pd.DataFrame(structure_list, columns=['NodeId', 'x_new', 'y_new', 'z_new'])

In [28]:
# Extract NodeId + coordinate rows from the parsed structure.
ThTh_df = make_PDB_df(parsed_structure=ThTh_PDB)

In [29]:
# Show the extracted coordinate table (NodeId, x_new, y_new, z_new) for inspection.
ThTh_df

Unnamed: 0,NodeId,x_new,y_new,z_new
0,a:2061:_:G,-41.375000,132.966003,167.074005
1,a:2062:_:A,-39.689999,134.860992,160.151993
2,a:2063:_:C,-41.956001,130.835007,155.792007
3,a:2064:_:C,-47.741001,131.516006,153.901993
4,a:2065:_:C,-51.830002,136.326996,153.990005
...,...,...,...,...
6470,8:62:_:LEU,-57.782001,180.576996,164.320999
6471,8:63:_:PRO,-56.248001,181.615005,167.673004
6472,8:64:_:TYR,-55.664001,185.250000,166.755997
6473,8:65:_:GLU,-58.987000,186.423996,165.337997


In [30]:
# Swap the node table's x/y/z columns for the coordinates taken from the PDB
# structure. The inner join on NodeId keeps only nodes present in both tables.
ThTh_nodes = (
    temp_ThTh_nodes
    .merge(ThTh_df, on='NodeId', how='inner')
    .drop(columns=['x', 'y', 'z'])
    .rename(columns={'x_new': 'x', 'y_new': 'y', 'z_new': 'z'})
)
ThTh_nodes

Unnamed: 0,NodeId,Chain,Position,Residue,Dssp,Degree,Bfactor_CA,Rapdf,Tap,Accessibility,x,y,z
0,a:2061:_:G,a,2061,G,,20,-999.90,-999.900,-999.900,-999.900,-41.375000,132.966003,167.074005
1,a:2062:_:A,a,2062,A,,14,-999.90,-999.900,-999.900,-999.900,-39.689999,134.860992,160.151993
2,a:2063:_:C,a,2063,C,,9,-999.90,-999.900,-999.900,-999.900,-41.956001,130.835007,155.792007
3,a:2064:_:C,a,2064,C,,12,-999.90,-999.900,-999.900,-999.900,-47.741001,131.516006,153.901993
4,a:2065:_:C,a,2065,C,,14,-999.90,-999.900,-999.900,-999.900,-51.830002,136.326996,153.990005
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6329,8:63:_:PRO,8,63,PRO,T,7,38.36,-11.299,0.091,0.296,-56.248001,181.615005,167.673004
6330,8:64:_:TYR,8,64,TYR,T,11,42.67,-33.017,-0.126,0.464,-55.664001,185.250000,166.755997
6331,8:65:_:GLU,8,65,GLU,,5,65.21,48.469,0.000,0.655,-58.987000,186.423996,165.337997
6332,8:5001:_:MG,8,5001,MG,,15,-999.90,-999.900,-999.900,-999.900,-64.934998,185.035004,155.401993


In [31]:
# Persist the node table with the replaced coordinates, without the index.
# NOTE(review): this is written comma-separated (the to_csv default) although
# the input node file was read as tab-separated -- confirm downstream readers
# expect commas.
ThTh_nodes.to_csv(
    '../Ring_output/1VY4_rRNA_Phases/1VY4_rRNA_phases_rProtein_nodes_xyz_modified.txt',
    index=False,
)