In [1]:
import pandas as pd
# Notebook display settings: truncate rendered DataFrames to 10 rows and
# allow up to 100 columns before pandas elides them.
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 100)

from Bio.PDB import *

In [2]:
# Load the tab-separated RING node table; the pdbFileName column is not needed.
temp_ThTh_nodes = (
    pd.read_csv('../Ring_output/1VY4/1VY4_LSU_rRNA+rProtein_nodes.txt', sep='\t')
    .drop(columns=['pdbFileName'])
)

# Header-less two-column lookup: rProtein name, then chain identifier.
name_chains = pd.read_csv(
    '../standards/1VY4_name_chains.csv',
    names=['rProtein', 'Chain'],
)

# Attach the rProtein name to every node through its chain identifier
# (default inner join: nodes whose chain is missing from the lookup are dropped).
temp_ThTh_nodes = temp_ThTh_nodes.merge(name_chains, on='Chain')
temp_ThTh_nodes

Unnamed: 0,NodeId,Chain,Position,Residue,Dssp,Degree,Bfactor_CA,x,y,z,Rapdf,Tap,Accessibility,rProtein
0,B:1:_:U,B,1,U,,2,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900,5S
1,B:2:_:C,B,2,C,,3,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900,5S
2,B:3:_:C,B,3,C,,6,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900,5S
3,B:4:_:C,B,4,C,,11,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900,5S
4,B:5:_:C,B,5,C,,16,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900,5S
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7369,3:55:_:ARG,3,55,ARG,E,1,32.68,-97.104,140.802,196.343,-16.082,-0.278,0.423,uL30
7370,3:56:_:VAL,3,56,VAL,E,1,53.08,-100.068,143.120,196.835,-168.072,0.394,0.339,uL30
7371,3:57:_:GLU,3,57,GLU,E,2,61.18,-102.952,143.319,199.234,-67.546,-0.069,0.408,uL30
7372,3:59:_:VAL,3,59,VAL,E,3,47.99,-105.613,147.953,202.595,-0.117,0.493,0.155,uL30


In [3]:
# Parse the LSU rRNA + rProtein coordinates into a Biopython structure.
# QUIET=True is passed so the parser does not print construction warnings.
parser = PDBParser(QUIET=True)
ThTh_PDB = parser.get_structure(
    'ThTh_PDB',
    '../TT_Phases_PDB/1VY4_LSU_rRNA+rProtein.pdb',
)

In [4]:
def make_PDB_df(parsed_structure):
    """
    Build a DataFrame with one representative atom position per residue/ion.

    Every atom of ``parsed_structure`` is visited and only the following atom
    names are kept; each produces one output row:

    * ``CA``          -- the residue name is used verbatim in the NodeId
    * ``C5'``, ``K``  -- only the last character of the 3-column residue name
    * ``MG``, ``ZN``  -- only the last two characters of the residue name

    The NodeId is ``chain:resnum:insertion_code:resname`` with a blank
    insertion code written as ``_``.

    Residue names are right-justified to three characters before they are
    sliced, so a padded name (``'  U'``, ``' MG'``) and an already-stripped
    name (``'U'``, ``'MG'``) yield the same NodeId.  Without this, stripped
    names would be sliced to an empty/truncated string.

    NOTE(review): the atom name ``CA`` is matched without looking at the
    element, so an ion whose atom is named ``CA`` would also be emitted --
    confirm whether that matters for the input structures.

    Parameters
    ----------
    parsed_structure
        Entity accepted by ``Selection.unfold_entities(..., 'A')``.

    Returns
    -------
    pandas.DataFrame
        Columns ``NodeId``, ``x_new``, ``y_new``, ``z_new``; rows are in the
        order the atoms were visited.
    """
    # Atom name -> number of leading characters to drop from the 3-character
    # residue name when building the NodeId.
    resname_offset = {
        'CA': 0,
        "C5'": 2,
        'K': 2,
        'MG': 1,
        'ZN': 1,
    }

    structure_list = []
    for atom in Selection.unfold_entities(parsed_structure, 'A'):
        offset = resname_offset.get(str(atom.id))
        if offset is None:
            # Not one of the representative atoms.
            continue

        residue = atom.get_parent()
        resnum = str(residue.id[1])
        icode = residue.id[2]
        insertion_code = '_' if icode == ' ' else icode
        chain = str(residue.get_parent().id)

        resname = str(residue.resname)
        if offset:
            # Restore the fixed-width form first so the slice removes only
            # padding/prefix characters, whether or not the name was stripped.
            resname = resname.rjust(3)[offset:]

        nodeid = chain + ':' + resnum + ':' + insertion_code + ':' + resname
        structure_list.append([nodeid, atom.coord[0], atom.coord[1], atom.coord[2]])

    structure_df = pd.DataFrame(structure_list, columns=['NodeId', 'x_new', 'y_new', 'z_new'])
    return structure_df

In [5]:
# Extract NodeId + x_new/y_new/z_new coordinates from the parsed structure.
ThTh_df = make_PDB_df(ThTh_PDB)

In [6]:
# Show the extracted coordinate table (rendering is truncated by display.max_rows).
ThTh_df

Unnamed: 0,NodeId,x_new,y_new,z_new
0,B:1:_:U,-140.216003,170.479996,158.654007
1,B:2:_:C,-143.912003,165.106995,157.557999
2,B:3:_:C,-144.324005,161.835999,152.091003
3,B:4:_:C,-143.281998,161.699997,145.934006
4,B:5:_:C,-139.960999,163.332001,140.940002
...,...,...,...,...
7531,3:56:_:VAL,-100.068001,143.119995,196.835007
7532,3:57:_:GLU,-102.952003,143.319000,199.233994
7533,3:58:_:VAL,-105.706001,145.916000,199.384003
7534,3:59:_:VAL,-105.612999,147.953003,202.595001


In [7]:
# Keep only nodes that have coordinates from the structure (inner join on
# NodeId), then swap the node table's x/y/z columns for the x_new/y_new/z_new
# values taken from the structure.
ThTh_nodes = (
    temp_ThTh_nodes
    .merge(ThTh_df, on='NodeId', how='inner')
    .drop(columns=['x', 'y', 'z'])
    .rename(columns={'x_new': 'x', 'y_new': 'y', 'z_new': 'z'})
)
ThTh_nodes

Unnamed: 0,NodeId,Chain,Position,Residue,Dssp,Degree,Bfactor_CA,Rapdf,Tap,Accessibility,rProtein,x,y,z
0,B:1:_:U,B,1,U,,2,-999.90,-999.900,-999.900,-999.900,5S,-140.216003,170.479996,158.654007
1,B:2:_:C,B,2,C,,3,-999.90,-999.900,-999.900,-999.900,5S,-143.912003,165.106995,157.557999
2,B:3:_:C,B,3,C,,6,-999.90,-999.900,-999.900,-999.900,5S,-144.324005,161.835999,152.091003
3,B:4:_:C,B,4,C,,11,-999.90,-999.900,-999.900,-999.900,5S,-143.281998,161.699997,145.934006
4,B:5:_:C,B,5,C,,16,-999.90,-999.900,-999.900,-999.900,5S,-139.960999,163.332001,140.940002
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7369,3:55:_:ARG,3,55,ARG,E,1,32.68,-16.082,-0.278,0.423,uL30,-97.103996,140.802002,196.343002
7370,3:56:_:VAL,3,56,VAL,E,1,53.08,-168.072,0.394,0.339,uL30,-100.068001,143.119995,196.835007
7371,3:57:_:GLU,3,57,GLU,E,2,61.18,-67.546,-0.069,0.408,uL30,-102.952003,143.319000,199.233994
7372,3:59:_:VAL,3,59,VAL,E,3,47.99,-0.117,0.493,0.155,uL30,-105.612999,147.953003,202.595001


In [8]:
# Write the node table with the replaced coordinates next to the RING output.
# NOTE(review): to_csv is called with its defaults, so the file is
# comma-separated and includes the index column, whereas the input nodes file
# was read with sep='\t' -- confirm that downstream readers expect this.
ThTh_nodes.to_csv('../Ring_output/1VY4/1VY4_LSU_rRNA+rProtein_nodes_xyz_modified.txt')