In [1]:
# pandas holds the node tables that are loaded, merged and written out below.
import pandas as pd
# Display options: show at most 10 rows and up to 100 columns when a frame is rendered.
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 100)

# NOTE(review): star import; the visible cells only use PDBParser and Selection
# from Bio.PDB. Consider importing those two names explicitly once it is
# confirmed that no other cell relies on additional Bio.PDB names.
from Bio.PDB import *

In [2]:
# Load the tab-separated node table and discard the 'pdbFileName' column,
# which is not used in the rest of this notebook.
temp_ThTh_nodes = pd.read_csv(
    '../Ring_output/1VY4_phase_as_chains/1VY4_phase_as_chains_nodes.txt',
    sep='\t',
).drop(columns=['pdbFileName'])
temp_ThTh_nodes

Unnamed: 0,NodeId,Chain,Position,Residue,Dssp,Degree,Bfactor_CA,x,y,z,Rapdf,Tap,Accessibility
0,b:7:_:VAL,b,7,VAL,,2,80.50,72.663,100.995,17.046,47.300,-0.000,0.517
1,b:8:_:LYS,b,8,LYS,,1,83.06,71.062,97.872,15.654,-0.528,-0.905,0.322
2,b:9:_:GLU,b,9,GLU,,1,85.55,67.556,98.230,14.167,-46.135,-0.087,0.283
3,b:10:_:LEU,b,10,LEU,G,3,82.20,65.421,96.824,11.395,3.183,-1.270,0.597
4,b:11:_:LEU,b,11,LEU,H,1,87.16,66.606,97.561,7.816,-17.470,0.850,0.741
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11437,M:3027:_:MG,M,3027,MG,,11,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
11438,M:3755:_:MG,M,3755,MG,,10,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
11439,M:3781:_:MG,M,3781,MG,,4,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
11440,M:3157:_:MG,M,3157,MG,,7,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900


In [3]:
# Parse the PDB file into a structure object labelled 'ThTh_PDB'.
# QUIET=True is passed to the parser constructor — presumably to silence
# parser warnings; confirm against the Biopython PDBParser documentation.
parser = PDBParser(QUIET=True)
ThTh_PDB = parser.get_structure('ThTh_PDB', '../PyMOL_scripts/1VY4_phase_as_chains.pdb')

In [4]:
def _ring_node_id(residue, resname):
    """Return 'chain:resnum:icode:resname' for residue; a blank insertion code becomes '_'."""
    insertion_code = residue.id[2]
    if insertion_code == ' ':
        insertion_code = '_'
    chain = str(residue.get_parent().id)
    return chain + ':' + str(residue.id[1]) + ':' + insertion_code + ':' + resname


def make_PDB_df(parsed_structure):
    """
    Build a DataFrame of node ids and coordinates for selected atoms.

    Every atom of ``parsed_structure`` is visited and one row is emitted for
    each atom named ``CA``, ``C5'``, ``K``, ``MG`` or ``ZN`` (presumably the
    protein C-alpha, the nucleotide C5' and the metal ions — confirm against
    the node file this is merged with).

    The node id has the form ``chain:resnum:insertion_code:resname`` where a
    blank insertion code is written as ``_``. For ``CA`` atoms the residue
    name is used unchanged. For the other atoms the residue name is treated
    as a right-justified 3-character field and its leading characters are
    dropped (2 for ``C5'``/``K``, 1 for ``MG``/``ZN``).

    Parameters
    ----------
    parsed_structure
        Entity (or list of entities) accepted by ``Selection.unfold_entities``.

    Returns
    -------
    pandas.DataFrame
        Columns ``NodeId``, ``x_new``, ``y_new``, ``z_new``; one row per
        selected atom, in traversal order.
    """
    # Atom name -> number of leading characters removed from the 3-wide residue name.
    leading_chars_to_drop = {'CA': 0, "C5'": 2, 'K': 2, 'MG': 1, 'ZN': 1}

    structure_list = []
    for atom in Selection.unfold_entities(parsed_structure, 'A'):
        drop = leading_chars_to_drop.get(str(atom.id))
        if drop is None:
            continue  # not one of the atoms that represent a node

        residue = atom.get_parent()
        resname = str(residue.resname)
        if drop:
            # Pad to width 3 before slicing so that an unpadded name such as
            # 'A' or 'MG' gives the same result as the padded '  A' / ' MG'
            # instead of an empty or truncated string.
            resname = resname.rjust(3)[drop:]

        structure_list.append(
            [_ring_node_id(residue, resname), atom.coord[0], atom.coord[1], atom.coord[2]]
        )

    return pd.DataFrame(structure_list, columns=['NodeId', 'x_new', 'y_new', 'z_new'])

In [5]:
# Build the NodeId / x_new / y_new / z_new table from the parsed structure.
ThTh_df = make_PDB_df(ThTh_PDB)

In [6]:
# Display the coordinate table (rendering is truncated by the display.max_rows option set earlier).
ThTh_df

Unnamed: 0,NodeId,x_new,y_new,z_new
0,b:7:_:VAL,72.663002,100.995003,17.046000
1,b:8:_:LYS,71.061996,97.872002,15.654000
2,b:9:_:GLU,67.556000,98.230003,14.167000
3,b:10:_:LEU,65.420998,96.823997,11.395000
4,b:11:_:LEU,66.606003,97.560997,7.816000
...,...,...,...,...
11212,M:3027:_:MG,54.858002,-43.683998,-100.181999
11213,M:3755:_:MG,35.963001,-105.411003,-22.077999
11214,M:3781:_:MG,19.162001,-93.500000,-45.623001
11215,M:3157:_:MG,35.837002,-66.227997,47.847000


In [7]:
# Swap the node table's x/y/z columns for the coordinates taken from the PDB
# structure: join on NodeId, discard the old columns, then give the *_new
# columns the original names.
# NOTE(review): the displayed outputs suggest about 11441 rows in the node
# table but about 11126 after this inner join, so nodes without a matching
# NodeId in ThTh_df appear to be dropped — confirm that this is intended.
ThTh_nodes = (
    temp_ThTh_nodes
    .merge(ThTh_df, how='inner', on='NodeId')
    .drop(columns=['x', 'y', 'z'])
    .rename(columns={'x_new': 'x', 'y_new': 'y', 'z_new': 'z'})
)
ThTh_nodes

Unnamed: 0,NodeId,Chain,Position,Residue,Dssp,Degree,Bfactor_CA,Rapdf,Tap,Accessibility,x,y,z
0,b:7:_:VAL,b,7,VAL,,2,80.50,47.300,-0.000,0.517,72.663002,100.995003,17.046000
1,b:8:_:LYS,b,8,LYS,,1,83.06,-0.528,-0.905,0.322,71.061996,97.872002,15.654000
2,b:9:_:GLU,b,9,GLU,,1,85.55,-46.135,-0.087,0.283,67.556000,98.230003,14.167000
3,b:10:_:LEU,b,10,LEU,G,3,82.20,3.183,-1.270,0.597,65.420998,96.823997,11.395000
4,b:11:_:LEU,b,11,LEU,H,1,87.16,-17.470,0.850,0.741,66.606003,97.560997,7.816000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11122,M:3027:_:MG,M,3027,MG,,11,-999.90,-999.900,-999.900,-999.900,54.858002,-43.683998,-100.181999
11123,M:3755:_:MG,M,3755,MG,,10,-999.90,-999.900,-999.900,-999.900,35.963001,-105.411003,-22.077999
11124,M:3781:_:MG,M,3781,MG,,4,-999.90,-999.900,-999.900,-999.900,19.162001,-93.500000,-45.623001
11125,M:3157:_:MG,M,3157,MG,,7,-999.90,-999.900,-999.900,-999.900,35.837002,-66.227997,47.847000


In [8]:
# Write the merged node table to disk without the DataFrame index.
# NOTE(review): the input node file was read with sep='\t' but no sep is
# passed here, so pandas' default delimiter (comma) is used — confirm that
# whatever consumes this .txt file expects that.
ThTh_nodes.to_csv('../Ring_output/1VY4_rRNA_Phases/1VY4_rRNA_phases_rProtein_nodes_xyz_modified.txt', index=False)