In [1]:
import pandas as pd
# Limit how much of a frame is rendered: at most 10 rows and 100 columns.
pd.options.display.max_rows = 10
pd.options.display.max_columns = 100

from Bio.PDB import *

In [2]:
# Read the tab-separated node table and discard its 'pdbFileName' column.
temp_EsCo_nodes = pd.read_csv(
    '../../Ring_Analysis/4v9d_phase_as_chains/4v9d_phase_as_chains_nodes.txt',
    sep='\t',
).drop(columns=['pdbFileName'])
temp_EsCo_nodes

Unnamed: 0,NodeId,Chain,Position,Residue,Dssp,Degree,Bfactor_CA,x,y,z,Rapdf,Tap,Accessibility
0,z:1402:Z:C,z,1402,C,,16,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
1,z:1403:Z:C,z,1403,C,,12,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
2,z:1404:Z:C,z,1404,C,,10,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
3,z:1405:Z:G,z,1405,G,,12,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
4,z:1406:Z:U,z,1406,U,,13,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10556,4:35:_:GLN,4,35,GLN,E,7,0.01,-108.462,-66.490,-43.130,37.257,-0.009,0.013
10557,4:36:_:ARG,4,36,ARG,E,16,0.01,-106.526,-68.943,-40.921,44.565,-1.319,0.387
10558,4:37:_:GLN,4,37,GLN,,13,12.82,-106.081,-70.386,-37.396,42.725,0.770,0.458
10559,4:38:_:GLY,4,38,GLY,,7,0.01,-109.783,-70.748,-36.788,10.721,0.000,0.596


In [3]:
# Parse the PDB file into a structure object labelled 'EsCo_PDB'.
parser = PDBParser(QUIET=True)
EsCo_PDB = parser.get_structure(
    'EsCo_PDB',
    '../../Ring_Analysis/4v9d_phase_as_chains/4v9d_phase_as_chains.pdb',
)

In [4]:
# Atom name that represents a residue in the node table, mapped to the number
# of leading columns dropped from the three-column residue name when the
# NodeId is built.
_NODE_ATOM_RESNAME_SKIP = {
    'CA': 0,   # residue name used unchanged (e.g. 'GLN')
    "C5'": 2,  # only the last column is kept (e.g. '  C' -> 'C')
    'K': 2,
    'MG': 1,   # the last two columns are kept (e.g. ' MG' -> 'MG')
    'ZN': 1,
}


def _node_resname(resname, skip):
    """Return the residue-name part of a NodeId after dropping `skip` columns."""
    if skip == 0:
        return resname
    # The offsets assume a right-justified, three-column residue name
    # ('  C', ' MG').  Re-pad before slicing so that an already-stripped name
    # ('C', 'MG') yields the same label instead of an empty or truncated one.
    return resname.rjust(3)[skip:]


def make_PDB_df(parsed_structure):
    """
    Create a dataframe with one row per representative atom of the structure.

    Every atom of `parsed_structure` is visited; only atoms named CA, C5', K,
    MG or ZN produce a row.  Each row holds a NodeId of the form
    ``chain:residue_number:insertion_code:residue_name`` (a blank insertion
    code is written as '_') followed by the atom's three coordinates.

    Parameters
    ----------
    parsed_structure
        Entity (or list of entities) accepted by
        ``Selection.unfold_entities(..., 'A')``.

    Returns
    -------
    pandas.DataFrame
        Columns ``NodeId``, ``x_new``, ``y_new``, ``z_new``; empty (but with
        these columns) when no atom matches.
    """
    structure_list = []
    for atom in Selection.unfold_entities(parsed_structure, 'A'):
        skip = _NODE_ATOM_RESNAME_SKIP.get(str(atom.id))
        if skip is None:
            # Not a representative atom: contributes no node.
            continue
        residue = atom.get_parent()
        insertion_code = residue.id[2]
        if insertion_code == ' ':
            insertion_code = '_'
        nodeid = ':'.join([
            str(residue.get_parent().id),
            str(residue.id[1]),
            insertion_code,
            _node_resname(str(residue.resname), skip),
        ])
        structure_list.append([nodeid, atom.coord[0], atom.coord[1], atom.coord[2]])
    return pd.DataFrame(structure_list, columns=['NodeId', 'x_new', 'y_new', 'z_new'])

In [5]:
# Build the NodeId / coordinate table from the parsed structure.
EsCo_df = make_PDB_df(parsed_structure=EsCo_PDB)

In [6]:
# Display the NodeId / coordinate table returned by make_PDB_df.
EsCo_df

Unnamed: 0,NodeId,x_new,y_new,z_new
0,z:1402:Z:C,-98.388000,25.709000,-15.153000
1,z:1403:Z:C,-92.976997,22.837000,-14.876000
2,z:1404:Z:C,-88.810997,18.989000,-12.866000
3,z:1405:Z:G,-86.110001,13.792000,-11.342000
4,z:1406:Z:U,-85.891998,7.446000,-10.625000
...,...,...,...,...
10813,4:34:_:LYS,-108.865997,-65.047997,-46.623001
10814,4:35:_:GLN,-108.461998,-66.489998,-43.130001
10815,4:36:_:ARG,-106.526001,-68.943001,-40.921001
10816,4:37:_:GLN,-106.081001,-70.386002,-37.396000


In [12]:
# Attach the structure-derived coordinates to the node table by NodeId, then
# swap them in for the node table's own x/y/z columns.  The inner join keeps
# only NodeIds present in both tables.
EsCo_nodes = (
    temp_EsCo_nodes
    .merge(EsCo_df, on='NodeId', how='inner')
    .drop(columns=['x', 'y', 'z'])
    .rename(columns={'x_new': 'x', 'y_new': 'y', 'z_new': 'z'})
)
EsCo_nodes

Unnamed: 0,NodeId,Chain,Position,Residue,Dssp,Degree,Bfactor_CA,Rapdf,Tap,Accessibility,x,y,z
0,z:1402:Z:C,z,1402,C,,16,-999.90,-999.900,-999.900,-999.900,-98.388000,25.709000,-15.153000
1,z:1403:Z:C,z,1403,C,,12,-999.90,-999.900,-999.900,-999.900,-92.976997,22.837000,-14.876000
2,z:1404:Z:C,z,1404,C,,10,-999.90,-999.900,-999.900,-999.900,-88.810997,18.989000,-12.866000
3,z:1405:Z:G,z,1405,G,,12,-999.90,-999.900,-999.900,-999.900,-86.110001,13.792000,-11.342000
4,z:1406:Z:U,z,1406,U,,13,-999.90,-999.900,-999.900,-999.900,-85.891998,7.446000,-10.625000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10556,4:35:_:GLN,4,35,GLN,E,7,0.01,37.257,-0.009,0.013,-108.461998,-66.489998,-43.130001
10557,4:36:_:ARG,4,36,ARG,E,16,0.01,44.565,-1.319,0.387,-106.526001,-68.943001,-40.921001
10558,4:37:_:GLN,4,37,GLN,,13,12.82,42.725,0.770,0.458,-106.081001,-70.386002,-37.396000
10559,4:38:_:GLY,4,38,GLY,,7,0.01,10.721,0.000,0.596,-109.782997,-70.748001,-36.787998


In [13]:
# Write the merged node table without the index column.
# NOTE(review): no separator is given, so the file is comma-separated, while
# the input node table was read with sep='\t' - confirm this is intended.
EsCo_nodes.to_csv(
    '../../Ring_Analysis/4v9d_phase_as_chains/4v9d_phase_as_chains_nodes_xyz.txt',
    index=False,
)