<a href="https://colab.research.google.com/github/LastCodeBender42/LRH1-Drug-Screening-Project/blob/main/04_PSN_with_py3dmol.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the data paths used below live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install Bio

import pandas as pd
from Bio.PDB import PDBParser

def pdb_to_dataframe(pdb_file):
    """Parse a PDB file and return one DataFrame row per atom.

    Args:
        pdb_file: Path of the PDB file handed to ``PDBParser.get_structure``.

    Returns:
        A DataFrame with the columns Model, Chain, Residue_Number,
        Residue_Name, Atom_Name, X, Y, Z, Occupancy and B_Factor, in the
        order the atoms are yielded when iterating the parsed structure.
    """
    columns = [
        'Model', 'Chain', 'Residue_Number', 'Residue_Name',
        'Atom_Name', 'X', 'Y', 'Z', 'Occupancy', 'B_Factor'
    ]

    structure = PDBParser(QUIET=True).get_structure('structure', pdb_file)

    # Walk model -> chain -> residue -> atom and flatten into one record per atom.
    # residue.id[1] is the second element of the residue id tuple, stored as Residue_Number.
    records = [
        [
            model.id,
            chain.id,
            residue.id[1],
            residue.resname,
            atom.name,
            atom.coord[0],
            atom.coord[1],
            atom.coord[2],
            atom.occupancy,
            atom.bfactor,
        ]
        for model in structure
        for chain in model
        for residue in chain
        for atom in residue
    ]

    return pd.DataFrame(records, columns=columns)

# Parse the 1YOK structure file from the mounted Drive folder into an atom-level table.
pdb_file = '/content/drive/My Drive/LRH1-Drug-Screening/data/1yok.pdb'
pdb_df = pdb_to_dataframe(pdb_file)
pdb_df

Unnamed: 0,Model,Chain,Residue_Number,Residue_Name,Atom_Name,X,Y,Z,Occupancy,B_Factor
0,0,A,300,SER,N,28.985001,54.266998,34.372002,1.0,88.63
1,0,A,300,SER,CA,27.739000,54.654999,33.648998,1.0,88.62
2,0,A,300,SER,C,27.105000,53.480999,32.896999,1.0,89.53
3,0,A,300,SER,O,27.492001,53.158001,31.774000,1.0,90.12
4,0,A,300,SER,CB,28.028999,55.799000,32.671001,1.0,87.83
...,...,...,...,...,...,...,...,...,...,...
2589,0,N,1,UNK,I,26.409000,23.811001,36.709000,0.0,0.00
2590,0,N,1,UNK,S,30.770000,21.639999,30.334999,0.0,0.00
2591,0,N,1,UNK,H,23.517000,23.597000,28.778000,0.0,0.00
2592,0,N,1,UNK,HN,34.470001,26.077999,34.348999,1.0,0.00


In [None]:
# Keep only chain-A atoms named 'CA'; their X/Y/Z are later merged onto the edge list
# as the coordinates of each residue.
pdb_1yok_df = pdb_df[(pdb_df['Chain'] == 'A') & (pdb_df['Atom_Name'] == 'CA')]
pdb_1yok_df

Unnamed: 0,Model,Chain,Residue_Number,Residue_Name,Atom_Name,X,Y,Z,Occupancy,B_Factor
1,0,A,300,SER,CA,27.739000,54.654999,33.648998,1.0,88.62
11,0,A,301,ILE,CA,25.391001,51.723999,32.971001,1.0,89.13
20,0,A,302,PRO,CA,22.995001,52.351002,30.025000,1.0,89.52
27,0,A,303,HIS,CA,19.211000,52.382999,30.504000,1.0,89.12
40,0,A,304,LEU,CA,18.423000,48.929001,29.101999,1.0,80.76
...,...,...,...,...,...,...,...,...,...,...
2275,0,A,534,GLU,CA,12.484000,16.013000,26.959000,1.0,61.83
2285,0,A,535,MET,CA,14.043000,19.150000,25.462000,1.0,62.54
2294,0,A,536,LEU,CA,16.486000,17.191999,23.264000,1.0,64.67
2303,0,A,537,HIS,CA,13.362000,15.448000,21.931999,1.0,71.70


In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Path to the tab-separated edge table for 1YOK
# (the '_ringEdges' suffix suggests RING output — confirm).
tsv_file = '/content/drive/My Drive/LRH1-Drug-Screening/data/1yok.cif_ringEdges'

# Read the tab-separated file into a DataFrame; NodeId1 / NodeId2 identify the two residues of each edge.
df = pd.read_csv(tsv_file, sep='\t')
df

Unnamed: 0,NodeId1,Interaction,NodeId2,Distance,Angle,Energy,Atom1,Atom2,Donor,Positive,Cation,Orientation,Model
0,A:300:_:SER,VDW:MC_SC,A:488:_:ASN,3.976,,6.0,C,ND2,,,,,1
1,A:301:_:ILE,VDW:SC_SC,A:305:_:ILE,3.655,,6.0,CG2,CG2,,,,,1
2,A:301:_:ILE,VDW:SC_SC,A:306:_:LEU,3.774,,6.0,CB,CD2,,,,,1
3,A:301:_:ILE,VDW:SC_SC,A:446:_:ARG,3.918,,6.0,CD1,CD,,,,,1
4,A:302:_:PRO,VDW:SC_SC,A:305:_:ILE,4.186,,6.0,CB,CD1,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
425,C:745:_:LEU,VDW:MC_SC,C:748:_:LEU,4.197,,6.0,C,CB,,,,,1
426,C:745:_:LEU,VDW:MC_SC,C:749:_:LEU,3.788,,6.0,C,CD2,,,,,1
427,C:746:_:ARG,HBOND:SC_SC,C:750:_:ASP,3.383,17.538,17.0,NE,OD2,C:746:_:ARG,,,,1
428,C:747:_:TYR,HBOND:SC_SC,C:751:_:LYS,3.541,48.081,17.0,OH,NZ,C:747:_:TYR,,,,1


In [None]:
# Keep only edges whose two endpoints both belong to chain A.
# Node ids have the form 'A:300:_:SER', i.e. the chain is the first ':'-separated field.
filtered_df = df[df['NodeId1'].str.startswith('A:') & df['NodeId2'].str.startswith('A:')].copy()

# The residue number is the second ':'-separated field. Anchor the pattern to that
# field and allow a leading '-': the previous r'(\d+)' took the first run of digits
# anywhere in the id, which silently turned a negative residue number into a positive one.
filtered_df['source'] = filtered_df['NodeId1'].str.extract(r'^[^:]*:(-?\d+)', expand=False).astype(int)
filtered_df['target'] = filtered_df['NodeId2'].str.extract(r'^[^:]*:(-?\d+)', expand=False).astype(int)

# Residue-number edge list used for the coordinate merge.
ring_1yok_df = filtered_df[['source','target']]
ring_1yok_df

Unnamed: 0,source,target
0,300,488
1,301,305
2,301,306
3,301,446
4,302,305
...,...,...
393,531,534
394,531,535
397,532,535
399,533,536


In [None]:
# Attach CA coordinates to both ends of every 1YOK edge.
# Step 1: look up the source residue and label its coordinates X1/Y1/Z1.
merged_df = ring_1yok_df.merge(
    pdb_1yok_df[['Residue_Number', 'X', 'Y', 'Z']],
    how='left',
    left_on='source',
    right_on='Residue_Number',
).rename(columns={'X': 'X1','Y': 'Y1','Z': 'Z1'})

# Step 2: look up the target residue and label its coordinates X2/Y2/Z2.
merged_df2 = merged_df.merge(
    pdb_1yok_df[['Residue_Number', 'X', 'Y', 'Z']],
    how='left',
    left_on='target',
    right_on='Residue_Number',
).rename(columns={'X': 'X2','Y': 'Y2','Z': 'Z2'})

# Keep only the edge endpoints and their coordinates.
edgelist_1yok_coords = merged_df2[['source','target','X1','Y1','Z1','X2','Y2','Z2']]
edgelist_1yok_coords


Unnamed: 0,source,target,X1,Y1,Z1,X2,Y2,Z2
0,300,488,27.739000,54.654999,33.648998,33.227001,51.904999,30.902000
1,301,305,25.391001,51.723999,32.971001,21.167999,47.055000,30.945000
2,301,306,25.391001,51.723999,32.971001,19.785000,48.264999,34.279999
3,301,446,25.391001,51.723999,32.971001,26.351000,43.930000,32.924000
4,302,305,22.995001,52.351002,30.025000,21.167999,47.055000,30.945000
...,...,...,...,...,...,...,...,...
348,531,534,13.546000,17.743000,31.415001,12.484000,16.013000,26.959000
349,531,535,13.546000,17.743000,31.415001,14.043000,19.150000,25.462000
350,532,535,16.796000,17.937000,29.448999,14.043000,19.150000,25.462000
351,533,536,16.033001,14.732000,27.562000,16.486000,17.191999,23.264000


In [None]:
# import pandas as pd
# from Bio.PDB import PDBParser

def pdb_to_dataframe(pdb_file):
    """Parse a PDB file and return one DataFrame row per atom.

    NOTE: this repeats the definition given in the earlier 1YOK cell of this
    notebook; re-running it rebinds the same name to equivalent code.

    Args:
        pdb_file: Path of the PDB file handed to ``PDBParser.get_structure``.

    Returns:
        A DataFrame with the columns Model, Chain, Residue_Number,
        Residue_Name, Atom_Name, X, Y, Z, Occupancy and B_Factor, in the
        order the atoms are yielded when iterating the parsed structure.
    """
    columns = [
        'Model', 'Chain', 'Residue_Number', 'Residue_Name',
        'Atom_Name', 'X', 'Y', 'Z', 'Occupancy', 'B_Factor'
    ]

    structure = PDBParser(QUIET=True).get_structure('structure', pdb_file)

    # Flatten the model -> chain -> residue -> atom hierarchy into one record per atom.
    records = [
        [
            model.id,
            chain.id,
            residue.id[1],
            residue.resname,
            atom.name,
            atom.coord[0],
            atom.coord[1],
            atom.coord[2],
            atom.occupancy,
            atom.bfactor,
        ]
        for model in structure
        for chain in model
        for residue in chain
        for atom in residue
    ]

    return pd.DataFrame(records, columns=columns)

# Parse the 4PLD structure file into an atom-level table.
# NOTE(review): `df` is a generic name that a later cell rebinds to the 4PLD edge table.
pdb_file = '/content/drive/My Drive/LRH1-Drug-Screening/data/4pld.pdb'
df = pdb_to_dataframe(pdb_file)
df

Unnamed: 0,Model,Chain,Residue_Number,Residue_Name,Atom_Name,X,Y,Z,Occupancy,B_Factor
0,0,A,300,SER,N,-33.840000,-10.761,-9.074,1.0,43.64
1,0,A,300,SER,CA,-32.577999,-10.816,-9.810,1.0,41.55
2,0,A,300,SER,C,-31.959000,-9.416,-9.965,1.0,40.15
3,0,A,300,SER,O,-31.073000,-9.017,-9.184,1.0,32.47
4,0,A,300,SER,CB,-32.813000,-11.463,-11.182,1.0,44.67
...,...,...,...,...,...,...,...,...,...,...
2360,0,N,1,UNK,I,3.866000,3.423,-24.850,0.0,0.00
2361,0,N,1,UNK,S,-5.031000,8.821,-14.983,0.0,0.00
2362,0,N,1,UNK,H,-5.673000,15.211,-12.908,0.0,0.00
2363,0,N,1,UNK,HN,-5.527000,10.102,-13.140,1.0,0.00


In [None]:
# Keep only chain-A atoms named 'CA'; their X/Y/Z are later merged onto the 4PLD edge list.
# Relies on `df` still holding the 4PLD atom table from the previous cell.
pdb_4pld_df = df[(df['Chain'] == 'A') & (df['Atom_Name'] == 'CA')]
pdb_4pld_df

Unnamed: 0,Model,Chain,Residue_Number,Residue_Name,Atom_Name,X,Y,Z,Occupancy,B_Factor
1,0,A,300,SER,CA,-32.577999,-10.816000,-9.810,1.0,41.55
8,0,A,301,ILE,CA,-31.952999,-7.309000,-11.185,1.0,24.73
17,0,A,302,PRO,CA,-34.129002,-4.229000,-10.633,1.0,19.68
24,0,A,303,HIS,CA,-35.530998,-2.759000,-13.824,1.0,19.13
37,0,A,304,LEU,CA,-34.115002,0.612000,-12.842,1.0,16.01
...,...,...,...,...,...,...,...,...,...,...
2306,0,A,534,GLU,CA,-12.510000,25.766001,-15.862,1.0,18.82
2316,0,A,535,MET,CA,-14.620000,23.462999,-13.653,1.0,17.52
2325,0,A,536,LEU,CA,-12.525000,24.528000,-10.673,1.0,21.81
2334,0,A,537,HIS,CA,-12.824000,28.228001,-11.535,1.0,26.22


In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Path to the tab-separated edge table for 4PLD
# (the '_ringEdges' suffix suggests RING output — confirm).
tsv_file = '/content/drive/My Drive/LRH1-Drug-Screening/data/4pld.cif_ringEdges'

# Read the tab-separated file into a DataFrame; this rebinds `df`, replacing the 4PLD atom table.
df = pd.read_csv(tsv_file, sep='\t')
df

Unnamed: 0,NodeId1,Interaction,NodeId2,Distance,Angle,Energy,Atom1,Atom2,Donor,Positive,Cation,Orientation,Model
0,A:300:_:SER,HBOND:MC_SC,A:487:_:CYS,4.110,43.468,17.0,O,SG,A:487:_:CYS,,,,1
1,A:301:_:ILE,VDW:SC_SC,A:306:_:LEU,4.062,,6.0,CD1,CD1,,,,,1
2,A:301:_:ILE,VDW:SC_SC,A:309:_:LEU,3.965,,6.0,CD1,CD1,,,,,1
3,A:301:_:ILE,VDW:SC_SC,A:446:_:ARG,3.778,,6.0,CD1,CG,,,,,1
4,A:302:_:PRO,HBOND:MC_MC,A:305:_:ILE,2.961,39.047,17.0,O,N,A:305:_:ILE,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
737,A:533:_:ILE,HBOND:MC_MC,A:537:_:HIS,2.738,13.786,17.0,O,N,A:537:_:HIS,,,,1
738,A:533:_:ILE,HBOND:MC_MC,A:538:_:ALA,4.812,41.233,17.0,O,N,A:538:_:ALA,,,,1
739,A:534:_:GLU,HBOND:MC_MC,A:537:_:HIS,3.358,53.897,17.0,O,N,A:537:_:HIS,,,,1
740,A:534:_:GLU,HBOND:MC_MC,A:538:_:ALA,3.103,18.503,17.0,O,N,A:538:_:ALA,,,,1


In [None]:
# Keep only edges whose two endpoints both belong to chain A.
# Node ids have the form 'A:300:_:SER', i.e. the chain is the first ':'-separated field.
filtered_df = df[df['NodeId1'].str.startswith('A:') & df['NodeId2'].str.startswith('A:')].copy()

# The residue number is the second ':'-separated field. Anchor the pattern to that
# field and allow a leading '-': the previous r'(\d+)' took the first run of digits
# anywhere in the id, which silently turned a negative residue number into a positive one.
filtered_df['source'] = filtered_df['NodeId1'].str.extract(r'^[^:]*:(-?\d+)', expand=False).astype(int)
filtered_df['target'] = filtered_df['NodeId2'].str.extract(r'^[^:]*:(-?\d+)', expand=False).astype(int)

# Residue-number edge list used for the coordinate merge.
ring_4pld_df = filtered_df[['source','target']]
ring_4pld_df

Unnamed: 0,source,target
0,300,487
1,301,306
2,301,309
3,301,446
4,302,305
...,...,...
737,533,537
738,533,538
739,534,537
740,534,538


In [None]:
# Attach CA coordinates to both ends of every 4PLD edge.
# Step 1: look up the source residue and label its coordinates X1/Y1/Z1.
merged_df = ring_4pld_df.merge(
    pdb_4pld_df[['Residue_Number', 'X', 'Y', 'Z']],
    how='left',
    left_on='source',
    right_on='Residue_Number',
).rename(columns={'X': 'X1','Y': 'Y1','Z': 'Z1'})

# Step 2: look up the target residue and label its coordinates X2/Y2/Z2.
merged_df2 = merged_df.merge(
    pdb_4pld_df[['Residue_Number', 'X', 'Y', 'Z']],
    how='left',
    left_on='target',
    right_on='Residue_Number',
).rename(columns={'X': 'X2','Y': 'Y2','Z': 'Z2'})

# Keep only the edge endpoints and their coordinates.
edgelist_4pld_coords = merged_df2[['source','target','X1','Y1','Z1','X2','Y2','Z2']]
edgelist_4pld_coords

Unnamed: 0,source,target,X1,Y1,Z1,X2,Y2,Z2
0,300,487,-32.577999,-10.816000,-9.810,-30.712999,-10.222000,-3.394000
1,301,306,-31.952999,-7.309000,-11.185,-30.864000,-2.398000,-15.988000
2,301,309,-31.952999,-7.309000,-11.185,-26.664000,0.070000,-17.334999
3,301,446,-31.952999,-7.309000,-11.185,-24.549000,-2.778000,-11.103000
4,302,305,-34.129002,-4.229000,-10.633,-30.547001,-0.675000,-12.625000
...,...,...,...,...,...,...,...,...
718,533,537,-9.398000,24.170000,-14.303,-12.824000,28.228001,-11.535000
719,533,538,-9.398000,24.170000,-14.303,-16.462999,27.223000,-11.166000
720,534,537,-12.510000,25.766001,-15.862,-12.824000,28.228001,-11.535000
721,534,538,-12.510000,25.766001,-15.862,-16.462999,27.223000,-11.166000


In [None]:
!pip install py3dmol

import pandas as pd
import py3Dmol
import warnings

warnings.simplefilter(action='ignore', category=FutureWarning)

# Edge list for 1YOK with CA coordinates (columns: source, target, X1..Z1, X2..Z2).
df = edgelist_1yok_coords

view = py3Dmol.view(query='pdb:1YOK', width=1200, height=800)
chA = {'chain':'A'}
chB = {'chain':'B'}
# Chain A: translucent white surface over a white cartoon.
view.addSurface(py3Dmol.VDW,{'opacity':0.6,'color':'white'}, chA)
view.setStyle(chA,{'cartoon': {'color':'white'}})
# NOTE(review): the empty style key looks intended to hide chain B — confirm.
view.setStyle(chB,{'': {'color':None}})
# Show a "<residue name>:<atom name>" label while hovering an atom; remove it on un-hover.
view.setHoverable({},True,'''function(atom,viewer,event,container) {
                   if(!atom.label) {
                    atom.label = viewer.addLabel(atom.resn+":"+atom.atom,{position: atom, backgroundColor: 'mintcream', fontColor:'black'});
                   }}''',
               '''function(atom,viewer) {
                   if(atom.label) {
                    viewer.removeLabel(atom.label);
                    delete atom.label;
                   }
                }''')

# Chain-A residues whose cartoon is highlighted in red.
resset = [300,400,401,402,403,404]
for i in resset:
    view.addStyle({'chain':'A','resi': i},{'cartoon':{'color':'red'}})

# Draw one cylinder per edge between the two residues' coordinates.
# Coordinates are selected by column name and converted to plain Python floats in one
# pass: the previous `df.iloc[i][2]` form used integer keys as positions on a
# label-indexed Series, which pandas has deprecated (the source of the FutureWarning).
# Edges missing a coordinate (no match in the left merge) are skipped rather than drawn with NaN.
edge_coords = df[['X1','Y1','Z1','X2','Y2','Z2']].dropna().astype(float).to_numpy().tolist()
for x1, y1, z1, x2, y2, z2 in edge_coords:
    view.addCylinder(
        {'start':dict(x=x1,y=y1,z=z1),
        'end':dict(x=x2,y=y2,z=z2),
        'radius':0.12,
        'fromCap':1,
        'toCap':1,
        'color':'red',
        'dashes':False
        }
    )

view.render()

# Save the viewer as a standalone HTML file next to the notebook.
html = view._make_html()
with open('1yok_view.html', 'w') as f:
    f.write(html)

In [None]:
!pip install py3dmol

import pandas as pd
import py3Dmol

# Edge list for 1YOK with CA coordinates (columns: source, target, X1..Z1, X2..Z2).
df = edgelist_1yok_coords

view = py3Dmol.view(query='pdb:1YOK', width=2000, height=1000)
chA = {'chain':'A'}
chB = {'chain':'B'}
# Chain A: translucent white surface over a white cartoon.
view.addSurface(py3Dmol.VDW,{'opacity':0.6,'color':'white'}, chA)
view.setStyle(chA,{'cartoon': {'color':'white'}})
# NOTE(review): the empty style key looks intended to hide chain B — confirm.
view.setStyle(chB,{'': {'color':None}})
# Show a "<residue name>:<atom name>" label while hovering an atom; remove it on un-hover.
view.setHoverable({},True,'''function(atom,viewer,event,container) {
                   if(!atom.label) {
                    atom.label = viewer.addLabel(atom.resn+":"+atom.atom,{position: atom, backgroundColor: 'mintcream', fontColor:'black'});
                   }}''',
               '''function(atom,viewer) {
                   if(atom.label) {
                    viewer.removeLabel(atom.label);
                    delete atom.label;
                   }
                }''')

# Chain-A residues whose cartoon is highlighted in red.
resset = [300,400,401,402,403,404]
for i in resset:
    view.addStyle({'chain':'A','resi': i},{'cartoon':{'color':'red'}})

# Draw one cylinder per edge between the two residues' coordinates.
# Coordinates are selected by column name and converted to plain Python floats in one
# pass: the previous `df.iloc[i][2]` form used integer keys as positions on a
# label-indexed Series, which pandas has deprecated.
# Edges missing a coordinate (no match in the left merge) are skipped rather than drawn with NaN.
edge_coords = df[['X1','Y1','Z1','X2','Y2','Z2']].dropna().astype(float).to_numpy().tolist()
for x1, y1, z1, x2, y2, z2 in edge_coords:
    view.addCylinder(
        {'start':dict(x=x1,y=y1,z=z1),
        'end':dict(x=x2,y=y2,z=z2),
        'radius':0.12,
        'fromCap':1,
        'toCap':1,
        'color':'red',
        'dashes':False
        }
    )

view.render()




<py3Dmol.view at 0x7fb534100d60>

In [None]:
# Edge list for 4PLD with CA coordinates (columns: source, target, X1..Z1, X2..Z2).
df = edgelist_4pld_coords

view = py3Dmol.view(query='pdb:4PLD', width=2000, height=1000)
chA = {'chain':'A'}
chB = {'chain':'B'}
# Chain A: translucent white surface over a white cartoon.
view.addSurface(py3Dmol.VDW,{'opacity':0.6,'color':'white'}, chA)
view.setStyle(chA,{'cartoon': {'color':'white'}})
# NOTE(review): the empty style key looks intended to hide chain B — confirm.
view.setStyle(chB,{'': {'color':None}})
# Show a "<residue name>:<atom name>" label while hovering an atom; remove it on un-hover.
view.setHoverable({},True,'''function(atom,viewer,event,container) {
                   if(!atom.label) {
                    atom.label = viewer.addLabel(atom.resn+":"+atom.atom,{position: atom, backgroundColor: 'mintcream', fontColor:'black'});
                   }}''',
               '''function(atom,viewer) {
                   if(atom.label) {
                    viewer.removeLabel(atom.label);
                    delete atom.label;
                   }
                }''')

# Chain-A residues whose cartoon is highlighted in green.
resset = [300,400,401,402,403,404]
for i in resset:
    view.addStyle({'chain':'A','resi': i},{'cartoon':{'color':'green'}})

# Draw one cylinder per edge between the two residues' coordinates.
# Coordinates are selected by column name and converted to plain Python floats in one
# pass: the previous `df.iloc[i][2]` form used integer keys as positions on a
# label-indexed Series, which pandas has deprecated.
# Edges missing a coordinate (no match in the left merge) are skipped rather than drawn with NaN.
edge_coords = df[['X1','Y1','Z1','X2','Y2','Z2']].dropna().astype(float).to_numpy().tolist()
for x1, y1, z1, x2, y2, z2 in edge_coords:
    view.addCylinder(
        {'start':dict(x=x1,y=y1,z=z1),
        'end':dict(x=x2,y=y2,z=z2),
        'radius':0.12,
        'fromCap':1,
        'toCap':1,
        'color':'green',
        'dashes':False
        }
    )

view.render()

<py3Dmol.view at 0x7fb4f37875b0>

In [None]:
import pandas as pd
import py3Dmol
from IPython.display import display, HTML

# Short aliases for the two coordinate-annotated edge lists built in earlier cells.
df_1yok = edgelist_1yok_coords
df_4pld = edgelist_4pld_coords

# Function to generate the HTML for embedding the viewer
def generate_viewer_html(pdb_id, df, resset, color, chain_color, width=900, height=900):
    """Build a py3Dmol viewer of a structure with network edges drawn as cylinders.

    Despite its name, this returns the ``py3Dmol.view`` object rather than an
    HTML string; the caller is responsible for turning it into HTML.

    Args:
        pdb_id: Identifier passed to py3Dmol as the query ``pdb:<pdb_id>``.
        df: Edge table. The columns at positions 2-4 are read as the start
            x/y/z and those at positions 5-7 as the end x/y/z of each edge.
        resset: Iterable of chain-A residue numbers whose cartoon is drawn in ``color``.
        color: Colour used for the highlighted residues and the edge cylinders.
        chain_color: Cartoon colour applied to chains A and B.
        width: Viewer width passed to ``py3Dmol.view``.
        height: Viewer height passed to ``py3Dmol.view``.

    Returns:
        The configured ``py3Dmol.view`` instance.
    """
    view = py3Dmol.view(query=f'pdb:{pdb_id}', width=width, height=height)
    chA = {'chain': 'A'}
    chB = {'chain': 'B'}
    # Chain A gets a translucent white surface; both chains get a cartoon in chain_color.
    view.addSurface(py3Dmol.VDW, {'opacity': 0.6, 'color': 'white'}, chA)
    view.setStyle(chA, {'cartoon': {'color': chain_color}})
    view.setStyle(chB, {'cartoon': {'color': chain_color}})
    # Show a "<residue name>:<atom name>" label while hovering an atom; remove it on un-hover.
    view.setHoverable({}, True, '''function(atom,viewer,event,container) {
                       if(!atom.label) {
                        atom.label = viewer.addLabel(atom.resn+":"+atom.atom,{position: atom, backgroundColor: 'mintcream', fontColor:'black'});
                       }}''',
                   '''function(atom,viewer) {
                       if(atom.label) {
                        viewer.removeLabel(atom.label);
                        delete atom.label;
                       }
                    }''')

    for resi in resset:
        view.addStyle({'chain': 'A', 'resi': resi}, {'cartoon': {'color': color}})

    # Slice the six coordinate columns by position (same positional contract as before)
    # and convert to plain Python floats in one pass. The previous `df.iloc[i][2]` form
    # used integer keys as positions on a label-indexed Series, which pandas has
    # deprecated, and did a full row lookup for every single coordinate.
    # Edges with a missing coordinate are skipped instead of being drawn with NaN.
    edge_coords = df.iloc[:, 2:8].dropna().astype(float).to_numpy().tolist()
    for x1, y1, z1, x2, y2, z2 in edge_coords:
        view.addCylinder(
            {'start': dict(x=x1, y=y1, z=z1),
             'end': dict(x=x2, y=y2, z=z2),
             'radius': 0.12,
             'fromCap': 1,
             'toCap': 1,
             'color': color,
             'dashes': False
             }
        )

    return view

# Build each viewer and take its HTML via the view's underscore-prefixed `_make_html()` method.
# NOTE(review): the 4PLD call highlights residues 500/600-604, whereas the standalone
# 4PLD cell above highlights 300/400-404 — confirm which set is intended.
viewer1_html = generate_viewer_html('1YOK', df_1yok, [300, 400, 401, 402, 403, 404], 'red', 'white')._make_html()
viewer2_html = generate_viewer_html('4PLD', df_4pld, [500, 600, 601, 602, 603, 604], 'green', 'white')._make_html()

# Display both viewers side by side in a flex container
html = f"""
<div style="display: flex; justify-content: space-around;">
    <div>{viewer1_html}</div>
    <div>{viewer2_html}</div>
</div>
"""

display(HTML(html))