In [None]:
import copy
import importlib
import os
import sys

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

import rmsd_HL as rmsd
# Re-execute the already-imported rmsd_HL module so that edits made to it on disk
# are picked up without restarting the kernel.
importlib.reload(rmsd)


In [None]:
# This notebook aims to identify the effective residues (resid) in the OR that are involved in the binding domain

In [43]:
# Load the saved coordinate tables. The first CSV column is discarded in both
# (presumably the index column written when the files were saved - confirm against the files).
backbone_df = pd.read_csv('./backbone_df.csv').iloc[:, 1:]
data_df = pd.read_csv('./data_df.csv').iloc[:, 1:]

In [865]:
# Relabel the 'pontcy' model as 'homology' in data_df's models column.
data_df['models'] = data_df['models'].str.replace('pontcy', 'homology')

In [None]:
# 3D scatter of data_df with every model overlaid, one trace per model so that
# each model can be toggled from the legend.
fig = go.Figure()
for models in np.unique(data_df.models):
    # Select this model's rows once and reuse them for all three axes.
    # The loop variable stays named `models` because later cells read it.
    model_rows = data_df[data_df.models == models]
    fig.add_trace(
        go.Scatter3d(
            x=model_rows.x,
            y=model_rows.y,
            z=model_rows.z,
            mode="markers",
            name=models,
        )
    )
fig.update_traces(marker_size=3, opacity=0.6)
fig.show()


In [798]:
# 3D scatter of backbone_df with every model overlaid, one trace per model.
fig = go.Figure()
for models in np.unique(backbone_df.models):
    # Filter once per model instead of once per axis.
    # The loop variable stays named `models` because later cells read it.
    backbone_rows = backbone_df[backbone_df.models == models]
    fig.add_trace(
        go.Scatter3d(
            x=backbone_rows.x,
            y=backbone_rows.y,
            z=backbone_rows.z,
            mode="markers",
            name=models,
        )
    )
fig.update_traces(marker_size=3, opacity=0.6)
fig.show()

In [None]:
# Build `domain`: one row per residue (resid 1..313) with its domain label.
# It is merged onto the coordinate data by `resid` so that plots can be coloured by domain.
#
# Each segment is (first resid, one-past-last resid, label); the segments must tile
# 1..313 with no gaps or overlaps. The table is built in one go rather than by writing
# strings into a numeric frame slice by slice, which relies on a deprecated dtype upcast
# and triggers SettingWithCopyWarning when the 'resid' column is added to a sliced frame.
DOMAIN_SEGMENTS = [
    (1, 24, "N-terminus"),
    (24, 38, "TM Upper"),    # TM1
    (38, 52, "TM Lower"),
    (52, 58, "IC"),
    (58, 73, "TM Lower"),    # TM2
    (73, 87, "TM Upper"),
    (87, 93, "EC"),
    (93, 110, "TM Upper"),   # TM3
    (110, 127, "TM Lower"),
    (127, 137, "IC"),
    (137, 148, "TM Lower"),  # TM4
    (148, 159, "TM Upper"),
    (159, 195, "EC"),
    (195, 210, "TM Upper"),  # TM5
    (210, 225, "TM Lower"),
    (225, 232, "IC"),
    (232, 249, "TM Lower"),  # TM6
    (249, 265, "TM Upper"),
    (265, 276, "EC"),
    (276, 286, "TM Upper"),  # TM7
    (286, 295, "TM Lower"),
    (295, 314, "C-terminus"),
]

domain_labels = []
next_resid = 1
for seg_start, seg_stop, seg_label in DOMAIN_SEGMENTS:
    # Guard against typos in the table: every segment must start where the previous one ended.
    if seg_start != next_resid or seg_stop <= seg_start:
        raise ValueError(
            f"domain segment {seg_label!r} ({seg_start}, {seg_stop}) is not contiguous "
            f"with the previous segment (expected start {next_resid})"
        )
    domain_labels.extend([seg_label] * (seg_stop - seg_start))
    next_resid = seg_stop

# Index runs 1..313 so that the row label equals the resid.
domain = pd.DataFrame(
    {'domain': domain_labels, 'resid': list(range(1, next_resid))},
    index=pd.RangeIndex(1, next_resid),
)

In [807]:
# Pull the cryoEM rows out of data_df and attach each residue's domain label
# (joined on resid) so that this model can be plotted on its own.
# For a backbone-only view, filter backbone_df by the same condition instead of data_df.
is_cryoEM = data_df['models'] == 'cryoEM'
cryoEM_df = data_df.loc[is_cryoEM].merge(domain, on='resid')

# setting data points of centroid, start and end for a model for visualization
# x, y, z = rmsd.centroid(cryoEM_df.iloc[:,0:3])
# cryoEM_df.loc[len(cryoEM_df)] = [x, y, z,'cryoEM', None, None, 'Centroid' ]
# cryoEcM_df

In [804]:
# 3D scatter of the cryoEM model, coloured by domain label.
# cryoEM_df is filtered from data_df in the cell above; switch that cell to the
# backbone_df variant if a backbone-only view is easier to read.
# `px` is plotly.express and must be imported in the notebook's import cell.
fig = px.scatter_3d(cryoEM_df, x='x', y='y', z='z', color='domain')
fig.update_traces(marker_size=4, opacity=0.6)
# Enlarge the legend text so the domain names stay legible.
fig.update_layout(legend=dict(font=dict(size=15, color="black")))
fig.show()

In [None]:
# The goal here is to find a method for creating a virtual space of the binding cavity,
# and from it to identify the amino acids that interact with the binding cavity.
# 1st idea: create a virtual space of the cavity by shrinking the coordinates of TM3, 5, 6 and 7.

In [None]:
# Build TM_df: the cryoEM rows belonging to TM3, TM5, TM6 and TM7, with `domain`
# overwritten by the helix name.
#
# The residue ranges (stop value exclusive) coincide with the "TM Upper" segments
# of those helices in the `domain` table.
TM_RESID_RANGES = {
    'TM3': range(93, 110),
    'TM5': range(195, 210),
    'TM6': range(249, 265),
    'TM7': range(276, 286),
}
# Flatten to a resid -> helix lookup so the whole frame can be labelled in one pass.
resid_to_TM = {
    resid: helix
    for helix, helix_resids in TM_RESID_RANGES.items()
    for resid in helix_resids
}

# One vectorised selection replaces the per-residue loop of pd.concat calls: it avoids
# quadratic re-copying and the SettingWithCopyWarning raised by assigning a column on a slice.
# The stable sort reproduces the loop's ordering: ascending resid, with the original
# row order kept within each residue. Original row labels are preserved.
TM_df = (
    cryoEM_df
    .loc[cryoEM_df['resid'].isin(list(resid_to_TM))]
    .assign(domain=lambda rows: rows['resid'].map(resid_to_TM))
    .sort_values('resid', kind='stable')
)


In [820]:
# New approach, as the volume calculation isn't really feasible or accurate:
# calculate the distance between the CA and CB coordinates, as CA is the midpoint of the backbone and CB is the first C of the branch.
# 1. Isolate the TMs; align the top CA and bottom CA of the TMs to the parallel line of the centroid with the TM.
# 2. Flatten the image to a 2D space.
TM_df

Unnamed: 0,x,y,z,models,resid,atom,domain
636,-9.454367,7.076098,-21.428846,cryoEM,93,N,TM3
637,-9.318367,5.707098,-20.947846,cryoEM,93,CA,TM3
638,-10.639367,4.982098,-21.160846,cryoEM,93,C,TM3
639,-11.714367,5.553098,-20.946846,cryoEM,93,O,TM3
640,-8.907367,5.667098,-19.460846,cryoEM,93,CB,TM3
...,...,...,...,...,...,...,...
2128,6.948633,2.748098,-1.592846,cryoEM,285,C,TM7
2129,7.899633,2.124098,-1.098846,cryoEM,285,O,TM7
2130,4.822633,3.226098,-0.314846,cryoEM,285,CB,TM7
2131,3.929633,4.406098,-0.346846,cryoEM,285,CG,TM7


In [824]:
# Highlight the atoms of a single residue (resid 99) against the TM3/5/6/7 helices.
resid_99 = TM_df[TM_df.resid == 99]  # select once, reuse for all three axes

fig = go.Figure()
fig.add_trace(go.Scatter3d(x=resid_99.x,
                           y=resid_99.y,
                           z=resid_99.z,
                           mode="markers",
                           # Explicit label; previously this read a loop variable
                           # leaked from an earlier plotting cell.
                           name="resid 99"))
# TM_df is used for the helix trace because cavity_df is only created in a later cell.
# Both are built from cryoEM_df with the same TM3/5/6/7 residue ranges, and using TM_df
# keeps this cell runnable on a fresh top-to-bottom run.
fig.add_trace(go.Scatter3d(x=TM_df.x,
                           y=TM_df.y,
                           z=TM_df.z,
                           mode="markers+lines"))
fig.update_traces(marker_size=4, opacity=0.6)
fig.show()

In [815]:
# Display the rows of TM_df that belong to resid 99.
TM_df.loc[TM_df['resid'] == 99]

Unnamed: 0,x,y,z,models,resid,atom,domain
680,-11.801367,0.279098,-14.553846,cryoEM,99,N,TM3
681,-12.148367,-0.576902,-13.428846,cryoEM,99,CA,TM3
682,-11.064367,-1.607902,-13.143846,cryoEM,99,C,TM3
683,-10.819367,-1.932902,-11.976846,cryoEM,99,O,TM3
684,-13.484367,-1.267902,-13.693846,cryoEM,99,CB,TM3
685,-14.593367,-0.319902,-14.050846,cryoEM,99,CG,TM3
686,-14.610367,0.971098,-13.547846,cryoEM,99,CD1,TM3
687,-15.615367,-0.715902,-14.893846,cryoEM,99,CD2,TM3
688,-15.626367,1.844098,-13.875846,cryoEM,99,CE1,TM3
689,-16.633367,0.154098,-15.224846,cryoEM,99,CE2,TM3


In [806]:
# NOTE(review): cryoEM_df.domain holds the labels merged in from the `domain` table
# ('TM Upper', 'TM Lower', 'IC', 'EC', ...), not per-helix names, so this filter matches
# no rows (see the empty output below). Per-helix labels such as 'TM3' are only assigned
# on TM_df / cavity_df.
cryoEM_df.iloc[np.where(cryoEM_df.domain == 'TM3')]

Unnamed: 0,x,y,z,models,resid,atom,domain


In [421]:
# 3D scatter of every row in cryoEM_df as a single trace.
fig = go.Figure()
fig.add_trace(go.Scatter3d(x=cryoEM_df.x,
                           y=cryoEM_df.y,
                           z=cryoEM_df.z,
                           mode="markers",
                           # cryoEM_df only contains rows with models == 'cryoEM', so the
                           # trace is labelled explicitly instead of reading a loop variable
                           # left over from an earlier plotting cell.
                           name="cryoEM"))
fig.update_traces(marker_size=4, opacity=0.6)
fig.show()

In [None]:
# Extract the coordinates of TM3, TM5, TM6 and TM7 into cavity_df, relabelling `domain`
# with the helix name; these rows are used to build the cavity volume.
CAVITY_HELIX_RANGES = (
    ('TM3', range(93, 110)),
    ('TM5', range(195, 210)),
    ('TM6', range(249, 265)),
    ('TM7', range(276, 286)),
)
# resid -> helix name lookup (range stop values are exclusive).
cavity_helix_of = {}
for helix_name, helix_resids in CAVITY_HELIX_RANGES:
    for helix_resid in helix_resids:
        cavity_helix_of[helix_resid] = helix_name

# Vectorised selection instead of concatenating one residue at a time: no quadratic
# copying and no column assignment on a slice (SettingWithCopyWarning).
in_cavity_helix = cryoEM_df['resid'].isin(list(cavity_helix_of))
cavity_df = cryoEM_df.loc[in_cavity_helix].copy()
cavity_df['domain'] = cavity_df['resid'].map(cavity_helix_of)
# Ascending resid with the original order kept inside each residue, matching the
# ordering the residue-by-residue loop produced; original row labels are preserved.
cavity_df = cavity_df.sort_values('resid', kind='stable')

In [None]:
# Since go.Volume / go.Isosurface do not accept irregular shapes,
# use the centroid to find the vertices of the binding cavity.

In [794]:
# Preview the TM3/5/6/7 rows extracted into cavity_df.
cavity_df

Unnamed: 0,x,y,z,models,resid,atom,domain
336,-10.639367,4.982098,-21.160846,cryoEM,93,C,TM3
337,-9.318367,5.707098,-20.947846,cryoEM,93,CA,TM3
338,-8.907367,5.667098,-19.460846,cryoEM,93,CB,TM3
339,-9.454367,7.076098,-21.428846,cryoEM,93,N,TM3
340,-11.714367,5.553098,-20.946846,cryoEM,93,O,TM3
...,...,...,...,...,...,...,...
1289,6.948633,2.748098,-1.592846,cryoEM,285,C,TM7
1290,6.192633,3.764098,-0.752846,cryoEM,285,CA,TM7
1291,4.822633,3.226098,-0.314846,cryoEM,285,CB,TM7
1292,5.856633,4.980098,-1.515846,cryoEM,285,N,TM7


In [525]:
# Turn cavity_df into an (n_rows, 3) float array of x/y/z coordinates and display its centroid.
# The columns are taken by name in a single vectorised call. The previous version looped
# over the `.iloc` indexer row by row and sliced the first three values of each row, which
# depends on column position and on the legacy __getitem__ iteration fallback.
cavity_coord = cavity_df[['x', 'y', 'z']].to_numpy(dtype=float)
rmsd.centroid(cavity_coord)

array([-7.21647104,  4.21327848, -1.78759869])

In [547]:
# Plot the TM3/5/6/7 coordinates together with their centroid.
# The centroid is computed once and its three components are reused below.
centroid_xyz = rmsd.centroid(cavity_coord)

fig = go.Figure()
fig.add_trace(
    go.Scatter3d(
        x=cavity_df.x,
        y=cavity_df.y,
        z=cavity_df.z,
        mode="markers+lines",
    )
)
# To check that the plotted TMs sit where expected, overlay the whole structure by
# adding a markers-only Scatter3d trace of cryoEM_df.x / .y / .z here.
fig.add_trace(
    go.Scatter3d(
        x=[centroid_xyz[0]],
        y=[centroid_xyz[1]],
        z=[centroid_xyz[2]],
        mode="markers",
    )
)
fig.update_traces(marker_size=4, opacity=0.6)
fig.show()

In [654]:
# cavity_volume collects, for each TM helix, the first and last 'C' atom rows as a
# proxy for the top and bottom of the helix when estimating the cavity volume.
c_df = cavity_df[cavity_df.atom == 'C']

helix_end_rows = []
for TM in np.unique(c_df.domain):
    helix_c_rows = c_df[c_df.domain == TM]
    # First row, then last row, of this helix's 'C' atoms (in cavity_df order).
    helix_end_rows.append(helix_c_rows.iloc[0])
    helix_end_rows.append(helix_c_rows.iloc[-1])

# One row per collected Series; each keeps its original row label as the index.
cavity_volume = pd.DataFrame(helix_end_rows)

In [None]:
# All 'C' atom rows of cavity_df; the name `temp` is read by the plotting cell below.
temp = cavity_df[cavity_df['atom'] == 'C']
temp

In [793]:
# Plot the helix end points (cavity_volume), all 'C' atoms (temp) and the cavity centroid.
# The centroid is computed once and its components are reused for the three axes.
centroid_xyz = rmsd.centroid(cavity_coord)

fig = go.Figure()
fig.add_trace(
    go.Scatter3d(
        x=cavity_volume.x,
        y=cavity_volume.y,
        z=cavity_volume.z,
        mode='markers+lines',
    )
)
fig.add_trace(
    go.Scatter3d(
        x=temp.x,
        y=temp.y,
        z=temp.z,
        mode='markers',
    )
)
fig.add_trace(
    go.Scatter3d(
        x=[centroid_xyz[0]],
        y=[centroid_xyz[1]],
        z=[centroid_xyz[2]],
        mode="markers",
    )
)
fig.show()