In [1]:
import pandas as pd
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 100)

# To remove pandas copy warnings:
import warnings
warnings.filterwarnings('ignore')

import time
import community
import networkx as nx
import igraph as ig
from sklearn.metrics.cluster import normalized_mutual_info_score
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
%matplotlib inline
from IPython.display import Image

In [3]:
ThTh_edges = pd.read_csv('../Ring_output/1VY4/1VY4_LSU_rRNA+rProtein_edges.txt', sep='\t')
ThTh_edges

Unnamed: 0,NodeId1,Interaction,NodeId2,Distance,Angle,Energy,Atom1,Atom2,Donor,Positive,Cation,Orientation
0,0:3001:_:MG,IAC:LIG_MC,0:21:_:LEU,5.220,-999.900,0.0,MG,O,,,,
1,0:3001:_:MG,IAC:LIG_SC,0:22:_:GLY,3.071,-999.900,0.0,MG,HA2,,,,
2,0:3001:_:MG,IAC:LIG_SC,0:23:_:VAL,1.977,-999.900,0.0,MG,H,,,,
3,0:3001:_:MG,IAC:LIG_MC,0:24:_:LYS,6.141,-999.900,0.0,MG,N,,,,
4,0:3001:_:MG,IAC:LIG_SC,0:26:_:TYR,4.767,-999.900,0.0,MG,HE1,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
40442,Z:152:_:ALA,VDW:SC_SC,Z:155:_:LEU,3.720,-999.900,6.0,CB,CD2,,,,
40443,Z:153:_:SER,VDW:SC_SC,Z:167:_:PRO,3.510,-999.900,6.0,CB,CB,,,,
40444,Z:157:_:LEU,VDW:MC_SC,Z:161:_:VAL,3.313,-999.900,6.0,C,CG1,,,,
40445,Z:158:_:PRO,VDW:MC_SC,Z:161:_:VAL,3.589,-999.900,6.0,C,CG1,,,,


In [4]:
ThTh_nodes = pd.read_csv('../Ring_output/1VY4/1VY4_LSU_rRNA+rProtein_nodes.txt', sep='\t').drop(['pdbFileName'], axis=1)
ThTh_nodes
ThTh_nodes = ThTh_nodes[(SaCe_rPro_nodes.x > -999) | (SaCe_rPro_nodes.y > -999)  | (SaCe_rPro_nodes.z > -999)]
name_chains = pd.read_csv('./PDB_SaCe_LSU_rPro_chain_names.csv', names=['rProtein', 'Chain'])
SaCe_rPro_nodes = pd.merge(SaCe_rPro_nodes, name_chains, on='Chain')

Unnamed: 0,NodeId,Chain,Position,Residue,Dssp,Degree,Bfactor_CA,x,y,z,Rapdf,Tap,Accessibility
0,B:1:_:U,B,1,U,,2,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
1,B:2:_:C,B,2,C,,3,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
2,B:3:_:C,B,3,C,,6,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
3,B:4:_:C,B,4,C,,11,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
4,B:5:_:C,B,5,C,,16,-999.90,-999.900,-999.900,-999.900,-999.900,-999.900,-999.900
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7369,3:55:_:ARG,3,55,ARG,E,1,32.68,-97.104,140.802,196.343,-16.082,-0.278,0.423
7370,3:56:_:VAL,3,56,VAL,E,1,53.08,-100.068,143.120,196.835,-168.072,0.394,0.339
7371,3:57:_:GLU,3,57,GLU,E,2,61.18,-102.952,143.319,199.234,-67.546,-0.069,0.408
7372,3:59:_:VAL,3,59,VAL,E,3,47.99,-105.613,147.953,202.595,-0.117,0.493,0.155


In [5]:
def plot_nodes(df):    
    
    trace1 = go.Scatter3d(
            x = df['x'],
            y = df['y'],
            z = df['z'],
            text = df['rProtein']+' '+df['Residue']+' '+df['Dssp'],
            mode = 'markers',
            marker=dict(
                color = df['rProtein'].str[2:],
                colorscale='Jet'
                )
            )

    layout = go.Layout(
        title = 'Saccharomyces cerevisae Nodes (Atoms) Colored by rProtein',
        showlegend = False
    )

    data=go.Data([trace1])
    fig = go.Figure(data=data, layout=layout)
    iplot(fig)

In [6]:
plot_nodes(ThTh_nodes)

KeyError: 'rProtein'

In [7]:
G = nx.read_graphml('./SC_LSU_Protein_sup_AES_network.xml')

In [8]:
G.nodes['n0']

{'Accessibility': 0.536,
 'Bfactor_CA': 44.53,
 'Chain': 'A',
 'Degree': 3.0,
 'Dssp': ' ',
 'NodeId': 'A:3:_:ARG',
 'Position': 3.0,
 'Rapdf': -58.079,
 'Residue': 'ARG',
 'Tap': -0.052,
 'name': 'A:3:_:ARG',
 'pdbFileName': 'SC_LSU_Protein_sup_AES.pdb#3.A',
 'x': -23.242,
 'y': 145.781,
 'z': 140.369}

In [9]:
G.edges[('n0', 'n186', 0)]

{'Angle': 28.068,
 'Atom1': 'N',
 'Atom2': 'O',
 'Cation': 'None',
 'Distance': 2.883,
 'Donor': 'A:3:_:ARG',
 'Energy': 17.0,
 'Interaction': 'HBOND:MC_MC',
 'NodeId1': 'A:3:_:ARG',
 'NodeId2': 'A:207:_:VAL',
 'Orientation': 'None',
 'Positive': 'None'}

In [10]:
def plot_nodes_partitions(df):    
    
    trace1 = go.Scatter3d(
            x = df['x'],
            y = df['y'],
            z = df['z'],
            text = df['Residue']+' '+df['Dssp']+' '+df['Chain'],
            mode = 'markers',
            marker=dict(
                color = df[0],
                colorscale='Jet'
                )
    )
        
    layout = go.Layout(
        title = 'Coloring SaCe rProteins by Community',
        showlegend = True
    )

    data=go.Data([trace1])
    fig = go.Figure(data=data, layout=layout)
    iplot(fig)

In [13]:
def plot_louvain(res):
    partition = community.best_partition(G, resolution=res, weight='Energy')
    partition_df = pd.DataFrame.from_dict(partition, orient='index').reset_index()
    SaCe_rPro_partition = SaCe_rPro_nodes.join(partition_df)
    print('Resolution:', res)
    print('Number of partitions:',len(set(partition.values())))
    print('Modularity:', community.modularity(partition, G))
    plot_nodes_partitions(SaCe_rPro_partition)
    return(partition)

In [None]:
louvain10 = plot_louvain(10)

In [None]:
louvain15 = plot_louvain(15)

In [None]:
louvain20 = plot_louvain(20)

In [None]:
louvain25 = plot_louvain(25)

In [None]:
ig_G = ig.Graph.Read_GraphML('./SC_LSU_Protein_sup_AES_network.xml')

In [None]:
def walktrap_output(stps):
    %timeit walktrap = ig.Graph.community_walktrap(ig_G, weights='Energy', steps=stps)
    time.sleep(1)
    print('Steps:', stps)
    print('Optimal count:', walktrap.optimal_count)
    print('Modularity:', ig_G.modularity(membership=walktrap.as_clustering()))
    return([e for l in walktrap.merges for e in l])

In [None]:
walktrap2 = walktrap_output(2)

In [None]:
walktrap4 = walktrap_output(4)

In [None]:
walktrap6 = walktrap_output(6)

In [None]:
walktrap8 = walktrap_output(8)

In [None]:
walktrap10 = walktrap_output(10)

In [None]:
normalized_mutual_info_score(walktrap2, walktrap4)

In [None]:
normalized_mutual_info_score(list(louvain5.values()), list(louvain25.values()))