In [31]:
import pandas as pd
import os
import pickle
import plotly
#import matplotlib.pyplot as plt
import plotly.io as pio
import plotly.graph_objects as go

In [25]:
def load_graph_positions_and_clusters(file_path="community_graph.pickle"):
    with open(file_path, 'rb') as f:
        G, pos, partition = pickle.load(f)
    return G, pos, partition

## Figure A

In [36]:
G, pos, partition = load_graph_positions_and_clusters()
print(G, pos, partition)
# prepare data for edges
edge_x = []
edge_y = []
for edge in G.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend([x0, x1, None]) 
    edge_y.extend([y0, y1, None])

# create a trace for edges
edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(width=0.5, color='#888'), hoverinfo='none', mode='lines')

# prepare data for nodes
node_x = []
node_y = []
node_colors = []  # to store colors of nodes
for node in G.nodes():
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)
    if G.degree[node] > 0:  # if the node has at least one connection
        node_colors.append(partition[node])
    else:
        node_colors.append('black')  # set color to black for unconnected nodes

# create a trace for nodes
hover_texts = []
for node in G.nodes():
    community_id = partition[node]
    neighbors = list(G.neighbors(node))
    sorted_neighbors = sorted(neighbors)[:10]
    neighbors_str = ", ".join(sorted_neighbors)
    hover_text = f"Gene: {node}<br>Cluster ID: {community_id}<br>Top Neighbors: {neighbors_str}"
    hover_texts.append(hover_text)

node_trace = go.Scatter(x=node_x, y=node_y, 
                        mode='markers', 
                        hoverinfo='text',
                        marker=dict(showscale=False, 
                                    size=10,
                                    color=node_colors,  # use the modified colors list
                                    line_width=2),
                        text=hover_texts)

# create figure
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=0),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

# display figure
plotly.offline.plot(fig, image_filename='r100_clustering', image='svg') # saves in ~/Downloads and opens in browser.
fig.show()


Graph with 100 nodes and 1285 edges {'C55C3.3': array([-0.12004694, -0.45325076]), 'timm-17B.2': array([ 0.22155848, -0.21911119]), 'Y20F4.4': array([ 0.04916721, -0.15806419]), 'C08F11.7': array([ 0.12756508, -0.29265371]), 'E01G4.5': array([0.42174216, 0.3394945 ]), 'ZK402.3': array([0.52338842, 0.46030363]), 'C09G5.7': array([-0.09526422, -0.34472424]), 'hrde-1': array([-0.84960964,  0.63685144]), 'C18D4.6': array([ 0.08591515, -0.19727474]), 'R06C1.4': array([ 0.03706994, -0.23058961]), 'C38D9.2': array([ 0.19897691, -0.37495077]), 'F15D4.5': array([ 0.21192079, -0.3516478 ]), 'Y57G11C.51': array([0.09971287, 0.06346082]), 'pan-1': array([ 0.07652053, -0.216027  ]), 'hil-4': array([-0.72557702,  0.53187778]), 'cdk-1': array([-0.71860691,  0.4771043 ]), 'T16G12.4': array([ 0.07652186, -0.26842002]), 'fbxb-97': array([ 0.17948462, -0.25174965]), 'F39F10.4': array([0.38302766, 0.20315218]), 'K09H9.7': array([ 0.14259972, -0.22333229]), 'tbb-2': array([-0.86345074,  0.46276179]), 'saeg

## Figure C

In [37]:
# reading in the table occupancy sheet to choose the top genes
node_names = list(G.nodes())
db_ref = pd.read_excel("simplemine_results_TableOccupancy_full_27Sep2024.xlsx", comment="#")
wormbase = pd.read_csv('Wormbase_Gene_Sanitizer_Database_DownloadedOn_10-30-2023.txt', comment="#", sep="\t")

coldict = dict()
colref = dict()

for gene in node_names:
    wbgene = wormbase.loc[wormbase["Input"] == gene, "Suggested Match"].iloc[0]
    geneindex = db_ref[db_ref["Your Input"] == wbgene].index[0]
    line = db_ref.at[geneindex, "Reference"]
    coldict[gene] = str(line) 

for gene in coldict:
    element = coldict[gene]
    num_ref = element.split(',')
    if num_ref != ['N.A.']:
        colref[gene] = len(num_ref)
    else:
        colref[gene] = 0

# cap values at 100
capped_colref = {k: min(v, 100) for k, v in colref.items()}

# normalize capped values
max_value = 100
min_value = 0
normalized_colref = {k: v / 100 for k, v in capped_colref.items()}

G, pos, partition = load_graph_positions_and_clusters()

# sort nodes by their normalized value
sorted_nodes = sorted(G.nodes(), key=lambda node: normalized_colref[node])

# prepare data for edges
edge_x = []
edge_y = []
for edge in G.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend([x0, x1, None]) 
    edge_y.extend([y0, y1, None])

# create a trace for edges
edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(width=0.5, color='#888'), hoverinfo='none', mode='lines')

# modify the node colors assignment
node_x = [pos[node][0] for node in sorted_nodes]
node_y = [pos[node][1] for node in sorted_nodes]
node_colors = [normalized_colref[node] for node in sorted_nodes]

hover_texts = [f"Gene: {node}<br>References: {colref[node]}" for node in sorted_nodes]

# create a trace for nodes
node_trace = go.Scatter(x=node_x, y=node_y, 
                        mode='markers', 
                        hoverinfo='text',
                        marker=dict(showscale=False, 
                                    size=10,
                                    color=node_colors, 
                                    colorscale='Greys',
                                    reversescale=False,
                                    line_width=2),
                        text=hover_texts)

# create figure
fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=0),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

# display figure
plotly.offline.plot(fig, image_filename='r100_references_clustering', image='svg') # saves in ~/Downloads and opens in browser.
fig.show()

## Figure D

In [39]:
# nodes to be highlighted in red
highlight_nodes = ['csr-1', 'hrde-1', 'pgl-3', 'rme-2', 'wago-4', 'wago-1']

G, pos, partition = load_graph_positions_and_clusters()

# sorting nodes to ensure highlighted nodes are processed last and rendered on top
sorted_nodes = sorted(G.nodes(), key=lambda node: node in highlight_nodes)

# generate hover texts and colors for each node
hover_texts = []
node_colors = []
for node in sorted_nodes:
    community_id = partition[node]
    neighbors = list(G.neighbors(node))
    sorted_neighbors = sorted(neighbors)[:10]
    neighbors_str = ", ".join(sorted_neighbors)
    hover_text = f"Gene: {node}<br>Cluster ID: {community_id}<br>Top Neighbors: {neighbors_str}"
    hover_texts.append(hover_text)
    node_colors.append('red' if node in highlight_nodes else 'gray')

# prepare data for edges
edge_x = []
edge_y = []
for edge in G.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

# create traces for edges and nodes
edge_trace = go.Scatter(x=edge_x, y=edge_y, 
                        line=dict(width=0.5, 
                                  color='#888'), 
                        hoverinfo='none', 
                        mode='lines')
node_trace = go.Scatter(x=[pos[node][0] for node in sorted_nodes], 
                        y=[pos[node][1] for node in sorted_nodes], 
                        mode='markers', hoverinfo='text',
                        marker=dict(showscale=False, size=10, color=node_colors, line_width=2),
                        text=hover_texts)

# create figure
fig = go.Figure(data=[edge_trace, node_trace], 
                layout=go.Layout(
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=0),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

# display figure
plotly.offline.plot(fig, image_filename='r100_known_RNA_regulators_clustering', image='svg') # saves in ~/Downloads and opens in browser.
fig.show()

## Figure E

In [40]:
# nodes to be highlighted in red.
highlight_nodes = ['W04B5.1', 'F39E9.7', 'ZK402.3', 'W04B5.2', 'T02G5.4', 'Y105C5A.14','W05H12.2', 'gpx-8']

G, pos, partition = load_graph_positions_and_clusters()

# sorting nodes to ensure highlighted nodes are processed last and rendered on top
sorted_nodes = sorted(G.nodes(), key=lambda node: node in highlight_nodes)

# generate hover texts and colors for each node
hover_texts = []
node_colors = []
for node in sorted_nodes:
    community_id = partition[node]
    neighbors = list(G.neighbors(node))
    sorted_neighbors = sorted(neighbors)[:10]
    neighbors_str = ", ".join(sorted_neighbors)
    hover_text = f"Gene: {node}<br>Cluster ID: {community_id}<br>Top Neighbors: {neighbors_str}"
    hover_texts.append(hover_text)
    node_colors.append('red' if node in highlight_nodes else 'gray')

# prepare edge trace
edge_x = []
edge_y = []
for edge in G.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])
edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(width=0.5, color='#888'), hoverinfo='none', mode='lines')

# prepare node trace
node_trace = go.Scatter(
    x=[pos[node][0] for node in sorted_nodes], 
    y=[pos[node][1] for node in sorted_nodes], 
    mode='markers', 
    hoverinfo='text',
    marker=dict(showscale=False, size=10, color=node_colors, line_width=2),
    text=hover_texts)

# create figure
fig = go.Figure(data=[edge_trace, node_trace], 
                layout=go.Layout(
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=0),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

# display figure
plotly.offline.plot(fig, image_filename='r100_pseudogenes_clustering', image='svg') # saves in ~/Downloads and opens in browser.
fig.show()

# pio.write_image(fig, '../figs/fig2/fig2G.svg', format='svg')

## Figure F

In [41]:
# nodes to be highlighted
red_nodes = ['R03D7.2', 'bath-45','bath-13', 'C04G6.6', 'C08F11.7', 'C38D9.2', 'F15D4.5', 'F41G4.7', 'F55B11.6','F58H7.5', 'fbxb-97', 'K09H9.7', 'rnh-1.3', 'scrm-4', 'timm-17B.2', 'Y48G1BM.6','Y53F4B.5']
light_blue_nodes = ['C18D4.6', 'Y20F4.4'] 
orange_nodes = ['W09B7.2', 'W09B7.1']

G, pos, partition = load_graph_positions_and_clusters()

# initialize dictionary to hold node priorities
node_priorities = {}

# assign priorities based on color groups
for node in G.nodes():
    if node in orange_nodes:
        node_priorities[node] = 4
    elif node in light_blue_nodes:
        node_priorities[node] = 3
    elif node in red_nodes:
        node_priorities[node] = 2
    else:  
        node_priorities[node] = 1 
        
# sort nodes by priority
sorted_nodes = sorted(G.nodes(), key=lambda node: node_priorities[node])

# prepare edge trace
edge_x = []
edge_y = []
for edge in G.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])
edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(width=0.5, color='#888'), hoverinfo='none', mode='lines')

# prepare node trace
hover_texts = []
node_colors = []
for node in sorted_nodes:
    if node in orange_nodes:
        node_colors.append('orange')
    elif node in light_blue_nodes:
        node_colors.append('lightblue')
    elif node in red_nodes:
        node_colors.append('red')
    else:
        node_colors.append('gray')

hover_texts = [f"Gene: {node}<br>Cluster ID: {partition[node]}<br>Top Neighbors: {', '.join(sorted(list(G.neighbors(node)))[:10])}" for node in sorted_nodes]

# append node data
node_trace = go.Scatter(
    x=[pos[node][0] for node in sorted_nodes], 
    y=[pos[node][1] for node in sorted_nodes], 
    mode='markers', 
    hoverinfo='text',
    marker=dict(showscale=False, size=10, color=node_colors, line_width=2),
    text=hover_texts)

# create figure
fig = go.Figure(data=[edge_trace, node_trace], 
                layout=go.Layout(
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=0),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

plotly.offline.plot(fig, image_filename='r100_hrde-1_sid-1_dep_genes_clustering', image='svg') # saves in ~/Downloads and opens in browser.
fig.show()