In [None]:
# Import matplotlib before seaborn
import matplotlib as mpl
import matplotlib.pyplot as plt
import itertools  # for color palette cycling
import os
import re
import pandas as pd
import seaborn as sns
import sys
from cycler import cycler
import seaborn as sns
%matplotlib inline

In [None]:
import numpy as np

import networkx as nx

In [None]:
# Make the project-local helper modules importable by adding their directory
# to the module search path.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not import these helpers on another machine unless the same path exists.
sys.path.append('/work/rnaseq/pcor_new/networkx')
import networkx_helpers as nxh
import networkx_explore as nxe

In [None]:
# IPython autoreload, mode 2: re-import edited modules automatically before
# each cell runs.
%load_ext autoreload
%autoreload 2

In [None]:
# Relative directory holding the results for one cutoff; used below both to
# locate the edge file and to name the plot output directory.
EDGE_DIR = 'cutoff_0.001'

In [None]:
# Output directory for this notebook's plots, named after the edge directory
# so runs with different cutoffs do not overwrite each other.
plot_dir = '170415_networkX_explore_' + EDGE_DIR
print(plot_dir)
# exist_ok=True keeps the cell safe to re-run and avoids the separate
# exists()/mkdir() check; it still raises if a regular file occupies the name.
os.makedirs(plot_dir, exist_ok=True)

In [None]:
# Print the absolute path of the plot directory; `$plot_dir` is IPython's
# interpolation of the Python variable into the shell command.
! realpath $plot_dir

In [None]:
import glob

# Locate the edge table for the selected cutoff directory.
search_path = os.path.join(EDGE_DIR, 'results/data/*top_*_edges.tsv')
# glob returns matches in arbitrary order, so sort them to make the choice
# reproducible when more than one file matches the pattern.
edge_file_matches = sorted(glob.glob(search_path))
if not edge_file_matches:
    # Fail with the pattern in the message instead of a bare IndexError.
    raise FileNotFoundError(
        'no edge file matches {!r}'.format(search_path))
EDGE_FILE = edge_file_matches[0]
print(EDGE_FILE)

In [None]:
# Load the tab-separated edge table; each row is one edge.
edges = pd.read_csv(EDGE_FILE, sep='\t')
NUM_EDGES = edges.shape[0]
# '.0e' prints the count in scientific notation with no decimals
# (e.g. 1e+06), so the printed value is rounded to one significant digit.
print('number of edges: {:.0e}'.format(NUM_EDGES))
# Last expression: display the first rows of the table.
edges.head()

In [None]:
# Histogram of the 'pcor' column over all edges.  The y axis is log-scaled so
# sparsely populated bins remain visible next to the heavily populated ones.
fig, ax = plt.subplots(1, 1, figsize=(4, 2.5))
edges['pcor'].plot.hist(ax=ax, bins=100)
ax.set_yscale('log')
# Label both axes so the figure can be read on its own.
ax.set_xlabel('pcor')
ax.set_ylabel('number of edges');  # trailing ';' hides the Text repr

In [None]:
# How many edges have 'hypothetical protein' in either endpoint's product?
# na=False treats a missing product as "does not contain"; without it the
# mask holds NaN and cannot be used for boolean selection.  regex=False
# makes the match a plain substring test.
has_hypothetical = (
    edges['product_1'].str.contains('hypothetical protein',
                                    regex=False, na=False) |
    edges['product_2'].str.contains('hypothetical protein',
                                    regex=False, na=False))
# int(...) keeps the result a plain Python float, as before.
frac_hypothetical = int(has_hypothetical.sum()) / NUM_EDGES
print('fraction of edges that have "hypothetical protein": {}.  (for {} edges)'.format(
    frac_hypothetical, NUM_EDGES))

In [None]:
def extract_smaller_num(string):
    """Return the number after the underscore of a ``digits_digits`` token.

    Example: '5_151185' --> 151185 (int type).

    The first ``digits_digits`` occurrence anywhere in ``string`` is used.
    Returns None when ``string`` contains no such token.
    """
    match = re.search('[0-9]+_([0-9]+)', string)
    return int(match.group(1)) if match else None


extract_smaller_num('5_151185')

In [None]:
# Show the working directory; EDGE_DIR and plot_dir are relative paths and
# are resolved against it.
! pwd

In [None]:
# Build the network from the edge file with the project helper.
# Presumably returns a networkx graph (later cells call .subgraph() and
# iterate its edges/nodes) — confirm in networkx_helpers.
network = nxh.build_network(EDGE_FILE)

In [None]:
# Restrict the network with a cutoff of 0.085 and draw it with a spring layout.
# NOTE(review): hypothetical=True presumably keeps 'hypothetical protein'
# nodes in the subgraph — confirm in networkx_explore.subgraph_by_cutoff.
SG = nxe.subgraph_by_cutoff(network, cutoff = 0.085, hypothetical=True)
nxe.draw(SG, layout=nx.spring_layout)

In [None]:
# Re-check the working directory (same command as in an earlier cell).
! pwd

In [None]:
# Loosen the cutoff (0.05 instead of 0.085), but keep hypotheticals out.
SG = nxe.subgraph_by_cutoff(network, cutoff = 0.05, hypothetical=False)
image = nxe.draw(SG, layout=nx.spring_layout)
# NOTE(review): plt.figure() opens a NEW, empty figure; `f` is therefore not
# the figure drawn above.  If the intent was to grab the drawn figure, use
# the return value of nxe.draw (if it returns one) — confirm.
f = plt.figure()

In [None]:
# Inspect the class of the object returned by plt.figure() in the previous cell.
type(f)

In [None]:
# Peek at the first six edges and collect the nodes they touch.
# `edges_iter` exists only in networkx 1.x (removed in 2.0, where `edges` is
# itself lazy), so use it when present and fall back to `edges` otherwise.
iter_edges = getattr(network, 'edges_iter', network.edges)
nodes = set()
# islice stops after six edges without a manual counter.
for u, v, d in itertools.islice(iter_edges(data=True), 6):
    # add() stores each node id whole; set.update(u) would split a string id
    # into its characters.
    nodes.add(u)
    nodes.add(v)
    print(u, v, d)
print(nodes)

print('--------')
for n in nodes:
    print(n)

In [None]:
# Print the 'product' attribute of the first six nodes.
# `nodes_iter` exists only in networkx 1.x (removed in 2.0), so use it when
# present and fall back to `nodes`, which also yields (node, attrs) pairs
# when called with data=True.
iter_nodes = getattr(network, 'nodes_iter', network.nodes)
for _node, attrs in itertools.islice(iter_nodes(data=True), 6):
    print(attrs['product'])

In [None]:
# Build a small "toy" graph from the nodes returned for the string 'ethane'.
# The helper yields (node, data) pairs; only the node ids are kept.
# NOTE(review): which node field is searched for 'ethane' is decided inside
# networkx_explore.get_nodes_including_string — confirm there.
toy_nodes = [n for (n, d) in nxe.get_nodes_including_string(network, 'ethane')]
# Wrap the subgraph in nx.Graph(...) to get a standalone graph object built
# from the subgraph's nodes and edges.
toy_graph = nx.Graph(network.subgraph(toy_nodes))
toy_graph_trimmed = nxe.subgraph_by_cutoff(toy_graph, cutoff = 0.01)
print(len(toy_nodes))
print(toy_nodes[0:4])
nxe.draw(toy_graph_trimmed, layout = nx.spring_layout, edge_multiplier=800)