In [9]:
import csv
import os
from pathlib import Path

import networkx as nx
import numpy as np
import pandas as pd
# Cap the number of rows pandas renders when displaying a frame or series.
pd.set_option("display.max_rows", 20)

In [10]:
# Vid's function to import data
def parse_ckn_csv(fname):
    """Build a directed graph from a headerless, tab-separated edge list.

    Each row is read as ``from``, ``to``, ``type``; any further columns are
    collected under the ``rest`` key and ignored. Every row adds the edge
    ``from -> to`` carrying the edge attribute ``type``. Rows whose type is
    ``'binding'`` additionally add the reverse edge ``to -> from`` with the
    same attribute, so binding interactions appear in both directions.

    Parameters
    ----------
    fname : str or path-like
        Path of the edge-list file; passed straight to ``open``.

    Returns
    -------
    networkx.DiGraph
        The graph holding every edge read from the file.

    Raises
    ------
    ValueError
        If a non-blank row has fewer than three tab-separated fields.
    """
    g = nx.DiGraph()
    fields = ['from', 'to', 'type']
    with open(fname, newline='') as csvfile:
        reader = csv.DictReader(csvfile, fieldnames=fields, dialect=csv.excel_tab, restkey='rest')
        for row in reader:
            source, target, edge_type = (row[field] for field in fields)
            # DictReader fills the fields missing from a short row with None;
            # fail here with the offending line instead of handing None to
            # networkx as a node or edge type.
            if target is None or edge_type is None:
                raise ValueError(
                    f"{fname}: line {reader.line_num}: expected at least 3 "
                    f"tab-separated fields (from, to, type), got {row!r}"
                )
            g.add_edge(source, target, type=edge_type)
            if edge_type == 'binding':
                # Binding is stored as a pair of opposite directed edges.
                g.add_edge(target, source, type=edge_type)
    return g

In [11]:
# Directory holding the LKN edge list. Set the CKN_DATA_DIR environment
# variable to run this notebook on another machine; when it is unset the
# original author's local path is used.
DATA_DIR = Path(os.environ.get("CKN_DATA_DIR", "/Users/mmarzi/MLprojects/nib_ckn/data"))
LKN_EDGE_FILE = DATA_DIR / "LKN_ath_v3B_2016-08-30.txt"

G = parse_ckn_csv(LKN_EDGE_FILE)
G

<networkx.classes.digraph.DiGraph at 0x12706b190>

# Basic graph analysis

In [7]:
# How many nodes the graph contains
G.number_of_nodes()

20011

In [8]:
# How many directed edges the graph contains
G.number_of_edges()

94542

In [21]:
# First 10 (node, attribute-dict) pairs; the nodes carry no attributes
nodes_with_attrs = list(G.nodes(data=True))
nodes_with_attrs[:10]

[('6K1', {}),
 ('6K2', {}),
 ('CI', {}),
 ('NIa-Pro', {}),
 ('VPg', {}),
 ('AT1G01010', {}),
 ('AT1G10030', {}),
 ('AT1G12390', {}),
 ('AT1G14360', {}),
 ('AT1G16240', {})]

In [23]:
# First 20 (source, target, attribute-dict) triples
edges_with_attrs = list(G.edges(data=True))
edges_with_attrs[:20]

[('6K1', '6K2', {'type': 'binding'}),
 ('6K1', 'CI', {'type': 'binding'}),
 ('6K2', '6K1', {'type': 'binding'}),
 ('6K2', 'CI', {'type': 'binding'}),
 ('6K2', 'NIa-Pro', {'type': 'binding'}),
 ('6K2', 'VPg', {'type': 'binding'}),
 ('CI', '6K1', {'type': 'binding'}),
 ('CI', '6K2', {'type': 'binding'}),
 ('CI', 'AT1G30380', {'type': 'binding'}),
 ('CI', 'AT5G03160', {'type': 'binding'}),
 ('CI', 'CI', {'type': 'binding'}),
 ('CI', 'CP', {'type': 'binding'}),
 ('CI', 'HC-Pro', {'type': 'binding'}),
 ('CI', 'NIa-Pro', {'type': 'binding'}),
 ('CI', 'NIb', {'type': 'binding'}),
 ('CI', 'P1', {'type': 'binding'}),
 ('CI', 'P3', {'type': 'binding'}),
 ('CI', 'P3N-PIPO', {'type': 'binding'}),
 ('CI', 'VPg', {'type': 'binding'}),
 ('NIa-Pro', '6K2', {'type': 'binding'})]

In [13]:
# TODO: list the components by size and visualise a few.

# Number of weakly connected components, i.e. how many disconnected
# sub-graphs there are when edge direction is ignored
n_weak_components = nx.number_weakly_connected_components(G)
n_weak_components

2427

In [27]:
# Is the whole graph a single strongly connected component?
is_strong = nx.is_strongly_connected(G)
print(is_strong)

# Number of strongly connected components
n_strong_components = nx.number_strongly_connected_components(G)
n_strong_components

False


13386

In [29]:
# Reference example for plotting a degree distribution:
# https://networkx.org/documentation/latest/auto_examples/drawing/plot_degree.html?highlight=degree%20distribution
# TODO: make histograms for in-, out- and total (undirected) degree.
# First 20 (node, degree) pairs; in this directed graph the degree is the
# sum of the node's in-degree and out-degree.
list(G.degree())[:20]

[('6K1', 4),
 ('6K2', 8),
 ('CI', 26),
 ('NIa-Pro', 22),
 ('VPg', 36),
 ('AT1G01010', 62),
 ('AT1G10030', 39),
 ('AT1G12390', 18),
 ('AT1G14360', 160),
 ('AT1G16240', 22),
 ('AT1G17080', 19),
 ('AT1G21790', 14),
 ('AT1G62880', 16),
 ('AT1G75630', 26),
 ('AT1G76670', 17),
 ('AT1G77350', 36),
 ('AT2G02990', 18),
 ('AT2G20740', 8),
 ('AT2G31490', 20),
 ('AT2G32380', 40)]

In [31]:
# First 10 (node, out-degree) pairs
out_degrees = list(G.out_degree())
out_degrees[:10]

[('6K1', 2),
 ('6K2', 4),
 ('CI', 13),
 ('NIa-Pro', 11),
 ('VPg', 18),
 ('AT1G01010', 31),
 ('AT1G10030', 19),
 ('AT1G12390', 9),
 ('AT1G14360', 79),
 ('AT1G16240', 11)]

In [32]:
# First 10 (node, in-degree) pairs
in_degrees = list(G.in_degree())
in_degrees[:10]

[('6K1', 2),
 ('6K2', 4),
 ('CI', 13),
 ('NIa-Pro', 11),
 ('VPg', 18),
 ('AT1G01010', 31),
 ('AT1G10030', 20),
 ('AT1G12390', 9),
 ('AT1G14360', 81),
 ('AT1G16240', 11)]