In [1]:
import networkx as nx
import time
import timeit

from file_utils import read_lines_tsv

In [2]:
# Load the California road network (roadNet-CA) from disk into a NetworkX graph
# and report how long the load plus summary takes.
#   source:       https://graphchallenge.mit.edu/data-sets
#   file_source:  https://graphchallenge.s3.amazonaws.com/snap/roadNet-CA/roadNet-CA_adj.tsv
#   file_format:  Adjacency TSV (Tab-Separated Values), named <dataset-name>_adj.tsv.
#                 Each line is a (Row, Col, Value) tuple describing the adjacency
#                 matrix of the graph; the matrix is Num_vertices x Num_vertices.
#   file:         roadNet-CA_adj.tsv (Road network of California)
#   num of nodes:     1.965.206
#   num of edges:     5.533.214
#   num of triangles:   120.676
#
# NOTE: the undirected graph built below reports 2.766.607 edges, exactly half
# of the figure listed above -- presumably the file stores every edge once per
# direction and nx.Graph merges the two (TODO confirm against the dataset docs).

start = timeit.default_timer()

# No nodetype is passed, so node labels stay as the strings read from the file.
# The third column is stored as a float edge attribute; the key spelling
# 'ajd_value' (sic) is kept because other cells use the same key.
G = nx.read_edgelist('../datasets/roadNet-CA_adj.tsv', data=(('ajd_value', float),))

# Print the summary field by field instead of calling nx.info(): that helper was
# deprecated in NetworkX 2.7 and removed in 3.0. The lines below reproduce its
# classic output for an undirected graph and work on every NetworkX version.
loaded_nodes = G.number_of_nodes()
loaded_edges = G.number_of_edges()
print('Name: %s' % G.name)
print('Type: %s' % type(G).__name__)
print('Number of nodes: %d' % loaded_nodes)
print('Number of edges: %d' % loaded_edges)
if loaded_nodes > 0:
    # In an undirected graph the degrees sum to twice the edge count.
    print('Average degree: %8.4f' % (2.0 * loaded_edges / loaded_nodes))

end = timeit.default_timer()
print('elapsed time: %f' % (end - start))

Name: 
Type: Graph
Number of nodes: 1965206
Number of edges: 2766607
Average degree:   2.8156
elapsed time: 86.938774


In [None]:
# Alternative way to build the graph from the file: first read all edge lines
# into memory, then parse them. Keeping the raw edge lines in memory makes it
# possible to draw random samples of edges and build sub-graphs from them.
#
# NOTE: this cell rebinds G. Unlike the read_edgelist cell, it passes
# nodetype=int, so node labels here are ints rather than strings.

# Step 1: read the raw lines (read_lines_tsv is a project helper; it is assumed
# to return a sized sequence of edge-list lines, since len() is applied below).
start = timeit.default_timer()
lines = read_lines_tsv('../datasets/roadNet-CA_adj.tsv')
end = timeit.default_timer()
print('elapsed time to read file: %f' % (end - start))

# Step 2: build the graph from the in-memory lines. The attribute key spelling
# 'ajd_value' (sic) is kept to match the other cells.
start = timeit.default_timer()
G = nx.parse_edgelist(lines, nodetype=int, data=(('ajd_value', float),))
end = timeit.default_timer()
print('elapsed time to build graph in memory: %f' % (end - start))

# Step 3: print a summary. nx.info() was deprecated in NetworkX 2.7 and removed
# in 3.0, so its classic output for an undirected graph is reproduced explicitly.
start = timeit.default_timer()
parsed_nodes = G.number_of_nodes()
parsed_edges = G.number_of_edges()
print('Name: %s' % G.name)
print('Type: %s' % type(G).__name__)
print('Number of nodes: %d' % parsed_nodes)
print('Number of edges: %d' % parsed_edges)
if parsed_nodes > 0:
    # In an undirected graph the degrees sum to twice the edge count.
    print('Average degree: %8.4f' % (2.0 * parsed_edges / parsed_nodes))
end = timeit.default_timer()
print('elapsed time to print info: %f' % (end - start))

# Step 4: number of raw lines held in memory (one per adjacency entry).
start = timeit.default_timer()
print(('Number of lines: %s') % len(lines))
end = timeit.default_timer()
print('elapsed time to calculate length of list in memory: %f' % (end - start))

elapsed time to read file: 3.666405


In [3]:
# Basic statistics of the loaded graph G, each timed separately.

# Node count.
start = timeit.default_timer()
print('Number of nodes: ', G.number_of_nodes())
end = timeit.default_timer()
print('elapsed time: %f' % (end - start))

# Edge count.
start = timeit.default_timer()
print('Number of edges: ', G.number_of_edges())
end = timeit.default_timer()
print('elapsed time: %f' % (end - start))

# Triangle count. nx.triangles(G) maps every node to the number of triangles it
# takes part in, so each triangle is counted three times (once per corner).
# Integer floor division keeps the result exact; the previous int(total / 3)
# went through float true division, which can lose precision for large totals.
start = timeit.default_timer()
print('Number of triangles: ', sum(nx.triangles(G).values()) // 3)
end = timeit.default_timer()
print('elapsed time: %f' % (end - start))

Number of nodes:  1965206
elapsed time: 0.109279
Number of edges:  2766607
elapsed time: 2.167284
Number of triangles:  120676
elapsed time: 63.889577
