In [1]:
import networkx as nx
import time
import timeit
import numpy as np

from file_utils import read_lines_tsv

In [2]:
# Loading dataset from file
    # source: https://graphchallenge.mit.edu/data-sets
    # file_source: https://graphchallenge.s3.amazonaws.com/snap/as20000102/as20000102_adj.tsv
    # file_format: Adjacency TSV (Tab-Separated Values)
        # <dataset-name>_adj.tsv
        # (Row, Col, Value) tuple describing the adjacency matrix of the graph in tab separated format.
        # Adjacency matrix is of size Num_vertices x Num_vertices
    # file: as20000102_adj.tsv (Autonomous Systems graph from January 02 2000)
    # num of nodes:      6.474
    # num of edges:     25.144
    # num of triangles:  6.584

start = timeit.default_timer()

G = nx.read_edgelist('../datasets/as20000102_adj.tsv', data=(('ajd_value',float),))
print(nx.info(G))

end = timeit.default_timer()
print ('elapsed time: %f' % (end - start))

Name: 
Type: Graph
Number of nodes: 6474
Number of edges: 12572
Average degree:   3.8838
elapsed time: 0.319631


In [3]:
# Alternative way to build graphs from file
# This will allow to generate random samples of graphs from a list of edges in memory

start = timeit.default_timer()

lines = read_lines_tsv('../datasets/as20000102_adj.tsv')
G = nx.parse_edgelist(lines, nodetype = int, data=(('ajd_value',float),))
print(nx.info(G))
print(('Number of lines: %s') % len(lines))

end = timeit.default_timer()
print ('elapsed time: %f' % (end - start))

Name: 
Type: Graph
Number of nodes: 6474
Number of edges: 12572
Average degree:   3.8838
Number of lines: 25144
elapsed time: 0.314244


In [4]:
start = timeit.default_timer()

print('Number of nodes: ', G.number_of_nodes())
print('Number of edges: ', G.number_of_edges())
# each triangle is counted 3 times then divided by 3
print('Number of triangles: ', int(sum(list(nx.triangles(G).values()))/3))

end = timeit.default_timer()
print ('elapsed time: %f' % (end - start))

Number of nodes:  6474
Number of edges:  12572
Number of triangles:  6584
elapsed time: 0.714447


In [6]:
# Using samples to build graphs and to calculate 

for i in range(1000, len(lines)+1, 1000):
    G.clear()
    sample = np.random.choice(lines, i)
    G = nx.parse_edgelist(sample, nodetype = int, data=(('ajd_value',float),))
    print('Graph builded with %s lines:' % i)
    print(nx.info(G))
    print('Number of triangles: ', int(sum(list(nx.triangles(G).values()))/3))
    print('---------------------------------------')

Graph builded with 1000 lines:
Name: 
Type: Graph
Number of nodes: 1117
Number of edges: 970
Average degree:   1.7368
Number of triangles:  4
---------------------------------------
Graph builded with 2000 lines:
Name: 
Type: Graph
Number of nodes: 1937
Number of edges: 1863
Average degree:   1.9236
Number of triangles:  25
---------------------------------------
Graph builded with 3000 lines:
Name: 
Type: Graph
Number of nodes: 2534
Number of edges: 2656
Average degree:   2.0963
Number of triangles:  53
---------------------------------------
Graph builded with 4000 lines:
Name: 
Type: Graph
Number of nodes: 3030
Number of edges: 3416
Average degree:   2.2548
Number of triangles:  183
---------------------------------------
Graph builded with 5000 lines:
Name: 
Type: Graph
Number of nodes: 3433
Number of edges: 4118
Average degree:   2.3991
Number of triangles:  273
---------------------------------------
Graph builded with 6000 lines:
Name: 
Type: Graph
Number of nodes: 3894
Number o