In [15]:
import csv
import networkx as nx
import matplotlib.pyplot as plt

from collections import OrderedDict
from os import listdir
from os.path import isfile, isdir, join, exists

In [2]:
# Notebook-wide matplotlib defaults: figure size, forced patch edge colours
# and the base font size.
for option, value in (
    ('figure.figsize', [10,5]),
    ('patch.force_edgecolor', True),
    ('font.size', 14),
):
    plt.rcParams[option] = value

In [61]:
def human_format(num, round_to=2):
    """Format a number with a thousands suffix ('', K, M, G, T, P).

    The value is repeatedly divided by 1000 (rounding to ``round_to``
    decimals after each division) until its magnitude is below 1000 or the
    largest available suffix has been reached.

    Args:
        num: Number to format.
        round_to: Number of decimal places kept in the result.

    Returns:
        The scaled value rendered with ``round_to`` decimals followed by its
        suffix, e.g. ``human_format(12626000)`` -> ``'12.63M'``. Values too
        large for the suffix table stay expressed in ``'P'``.
    """
    suffixes = ['', 'K', 'M', 'G', 'T', 'P']
    magnitude = 0
    # Stop at the last suffix so very large values cannot index past the table.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num = round(num / 1000.0, round_to)
    return '{:.{}f}{}'.format(round(num, round_to), round_to, suffixes[magnitude])

In [3]:
# Root folder of the datasets; only its sub-directories are kept as dataset names.
datasetpath = '../../src/datasets/'
data = [
    entry
    for entry in listdir(datasetpath)
    if isdir(join(datasetpath, entry))
]
data

['Emailcontacts',
 'EmailURV',
 'EmailEUCore',
 'POK',
 'AdultIMDB',
 'LesMiserables']

In [59]:
# Loop counts used to estimate the total number of runs per dataset.
big_loops, small_loops = 1000, 100

In [63]:
# For every dataset: load its edge list into an undirected graph, print basic
# statistics, and print the loopRunProject.sh command lines together with the
# number of runs they imply for the big and small loop counts.
for dataset in data:
    G = nx.Graph()
    # newline='' is how the csv module documentation says to open csv files.
    with open(join(datasetpath, dataset, dataset + '.csv'), 'r', newline='') as handle:
        for row in csv.reader(handle, delimiter=","):
            if not row:
                # Blank lines are returned as empty rows and carry no edge.
                continue
            G.add_edge(int(row[0]), int(row[1]))

    n_nodes = G.number_of_nodes()
    n_edges = G.number_of_edges()
    # Distinct core numbers that occur in the graph, in ascending order.
    kcores = sorted(set(nx.core_number(G).values()))
    n_cores = len(kcores)

    print(dataset)
    print("")
    print("nodes:", n_nodes)
    print("edges:", n_edges)
    print("min k-core:", min(kcores), ", max k-core:", max(kcores))
    # True when every integer between the smallest and largest core number occurs.
    print("all cores:", kcores == list(range(min(kcores), max(kcores)+1)))
    print("")
    print("command full:")
    # The command lines use big_loops; the notebook defines no `loops` variable.
    print("bash ./loopRunProject.sh -n", big_loops, "-m", n_nodes)
    print("total runs big:  ", human_format(big_loops*(n_nodes+1)))
    print("total runs small:", human_format(small_loops*(n_nodes+1)))
    print("")
    print("command lazy:")
    print("bash ./loopRunProject.sh -n", big_loops, "-r", n_cores)
    print("total runs big:  ", human_format(big_loops*n_cores))
    print("total runs small:", human_format(small_loops*n_cores))
    print("")
    print("====================================")
    print("")

Emailcontacts

nodes: 12625
edges: 20362
min k-core: 1 , max k-core: 23
all cores: False

command full:
bash ./loopRunProject.sh -n 1000 -m 12625
total runs big:   12.63M
total runs small: 1.26M

command lazy:
bash ./loopRunProject.sh -n 1000 -r 22
total runs big:   22.00K
total runs small: 2.20K


EmailURV

nodes: 1133
edges: 5451
min k-core: 1 , max k-core: 11
all cores: True

command full:
bash ./loopRunProject.sh -n 1000 -m 1133
total runs big:   1.13M
total runs small: 113.40K

command lazy:
bash ./loopRunProject.sh -n 1000 -r 11
total runs big:   11.00K
total runs small: 1.10K


EmailEUCore

nodes: 986
edges: 16064
min k-core: 1 , max k-core: 34
all cores: True

command full:
bash ./loopRunProject.sh -n 1000 -m 986
total runs big:   987.00K
total runs small: 98.70K

command lazy:
bash ./loopRunProject.sh -n 1000 -r 34
total runs big:   34.00K
total runs small: 3.40K


POK

nodes: 28876
edges: 115324
min k-core: 1 , max k-core: 17
all cores: True

command full:
bash ./loopRunProje