In [1]:
import csv
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt


from IPython.display import HTML, display
import tabulate

from collections import OrderedDict
from os import listdir
from os.path import isfile, isdir, join, exists

In [2]:
plt.rcParams['figure.figsize'] = [10,5]
plt.rcParams['patch.force_edgecolor'] = True
plt.rcParams['font.size'] = 14

In [18]:
def human_format(num, round_to=2):
    magnitude = 0
    while abs(num) >= 1000:
        magnitude += 1
        num = round(num / 1000.0, round_to)
    return '{:.{}f}{}'.format(round(num, round_to), round_to, ['', 'K', 'M', 'G', 'T', 'P'][magnitude])

In [19]:
datasetpath = '../../src/datasets/'
data = [ f for f in listdir(datasetpath) if isdir(join(datasetpath, f)) and "URV" in f or "contacts" in f  ] 
data

['Emailcontacts', 'EmailURV']

In [20]:
big_loops = 1000
small_loops = 100

In [21]:
for dataset in data:
    G = nx.Graph()
    with open(join(datasetpath,dataset,dataset+'.csv'), 'r') as file:
        reader = csv.reader(file, delimiter=",")
        for row in reader:
            G.add_edge(int(row[0]), int(row[1]))

    n_nodes = len(G.nodes())
    n_edges = len(G.edges())
    kcores = sorted(list(OrderedDict.fromkeys(nx.core_number(G).values()))) 
    n_cores = len(kcores)
    degree = list(dict(nx.degree(G)).values())
    print(dataset)
    print("")
    print("nodes:", n_nodes)
    print("edges:", n_edges)
    print("min k-core:", min(kcores), ", max k-core:", max(kcores))
    print("min degree:", min(degree), ", max degree:", max(degree))
    print("average degree:", np.mean(degree), ", std:", np.std(degree))
    print("average squared degree:", np.mean(np.square(degree)), ", std:", np.std(np.square(degree)))
    print("max ks:", max(kcores))
    print("all cores:", kcores == list(range(min(kcores), max(kcores)+1)))
    print("")
    print("command full:\n")
    print("bash ./loopRunProject.sh -n", big_loops, "-m", n_nodes)
    print("total runs:  ", human_format(big_loops*(n_nodes+1)))
    print("")
    print("bash ./loopRunProject.sh -n", small_loops, "-m", n_nodes)
    print("total runs:", human_format(small_loops*(n_nodes+1)))
    print("")
    print("command lazy:\n")
    print("bash ./loopRunProject.sh -n", big_loops, "-r", n_cores)
    print("total runs:  ", human_format(big_loops*n_cores))
    print("")
    print("bash ./loopRunProject.sh -n", small_loops, "-m", n_nodes)
    print("total runs:", human_format(small_loops*n_cores))
    print("")
    print("====================================")
    print("")    
    
    with open(join(datasetpath,dataset,dataset+".kcore.csv"), 'w') as file:
        writer = csv.writer(file, delimiter=",")
        for i in range(max(nx.core_number(G).values())):
            l = i + 1
            writer.writerow([key for key, val in nx.core_number(G).items() if val == l ])
            
    with open(join(datasetpath,dataset,dataset+".degree.csv"), 'w') as file:
        writer = csv.writer(file, delimiter=",")
        for d in degree:
            writer.writerow([d])

Emailcontacts

nodes: 12625
edges: 20362
min k-core: 1 , max k-core: 23
min degree: 1 , max degree: 576
average degree: 3.225663366336634 , std: 18.59988307228084
average squared degree: 356.36055445544554 , std: 6560.060881767449
max ks: 23
all cores: False

command full:

bash ./loopRunProject.sh -n 1000 -m 12625
total runs:   12.63M

bash ./loopRunProject.sh -n 100 -m 12625
total runs: 1.26M

command lazy:

bash ./loopRunProject.sh -n 1000 -r 22
total runs:   22.00K

bash ./loopRunProject.sh -n 100 -m 12625
total runs: 2.20K


EmailURV

nodes: 1133
edges: 5451
min k-core: 1 , max k-core: 11
min degree: 1 , max degree: 71
average degree: 9.62224183583407 , std: 9.339640177535761
average squared degree: 179.81641659311563 , std: 365.53461773807055
max ks: 11
all cores: True

command full:

bash ./loopRunProject.sh -n 1000 -m 1133
total runs:   1.13M

bash ./loopRunProject.sh -n 100 -m 1133
total runs: 113.40K

command lazy:

bash ./loopRunProject.sh -n 1000 -r 11
total runs:   11.00K


In [8]:
dict(nx.core_number(G))

{1: 10,
 2: 10,
 3: 10,
 4: 9,
 5: 8,
 6: 10,
 7: 10,
 8: 7,
 9: 10,
 10: 9,
 11: 10,
 12: 10,
 13: 10,
 14: 9,
 15: 9,
 16: 10,
 17: 5,
 18: 10,
 19: 10,
 20: 10,
 21: 10,
 22: 10,
 23: 10,
 24: 9,
 25: 9,
 26: 3,
 27: 9,
 28: 8,
 29: 6,
 30: 10,
 31: 10,
 32: 3,
 33: 7,
 34: 9,
 35: 1,
 36: 1,
 37: 1,
 38: 9,
 39: 10,
 40: 9,
 41: 10,
 42: 10,
 43: 8,
 44: 10,
 45: 10,
 46: 9,
 47: 7,
 48: 6,
 49: 10,
 50: 10,
 51: 9,
 52: 10,
 53: 2,
 54: 10,
 55: 9,
 56: 10,
 57: 9,
 58: 9,
 59: 9,
 60: 3,
 61: 8,
 62: 9,
 63: 7,
 64: 9,
 65: 8,
 66: 9,
 67: 8,
 68: 8,
 69: 9,
 70: 9,
 71: 8,
 72: 9,
 73: 8,
 74: 9,
 75: 6,
 76: 10,
 77: 6,
 78: 8,
 79: 8,
 80: 10,
 81: 9,
 82: 9,
 83: 6,
 84: 10,
 85: 10,
 86: 8,
 87: 5,
 88: 9,
 89: 6,
 90: 8,
 91: 6,
 92: 7,
 93: 6,
 94: 9,
 95: 5,
 96: 5,
 97: 2,
 98: 3,
 99: 5,
 100: 3,
 101: 8,
 102: 3,
 103: 5,
 104: 10,
 105: 10,
 106: 9,
 107: 9,
 108: 6,
 109: 2,
 110: 7,
 111: 1,
 112: 9,
 113: 9,
 114: 6,
 115: 6,
 116: 10,
 117: 9,
 118: 6,
 119: 7,
 1

In [26]:
for i in range(max(nx.core_number(G).values())):
    l = i + 1
    print(l, [key for key, val in nx.core_number(G).items() if val == l ])



1 [0, 1, 189, 187, 183, 182, 181, 168, 167, 164, 156, 152, 151, 148, 147, 138, 127, 101, 98, 93, 43, 38, 14, 10, 9, 8, 5, 4, 3, 1361, 6013, 6012, 6011, 6010, 6009, 6008, 6007, 6006, 6005, 6004, 6003, 6002, 6001, 5999, 5998, 5997, 5995, 5994, 5993, 5992, 5990, 5989, 5988, 5987, 5986, 5985, 5984, 5983, 5982, 5981, 5980, 5979, 5978, 5977, 5976, 5974, 5973, 5972, 5971, 5970, 5969, 5968, 5967, 5966, 5965, 5964, 5963, 5960, 5959, 5958, 5957, 5956, 5954, 5953, 5952, 5948, 5947, 5946, 5945, 5944, 5943, 5942, 5941, 5940, 5939, 5938, 5937, 5935, 5934, 5933, 5932, 5931, 5930, 5929, 5928, 5927, 5926, 5925, 5923, 5922, 5921, 7697, 7696, 7695, 7694, 7692, 7691, 7690, 7689, 7688, 7687, 7686, 7685, 7684, 7682, 7681, 7680, 7679, 7678, 7677, 7676, 7674, 7673, 7672, 7671, 7670, 7669, 7668, 7667, 7666, 7664, 7663, 7662, 7660, 7659, 7658, 7657, 7656, 7655, 7654, 7653, 7652, 7651, 7650, 7649, 7648, 7647, 7646, 7645, 7643, 7641, 7640, 7639, 7638, 7637, 7635, 7634, 7633, 7632, 7631, 7630, 7629, 7627, 7625, 76

2 [188, 179, 178, 137, 125, 123, 104, 91, 60, 44, 37, 35, 6000, 5308, 402, 2506, 3872, 322, 5975, 5274, 5962, 5951, 5950, 5949, 5936, 2072, 2064, 7683, 5305, 7675, 2032, 3632, 5701, 7661, 613, 6635, 2315, 7644, 7636, 1851, 7628, 254, 7621, 2728, 7615, 293, 452, 6952, 6948, 3118, 6942, 9714, 9708, 9328, 9694, 6298, 2757, 9667, 2772, 2795, 508, 6115, 6106, 556, 510, 6100, 6091, 1522, 2787, 2551, 2547, 2776, 2775, 2774, 2773, 2767, 2765, 1519, 1516, 7116, 7114, 7112, 5253, 7100, 7099, 2808, 2805, 2804, 2799, 2797, 2641, 7020, 4073, 1119, 7006, 7004, 7002, 7001, 6999, 6997, 6994, 8356, 8374, 8346, 8370, 9386, 5538, 8343, 421, 8339, 8383, 8415, 8396, 8351, 8338, 4773, 2995, 2545, 3930, 2253, 2546, 4971, 4964, 2521, 2517, 2499, 1715, 2254, 4945, 2454, 2237, 2236, 2451, 2461, 2913, 2444, 2228, 4903, 2439, 4901, 2428, 4378, 2225, 2426, 2425, 2220, 3227, 2250, 1713, 2204, 2199, 4794, 4790, 2201, 2416, 1716, 2223, 2249, 4774, 2597, 4771, 1706, 2427, 2232, 2196, 4702, 2363, 2209, 2212, 2200, 2248

6 [180, 96, 68, 1068, 5955, 2214, 5199, 1705, 538, 2336, 6094, 2655, 573, 306, 1833, 2371, 592, 1507, 274, 1176, 826, 1889, 2817, 1987, 2792, 2331, 297, 2368, 2663, 1153, 677, 2373, 743, 692, 308, 1387, 198, 2559, 1574, 268, 910, 248, 3072, 4479, 1348, 1930, 4355, 975, 788, 2422, 961, 2149, 2611, 2290, 6019, 5316, 5116, 226, 1577, 3941, 289, 6934, 4387, 2252, 690, 4569, 1298, 229, 1981, 1446, 2147, 1189, 225, 1505, 2670, 1207, 800, 2658, 4109, 1393, 1190, 768, 700, 423, 2751, 2606, 365, 504, 1381, 6760, 5886, 5135, 356, 8972, 2281, 1840]
7 [186, 165, 154, 144, 139, 124, 117, 94, 89, 83, 78, 5961, 2392, 5261, 1863, 370, 1195, 1104, 2802, 1368, 197, 530, 661, 243, 471, 1559, 211, 2801, 969, 665, 2633, 2813, 2664, 1693, 747, 766, 1905, 706, 723, 261, 635, 339, 2660, 348, 1303, 7502, 977, 513, 5657, 2340, 6033, 6032, 331, 771, 8973, 4225, 1210, 2185, 5584, 4578, 2339, 2614, 3038, 281, 2404, 7174, 3070, 2828, 8698, 2638, 1895, 2593, 232, 7965, 4411]
8 [92, 1918, 1187, 3871, 205, 5187, 2820,

In [36]:
kkkk = [key for key, val in nx.core_number(G).items() if val == 20 ]
kkkk

[106, 76, 314, 224]

In [35]:
kkkk[np.random.choice(len(kkkk),  replace=False)]

76

In [45]:
max(nx.core_number(G).values())

9

In [50]:
from IPython.display import HTML, display
import tabulate
table = [["Sun",696000,1989100000],
         ["Earth",6371,5973.6],
         ["Moon",1737,73.5],
         ["Mars",3390,641.85]]
display(HTML(tabulate.tabulate(table, tablefmt='html')))

0,1,2
Sun,696000,1989100000.0
Earth,6371,5973.6
Moon,1737,73.5
Mars,3390,641.85
