In [1]:
import random
import numpy as np
from classes.helper import Helper
import random
import os
from algorithms.pies import pies_sampling
from algorithms.ties import ties_sampling
from algorithms.DLAS import dlas_algorithm
from algorithms.DUE import due_sampling_directed
from algorithms.FF import forest_fire_model
from algorithms.icla_ns import icla_ns_directed
from algorithms.EDLAS import edlas_sampling
from classes.dataset_manager import DatasetManager
from classes.benchmark_temporal import BenchmarkTemporal
import json
# Seed the stdlib and NumPy global random generators so repeated runs start
# from the same state (assumes the sampling algorithms draw from these global
# generators — TODO confirm).
random.seed(42)
np.random.seed(42)
# Reference paper: "Network Sampling via Edge-based Node Selection with Graph Induction"
# https://www.researchgate.net/publication/254639513_Network_Sampling_via_Edge-based_Node_Selection_with_Graph_Induction

In [None]:
# Dataset file to benchmark; the same string is used below as the key into
# `manager.graphs`.
filename = 'soc-redditHyperlinks-title.csv'

# NOTE(review): the full file name is passed as `file_extension` — presumably
# the manager uses it to select matching files in the folder; confirm against
# DatasetManager.
manager = DatasetManager(
    folder_path='data',
    file_extension=filename,
)

# Load the edge lists using the CSV's source / target / timestamp columns.
manager.load_edgelists(
    source_col='SOURCE',
    target_col='TARGET',
    timestamp_col='TIMESTAMP',
)

# Bare last expression so the notebook displays the loaded graph.
manager.graphs[filename]

In [None]:
# Graph that every sampling algorithm draws from.
G = manager.graphs[filename]
helper = Helper()  # NOTE(review): not used by any cell visible here — confirm before removing

# Sampling ratios to evaluate.
fractions = [0.1, 0.2, 0.3]

# St[i] holds the samples drawn at fractions[i], one entry per algorithm.
# The entries must stay in the same order as `order_of_sampling`
# (PIES, TIES, DLAS, DUE, FF, ICLA, EDLAS): a later cell pairs samples with
# those labels purely by position.
# The algorithms are invoked in exactly this order for each ratio; keep it
# stable if the results are meant to be repeatable under the seeds set above.
St = [
    [
        pies_sampling(G=G, phi=ratio),
        ties_sampling(G=G, phi=ratio),
        dlas_algorithm(graph=G, sample_ratio=ratio, num_iterations=10000),
        # DUE and FF: only the last element of the returned value is kept
        # (presumably the final sample — verify against the algorithms).
        due_sampling_directed(graph=G, sampling_ratio=ratio)[-1],
        forest_fire_model(graph=G, min_percent=ratio, p_f=0.2, p_b=0.1)[-1],
        icla_ns_directed(graph=G, f=ratio, tau=0.05),
        edlas_sampling(graph=G, sampling_ratio=ratio),
    ]
    for ratio in fractions
]

In [None]:
# Build one "prefix" subgraph per sampling fraction: entry i is the subgraph of
# G induced on the first int(N * fractions[i]) nodes, in the order G.nodes()
# yields them (N = G.number_of_nodes()). This is one subgraph per fraction,
# not a single fixed 20% slice.
G = manager.graphs[filename]

# Neither the node ordering nor the node count depends on the fraction, so
# compute them once instead of rebuilding the full node list per fraction.
node_order = list(G.nodes())
total_nodes = G.number_of_nodes()

G_smaller = [
    G.subgraph(node_order[:int(total_nodes * fraction)])
    for fraction in fractions
]
# Bare last expression so the notebook displays the list of subgraphs.
G_smaller

In [5]:
# Benchmark object built over the list of per-fraction subgraphs (G_smaller).
benchmark = BenchmarkTemporal(G_smaller)
# Disabled: compute the base statistics once and cache them as JSON under
# results/, skipping the computation when the file already exists.
# path = f'results/base_statistics {filename.split(".")[0]}.json'
# if not os.path.exists(path):
#     results  = benchmark.base_statistics()
#     print(results)
#     with open(path, 'w') as f:
#         json.dump(results, f)

In [6]:
# Labels for the sampling algorithms. The order matters: the benchmarking loop
# pairs these labels with the sample lists by position.
order_of_sampling = [
    "PIES",
    "TIES",
    "DLAS",
    "DUE",
    "FF",
    "ICLA",
    "EDLAS",
]

# Accumulates results, keyed "<fraction>_<method>" by the benchmarking loop.
bench_list = dict()

# NOTE(review): presumably prepares/caches values shared by the T1..T5 tests —
# confirm in BenchmarkTemporal.
benchmark.precompute()

In [None]:
# Regroup St from fraction-major (St[fraction][method]) to method-major
# (St[method][fraction]) so each row holds the samples of one sampling method.
#
# St is overwritten under the same name, so an unconditional transpose would
# flip the layout back whenever this cell is re-run, silently mis-pairing
# samples with method labels in the benchmarking loop. Guard on the current
# shape instead. (If the number of fractions equals the number of methods the
# two layouts cannot be told apart by shape; the transpose is then always
# applied, exactly as before.)
n_fractions, n_methods = len(fractions), len(order_of_sampling)

if len(St) == n_fractions and all(len(row) == n_methods for row in St):
    # Still fraction-major: transpose rows and columns.
    St = [list(per_method) for per_method in zip(*St)]
elif not (len(St) == n_methods and all(len(row) == n_fractions for row in St)):
    # Neither layout: fail loudly rather than benchmark mismatched data.
    raise ValueError(
        f"St has an unexpected shape: expected {n_fractions} rows of "
        f"{n_methods} samples (or the transposed layout), got {len(St)} rows"
    )
# Bare last expression so the notebook displays the regrouped samples.
St

In [None]:
# Run the T1..T5 benchmark tests for every sampling method and store the
# results under "<fraction>_<method>" keys, then write everything to JSON.
#
# NOTE(review): `benchmark.St` receives the method's whole list of samples and
# does not change with `fraction`; inside the inner loop `fraction` only
# affects the log line and the result key. Unless T1..T5 are stateful or
# stochastic, each method's per-fraction entries will be identical — confirm
# whether a per-fraction selection was intended.
for sampling_technique, st_t in zip(order_of_sampling, St):
    for fraction in fractions:
        benchmark.St = st_t
        print(f"-- New Benchmark method: {sampling_technique} fraction: {fraction} --")

        # Tests run in T1 -> T5 order; the dict keeps that insertion order.
        bench_dict = {"T1": benchmark.T1(), "T2": benchmark.T2()}
        # T3 returns a pair, stored as separate hubs / authorities entries.
        statistic_hubs, statistic_authorities = benchmark.T3()
        bench_dict["T3"] = {
            "hubs": statistic_hubs,
            "authorities": statistic_authorities,
        }
        bench_dict["T4"] = benchmark.T4()
        bench_dict["T5"] = benchmark.T5()

        bench_list[f"{round(fraction,5)}_{sampling_technique}"] = bench_dict

# Persist all results next to the notebook, named after the dataset file stem.
results_path = f'results static {filename.split(".")[0]} temporal.json'
with open(results_path, "w") as f:
    json.dump(bench_list, f, indent=4)