# Artificial benchmark: Demon on Rdyn
**Description**

*Run the Demon algorithm on 3 instances of the Rdyn benchmark with 10 000 nodes, each evolving over 100 iterations, with the default parameters.*

0. requirements and setup

In [0]:
# 1. generate files over
# 2. run demon on every instance
# 3. calculate F1 statistics
# 4. plot results.  

In [0]:
# filestructure:
    

In [1]:
# Pin the versions this notebook was run with (see the install log below) so a
# fresh kernel gets the same packages. %pip installs into the environment of the
# running kernel, which !pip does not guarantee.
%pip install demon==2.0.4
%pip install nf1==0.0.3

Collecting demon
  Downloading https://files.pythonhosted.org/packages/0d/91/710b28712f5ff5f3e678d2c0119a9eaa40b15ca125232a34ab96c9b28520/demon-2.0.4-py3-none-any.whl
Installing collected packages: demon
Successfully installed demon-2.0.4
Collecting nf1
  Downloading https://files.pythonhosted.org/packages/4c/41/18e9d6c802db1084c157a0d6c5b38be5878f925e1cf53d7eb03edc7ec6ad/nf1-0.0.3-py3-none-any.whl
Installing collected packages: nf1
Successfully installed nf1-0.0.3


1. imports

In [0]:
import os
import demon as d
import pandas as pd

2. Datapaths + links to folders

In [0]:
# Path pieces are joined by plain string concatenation further down, so the
# leading/trailing slashes are part of the values.
datapath = "gdrive/My Drive/data/"
folder = "rdyn_data_6/results/"
output_folder = "/rdyn_data_6/output_dem/"

# Label stored in the "algorithm" column of the generated tables.
algorithm_name = "demon"




""" Parses the names of an Rdyn instance and returns the relevant arguments

    :: return : dict with size, avd, pa, pd

"""

def parser_args_from_fn(res_fn):
    """Parse the name of an Rdyn instance into its numeric arguments.

    The name is split on ``_`` and the fields at positions 0, 2, 7 and 8 are
    converted to numbers.

    :param res_fn: either a list of instance names, of which only the first
        entry is parsed, or a single instance name given as a string.
    :return: dict with ``size`` (int), ``avd`` (int), ``pa`` (float) and
        ``pd`` (float).
    :raises IndexError: if the list is empty or the name has fewer than nine
        ``_``-separated fields.
    :raises ValueError: if one of the selected fields is not numeric.
    """
    # Indexing a plain string would pick its first character, which can never
    # be parsed successfully, so a string is taken as the instance name itself.
    name = res_fn if isinstance(res_fn, str) else res_fn[0]
    fields = name.split("_")
    return {
        'size': int(fields[0]),
        'avd': int(fields[2]),
        'pa': float(fields[7]),
        'pd': float(fields[8]),
    }






3. Run Demon on three instances of Rdyn

In [0]:
# First attempt at running Demon on a real-world network.

# Dedicated names are used so the shared `datapath` setting read by the other
# cells is not replaced by the path of a single file.
edgestream_fn = "gdrive/My Drive/data/nf1/edgestream_1.txt"

# The output folder is derived from the directory of the input file rather than
# from the file name, and is created up front so the result file can be written.
nf1_output_dir = os.path.join(os.path.dirname(edgestream_fn), "nf1output")
os.makedirs(nf1_output_dir, exist_ok=True)

dm = d.Demon(network_filename=edgestream_fn,
             epsilon=0.25, min_community_size=3,
             file_output=os.path.join(nf1_output_dir, "demonnf1.txt"))
dm.execute()

In [0]:
# Run this code to make the output folders for the instances of Rdyn.
# NOTE: os.listdir returns entries in arbitrary order, so the slice below picks
# a stable subset only as long as the listing order does not change.
res_fn = os.listdir(datapath+data_folder)
for rd in res_fn[268:]:
    print (rd)
    # makedirs with exist_ok=True keeps the cell re-runnable and also creates
    # the parent output folder when it does not exist yet.
    os.makedirs(datapath+output_folder+rd, exist_ok=True)

In [0]:
# Root folder of the Rdyn data and the two sub-folder names appended to it.
# The pieces are joined by plain string concatenation, so the trailing slashes matter.
datapath = 'gdrive/My Drive/data/rdyn_data_6/'
data_folder = 'results/'
output_folder = 'output_dem/'


# Names of all entries found in the results folder.
results_fn = os.listdir(datapath+data_folder)

def run_algorithm_on_rdyn(datapath, data_folder, output_folder,
                          start_index=268, max_instances=1):
    """Run Demon on the graph files of Rdyn instances and record the run times.

    For every selected instance folder below ``datapath + data_folder``, each
    file whose name starts with ``graph`` is passed to Demon, and the result is
    written to a file of the same name below
    ``datapath + output_folder + <instance>``. One timing row per instance is
    collected and saved to
    ``datapath + "execution_times_" + algorithm_name + ".txt"``.

    :param datapath: root folder (string ending in a path separator).
    :param data_folder: sub-folder of ``datapath`` holding one folder per instance.
    :param output_folder: sub-folder of ``datapath`` receiving the Demon output.
    :param start_index: position in the folder listing at which to start.
        The default reproduces the previously hard-coded slice.
    :param max_instances: maximum number of instances to process; ``None``
        processes every instance from ``start_index`` on. The default of 1
        reproduces the previous behaviour of stopping after the first instance.
    :return: None. Progress and the total run time are printed.
    """
    import time

    start_global = time.time()
    times = []

    # NOTE: os.listdir returns entries in arbitrary order, so start_index refers
    # to a position in that listing, not to a sorted rank.
    results_fn = os.listdir(datapath+data_folder)
    selected = results_fn[start_index:]
    if max_instances is not None:
        selected = selected[:max_instances]

    for rdyn_instance in selected:
        # Report the instance actually being processed.
        print (rdyn_instance)
        instance_in = datapath+data_folder+rdyn_instance
        instance_out = datapath+output_folder+rdyn_instance

        graph_fn = [x for x in os.listdir(instance_in) if x.startswith('graph')]

        # Demon writes one result file per graph into this folder; make sure it exists.
        os.makedirs(instance_out, exist_ok=True)

        start = time.time()
        print("Status: Start : ", rdyn_instance )

        # TODO: check how to suppress the output of demon.
        for graph in graph_fn:
            dm = d.Demon(network_filename=instance_in+"/"+graph,
                         epsilon=0.25, min_community_size=3,
                         file_output=instance_out+"/"+graph)
            dm.execute()
        end = time.time()

        times.append({
            "algorithm": algorithm_name,
            "rdyn_instance_name": rdyn_instance,
            # The elapsed seconds are stored with a decimal comma.
            "execution_time": str(end-start).replace(".", ","),
        })
        print ("Status: End : ", rdyn_instance, " took ", end - start, " s.")

    df = pd.DataFrame(times)
    df.to_csv(datapath+"execution_times_"+algorithm_name+".txt", sep=' ', index=False)

    end_global = time.time()
    print(end_global - start_global)
    
# Run Demon with the configured folders; this also writes the execution-time table.
run_algorithm_on_rdyn(datapath,data_folder, output_folder) 

In [0]:
from nf1 import NF1
import pandas as pd

#! important note: 
#!   
#! In order to avoid biased evaluations be sure to comply with the following rules:
#!
#!    A community must be composed of at least 3 nodes.
#!    No nested communities: a community must not be a proper subset of another community.

# Folders used for the evaluation: ground truth is read from data_folder and the
# algorithm output from output_folder, both below datapath.
datapath = 'gdrive/My Drive/data/rdyn_data_6/'
data_folder = 'results/'
output_folder = 'output_dem/'

def read_coms(filename):
    """Read communities from a text file, one community per line.

    A line may contain a bracketed or parenthesised list (the node ids are then
    taken from the text after the first opening bracket), or bare node ids
    separated by spaces, tabs or commas. Blank lines and lines without any node
    id are skipped.

    :param filename: path of the file to read.
    :return: list of tuples of ints, one tuple per community, in file order.
    :raises ValueError: if a token cannot be converted to ``int``.
    :raises Exception: if a line yields exactly two node ids.
    """
    com = []
    with open(filename) as f:
        for raw in f:
            line = raw.strip()
            if not line:
                # A blank line (e.g. at the end of the file) holds no community.
                continue

            normalised = line.replace("(", "[").replace(")", "]")
            if "[" in normalised:
                # Keep only the list content that follows the first opening bracket.
                members = normalised.replace("]", "").replace(" ", "").split("[")[1]
            else:
                # No bracketed list: every token on the line is read as a node id.
                members = line

            ns = tuple(int(tok) for tok in members.replace(",", " ").split())
            if not ns:
                continue

            # NOTE(review): lines with exactly two ids are rejected; presumably this
            # guards against reading an edge list as communities - confirm the intent.
            if len(ns) == 2:
                raise Exception("%s: line %r yields exactly two node ids" % (filename, line))
            com.append(ns)
    return com



# TODO: make sure the Rdyn instances are read correctly from both folders.

def create_output_dataframe(datapath, data_folder, output_folder):
    """Collect the NF1 scores of the algorithm output for every Rdyn instance.

    For each instance folder below ``datapath + data_folder``, the files whose
    names start with ``communities`` (ground truth) are matched with the files
    starting with ``graph`` found below ``datapath + output_folder + <instance>``
    (algorithm output). Each pair is scored with ``NF1`` and the first five
    values returned by ``get_f1()`` are stored as mean, std, max, min and mode,
    written as strings with a decimal comma.

    :param datapath: root folder (string ending in a path separator).
    :param data_folder: sub-folder of ``datapath`` with the Rdyn instances.
    :param output_folder: sub-folder of ``datapath`` with the algorithm output.
    :return: ``pandas.DataFrame`` with one row per scored file pair; pairs whose
        algorithm output contains no community are left out.
    :raises Exception: if an instance has a different number of output and
        ground-truth files, or if a paired output/ground-truth file do not
        carry the same name suffix.
    """
    rows_list = []

    # Sorted so that the row order does not depend on the listing order.
    for rdyn_instance in sorted(os.listdir(datapath+data_folder)):
        rdyn_graphs = os.listdir(datapath+data_folder+rdyn_instance)
        alg_results = os.listdir(datapath+output_folder+rdyn_instance)
        print(rdyn_instance, rdyn_graphs, alg_results)

        comm_fn = sorted(x for x in rdyn_graphs if x.startswith('communities'))
        alg_results_fn = sorted(x for x in alg_results if x.startswith('graph'))

        if len(alg_results_fn) != len(comm_fn):
            raise Exception("The length of graph_fn should equal comm_fn")

        # Equal counts alone do not guarantee that the i-th output belongs to the
        # i-th ground truth, so the part of the name after the prefix is compared.
        pairs = list(zip(alg_results_fn, comm_fn))
        for result_fn, truth_fn in pairs:
            if result_fn[len('graph'):] != truth_fn[len('communities'):]:
                raise Exception("Snapshot mismatch in %s: %s is paired with %s"
                                % (rdyn_instance, result_fn, truth_fn))

        for result_fn, truth_fn in pairs:
            identified_comm = read_coms(datapath+output_folder+rdyn_instance+"/"+result_fn)
            gt = read_coms(datapath+data_folder+rdyn_instance+"/"+truth_fn)
            if identified_comm == []:
                # Nothing was detected for this file, so there is nothing to score.
                continue

            nf = NF1(identified_comm, gt)
            res = list(nf.get_f1())
            mean = str(res[0])
            std = str(res[1])
            maxx = str(res[2])
            minn = str(res[3])
            mode = str(res[4])

            row = {
                "algorithm": algorithm_name,
                "rdyn_instance": str(rdyn_instance),
                "graph_n": str(result_fn),
                "mean": mean.replace(".",","),
                "std" : std.replace(".",","),
                "min": minn.replace(".",","),
                "max": maxx.replace(".",","),
                "mode": mode.replace(".",",")
            }
            print (row)
            rows_list.append(row)

    df = pd.DataFrame(rows_list)

    return df


# Build the F1 table for the configured folders and store it as a space-separated text file.
dff= create_output_dataframe(datapath, data_folder, output_folder)
dff.to_csv(datapath+"output_281"+algorithm_name+".txt", sep=' ', index=False)

2000_100_15_0.6_0.8_0.2_1_2.5_0 ['interactions.txt', 'events.txt', 'communities-1.txt', 'graph-1.txt', 'communities-5.txt', 'graph-5.txt', 'communities-15.txt', 'graph-15.txt', 'communities-23.txt', 'graph-23.txt', 'communities-27.txt', 'graph-27.txt', 'communities-34.txt', 'graph-34.txt', 'communities-38.txt', 'graph-38.txt', 'communities-55.txt', 'graph-55.txt', 'communities-57.txt', 'graph-57.txt', 'communities-65.txt', 'graph-65.txt', 'communities-89.txt', 'graph-89.txt', 'communities-94.txt', 'graph-94.txt', 'communities-98.txt', 'graph-98.txt', 'communities-99.txt', 'graph-99.txt'] ['graph-27.txt', 'graph-34.txt', 'graph-38.txt', 'graph-55.txt', 'graph-57.txt', 'graph-65.txt', 'graph-89.txt', 'graph-94.txt', 'graph-98.txt', 'graph-99.txt', 'graph-1.txt', 'graph-5.txt', 'graph-15.txt', 'graph-23.txt']
{'algorithm': 'demon', 'rdyn_instance': '2000_100_15_0.6_0.8_0.2_1_2.5_0', 'graph_n': 'graph-15.txt', 'mean': '0,27090909090909093', 'std': '0,3456865249814042', 'min': '0,06', 'max'

In [0]:
# Show the name of the entry at position 269 of the results folder listing.
results_fn = os.listdir(datapath+data_folder)
print(results_fn[269])

2000_100_15_0.6_0.8_0.2_1_3.5_90
