In [1]:
from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from clustering_utils import  build_cdlib_graph, build_ground_truth, get_ordered_communities, get_events_count
from utils import pre_processing_xes, discard_events_in_multiple_activities
import pm4py
import networkx as nx

In [None]:
# LOADING DATA
# Unsegmented event log: a single trace holding the whole list of events.
event_log = xes_importer.apply('BP-Meets-IoT2022/sim_22d1p/EventLogXESNoSegment.xes') # ('sim_22d1p_22/EventLogXESNoSegment.xes')
# Segmented event log: a list of named traces (used later to build the ground truth).
event_log_labelled = xes_importer.apply('BP-Meets-IoT2022/sim_22d1p/EventLogXES_fixed_wc.xes') # ('sim_22d1p_22/EventLogXES.xes')

In [3]:
# Pre-processing: remove noise from both event logs.
# NOTE(review): this cell rebinds its own inputs, so running it twice feeds already
# pre-processed logs back into pre_processing_xes; reload the logs before re-running.
event_log, event_log_labelled = pre_processing_xes(event_log, event_log_labelled)

In [4]:
# Build DFG with PM4PY
dfg_graph = dfg_discovery.apply(event_log)

In [5]:
# Build the graph used by the community-detection algorithms from the DFG.
# build_cdlib_graph returns four values here; skippedEvents presumably holds the
# low-frequency pairs reported in the output below - TODO confirm.
G, nodes, nameIndex, skippedEvents = build_cdlib_graph(dfg_graph)

Skip due to low frequency 3 (change_clothes,go_kitchen_shelf). Level to 4
Skip due to low frequency 1 (go_computer,clean). Level to 4
Skip due to low frequency 1 (clean,go_book_shelf). Level to 4
Skip due to low frequency 2 (go_book_shelf,clean). Level to 4
Skip due to low frequency 2 (clean,go_wardrobe). Level to 4
Skip due to low frequency 3 (switch_computer_off,go_entrace). Level to 4
Skip due to low frequency 3 (interact_with_man,go_bathroom_sink). Level to 4
Skip due to low frequency 1 (put_plate_to_sink,go_book_shelf). Level to 4
Skip due to low frequency 2 (go_wardrobe,clean). Level to 4
Skip due to low frequency 1 (clean,go_wc). Level to 4
Skip due to low frequency 3 (wash_hands,go_kitchen_shelf). Level to 4
Skip due to low frequency 4 (drink_water,go_wardrobe). Level to 4
Skip due to low frequency 1 (clean,go_chair). Level to 4
Skip due to low frequency 2 (go_dining_table,go_bed). Level to 4
Skip due to low frequency 2 (wash_hands,go_entrace). Level to 4
Skip due to low freque

# Ground truth
The ground truth is represented by the communities present in the log file labeled with the names of the activities.


In [13]:
ground_truth, ground_truth_dictionary =  build_ground_truth(event_log_labelled, nameIndex, G)

Event go_wardrobe is in the set? True
Event get_clothes is in the set? True
Event get_clothes is in the set? True
Event change_clothes is in the set? True
Event change_clothes is in the set? True
Event go_bathtub is in the set? True
Event go_bathtub is in the set? True
Event have_bath is in the set? True
Event have_bath is in the set? True
Event go_bathroom_sink is in the set? True
Event brush_teeth is in the set? True
Event brush_teeth is in the set? True
Event go_bed is in the set? True
Event sleep_in_bed is in the set? True
Event sleep_in_bed is in the set? True
Event go_wardrobe is in the set? True
Event change_clothes is in the set? True
Event change_clothes is in the set? True
Event go_wardrobe is in the set? True
Event go_wardrobe is in the set? True
Event get_clothes is in the set? True
Event get_clothes is in the set? True
Event change_clothes is in the set? True
Event change_clothes is in the set? True
Event go_bathtub is in the set? True
Event go_bathtub is in the set? True


In [9]:
def dump_comm_to_names(new_dict):
    """Print one tab-separated line per community, framed by a header and a footer.

    Args:
        new_dict: iterable of community records; each record must provide the
            keys ``id``, ``len`` and ``evt``.
    """
    print("*** Communities to names")
    for record in new_dict:
        row = "Community [{}]\t{}\t{}\t".format(record['id'], record['len'], record['evt'])
        print(row)
    print("*******\n")

In [10]:
def comm_to_names(communities, nodes, sort_events=False):
    """Order the given communities and print them with dump_comm_to_names.

    Args:
        communities: communities to display; passed unchanged to
            get_ordered_communities.
        nodes: node lookup passed unchanged to get_ordered_communities.
        sort_events: forwarded to get_ordered_communities.
    """
    dump_comm_to_names(get_ordered_communities(communities, nodes, sort_events))

Dump the ground-truth communities as lists of event names.

In [11]:
# One plain list of members per labelled activity, in the dictionary's iteration order.
comms_truth = [
    list(ground_truth_dictionary[community_label])
    for community_label in ground_truth_dictionary
]
idx = len(comms_truth)

comm_to_names(comms_truth, nodes, sort_events=True)

[0, 1, 2, 3, 4, 5, 6, 7, 8]
[9, 10, 11, 12, 13]
[14, 15, 16, 0]
[0, 2, 17, 18, 19, 20, 21, 22]
[14, 23, 24, 25, 26]
[27, 28]
[5, 29, 30, 31, 9, 32, 33, 34, 11, 35]
[36, 37, 38, 5, 29]
[39, 40, 52]
[41, 42, 43]
[44, 45]
[41, 46, 47]
[0, 2, 17, 18, 48, 49, 21, 9, 50]
[11, 51]
*** Communities to names
Community [7]	10	eat_cold_meal	get_bread	get_food_from_fridge	go_bathroom_sink	go_dining_table	go_fridge	go_kitchen_shelf	go_kitchen_sink	put_plate_to_sink	wash_hands	
Community [1]	9	brush_teeth	change_clothes	get_clothes	go_bathroom_sink	go_bathtub	go_bed	go_wardrobe	have_bath	sleep_in_bed	
Community [13]	9	change_clothes	dress_down_outdoor	dress_up_outdoor	get_food	go_kitchen_shelf	go_outside	go_shoe_shelf	go_wardrobe	pack_goods	
Community [4]	8	change_clothes	dress_down_outdoor	dress_up_outdoor	finish_walk	go_shoe_shelf	go_wardrobe	go_workplace	work	
Community [2]	5	drink_water	get_glass	get_water	go_kitchen_shelf	go_kitchen_sink	
Community [5]	5	go_computer	go_computer_chair	switch_comp

In [None]:
comm_to_names(comms_truth, nodes, sort_events=False)


# Algorithms evaluation
It often makes sense to execute a given community detection (CD) algorithm multiple times, varying its parameters, so as to identify the optimal configuration w.r.t. a given fitness score.
We now experiment with several algorithms: louvain, leiden, infomap, ...

In [None]:
from cdlib import algorithms
from cdlib import evaluation
from cdlib import ensemble

# Calculate stats comparing ground truth communities and the discovered ones

In [None]:
import copy
from statistics import mean

In [None]:
def coverage_activities(executed_alg, groud_truth, nodes, communities=None, label=None):
    """Measure how well discovered communities cover a set of reference communities.

    Every discovered community is matched with the reference community it covers
    best, where coverage is ``|discovered & reference| / |reference|``. The
    per-community results are then summarised (mean / min / max).

    Args:
        executed_alg: clustering result exposing ``communities`` and
            ``method_parameters``.
        groud_truth: reference clustering exposing ``communities``; also used to
            compute the F1 score. May be None, in which case ``communities`` is
            used as the reference. (The misspelt name is kept so that keyword
            callers keep working.)
        nodes: passed unchanged to get_ordered_communities.
        communities: reference communities used only when ``groud_truth`` is None.
        label: optional tag stored in the result under ``"label"``.

    Returns:
        A dict with the keys ``partitions``, ``partitions_truth``,
        ``covered_partitions``, ``stat_covered_partitions``, ``communities``,
        ``mean_coverage_partitions``, ``min_coverage_partitions``,
        ``max_coverage_partitions``, ``method_parameters``, ``f1_score``,
        ``f1_std`` and, when given, ``label``. Returns None when there is nothing
        to compare (no discovered communities or no reference communities).

    Raises:
        ValueError: if neither ``groud_truth`` nor ``communities`` is provided.
    """
    discovered = executed_alg.communities
    reference = communities if groud_truth is None else groud_truth.communities

    if len(discovered) == 0:
        return None
    if reference is None:
        raise ValueError("coverage_activities needs either groud_truth or communities")

    # Each record is a dict with (at least) the keys "id" and "vals".
    dict_p1 = get_ordered_communities(discovered, nodes, sort_events=True)
    dict_p2 = get_ordered_communities(reference, nodes, sort_events=True)

    # Without communities on either side the ratios and the mean below are undefined.
    if len(dict_p1) == 0 or len(dict_p2) == 0:
        return None

    # Number of discovered communities relative to the expected ones.
    partitions_ratio = len(dict_p1) / len(dict_p2)
    disc_stat = {
        "partitions": len(dict_p1),
        "partitions_truth": len(dict_p2),
        "covered_partitions": partitions_ratio,
        "stat_covered_partitions": (
            "fewer" if partitions_ratio < 1 else
            "higher" if partitions_ratio > 1 else
            "match"
        ),
        "communities": [],
    }

    # Build the reference sets once instead of once per discovered community.
    truth_index = [
        (truth["id"], set(truth["vals"]), len(truth["vals"]))
        for truth in dict_p2
    ]

    for community in dict_p1:
        events = set(community["vals"])

        better_coverage_cluster = 0
        coverage_community_idx = -1
        for truth_id, truth_events, truth_size in truth_index:
            if truth_size == 0:
                # An empty reference community cannot be covered; skip it
                # rather than dividing by zero.
                continue
            coverage_cluster = len(events & truth_events) / truth_size
            # Strict comparison: on ties the first reference community wins.
            if better_coverage_cluster < coverage_cluster:
                better_coverage_cluster = coverage_cluster
                coverage_community_idx = truth_id

        # Copy so that the records returned by get_ordered_communities stay untouched.
        annotated = copy.deepcopy(community)
        annotated["max_coverage_cluster"] = better_coverage_cluster
        annotated["max_coverage_cluster_id"] = coverage_community_idx
        disc_stat["communities"].append(annotated)

    # Summary of the best coverage reached by each discovered community.
    coverages_cluster = [c["max_coverage_cluster"] for c in disc_stat["communities"]]
    disc_stat["mean_coverage_partitions"] = mean(coverages_cluster)
    disc_stat["min_coverage_partitions"] = min(coverages_cluster)
    disc_stat["max_coverage_partitions"] = max(coverages_cluster)
    disc_stat["method_parameters"] = executed_alg.method_parameters

    if label is not None:
        disc_stat["label"] = label

    # The F1 score needs a clustering object, so it is only available with a ground truth.
    if groud_truth is not None:
        f1 = evaluation.f1(executed_alg, groud_truth)
        disc_stat["f1_score"] = f1.score
        disc_stat["f1_std"] = f1.std
    else:
        disc_stat["f1_score"] = None
        disc_stat["f1_std"] = None

    return disc_stat

In [None]:
# Overlapping algorithms executed once with their default parameters.
# The graph is rebuilt after every run so that each algorithm starts from a fresh
# graph (presumably because some algorithms modify it - TODO confirm).
# build_cdlib_graph returns more than three values (see its first call in this
# notebook), so the extra ones are swallowed with *_ instead of raising
# "too many values to unpack".
coms_dcs = algorithms.dcs(G)
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)
coms_ebgc = algorithms.ebgc(G)
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)
coms_graph_entropy = algorithms.graph_entropy(G)
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)
com_lais2 = algorithms.lais2(G)
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)
coms_multicom = algorithms.multicom(G, seed_node=0)
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)
coms_umstmo = algorithms.umstmo(G)
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)
coms_percomvc = algorithms.percomvc(G)
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)

# Disabled runs (note that both call algorithms.congo):
#coms_congo = algorithms.congo(G, number_communities=14, height=2)
#coms_conga = algorithms.congo(G, number_communities=14)

In [None]:
algs ={}

In [None]:
# Register the overlapping results computed with default parameters.
# Not registered here (kept disabled): coms_aslpaw, coms_angel, coms_coach, coms_congo,
# coms_conga, coms_core_expansion, coms_demon, coms_dpclus, coms_ego_networks,
# coms_endntm, com_kclique, coms_ipca, coms_lpam, coms_lpanni, com_lfm,
# coms_node_perception, com_overlapping_seed_set_expansion, coms_slpa, coms_wCommunity.
algs.update({
    'coms_dcs': coms_dcs,
    'coms_ebgc': coms_ebgc,
    'coms_graph_entropy': coms_graph_entropy,
    'com_lais2': com_lais2,
    'coms_multicom': coms_multicom,
    'coms_umstmo': coms_umstmo,
    'coms_percomvc': coms_percomvc,
})

In [None]:
list_algs = []

In [None]:
# Parameter grids for the overlapping algorithms.
# Each "params" entry is [name, start, end, step]; the grid-execution loop later in
# this notebook turns every entry into an ensemble.Parameter with those four fields.

#algorithms.angel
item = {
    "alg" : algorithms.angel, "params" : [
        ["threshold", 0.1, 1, 0.1]
    ]
}; list_algs.append(item)

#algorithms.coach
item = {
    "alg" : algorithms.coach, "params" : [
        ["density_threshold", 0.1, 1, 0.1],
        ["affinity_threshold", 0.1, 1, 0.1],
        ["closeness_threshold", 0.1, 1, 0.1]
    ]
}; list_algs.append(item)

#algorithms.congo (disabled)
# item = {
#     "alg" : algorithms.congo, "params" : [
#         ["height", 1, 10, 1]
#     ]
# }; list_algs.append(item)

#algorithms.core_expansion
# NOTE(review): start and end are both 0.0001 while step is 0.001. If the grid treats
# the range as half-open it is empty and core_expansion never runs - confirm whether
# end/step were meant to be 0.001/0.0001.
item = {
    "alg" : algorithms.core_expansion, "params" : [
        ["tolerance", 0.0001, 0.0001, 0.001]
    ]
}; list_algs.append(item)

#algorithms.demon
item = {
    "alg" : algorithms.demon, "params" : [
        ["epsilon", 0.1, 1, 0.1]
    ]
}; list_algs.append(item)

#algorithms.dpclus
item = {
    "alg" : algorithms.dpclus, "params" : [
        ["d_threshold", 0.1, 2, 0.1],
        ["cp_threshold", 0.1, 2, 0.1]
    ]
}; list_algs.append(item)



In [None]:

# More parameter grids; each "params" entry is [name, start, end, step].
# Entries whose append is commented out are built but NOT added to list_algs.

#algorithms.ego_networks
item = {
    "alg" : algorithms.ego_networks, "params" : [
        ["level", 1, 10, 1]
    ]
}; list_algs.append(item)

#algorithms.endntm (built but not registered: the append is disabled)
item = {
    "alg" : algorithms.endntm, "params" : [
        ["epsilon", 1, 10, 1]
    ]
};# list_algs.append(item)

#algorithms.kclique
item = {
    "alg" : algorithms.kclique, "params" : [
        ["k", 3, 10, 1]
    ]
}; list_algs.append(item)

#algorithms.ipca
item = {
    "alg" : algorithms.ipca, "params" : [
        ["t_in", 0.1, 1, 0.1]
    ]
}; list_algs.append(item)

#algorithms.lemon (built but not registered: the append is disabled)
item = {
    "alg" : algorithms.lemon, "params" : [
        ["expand_step", 1, 10, 1],
        ["subspace_dim", 1, 10, 1],
        ["walk_steps", 1, 10, 1]
    ]
};# list_algs.append(item)

#algorithms.lpam
# NOTE(review): "k" has start == end (1, 1) with step 10; if the range is half-open
# the whole lpam grid is empty - confirm whether [1, 10, 1] was intended.
# NOTE(review): "subspace_dim" also appears in the lemon grid above and may be a
# copy/paste leftover - confirm that lpam accepts it.
item = {
    "alg" : algorithms.lpam, "params" : [
        ["k", 1, 1, 10],
        ["subspace_dim", 1, 10, 1],
        ["threshold", 0.1, 1, 0.1]
    ]
}; list_algs.append(item)

#algorithms.lpanni
item = {
    "alg" : algorithms.lpanni, "params" : [
        ["threshold", 0.1, 1, 0.1]
    ]
}; list_algs.append(item)

#algorithms.lfm
item = {
    "alg" : algorithms.lfm, "params" : [
        ["alpha", 0.1, 1, 0.1]
    ]
}; list_algs.append(item)


In [None]:
#list_algs = []


In [None]:
# Last batch of parameter grids; each "params" entry is [name, start, end, step].
# Entries whose append is commented out are built but NOT added to list_algs.

#algorithms.node_perception
item = {
    "alg" : algorithms.node_perception, "params" : [
        ["threshold", 0.1, 1, 0.1],
        ["overlap_threshold", 0.1, 1, 0.1]
    ]
}; list_algs.append(item)

#algorithms.overlapping_seed_set_expansion (built but not registered: the append is disabled)
item = {
    "alg" : algorithms.overlapping_seed_set_expansion, "params" : [
        ["nruns", 13, 20, 1],
        ["alpha", 0.1, 2, 0.1],
        ["delta", 0.1, 1, 0.1]
    ]
};# list_algs.append(item)

#algorithms.slpa
item = {
    "alg" : algorithms.slpa, "params" : [
        ["t", 21, 30, 1],
        ["r", 0.1, 1, 0.1]
    ]
}; list_algs.append(item)

#algorithms.walkscan
item = {
    "alg" : algorithms.walkscan, "params" : [
        ["nb_steps", 2, 10, 1],
        ["eps", 0.1, 1, 0.1],
        ["min_samples", 3, 10, 1]
    ]
}; list_algs.append(item)

#algorithms.wCommunity
item = {
    "alg" : algorithms.wCommunity, "params" : [
        ["min_bel_degree", 0.1, 0.9, 0.1],
        ["threshold_bel_degree", 0.1, 0.9, 0.1]
    ]
}; list_algs.append(item)

In [None]:
# Run every registered algorithm over its whole parameter grid and store each
# result in `algs` under "<method_name>_<run index>".
for item in list_algs:
    # Fresh graph for every algorithm. build_cdlib_graph returns more than three
    # values (see its first call in this notebook); *_ swallows the extra ones
    # instead of raising "too many values to unpack".
    G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)

    # Each params entry is [name, start, end, step].
    parameters = [
        ensemble.Parameter(name=p[0], start=p[1], end=p[2], step=p[3])
        for p in item["params"]
    ]

    grid_runs = ensemble.grid_execution(graph=G, method=item["alg"], parameters=parameters)
    for idx, coms in enumerate(grid_runs):
        algorithm_key = coms.method_name + "_" + str(idx)
        algs[algorithm_key] = coms
        print("added " + algorithm_key, coms.method_parameters)

In [None]:
# id_name, cover_partitions, stat coverage, mean, min, max
# Computes the coverage statistics of every overlapping result.
# NOTE(review): the result is discarded because the print below is disabled. If it is
# re-enabled, coverage_activities may return None (no communities), which the print
# would not handle.
# Printed columns: id_name, cover_partitions, stat coverage, mean, min, max
for alg, comms_alg in algs.items():
    alg_coverage = coverage_activities(comms_alg, ground_truth, nodes, label="overlapping")
    #print(f"{alg}\t{alg_coverage['covered_partitions']}\t{alg_coverage['stat_covered_partitions']}\t{alg_coverage['mean_coverage_partitions']}\t{alg_coverage['min_coverage_partitions']}\t{alg_coverage['max_coverage_partitions']}")

In [None]:
# list_algs = []
# #           name, start, stop, step 
# item = {
#     "alg" : "name",
#     "params" : [
#         ["name", 1, 1, 1]
#     ]
# }
# list_algs.append(item)


In [None]:
import pandas as pd

In [None]:
def get_stats(algs, label=None):
    """Flatten the coverage statistics of several clustering results into rows.

    Relies on the notebook-level ``ground_truth`` and ``nodes`` variables.

    Args:
        algs: mapping from an algorithm key to its clustering result.
        label: optional tag forwarded to coverage_activities and stored in each row.

    Returns:
        A list with one dict per result for which coverage_activities returned
        something other than None. Each row holds the summary columns followed by
        one column per method parameter (a parameter named like a summary column
        overwrites it).
    """
    rows = []
    for alg_key, result in algs.items():
        coverage = coverage_activities(result, ground_truth, nodes, label=label)
        if coverage is None:
            continue

        row = {
            "alg": alg_key,
            "partitions": coverage['partitions'],
            "truth": coverage['partitions_truth'],
            "covered": coverage['covered_partitions'],
            "matching": coverage['stat_covered_partitions'],
            "mean_coverage": coverage['mean_coverage_partitions'],
            "min_coverage": coverage['min_coverage_partitions'],
            "max_coverage": coverage['max_coverage_partitions'],
            "f1_score": coverage['f1_score'],
            "f1_std": coverage['f1_std'],
        }
        if label is not None:
            row["label"] = label

        # One extra column per algorithm parameter.
        row.update(coverage['method_parameters'].items())
        rows.append(row)
    return rows

In [None]:
structure_algs = get_stats(algs, label="overlapping")

In [None]:
dataframe = pd.DataFrame(structure_algs) 

# Crisp (non overlapping) algorithms

In [None]:
# Fresh graph for the crisp algorithms. build_cdlib_graph returns more than three
# values (see its first call in this notebook); *_ swallows the extra ones instead
# of raising "too many values to unpack".
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)

In [None]:
# Crisp algorithms executed once with fixed parameters.
# The graph is rebuilt after every run so that each algorithm starts from a fresh
# graph (presumably because some algorithms modify it - TODO confirm).
# build_cdlib_graph returns more than three values (see its first call in this
# notebook), so the extra ones are swallowed with *_ instead of raising
# "too many values to unpack".
com_agdl = algorithms.agdl(G, number_communities=3, kc=4)
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)
com_eigenvector = algorithms.eigenvector(G)
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)
com_em = algorithms.em(G, k=14)
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)
coms_greedy_modularity = algorithms.greedy_modularity(G, weight="weight")
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)
coms_infomap = algorithms.infomap(G)
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)
coms_leiden = algorithms.leiden(G, weights="weight")
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)
coms_paris = algorithms.paris(G)
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)

# Disabled single runs (several of these algorithms are grid-searched in later cells):
#com_bayan = algorithms.bayan(G)
#coms_cpm = algorithms.cpm(G, resolution_parameter=1)
#coms_der = algorithms.der(G, 3, .00001, 50)
#com_ga = algorithms.ga(G, population=2, generation=50)
#com_gdmp2 = algorithms.gdmp2(G)
#com_girvan_newman = algorithms.girvan_newman(G, level=3)
#coms_head_tail = algorithms.head_tail(G, head_tail_ratio=0.8)
#coms_louvain = algorithms.louvain(G, weight='weight', resolution=3)
#coms_lswl_plus = algorithms.lswl_plus(G, detect_overlap=False)
#coms_mcode = algorithms.mcode(G, weights="weight")
#coms_pycombo = algorithms.pycombo(G, weight="weight", max_communities=14)
#coms_rber_pots = algorithms.rb_pots(G, weights="weight")
#coms_surprise_communities = algorithms.surprise_communities(G, weights="weight")
#coms_threshold_clustering = algorithms.threshold_clustering(G)

                       

In [None]:
algs_crisp= {}

## Louvain test

In [None]:
from cdlib import ensemble

In [None]:
# Grid search over Louvain's "resolution"; every run is stored in algs_crisp under
# "<method_name>_<run index>".
# *_ swallows the extra values returned by build_cdlib_graph (see its first call in
# this notebook) instead of raising "too many values to unpack".
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)

resolution = ensemble.Parameter(name="resolution", start=0.5, end=10, step=0.1)

for idx, coms in enumerate(ensemble.grid_execution(graph=G, method=algorithms.louvain, parameters=[resolution])):
    algorithm_key = coms.method_name + "_" + str(idx)
    algs_crisp[algorithm_key] = coms

In [None]:
# Grid search over CPM's "resolution_parameter"; every run is stored in algs_crisp
# under "<method_name>_<run index>".
# *_ swallows the extra values returned by build_cdlib_graph (see its first call in
# this notebook) instead of raising "too many values to unpack".
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)

resolution_parameter = ensemble.Parameter(name="resolution_parameter", start=0.1, end=10, step=0.1)

for idx, coms in enumerate(ensemble.grid_execution(graph=G, method=algorithms.cpm, parameters=[resolution_parameter])):
    algorithm_key = coms.method_name + "_" + str(idx)
    algs_crisp[algorithm_key] = coms


In [None]:
# Grid search over the genetic algorithm's "population" and "generation"; every run
# is stored in algs_crisp under "<method_name>_<run index>".
# *_ swallows the extra values returned by build_cdlib_graph (see its first call in
# this notebook) instead of raising "too many values to unpack".
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)

population = ensemble.Parameter(name="population", start=300, end=400, step=10)
generation = ensemble.Parameter(name="generation", start=30, end=40, step=5)

for idx, coms in enumerate(ensemble.grid_execution(graph=G, method=algorithms.ga, parameters=[population, generation])):
    # Progress trace: one line per completed configuration.
    print(coms.method_name, coms.method_parameters)
    algorithm_key = coms.method_name + "_" + str(idx)
    algs_crisp[algorithm_key] = coms


In [None]:
# Grid search over gdmp2's "min_threshold"; every run is stored in algs_crisp under
# "<method_name>_<run index>".
# *_ swallows the extra values returned by build_cdlib_graph (see its first call in
# this notebook) instead of raising "too many values to unpack".
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)

min_threshold = ensemble.Parameter(name="min_threshold", start=0.5, end=2, step=0.1)

for idx, coms in enumerate(ensemble.grid_execution(graph=G, method=algorithms.gdmp2, parameters=[min_threshold])):
    algorithm_key = coms.method_name + "_" + str(idx)
    algs_crisp[algorithm_key] = coms

In [None]:
# Grid search over Girvan-Newman's "level"; every run is stored in algs_crisp under
# "<method_name>_<run index>".
# *_ swallows the extra values returned by build_cdlib_graph (see its first call in
# this notebook) instead of raising "too many values to unpack".
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)

level = ensemble.Parameter(name="level", start=1, end=10, step=1)

for idx, coms in enumerate(ensemble.grid_execution(graph=G, method=algorithms.girvan_newman, parameters=[level])):
    algorithm_key = coms.method_name + "_" + str(idx)
    algs_crisp[algorithm_key] = coms

In [None]:
# Grid search over head_tail's "head_tail_ratio"; every run is stored in algs_crisp
# under "<method_name>_<run index>".
# *_ swallows the extra values returned by build_cdlib_graph (see its first call in
# this notebook) instead of raising "too many values to unpack".
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)

head_tail_ratio = ensemble.Parameter(name="head_tail_ratio", start=0.1, end=1, step=0.1)

for idx, coms in enumerate(ensemble.grid_execution(graph=G, method=algorithms.head_tail, parameters=[head_tail_ratio])):
    algorithm_key = coms.method_name + "_" + str(idx)
    algs_crisp[algorithm_key] = coms

In [None]:
# Grid search over mcode's "weight_threshold"; every run is stored in algs_crisp
# under "<method_name>_<run index>".
# *_ swallows the extra values returned by build_cdlib_graph (see its first call in
# this notebook) instead of raising "too many values to unpack".
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)

weight_threshold = ensemble.Parameter(name="weight_threshold", start=0.1, end=1, step=0.1)

for idx, coms in enumerate(ensemble.grid_execution(graph=G, method=algorithms.mcode, parameters=[weight_threshold])):
    algorithm_key = coms.method_name + "_" + str(idx)
    algs_crisp[algorithm_key] = coms

In [None]:
# Grid search over pycombo's "modularity_resolution"; every run is stored in
# algs_crisp under "<method_name>_<run index>".
# Single-run alternative (disabled):
#coms_pycombo = algorithms.pycombo(G, weight="weight", max_communities=14)
# *_ swallows the extra values returned by build_cdlib_graph (see its first call in
# this notebook) instead of raising "too many values to unpack".
G, nodes, nameIndex, *_ = build_cdlib_graph(dfg_graph)

modularity_resolution = ensemble.Parameter(name="modularity_resolution", start=0.5, end=5, step=0.1)

for idx, coms in enumerate(ensemble.grid_execution(graph=G, method=algorithms.pycombo, parameters=[modularity_resolution])):
    algorithm_key = coms.method_name + "_" + str(idx)
    algs_crisp[algorithm_key] = coms

In [None]:
# Register the crisp results that were computed with fixed parameters.
# Not registered here (kept disabled): coms_cpm, com_ga, com_gdmp2, com_girvan_newman,
# coms_head_tail, coms_louvain, coms_mcode, coms_pycombo.
algs_crisp.update({
    'com_agdl': com_agdl,
    'com_eigenvector': com_eigenvector,
    'com_em': com_em,
    'coms_greedy_modularity': coms_greedy_modularity,
    'coms_infomap': coms_infomap,
    'coms_leiden': coms_leiden,
    'coms_paris': coms_paris,
})

In [None]:
structure_algs_crips = get_stats(algs_crisp, label="crisp")

In [None]:
dataframe2 = pd.DataFrame(structure_algs_crips) 

In [None]:
dataframe_all = pd.concat([dataframe, dataframe2])

In [None]:
dataframe_all.sort_values(["f1_score", "f1_std", "mean_coverage", "partitions"],  ascending =[False, False, False, False], inplace=True)

In [None]:
dataframe_all.to_csv('abstraction/algorithms_stats.csv')

In [None]:
dataframe_all.dtypes

In [None]:
dataframe_all.to_excel("abstraction/algorithms_stats.xlsx")


In [None]:
comm_to_names(algs_crisp['coms_infomap'].communities, nodes, sort_events=False)


# Saving communities

Once the best-performing algorithm has been selected, save its communities to a dedicated file to be used with the CEP engine.

In [None]:
from conversion import write_communities

In [None]:
selected_alg = algs_crisp['coms_infomap']

In [None]:
write_communities(selected_alg, nodes, filename="abstraction/communities_coms_infomap.txt", events_count=get_events_count(event_log))

In [None]:
selected_alg = algs_crisp['Louvain_18']
write_communities(selected_alg, nodes, filename="abstraction/communities_Louvain_18.txt", events_count=get_events_count(event_log))


In [None]:

selected_alg = algs['dpclus_79']
write_communities(selected_alg, nodes, filename="abstraction/communities_dpclus_79.txt", events_count=get_events_count(event_log))


In [None]:
selected_alg = algs_crisp['Louvain_23']
write_communities(selected_alg, nodes, filename="abstraction/communities_Louvain_23.txt", events_count=get_events_count(event_log))


In [None]:
selected_alg = algs_crisp['com_agdl']
write_communities(selected_alg, nodes, filename="abstraction/communities_com_agdl.txt", events_count=get_events_count(event_log))
