In [1]:
# !pip install python-docx

In [2]:
import os
import random
import pandas as pd
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH


from Utils.utils1 import *
import Utils.graph_functions as grf

import warnings
# Installs a global "ignore" filter: every warning category is suppressed for the
# rest of the kernel session, including deprecation warnings raised by the
# libraries used in this notebook.
warnings.filterwarnings('ignore')

In [3]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import normalized_mutual_info_score

import networkx as nx
from infomap import Infomap
from itertools import combinations
from collections import defaultdict
import community as community_louvain

import pm4py
from pm4py.objects.log.util import dataframe_utils
from pm4py.algo.discovery.dfg import algorithm as dfg_discovery
from pm4py.objects.conversion.log import converter as log_converter


In [4]:
# Report document setup: reuse the .docx already on disk when there is one,
# otherwise start a fresh document that opens with a centred level-1 title.
filename = "Cluster-Results.docx"
Combination = 2

if not os.path.exists(filename):
    doc = Document()
    doc.add_heading('Cluster Results', level=1).alignment = WD_ALIGN_PARAGRAPH.CENTER
else:
    doc = Document(filename)

# In both cases a centred level-3 heading naming the current iteration is appended.
centered_heading = doc.add_heading(f'Iteration {Combination}', level=3)
centered_heading.alignment = WD_ALIGN_PARAGRAPH.CENTER

In [5]:
# Run configuration and main iteration loop.
#
# For every iteration this cell obtains a segmented log (loaded from disk or
# produced by process_random_logs), counts its trace-interleaving cases and
# appends the tagged counts to `interleaved_results`. The clustering /
# evaluation / export pipeline further down is currently disabled (commented out).

# === RUN MODE OPTIONS ===
# 1: Use already saved logs
# 2: Use specified routines
# 3: New random selection
run_mode = 1  # Set to 1, 2, or 3
if run_mode not in (1, 2, 3):
    # Fail fast: any other value would otherwise fall silently into the
    # random-selection branch of the loop below.
    raise ValueError(f"run_mode must be 1, 2, or 3 (got {run_mode!r})")

specified_routines = [52, 66, 85]  # Only used if run_mode == 2

technique = "Our"
noise_level = 0
results_file_name = f'{technique}_all_results_noise_{noise_level}.xlsx'
results_dir = "out/results/"
# exist_ok=True creates the directory only when it is missing, with no separate
# existence check.
os.makedirs(results_dir, exist_ok=True)
output_dir = f"Transformed_Logs_and_Results/{technique}/Transformed_Log_With_Noise_{noise_level}"

interleaved_results = []     # one counts record per completed iteration
all_results = []             # only filled by the disabled evaluation pipeline below
all_iteration_metadata = []  # only filled by the disabled metadata collection below
input_dir = "Saved_Logs"
logs = pd.read_csv(f"{input_dir}/gt_labeled_all_routine_logs.csv", low_memory=False)
num_iterations = 1 if run_mode == 2 else 100  # Only one iteration for specified routines

for i in range(1, num_iterations + 1):
    print(f"===== iteration {i} =====")
    if run_mode == 1:
        # Option 1: Use already saved logs
        segment_log = pd.read_csv(f"{output_dir}/segment_log{i}.csv")
        random_logs = pd.read_csv(f"{output_dir}/unsegment_log{i}.csv")
        random_numbers = list(segment_log['routine_type'].unique())
        iteration_metadata = None  # No metadata for saved logs
    elif run_mode == 2:
        # Option 2: Use specified routines (only one iteration)
        segment_log, random_logs, random_numbers, doc, iteration_metadata = process_random_logs(
            logs, document=doc, variance_criteria='max', specified_routines=specified_routines
        )
        if segment_log is None:
            print("Specified routines did not yield enough traces. Exiting.")
            break
    else:
        print("\n","="*5, f"Start Iteration {i}:", "="*5)
        # Option 3: New random selection (current behavior)
        # NOTE(review): unlike run_mode 2, segment_log is not checked for None
        # here -- confirm process_random_logs cannot return None in this mode.
        segment_log, random_logs, random_numbers, doc, iteration_metadata = process_random_logs(
            logs, document=doc, variance_criteria='min'
        )
        # segment_log.to_csv(f"{output_dir}/segment_log{i}.csv", index=False)
        # random_logs.to_csv(f"{output_dir}/unsegment_log{i}.csv", index=False)

    # Count the interleaving cases for this iteration's segmented log and tag
    # the record with the iteration label before collecting it.
    trace_interleaved_counts = count_trace_interleaving_cases(segment_log)
    trace_interleaved_counts = update_avg_counts(trace_interleaved_counts)
    trace_interleaved_counts['iteration'] = f"iteration_{i}"
    interleaved_results.append(trace_interleaved_counts)
    
#     # Store metadata for Excel export
#     all_iteration_metadata.append(iteration_metadata)

#     doc = plot_interleaved_routines(segment_log, random_numbers, doc, image_path=f"out/plots/interleaved_routines_plot{i}.png", interleaved_only=False)
#     plot_trace_interleaving_cases(segment_log, image_path=f"out/plots/trace_interleaving_cases_bar{i}.png")

#     dfg = grf.discover_dfg(random_logs)
#     G_Directed = grf.get_Network_Graph(dfg, output_filename=f"Graph_Matrix_Directed{i}.csv")
#     G_Directed_Scored2 = grf.get_scored2_grpah_directed(G_Directed, output_filename=f"Graph_Matrix_UnDirected{i}.csv")

#     doc.add_heading('Infomap Clustering with scored2 directed graph', level=2)
#     infomap_clusters, doc = grf.infomap_clustering(G_Directed_Scored2, document=doc, MRT=4.0)

#     results_dict = evaluate_clusters_with_jc(infomap_clusters, segment_log)
#     results_dict = append_averages_to_results(results_dict, random_numbers)
#     all_results.append(results_dict)

#     doc = display_and_export_results_doc(results_dict, doc, table_title="Cluster Evaluation Summary (Infomap)")

# display_and_export_results_xlx(all_results, all_iteration_metadata, results_dir, results_file_name)
# plot_jc_boxplot_by_non_interleaving_bins("out/results/Our_all_results_noise_0_new.xlsx")

===== iteration 1 =====

Not interleaved: 260
Interleaved with one: 636
Interleaved with two or more: 311

===== iteration 2 =====

Not interleaved: 545
Interleaved with one: 326
Interleaved with two or more: 60

===== iteration 3 =====

Not interleaved: 329
Interleaved with one: 311
Interleaved with two or more: 63

===== iteration 4 =====

Not interleaved: 558
Interleaved with one: 322
Interleaved with two or more: 71

===== iteration 5 =====

Not interleaved: 120
Interleaved with one: 579
Interleaved with two or more: 503

===== iteration 6 =====

Not interleaved: 419
Interleaved with one: 189
Interleaved with two or more: 16

===== iteration 7 =====

Not interleaved: 136
Interleaved with one: 747
Interleaved with two or more: 703

===== iteration 8 =====

Not interleaved: 182
Interleaved with one: 533
Interleaved with two or more: 363

===== iteration 9 =====

Not interleaved: 233
Interleaved with one: 99
Interleaved with two or more: 17

===== iteration 10 =====

Not interleaved: 

In [6]:
# Export the interleaving statistics collected by the iteration loop:
# one row per iteration plus a final AVERAGE row, written as CSV.
if not interleaved_results:
    # Without any records the DataFrame has no columns and the mean lookups
    # below would fail with an opaque KeyError.
    raise ValueError("interleaved_results is empty; run the iteration loop before exporting statistics.")

# Convert to DataFrame
df = pd.DataFrame(interleaved_results)

# ---- Add Average Row at Bottom ----
stat_columns = [
    'not_interleaved',
    'interleaved_with_one',
    'interleaved_with_two_or_more',
    'avg_not_interleaved',
    'avg_interleaved',
]
avg_row = {'iteration': 'AVERAGE'}
for column in stat_columns:
    avg_row[column] = df[column].mean()
df = pd.concat([df, pd.DataFrame([avg_row])], ignore_index=True)

# Report the path that was actually written (the file name includes the noise level).
stats_path = f"out/results/interleave_stats_noise_{noise_level}.csv"
df.to_csv(stats_path, index=False)
print(f"Saved to {stats_path}")

Saved to interleave_stats.csv


In [None]:
# # Save the updated document
# doc.save(filename)
# print(f"✔ Appended results to {filename}")