In [2]:
import pm4py
import io
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns

from pm4py.objects.log.importer.xes import importer as xes_importer
from pm4py.algo.discovery.alpha import algorithm as alpha_miner
from pm4py.visualization.petri_net import visualizer as pn_visualizer
from pm4py.visualization.bpmn import visualizer as bpmn_vis
from pm4py.statistics.traces.generic.log import case_statistics

In [3]:
from src.loader import load_data

In [4]:
log_domestic = load_data('domestic')
log_international = load_data('international')

parsing log, completed traces ::   0%|          | 0/10500 [00:00<?, ?it/s]

parsing log, completed traces ::   0%|          | 0/6449 [00:00<?, ?it/s]

In [5]:
dom_case_duration = pm4py.get_all_case_durations(log_domestic, activity_key='concept:name', case_id_key='case:concept:name',timestamp_key='time:timestamp')
int_case_duration = pm4py.get_all_case_durations(log_international, activity_key='concept:name', case_id_key='case:concept:name',timestamp_key='time:timestamp')

In [11]:
def convert_case_duration_to_hms(ms):
    seconds = ms /1000
    hours = int(ms // 3600)
    minutes = (seconds % 3600) // 60
    seconds = (seconds % 3600) % 60
    return hours, minutes, seconds

med_int_case_duration = np.mean(int_case_duration)
med_domestic_case_duration = np.mean(dom_case_duration)

int_h, int_m, int_s = convert_case_duration_to_hms(int(med_int_case_duration))
domestic_h, domestic_m, domestic_s = convert_case_duration_to_hms(int(med_domestic_case_duration))

print(f"Durchschnitt der Bearbeitungsdauer für internationale Reiseanträge: {int(int_h)} Stunden, {int(int_m)} Minuten, {int(int_s)} Sekunden")
print(f"Durchschnitt der Bearbeitungsdauer für inländische Reiseanträge: {int(domestic_h)} Stunden, {int(domestic_m)} Minuten, {int(domestic_s)} Sekunden")


Durchschnitt der Bearbeitungsdauer für internationale Reiseanträge: 2074 Stunden, 4 Minuten, 29 Sekunden
Durchschnitt der Bearbeitungsdauer für inländische Reiseanträge: 276 Stunden, 16 Minuten, 35 Sekunden


In [11]:
import pm4py
import pandas as pd
import numpy as np

# Funktion zur Berechnung der Aktivitätsdauer
def calculate_activity_durations(log):
    df = pm4py.convert_to_dataframe(log)
    df['duration'] = df.groupby('case:concept:name')['time:timestamp'].diff().dt.total_seconds()
    return df

# Aktivitätsdauer für jeden Log berechnen
df_domestic = calculate_activity_durations(log_domestic)
df_international = calculate_activity_durations(log_international)

# Funktion zur Berechnung von Statistiken pro Aktivität
def activity_duration_statistics(df):
    stats = df.groupby('concept:name')['duration'].agg(['mean', 'median', 'min', 'max', 'std']).reset_index()
    stats_filtered = stats#[stats['median'] > 86400]
    return stats_filtered

# Statistiken für inländische und internationale Aktivitäten berechnen
domestic_stats = activity_duration_statistics(df_domestic)
international_stats = activity_duration_statistics(df_international)

# Sortierung nach 'mean' in absteigender Reihenfolge, bevor die Umwandlung durchgeführt wird
domestic_stats_sorted = domestic_stats.sort_values(by='mean', ascending=False)
international_stats_sorted = international_stats.sort_values(by='mean', ascending=False)

# Umrechnung der Dauer in Tage, Stunden, Minuten, Sekunden
def convert_seconds_to_dhms(seconds):
    if np.isnan(seconds):  # NaN-Werte behandeln
        return "NaN"
    days = int(seconds // (24 * 3600))
    seconds = seconds % (24 * 3600)
    hours = int(seconds // 3600)
    seconds %= 3600
    minutes = int(seconds // 60)
    seconds %= 60
    return f"{days} days {hours:02}:{minutes:02}:{seconds:02}"

# Anwendung der Umrechnung auf die Spalten der Statistiken
for column in ['mean', 'median', 'min', 'max', 'std']:
    domestic_stats_sorted[column] = domestic_stats_sorted[column].apply(lambda x: convert_seconds_to_dhms(x))
    international_stats_sorted[column] = international_stats_sorted[column].apply(lambda x: convert_seconds_to_dhms(x))

# Ergebnisse als Tabellen anzeigen
print("Domestic Declarations (sorted by mean)")
display(domestic_stats_sorted)

print("International Declarations (sorted by mean)")
display(international_stats_sorted)

Domestic Declarations (sorted by mean)


Unnamed: 0,concept:name,mean,median,min,max,std
10,Declaration REJECTED by MISSING,11 days 02:26:14.087912087910809,3 days 01:39:8.0,0 days 00:18:25.0,293 days 08:39:1.0,36 days 11:12:11.173972286749631
7,Declaration REJECTED by ADMINISTRATION,5 days 15:54:6.007352941203862,0 days 00:01:35.5,0 days 00:00:1.0,469 days 06:40:18.0,29 days 11:43:16.184812592342496
14,Declaration SUBMITTED by EMPLOYEE,5 days 07:03:49.359348198981024,0 days 05:12:39.0,0 days 00:00:29.0,356 days 22:08:4.0,18 days 15:35:49.64518926362507
15,Payment Handled,3 days 15:05:45.140681003569625,3 days 05:46:19.5,0 days 02:19:6.0,284 days 05:35:30.0,3 days 21:06:30.20641245529987
16,Request Payment,3 days 03:56:58.345219123526476,1 days 03:25:56.5,0 days 00:00:0.0,234 days 15:19:13.0,7 days 04:22:5.132526379078627
9,Declaration REJECTED by EMPLOYEE,2 days 08:13:1.315018315013731,0 days 23:07:37.0,0 days 00:00:8.0,55 days 04:30:36.0,4 days 04:08:20.005586758663412
12,Declaration REJECTED by SUPERVISOR,2 days 02:59:37.27303754267632,0 days 19:57:42.0,0 days 00:00:2.0,22 days 03:17:3.0,3 days 00:35:17.828304017544724
3,Declaration FINAL_APPROVED by SUPERVISOR,2 days 00:30:35.537854111142224,0 days 21:00:16.0,0 days 00:00:1.0,144 days 02:19:41.0,3 days 20:51:37.60624424600974
11,Declaration REJECTED by PRE_APPROVER,1 days 22:43:55.22093023255002,0 days 00:01:5.5,0 days 00:00:2.0,71 days 00:02:27.0,8 days 16:35:52.916239037993364
1,Declaration APPROVED by BUDGET OWNER,1 days 21:32:48.943262411339674,0 days 20:46:16.5,0 days 00:00:18.0,49 days 22:18:50.0,3 days 03:05:12.978159898717422


International Declarations (sorted by mean)


Unnamed: 0,concept:name,mean,median,min,max,std
33,Start trip,48 days 07:17:1.2161499387584627,28 days 06:28:38.0,0 days 00:06:44.0,454 days 09:43:15.0,70 days 04:26:33.187973115593195
32,Send Reminder,37 days 19:30:54.64516129018739,39 days 06:00:6.0,0 days 00:00:0.0,139 days 15:13:3.0,17 days 11:54:4.634669904131442
30,Permit SUBMITTED by EMPLOYEE,20 days 05:42:34.34443288249895,4 days 14:08:44.0,0 days 00:00:47.0,366 days 11:20:13.0,44 days 00:34:28.09060653252527
25,Permit REJECTED by DIRECTOR,20 days 01:48:2.0,20 days 01:48:2.0,20 days 01:48:2.0,20 days 01:48:2.0,
13,Declaration SAVED by EMPLOYEE,19 days 00:02:8.16417910438031,9 days 09:46:54.0,0 days 00:29:24.0,185 days 14:33:30.0,29 days 22:46:0.29115054197609425
27,Permit REJECTED by MISSING,15 days 09:23:38.95348837203346,3 days 01:44:16.0,0 days 00:01:16.0,115 days 10:12:50.0,28 days 00:19:52.505049059633166
14,Declaration SUBMITTED by EMPLOYEE,11 days 18:24:59.834243369754404,4 days 13:11:35.5,0 days 00:00:42.0,332 days 00:19:6.0,24 days 22:11:18.106748166959733
15,End trip,7 days 13:34:41.55667545355391,4 days 00:00:0.0,0 days 00:00:0.0,660 days 06:27:57.0,25 days 16:39:28.62707201158628
10,Declaration REJECTED by MISSING,4 days 19:45:57.51456310681533,2 days 18:00:54.0,0 days 01:45:45.0,45 days 01:18:53.0,6 days 04:01:34.052888249163516
8,Declaration REJECTED by DIRECTOR,4 days 19:03:13.25,4 days 21:25:50.5,2 days 04:34:24.0,7 days 04:46:48.0,2 days 05:18:1.30152323335642
