## Загружаем необходимые библиотеки

In [None]:
import numpy as np
import pandas as pd
from graphviz import Digraph
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings("ignore")

## Загружаем данные

In [None]:
# Load the prepared event log; the file uses ';' as the field separator.
data = pd.read_csv('prepared_data.csv', sep=';')

## Находим последовательность событий в процессе

In [None]:
def shift_event_column(df, id_column, col_transact, time_column):
    """Pair every event with the event that follows it in the same case.

    Rows are ordered by case and, inside a case, by ``time_column``. Two
    columns are added to the ordered copy:

    * ``'concept:name:2'`` - the next event of the same case, or
      ``'Конец лога'`` when the row is the last event of its case;
    * ``'transact'`` - the transition label ``'<event>--><next event>'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Event log with one row per event. It is not modified.
    id_column : str
        Column holding the case identifier.
    col_transact : str
        Column holding the event name.
    time_column : str
        Column used to order the events of a case.

    Returns
    -------
    pandas.DataFrame
        Ordered copy of ``df`` indexed by ``(id_column, original index)``.
        ``id_column`` is also kept as a regular column.
    """
    # Rows without a case id cannot be attached to any case, so leave them out
    # (grouping by the id column drops them as well).
    ordered = df[df[id_column].notna()].sort_values([id_column, time_column])

    # Two-level index (case id, original row label); the second level keeps
    # whatever name the incoming index had.
    ordered.index = pd.MultiIndex.from_arrays(
        [ordered[id_column].values, ordered.index],
        names=[id_column, ordered.index.name],
    )

    # Group by index level rather than by name: the case id is both an index
    # level and a column here, and grouping by the name would be ambiguous.
    # The shifted values stay aligned to their rows, so no positional
    # re-assignment is needed.
    next_event = ordered.groupby(level=0)[col_transact].shift(periods=-1)

    ordered['concept:name:2'] = next_event.fillna('Конец лога')
    ordered['transact'] = ordered[col_transact] + '-->' + ordered['concept:name:2']

    return ordered

In [None]:
def shift_time_column(df_shift_event, id_column, time_column):
    """Compute how long each transition takes, in seconds.

    For every row the timestamp of the next event of the same case is looked
    up and the difference to the row's own timestamp is stored in seconds.
    The last event of a case has no successor and gets a duration of ``0``.

    Side effect: two columns are written to ``df_shift_event`` itself -
    ``'time:timestamp:2'`` (timestamp of the next event; ``2060-01-01`` where
    there is none) and ``'time_diff'`` (duration in seconds).

    Parameters
    ----------
    df_shift_event : pandas.DataFrame
        Frame that already has a ``'transact'`` column. The case id may be an
        index level, a column, or both.
    id_column : str
        Name of the case identifier (index level or column).
    time_column : str
        Column with the event timestamps; parsed with ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        New frame with the columns ``id_column``, ``'transact'`` and
        ``'time_diff'``, one row per input row, with a fresh default index.
    """
    timestamps = pd.to_datetime(df_shift_event[time_column])

    # Take the case ids from the index when they are there; this avoids the
    # ambiguity of a name that is both an index level and a column.
    if id_column in df_shift_event.index.names:
        case_ids = df_shift_event.index.get_level_values(id_column).values
    else:
        case_ids = df_shift_event[id_column].values

    per_case = timestamps.groupby(case_ids)
    next_timestamps = per_case.shift(periods=-1)

    # Identify the last event of each case by position, not by the size of the
    # gap, so that genuinely long gaps are still reported.
    is_last_event = per_case.cumcount(ascending=False) == 0

    seconds = (next_timestamps - timestamps).dt.total_seconds()
    seconds = seconds.mask(is_last_event, 0.0)

    # Placeholder stored as the "next timestamp" of rows that have none.
    t_date = pd.to_datetime('2060-01-01 00:00:00.00')
    if timestamps.dt.tz is not None:
        t_date = t_date.tz_localize(timestamps.dt.tz)

    df_shift_event['time:timestamp:2'] = next_timestamps.fillna(t_date)
    df_shift_event['time_diff'] = seconds

    result = pd.DataFrame(
        {
            id_column: case_ids,
            'transact': df_shift_event['transact'].values,
            'time_diff': seconds.values,
        },
        columns=[id_column, 'transact', 'time_diff'],
    )

    return result

In [None]:
%%time
# Attach to every event the next event of the same case ('case_name') and the
# resulting transition label.
df_temp1 = shift_event_column(data, 'case_name', 'concept:name', 'time:timestamp')

In [None]:
%%time
# Duration of every transition: one row per event with the case id, the
# transition label and the time difference to the next event.
df_temp2 = shift_time_column(df_temp1, 'case_name', 'time:timestamp')

In [None]:
# Preview the first rows of the transition table.
df_temp2.head()

## Проводим подсчет событий

In [None]:
# Transitions that occur fewer times than this are left out of the frequency graph.
MIN_TRANSITION_COUNT = 60

# Count how often each transition occurs. Both columns are named explicitly:
# the names that value_counts().reset_index() produces on its own depend on
# the pandas version, so filtering by a guessed name is fragile.
counts = (
    df_temp2['transact']
    .value_counts()
    .rename_axis('transact')
    .reset_index(name='counts')
)

# Keep only the transitions that occur at least MIN_TRANSITION_COUNT times.
counts2 = counts[counts['counts'] >= MIN_TRANSITION_COUNT]

# Parallel arrays used when drawing the graph: label and frequency.
transact = counts2['transact'].values
countss = counts2['counts'].values

## Строим частотный граф

In [None]:
def get_stat_freq(freq_to_int):
    """Return the 25th, 50th, 75th and 95th percentiles of the frequencies.

    Parameters
    ----------
    freq_to_int : array-like of numbers
        Transition frequencies.

    Returns
    -------
    list of int
        The four percentiles, in that order, each truncated with ``int()``.
    """
    return [int(np.percentile(freq_to_int, q)) for q in (25, 50, 75, 95)]

In [None]:
def change_width_freq(count_transact, stat):
    """Choose the edge width for a transition from how often it occurs.

    Parameters
    ----------
    count_transact : number
        Frequency of the transition.
    stat : sequence of four numbers
        Cut points between the width classes (the frequency graph passes the
        25/50/75/95 percentiles of all frequencies).

    Returns
    -------
    str
        ``'1'`` for ``count_transact <= stat[0]`` up to ``'5'`` for
        ``count_transact > stat[3]``.

    Raises
    ------
    ValueError
        If the value fits no class (for example NaN). Without this check the
        function would fail with an unrelated ``UnboundLocalError``.
    """
    if count_transact <= stat[0]:
        return '1'

    # Middle classes: (stat[0], stat[1]], (stat[1], stat[2]], (stat[2], stat[3]].
    for position, width in enumerate(('2', '3', '4')):
        if stat[position] < count_transact <= stat[position + 1]:
            return width

    if count_transact > stat[3]:
        return '5'

    raise ValueError(
        'cannot choose an edge width for count {!r} with cut points {!r}'.format(
            count_transact, list(stat)))

In [None]:
def change_color_freq(count_transact, stat):
    """Choose the edge colour for a transition from how often it occurs.

    Parameters
    ----------
    count_transact : number
        Frequency of the transition.
    stat : sequence of four numbers
        Cut points between the colour classes (the frequency graph passes the
        25/50/75/95 percentiles of all frequencies).

    Returns
    -------
    str
        Colour name: ``'brown'`` for ``count_transact <= stat[0]``, then
        ``'coral1'``, ``'goldenrod'``, ``'deepskyblue1'`` and finally
        ``'cyan'`` for ``count_transact > stat[3]``.

    Raises
    ------
    ValueError
        If the value fits no class (for example NaN). Without this check the
        function would fail with an unrelated ``UnboundLocalError``.
    """
    if count_transact <= stat[0]:
        return 'brown'

    # Middle classes: (stat[0], stat[1]], (stat[1], stat[2]], (stat[2], stat[3]].
    for position, color in enumerate(('coral1', 'goldenrod', 'deepskyblue1')):
        if stat[position] < count_transact <= stat[position + 1]:
            return color

    if count_transact > stat[3]:
        return 'cyan'

    raise ValueError(
        'cannot choose an edge colour for count {!r} with cut points {!r}'.format(
            count_transact, list(stat)))

In [None]:
# Frequency graph: one edge per frequent transition, labelled with its count.
f = Digraph('finite_state_machine', filename='Рисунок_8')
# Graphviz accepts TB, LR, BT and RL for rankdir; 'T' is not one of them, so
# the top-to-bottom layout is spelled out.
f.attr(rankdir='TB', size='8,5')

# Start and end nodes are drawn as double circles, all other nodes as boxes.
f.attr('node', shape='box', style='filled', color='deepskyblue')
f.node('A_Create Application', shape='doublecircle', color='deepskyblue1')
f.node('Конец лога', shape='doublecircle', color='brown3')
f.attr('node', shape='box', color='lightblue')

# The percentile cut points depend only on the whole set of counts, so they
# are computed once instead of on every iteration. Nothing is computed when
# there are no transitions to draw.
stat_percent = get_stat_freq(countss) if len(countss) else None

for tr, raw_count in zip(transact, countss):
    count = int(raw_count)
    parts = tr.split('-->')
    start = parts[0]
    end = parts[1]
    edge_color = change_color_freq(count, stat_percent)

    f.edge('{0}'.format(start), '{0}'.format(end),
           label='{0}'.format(count), arrowhead='vee',
           penwidth=change_width_freq(count, stat_percent),
           color=edge_color,
           fontcolor=edge_color)

f.view()

## Строим граф с отображением времени перехода

In [None]:
# Minimum, maximum and median duration of every transition, one row per
# transition with the label as a regular column.
duration_by_transition = df_temp2.groupby('transact')['time_diff']
med_time = duration_by_transition.agg(['min', 'max', 'median']).reset_index()

In [None]:
# med_time = med_time[med_time['median'] > 0.1]

In [None]:
def secondsToText(secs):
    """Format a duration given in seconds as a short label.

    Durations above one second are rounded to whole seconds and written as
    ``'<d>d:<h>h:<m>m:<s>s:'``; day, hour and minute parts that are zero are
    left out, the seconds part is always present. Anything else (including
    NaN) is written as the value rounded to four decimals followed by
    ``' sec'``.

    The value is rounded once before it is split into parts, so a part can
    never overflow its range (59.7 becomes ``'1m:0s:'`` rather than
    ``'60s:'``).

    Parameters
    ----------
    secs : float
        Duration in seconds.

    Returns
    -------
    str
        The formatted label.
    """
    if not secs > 1:
        return str(round(secs, 4)) + ' ' + 'sec'

    total_seconds = int(round(secs))
    total_minutes, seconds = divmod(total_seconds, 60)
    total_hours, minutes = divmod(total_minutes, 60)
    days, hours = divmod(total_hours, 24)

    result = ''
    for amount, unit in ((days, 'd'), (hours, 'h'), (minutes, 'm')):
        if amount:
            result += '{}{}:'.format(amount, unit)
    result += '{}s:'.format(seconds)

    return result

In [None]:
def change_width(secs):
    """Choose the edge width for a transition from its duration.

    Parameters
    ----------
    secs : number
        Duration in seconds.

    Returns
    -------
    str
        ``'1'`` up to 1 s, ``'2'`` up to 60 s, ``'3'`` up to 3600 s,
        ``'4'`` up to 36000 s and ``'5'`` above that (upper bounds inclusive).

    Raises
    ------
    ValueError
        If the value fits no class (for example NaN). Without this check the
        function would fail with an unrelated ``UnboundLocalError``.
    """
    # Upper bounds are ascending, so the first one that fits decides the class.
    for width, upper_bound in (('1', 1), ('2', 60), ('3', 3600), ('4', 36000)):
        if secs <= upper_bound:
            return width

    if secs > 36000:
        return '5'

    raise ValueError(
        'cannot choose an edge width for duration {!r}'.format(secs))

In [None]:
def change_color(secs):
    """Choose the edge colour for a transition from its duration.

    Parameters
    ----------
    secs : number
        Duration in seconds.

    Returns
    -------
    str
        ``'brown'`` up to 1 s, ``'coral1'`` up to 60 s, ``'goldenrod'`` up to
        3600 s, ``'deepskyblue1'`` up to 36000 s and ``'cyan'`` above that
        (upper bounds inclusive).

    Raises
    ------
    ValueError
        If the value fits no class (for example NaN). Without this check the
        function would fail with an unrelated ``UnboundLocalError``.
    """
    # Upper bounds are ascending, so the first one that fits decides the class.
    classes = (
        ('brown', 1),
        ('coral1', 60),
        ('goldenrod', 3600),
        ('deepskyblue1', 36000),
    )
    for color, upper_bound in classes:
        if secs <= upper_bound:
            return color

    if secs > 36000:
        return 'cyan'

    raise ValueError(
        'cannot choose an edge colour for duration {!r}'.format(secs))

In [None]:
# Parallel arrays for the duration graph: transition labels and the median
# duration of each transition.
list_trans, times = (med_time[column].values for column in ('transact', 'median'))

In [None]:
# Duration graph: one edge per transition, labelled with its median duration.
f = Digraph('finite_state_machine', filename='Рисунок_10')
# Graphviz accepts TB, LR, BT and RL for rankdir; 'T' is not one of them, so
# the top-to-bottom layout is spelled out.
f.attr(rankdir='TB', size='8,5')


# Start and end nodes are drawn as double circles, all other nodes as boxes.
f.attr('node', shape='box', style='filled', color='deepskyblue')
f.node('A_Create Application', shape='doublecircle')
f.node('Конец лога', shape='doublecircle', color='brown3')
f.attr('node', shape='box', color='lightblue')

for tr, raw_time in zip(list_trans, times):
    time = float(raw_time)
    parts = tr.split('-->')
    start = parts[0]
    end = parts[1]
    # Line and label share one colour, so look it up once per edge.
    edge_color = change_color(time)

    f.edge('{0}'.format(start), '{0}'.format(end),
           label='{0}'.format(secondsToText(time)),
           arrowhead='vee',
           penwidth=change_width(time),
           color=edge_color,
           fontcolor=edge_color)

In [None]:
# Render the duration graph and open the result in a viewer.
f.view()