## Import libraries

In [None]:
import pandas as pd
import os

from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA

from functions import *

import warnings
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
%matplotlib inline

## Load data 

In [None]:
# Directory that holds the two PermitLog CSV exports. It defaults to the
# original location; set the PERMITLOG_DATA_DIR environment variable to run
# the notebook on another machine without editing this cell.
data_dir = os.environ.get('PERMITLOG_DATA_DIR', 'C:/Users/username/Documents')

# Both files are semicolon-separated.
PermitLog_Cases = pd.read_csv(f'{data_dir}/PermitLog_Cases.csv', sep=';')
PermitLog_Events = pd.read_csv(f'{data_dir}/PermitLog_Events.csv', sep=';')

# Cell output: (rows, columns) of each table.
PermitLog_Cases.shape, PermitLog_Events.shape

In [None]:
# Preview the first two rows of the cases table.
PermitLog_Cases.head(2)

In [None]:
# Preview the first rows of the three event-log columns that are later
# passed by name to Prepare: case id, activity name and timestamp.
PermitLog_Events.loc[
    :, ['case_name', 'concept:name', 'time:timestamp']
].head()

## Get event sequences 

In [None]:
# Run the project's Prepare helper on the event log and keep its result.
# NOTE(review): the argument order (activity column, timestamp column,
# case-id column) is taken from the column names — confirm against
# Prepare's signature in `functions`.
res_df = Prepare(
    PermitLog_Events,
    'concept:name',
    'time:timestamp',
    'case_name',
).get_result()

In [None]:
# Preview the first rows of the prepared frame.
res_df.head()

In [None]:
# Number of distinct case identifiers. dropna=False keeps a missing id counted
# as one distinct value, exactly as taking the length of `.unique()` would.
un_case = res_df["case_name"].nunique(dropna=False)
print(f"Количество уникальных идентификаторов = {un_case}")

## Build features with a pivot table

In [None]:
# Feature matrix: one row per case, one column per distinct "transact" value,
# each cell holding how many rows of res_df have that (case, transact) pair;
# pairs that never occur are filled with 0.
p_table = pd.pivot_table(
    res_df,
    index="case_name",
    columns="transact",
    aggfunc="size",
    fill_value=0,
)

In [None]:
# Preview the first rows of the per-case count matrix.
p_table.head()

## KMeans section

In [None]:
# Base K-Means estimator handed to the KMeans_Clusterization helper.
#  * `n_jobs` is no longer passed: it was deprecated in scikit-learn 0.23 and
#    removed in 1.0, where supplying it raises TypeError.
#  * `algorithm='lloyd'` is the current name of the classic algorithm that
#    used to be called 'full' (renamed in 1.1, old name removed in 1.3), so
#    this cell needs scikit-learn >= 1.1.
#  * `n_init=10` pins the former default number of restarts; the default
#    became 'auto' in 1.4, which would otherwise change the result.
#  * `random_state` is fixed so the centroid initialisation is reproducible.
model_kmeans = KMeans(random_state=17, n_init=10, algorithm='lloyd')

In [None]:
# Wrap the estimator and the feature matrix in the project's K-Means helper
# (KMeans_Clusterization is not defined in this notebook).
kmeans_class = KMeans_Clusterization(model_kmeans, p_table)

In [None]:
# Plot the elbow-method chart; presumably used to choose the number of
# clusters passed to `clustering` afterwards — TODO confirm.
kmeans_class.draw_elbow_method_plot()

In [None]:
# Cluster the cases into five groups. The result is indexed below by the
# columns "case_name" and "clusters".
kmeans_result = kmeans_class.clustering(num_clusters=5)

In [None]:
# Attach the K-Means cluster label to every row of the prepared event frame
# by joining on the case identifier (default inner join).
df_kmeans_cl = res_df.merge(
    kmeans_result[["case_name", "clusters"]],
    on="case_name",
)

### Drawing graph

In [None]:
# Cluster label to visualise. Five clusters were requested above, so 4 is
# presumably the last of the labels 0-4 — TODO confirm the labelling.
num_cluster = 4
# Presumably keeps only the rows assigned to that cluster; Select_cluster is
# a project helper that is not defined in this notebook.
for_draw = Select_cluster(df_kmeans_cl, num_cluster).select()

In [None]:
# Draw the frequency graph for the selected cluster; the cluster number is
# embedded in the name passed as `name_file`.
draw_frequency_graph(for_draw, name_file=f'frequency_cl_{num_cluster}')

## DBSCAN section

In [None]:
# Rebuild the per-case count matrix (same construction as in the feature
# section), presumably so the DBSCAN section can be run on its own.
p_table = pd.pivot_table(
    res_df,
    index="case_name",
    columns="transact",
    aggfunc="size",
    fill_value=0,
)

In [None]:
# Wrap the feature matrix in the project's DBSCAN helper
# (DBSCAN_Clusterization is not defined in this notebook).
dbscan_class = DBSCAN_Clusterization(p_table)

In [None]:
# Plot the helper's epsilon-selection chart with its default settings.
dbscan_class.epsilon_optimal_graph()

In [None]:
# Same chart restricted by `ranges`.
# NOTE(review): the four values look like axis limits
# [x_min, x_max, y_min, y_max] — confirm against the helper.
dbscan_class.epsilon_optimal_graph(ranges=[5500, 7000, 0, 40])

In [None]:
# Run DBSCAN through the helper. The result is indexed below by the columns
# "case_name" and "clusters".
# NOTE(review): `min_sampls` is spelled as this call site uses it; it
# presumably maps to DBSCAN's min_samples — confirm before renaming.
dbscan_res = dbscan_class.clustering(eps_val=4, min_sampls=100)

In [None]:
# Attach the DBSCAN cluster label to every row of the prepared event frame
# by joining on the case identifier (default inner join).
df_dbscan_cl = res_df.merge(
    dbscan_res[["case_name", "clusters"]],
    on="case_name",
)

### Drawing graph

In [None]:
# Cluster label to visualise.
# NOTE(review): -1 is presumably DBSCAN's noise label (points assigned to no
# cluster) — confirm that the helper keeps scikit-learn's labelling.
num_cluster = -1
# Presumably keeps only the rows carrying that label; Select_cluster is a
# project helper that is not defined in this notebook.
for_draw = Select_cluster(df_dbscan_cl, num_cluster).select()

In [None]:
# Draw the frequency graph for the selected DBSCAN label; the label is
# embedded in the name passed as `name_file`.
draw_frequency_graph(for_draw, name_file=f'frequency_cl_{num_cluster}')

In [None]:
#Frequency_graph(for_draw, filename = 'Frequency_cl_1').draw_freq(count_treshold = 100, less_or_more = '>')

### Bonus

In [None]:
# Frequency graph again, this time with a count filter.
# NOTE(review): threshold 100 with '<' presumably keeps only items whose
# count is below 100 — confirm against draw_frequency_graph.
draw_frequency_graph(
    for_draw,
    name_file=f'frequency_cl_{num_cluster}',
    count_treshold=100,
    less_or_more='<',
)

In [None]:
# Performance graph for the same selection.
# NOTE(review): type_value='min' with time threshold 8 and '>' presumably
# keeps only items whose minimum duration exceeds 8 (units unknown) —
# confirm against draw_performance_graph.
draw_performance_graph(
    for_draw,
    name_file=f'performance_cl_{num_cluster}',
    type_value='min',
    time_treshold=8,
    less_or_more='>',
)

In [None]:
#Performance_graph(for_draw, filename = 'Performance_cl_1').draw_perform(time_treshold = 8, type_value = 'min', less_or_more = '>')