In [24]:
from IPython.display import display, HTML, clear_output
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, Layout, HBox, VBox
import pandas as pd
import time
import re
import os
import sys
import requests
import subprocess
import numpy as np
from json import JSONDecodeError
import sqlite3
from sqlite3 import OperationalError
# --- Input data and cached state --------------------------------------------
data_dir = './data/experiments'
# The leading ' ' entry is the "nothing selected" placeholder that run_service
# checks for. The listing is sorted so the menu order does not depend on the
# order in which the filesystem happens to return entries.
data_files = [' '] + sorted(os.listdir(data_dir))
# Drop the service response cached by a previous session so that stale
# clusters are never shown.
if os.path.isfile('response.npy'):
    os.remove('response.npy')

# --- Widget styles ----------------------------------------------------------
# A single-element tuple is enough to pre-select the placeholder entry.
default = (data_files[0],)
style = {'description_width': 'initial'}
features_layout = {'width': 'max-content', 'height': '200px'}

# --- File selection menu ----------------------------------------------------
file_dd = widgets.SelectMultiple(options=data_files, value=default,
    description='File:', style=style, layout=features_layout)

# --- Analyse button ---------------------------------------------------------
button = widgets.Button(description="Analyse", style=style)
button.style.button_color = 'lightgreen'

# --- Metric weight menus ----------------------------------------------------
metrics_layout = {'display': 'flex', 'width': '130px', 'height': '30px',
                  'justify_content': 'flex-end'}
# Selectable weights '1' .. '10' (kept as strings, as the dropdowns expect).
options = [str(weight) for weight in range(1, 11)]
# Metric name -> (optimisation direction, default weight). Defined exactly once;
# the notebook previously repeated this identical literal further down.
metrics_optimize = {
    'min_max_tpc': ('min', 1),
    'wcss': ('min', 1),
    'bcss': ('max', 1),
    'ch_index': ('max', 1),
    'db_index': ('min', 1),
    'silhouette': ('max', 1),
    'words_pairs': ('max', 1),
}
metrics = list(metrics_optimize.keys())
# One weight dropdown per metric, keyed by metric name.
metrics_menues = {}
for metric in metrics:
    menue = widgets.Dropdown(options=options, value='1', description=metric,
                             layout=metrics_layout)
    metrics_menues[metric] = menue

# --- Granularity slider -----------------------------------------------------
granularity = widgets.IntSlider(value=100, min=2, max=1000, step=1,
                                description='Number of Clusters',
                                orientation='horizontal', readout=True,
                                readout_format='d',
                                style={'description_width': 'initial'},
                                layout=Layout(width='400px'))
# The slider value is only sent to the service when this toggle is on.
apply_granularity = widgets.ToggleButton(value=False, description='Select granularity level?',
    disabled=False, button_style='info', tooltip='Description',
    icon='check', layout=Layout(width='200px'))

# --- Minimal cluster size ---------------------------------------------------
min_cluster_menue = widgets.Dropdown(options=['0'] + options, value='4',
                                     description='Minimum number of tasks in cluster',
                                     style={'description_width': 'initial'},
                                     layout=Layout(width='300px'))

# --- Service ----------------------------------------------------------------
# Canonical loopback address; the previous '127.0.0.01' spelling only resolves
# on resolvers that tolerate a leading zero in an octet.
url = 'http://127.0.0.1:6002/cluster_analysis/api/v0.1/clustering'
out = widgets.Output()
# Results database queried by run_service for experiment ids and scores.
conn = sqlite3.connect('./results/CAdb')
c = conn.cursor()

def print_results(b):
    """Print the activity names stored under the selected cluster key.

    Click handler for the "Activity Names" button. Reads the selection from
    ``clusters_keys_dd`` and the cached service response from
    ``response.npy``, then prints the key followed by every name listed
    under it in the first run of that response.

    Parameters
    ----------
    b : the widget that fired the click event (unused).
    """
    #clear_output()
    selection = clusters_keys_dd.value
    # The selection is a tuple; it is empty when every entry is deselected,
    # and holds the ' ' placeholder when nothing real has been picked.
    if not selection or selection[0] == ' ':
        print('No key selected')
        return
    cluster_key = selection[0]

    # The cache only exists after a successful analysis run.
    if not os.path.isfile('response.npy'):
        print('No analysis results available')
        return

    # NOTE: allow_pickle=True unpickles the file, which can execute arbitrary
    # code - only load a response.npy written by this notebook.
    cached = np.load('response.npy', allow_pickle=True)[()]
    # The response maps run id -> clusters; only the first run is shown.
    best_run = list(cached.values())[0]

    # The menu may have been built from an older response than the cached one.
    if cluster_key not in best_run:
        print('Key not found in current results:', cluster_key)
        return

    sep_length = len(cluster_key) + 5
    # Print results
    print(sep_length * '=')
    print('key:', cluster_key)
    print(sep_length * '-')
    for name in best_run[cluster_key]:
        print(name)


def _max_experiment_id():
    """Return the highest experiment_id in the experiments table, or None if it is empty."""
    ids = pd.read_sql_query("SELECT experiment_id FROM experiments", conn).iloc[:, 0]
    return None if ids.empty else int(ids.astype(int).max())


def run_service(b):
    """Send the selected file and settings to the clustering service and show the scores.

    Click handler for the "Analyse" button. Builds the request configuration
    from the dashboard widgets, posts the selected data file to ``url``,
    caches the JSON response in ``response.npy`` and displays the row(s) of
    the ``experiments`` table that belong to the most recent experiment.

    Failures (no file selected, unreadable file, unreachable service,
    non-JSON reply) are reported with ``print`` instead of raising, because
    the function runs as a widget callback.

    Parameters
    ----------
    b : the widget that fired the click event (unused).
    """
    selection = file_dd.value
    # The selection is a tuple; it is empty when every entry is deselected,
    # and holds the ' ' placeholder when nothing real has been picked.
    if not selection or selection[0] == ' ':
        print('No file selected')
        return
    file = selection[0]

    config = {}

    # Experiment id: one past the highest stored id, or 1 for an empty table.
    last_id = _max_experiment_id()
    experiment_id = 1 if last_id is None else last_id + 1
    print('experiment_id:', experiment_id)
    config['experiment_id'] = experiment_id

    # Dropdown.value is the selected option string itself. Indexing it with
    # [0] would keep only its first character and turn '10' into '1'.
    min_cluster_size = min_cluster_menue.value
    print('min_cluster_size:', min_cluster_size)
    config['min_cluster_size'] = min_cluster_size

    # Metric weights
    for metric, menue in metrics_menues.items():
        config[metric] = menue.value
    if apply_granularity.value:
        config['num_clusters'] = granularity.value

    data_path = os.path.join(data_dir, file)
    try:
        # The context manager closes the upload handle even if the post fails.
        with open(data_path, 'rb') as data_file:
            response = requests.post(url, files={'file': data_file}, data=config)
    except (OSError, requests.exceptions.RequestException) as err:
        print('Request failed:', err)
        return

    # Only the decoding step is guarded. ValueError is the common base of the
    # JSON decode errors raised by the json, simplejson and requests variants.
    try:
        result = response.json()
    except ValueError:
        print(response.text)
        return

    #print('response')
    #print(result)
    # Cache the response on disk so other cells can read the clusters.
    np.save('response.npy', result)
    print('Best performing run id=', list(result.keys())[0])

    current_id = _max_experiment_id()
    if current_id is None:
        print('No experiments recorded')
        return
    run_cols = ['project_name', 'customer', 'run_start', 'run_end', 'duration', 'tasks_count']
    # A bound parameter replaces string formatting of the SQL statement.
    experiment_df = pd.read_sql_query(
        "SELECT * FROM experiments WHERE experiment_id=?",
        conn, params=(current_id,)).drop(run_cols, axis=1)
    display(HTML('<h1 style="color:magenta">Run Scores </h1>'))
    display(experiment_df)

In [25]:
# Dashboard
# Hook the Analyse button up to the service call, show the instructions, and
# lay out the three control columns: file picker, clustering options, weights.
button.on_click(run_service)
display(HTML('<h1 style="color:magenta">Cluster Activities</h1>\
              <p style="color:blue">Use the following menus to submit a file for analysis:</p>\
                 <ul>\
                  <li style="color:magenta">File to analyze</li>\
                  <li style="color:magenta">Select granularity level</li>\
                  <li style="color:magenta">Set weights for validation metrics</li>\
                </ul>'))
file_box = VBox(children=[file_dd, button])
metrics_box = VBox(children=list(metrics_menues.values()))
config_box = VBox(children=[apply_granularity, granularity, min_cluster_menue])
# Last expression of the cell: rendered as the cell output.
HBox(children=[file_box, config_box, metrics_box])

HBox(children=(VBox(children=(SelectMultiple(description='File:', index=(0, 0), layout=Layout(height='200px', …

experiment_id: 26
min_cluster_size: 4


In [26]:
display(HTML('<h1 style="color:magenta">Cluster names for best performing run </h1>'))
# The key list is built once, when this cell runs, from the cached service
# response. Re-run the cell after a new analysis to refresh the menu.
# The leading ' ' entry is the "nothing selected" placeholder.
clusters_keys = [' ']
if os.path.isfile('response.npy'):
    # NOTE: allow_pickle=True unpickles the file, which can execute arbitrary
    # code - only load a response.npy written by this notebook.
    response = np.load('response.npy', allow_pickle=True)[()]
    # The response maps run id -> clusters; only the first run is listed.
    response = list(response.values())[0]
    clusters_keys += list(response.keys())
# A single-element tuple is enough to pre-select the placeholder entry.
default = (clusters_keys[0],)
clusters_keys_dd = widgets.SelectMultiple(options=clusters_keys, value=default,
    description='Clusters Keys:', style=style, layout=features_layout)

# Run buttons
# Uses its own name so the global `button` (the Analyse button wired up in the
# dashboard cell) is not overwritten; otherwise re-running the dashboard cell
# would attach run_service to this button instead.
names_button = widgets.Button(description="Activity Names", style=style)
names_button.style.button_color = 'lightgreen'

output = widgets.Output()
names_button.on_click(print_results)
# Last expression of the cell: rendered as the cell output.
HBox(children=[clusters_keys_dd, names_button])

HBox(children=(SelectMultiple(description='Clusters Keys:', index=(0, 0), layout=Layout(height='200px', width=…

In [27]:
# Render an HTML snippet whose script hides every `div.input` element once the
# document is ready, plus a link that calls code_toggle() to show or hide them
# again on each click.
# NOTE(review): the script relies on a global `$` and on `div.input` elements
# being present in the page - presumably jQuery in the classic Notebook front
# end; confirm before using this in other front ends.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')