## Setup

In [None]:
!pip install --quiet matplotlib
!pip install --quiet plotly
!pip install --quiet nbformat
!pip install --quiet ipykernel
!pip install --quiet ipywidgets

## Import required libraries

In [None]:
import sys
sys.path.append('../')

import pandas as pd
import plotly.express as px
import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output
import pandas as pd
import plotly.graph_objects as go
from configs.config import cache as cache_path

## Read input data

In [None]:
# Load the feature table from the cache location imported from configs.config.
# The file name is appended to the cache path by plain string concatenation.
in_file_name = "features.csv"
features_file = cache_path + in_file_name
features_data = pd.read_csv(features_file)

In [None]:
# Restrict the frame to its first 1000 rows (positional slice, same result as head(1000)).
features_data = features_data.iloc[:1000]

In [None]:
# Print a concise summary of the loaded frame (columns, non-null counts, dtypes).
features_data.info()

## Visualize different plots from dropdown

In [None]:
# Dash application hosting the interactive feature dashboard.
app = dash.Dash(__name__)

# DataFrame the graph callback reads from (module-level name `df`).
df = pd.DataFrame(features_data)

# (label, value) pairs offered by the graph-type dropdown, in display order.
graph_type_choices = [
    ('Processing Time per Task per Case', 'processing_time'),
    ('Activity Count per Task per Case', 'activity_count'),
    ('Task Switches per Task per Case', 'task_switch'),
    ('Features Table', 'features_table'),
]

# Dropdown that selects which figure is rendered into the graph component.
graph_type_dropdown = dcc.Dropdown(
    id='graph-type',
    placeholder='Select graph type',
    options=[
        {'label': choice_label, 'value': choice_value}
        for choice_label, choice_value in graph_type_choices
    ],
)

# Target component whose `figure` property the callback fills in.
graph_area = dcc.Graph(id='graph')

# Page layout: the dropdown on top, the selected figure below it.
app.layout = html.Div([graph_type_dropdown, graph_area])

# Bar-chart graph types: dropdown value -> (y-axis label, figure title).
# The dropdown value doubles as the name of the feature column that is summed.
_BAR_GRAPHS = {
    'processing_time': ('Processing Time', 'Processing Time per Task per Case'),
    'activity_count': ('Activity Count', 'Activity Count per Task per Case'),
    'task_switch': ('Task Switch', 'Task Switches per Task per Case (Empty = No Switch)'),
}


def _stacked_bar_per_case(sorted_df, column, label, title):
    """Return a stacked bar chart of ``column`` summed per task within each case.

    Args:
        sorted_df: Feature frame containing ``case``, ``task`` and ``column``.
        column: Name of the numeric feature column to aggregate.
        label: Human-readable axis label for ``column``.
        title: Figure title.

    Returns:
        A Plotly figure with one bar per case, stacked by task, where the bars
        are ordered by the case's total of ``column`` (largest first).
    """
    per_task = sorted_df.groupby(['case', 'task'])[column].sum().reset_index()

    # Total per case; needed only to order the bars along the x-axis.
    per_case = sorted_df.groupby('case')[column].sum().reset_index()
    per_case = per_case.sort_values(by=column, ascending=False)

    # Case ids are plotted as strings so they are not sorted by number.
    # The ordering list gets the same conversion so that its entries have the
    # same type as the plotted values.
    per_task['case'] = per_task['case'].astype(str)
    category_order = per_case['case'].astype(str).tolist()

    # Largest segments first; this fixes the order in which tasks are stacked.
    per_task = per_task.sort_values(by=column, ascending=False)

    return px.bar(per_task, x='case', y=column, color='task', barmode='stack',
                  labels={column: label, 'case': 'Case'},
                  title=title,
                  category_orders={'case': category_order})


def _features_table(sorted_df):
    """Return a Plotly table figure showing every column of ``sorted_df``."""
    columns = list(sorted_df.columns)
    fig = go.Figure(data=[go.Table(
        header=dict(values=columns,
                    fill_color='paleturquoise',
                    align='left'),
        cells=dict(values=[sorted_df[col] for col in columns],
                   fill_color='lavender',
                   align='left'))
    ])
    fig.update_layout(title='Features sorted by case and first timestamp of the task')
    return fig


@app.callback(
    Output('graph', 'figure'),
    [Input('graph-type', 'value')]
)
def choose_graph_type(graph_type):
    """Build the figure for the graph type selected in the dropdown.

    Args:
        graph_type: Value of the ``graph-type`` dropdown; ``None`` while
            nothing is selected.

    Returns:
        A stacked bar chart for ``processing_time``, ``activity_count`` or
        ``task_switch``, or a table figure for ``features_table``.

    Raises:
        dash.exceptions.PreventUpdate: If nothing is selected or the value is
            not a known graph type, so the current figure is left untouched.
    """
    if graph_type is None:
        raise dash.exceptions.PreventUpdate()

    if graph_type in _BAR_GRAPHS:
        label, title = _BAR_GRAPHS[graph_type]
        sorted_df = df.sort_values(by=['case', 'first_TS'])
        return _stacked_bar_per_case(sorted_df, graph_type, label, title)

    if graph_type == 'features_table':
        return _features_table(df.sort_values(by=['case', 'first_TS']))

    # Unknown value: previously this fell through to `return fig` with `fig`
    # unbound (UnboundLocalError). Skip the update instead.
    raise dash.exceptions.PreventUpdate()

# Start the Dash server in debug mode when this code runs as the main module.
dashboard_port = 8051
if __name__ == '__main__':
    app.run_server(debug=True, port=dashboard_port)


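# Dataframes for viewing and debugging. NOTE: any dataframe built while rendering a graph lives only inside the
# `choose_graph_type` callback and is never added to the notebook namespace, so uncommenting the lines below
# raises NameError even after the graph has been selected. To inspect the data, recompute it from `df` in a cell.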
# processing_time_df.head(10)
# activity_count_df.head(10)
# task_switch_df.head(10)