## Setup

In [None]:
!pip install --quiet matplotlib
!pip install --quiet plotly
!pip install --quiet nbformat
!pip install --quiet ipykernel
!pip install --quiet ipywidgets
!pip install --quiet psutil

## Import required libraries

In [None]:
import os
import sys
sys.path.append('../')

import pandas as pd
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
from configs.config import cache as cache_path

## Read input data

In [None]:
# Location of the input CSV, relative to the configured cache directory:
#   <cache_path>/<filtered_folder_name>/<in_file_name>
filtered_folder_name = "filtered"
in_file_name = "variants.csv"

# Load the CSV once; every chart builder below reads from this dataframe.
features_data = pd.read_csv(
    os.path.join(cache_path, filtered_folder_name, in_file_name)
)

In [None]:
# Print a summary of the loaded dataframe as a quick sanity check before plotting.
features_data.info()

## Functions to view variants in different formats

In [None]:
def _summarize_subvariants(features_data, sort_column):
    """Aggregate the data per subvariant and derive the variant display order.

    Rows are grouped by ``('variant_name', 'task_list')``; ``avg_TAT`` and
    ``avg_PT`` are reduced with the median and ``count`` with the sum.

    Args:
        features_data: DataFrame with the columns ``variant_name``,
            ``task_list``, ``avg_TAT``, ``avg_PT`` and ``count``.
        sort_column: Aggregated column (``'count'``, ``'avg_TAT'`` or
            ``'avg_PT'``) that drives both the row order and the variant order.

    Returns:
        A ``(subvariants, category_order)`` tuple. ``subvariants`` is the
        aggregated frame sorted by ``sort_column`` in descending order, with
        ``variant_name`` converted to a categorical that uses
        ``category_order``. ``category_order`` lists the variant names from
        the largest to the smallest stacked total.
    """
    subvariants = (
        features_data
        .groupby(['variant_name', 'task_list'])
        .agg({'avg_TAT': 'median', 'avg_PT': 'median', 'count': 'sum'})
        .reset_index()
    )

    # Rank the variants by the total that is actually drawn, i.e. the sum of
    # the aggregated per-subvariant values. Summing the raw rows instead
    # disagrees with the bar lengths whenever a subvariant has several rows.
    variant_totals = subvariants.groupby('variant_name')[sort_column].sum()
    category_order = variant_totals.sort_values(ascending=False).index.tolist()

    # Within the stacked bars, the largest segments come first.
    subvariants = subvariants.sort_values(by=sort_column, ascending=False)
    subvariants['variant_name'] = pd.Categorical(
        subvariants['variant_name'], categories=category_order
    )
    return subvariants, category_order


def _stacked_subvariant_figure(subvariants, category_order, value_column, title):
    """Draw one horizontal stacked bar per variant, one segment per subvariant.

    Args:
        subvariants: Aggregated frame produced by ``_summarize_subvariants``.
        category_order: Variant names in the order they should be displayed.
        value_column: Column used for both the segment length and its colour.
        title: Figure title.

    Returns:
        The Plotly figure.
    """
    # The colour column is numeric, so Plotly Express maps it onto a
    # continuous colour scale; a discrete colour map would have no effect
    # here and is therefore not passed.
    fig = px.bar(
        subvariants,
        y='variant_name',
        x=value_column,
        color=value_column,
        orientation='h',
        title=title,
        labels={
            'variant_name': 'Variant',
            'avg_TAT': 'Median Avg TAT',
            'avg_PT': 'Median Avg PT',
            'task_list': 'Subvariant',
            'count': '# of Cases',
        },
        template='plotly_dark',
        category_orders={'variant_name': category_order},
        hover_data={'task_list': True, 'avg_TAT': True, 'avg_PT': True, 'count': True},
    )
    fig.update_layout(barmode='stack')
    return fig


def stack_subvariants_by_counts(features_data):
    """Build the stacked chart of case counts per subvariant.

    Side effect: stores the aggregated frame in the module-level
    ``substack_subvariants_by_counts_df`` so it can be inspected for debugging.

    Args:
        features_data: DataFrame with the columns ``variant_name``,
            ``task_list``, ``avg_TAT``, ``avg_PT`` and ``count``.

    Returns:
        The Plotly figure.
    """
    global substack_subvariants_by_counts_df
    # Publish the frame before plotting so it is available even if plotting fails.
    substack_subvariants_by_counts_df, category_order = _summarize_subvariants(
        features_data, 'count'
    )
    return _stacked_subvariant_figure(
        substack_subvariants_by_counts_df, category_order, 'count',
        'Counts of subvariants',
    )


def stack_subvariants_by_median_tat(features_data):
    """Build the stacked chart of the median ``avg_TAT`` per subvariant.

    Side effect: stores the aggregated frame in the module-level
    ``substack_subvariants_by_median_tat_df`` so it can be inspected for
    debugging.

    Args:
        features_data: DataFrame with the columns ``variant_name``,
            ``task_list``, ``avg_TAT``, ``avg_PT`` and ``count``.

    Returns:
        The Plotly figure.
    """
    global substack_subvariants_by_median_tat_df
    # Publish the frame before plotting so it is available even if plotting fails.
    substack_subvariants_by_median_tat_df, category_order = _summarize_subvariants(
        features_data, 'avg_TAT'
    )
    return _stacked_subvariant_figure(
        substack_subvariants_by_median_tat_df, category_order, 'avg_TAT',
        'Median of Subvariants Avg TAT',
    )


def stack_subvariants_by_median_pt(features_data):
    """Build the stacked chart of the median ``avg_PT`` per subvariant.

    Side effect: stores the aggregated frame in the module-level
    ``substack_subvariants_by_median_pt_df`` so it can be inspected for
    debugging.

    Args:
        features_data: DataFrame with the columns ``variant_name``,
            ``task_list``, ``avg_TAT``, ``avg_PT`` and ``count``.

    Returns:
        The Plotly figure.
    """
    global substack_subvariants_by_median_pt_df
    # Publish the frame before plotting so it is available even if plotting fails.
    substack_subvariants_by_median_pt_df, category_order = _summarize_subvariants(
        features_data, 'avg_PT'
    )
    return _stacked_subvariant_figure(
        substack_subvariants_by_median_pt_df, category_order, 'avg_PT',
        'Median of Subvariants Avg PT',
    )


def _summarize_variants(features_data, sort_column):
    """Aggregate the data per variant and sort it by one aggregated column.

    ``avg_TAT`` and ``avg_PT`` are reduced with the median and ``count`` with
    the sum.

    Args:
        features_data: DataFrame (or anything ``pd.DataFrame`` accepts) with
            the columns ``variant_name``, ``avg_TAT``, ``avg_PT`` and ``count``.
        sort_column: Aggregated column to sort by, in descending order.

    Returns:
        The aggregated, sorted DataFrame with one row per variant.
    """
    per_variant = (
        pd.DataFrame(features_data)
        .groupby('variant_name')
        .agg({'avg_TAT': 'median', 'avg_PT': 'median', 'count': 'sum'})
        .reset_index()
    )
    return per_variant.sort_values(by=sort_column, ascending=False)


def _variant_bar_figure(summary, value_column, title, hover_columns):
    """Draw one horizontal bar per variant for ``value_column``.

    Args:
        summary: Per-variant frame produced by ``_summarize_variants``.
        value_column: Column plotted on the x axis.
        title: Figure title.
        hover_columns: Additional columns shown in the hover tooltip.

    Returns:
        The Plotly figure.
    """
    fig = px.bar(
        summary,
        y='variant_name',
        x=value_column,
        title=title,
        hover_data={column: True for column in hover_columns},
        template='plotly_dark',
        # One shared label map, so every chart shows readable names on hover
        # instead of raw column names.
        labels={
            'variant_name': 'Variant',
            'avg_TAT': 'Median Avg TAT',
            'avg_PT': 'Median Avg PT',
            'count': '# of Cases',
        },
        orientation='h',
    )
    fig.update_traces(marker_color='#4169E1')  # Royal blue bars
    # The axis ordering is delegated to Plotly: bars are ordered by their total.
    fig.update_layout(yaxis={'categoryorder': 'total ascending'})
    return fig


def bars_by_median_tat(features_data):
    """Build the bar chart of the median ``avg_TAT`` per variant.

    Side effect: stores the per-variant frame (sorted by ``avg_TAT``,
    descending) in the module-level ``median_tat_count_per_variant_df`` so it
    can be inspected for debugging.

    Args:
        features_data: DataFrame (or anything ``pd.DataFrame`` accepts) with
            the columns ``variant_name``, ``avg_TAT``, ``avg_PT`` and ``count``.

    Returns:
        The Plotly figure.
    """
    global median_tat_count_per_variant_df
    median_tat_count_per_variant_df = _summarize_variants(features_data, 'avg_TAT')
    return _variant_bar_figure(
        median_tat_count_per_variant_df, 'avg_TAT', 'Median TAT per Variant',
        hover_columns=('avg_PT', 'count'),
    )


def bars_by_median_pt(features_data):
    """Build the bar chart of the median ``avg_PT`` per variant.

    Side effect: stores the per-variant frame (sorted by ``avg_PT``,
    descending) in the module-level ``median_tat_count_per_variant_df``. This
    is the same global that ``bars_by_median_tat`` writes; the name is kept
    so existing debugging code keeps working.

    Args:
        features_data: DataFrame (or anything ``pd.DataFrame`` accepts) with
            the columns ``variant_name``, ``avg_TAT``, ``avg_PT`` and ``count``.

    Returns:
        The Plotly figure.
    """
    global median_tat_count_per_variant_df
    median_tat_count_per_variant_df = _summarize_variants(features_data, 'avg_PT')
    return _variant_bar_figure(
        median_tat_count_per_variant_df, 'avg_PT', 'Median PT per Variant',
        hover_columns=('avg_TAT', 'count'),
    )


def bars_by_variant_count(features_data):
    """Build the bar chart of the total case count per variant.

    Side effect: stores the per-variant frame (sorted by ``count``,
    descending) in the module-level ``variant_cases_df`` so it can be
    inspected for debugging.

    Args:
        features_data: DataFrame (or anything ``pd.DataFrame`` accepts) with
            the columns ``variant_name``, ``avg_TAT``, ``avg_PT`` and ``count``.

    Returns:
        The Plotly figure.
    """
    global variant_cases_df
    variant_cases_df = _summarize_variants(features_data, 'count')
    return _variant_bar_figure(
        variant_cases_df, 'count', 'Total Count per Variant',
        hover_columns=('avg_TAT', 'avg_PT'),
    )

## Options to select from dropdown to view the visuals

In [None]:
# Dash application that hosts the dropdown and the graph it controls.
app = dash.Dash(__name__)

# Page layout: a dropdown to pick the chart, and the graph that displays it.
# Each option value is the name of the chart-building function it selects.
app.layout = html.Div([
    dcc.Dropdown(
        id='graph-type',
        options=[
            {'label': label, 'value': value}
            for label, value in (
                ('Stack Subvariants by Counts', 'stack_subvariants_by_counts'),
                ('Stack Subvariants by TAT', 'stack_subvariants_by_median_tat'),
                ('Stack Subvariants by PT', 'stack_subvariants_by_median_pt'),
                ('Bars by Median Avg TAT', 'bars_by_median_tat'),
                ('Bars by Median Avg PT', 'bars_by_median_pt'),
                ('Bars by Variant Count', 'bars_by_variant_count'),
            )
        ],
        value='stack_subvariants_by_counts',  # Chart shown when the page loads
        clearable=False,
    ),
    dcc.Graph(id='graph'),
])

@app.callback(
    Output('graph', 'figure'),
    [Input('graph-type', 'value')],
)
def update_graph(selected_value):
    """Return the figure for the chart chosen in the ``graph-type`` dropdown.

    Args:
        selected_value: The dropdown's current value, i.e. the name of one of
            the chart-building functions.

    Returns:
        The figure built from ``features_data`` by the selected function, or
        ``None`` when ``selected_value`` matches no known chart.
    """
    # Map each dropdown value to the function that builds its chart.
    figure_builders = {
        'stack_subvariants_by_counts': stack_subvariants_by_counts,
        'stack_subvariants_by_median_tat': stack_subvariants_by_median_tat,
        'stack_subvariants_by_median_pt': stack_subvariants_by_median_pt,
        'bars_by_median_tat': bars_by_median_tat,
        'bars_by_median_pt': bars_by_median_pt,
        'bars_by_variant_count': bars_by_variant_count,
    }
    build_figure = figure_builders.get(selected_value)
    if build_figure is None:
        return None
    return build_figure(features_data)

if __name__ == '__main__':
    # Start the development server on port 8051 with debug mode enabled.
    # `app.run` is the current Dash entry point; `app.run_server` is its
    # deprecated predecessor and is rejected by recent Dash releases. Prefer
    # `run`, and fall back to `run_server` only on old versions that lack it.
    start_server = getattr(app, 'run', None)
    if start_server is None:
        start_server = app.run_server
    start_server(debug=True, port=8051)



# Dataframes for viewing and debugging. Each dataframe is created only after its graph has been selected in the
# dashboard, so do not uncomment these lines here: they raise a NameError until the corresponding graph has been rendered.
# variant_cases_df.head()
# median_tat_count_per_variant_df.head()
# substack_subvariants_by_median_pt_df.head()
# substack_subvariants_by_median_tat_df.head()
# substack_subvariants_by_counts_df.head()
