This Jupyter notebook contains only the main working code, for reference.

## Import libraries

In [1]:
# Import the  libraries.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly as py
import plotly.express as px 
import plotly.graph_objects as go
import plotly.offline
import plotly.graph_objs as go
from pyecharts import options as opts
from pyecharts.charts import Sankey

# Optional - Ignore warnings.
import warnings
warnings.filterwarnings('ignore')

ModuleNotFoundError: No module named 'pyecharts'

## data cleansing


### Handle dataset 1: actual_duration

In [None]:
# Import and sense-check the actual_duration.csv data set as ad.
# The file is read via a relative path, i.e. from the current working directory.
ad = pd.read_csv('actual_duration.csv')

# View the first rows of the DataFrame.
ad.head()

In [None]:
# Determine the metadata of the ad data set (columns, dtypes, non-null counts).
ad.info()

In [None]:
# Convert the appointment date to datetime type so the .dt accessors can be used on it.
ad['appointment_date'] = pd.to_datetime(ad['appointment_date'])

# Determine the record period of the ad DataFrame (earliest and latest appointment date).
ad['appointment_date'].agg(['min','max'])

In [None]:
# Derive calendar and region label columns on the ad DataFrame.

# Month number -> three-letter month abbreviation.
month_dict = dict(enumerate(
    ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    start=1))

# Month number -> season label; each label covers one calendar quarter
# (Jan-Mar Winter, Apr-Jun Spring, Jul-Sep Summer, Oct-Dec Autumn).
season_dict = {
    month: season
    for first_month, season in ((1, 'Winter'), (4, 'Spring'),
                                (7, 'Summer'), (10, 'Autumn'))
    for month in range(first_month, first_month + 3)
}

# Region ONS code -> region name.
region_dict = {
    'E40000003': 'London',
    'E40000005': 'South East',
    'E40000006': 'South West',
    'E40000007': 'East of England',
    'E40000010': 'North West',
    'E40000011': 'Midlands',
    'E40000012': 'North East and Yorkshire',
}

# Calendar breakdown of the appointment date.
appointment_dates = ad['appointment_date']
ad['appointment_month'] = appointment_dates.dt.to_period('M')
ad['year'] = appointment_dates.dt.year
ad['quarter'] = appointment_dates.dt.quarter
ad['month'] = appointment_dates.dt.month

# Human-readable labels built from the lookup tables above.
ad['month_year'] = ad['month'].map(month_dict).astype(str) + '-' + ad['year'].astype(str)
ad['season'] = ad['month'].map(season_dict)
ad['weekday'] = appointment_dates.dt.day_name()
ad['region'] = ad['region_ons_code'].map(region_dict)

ad.head()

In [None]:
# Determine the number of unique values in each column of the ad data set.
ad.nunique()

In [None]:
# Determine whether there are missing values (count of nulls per column).
ad.isnull().sum()

In [None]:
# Determine whether there are duplicated records (count of fully duplicated rows).
ad.duplicated().sum()

In [None]:
# Determine the descriptive statistics of the ad data set.
ad.describe()

In [None]:
# Determine the total number of appointments recorded in the ad DataFrame.
ad['count_of_appointments'].sum()

## Handle dataset 2: appointments_regional

In [None]:
# Import and sense-check the appointments_regional.csv data set as ar.
# The file is read via a relative path, i.e. from the current working directory.
ar = pd.read_csv('appointments_regional.csv')

# View the first rows of the DataFrame.
ar.head()

In [None]:
# Determine the metadata of the ar data set (columns, dtypes, non-null counts).
ar.info()

In [None]:
# Convert the appointment month to datetime type so the .dt accessors can be used on it.
ar['appointment_month'] = pd.to_datetime(ar['appointment_month'])

# Determine the record period of the ar DataFrame (earliest and latest appointment month).
ar['appointment_month'].agg(['min','max'])

In [None]:
# Derive calendar and region label columns on the ar DataFrame.

# Month number -> three-letter month abbreviation.
month_dict = {
    number: label
    for number, label in enumerate(
        ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
        start=1)
}

# Month number -> season label, one label per calendar quarter.
season_dict = {
    **dict.fromkeys((1, 2, 3), 'Winter'),
    **dict.fromkeys((4, 5, 6), 'Spring'),
    **dict.fromkeys((7, 8, 9), 'Summer'),
    **dict.fromkeys((10, 11, 12), 'Autumn'),
}

# Region ONS code -> region name.
region_dict = {
    'E40000003': 'London',
    'E40000005': 'South East',
    'E40000006': 'South West',
    'E40000007': 'East of England',
    'E40000010': 'North West',
    'E40000011': 'Midlands',
    'E40000012': 'North East and Yorkshire',
}

# Calendar breakdown of the appointment month.
appointment_months = ar['appointment_month']
ar['year'] = appointment_months.dt.year
ar['quarter'] = appointment_months.dt.quarter
ar['month'] = appointment_months.dt.month
ar['month_year'] = ar['month'].map(month_dict).astype(str) + '-' + ar['year'].astype(str)
ar['season'] = ar['month'].map(season_dict)

# Attach a region code to ar by looking up each ICB code in the
# ICB -> region pairs present in the ad DataFrame, then label the region.
code = ad[['icb_ons_code', 'region_ons_code']].drop_duplicates()
code_dict = dict(zip(code['icb_ons_code'], code['region_ons_code']))
ar['region_ons_code'] = ar['icb_ons_code'].map(code_dict)
ar['region'] = ar['region_ons_code'].map(region_dict)

ar.head()

In [None]:
# Determine the number of unique values in each column of the ar data set.
ar.nunique()

In [None]:
# Determine whether there are missing values (count of nulls per column).
ar.isnull().sum()

In [None]:
# Determine the total number of appointments recorded in the ar DataFrame.
ar['count_of_appointments'].sum()

In [None]:
# Determine the number of duplicated records (fully duplicated rows).
ar.duplicated().sum()

# Keep all records: there is no per-appointment record to cross-check duplicates against,
# and the total number of appointments matches the nc data set.

In [None]:
# Determine the descriptive statistics of the ar data set.
ar.describe()

## Handle dataset 3: national_categories

In [None]:
# Import and sense-check the national_categories.xlsx data set as nc.
# The workbook is read via a relative path, i.e. from the current working directory.
nc = pd.read_excel('national_categories.xlsx')

# View the first rows of the DataFrame.
nc.head()

In [None]:
# Determine the number of categories (unique values) in each column of nc.
nc.nunique()

In [None]:
# Determine the metadata of the nc data set (columns, dtypes, non-null counts).
nc.info()

In [None]:
# Determine whether there are missing values (count of nulls per column).
nc.isnull().sum()

In [None]:
# Convert the appointment date to datetime type so the .dt accessors can be used on it.
nc['appointment_date'] = pd.to_datetime(nc['appointment_date'])

# Determine the record period of the nc DataFrame (earliest and latest appointment date).
nc['appointment_date'].agg(['min','max'])

In [None]:
# Derive calendar and region label columns on the nc DataFrame.

# Month number -> three-letter month abbreviation.
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
month_dict = dict(zip(range(1, 13), month_labels))

# Month number -> season label, one label per calendar quarter.
season_labels = ['Winter', 'Spring', 'Summer', 'Autumn']
season_dict = {month: season_labels[(month - 1) // 3] for month in range(1, 13)}

# Region ONS code -> region name.
region_dict = {
    'E40000003': 'London',
    'E40000005': 'South East',
    'E40000006': 'South West',
    'E40000007': 'East of England',
    'E40000010': 'North West',
    'E40000011': 'Midlands',
    'E40000012': 'North East and Yorkshire',
}

# Calendar breakdown of the appointment date.
appointment_dates = nc['appointment_date']
nc['year'] = appointment_dates.dt.year
nc['quarter'] = appointment_dates.dt.quarter
nc['month'] = appointment_dates.dt.month
nc['month_year'] = nc['month'].map(month_dict).astype(str) + '-' + nc['year'].astype(str)
nc['season'] = nc['month'].map(season_dict)
nc['weekday'] = appointment_dates.dt.day_name()

# Attach a region code to nc by looking up each ICB code in the
# ICB -> region pairs present in the ad DataFrame, then label the region.
code = ad[['icb_ons_code', 'region_ons_code']].drop_duplicates()
code_dict = dict(zip(code['icb_ons_code'], code['region_ons_code']))
nc['region_ons_code'] = nc['icb_ons_code'].map(code_dict)
nc['region'] = nc['region_ons_code'].map(region_dict)

nc.head()

In [None]:
# Determine the number of duplicated records (fully duplicated rows).
nc.duplicated().sum()

In [None]:
# Determine the descriptive statistics of the nc data set.
nc.describe()

# Investigate staffing issue 

### Appointment VS Capacity

In [None]:
# Capacity utilisation: compare the monthly appointment counts in ar
# against the maximum number of appointments the month could hold.

# Maximum number of appointments per day used for the capacity estimate.
DAILY_CAPACITY = 1200000

# Calculate total appointments per month and appointment status (Jan 2020 - Jun 2022)
ar_df = ar.groupby(['appointment_month','appointment_status'])['count_of_appointments'].agg('sum').reset_index()

# Calculate the number of calendar days in each month
# (pd.to_datetime makes this work whether or not the column was already converted).
ar_df['daysinmonth'] = pd.to_datetime(ar_df['appointment_month']).dt.days_in_month.astype('int64')

# Calculate the monthly max capacity
ar_df['month_capacity'] = DAILY_CAPACITY * ar_df['daysinmonth']

# Calculate the expected capacity utilisation rate (all booked appointments, in %)
ar_df['expected_utilisation_rate'] = ar_df['count_of_appointments'] / ar_df['month_capacity'] * 100


def actual_utilisation(row):
    """Return the row's appointment count if its status is 'Attended', else None."""
    if row['appointment_status'] == 'Attended':
        return row['count_of_appointments']
    return None


def actual_utilisation_rate(row):
    """Return the row's utilisation rate if its status is 'Attended', else None."""
    if row['appointment_status'] == 'Attended':
        return row['expected_utilisation_rate']
    return None


# Only attended appointments count towards the actual utilisation; every other
# status contributes 0. fillna is applied to the result and assigned back instead
# of calling fillna(inplace=True) on a column selection.
ar_df['actual_utilisation'] = ar_df.apply(actual_utilisation, axis=1).fillna(0)
ar_df['actual_utilisation_rate'] = ar_df.apply(actual_utilisation_rate, axis=1).fillna(0)

# Sum the rates per month. The columns are selected with a list: selecting several
# columns from a groupby with a bare tuple is no longer supported by pandas.
rate_columns = ['expected_utilisation_rate', 'actual_utilisation_rate']
ar_df_line = ar_df.groupby(['appointment_month'])[rate_columns].agg('sum').reset_index()

fig = px.line(ar_df_line, x='appointment_month', y=rate_columns,
              template='none', title='Capacity utilization rate per month',
              labels={'variable':'Utilization Rate',
                      'value': 'Percentage against the maximum capacity',
                     'appointment_month': 'Appointment Month'})
fig.update_xaxes(dtick="M1", tickangle=45)
# Dashed reference line at 100% of the capacity.
fig.add_hline(y=100,line_width=3, line_dash="dash", line_color='green', name='Capacity')
fig.update_yaxes(range=[0, 100])
fig.show()

In [None]:
# Total appointments (ar) per month, with each bar coloured by season.
data = (
    ar.groupby(['appointment_month', 'season'])['count_of_appointments']
      .sum()
      .reset_index()
)

bar_options = dict(
    x='appointment_month',
    y='count_of_appointments',
    color='season',
    category_orders={'season': ['Spring', 'Summer', 'Autumn', 'Winter']},
    text='count_of_appointments',
    template='none',
)
fig = px.bar(data, **bar_options)

# Abbreviated totals printed above each bar.
fig.update_traces(textposition='outside', texttemplate='%{text:.2s}')
# One tick per month, slanted for readability.
fig.update_xaxes(tickangle=45, dtick="M1")
fig.update_layout(
    title='Number of appointments per month',
    xaxis_title="Month",
    yaxis_title="Number of Appointments",
)
fig.show()

# Regions

In [None]:
# Monthly appointment totals, drawn as one line per region.
data = (
    ar.groupby(['appointment_month', 'region'])['count_of_appointments']
      .sum()
      .reset_index()
)

line_options = dict(
    x='appointment_month',
    y='count_of_appointments',
    color='region',
    template='none',
    width=800,
    height=400,
)
fig = px.line(data, **line_options)

fig.update_xaxes(tickangle=45, dtick="M1")
# Start the y-axis at zero so the regions are compared on an absolute scale.
fig.update_yaxes(rangemode="tozero")
fig.update_layout(
    title='Number of appointments per month for region',
    xaxis_title="Month",
    yaxis_title="Numbers of appointments",
)
fig.show()

In [None]:
# 100% stacked bar plot: share of each appointment status within every region.

data = ar.groupby(['region','appointment_status'])['count_of_appointments'].agg('sum').reset_index()
region_totals = data.groupby('region')['count_of_appointments'].transform('sum')

# Keep the share numeric so the bars are drawn on a real 0-100 scale; the
# formatted string is only used as the label printed on each bar segment.
# (Plotting the formatted strings as y would turn the y-axis into categories.)
data['Percentage'] = 100 * data['count_of_appointments'] / region_totals
data['Percentage_label'] = data['Percentage'].map('{:.2f}%'.format)

fig = px.bar(data, x='region', y='Percentage', color='appointment_status',
             text='Percentage_label',
             category_orders={'region': ['Midlands', 'North East and Yorkshire',
                                         'South East', 'London', 'North West',
                                         'East of England', 'South West']},
             title="Appointment per region", template='none')
fig.update_yaxes(ticksuffix='%')

fig.show()

## HCP types

In [None]:
# Pie chart: share of all appointments handled by each HCP type.
# px.pie sums the monthly rows that share the same hcp_type.
data = ar.groupby(['appointment_month','hcp_type'])['count_of_appointments'].agg('sum').reset_index()

fig = px.pie(data, values='count_of_appointments',
             names='hcp_type', template='none')
fig.update_traces(textposition='inside', textinfo='percent+label')
# A pie chart has no x/y axes, so only the title is set here.
fig.update_layout(title='Appointments by hcp types')
fig.show()

In [None]:
# 100% stacked bar plot: share of each HCP type within every month.
data = ar.groupby(['appointment_month','hcp_type'])['count_of_appointments'].agg('sum').reset_index()
month_totals = data.groupby('appointment_month')['count_of_appointments'].transform('sum')

# Keep the share numeric so the bars are drawn on a real 0-100 scale; the
# formatted string is only used as the label printed on each bar segment.
data['Percentage'] = 100 * data['count_of_appointments'] / month_totals
data['Percentage_label'] = data['Percentage'].map('{:.2f}%'.format)

fig = px.bar(data, x='appointment_month', y='Percentage', color='hcp_type',
             text='Percentage_label',
             title='Appointments per hcp type', template='none')
fig.update_xaxes(dtick="M1", tickangle=45)
fig.update_yaxes(ticksuffix='%')
fig.show()

In [None]:
# Grouped bar plot: share of each appointment status within every HCP type.
data = ar.groupby(['hcp_type','appointment_status'])['count_of_appointments'].agg('sum').reset_index()
hcp_totals = data.groupby('hcp_type')['count_of_appointments'].transform('sum')

# Keep the share numeric so the bar heights are real percentages; the
# formatted string is only used as the label printed on each bar.
data['Percentage'] = 100 * data['count_of_appointments'] / hcp_totals
data['Percentage_label'] = data['Percentage'].map('{:.2f}%'.format)

fig = px.bar(data, x='hcp_type', y='Percentage', color='appointment_status',
             text='Percentage_label',
             title='Appointments per hcp type', template='none')
# Place the statuses side by side instead of stacking them.
fig.update_layout(barmode='group')
fig.update_yaxes(ticksuffix='%')
fig.show()

In [None]:
# Monthly appointment totals, drawn as one line per HCP type.
data = (
    ar.groupby(['appointment_month', 'hcp_type'])['count_of_appointments']
      .sum()
      .reset_index()
)

line_options = dict(
    x='appointment_month',
    y='count_of_appointments',
    color='hcp_type',
    template='none',
    width=800,
    height=400,
)
fig = px.line(data, **line_options)

fig.update_xaxes(tickangle=45, dtick="M1")
# Start the y-axis at zero so the HCP types are compared on an absolute scale.
fig.update_yaxes(rangemode="tozero")
fig.update_layout(
    title='Number of appointments per month for hcp type',
    xaxis_title="Month",
    yaxis_title="Numbers of appointments",
)
fig.show()

## Appointment modes

In [None]:
# 1.3 Total appointments (ar) - mode
# 1.3.1 Pie chart - share of all appointments per appointment mode.
# px.pie sums the monthly rows that share the same appointment_mode.
data = ar.groupby(['appointment_month','appointment_mode'])['count_of_appointments'].agg('sum').reset_index()

fig = px.pie(data, values='count_of_appointments',
             names='appointment_mode', template='none',
             category_orders=
             {'appointment_mode':['Face-to-Face', 'Telephone','Home Visit', 'Video/Online','Unknown']})
fig.update_traces(textposition='inside', textinfo='percent+label')
# A pie chart has no x/y axes, so only the title is set here.
fig.update_layout(title='Appointments by appointment modes')
fig.show()

In [None]:
# 100% stacked bar plot: share of each appointment mode within every month.
data = ar.groupby(['appointment_month','appointment_mode'])['count_of_appointments'].agg('sum').reset_index()
month_totals = data.groupby('appointment_month')['count_of_appointments'].transform('sum')

# Keep the share numeric so the bars are drawn on a real 0-100 scale; the
# formatted string is only used as the label printed on each bar segment.
data['Percentage'] = 100 * data['count_of_appointments'] / month_totals
data['Percentage_label'] = data['Percentage'].map('{:.2f}%'.format)

fig = px.bar(data, x='appointment_month', y='Percentage', color='appointment_mode',
             text='Percentage_label',
             title='Appointments per mode', template='none',
             category_orders=
             {'appointment_mode':['Face-to-Face', 'Telephone','Home Visit', 'Video/Online','Unknown']})
fig.update_xaxes(dtick="M1", tickangle=45)
fig.update_yaxes(ticksuffix='%')
fig.show()

In [None]:
# Monthly appointment totals, drawn as one line per appointment mode.
data = (
    ar.groupby(['appointment_month', 'appointment_mode'])['count_of_appointments']
      .sum()
      .reset_index()
)

# Fixed legend order for the appointment modes.
mode_order = ['Face-to-Face', 'Telephone', 'Home Visit', 'Video/Online', 'Unknown']
line_options = dict(
    x='appointment_month',
    y='count_of_appointments',
    color='appointment_mode',
    category_orders={'appointment_mode': mode_order},
    template='none',
    width=800,
    height=400,
)
fig = px.line(data, **line_options)

fig.update_xaxes(tickangle=45, dtick="M1")
fig.update_yaxes(rangemode="tozero")
fig.update_layout(
    title='Number of appointments per month for appointment mode',
    xaxis_title="Month",
    yaxis_title="Numbers of appointments",
)
fig.show()

## Waiting time between booking and appointment

In [None]:
# Pie chart: share of all appointments per waiting-time band.
# px.pie sums the monthly rows that share the same waiting-time band.
data = ar.groupby(['appointment_month','time_between_book_and_appointment'])['count_of_appointments'].agg('sum').reset_index()

fig = px.pie(data, values='count_of_appointments',
             names='time_between_book_and_appointment',
             category_orders={'time_between_book_and_appointment':
                              ['Same Day','1 Day', '2 to 7 Days','8  to 14 Days','15  to 21 Days',
                               '22  to 28 Days', 'More than 28 Days','Unknown / Data Quality']},
             template='none')
fig.update_traces(textposition='inside', textinfo='percent+label')
# A pie chart has no x/y axes, so only the title is set here.
fig.update_layout(title='Number of appointments per waiting time')
fig.show()

In [None]:
# 100% stacked bar plot: share of each waiting-time band within every month.
data = ar.groupby(['appointment_month','time_between_book_and_appointment'])['count_of_appointments'].agg('sum').reset_index()
month_totals = data.groupby('appointment_month')['count_of_appointments'].transform('sum')

# Keep the share numeric so the bars are drawn on a real 0-100 scale; the
# formatted string is only used as the label printed on each bar segment.
data['Percentage'] = 100 * data['count_of_appointments'] / month_totals
data['Percentage_label'] = data['Percentage'].map('{:.2f}%'.format)

fig = px.bar(data, x='appointment_month', y='Percentage', color='time_between_book_and_appointment',
             text='Percentage_label',
             title='Appointments per waiting time', template='none',
             category_orders={'time_between_book_and_appointment':
                              ['Same Day','1 Day', '2 to 7 Days','8  to 14 Days','15  to 21 Days',
                               '22  to 28 Days', 'More than 28 Days','Unknown / Data Quality']})
fig.update_xaxes(dtick="M1", tickangle=45)
fig.update_yaxes(ticksuffix='%')
fig.show()

In [None]:
# Monthly appointment totals, drawn as one line per waiting-time band.
data = (
    ar.groupby(['appointment_month', 'time_between_book_and_appointment'])['count_of_appointments']
      .sum()
      .reset_index()
)

# Waiting-time bands from shortest to longest, unknown last.
waiting_time_order = [
    'Same Day', '1 Day', '2 to 7 Days', '8  to 14 Days', '15  to 21 Days',
    '22  to 28 Days', 'More than 28 Days', 'Unknown / Data Quality',
]
line_options = dict(
    x='appointment_month',
    y='count_of_appointments',
    color='time_between_book_and_appointment',
    category_orders={'time_between_book_and_appointment': waiting_time_order},
    template='none',
    width=1000,
    height=400,
)
fig = px.line(data, **line_options)

fig.update_xaxes(tickangle=45, dtick="M1")
fig.update_yaxes(rangemode="tozero")
fig.update_layout(
    title='Number of appointments per month for waiting time',
    xaxis_title="Month",
    yaxis_title="Numbers of appointments",
)
fig.show()

## Appointment Status

In [None]:
# Monthly appointment totals, drawn as one line per appointment status.
data = (
    ar.groupby(['appointment_month', 'appointment_status'])['count_of_appointments']
      .sum()
      .reset_index()
)

line_options = dict(
    x='appointment_month',
    y='count_of_appointments',
    color='appointment_status',
    template='none',
    width=1000,
    height=400,
)
fig = px.line(data, **line_options)

fig.update_xaxes(tickangle=45, dtick="M1")
fig.update_yaxes(rangemode="tozero")
fig.update_layout(
    title='Number of appointments per appointment status',
    xaxis_title="Month",
    yaxis_title="Numbers of appointments",
)
fig.show()

In [None]:
# Share of each appointment status within every month, one facet row per
# status and bars coloured by season.
data = ar.groupby(['appointment_month','appointment_status','season'])['count_of_appointments'].agg('sum').reset_index()
month_totals = data.groupby('appointment_month')['count_of_appointments'].transform('sum')

# Keep the share numeric so the bar heights are real percentages; the
# formatted string is only used as the label printed on each bar.
data['Percentage'] = 100 * data['count_of_appointments'] / month_totals
data['Percentage_label'] = data['Percentage'].map('{:.2f}%'.format)

fig = px.bar(data, x='appointment_month', y='Percentage', color='season',
             text='Percentage_label',
             category_orders={'season': ['Spring', 'Summer', 'Autumn', 'Winter']},
             facet_row='appointment_status',
             title='Total appointments per status', template='none')

fig.update_xaxes(dtick="M1", tickangle=45)
fig.update_yaxes(ticksuffix='%')
# Facet labels look like "appointment_status=Attended"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))
fig.show()

# Patient pathway

In [None]:


# Total appointments per appointment mode, stacked by status, one facet per HCP type.
# The data is aggregated only over the dimensions the chart uses, so every
# bar segment is a single total instead of a stack of per-month fragments.
data = ar.groupby(['hcp_type','appointment_mode','appointment_status'])['count_of_appointments'].agg('sum').reset_index()

px.bar(data, x='appointment_mode', y='count_of_appointments',color='appointment_status',
       facet_col ='hcp_type',
       title="Number of appointment per hcp types and appointment mode", template='none', height=400)

In [None]:
# Share of each appointment status per appointment mode, one facet row per
# waiting-time band and one animation frame per HCP type.
data = ar.groupby(['hcp_type','appointment_mode','appointment_status','time_between_book_and_appointment'])['count_of_appointments'].agg('sum').reset_index()
group_totals = data.groupby(['hcp_type','appointment_mode','time_between_book_and_appointment'])['count_of_appointments'].transform('sum')

# Keep the share numeric so the bars are drawn on a real 0-100 scale; the
# formatted string is only used as the label printed on each bar segment.
data['Percentage'] = 100 * data['count_of_appointments'] / group_totals
data['Percentage_label'] = data['Percentage'].map('{:.2f}%'.format)

fig = px.bar(data, x='appointment_mode', y='Percentage', color='appointment_status',
             text='Percentage_label',
             facet_row='time_between_book_and_appointment', animation_frame='hcp_type',
             category_orders=
             {'time_between_book_and_appointment':
              ['Same Day','1 Day', '2 to 7 Days','8  to 14 Days','15  to 21 Days',
               '22  to 28 Days', 'More than 28 Days','Unknown / Data Quality'],
              'appointment_mode':['Face-to-Face', 'Telephone','Home Visit', 'Video/Online','Unknown']},
             title='Appointment attendance % per hcp, mode, waiting time', template='none',
             width=800, height=1500)
# Facet labels look like "column=value"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))
fig.update_xaxes(dtick="M1", tickangle=45)
fig.update_yaxes(ticksuffix='%')

fig.show()

In [None]:
# NOTE(review): this cell repeats the preceding attendance-% chart; consider removing one of them.
# Share of each appointment status per appointment mode, one facet row per
# waiting-time band and one animation frame per HCP type.
data = ar.groupby(['hcp_type','appointment_mode','appointment_status','time_between_book_and_appointment'])['count_of_appointments'].agg('sum').reset_index()
group_totals = data.groupby(['hcp_type','appointment_mode','time_between_book_and_appointment'])['count_of_appointments'].transform('sum')

# Keep the share numeric so the bars are drawn on a real 0-100 scale; the
# formatted string is only used as the label printed on each bar segment.
data['Percentage'] = 100 * data['count_of_appointments'] / group_totals
data['Percentage_label'] = data['Percentage'].map('{:.2f}%'.format)

fig = px.bar(data, x='appointment_mode', y='Percentage', color='appointment_status',
             text='Percentage_label',
             facet_row='time_between_book_and_appointment', animation_frame='hcp_type',
             category_orders=
             {'time_between_book_and_appointment':
              ['Same Day','1 Day', '2 to 7 Days','8  to 14 Days','15  to 21 Days',
               '22  to 28 Days', 'More than 28 Days','Unknown / Data Quality'],
              'appointment_mode':['Face-to-Face', 'Telephone','Home Visit', 'Video/Online','Unknown']},
             title='Appointment attendance % per hcp, mode, waiting time', template='none',
             width=800, height=1500)
# Facet labels look like "column=value"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))
fig.update_xaxes(dtick="M1", tickangle=45)
fig.update_yaxes(ticksuffix='%')

fig.show()

In [None]:
# Share of each appointment status per appointment mode, faceted by HCP type
# (columns) and waiting-time band (rows), one animation frame per region.
data = ar.groupby(['region','hcp_type','appointment_mode','appointment_status','time_between_book_and_appointment'])['count_of_appointments'].agg('sum').reset_index()
group_totals = data.groupby(['region','hcp_type','appointment_mode','time_between_book_and_appointment'])['count_of_appointments'].transform('sum')

# Keep the share numeric so the bars are drawn on a real 0-100 scale; the
# formatted string is only used as the label printed on each bar segment.
data['Percentage'] = 100 * data['count_of_appointments'] / group_totals
data['Percentage_label'] = data['Percentage'].map('{:.2f}%'.format)

fig = px.bar(data, x='appointment_mode', y='Percentage', color='appointment_status',
             text='Percentage_label',
             facet_col='hcp_type',
             facet_row='time_between_book_and_appointment', animation_frame='region',
             category_orders=
             {'time_between_book_and_appointment':
              ['Same Day','1 Day', '2 to 7 Days','8  to 14 Days','15  to 21 Days',
               '22  to 28 Days', 'More than 28 Days','Unknown / Data Quality'],
              'appointment_mode':['Face-to-Face', 'Telephone','Home Visit', 'Video/Online','Unknown']},
             title='Appointment attendance % per region', template='none',
             width=1500, height=1500)
# Facet labels look like "column=value"; keep only the value.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))
fig.update_xaxes(dtick="M1", tickangle=45)
fig.update_yaxes(ticksuffix='%')

fig.show()

In [None]:

# Total appointments per appointment mode, stacked by status, one facet row
# per waiting-time band and one animation frame per HCP type.
# The data is aggregated only over the dimensions the chart uses, so every
# bar segment is a single total instead of a stack of per-month fragments.
data = ar.groupby(['hcp_type','appointment_mode','time_between_book_and_appointment','appointment_status'])['count_of_appointments'].agg('sum').reset_index()

fig = px.bar(data, x='appointment_mode', y='count_of_appointments',color='appointment_status',
       facet_row ='time_between_book_and_appointment', animation_frame='hcp_type',
       category_orders={'time_between_book_and_appointment':
                        ['Same Day','1 Day', '2 to 7 Days','8  to 14 Days','15  to 21 Days',
                         '22  to 28 Days', 'More than 28 Days','Unknown / Data Quality']},
                        title="Number of Appointments", template='none', height=1900)
# Facet labels look like "column=value"; keep only the value.
# This call is the cell's last expression: it returns the figure, which the notebook then displays.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))

# Actual Duration

In [None]:
# Appointments per month (ad), stacked by actual appointment duration.
data = ad.groupby(['actual_duration','month_year'])['count_of_appointments'].agg('sum').reset_index()

fig = px.bar(data, x='month_year', y='count_of_appointments',
              category_orders={'month_year':
                        ['Dec-2021', 'Jan-2022', 'Feb-2022', 'Mar-2022', 'Apr-2022',
                         'May-2022', 'Jun-2022'],
                               # Every entry must be comma-separated: without the comma,
                               # adjacent string literals are concatenated into one
                               # category name that matches nothing in the data.
                               'actual_duration':['1-5 Minutes','6-10 Minutes','11-15 Minutes',
                                                  '16-20 Minutes','21-30 Minutes','31-60 Minutes',
                                                  'Unknown / Data Quality']},
             template='none', color='actual_duration',width=1000, height=400)
fig.update_xaxes(dtick="M1", tickangle=45)
fig.update_yaxes(rangemode="tozero")
fig.update_layout(title = 'Number of appointments per actual duration',
                  xaxis_title = "Month", yaxis_title = "Numbers of appointments")
fig.show()

In [None]:
# Appointment Distribution [HCP type, Appointment mode, Waiting time, Final Status]

data = ad.groupby(['actual_duration','region'])['count_of_appointments'].sum().reset_index()

fig =px.sunburst(data, 
                 path=['actual_duration','region'],
                 values='count_of_appointments',
                       title="Appointment duartion per region",
                  width=750, height=750, template = 'none')
fig.update_traces(textinfo="label+percent parent")
fig.show()

## National Category

In [None]:
# 100% stacked bar plot: share of each national category within every month.
data = nc.groupby(['national_category','appointment_month'])['count_of_appointments'].agg('sum').reset_index()
month_totals = data.groupby('appointment_month')['count_of_appointments'].transform('sum')

# Keep the share numeric so the bars are drawn on a real 0-100 scale; the
# formatted string is only used as the label printed on each bar segment.
data['Percentage'] = 100 * data['count_of_appointments'] / month_totals
data['Percentage_label'] = data['Percentage'].map('{:.2f}%'.format)

fig = px.bar(data, x='appointment_month', y='Percentage', color='national_category',
             text='Percentage_label',
             title='Appointment per national category', template='none')
fig.update_xaxes(dtick="M1", tickangle=45)
fig.update_yaxes(ticksuffix='%')
fig.show()

In [None]:
# Monthly appointment totals (nc), drawn as one line per national category.
data = (
    nc.groupby(['national_category', 'appointment_month'])['count_of_appointments']
      .sum()
      .reset_index()
)

line_options = dict(
    x='appointment_month',
    y='count_of_appointments',
    color='national_category',
    template='none',
    width=1000,
    height=400,
)
fig = px.line(data, **line_options)

fig.update_xaxes(tickangle=45, dtick="M1")
fig.update_yaxes(rangemode="tozero")
fig.update_layout(
    title='Number of appointments per national category',
    xaxis_title="Month",
    yaxis_title="Numbers of appointments",
)
fig.show()

In [None]:
# Total appointments per national category, coloured by service setting
# and plotted from the largest total to the smallest.

data = (
    nc.groupby(['service_setting', 'national_category'])['count_of_appointments']
      .sum()
      .reset_index()
)
new_data = data.sort_values(by='count_of_appointments', ascending=False)

bar_options = dict(
    x='national_category',
    y='count_of_appointments',
    color='service_setting',
    text='count_of_appointments',
    template='none',
)
fig = px.bar(new_data, **bar_options)

# Abbreviated totals above each bar, slightly transparent bars.
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside', opacity=0.85)
fig.update_xaxes(tickangle=45, dtick="M1")
fig.update_layout(
    title='Appointment per national category',
    xaxis_title='National Category',
    yaxis_title="Number of Appointments",
)
fig.show()

In [None]:
# Appointment distribution [service_setting, national_category]:
# inner ring = service setting, outer ring = national category.

data = nc.groupby(['service_setting','national_category'])['count_of_appointments'].sum().reset_index()

fig =px.sunburst(data,
                 path=['service_setting','national_category'],
                 values='count_of_appointments',
                       # The title lists only the levels that are actually in `path`.
                       title="Total Appointment Distribution [service_setting,national_category]",
                  width=750, height=750,template = 'none')
# Label every sector with its share of the parent sector.
fig.update_traces(textinfo="label+percent parent")
fig.show()

In [None]:
# Appointment distribution by region, then service setting, then national category.
sunburst_levels = ['region', 'service_setting', 'national_category']

data = nc.groupby(sunburst_levels)['count_of_appointments'].sum().reset_index()

sunburst_options = dict(
    path=sunburst_levels,
    values='count_of_appointments',
    title="Total Appointment Distribution [region,service_setting,national_category]",
    template='none',
    width=750,
    height=750,
)
fig = px.sunburst(data, **sunburst_options)

# Label every sector with its share of the parent sector.
fig.update_traces(textinfo="label+percent parent")
fig.show()

# No Show Appointment

In [None]:
# DNA (no-show) appointments distribution along the patient pathway,
# restricted to the month 2022-04-01.

# Appointment distribution [region, HCP type, appointment mode]
data = ar.groupby(['appointment_month','region','hcp_type','appointment_mode','time_between_book_and_appointment','appointment_status'])['count_of_appointments'].sum().reset_index()

# Named dna_month_mask rather than `filter` so the built-in filter() is not shadowed.
dna_month_mask = (data['appointment_status']=='DNA') & (data['appointment_month']=='2022-04-01')

fig =px.sunburst(data[dna_month_mask],
                 path=['region','hcp_type','appointment_mode'],
                 values='count_of_appointments',
                 # The title lists only the levels that are actually in `path`.
                 title='DNA Appointment Distribution - region, HCP type, appointment mode',
                  width=750, height=750)
# Label every sector with its share of the parent sector.
fig.update_traces(textinfo="label+percent parent")
fig.show()

In [None]:
# DNA appointments per appointment mode, stacked by waiting-time band,
# one facet column per HCP type.
dna_group_keys = ['appointment_status', 'hcp_type', 'appointment_mode',
                  'time_between_book_and_appointment']
data = ar.loc[ar['appointment_status'] == 'DNA']
data = data.groupby(dna_group_keys)['count_of_appointments'].sum().reset_index()

# Waiting-time bands from shortest to longest, unknown last.
waiting_time_order = [
    'Same Day', '1 Day', '2 to 7 Days', '8  to 14 Days', '15  to 21 Days',
    '22  to 28 Days', 'More than 28 Days', 'Unknown / Data Quality',
]
bar_options = dict(
    x='appointment_mode',
    y='count_of_appointments',
    color='time_between_book_and_appointment',
    facet_col='hcp_type',
    facet_row='appointment_status',
    category_orders={'time_between_book_and_appointment': waiting_time_order},
    title="DNA Appointments",
    template='none',
)
fig = px.bar(data, **bar_options)
fig.update_layout(barmode="relative")
fig.show()

In [None]:
# Tree plot
# Break down the DNA appointments by HCP type, appointment mode and waiting time.
ar_filter = (ar['appointment_status'] == "DNA")

# Keep only the DNA rows.
filtered_ar = ar.loc[ar_filter]

ar_tree = filtered_ar.groupby(['hcp_type','appointment_mode','time_between_book_and_appointment'])['count_of_appointments'].sum().reset_index()

ar_tree["all"] = "all" # in order to have a single root node
fig = px.treemap(ar_tree,
                 path=['all','hcp_type','appointment_mode','time_between_book_and_appointment'],
                 # The area of each tile is determined by count_of_appointments.
                 values='count_of_appointments',
                 # The title lists the levels that are actually in `path`.
                 title="DNA appointments per HCP type, appointment mode, waiting time")

# Label every tile with its share of the parent tile.
fig.update_traces(textinfo="label+percent parent")
fig.show()

In [None]:
# DNA appointment distribution [HCP type, appointment mode, waiting time]

data = ar.groupby(['hcp_type','appointment_mode','time_between_book_and_appointment','appointment_status'])['count_of_appointments'].sum().reset_index()

# Named dna_mask rather than `filter` so the built-in filter() is not shadowed.
dna_mask = data['appointment_status']=='DNA'

fig =px.sunburst(data[dna_mask],
                 path=['hcp_type','appointment_mode','time_between_book_and_appointment'],
                 values='count_of_appointments',
                       # The title lists only the levels that are actually in `path`.
                       title='DNA Appointment Distribution - HCP type, appointment mode, waiting time',
                  width=750, height=750,template = 'none')
# Label every sector with its share of the parent sector.
fig.update_traces(textinfo="label+percent parent")
fig.show()