## Packages

In [None]:
import pandas as pd
import numpy as np
from math import ceil
import plotly.express as px
import sqlalchemy 
from sqlalchemy import create_engine, text
from db_secrets import SQL_107

## Test Connection

In [None]:
## Load the SQL template from disk and substitute the start-date placeholder
with open("111_sql.sql", "r") as file:
    query_text = file.read().replace('REPLACE START DATE','2024-01-01')

In [None]:
## Create an engine for the database described by SQL_107()
engine = create_engine(SQL_107())

## Run the query on a connection that is closed as soon as the data has been
## read, so re-running this cell does not leave connections open.
## (`conn` stays bound after the block, but it is closed.)
with engine.connect() as conn:
    ## Return data
    df_raw = pd.read_sql(query_text,conn)

In [None]:
## Work on an independent (deep) copy so the raw query result stays untouched
df = df_raw.copy(deep=True)

In [None]:
## Function to round up to the nearest 5 for small number suppression
def round_up_to_5(x):
    """Return ``x`` rounded up to the next multiple of 5.

    Values that are already a multiple of 5 (including 0) are returned
    unchanged. The result is always an ``int``.
    """
    blocks_of_five = ceil(x / 5)
    return blocks_of_five * 5

In [None]:
## Function to determine type of trauma symptom
def Trauma_Detect(x):
    """Classify a symptom-group string by the kind of trauma it mentions.

    The checks are substring tests applied in priority order: 'Blunt',
    then 'Penetrating', then any other mention of 'Trauma'.

    Parameters
    ----------
    x : str or missing value
        Symptom group text. Anything that is not a string (None, NaN, ...)
        carries no symptom text and is classified as 'Not Trauma' instead
        of raising a TypeError on the substring test.

    Returns
    -------
    str
        One of 'Blunt', 'Penetrating', 'Other Trauma' or 'Not Trauma'.
    """
    ## Missing values cannot be searched for substrings
    if not isinstance(x, str):
        return 'Not Trauma'

    if 'Blunt' in x:
        return 'Blunt'
    if 'Penetrating' in x:
        return 'Penetrating'
    if 'Trauma' in x:
        return 'Other Trauma'
    return 'Not Trauma'

In [None]:
## Function to determine pregnancy
def Pregnancy_Detect(x):
    """Classify a symptom-group string by the pregnancy status it mentions.

    The checks are substring tests applied in priority order: the two
    gestation-specific phrases first, then any other mention of 'Pregnant'.

    Parameters
    ----------
    x : str or missing value
        Symptom group text. Anything that is not a string (None, NaN, ...)
        carries no symptom text and is classified as 'Not Pregnant' instead
        of raising a TypeError on the substring test.

    Returns
    -------
    str
        One of 'Over 20 Weeks', 'Under 20 Weeks', 'Other Pregnancy' or
        'Not Pregnant'.
    """
    ## Missing values cannot be searched for substrings
    if not isinstance(x, str):
        return 'Not Pregnant'

    if 'Pregnant, Over 20 Weeks' in x:
        return 'Over 20 Weeks'
    if 'Pregnant, Under 20 Weeks' in x:
        return 'Under 20 Weeks'
    if 'Pregnant' in x:
        return 'Other Pregnancy'
    return 'Not Pregnant'

In [None]:
## Function to strip a pattern out of a text column
def replace_thing(data,col,x):
    """Remove every match of the regular expression ``x`` from ``data[col]``.

    The column is overwritten on ``data`` itself, and that same DataFrame
    is returned so calls can be chained or reassigned.
    """
    stripped = data[col].str.replace(x, '', regex=True)
    data.loc[:, col] = stripped
    return data

In [None]:
## Display the column labels of the working copy
df.columns

In [None]:
## Derive the trauma and pregnancy classifications from the symptom group text
df.loc[:,"Trauma_Type"] = df["Symptom_Group"].transform(Trauma_Detect)
df.loc[:,"Pregnant"] = df["Symptom_Group"].transform(Pregnancy_Detect)

## Qualifiers to strip from Symptom_Group now that they live in their own
## columns. The longer pregnancy phrases come before the bare ', Pregnant'
## so that it cannot remove only the front half of them.
conditions = [
    ', Blunt',
    ', Penetrating',
    ', Pregnant, Over 20 Weeks',
    ', Pregnant, Under 20 Weeks',
    ', Pregnant',
]

for c in conditions:
    df = replace_thing(df,'Symptom_Group',x=c)


In [None]:
## Function to generate Table One
def generate_table1(data,group = None):
    """Build a "Table One" style summary of the columns in ``data``.

    Numeric columns produce one row holding the median and the 25th/75th
    percentiles. Object, category and bool columns produce one row per
    level holding a count and a percentage. Displayed counts and the N
    values in the column headers are passed through ``round_up_to_5``.

    NOTE(review): percentages are computed from the unrounded counts, so
    they are more precise than the rounded counts shown beside them -
    confirm this is acceptable for small number suppression.

    Parameters
    ----------
    data : pandas.DataFrame
        The rows and columns to summarise.
    group : column label, optional
        When given, the table gets one extra column per value of
        ``data[group]`` (most frequent first), and the group column itself
        is left out of the categorical rows. Percentages in a group column
        use that group's row count as the denominator.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Characteristic', with a 'Level' column, one column per
        group value (if ``group`` is given) and a final 'Overall' column.
    """

    ## Number of rows
    N_row = len(data)

    ## Separate numerical and categorical columns
    numeric_cols = data.select_dtypes(include=['number']).columns
    categorical_cols = data.select_dtypes(include=['object', 'category', 'bool']).columns

    ## Column header for the overall summary
    overall_header = f'Overall, N={round_up_to_5(N_row):,}'

    ## Per-group lookups; these stay empty when no group is requested, which
    ## makes the per-group loops below a no-op
    group_counts = None
    group_headers = {}
    group_masks = {}

    ## filters to remove group, creates group item values
    if group:
        categorical_cols = [x for x in categorical_cols if x != group]
        ## Group sizes must come from `data` (the frame being summarised),
        ## not from any other DataFrame that happens to be in scope
        group_counts = data[group].value_counts()
        items = (data.groupby(group)
                    .size()
                    .to_frame(name='count')
                    .sort_values('count', ascending=False)
                    .index)
        for item in items:
            group_headers[item] = f'{item}, N={round_up_to_5(group_counts.get(item,0)):,}'
            group_masks[item] = data[group] == item

    ## Initialize an empty list to collect rows for the summary DataFrame
    summary_rows = []

    ## Function to format median and quartiles
    def median_iqr(col):
        return f"{col.median():.1f} ({col.quantile(0.25):.1f},{col.quantile(0.75):.1f})"

    ## Function to format number and percent
    def count_pc(count,denominator):
        percent = (count / denominator) * 100
        return f'{round_up_to_5(count):,} ({percent:.1f}%)'

    # Numeric data summary (median IQR)
    for col in numeric_cols:
        row = {'Characteristic': f'{col} (median, IQR)'
               ,'Level': ''}
        for item, header in group_headers.items():
            row[header] = median_iqr(data.loc[group_masks[item], col])
        row[overall_header] = median_iqr(data[col])
        summary_rows.append(row)

    # Categorical data summary (each category level on a separate line)
    for col in categorical_cols:
        levels = (data.groupby(col)
                    .size()
                    .to_frame(name='count')
                    .sort_values('count', ascending=False)
                    .index)

        ## Count each level once per column rather than once per table cell
        overall_level_counts = data[col].value_counts()
        level_counts_by_item = {item: data.loc[mask, col].value_counts()
                                for item, mask in group_masks.items()}

        for level in levels:
            row = { 'Characteristic': f'{col} (N, %)'
                , 'Level': f'{level}'}
            for item, header in group_headers.items():
                row[header] = count_pc(level_counts_by_item[item].get(level, 0)
                                       ,group_counts.get(item,0))
            row[overall_header] = count_pc(overall_level_counts.get(level, 0),N_row)
            summary_rows.append(row)

    # Convert the summary rows to a DataFrame
    table1_df = pd.DataFrame(summary_rows).set_index('Characteristic')
    return table1_df



In [None]:
## Initial Table One: call characteristics split by outcome type
df_table_one = df.loc[:, [
    'Sub ICB Name', 'Disposition Group',
    'In_Out_Hours', 'Call_Taker_Triages', 'Clinical_Triages',
    'Patient Age', 'Patient Sex', 'Outcome Type',
    'Outcome Location Name', 'Hours to Outcome',
]].copy()

table_one = generate_table1(df_table_one,group='Outcome Type')

table_one

In [None]:
## Symptom group, trauma type and pregnancy status split by outcome type
df_table_two = df.loc[:, [
    'Trauma_Type', 'Pregnant',
    'Symptom_Group', 'Outcome Type',
]].copy()

table_two = generate_table1(df_table_two,group='Outcome Type')

table_two

In [None]:
## Function to create week plot
def create_week_plot(data,timestamp,title):
    """Show a bar chart of row counts for each hour of each day of the week.

    Parameters
    ----------
    data : pandas.DataFrame
        Source rows. NOTE: 'hour' and 'day_of_week' columns are written
        onto this DataFrame itself.
    timestamp : column label
        Column of ``data`` whose values are read through the ``.dt``
        accessor.
    title : str
        Title of the figure.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']

    ## Turns a combined day-hour slot (day * 24 + hour) into a readable label
    def slot_label(slot):
        day_index, hour_of_day = divmod(int(slot), 24)
        return f"{day_names[day_index]}\n{hour_of_day:02d}:00"

    ## Hour of day and day of week (0=Monday, 6=Sunday) for every row
    data['hour'] = data[timestamp].dt.hour
    data['day_of_week'] = data[timestamp].dt.dayofweek

    ## Number of rows for each (day of week, hour) pair
    hourly_weekly_counts = (
        data
        .groupby(['day_of_week', 'hour'])
        .size()
        .reset_index(name='count')
    )

    ## Single running slot number across the week, plus its display label
    week_day = hourly_weekly_counts['day_of_week']
    day_hour = hourly_weekly_counts['hour']
    hourly_weekly_counts['day_hour'] = week_day * 24 + day_hour
    hourly_weekly_counts['day_hour_label'] = [
        slot_label(slot) for slot in hourly_weekly_counts['day_hour']
    ]

    ## Bar chart of counts per slot
    axis_names = {'day_hour_label': "Day Hour", 'count': "Count"}
    fig = px.bar(
        hourly_weekly_counts,
        x='day_hour_label',
        y='count',
        title=title,
        labels=axis_names,
    )

    ## Vertical tick labels on a white template
    fig.update_layout(xaxis={'tickangle': 90}, template='plotly_white')

    fig.show()



In [None]:
## Weekly profile of call connect times
create_week_plot(
    df,
    'Call Connect Time',
    '111 Call Connect Time by Day' + ' of Week and Hour of Day',
)

In [None]:
## Function to create month plot
def create_month_plot(data,timestamp,bank_holiday,title):
    """Show a bar chart of row counts per calendar date, coloured by holiday flag.

    Parameters
    ----------
    data : pandas.DataFrame
        Source rows. NOTE: a 'date' column is written onto this DataFrame
        itself.
    timestamp : column label
        Column of ``data`` whose values are read through the ``.dt``
        accessor.
    bank_holiday : column label
        Column used both as a grouping key and as the bar colour; the
        values 'Yes' and 'No' are mapped to orange and sky blue.
    title : str
        Title of the figure.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    ## Calendar date of every row
    data['date'] = data[timestamp].dt.date

    ## Number of rows for each (date, holiday flag) pair
    grouped = data.groupby(['date',bank_holiday])
    date_counts = grouped.size().reset_index(name='count')

    ## Name plotly uses to look up the holiday column
    holiday_col = f'{bank_holiday}'
    axis_names = {'date': "Date", 'count': "Count", holiday_col: "Bank Holiday"}

    ## Bar chart of counts per date, coloured by holiday status
    fig = px.bar(
        date_counts,
        x='date',
        y='count',
        color=holiday_col,
        title=title,
        labels=axis_names,
        color_discrete_map={'Yes': 'orange', 'No': 'skyblue'},
    )

    ## Vertical tick labels on a white template
    fig.update_layout(xaxis={'tickangle': 90}, template='plotly_white')

    fig.show()

In [None]:
## Daily call volumes with bank holidays highlighted
create_month_plot(
    df,
    'Call Connect Time',
    'Bank Holiday',
    '111 Call Connect Time by Date',
)