## Packages

In [None]:
import pandas as pd
import numpy as np
from math import ceil
import sqlalchemy 
from sqlalchemy import create_engine, text
from db_secrets import SQL_107

## Test Connection

In [None]:
## SQL text for the extract: rows from uec_111_V2 (alias a) left-joined to
## uec_999_V2 (alias b) on Pseudo_NHS, keeping 999 calls that connect between
## 0 and 89,999 seconds after the 111 call. The WHERE clause keeps 111 rows
## dated 2023-01-01 or later with a non-empty Pseudo_NHS and a NULL
## [Indicator Type]; TOP 100 caps the result at 100 rows.
## NOTE(review): the join window is (90000-1) seconds, but the inline SQL
## comment calls it "seconds in 24 hours", which would be 86400 (90000 s is
## 25 hours) - confirm which of the two is intended.
query_text = """
SELECT  
TOP 100
		a.[TotalCalls]
	,	a.[Indicator]
	,	a.[Call_ID]
	,	a.[Pseudo_NHS]
	,	a.[CallDate]
	,	a.[Call Connect Time]
	,	b.[Call Connect Time] AS [999_Call Connect Time]
	,	a.[YearMonth]
	,	a.[Weekday_Name]
	,	a.[Week_Start]
	,	a.[Financial Year]
	,	a.[ICP]
	,	a.[Sub ICB]
	,	a.[PCN]
	,	a.[GP Practice]
	,	a.[Practice_Code]
	,	a.[SymptomGroup]
	,	a.[Disposition_Group]
	,	a.[UEC_Lookup]
	,	a.[In_Out_Hours]
	,	a.[CAS Input]
	,	a.[Contacts]
	,	a.[Call_Taker_Triages]
	,	a.[Clinical_Triages]
	,	a.[Latest_COMPLETE_FM_To_DATE]
	,	a.[Patient Age]
	,	a.[Attend_ID]
	,	a.[patient.nhs_number.value Pseudo]
	,	ISNULL(a.[AE_Arrival],b.[Call Connect Time]) AS [Next Contact]
	,	CASE
			WHEN a.[AE_Outcome] IS NOT NULL THEN a.[AE_Outcome]
			WHEN b.[SeeTreatConvey] = 1 THEN 'See, Treat, Convey'
			WHEN b.[SeeTreatRefer] = 1 THEN 'See, Treat, Refer'
			WHEN b.[HearTreat] = 1 THEN 'Hear, Treat'
			WHEN b.[NoResponse] = 1 THEN '999 Call Only'
			ELSE 'No UEC Contact'
			END AS [AE_Outcome]
	,	CASE WHEN a.[Indicator Type] IS NULL AND b.[Call Connect Time] IS NOT NULL THEN '999' ELSE a.[Indicator Type] END AS [Indicator Type]
	,	a.[call_rank_ae_ID]
      ,b.[NoResponse]
      ,b.[HearTreat]
      ,b.[SeeTreatRefer]
      ,b.[SeeTreatConvey]
    ,	ISNULL(	CONVERT(DATETIME,a.[AE_Arrival],21) ,b.[Call Connect Time]) AS [AE_Arrival_Datetime]
    ,	CAST(DATEDIFF(MINUTE
                ,a.[Call Connect Time]
                ,ISNULL( CONVERT(DATETIME,a.[AE_Arrival],21) ,b.[Call Connect Time]) )
           AS float)/60.0 AS [Hours_to_Next]
FROM
	[ReportingGateway].[dbo].[uec_111_V2] AS a
	LEFT JOIN
		[ReportingGateway].[dbo].[uec_999_V2] AS b
		ON	a.[Pseudo_NHS] = b.[Pseudo_NHS]
		AND b.[CallDate] >= '2023-01-01'
		AND DATEDIFF(SECOND
                ,a.[Call Connect Time]
                ,b.[Call Connect Time])
			BETWEEN 0 AND (90000-1) --seconds in 24 hours
WHERE	
	1=1
	AND a.[Pseudo_NHS] != ''
	AND a.[Indicator Type] IS NULL
	AND	a.[CallDate] >= '2023-01-01'
"""

In [None]:
## Small-number suppression helper: round a value up to a multiple of 5
def round_up_to_5(x):
    """Return the smallest multiple of 5 that is greater than or equal to x.

    Values that are already a multiple of 5 (including 0) come back unchanged.
    """
    blocks_of_five = ceil(x / 5)
    return blocks_of_five * 5

In [None]:
## Create the engine from the connection details returned by SQL_107()
engine = create_engine(SQL_107())

## Return data. The connection is opened only for the read; the `with` block
## closes it afterwards (also when the query raises) instead of leaving it
## open for the rest of the session. `conn` remains defined but closed.
with engine.connect() as conn:
    df_raw = pd.read_sql(query_text, conn)

In [None]:
## Work on an independent (deep) copy so the raw extract in df_raw stays untouched
df = df_raw.copy(deep=True)

In [None]:
def Trauma_Detect(x):
    """Classify a symptom-group label by the trauma marker it contains.

    The checks run from most to least specific: 'Blunt', then 'Penetrating',
    then any other occurrence of 'Trauma'.

    Args:
        x: Symptom-group text. A value that is not a string (e.g. None or
            NaN for a missing entry) is treated as carrying no trauma marker.

    Returns:
        One of 'Blunt', 'Penetrating', 'Other Trauma' or 'Not Trauma'.
    """
    ## A substring test against None/NaN raises TypeError, so missing values
    ## are classified up front rather than allowed to raise
    if not isinstance(x, str):
        return 'Not Trauma'

    if 'Blunt' in x:
        return 'Blunt'
    if 'Penetrating' in x:
        return 'Penetrating'
    if 'Trauma' in x:
        return 'Other Trauma'
    return 'Not Trauma'

In [None]:
def Pregnancy_Detect(x):
    """Classify a symptom-group label by the pregnancy marker it contains.

    The two gestation-specific markers are tested before the bare 'Pregnant'
    marker, because both of them also contain the word 'Pregnant'.

    Args:
        x: Symptom-group text. A value that is not a string (e.g. None or
            NaN for a missing entry) is treated as carrying no pregnancy
            marker.

    Returns:
        One of 'Over 20 Weeks', 'Under 20 Weeks', 'Other Pregnancy' or
        'Not Pregnant'.
    """
    ## A substring test against None/NaN raises TypeError, so missing values
    ## are classified up front rather than allowed to raise
    if not isinstance(x, str):
        return 'Not Pregnant'

    if 'Pregnant, Over 20 Weeks' in x:
        return 'Over 20 Weeks'
    if 'Pregnant, Under 20 Weeks' in x:
        return 'Under 20 Weeks'
    if 'Pregnant' in x:
        return 'Other Pregnancy'
    return 'Not Pregnant'

In [None]:
## Inspect the column labels of the working copy. This is a bare expression,
## so it only shows something where the value is echoed (e.g. a notebook cell).
df.columns

In [None]:
## Yes/No flags from the triage counts: 'No' only when the count equals 0,
## 'Yes' for every other value
df['Call_Taker_Triage'] = np.where(df['Call_Taker_Triages'] == 0, 'No', 'Yes')
df['Clinical_Triage'] = np.where(df['Clinical_Triages'] == 0, 'No', 'Yes')

## Derive the trauma / pregnancy categories first: they read the markers that
## are stripped from SymptomGroup just below
df['Trauma_Type'] = df['SymptomGroup'].transform(Trauma_Detect)
df['Pregnant'] = df['SymptomGroup'].transform(Pregnancy_Detect)

## Strip the markers from SymptomGroup. They are plain text, so they are
## matched literally (regex=False). Order matters: the two gestation-specific
## suffixes must go before the bare ', Pregnant' suffix that both start with.
for marker in (
    ', Blunt',
    ', Penetrating',
    ', Pregnant, Over 20 Weeks',
    ', Pregnant, Under 20 Weeks',
    ', Pregnant',
):
    df['SymptomGroup'] = df['SymptomGroup'].str.replace(marker, '', regex=False)

## Empty strings (and any other falsy value) become None so that fillna
## labels them together with the genuinely missing outcomes
df['AE_Outcome'] = (
    df['AE_Outcome']
    .transform(lambda outcome: outcome or None)
    .fillna('No UEC Contact')
)
df['Indicator Type'] = df['Indicator Type'].fillna('No UEC Contact')


In [None]:
def generate_table1(data, group=None):
    """Build a "Table 1"-style descriptive summary of a DataFrame.

    Numeric columns get a single row formatted as ``median (Q1,Q3)``.
    Object, category and bool columns get one row per level, most frequent
    level first, formatted as ``count (percent%)``. Displayed counts and the
    N in every column header go through ``round_up_to_5``; the percentages
    are calculated from the unrounded counts.

    Args:
        data: DataFrame to summarise.
        group: Optional name of a column in ``data``. When given, one column
            per value of ``data[group]`` (largest group first) is added ahead
            of the 'Overall' column, and ``group`` is dropped from the
            categorical columns being summarised.

    Returns:
        DataFrame indexed by 'Characteristic', with a 'Level' column, one
        column per group value (when ``group`` is given) and an 'Overall'
        column.
    """
    ## Number of rows
    n_rows = len(data)

    ## Separate numerical and categorical columns
    numeric_cols = data.select_dtypes(include=['number']).columns
    categorical_cols = data.select_dtypes(include=['object', 'category', 'bool']).columns

    def by_size_desc(key):
        ## Distinct values of data[key], most frequent first
        return (data.groupby(key)
                    .size()
                    .to_frame(name='count')
                    .sort_values('count', ascending=False)
                    .index)

    def median_iqr(col):
        ## Format median and quartiles
        return f"{col.median():.1f} ({col.quantile(0.25):.1f},{col.quantile(0.75):.1f})"

    def count_pc(count, total):
        ## Format number and percent: the count is rounded for display, the
        ## percentage uses the exact count
        percent = (count / total) * 100
        return f'{round_up_to_5(count):,} ({percent:.1f}%)'

    ## Per-group lookups, built once. The counts are taken from `data` itself,
    ## not from any frame outside this function, so headers and percentages
    ## always describe the rows that were passed in.
    items = []
    group_counts = None
    group_headers = {}
    subsets = {}
    if group:
        categorical_cols = [c for c in categorical_cols if c != group]
        group_counts = data[group].value_counts()
        items = by_size_desc(group)
        for item in items:
            group_headers[item] = f'{item}, N={round_up_to_5(group_counts.get(item, 0)):,}'
            subsets[item] = data.loc[data[group] == item]
    overall_header = f'Overall, N={round_up_to_5(n_rows):,}'

    ## Collects one dict per output row
    summary_rows = []

    ## Numeric data summary (median IQR)
    for col in numeric_cols:
        row = {'Characteristic': f'{col} (median, IQR)', 'Level': ''}
        for item in items:
            row[group_headers[item]] = median_iqr(subsets[item][col])
        row[overall_header] = median_iqr(data[col])
        summary_rows.append(row)

    ## Categorical data summary (each category level on a separate line)
    for col in categorical_cols:
        ## Level frequencies are computed once per column rather than once
        ## per level and group
        overall_counts = data[col].value_counts()
        counts_by_item = {item: subsets[item][col].value_counts() for item in items}

        for level in by_size_desc(col):
            row = {'Characteristic': f'{col} (N, %)', 'Level': f'{level}'}
            for item in items:
                row[group_headers[item]] = count_pc(
                    counts_by_item[item].get(level, 0),
                    group_counts.get(item, 0),
                )
            row[overall_header] = count_pc(overall_counts.get(level, 0), n_rows)
            summary_rows.append(row)

    ## Convert the summary rows to a DataFrame
    table1_df = pd.DataFrame(summary_rows).set_index('Characteristic')
    return table1_df



In [None]:
## First summary table: call handling, outcome and timing columns, taken as
## an independent copy of the working frame
df_table_one = df.loc[:, [
    'Disposition_Group',
    'In_Out_Hours',
    'Call_Taker_Triage',
    'Clinical_Triage',
    'Patient Age',
    'AE_Outcome',
    'Indicator Type',
    'Hours_to_Next',
    'UEC_Lookup',
]].copy()

## Summarise with one column per 'Indicator Type' value plus an overall column
table_one = generate_table1(data=df_table_one, group='Indicator Type')

table_one

In [None]:
## Second summary table: trauma, pregnancy and symptom-group columns, taken
## as an independent copy of the working frame
df_table_two = df.loc[:, [
    'Trauma_Type',
    'Pregnant',
    'SymptomGroup',
    'Indicator Type',
]].copy()

## Summarise with one column per 'Indicator Type' value plus an overall column
table_two = generate_table1(data=df_table_two, group='Indicator Type')

table_two