In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from avoidable_admissions.features import feature_maps

In [3]:
import avoidable_admissions as aa
import pandas as pd
import warnings
import datetime
import zlib
import os

import numpy as np
from IPython.display import HTML

from avoidable_admissions.data.validate import (
    AdmittedCareEpisodeSchema,
    AdmittedCareFeatureSchema,
    get_schema_properties,
    validate_admitted_care_data,
    validate_admitted_care_features,
    validate_dataframe,
)
from avoidable_admissions.features import feature_maps
from avoidable_admissions.features.build_features import build_admitted_care_features

In [4]:
# Root folder that holds the input extract and the results sub-folders.
# Set the AVOIDABLE_ADMS_DATA_DIR environment variable to run the notebook
# somewhere else; without it the original shared-drive location is used.
path = os.environ.get(
    "AVOIDABLE_ADMS_DATA_DIR",
    "T:/Business Intelligence/Data Science/Work/hdruk_avoidable_adms/",
)

In [5]:
# Load the admitted-care extract from the data folder.
adm_care = pd.read_csv(os.path.join(path, "admitted_care.csv"))

# Replace every patient identifier with the CRC-32 checksum of its UTF-8 string form.
# NOTE(review): presumably meant as pseudonymisation - CRC-32 is a 32-bit checksum,
# not a cryptographic hash; confirm it satisfies the governance requirement.
adm_care['patient_id'] = (
    adm_care['patient_id']
    .astype(str)
    .map(lambda raw_id: zlib.crc32(raw_id.encode('utf-8')))
)

In [6]:
# Work on a copy so the raw extract stays available for re-runs.
df = adm_care.copy()

# Missing Townsend quintiles are coded as 0 so the column can be held as nullable Int64.
df["townsend_score_quintile"] = pd.array(
    df["townsend_score_quintile"].fillna(0), dtype="Int64"
)

# Coded fields and the patient identifier are cast to strings.
for str_col in ("gender", "admisorc", "disdest", "dismeth", "patient_id"):
    df[str_col] = df[str_col].astype(str)

# Admission dates are parsed value by value as day-first dates.
df["admidate"] = df["admidate"].apply(pd.to_datetime, dayfirst=True)

# The row index doubles as the visit identifier.
df["visit_id"] = df.index

In [7]:
# Parse every operation-date column (opdate_NN) as a day-first date.
opdate_cols = df.filter(regex='opdate_[0-9]{2}').columns
parsed_opdates = df[opdate_cols].apply(pd.to_datetime, dayfirst=True)
df[opdate_cols] = parsed_opdates

# spellnum is removed before the frame is validated.
df = df.drop(columns='spellnum')

In [8]:
# Split the prepared extract into rows that pass / fail the episode schema.
good, bad = validate_dataframe(df, AdmittedCareEpisodeSchema)

# Report how many rows landed on each side of the validation.
print("Good dataframe has %d rows" % len(good))
print("Bad dataframe has %d rows" % len(bad))

Good dataframe has 39087 rows
Bad dataframe has 0 rows


In [9]:
# Build the feature columns from a copy of the validated episodes, so `good` itself is not modified.
dff = build_admitted_care_features(good.copy())

In [10]:
# Cast the age band to plain strings and reduce admission timestamps to calendar dates
# (presumably the forms the feature schema expects - confirm against the schema).
dff["admiage_cat"] = dff["admiage_cat"].astype(str)
dff["admidate"] = dff["admidate"].dt.date

In [11]:
# Split the feature frame into rows that pass / fail the feature schema.
good_f, bad_f = validate_dataframe(dff, AdmittedCareFeatureSchema)

# Report how many rows landed on each side of the validation.
print("Good dataframe has %d rows" % len(good_f))
print("Bad dataframe has %d rows" % len(bad_f))

Good dataframe has 39087 rows
Bad dataframe has 0 rows


In [12]:
# From here on `df` is the analysis frame: a copy of the validated features
# (the name previously held the raw, pre-validation data).
df = good_f.copy()

In [13]:
# All acute admissions - this should be redundant if done at extraction.
# Only the first episode (epiorder == 1) of each admission is kept.
# .copy() makes the filtered frame independent, so the column assignments that
# follow do not write to a slice of another frame (SettingWithCopyWarning).
df = df[
    (df.admimeth.isin({"21", "22", "23", "24", "25", "2A", "2B", "2C", "2D", "28"}))
    & (df.epiorder == 1)
].copy()

# Acute admissions by ACSC status: "-" in diag_01_acsc becomes "Non-ACSC",
# every other value becomes "ACSC".
df["is_acsc"] = df.diag_01_acsc.where(df.diag_01_acsc == "-", "ACSC").replace(
    "-", "Non-ACSC"
)

In [14]:
# Flag whether any procedure was recorded: 'No' only when the count is exactly zero.
df['procedures'] = np.where(df['opertn_count'].eq(0), 'No', 'Yes')

In [15]:
# Quintile 0 is the placeholder written when the data were loaded (fillna(0));
# turn it back into a missing value for the analysis tables.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df.townsend_score_quintile = np.where(df.townsend_score_quintile == 0, np.nan, df.townsend_score_quintile)

#### APC Analysis

In [None]:
# Categorical columns to tabulate (keys) and the heading used for each table (values).
# Insertion order is the order in which the tables are written out.
categorical_features = dict(
    admiage_cat="Age Bands",
    gender_cat="Gender",
    ethnos_cat="Ethnicity",
    townsend_score_quintile="Townsend Score Quintile",
    diag_01_acsc="Ambulatory Care Sensitive Condition",
    admisorc_cat="Admission Source",
    admidayofweek="Admission Day of Week",
    length_of_stay_cat="Length of Stay",
    disdest_cat="Discharge Destination",
    dismeth_cat="Discharge Method",
    procedures="Procedures",
)

In [None]:
def make_crosstab(colname: str, tablename: str, data=None, site=None) -> pd.DataFrame:
    """Cross-tabulate one categorical column against ACSC status for a single site.

    Parameters
    ----------
    colname : column of `data` to tabulate (one output row per category).
    tablename : label used as the first level of the returned row MultiIndex.
    data : frame to tabulate; it must contain `colname` and `is_acsc`. Defaults to
        the module-level `df_site`, so existing two-argument calls are unchanged.
    site : value written to the 'Site' column. Defaults to the module-level `filter`.

    Returns
    -------
    DataFrame indexed by (tablename, category) with the columns
    Site, All Attendances, All Attendances (%), ACSC, ACSC %, Non-ACSC, Non-ACSC %.
    Every percentage column is on a 0-100 scale rounded to 2 decimal places.

    Raises
    ------
    KeyError if `data.is_acsc` lacks either the 'ACSC' or the 'Non-ACSC' value.
    """
    if data is None:
        data = df_site
    if site is None:
        site = filter

    counts = pd.crosstab(data[colname], data.is_acsc, margins=False, dropna=False)

    # Row percentages: share of each category that is ACSC / Non-ACSC.
    row_pct = (
        pd.crosstab(data[colname], data.is_acsc, normalize="index", dropna=False)
        .mul(100)
        .round(2)
        .rename(columns={"ACSC": "ACSC %", "Non-ACSC": "Non-ACSC %"})
    )

    z = pd.concat([counts, row_pct], axis=1).sort_index(axis=1).fillna("-")

    # NOTE(review): small-number suppression (counts below 10) was sketched here in
    # commented-out code but is not applied - confirm whether it is needed before release.

    z.insert(loc=0, column='All Attendances', value=z['ACSC'] + z['Non-ACSC'])
    # Column share of all attendances, on the same 0-100 scale as the other % columns
    # (this used to be a 0-1 proportion despite the '(%)' label).
    all_pct = (z['All Attendances'] / z['All Attendances'].sum()).mul(100).round(2)
    z.insert(loc=1, column='All Attendances (%)', value=all_pct)
    z.insert(loc=0, column='Site', value=site)

    z.index = pd.MultiIndex.from_tuples([(tablename, i) for i in z.index])

    return z

In [None]:
def Q1(growth_vals:pd.Series):
    """Return the lower quartile (25th percentile) of `growth_vals`."""
    lower_quartile = growth_vals.quantile(q=0.25)
    return lower_quartile

def Q3(growth_vals:pd.Series):
    """Return the upper quartile (75th percentile) of `growth_vals`."""
    upper_quartile = growth_vals.quantile(q=0.75)
    return upper_quartile

# Numeric columns summarised by make_summary (keys) with a display label (values).
# NOTE(review): despite the name, these are the continuous measures, not categorical ones.
cat_features = dict(
    admiage="Age",
    length_of_stay="Length of Stay",
    opertn_count="Number of Procedures",
)
    
def make_summary(colname, tablename, data=None, site=None):
    """Summary statistics of one numeric column, overall and by ACSC status, for one site.

    Parameters
    ----------
    colname : numeric column of `data` to summarise. The function previously ignored
        this argument and read the global loop variable `k` instead.
    tablename : display label; not used in the returned table.
    data : frame to summarise; it must contain `colname`, `is_acsc` and `procodet`.
        Defaults to the module-level `df_site`.
    site : value written to the 'Site' column. Defaults to the module-level `filter`.

    Returns
    -------
    DataFrame with one row per statistic and the columns
    Site, Group, Measure, Total, ACSC, Non-ACSC.
    """
    if data is None:
        data = df_site
    if site is None:
        site = filter

    # Kept as a set, as in the original call: passing a list would make pivot_table
    # lay the result out differently.
    stats = {np.size, min, max, np.mean, np.std, np.median, Q1, Q3}
    tidy_names = {'level_0': 'Group', 'level_1': 'Measure'}

    by_acsc = (
        pd.DataFrame(pd.pivot_table(data, values=[colname], columns=['is_acsc'], aggfunc=stats))
        .reset_index()
        .rename(columns=tidy_names)
    )

    # The overall figures are obtained by pivoting on the provider code and relabelling
    # the 'RXN00' column as 'Total'.
    # NOTE(review): assumes every row carries procodet == 'RXN00' - confirm.
    by_provider = (
        pd.DataFrame(pd.pivot_table(data, values=[colname], columns=['procodet'], aggfunc=stats))
        .reset_index()
        .rename(columns=tidy_names)
        .rename(columns={'RXN00': 'Total'})
    )

    x = pd.concat([by_provider[['Measure', 'Total']], by_acsc[['ACSC', 'Non-ACSC']]], axis=1)
    x.insert(loc=0, column='Group', value=colname)
    x.insert(loc=0, column='Site', value=site)
    return x

In [None]:
# Build the APC analysis tables and summary statistics for every site and write them to CSV.
#
# `filter` (the site code), `df_site`, `k` and `v` are intentionally plain module-level
# loop variables: the table helpers look names up in module scope, so do not turn these
# loops into comprehensions or rename the variables.
# NOTE(review): `filter` shadows the built-in of the same name.
sites = ['RXN01', 'RXN02']

# Create Analysis Tables
site_tables = []
for filter in sites:
    df_site = df[df['sitetret'] == filter]
    df_results = []
    for k, v in categorical_features.items():
        z = make_crosstab(k, v)
        df_results.append(z)
    site_tables.append(pd.concat(df_results))

df_results = pd.concat(site_tables)
df_results.to_csv(os.path.join(path, "results/avoidable_admissions/", "apc_analysis_tables.csv"))


# Create Summary Statistic Tables
site_summaries = []
for filter in sites:
    df_site = df[df['sitetret'] == filter]
    df_results = []
    for k, v in cat_features.items():
        z = make_summary(k, v)
        df_results.append(z)
    df_results_s = pd.concat(df_results)
    # The row count is reported by the aggregation as 'size'; present it as 'N'.
    df_results_s.Measure = np.where(df_results_s.Measure == 'size', 'N', df_results_s.Measure)
    site_summaries.append(df_results_s)

df_results = pd.concat(site_summaries)

df_results.to_csv(os.path.join(path, "results/avoidable_admissions/", "apc_summary_statistics.csv"), index=False)

# Winter Pressures

#### Data Prep

In [None]:
# Winter-pressures analysis frame, rebuilt from the validated features.
df_wp = good_f.copy()

# All acute admissions - this should be redundant if done at extraction.
# .copy() makes the filtered frame independent, so the many column assignments below
# do not write to a slice of another frame (SettingWithCopyWarning).
df_wp = df_wp[
    (df_wp.admimeth.isin({"21", "22", "23", "24", "25", "2A", "2B", "2C", "2D", "28"}))
    & (df_wp.epiorder == 1)
].copy()

# Acute admissions by ACSC status
df_wp["is_acsc"] = df_wp.diag_01_acsc.where(df_wp.diag_01_acsc == "-", "ACSC").replace(
    "-", "Non-ACSC"
)

# Admission dates back to timestamps so they can be compared with the winter boundaries.
df_wp.admidate = pd.to_datetime(df_wp.admidate, format='%Y-%m-%d')

# Set deprivation as object; quintile 0 is the missing-value placeholder.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df_wp.townsend_score_quintile = np.where(df_wp.townsend_score_quintile == 0, np.nan, df_wp.townsend_score_quintile)
df_wp.townsend_score_quintile = df_wp.townsend_score_quintile.astype('object')

# Create binary outcome variables
df_wp['over_1_proc'] = np.where(df_wp.opertn_count >= 1, 'Yes', 'No')
df_wp['proc_cat'] = np.where(df_wp.opertn_count >= 2, '>=2', '<2')
# NOTE(review): rows where disreadydays is missing match neither comparison and fall
# through to the raw value, which np.where presumably turns into the text 'nan' - confirm
# that this is the intended category for missing values.
df_wp['disreadydays_cat'] = np.where(df_wp.disreadydays >= 2, '>=2 days', np.where(df_wp.disreadydays < 2, '<2 days', df_wp.disreadydays))

# Flag time of day: in-hours means a Monday-Friday admission with an hour from 08 to 17.
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
admit_hour = df_wp.admitime.str[:2].astype('float')  # first two characters of admitime
df_wp['time_of_day'] = np.where(
    (admit_hour >= 8) & (admit_hour < 18) & (df_wp['admidayofweek'].isin(weekdays)),
    'in-hours',
    'out-hours',
)

# Merge categories
# 'Not stated' and 'Other Ethnic Groups' are folded into 'Not known'.
df_wp['ethnos_cat'] = np.where(
    df_wp.ethnos_cat.isin(['Not stated', 'Not known', 'Other Ethnic Groups']),
    'Not known',
    df_wp.ethnos_cat,
)
df_wp['diag_seasonal_cat'] = np.where(df_wp.diag_seasonal_cat == '-', 'None', df_wp.diag_seasonal_cat)
df_wp['avoidable_adm'] = np.where(df_wp.diag_01_acsc == '-', 'Non-avoidable (non-ACSCs)', 'Avoidable (ACSCs)')
df_wp['disch_cat'] = np.where(
    df_wp.disdest_cat == 'Care Home',
    'Discharged to care home',
    np.where(df_wp.disdest_cat == 'Medical care', 'Discharged to medical care', 'Discharged to other'),
)
df_wp['procedures'] = np.where(df_wp.opertn_count == 0, 'No', 'Yes')

In [None]:
# Create Winter categories
# Six-month definition: 1 Oct 2021 up to (not including) 1 Apr 2022, plus everything
# from 1 Oct 2022 onwards (the second winter has no end date in this code).
start_date, end_date = np.datetime64('2021-10-01'), np.datetime64('2022-04-01')
in_first_winter = df_wp.admidate.ge(start_date) & df_wp.admidate.lt(end_date)

start_date = np.datetime64('2022-10-01')
in_second_winter = df_wp.admidate.ge(start_date)

df_wp['winter_flag'] = np.where(in_first_winter | in_second_winter, 'winter', 'not-winter')

# Three-month definition: 1 Dec 2021 up to (not including) 1 Mar 2022.
# NOTE(review): unlike the six-month flag, no second winter is included here - confirm.
start_date, end_date = np.datetime64('2021-12-01'), np.datetime64('2022-03-01')
in_short_winter = df_wp.admidate.ge(start_date) & df_wp.admidate.lt(end_date)
df_wp['winter_flag2'] = np.where(in_short_winter, 'winter', 'not-winter')

### Part 1

In [None]:
def Q1(growth_vals:pd.Series):
    """Return the 25th percentile of `growth_vals` (same definition as earlier in the notebook)."""
    lower_quartile = growth_vals.quantile(q=0.25)
    return lower_quartile

def Q3(growth_vals:pd.Series):
    """Return the 75th percentile of `growth_vals` (same definition as earlier in the notebook)."""
    upper_quartile = growth_vals.quantile(q=0.75)
    return upper_quartile

In [None]:
# Numeric measures summarised in Part 1 (keys) with a short label (values).
# NOTE(review): this rebinds `categorical_features`, although the measures are continuous.
categorical_features = dict(
    admiage="Age",
    length_of_stay="LOS",
    disreadydays="Days Ready",
)

In [None]:
def make_summary(colname, tablename, data=None, site=None):
    """Summary statistics of one numeric column split by the six-month winter flag.

    Parameters
    ----------
    colname : numeric column of `data` to summarise. The function previously ignored
        this argument and read the global loop variable `k` instead.
    tablename : display label; not used in the returned table.
    data : frame to summarise; it must contain `colname` and `winter_flag`.
        Defaults to the module-level `df_site`.
    site : value written to the 'Site' column. Defaults to the module-level `filter`.

    Returns
    -------
    DataFrame with one row per statistic and the columns
    Site, Group, Measure, Apr-Sept (not-winter), Oct-March (winter).
    """
    if data is None:
        data = df_site
    if site is None:
        site = filter

    # Kept as a set, as in the original call: a list would change pivot_table's layout.
    stats = {np.size, min, max, np.mean, np.std, np.median, Q1, Q3}
    pivot = pd.pivot_table(data, values=[colname], columns=['winter_flag'], aggfunc=stats)
    x = pd.DataFrame(pivot).reset_index().rename(
        columns={'level_0': 'Group', 'level_1': 'Measure', 'winter': 'Oct-March', 'not-winter': 'Apr-Sept'}
    )
    x.insert(loc=0, column='Site', value=site)
    return x

def make_summary2(colname, tablename, data=None, site=None):
    """Summary statistics of one numeric column split by the three-month winter flag.

    Parameters
    ----------
    colname : numeric column of `data` to summarise. The function previously ignored
        this argument and read the global loop variable `k` instead.
    tablename : display label; not used in the returned table.
    data : frame to summarise; it must contain `colname` and `winter_flag2`.
        Defaults to the module-level `df_site`.
    site : value written to the 'Site' column. Defaults to the module-level `filter`.

    Returns
    -------
    DataFrame with one row per statistic and the columns
    Site, Group, Measure, Mar-Nov (not-winter), Dec-Feb (winter).
    """
    if data is None:
        data = df_site
    if site is None:
        site = filter

    # Kept as a set, as in the original call: a list would change pivot_table's layout.
    stats = {np.size, min, max, np.mean, np.std, np.median, Q1, Q3}
    pivot = pd.pivot_table(data, values=[colname], columns=['winter_flag2'], aggfunc=stats)
    x = pd.DataFrame(pivot).reset_index().rename(
        columns={'level_0': 'Group', 'level_1': 'Measure', 'winter': 'Dec-Feb', 'not-winter': 'Mar-Nov'}
    )
    x.insert(loc=0, column='Site', value=site)
    return x

In [None]:
# Part 1 summary statistics: for each site, the six-month and three-month winter
# summaries are built separately and placed side by side.
#
# `filter`, `df_site`, `k` and `v` are intentionally plain module-level loop variables:
# the summary helpers look names up in module scope, so do not turn these loops into
# comprehensions or rename the variables.
site_frames = []
for filter in ['RXN01', 'RXN02']:
    df_site = df_wp[df_wp['sitetret'] == filter]

    df_results = []
    for k, v in categorical_features.items():
        z = make_summary(k, v)
        df_results.append(z)
    six_month = pd.concat(df_results)

    df_results = []
    for k, v in categorical_features.items():
        z = make_summary2(k, v)
        df_results.append(z)
    three_month = pd.concat(df_results)

    # The two tables are joined column-wise on their (repeating) row index, which relies
    # on both having the same rows in the same order.
    site_frames.append(
        pd.concat(
            [
                six_month[['Site', 'Group', 'Measure', 'Oct-March', 'Apr-Sept']],
                three_month[['Dec-Feb', 'Mar-Nov']],
            ],
            axis=1,
        )
    )

df_results = pd.concat(site_frames)
# The row count is reported by the aggregation as 'size'; present it as 'N'.
df_results.Measure = np.where(df_results.Measure == 'size', 'N', df_results.Measure)
df_results.to_csv(os.path.join(path, "results/winter_pressures/part_1/", "apc_summary_statistics.csv"), index=False)

In [None]:
def make_crosstab(colname, tablename, data=None, site=None):
    """Cross-tabulate one categorical column against both winter flags for a single site.

    Parameters
    ----------
    colname : column of `data` to tabulate (one output row per category). The function
        previously ignored this argument and read the global loop variable `k`.
    tablename : label used as the first level of the returned row MultiIndex
        (previously read from the global loop variable `v`).
    data : frame to tabulate; it must contain `colname`, `winter_flag` and
        `winter_flag2`. Defaults to the module-level `df_site`.
    site : value written to the 'Site' column. Defaults to the module-level `filter`.

    Returns
    -------
    DataFrame indexed by (tablename, category) with 'Site' followed by counts and row
    percentages (0-100, 2 d.p.) for winter / non-winter under the 6-month and the
    3-month definitions.

    Raises
    ------
    KeyError if either flag lacks the 'winter' or the 'not-winter' value in `data`.
    """
    if data is None:
        data = df_site
    if site is None:
        site = filter

    def _counts_and_row_pct(flag_col, months):
        """Counts and row percentages of `colname` by one winter flag, with display column names."""
        winter = f"Winter ({months} months)"
        non_winter = f"Non-Winter ({months} months)"
        counts = pd.crosstab(data[colname], data[flag_col], margins=False, dropna=False).rename(
            columns={"winter": winter, "not-winter": non_winter}
        )
        row_pct = (
            pd.crosstab(data[colname], data[flag_col], normalize="index", dropna=False)
            .mul(100)
            .round(2)
            .rename(columns={"winter": winter + " %", "not-winter": non_winter + " %"})
        )
        return [counts, row_pct]

    pieces = _counts_and_row_pct("winter_flag", 6) + _counts_and_row_pct("winter_flag2", 3)
    z = pd.concat(pieces, axis=1).fillna("-")
    # Fix the presentation order of the columns.
    z = z[['Winter (6 months)', 'Winter (6 months) %', 'Non-Winter (6 months)', 'Non-Winter (6 months) %', 'Winter (3 months)', 'Winter (3 months) %', 'Non-Winter (3 months)', 'Non-Winter (3 months) %']]
    z.insert(loc=0, column='Site', value=site)
    z.index = pd.MultiIndex.from_tuples([(tablename, i) for i in z.index])

    return z

In [None]:
# Categorical columns tabulated against the winter flags (keys) and their table headings (values).
categorical_features = dict(
    admiage_cat="Age Bands",
    gender_cat="Gender",
    ethnos_cat="Ethnicity",
    townsend_score_quintile="Townsend Score Quintile",
    admisorc_cat="Admission Source",
    time_of_day="Time of Admission",
    diag_seasonal_cat="Seasonal Diagnosis",
    disdest_cat="Discharge Destination",
    avoidable_adm="Admission Type",
    length_of_stay_cat="Length of Stay",
    procedures="Procedures",
    disreadydays_cat="Days Ready for Discharge",
)

In [None]:
# Part 1 analysis tables: winter / non-winter crosstabs for each site, stacked into one file.
#
# `filter`, `df_site`, `k` and `v` are intentionally plain module-level loop variables:
# make_crosstab looks names up in module scope, so do not turn these loops into
# comprehensions or rename the variables.
site_tables = []
for filter in ['RXN01', 'RXN02']:
    df_site = df_wp[df_wp['sitetret'] == filter]

    df_results = []
    for k, v in categorical_features.items():
        z = make_crosstab(k, v)
        df_results.append(z)
    site_tables.append(pd.concat(df_results))

df_results = pd.concat(site_tables)
df_results.to_csv(os.path.join(path, "results/winter_pressures/part_1/", "apc_analysis_tables.csv"))

### Part 2

In [None]:
# Part 2: each numeric measure (key) is summarised within the groups of the column
# named by its value - here always the discharge category.
categorical_features = dict.fromkeys(
    ("admiage", "length_of_stay", "disreadydays"),
    "disch_cat",
)

In [None]:
def make_summary(colname, tablename, data=None, site=None):
    """Grouped summary statistics of one numeric column by winter flag, for one site.

    Parameters
    ----------
    colname : numeric column of `data` to summarise. The function previously ignored
        this argument and read the global loop variable `k` instead.
    tablename : name of the column whose values define the row groups (previously read
        from the global loop variable `v`). Despite the parameter name this is a
        column name, not a display label.
    data : frame to summarise; it must contain `colname`, the `tablename` column,
        `winter_flag` and `winter_flag2`. Defaults to the module-level `df_site`.
    site : value written to the 'site' column. Defaults to the module-level `filter`.

    Returns
    -------
    DataFrame with the '6-month' rows followed by the '3-month' rows. Its columns are
    period, site, group and one '<statistic> (<flag value>)' column per
    statistic / flag-value pair.
    """
    if data is None:
        data = df_site
    if site is None:
        site = filter

    def _period_summary(flag_col, period_label):
        """Summarise `colname` by group and one winter flag, flattening the column names."""
        # aggfunc is kept as a set, as in the original call.
        t = pd.DataFrame(
            pd.pivot_table(
                data,
                values=colname,
                index=tablename,
                aggfunc={np.size, min, max, np.mean, np.std, np.median, Q1, Q3},
                columns=[flag_col],
            ).reset_index()
        )
        # Two-level column labels (statistic, flag value) become 'statistic (flag value)'.
        t.columns = [first + ' (' + second + ')' for first, second in t.columns]
        # The first column holds the group values brought back by reset_index().
        t = t.rename(columns={t.columns[0]: 'group'})
        t.insert(loc=0, column='period', value=period_label)
        t.insert(loc=1, column='site', value=site)
        return t

    return pd.concat(
        [
            _period_summary('winter_flag', '6-month'),
            _period_summary('winter_flag2', '3-month'),
        ]
    )

In [None]:
# Part 2 summary statistics: grouped summaries for each site, stacked into one file.
#
# `filter`, `df_site`, `k` and `v` are intentionally plain module-level loop variables:
# make_summary looks names up in module scope, so do not turn these loops into
# comprehensions or rename the variables.
site_summaries = []
for filter in ['RXN01', 'RXN02']:
    df_site = df_wp[df_wp['sitetret'] == filter]

    df_results = []
    for k, v in categorical_features.items():
        z = make_summary(k, v)
        z.insert(loc=1, column='measure', value=k)
        df_results.append(z)
    site_summaries.append(pd.concat(df_results))

df_results = pd.concat(site_summaries)
# The row count is reported by the aggregation as 'size'; present it as 'N'.
df_results = df_results.rename(columns={'size (not-winter)': 'N (not-winter)','size (winter)': 'N (winter)'})
df_results.to_csv(os.path.join(path, "results/winter_pressures/part_2/", "apc_summary_statistics.csv"), index=False)

In [None]:
def make_crosstab(colname, tablename, data=None, site=None):
    """Cross-tabulate one categorical column against both winter flags within one stratum.

    Parameters
    ----------
    colname : column of `data` to tabulate (one output row per category). The function
        previously ignored this argument and read the global loop variable `k`.
    tablename : label used as the first level of the returned row MultiIndex
        (previously read from the global loop variable `v`).
    data : frame to tabulate; it must contain `colname`, `winter_flag` and
        `winter_flag2`. Defaults to the module-level `df2` (the current site/stratum subset).
    site : value written to the 'Site' column. Defaults to the module-level `filter`.

    Returns
    -------
    DataFrame indexed by (tablename, category) with 'Site' followed by counts and row
    percentages (0-100, 2 d.p.) for winter / non-winter under the 6-month and the
    3-month definitions.

    Raises
    ------
    KeyError if either flag lacks the 'winter' or the 'not-winter' value in `data`.
    """
    if data is None:
        data = df2
    if site is None:
        site = filter

    def _counts_and_row_pct(flag_col, months):
        """Counts and row percentages of `colname` by one winter flag, with display column names."""
        winter = f"Winter ({months} months)"
        non_winter = f"Non-Winter ({months} months)"
        counts = pd.crosstab(data[colname], data[flag_col], margins=False, dropna=False).rename(
            columns={"winter": winter, "not-winter": non_winter}
        )
        row_pct = (
            pd.crosstab(data[colname], data[flag_col], normalize="index", dropna=False)
            .mul(100)
            .round(2)
            .rename(columns={"winter": winter + " %", "not-winter": non_winter + " %"})
        )
        return [counts, row_pct]

    pieces = _counts_and_row_pct("winter_flag", 6) + _counts_and_row_pct("winter_flag2", 3)
    z = pd.concat(pieces, axis=1).fillna("-")
    # Fix the presentation order of the columns.
    z = z[['Winter (6 months)', 'Winter (6 months) %', 'Non-Winter (6 months)', 'Non-Winter (6 months) %', 'Winter (3 months)', 'Winter (3 months) %', 'Non-Winter (3 months)', 'Non-Winter (3 months) %']]
    z.insert(loc=0, column='Site', value=site)
    z.index = pd.MultiIndex.from_tuples([(tablename, i) for i in z.index])

    return z

In [None]:
# Categorical columns tabulated within each discharge stratum (keys) and their table headings (values).
categorical_features = dict(
    admiage_cat="Age Bands",
    gender_cat="Gender",
    ethnos_cat="Ethnicity",
    townsend_score_quintile="Townsend Score Quintile",
    admisorc_cat="Admission Source",
    time_of_day="Time of Admission",
    diag_seasonal_cat="Seasonal Diagnosis",
    disdest_cat="Discharge Destination",
    avoidable_adm="Admission Type",
    length_of_stay_cat="Length of Stay",
    procedures="Procedures",
    disreadydays_cat="Days Ready for Discharge",
)

In [None]:
# Part 2 analysis tables: crosstabs for every site and discharge stratum, stacked into one file.
#
# `filter`, `strata`, `df2`, `k` and `v` are intentionally plain module-level loop
# variables: make_crosstab looks names up in module scope, so do not turn these loops
# into comprehensions or rename the variables.
site_tables = []
for filter in ['RXN01', 'RXN02']:
    df_results = []
    for strata in ['Discharged to care home', 'Discharged to medical care', 'Discharged to other']:
        # Rows of the current site that fall in the current discharge stratum.
        df2 = df_wp[(df_wp['disch_cat']==strata) & (df_wp['sitetret'] == filter)]
        for k, v in categorical_features.items():
            z = make_crosstab(k, v)
            z.insert(loc=0, column='Strata', value=strata)
            df_results.append(z)
    site_tables.append(pd.concat(df_results))

df_results = pd.concat(site_tables)
df_results.to_csv(os.path.join(path, "results/winter_pressures/part_2/", "apc_analysis_tables.csv"))

### Part 3

In [None]:
# Part 3: each numeric measure (key) is summarised within the groups of the column
# named by its value - here always the in-hours / out-hours flag.
categorical_features = dict.fromkeys(
    ("admiage", "length_of_stay", "disreadydays"),
    "time_of_day",
)

In [None]:
# Part 3 summary statistics: grouped summaries for each site, stacked into one file.
#
# `filter`, `df_site`, `k` and `v` are intentionally plain module-level loop variables:
# make_summary looks names up in module scope, so do not turn these loops into
# comprehensions or rename the variables.
site_summaries = []
for filter in ['RXN01', 'RXN02']:
    df_site = df_wp[df_wp['sitetret'] == filter]

    df_results = []
    for k, v in categorical_features.items():
        z = make_summary(k, v)
        z.insert(loc=1, column='measure', value=k)
        df_results.append(z)
    site_summaries.append(pd.concat(df_results))

df_results = pd.concat(site_summaries)
# The row count is reported by the aggregation as 'size'; present it as 'N'.
df_results = df_results.rename(columns={'size (not-winter)': 'N (not-winter)','size (winter)': 'N (winter)'})
df_results.to_csv(os.path.join(path, "results/winter_pressures/part_3/", "apc_summary_statistics.csv"), index=False)

In [None]:
# Categorical columns tabulated within each time-of-day stratum (keys) and their table headings (values).
categorical_features = dict(
    admiage_cat="Age Bands",
    gender_cat="Gender",
    ethnos_cat="Ethnicity",
    townsend_score_quintile="Townsend Score Quintile",
    admisorc_cat="Admission Source",
    time_of_day="Time of Admission",
    diag_seasonal_cat="Seasonal Diagnosis",
    disdest_cat="Discharge Destination",
    avoidable_adm="Admission Type",
    length_of_stay_cat="Length of Stay",
    procedures="Procedures",
    disreadydays_cat="Days Ready for Discharge",
)

In [None]:
# Part 3 analysis tables: crosstabs for every site and time-of-day stratum, stacked into one file.
#
# `filter`, `strata`, `df2`, `k` and `v` are intentionally plain module-level loop
# variables: make_crosstab looks names up in module scope, so do not turn these loops
# into comprehensions or rename the variables.
site_tables = []
for filter in ['RXN01', 'RXN02']:
    df_results = []
    for strata in ['in-hours', 'out-hours']:
        # Rows of the current site that fall in the current time-of-day stratum.
        df2 = df_wp[(df_wp['time_of_day']==strata) & (df_wp['sitetret'] == filter)]
        for k, v in categorical_features.items():
            z = make_crosstab(k, v)
            z.insert(loc=0, column='Strata', value=strata)
            df_results.append(z)
    site_tables.append(pd.concat(df_results))

df_results = pd.concat(site_tables)
df_results.to_csv(os.path.join(path, "results/winter_pressures/part_3/", "apc_analysis_tables.csv"))

### Part 4

In [None]:
# Part 4: each numeric measure (key) is summarised within the groups of the column
# named by its value - here always the avoidable / non-avoidable flag.
categorical_features = dict.fromkeys(
    ("admiage", "length_of_stay", "disreadydays"),
    "avoidable_adm",
)

In [None]:
# Part 4 summary statistics: grouped summaries for each site, stacked into one file.
#
# `filter`, `df_site`, `k` and `v` are intentionally plain module-level loop variables:
# make_summary looks names up in module scope, so do not turn these loops into
# comprehensions or rename the variables.
site_summaries = []
for filter in ['RXN01', 'RXN02']:
    df_site = df_wp[df_wp['sitetret'] == filter]

    df_results = []
    for k, v in categorical_features.items():
        z = make_summary(k, v)
        z.insert(loc=1, column='measure', value=k)
        df_results.append(z)
    site_summaries.append(pd.concat(df_results))

df_results = pd.concat(site_summaries)
# The row count is reported by the aggregation as 'size'; present it as 'N'.
df_results = df_results.rename(columns={'size (not-winter)': 'N (not-winter)','size (winter)': 'N (winter)'})
df_results.to_csv(os.path.join(path, "results/winter_pressures/part_4/", "apc_summary_statistics.csv"), index=False)

In [None]:
# Categorical columns tabulated within each avoidable / non-avoidable stratum (keys)
# and their table headings (values).
categorical_features = dict(
    admiage_cat="Age Bands",
    gender_cat="Gender",
    ethnos_cat="Ethnicity",
    townsend_score_quintile="Townsend Score Quintile",
    admisorc_cat="Admission Source",
    time_of_day="Time of Admission",
    diag_seasonal_cat="Seasonal Diagnosis",
    disdest_cat="Discharge Destination",
    avoidable_adm="Admission Type",
    length_of_stay_cat="Length of Stay",
    procedures="Procedures",
    disreadydays_cat="Days Ready for Discharge",
)

In [None]:
# Part 4 analysis tables: crosstabs for every site and avoidable / non-avoidable stratum,
# stacked into one file.
#
# `filter`, `strata`, `df2`, `k` and `v` are intentionally plain module-level loop
# variables: make_crosstab looks names up in module scope, so do not turn these loops
# into comprehensions or rename the variables.
site_tables = []
for filter in ['RXN01', 'RXN02']:
    df_results = []
    for strata in ['Avoidable (ACSCs)', 'Non-avoidable (non-ACSCs)']:
        # Rows of the current site that fall in the current stratum.
        df2 = df_wp[(df_wp['avoidable_adm']==strata) & (df_wp['sitetret'] == filter)]
        for k, v in categorical_features.items():
            z = make_crosstab(k, v)
            z.insert(loc=0, column='Strata', value=strata)
            df_results.append(z)
    site_tables.append(pd.concat(df_results))

df_results = pd.concat(site_tables)
df_results.to_csv(os.path.join(path, "results/winter_pressures/part_4/", "apc_analysis_tables.csv"))

### Logistic Regressions

In [None]:
# Modelling imports for the logistic regressions.
# NOTE(review): these sit mid-notebook rather than in the import cell at the top,
# and `warnings` was already imported there.
import statsmodels.formula.api as smf
import statsmodels.api as sm
import re

import warnings
# Suppresses every warning from this point on; this also hides any warning raised
# while the models below are fitted.
warnings.filterwarnings("ignore")

### 6 Month Winter Analysis

In [None]:
# Six-month winter definition: flag each admission as 'winter' or 'not-winter'.
# First winter is the half-open window [2021-10-01, 2022-04-01).
start_date, end_date = np.datetime64('2021-10-01'), np.datetime64('2022-04-01')
in_first_winter = (df_wp.admidate >= start_date) & (df_wp.admidate < end_date)

# Second winter has no upper bound: everything on or after 2022-10-01 counts.
# NOTE(review): presumably the extract ends before April 2023 - confirm, or
# later admissions will also be labelled 'winter'.
start_date = np.datetime64('2022-10-01')
in_second_winter = df_wp.admidate >= start_date

df_wp['winter_flag'] = np.where(in_first_winter | in_second_winter, 'winter', 'not-winter')

In [None]:
# Length of stay ~ winter (six-month definition), fitted separately per site.
# Produces `results_1_six`: one row per model term per site, with the odds
# ratio, its 95% confidence limits and the p-value.
#
# NOTE(review): the response is a C(...) categorical; confirm which level
# statsmodels' Binomial family treats as the "success" outcome before
# interpreting the direction of the odds ratios.
model_columns = ['sitetret','length_of_stay_cat','winter_flag','admiage','gender_cat','ethnos_cat','townsend_score_quintile','diag_seasonal_cat','time_of_day']

formula = '''C(length_of_stay_cat,Treatment(reference="<2 days")) ~ C(winter_flag, Treatment(reference="not-winter"))+
                                                                    admiage+
                                                                    C(gender_cat, Treatment(reference="Male"))+
                                                                    C(ethnos_cat, Treatment(reference="White"))+
                                                                    C(townsend_score_quintile, Treatment(reference=1))+
                                                                    C(diag_seasonal_cat, Treatment(reference='None'))+
                                                                    time_of_day'''

site_tables = []
for site in ('RXN01', 'RXN02'):
    # Complete-case analysis: rows missing any listed column are dropped.
    dfm = df_wp.loc[df_wp['sitetret'] == site, model_columns].dropna()

    model = smf.glm(formula = formula, data=dfm, family=sm.families.Binomial())
    result = model.fit()

    # Coefficients and their confidence limits are on the log-odds scale;
    # exponentiate all three so OR and its limits share the same scale.
    # Values come from the fitted result rather than the rounded summary text.
    conf_limits = np.asarray(result.conf_int())
    results = pd.DataFrame({
        'Site': site,
        'Variable': [str(term) for term in result.params.index],
        'OR': np.exp(np.asarray(result.params, dtype=float)),
        'Lower 95% CL': np.exp(conf_limits[:, 0]),
        'Upper 95% CL': np.exp(conf_limits[:, 1]),
        'p_value': np.asarray(result.pvalues, dtype=float),
    })
    # Row labels run 1..k per site; the column-wise concat in the Results
    # section relies on the six- and three-month tables sharing these labels.
    results.index = pd.RangeIndex(1, len(results) + 1)
    site_tables.append(results)

results_1_six = pd.concat(site_tables)

In [None]:
# Procedure count category ~ winter (six-month definition), fitted per site.
# Produces `results_2_six`: one row per model term per site, with the odds
# ratio, its 95% confidence limits and the p-value.
#
# NOTE(review): the response is a C(...) categorical; confirm which level
# statsmodels' Binomial family treats as the "success" outcome before
# interpreting the direction of the odds ratios.
model_columns = ['proc_cat','winter_flag','admiage','gender_cat','ethnos_cat','townsend_score_quintile','diag_seasonal_cat','time_of_day']

formula = '''C(proc_cat,Treatment(reference="<2")) ~ C(winter_flag, Treatment(reference="not-winter"))+
                                                                    admiage+
                                                                    C(gender_cat, Treatment(reference="Male"))+
                                                                    C(ethnos_cat, Treatment(reference="White"))+
                                                                    C(townsend_score_quintile, Treatment(reference=1))+
                                                                    C(diag_seasonal_cat, Treatment(reference='None'))+
                                                                    time_of_day'''

site_tables = []
for site in ('RXN01', 'RXN02'):
    # Complete-case analysis: rows missing any listed column are dropped.
    dfm = df_wp.loc[df_wp['sitetret'] == site, model_columns].dropna()

    model = smf.glm(formula = formula, data=dfm, family=sm.families.Binomial())
    result = model.fit()

    # Coefficients and their confidence limits are on the log-odds scale;
    # exponentiate all three so OR and its limits share the same scale.
    # Values come from the fitted result rather than the rounded summary text.
    conf_limits = np.asarray(result.conf_int())
    results = pd.DataFrame({
        'Site': site,
        'Variable': [str(term) for term in result.params.index],
        'OR': np.exp(np.asarray(result.params, dtype=float)),
        'Lower 95% CL': np.exp(conf_limits[:, 0]),
        'Upper 95% CL': np.exp(conf_limits[:, 1]),
        'p_value': np.asarray(result.pvalues, dtype=float),
    })
    # Row labels run 1..k per site; the column-wise concat in the Results
    # section relies on the six- and three-month tables sharing these labels.
    results.index = pd.RangeIndex(1, len(results) + 1)
    site_tables.append(results)

results_2_six = pd.concat(site_tables)

In [None]:
# Days-ready-for-discharge category ~ winter (six-month definition), per site.
# Produces `results_3_six`: one row per model term per site, with the odds
# ratio, its 95% confidence limits and the p-value.
#
# NOTE(review): the response is a C(...) categorical; confirm which level
# statsmodels' Binomial family treats as the "success" outcome before
# interpreting the direction of the odds ratios.
# 'disreadydays' is not in the formula but is kept in the column list because
# it takes part in the dropna() below.
model_columns = ['disreadydays', 'disreadydays_cat','winter_flag','admiage','gender_cat','ethnos_cat','townsend_score_quintile','diag_seasonal_cat','time_of_day']

formula = '''C(disreadydays_cat,Treatment(reference="<2 days")) ~ C(winter_flag, Treatment(reference="not-winter"))+
                                                                    admiage+
                                                                    C(gender_cat, Treatment(reference="Male"))+
                                                                    C(ethnos_cat, Treatment(reference="White"))+
                                                                    C(townsend_score_quintile, Treatment(reference=1))+
                                                                    C(diag_seasonal_cat, Treatment(reference='None'))+
                                                                    time_of_day'''

site_tables = []
for site in ('RXN01', 'RXN02'):
    # Complete-case analysis: rows missing any listed column are dropped.
    dfm = df_wp.loc[df_wp['sitetret'] == site, model_columns].dropna()

    model = smf.glm(formula = formula, data=dfm, family=sm.families.Binomial())
    result = model.fit()

    # Coefficients and their confidence limits are on the log-odds scale;
    # exponentiate all three so OR and its limits share the same scale.
    # Values come from the fitted result rather than the rounded summary text.
    conf_limits = np.asarray(result.conf_int())
    results = pd.DataFrame({
        'Site': site,
        'Variable': [str(term) for term in result.params.index],
        'OR': np.exp(np.asarray(result.params, dtype=float)),
        'Lower 95% CL': np.exp(conf_limits[:, 0]),
        'Upper 95% CL': np.exp(conf_limits[:, 1]),
        'p_value': np.asarray(result.pvalues, dtype=float),
    })
    # Row labels run 1..k per site; the column-wise concat in the Results
    # section relies on the six- and three-month tables sharing these labels.
    results.index = pd.RangeIndex(1, len(results) + 1)
    site_tables.append(results)

results_3_six = pd.concat(site_tables)

In [None]:
# Admission type (avoidable vs not) ~ winter (six-month definition), per site.
# Produces `results_4_six`: one row per model term per site, with the odds
# ratio, its 95% confidence limits and the p-value.
#
# NOTE(review): the response is a C(...) categorical; confirm which level
# statsmodels' Binomial family treats as the "success" outcome before
# interpreting the direction of the odds ratios.
model_columns = ['avoidable_adm','winter_flag','admiage','gender_cat','ethnos_cat','townsend_score_quintile','diag_seasonal_cat','time_of_day']

formula = '''C(avoidable_adm,Treatment(reference="Avoidable (ACSCs)")) ~ C(winter_flag, Treatment(reference="not-winter"))+
                                                                    admiage+
                                                                    C(gender_cat, Treatment(reference="Male"))+
                                                                    C(ethnos_cat, Treatment(reference="White"))+
                                                                    C(townsend_score_quintile, Treatment(reference=1))+
                                                                    C(diag_seasonal_cat, Treatment(reference='None'))+
                                                                    time_of_day'''

site_tables = []
for site in ('RXN01', 'RXN02'):
    # Complete-case analysis: rows missing any listed column are dropped.
    dfm = df_wp.loc[df_wp['sitetret'] == site, model_columns].dropna()

    model = smf.glm(formula = formula, data=dfm, family=sm.families.Binomial())
    result = model.fit()

    # Coefficients and their confidence limits are on the log-odds scale;
    # exponentiate all three so OR and its limits share the same scale.
    # Values come from the fitted result rather than the rounded summary text.
    conf_limits = np.asarray(result.conf_int())
    results = pd.DataFrame({
        'Site': site,
        'Variable': [str(term) for term in result.params.index],
        'OR': np.exp(np.asarray(result.params, dtype=float)),
        'Lower 95% CL': np.exp(conf_limits[:, 0]),
        'Upper 95% CL': np.exp(conf_limits[:, 1]),
        'p_value': np.asarray(result.pvalues, dtype=float),
    })
    # Row labels run 1..k per site; the column-wise concat in the Results
    # section relies on the six- and three-month tables sharing these labels.
    results.index = pd.RangeIndex(1, len(results) + 1)
    site_tables.append(results)

results_4_six = pd.concat(site_tables)

### 3 Month Winter Analysis

In [None]:
# Three-month winter definition: overwrite `winter_flag` so that only
# admissions in the half-open window [2021-12-01, 2022-03-01) are 'winter'.
# NOTE(review): unlike the six-month definition above, no admissions from the
# 2022/23 winter are flagged here - confirm this is intended.
start_date = np.datetime64('2021-12-01')
end_date = np.datetime64('2022-03-01')
in_winter = (df_wp.admidate >= start_date) & (df_wp.admidate < end_date)
df_wp['winter_flag'] = np.where(in_winter, 'winter', 'not-winter')

In [None]:
# Length of stay ~ winter (three-month definition), fitted separately per site.
# Produces `results_1_three`: one row per model term per site, with the odds
# ratio, its 95% confidence limits and the p-value. The term column is named
# 'Variable2' so it stays distinct from the six-month table's 'Variable'.
#
# NOTE(review): the response is a C(...) categorical; confirm which level
# statsmodels' Binomial family treats as the "success" outcome before
# interpreting the direction of the odds ratios.
model_columns = ['sitetret','length_of_stay_cat','winter_flag','admiage','gender_cat','ethnos_cat','townsend_score_quintile','diag_seasonal_cat','time_of_day']

formula = '''C(length_of_stay_cat,Treatment(reference="<2 days")) ~ C(winter_flag, Treatment(reference="not-winter"))+
                                                                    admiage+
                                                                    C(gender_cat, Treatment(reference="Male"))+
                                                                    C(ethnos_cat, Treatment(reference="White"))+
                                                                    C(townsend_score_quintile, Treatment(reference=1))+
                                                                    C(diag_seasonal_cat, Treatment(reference='None'))+
                                                                    time_of_day'''

site_tables = []
for site in ('RXN01', 'RXN02'):
    # Complete-case analysis: rows missing any listed column are dropped.
    dfm = df_wp.loc[df_wp['sitetret'] == site, model_columns].dropna()

    model = smf.glm(formula = formula, data=dfm, family=sm.families.Binomial())
    result = model.fit()

    # Coefficients and their confidence limits are on the log-odds scale;
    # exponentiate all three so OR and its limits share the same scale.
    # Values come from the fitted result rather than the rounded summary text.
    conf_limits = np.asarray(result.conf_int())
    results = pd.DataFrame({
        'Site': site,
        'Variable2': [str(term) for term in result.params.index],
        'OR': np.exp(np.asarray(result.params, dtype=float)),
        'Lower 95% CL': np.exp(conf_limits[:, 0]),
        'Upper 95% CL': np.exp(conf_limits[:, 1]),
        'p_value': np.asarray(result.pvalues, dtype=float),
    })
    # Row labels run 1..k per site; the column-wise concat in the Results
    # section relies on the six- and three-month tables sharing these labels.
    results.index = pd.RangeIndex(1, len(results) + 1)
    site_tables.append(results)

results_1_three = pd.concat(site_tables)

In [None]:
# Procedure count category ~ winter (three-month definition), fitted per site.
# Produces `results_2_three`: one row per model term per site, with the odds
# ratio, its 95% confidence limits and the p-value. The term column is named
# 'Variable2' so it stays distinct from the six-month table's 'Variable'.
#
# NOTE(review): the response is a C(...) categorical; confirm which level
# statsmodels' Binomial family treats as the "success" outcome before
# interpreting the direction of the odds ratios.
model_columns = ['proc_cat','winter_flag','admiage','gender_cat','ethnos_cat','townsend_score_quintile','diag_seasonal_cat','time_of_day']

formula = '''C(proc_cat,Treatment(reference="<2")) ~ C(winter_flag, Treatment(reference="not-winter"))+
                                                                    admiage+
                                                                    C(gender_cat, Treatment(reference="Male"))+
                                                                    C(ethnos_cat, Treatment(reference="White"))+
                                                                    C(townsend_score_quintile, Treatment(reference=1))+
                                                                    C(diag_seasonal_cat, Treatment(reference='None'))+
                                                                    time_of_day'''

site_tables = []
for site in ('RXN01', 'RXN02'):
    # Complete-case analysis: rows missing any listed column are dropped.
    dfm = df_wp.loc[df_wp['sitetret'] == site, model_columns].dropna()

    model = smf.glm(formula = formula, data=dfm, family=sm.families.Binomial())
    result = model.fit()

    # Coefficients and their confidence limits are on the log-odds scale;
    # exponentiate all three so OR and its limits share the same scale.
    # Values come from the fitted result rather than the rounded summary text.
    conf_limits = np.asarray(result.conf_int())
    results = pd.DataFrame({
        'Site': site,
        'Variable2': [str(term) for term in result.params.index],
        'OR': np.exp(np.asarray(result.params, dtype=float)),
        'Lower 95% CL': np.exp(conf_limits[:, 0]),
        'Upper 95% CL': np.exp(conf_limits[:, 1]),
        'p_value': np.asarray(result.pvalues, dtype=float),
    })
    # Row labels run 1..k per site; the column-wise concat in the Results
    # section relies on the six- and three-month tables sharing these labels.
    results.index = pd.RangeIndex(1, len(results) + 1)
    site_tables.append(results)

results_2_three = pd.concat(site_tables)

In [None]:
# Days-ready-for-discharge category ~ winter (three-month definition), per site.
# Produces `results_3_three`: one row per model term per site, with the odds
# ratio, its 95% confidence limits and the p-value. The term column is named
# 'Variable2' so it stays distinct from the six-month table's 'Variable'.
#
# NOTE(review): the response is a C(...) categorical; confirm which level
# statsmodels' Binomial family treats as the "success" outcome before
# interpreting the direction of the odds ratios.
# 'disreadydays' is not in the formula but is kept in the column list because
# it takes part in the dropna() below.
model_columns = ['disreadydays', 'disreadydays_cat','winter_flag','admiage','gender_cat','ethnos_cat','townsend_score_quintile','diag_seasonal_cat','time_of_day']

formula = '''C(disreadydays_cat,Treatment(reference="<2 days")) ~ C(winter_flag, Treatment(reference="not-winter"))+
                                                                    admiage+
                                                                    C(gender_cat, Treatment(reference="Male"))+
                                                                    C(ethnos_cat, Treatment(reference="White"))+
                                                                    C(townsend_score_quintile, Treatment(reference=1))+
                                                                    C(diag_seasonal_cat, Treatment(reference='None'))+
                                                                    time_of_day'''

site_tables = []
for site in ('RXN01', 'RXN02'):
    # Complete-case analysis: rows missing any listed column are dropped.
    dfm = df_wp.loc[df_wp['sitetret'] == site, model_columns].dropna()

    model = smf.glm(formula = formula, data=dfm, family=sm.families.Binomial())
    result = model.fit()

    # Coefficients and their confidence limits are on the log-odds scale;
    # exponentiate all three so OR and its limits share the same scale.
    # Values come from the fitted result rather than the rounded summary text.
    conf_limits = np.asarray(result.conf_int())
    results = pd.DataFrame({
        'Site': site,
        'Variable2': [str(term) for term in result.params.index],
        'OR': np.exp(np.asarray(result.params, dtype=float)),
        'Lower 95% CL': np.exp(conf_limits[:, 0]),
        'Upper 95% CL': np.exp(conf_limits[:, 1]),
        'p_value': np.asarray(result.pvalues, dtype=float),
    })
    # Row labels run 1..k per site; the column-wise concat in the Results
    # section relies on the six- and three-month tables sharing these labels.
    results.index = pd.RangeIndex(1, len(results) + 1)
    site_tables.append(results)

results_3_three = pd.concat(site_tables)

In [None]:
# Admission type (avoidable vs not) ~ winter (three-month definition), per site.
# Produces `results_4_three`: one row per model term per site, with the odds
# ratio, its 95% confidence limits and the p-value. The term column is named
# 'Variable2' so it stays distinct from the six-month table's 'Variable'.
#
# NOTE(review): the response is a C(...) categorical; confirm which level
# statsmodels' Binomial family treats as the "success" outcome before
# interpreting the direction of the odds ratios.
model_columns = ['avoidable_adm','winter_flag','admiage','gender_cat','ethnos_cat','townsend_score_quintile','diag_seasonal_cat','time_of_day']

formula = '''C(avoidable_adm,Treatment(reference="Avoidable (ACSCs)")) ~ C(winter_flag, Treatment(reference="not-winter"))+
                                                                    admiage+
                                                                    C(gender_cat, Treatment(reference="Male"))+
                                                                    C(ethnos_cat, Treatment(reference="White"))+
                                                                    C(townsend_score_quintile, Treatment(reference=1))+
                                                                    C(diag_seasonal_cat, Treatment(reference='None'))+
                                                                    time_of_day'''

site_tables = []
for site in ('RXN01', 'RXN02'):
    # Complete-case analysis: rows missing any listed column are dropped.
    dfm = df_wp.loc[df_wp['sitetret'] == site, model_columns].dropna()

    model = smf.glm(formula = formula, data=dfm, family=sm.families.Binomial())
    result = model.fit()

    # Coefficients and their confidence limits are on the log-odds scale;
    # exponentiate all three so OR and its limits share the same scale.
    # Values come from the fitted result rather than the rounded summary text.
    conf_limits = np.asarray(result.conf_int())
    results = pd.DataFrame({
        'Site': site,
        'Variable2': [str(term) for term in result.params.index],
        'OR': np.exp(np.asarray(result.params, dtype=float)),
        'Lower 95% CL': np.exp(conf_limits[:, 0]),
        'Upper 95% CL': np.exp(conf_limits[:, 1]),
        'p_value': np.asarray(result.pvalues, dtype=float),
    })
    # Row labels run 1..k per site; the column-wise concat in the Results
    # section relies on the six- and three-month tables sharing these labels.
    results.index = pd.RangeIndex(1, len(results) + 1)
    site_tables.append(results)

results_4_three = pd.concat(site_tables)

### Results

In [None]:
# Place each outcome's six-month table and three-month table side by side
# (six-month columns first), aligned on their row labels.
results_1, results_2, results_3, results_4 = (
    pd.concat([six_month, three_month], axis="columns")
    for six_month, three_month in (
        (results_1_six, results_1_three),
        (results_2_six, results_2_three),
        (results_3_six, results_3_three),
        (results_4_six, results_4_three),
    )
)

In [None]:
def extractor(val):
    """Shorten a categorical model-term label to ``variable[level]``.

    Labels that start with ``C(`` are reduced to the variable name followed by
    the bracketed level suffix, e.g.
    'C(winter_flag, Treatment(reference="not-winter"))[T.winter]' becomes
    'winter_flag[T.winter]'. Any other label is returned unchanged.

    The variable-name pattern matches letters and underscores only: the name
    must contain at least one underscore, and at most three
    underscore-separated segments are captured.

    Parameters
    ----------
    val : str
        Term label to shorten.

    Returns
    -------
    str
        The shortened label, or ``val`` itself when it does not start with
        ``C(`` or when either part cannot be found.
    """
    if not val.startswith('C('):
        return val

    # Raw strings avoid the invalid-escape warnings the plain literals caused;
    # the compiled patterns are unchanged.
    name_match = re.search(r'C?\(?([A-Za-z]+_[A-Za-z]+_?[A-Za-z]+)', val)
    level_match = re.search(r'(\[.*\])', val)

    # Fall back to the raw label instead of raising IndexError on a label
    # without a recognisable name or level suffix.
    if name_match is None or level_match is None:
        return val

    return name_match.group(1) + level_match.group(1)
                
# Shorten the term labels in both label columns of every combined table:
# 'Variable' holds the six-month terms and 'Variable2' the three-month terms.
for combined in (results_1, results_2, results_3, results_4):
    for label_column in ('Variable', 'Variable2'):
        combined[label_column] = combined[label_column].apply(extractor)

In [None]:
# Two-level column header for the exported regression tables.
# string_2 is the top level: blank over Site/Variable, then the analysis name
# over the four statistic columns. string_3 is the per-column label, repeated
# once for each analysis.
six_month_label = '6-month winter analysis'
three_month_label = '3-month winter analysis'
string_2 = ['', ''] + [six_month_label] * 4 + ['', ''] + [three_month_label] * 4
string_3 = ['Site', 'Variable', 'OR', 'Lower 95% CL', 'Upper 95% CL', 'p_value'] * 2

In [None]:
# Apply the two-level header to the length-of-stay regression table and save it.
cols = list(zip(string_2, string_3))
results_1.columns = pd.MultiIndex.from_tuples(cols)
# Write results_1 (the regression table), not df_results (the crosstab table).
results_1.to_csv(os.path.join(path, "results/winter_pressures/part_5/", "apc_los_regression.csv"), index=False)

In [None]:
# Apply the two-level header to the procedures regression table and save it.
cols = list(zip(string_2, string_3))
results_2.columns = pd.MultiIndex.from_tuples(cols)
# Write results_2 (the regression table), not df_results (the crosstab table).
results_2.to_csv(os.path.join(path, "results/winter_pressures/part_5/", "apc_procedures_regression.csv"), index=False)

In [None]:
# Apply the two-level header to the days-ready-for-discharge regression table
# and save it.
cols = list(zip(string_2, string_3))
results_3.columns = pd.MultiIndex.from_tuples(cols)
# Write results_3 (the regression table), not df_results (the crosstab table).
results_3.to_csv(os.path.join(path, "results/winter_pressures/part_5/", "apc_days_ready_regression.csv"), index=False)

In [None]:
# Apply the two-level header to the avoidable-admissions regression table and
# save it.
cols = list(zip(string_2, string_3))
results_4.columns = pd.MultiIndex.from_tuples(cols)
# Write results_4 (the regression table), not df_results (the crosstab table).
results_4.to_csv(os.path.join(path, "results/winter_pressures/part_5/", "apc_avoid_adms_regression.csv"), index=False)