This notebook builds a machine learning model to investigate inpatient readmissions.

In [11]:
import pyodbc
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

Load base data using SQL

In [23]:
# Load the base admissions data: run the reporting stored procedure over ODBC
# and turn its result set into a pandas DataFrame.
conn_str = (
    r'DRIVER={SQL Server};'
    r'SERVER=lct-sqlbidev\dev;'
    r'DATABASE=Informatics_SSAS_Live;'
    r'Trusted_Connection=yes;'
    )

cnxn = pyodbc.connect(conn_str) # connect using the connection string
# NOTE(review): cnxn and the cursor are never closed in this cell. They are
# left open here in case later cells reuse them - confirm, and close them
# once the last query has run.

cursor_admissions = cnxn.cursor()

cursor_admissions.execute("EXEC [Informatics_SSAS_Live].[Reporting]."
               "[usp_ML_Inpatient_Readmissions_process]") # the sql we want to run

readmissions_data = cursor_admissions.fetchall() # return all the data

# Column names are read from the cursor metadata rather than hard-coded, so
# new columns added to the SQL source are picked up automatically.
admissions_headers = [column[0] for column in cursor_admissions.description]

# Build the frame from plain tuples, one per row. Going through np.array()
# forces every column into a single NumPy dtype (discarding the per-column
# types returned by the driver) and fails when the result set is empty,
# because the array shape no longer matches the header list.
readmissions_df = pd.DataFrame.from_records(
    [tuple(row) for row in readmissions_data],
    columns=admissions_headers,
)

# The target column must be an integer for the modelling steps.
readmissions_df['ReAdmission'] = readmissions_df['ReAdmission'].astype(int)

# NOTE(review): the displayed frame includes the NHSNumber column - consider
# clearing cell outputs before sharing the notebook.
readmissions_df

Unnamed: 0,NHSNumber,AdmissionWard,MetricReAdmissions,MetricLengthofStay,ReAdmission,AnEContact,DeprivationIndex,Gender,SexualOrientation,Ethnicity,LearningDisability,AutismDiagnosis,ExBAF,AccommodationStatus,ReferralSource,ReferralSourceGroup,DetainingSection
0,4000157876,Longridge Ward,1,13,1,0,6,Male,[NOVALUE],White - British,0,0,0,Unknown,Community Health Service (CH),Internal,
1,4000615300,Kentmere Ward,0,71,0,1,2,Male,[NOVALUE],Not Known/Specified,0,0,0,Mainstream Housing,Community Mental Health Team (Adult Mental Hea...,Internal,Section 2 - Admission for assessment
2,4001253909,Wesham Rehabilitation Centre,0,104,0,1,6,Male,[NOVALUE],White - British,0,0,0,Tenant - Housing Association,Mental Health Service (CH),Internal,
3,4001361582,Woodview Greendale,0,13,0,1,4,Male,[NOVALUE],White - British,0,0,0,Not known,Community Health Service (CH),Internal,Section 2 - Admission for assessment
4,4001361582,Woodview Greendale,0,29,0,1,4,Male,[NOVALUE],White - British,0,0,0,Not known,Community Mental Health Team (Adult Mental Hea...,Internal,Section 2 - Admission for assessment
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7833,7317835781,Dova Ward,0,20,0,0,2,Male,[NOVALUE],Not Known/Specified,0,0,0,Unknown,Community Mental Health Team (Adult Mental Hea...,Internal,Section 2 - Admission for assessment
7834,7321958140,Ribblemere,0,5,0,0,2,Female,[NOVALUE],Bangladeshi,0,0,0,Mainstream Housing,Accident And Emergency Department,Other,
7835,7332918595,Duxbury,0,12,0,0,7,Female,[NOVALUE],Any other Asian background,0,0,0,Tenant - Local Authority/Arms Length Managemen...,Community Health Service (CH),Internal,
7836,7346143590,The Orchard,0,46,0,0,1,Male,[NOVALUE],Black or Black British - African,0,0,0,Not elsewhere classified,Community Mental Health Team (Adult Mental Hea...,Internal,Section 2 - Admission for assessment


Tidy up data prior to one-hot encoding

In [24]:
# Collapse the free-text Ethnicity, Gender and AccommodationStatus values
# into short category codes. Each raw column is replaced by a *_clean column;
# any value without an entry in its lookup gets that column's fallback label.

ethnicity_codes = {
    'Not Known/Specified': 'Not Known',
    'Not stated': 'NotStated',
    'White - British': 'WhiteBr',
    'White and Black African': 'WhiteBlkAfr',
    'Any other Asian background': 'AsianOther',
    'White and Asian': 'WhiteAsian',
    'Pakistani': 'Pakistani',
    'Indian': 'Indian',
    'Bangladeshi': 'Bangladeshi',
    'Any other White background': 'OtherWhite',
    'Any other mixed background': 'OtherMixed',
    'Chinese': 'Chinese',
    'Any other ethnic group': 'AnyOther',
    'White - Irish': 'WhiteIrish',
    'Black or Black British - Caribbean': 'Caribbean',
    'White and Black Caribbean': 'WhtBlkCarib',
    'Any other Black background': 'BlackOther',
    'Black or Black British - African': 'BlkAfrican',
}

gender_codes = {
    'Male': 'Male',
    'Female': 'Female',
    'Not Known': 'NK',
    'Not Specified': 'NK',
}

accom_codes = {
    'Owner occupier': 'Owner',
    'Unknown': 'NK',
    'Not known': 'NK',
    'Tenant - private landlord': 'Private',
    'Mainstream Housing': 'Mainstream',
    'Tenant - Housing Association': 'HA',
    'Accommodation with mental health care support': 'Supp',
    'Secure psychiatric unit': 'Psych',
    'Independent hospital/clinic': 'Hosp',
    'Sheltered housing for older persons': 'Shelt',
    'Other accommodation with mental health care and support': 'Supp',
    'Homeless': 'HL',
    'Settled mainstream housing with family/friends': 'FF',
    'NHS acute psychiatric ward': 'Psych',
    'Specialist rehabilitation/recovery': 'Rehab',
    'Supported accommodation': 'Supp',
    'Non-Mental Health Registered Care Home': 'CH',
    'Mental Health Registered Care Home': 'CH',
    '[NOVALUE]': 'NK',
    'Staying with friends/family as a short term guest': 'FF',
    'Rough sleeper': 'HL',
    'Tenant - Local Authority/Arms Length Management Organisation/Registered Landlord': 'HA',
    'Other NHS facilities/hospital': 'NHS',
}

# (raw column, new column, lookup, label for values missing from the lookup)
recode_plan = [
    ('Ethnicity', 'ethnicity_clean', ethnicity_codes, 'Err'),
    ('Gender', 'gender_clean', gender_codes, 'Err'),
    ('AccommodationStatus', 'accom_clean', accom_codes, 'Oth'),
]

for raw_col, clean_col, codes, fallback in recode_plan:
    # add new column; unmapped values come back as NaN and take the fallback
    readmissions_df[clean_col] = readmissions_df[raw_col].map(codes).fillna(fallback)
    # get rid of old column
    readmissions_df.drop(columns=raw_col, inplace=True)

# check the results
readmissions_df

Unnamed: 0,NHSNumber,AdmissionWard,MetricReAdmissions,MetricLengthofStay,ReAdmission,AnEContact,DeprivationIndex,SexualOrientation,LearningDisability,AutismDiagnosis,ExBAF,ReferralSource,ReferralSourceGroup,DetainingSection,ethnicity_clean,gender_clean,accom_clean
0,4000157876,Longridge Ward,1,13,1,0,6,[NOVALUE],0,0,0,Community Health Service (CH),Internal,,WhiteBr,Male,NK
1,4000615300,Kentmere Ward,0,71,0,1,2,[NOVALUE],0,0,0,Community Mental Health Team (Adult Mental Hea...,Internal,Section 2 - Admission for assessment,Not Known,Male,Mainstream
2,4001253909,Wesham Rehabilitation Centre,0,104,0,1,6,[NOVALUE],0,0,0,Mental Health Service (CH),Internal,,WhiteBr,Male,HA
3,4001361582,Woodview Greendale,0,13,0,1,4,[NOVALUE],0,0,0,Community Health Service (CH),Internal,Section 2 - Admission for assessment,WhiteBr,Male,NK
4,4001361582,Woodview Greendale,0,29,0,1,4,[NOVALUE],0,0,0,Community Mental Health Team (Adult Mental Hea...,Internal,Section 2 - Admission for assessment,WhiteBr,Male,NK
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7833,7317835781,Dova Ward,0,20,0,0,2,[NOVALUE],0,0,0,Community Mental Health Team (Adult Mental Hea...,Internal,Section 2 - Admission for assessment,Not Known,Male,NK
7834,7321958140,Ribblemere,0,5,0,0,2,[NOVALUE],0,0,0,Accident And Emergency Department,Other,,Bangladeshi,Female,Mainstream
7835,7332918595,Duxbury,0,12,0,0,7,[NOVALUE],0,0,0,Community Health Service (CH),Internal,,AsianOther,Female,HA
7836,7346143590,The Orchard,0,46,0,0,1,[NOVALUE],0,0,0,Community Mental Health Team (Adult Mental Hea...,Internal,Section 2 - Admission for assessment,BlkAfrican,Male,Oth


One-hot encode relevant columns

In [None]:
# One-hot encode the cleaned ethnicity and gender columns. For each column:
# build integer 0/1 indicator columns, drop the original column, and join the
# indicators back onto the main frame.
# NOTE(review): the indicator columns are not prefixed, so if both columns
# ever contain the same label (e.g. the shared 'Err' fallback) the join will
# fail on overlapping column names - consider adding a prefix if downstream
# code allows it.
for clean_col in ('ethnicity_clean', 'gender_clean'):
    one_hot = pd.get_dummies(readmissions_df[clean_col], dtype=int)
    readmissions_df = readmissions_df.drop(clean_col, axis=1).join(one_hot)

# Set correct data type for the value of interest. This previously cast an
# 'IsDNA' column, which is not among the columns of this dataset and would
# raise KeyError; the target in this notebook is 'ReAdmission'.
readmissions_df['ReAdmission'] = readmissions_df['ReAdmission'].astype(int)
