## Packages

In [3]:
import pandas as pd
import numpy as np
from math import ceil
import plotly.express as px
## import matplotlib.pyplot as plt
import sqlalchemy 
from sqlalchemy import create_engine, text

import sys
import os

## Add the path of the functions folder
current_dir = os.getcwd()  ## Gets the current working directory
sub_dir = os.path.abspath(os.path.join(current_dir, '..'
                                       , 'Functions'))
sys.path.append(sub_dir)

# Now you can import functions
from db_secrets import SQL_107

In [54]:
# TensorFlow sequential model
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


## Connection

In [4]:
## text for query
with open("../Exploratory_Analysis/111_sql.sql", "r") as file:
    query_text = file.read()

query_text = query_text.replace('REPLACE START DATE','2024-01-01')

In [5]:
## Create an engine + connection
engine = create_engine(SQL_107())
conn = engine.connect()

## Return data
df_raw = pd.read_sql(query_text,conn)

In [35]:
## Makes working copy
df = df_raw.copy()

#df = df.sample(n=100000, random_state=42)

## Wrangle

In [36]:
## List columns
df.columns

Index(['Start_Location', 'Call ID', 'Pseudo NHS Number', 'CallDate',
       'Call Connect Time', 'Weekday_Name', 'Week_Start', 'Financial Year',
       'Bank Holiday', 'In_Out_Hours', 'Sub ICB Code', 'Sub ICB Name',
       'GP Practice', 'GP Practice Code', 'GP Deprivation',
       'GP Survey Q21 Wait for Appt', 'Symptom_Group',
       'Final Disposition Code', 'Disposition Group', 'Disposition',
       'Call_Taker_Triages', 'Clinical_Triages', 'Patient Age', 'Patient Sex',
       'Outcome ID', 'Outcome Datetime', 'Outcome Type', 'Outcome',
       'Outcome Location Code', 'Outcome Location Name', 'Hours to Outcome'],
      dtype='object')

In [37]:
df = df[['Call Connect Time'
         ,'Bank Holiday'
         , 'In_Out_Hours'
         , 'Sub ICB Name'
         ,'Outcome Type']].copy()

#### binary outcome

In [38]:
df['Calls'] = 1

In [39]:
df['Outcome'] = df['Outcome Type'].transform(lambda x: 0 if x == 'No UEC Contact' else 1)
df = df.drop(['Outcome Type'],axis=1) 

In [40]:
## Date time conversion to numeric
df['Hour']    = df['Call Connect Time'].dt.hour
df['year']    = df['Call Connect Time'].dt.year
df['month']   = df['Call Connect Time'].dt.month
df['day']     = df['Call Connect Time'].dt.day
df['hour']    = df['Call Connect Time'].dt.hour
df['weekday'] = df['Call Connect Time'].dt.weekday  # Monday=0, Sunday=6

df = df.drop('Call Connect Time',axis=1) 

#### Aggregates count 

In [41]:
df = df.groupby(['Hour'
         , 'year'
         , 'month'
         , 'day'
         , 'hour'
         , 'weekday'
         , 'Bank Holiday'
         , 'In_Out_Hours'
         , 'Sub ICB Name']).agg('sum').reset_index()

In [42]:
## One hot encodinng for boolean variables
bool_mapping = {
    'Yes': 1,
    'No': 0,
    'In Hours': 1,
    'Out of Hours': 0
}

df.loc[:,'Is Bank Holiday'] = df['Bank Holiday'].map(bool_mapping)             
df.loc[:,'In Hours'] = df['In_Out_Hours'].map(bool_mapping)
df = df.drop(['Bank Holiday','In_Out_Hours'],axis=1) 

In [None]:
## Dummy variables from ICB
df = pd.concat([df, pd.get_dummies(df['Sub ICB Name']
                                   ,dtype=int
                                   , prefix='SubICB')]
                ,axis=1)
df = df.drop('Sub ICB Name', axis=1)

In [None]:
# Scale the data for better convergence
#scaler = MinMaxScaler()

In [45]:
df.head()

Unnamed: 0,Hour,year,month,day,hour,weekday,Calls,Outcome,Is Bank Holiday,In Hours,SubICB_County Durham,SubICB_Newcastle Gateshead,SubICB_North Tyneside,SubICB_Northumberland,SubICB_South Tyneside,SubICB_Sunderland,SubICB_Tees Valley
0,0,2024,1,1,0,0,10,4,1,0,1,0,0,0,0,0,0
1,0,2024,1,1,0,0,11,4,1,0,0,1,0,0,0,0,0
2,0,2024,1,1,0,0,3,2,1,0,0,0,1,0,0,0,0
3,0,2024,1,1,0,0,2,1,1,0,0,0,0,1,0,0,0
4,0,2024,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0


In [53]:
len(df.columns)

17

## Split

In [48]:
X = df.drop('Outcome',axis=1).to_numpy() # X = all  except the 'Outcome' column
y = df['Outcome'].to_numpy() # y = 'Outcome' column 


X_train, X_test, y_train, y_test = train_test_split(X
                                                    , y 
                                                    , test_size = 0.25
                                                    , random_state=42)

## Build a model

In [None]:
def make_net(number_features, 
             hidden_layers=3, 
             hidden_layer_neurones=128, 
             dropout=0.0, 
             learning_rate=0.003):
    
    """Make TensorFlow neural net"""
    
    # Clear Tensorflow 
    K.clear_session()
    
    # Set up neural net
    net = Sequential()
    
    # Add hidden hidden_layers using a loop
    for i in range(hidden_layers):
        # Add fully connected layer with ReLu activation
        net.add(Dense(
            hidden_layer_neurones, 
            input_dim=number_features,
            activation='relu'))
        # Add droput layer
        net.add(Dropout(dropout))
    
    # Add final sigmoid activation output
    net.add(Dense(1, activation='sigmoid'))    
    
    # Compiling model
    opt = Adam(learning_rate=learning_rate)
    
    net.compile(loss='binary_crossentropy', 
                optimizer=opt, 
                metrics=['accuracy'])
    
    return net

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


NameError: name 'Dropout' is not defined

In [None]:
model.compile(optimizer='adam'
              , loss='mse'
              , metrics=['mae','mse'])

## Predict values

In [None]:
## Predict training and test set labels
y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

## Accuracy

In [None]:
accuracy = accuracy_score(y_test, y_pred_test)
report = classification_report(y_test, y_pred_test
                               , target_names=label_encoder_y.classes_)


In [None]:
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:\n', report)