## Packages

In [None]:
import pandas as pd
import numpy as np
from math import ceil
## import plotly.express as px
import matplotlib.pyplot as plt
import sqlalchemy 
from sqlalchemy import create_engine, text

import sys
import os

## Make the sibling 'Functions' folder importable:
## resolve <working directory>/../Functions and append it to the module search path
current_dir = os.getcwd()
sub_dir = os.path.abspath(
    os.path.join(current_dir, '..', 'Functions')
)
sys.path.append(sub_dir)

# Now you can import functions
from db_secrets import SQL_107

In [None]:
# TensorFlow sequential model
from tensorflow import keras
from tensorflow.keras import backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


In [None]:
# Turn warnings off to keep notebook tidy
import warnings
warnings.filterwarnings("ignore")

## Connection

In [None]:
## Load the SQL template and fill in the start-date placeholder
with open("../Exploratory_Analysis/111_sql.sql", "r") as file:
    query_text = file.read().replace('REPLACE START DATE', '2022-01-01')

In [None]:
## Create the engine from whatever connection URL SQL_107() returns
engine = create_engine(SQL_107())

## Run the query inside a context manager so the connection is always
## released, even if the read fails. `conn` stays defined afterwards but is
## closed; open a new one from `engine` if further queries are needed.
with engine.connect() as conn:
    ## Return data
    df_raw = pd.read_sql(query_text, conn)

## Wrangle

In [None]:
## Work on a copy so the raw query result in df_raw stays untouched
df = df_raw.copy()

## Optional: uncomment to iterate on a reproducible 100,000-row sample
#df = df.sample(n=100000, random_state=42)

In [None]:
## Show the column names available in the working copy
df.columns

In [None]:
## Keep only the timestamp, the two flags, the sub-ICB and the outcome field
df = df.loc[:, [
    'Call Connect Time',
    'Bank Holiday',
    'In_Out_Hours',
    'Sub ICB Name',
    'Outcome Type',
]].copy()

#### Binary outcome

In [None]:
## Constant counter column: summing it during aggregation gives the number of rows per group
df['Calls'] = 1

In [None]:
## Binary target: 0 when the outcome type is 'No UEC Contact', 1 for anything else
## (missing values compare unequal and therefore also map to 1).
## A vectorised comparison replaces the per-row Python lambda.
df['Outcome'] = (df['Outcome Type'] != 'No UEC Contact').astype('int64')
df = df.drop(columns=['Outcome Type'])

In [None]:
## Break the call timestamp into numeric calendar features, then drop the timestamp
## NOTE(review): 'Hour' and 'hour' are both taken from .dt.hour and hold identical
## values -- confirm whether both are really wanted as features.
df = (
    df.assign(
        Hour=df['Call Connect Time'].dt.hour,
        year=df['Call Connect Time'].dt.year,
        month=df['Call Connect Time'].dt.month,
        day=df['Call Connect Time'].dt.day,
        hour=df['Call Connect Time'].dt.hour,
        weekday=df['Call Connect Time'].dt.weekday,  # Monday=0, Sunday=6
    )
    .drop(columns='Call Connect Time')
)

#### Aggregate counts

In [None]:
## Collapse to one row per combination of the grouping keys,
## summing every remaining column within each group
df = (
    df.groupby([
        'Hour', 'year', 'month', 'day', 'hour', 'weekday',
        'Bank Holiday', 'In_Out_Hours', 'Sub ICB Name',
    ])
    .sum()
    .reset_index()
)

In [None]:
## Binary encoding for the two text flags
## (labels that are not in the mapping become NaN)
bool_mapping = {'Yes': 1, 'No': 0, 'In Hours': 1, 'Out of Hours': 0}

df['Is Bank Holiday'] = df['Bank Holiday'].map(bool_mapping)
df['In Hours'] = df['In_Out_Hours'].map(bool_mapping)

## The original text columns are no longer needed
df = df.drop(columns=['Bank Holiday', 'In_Out_Hours'])

In [None]:
## Replace 'Sub ICB Name' with one 0/1 indicator column per value ('SubICB_<name>')
df = pd.concat(
    [
        df.drop(columns='Sub ICB Name'),
        pd.get_dummies(df['Sub ICB Name'], prefix='SubICB', dtype=int),
    ],
    axis=1,
)

In [None]:
## Preview the first rows of the encoded frame
df.head()

## Split

In [None]:
# Target is the 'Outcome' column; every other column is a feature
y = df['Outcome'].to_numpy()
X = df.drop(columns='Outcome').to_numpy()

# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

## Scale

In [None]:
def scale_data(Xy_train, Xy_test,X_or_y = ['X','y']):
    """Scale data 0-1 based on min and max in training set.

    Parameters
    ----------
    Xy_train, Xy_test : numpy.ndarray
        Training and test arrays. The scaler is fitted on the training array
        only and then applied to both.
    X_or_y : {'X', 'y'}
        'X' scales the arrays as given. 'y' first reshapes each array to a
        single column, so the returned arrays have shape (n, 1).
        The list default only enumerates the two options and is not itself a
        valid choice: the argument must always be passed explicitly.

    Returns
    -------
    tuple
        (train_sc, test_sc), the scaled training and test arrays.

    Raises
    ------
    ValueError
        If X_or_y is neither 'X' nor 'y'. Previously this surfaced as an
        UnboundLocalError at the return statement.
    """
    # Validate before doing any work so a bad selector fails with a clear message
    if X_or_y not in ('X', 'y'):
        raise ValueError(f"X_or_y must be 'X' or 'y', got {X_or_y!r}")

    # The scaler expects 2-D input, so a 1-D target becomes a single column
    if X_or_y == 'y':
        Xy_train = Xy_train.reshape(-1, 1)
        Xy_test = Xy_test.reshape(-1, 1)

    # Initialise a new scaling object for normalising input data
    sc = MinMaxScaler()

    # Fit on the training set only, then apply the same scaling to the test set
    train_sc = sc.fit_transform(Xy_train)
    test_sc = sc.transform(Xy_test)

    return train_sc, test_sc

In [None]:
# Scale the features to 0-1; the scaler is fitted on the training set only
X_train, X_test = scale_data(X_train, X_test, X_or_y='X')


# The target is left unscaled; uncomment to scale y the same way
#y_train, y_test = scale_data(y_train, y_test, X_or_y='y')

## Build a model

In [None]:
def make_net(number_features, 
             hidden_layers=3, 
             hidden_layer_neurones=128, 
             dropout=0.0, 
             learning_rate=0.003):
    """Make a fully connected TensorFlow/Keras neural net with one linear output.

    Parameters
    ----------
    number_features : int
        Number of input features (width of one input row).
    hidden_layers : int
        Number of Dense + Dropout pairs to stack.
    hidden_layer_neurones : int
        Units in each hidden Dense layer.
    dropout : float
        Dropout rate applied after every hidden layer.
    learning_rate : float
        Learning rate passed to the Adam optimiser.

    Returns
    -------
    The compiled Sequential model (loss: MSE, metric: MAE).
    """
    # Reset Keras state left over from previously built models
    K.clear_session()

    # Set up neural net
    net = Sequential()

    # Declare the input shape once, up front, rather than passing `input_dim`
    # to every hidden layer; this also keeps the model buildable when
    # hidden_layers is 0
    net.add(keras.Input(shape=(number_features,)))

    # Add the hidden layers using a loop
    for _ in range(hidden_layers):
        # Fully connected layer with ReLU activation
        net.add(Dense(hidden_layer_neurones, activation='relu'))
        # Dropout layer
        net.add(Dropout(dropout))

    # Final single-unit output with linear activation
    net.add(Dense(1, activation='linear'))

    # Compiling model
    opt = Adam(learning_rate=learning_rate)

    net.compile(loss='mse',
                optimizer=opt,
                metrics=['mae'])

    return net

In [None]:
def calculate_accuracy(model, X_train_sc, X_test_sc, y_train, y_test):
    """Calculate and print MAE and MSE of the training and test data fits.

    Parameters
    ----------
    model
        Any object with a `predict(X)` method returning array-like predictions.
    X_train_sc, X_test_sc
        Feature arrays passed to `model.predict`.
    y_train, y_test
        True targets. They may be 1-D or a single column of shape (n, 1);
        both are flattened before comparison.

    Returns
    -------
    None. The four metrics are printed to three decimal places.
    """
    # Predict on training and test data
    y_pred_train = np.asarray(model.predict(X_train_sc)).ravel()
    y_pred_test = np.asarray(model.predict(X_test_sc)).ravel()

    # Flatten the targets too: subtracting an (n, 1) column from (n,) predictions
    # would broadcast to an (n, n) matrix and silently give wrong metrics
    err_train = y_pred_train - np.asarray(y_train).ravel()
    err_test = y_pred_test - np.asarray(y_test).ravel()

    # Calculate Mean Absolute Error (MAE) for training and test sets
    mae_train = np.mean(np.abs(err_train))
    mae_test = np.mean(np.abs(err_test))

    # Calculate Mean Squared Error (MSE) for training and test sets
    mse_train = np.mean(err_train ** 2)
    mse_test = np.mean(err_test ** 2)

    # Print the results
    print(f'Training MAE: {mae_train:.3f}')
    print(f'Test MAE: {mae_test:.3f}')
    print(f'Training MSE: {mse_train:.3f}')
    print(f'Test MSE: {mse_test:.3f}')

In [None]:
def plot_training(history_dict,measure='mae'):
    """Plot a training metric and its validation counterpart against epoch.

    Parameters
    ----------
    history_dict : dict
        Maps metric names to per-epoch sequences of values. It must contain
        both `measure` and `val_<measure>`; a KeyError is raised otherwise.
    measure : str
        Name of the metric to plot.

    Returns
    -------
    None. The figure is displayed.
    """
    train_values = history_dict[measure]
    val_values = history_dict[f'val_{measure}']
    # One point per epoch, numbered from 1
    epochs = range(1, len(train_values) + 1)

    fig, ax = plt.subplots()

    ax.plot(epochs, train_values, color='blue', label=f'Training {measure}')
    ax.plot(epochs, val_values, color='red', label=f'Validation {measure}')

    # The x-axis counts epochs, not wall-clock time
    ax.set_xlabel('Epoch')
    ax.set_ylabel(measure)
    ax.set_title(f'Training and validation {measure}')
    ax.legend()

    # plt.show() works on every backend; Figure.show() only warns on
    # non-GUI backends such as the notebook inline one
    plt.show()

In [None]:
# Build the network: input width taken from X_train, 3 hidden layers of 128 units, 20% dropout
model = make_net(
    number_features=X_train.shape[1],
    hidden_layers=3,
    hidden_layer_neurones=128,
    dropout=0.20,
    learning_rate=0.003,
)

In [None]:
# Checkpoint callback: write the model to disk only when validation results improve
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath='model_checkpoint.keras',
    save_best_only=True,
)

# Early stopping callback: stop once val_loss has not improved for 20 epochs
# and restore the weights from the best epoch
early_stopping_cb_loss = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)

# NOTE(review): the test split doubles as validation data here, so early
# stopping and checkpointing are driven by the same rows that are later
# reported as test performance -- confirm this is intended.

### Train model (and store training info in history)
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=200,
    batch_size=64,
    verbose=1,
    callbacks=[checkpoint_cb, early_stopping_cb_loss],
)

## Accuracy (MAE and MSE)

In [None]:
# Print MAE and MSE for the training and test sets (regression errors, despite the function's name)
calculate_accuracy(model, X_train, X_test, y_train, y_test)

In [None]:
# Plot the per-epoch MAE recorded during fitting (training vs. validation data)
plot_training(history.history,measure='mae')