# Exercise 2.4 - Hyperparameter Optimisation (CNN)

This script is organised using the following structure:
1. **Importing Libraries and Data**
2. **Reshape Data for Bayesian Optimisation**
3. **Split the Data**
4. **Convolution Neural Network and Bayesian Optimiser Function**
5. **Running CNN with Hyperparameters from Bayesian Optimisation**
6. **Confusion Matrix**

## 1. Importing Libraries and Data

In [1]:
import pandas as pd
import numpy as np
import os
import operator
import matplotlib.pyplot as plt
import seaborn as sns
import time

# Packages and functions specific to models and optimisation
import tensorflow as tf
from keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Conv2D, Dense, Dropout, BatchNormalization, Flatten, MaxPooling1D
from sklearn.model_selection import train_test_split, cross_val_score
from tensorflow.keras.utils import to_categorical
from keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
from keras.callbacks import EarlyStopping, ModelCheckpoint
from scikeras.wrappers import KerasClassifier
from math import floor
from sklearn.metrics import make_scorer, accuracy_score
from bayes_opt import BayesianOptimization
from sklearn.model_selection import StratifiedKFold
from keras.layers import LeakyReLU
LeakyReLU = LeakyReLU(negative_slope=0.1) # alpha deprecated - use negative_slope instead
import warnings
warnings.filterwarnings('ignore')
pd.set_option("display.max_columns", None)

In [2]:
path = r'C:\Users\kyles\CareerFoundary\Machine Learning\Achievement 2'

In [3]:
# Import unscaled weather data
df_weather = pd.read_csv(os.path.join(path, '02 Data', 'Cleaned_Weather_Data.csv'))
df_weather.drop(columns=['Unnamed: 0'], inplace = True)
# Check
df_weather

Unnamed: 0,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,BELGRADE_cloud_cover,BELGRADE_humidity,BELGRADE_pressure,BELGRADE_global_radiation,BELGRADE_precipitation,BELGRADE_sunshine,BELGRADE_temp_mean,BELGRADE_temp_min,BELGRADE_temp_max,BUDAPEST_cloud_cover,BUDAPEST_humidity,BUDAPEST_pressure,BUDAPEST_global_radiation,BUDAPEST_precipitation,BUDAPEST_sunshine,BUDAPEST_temp_mean,BUDAPEST_temp_min,BUDAPEST_temp_max,DEBILT_cloud_cover,DEBILT_humidity,DEBILT_pressure,DEBILT_global_radiation,DEBILT_precipitation,DEBILT_sunshine,DEBILT_temp_mean,DEBILT_temp_min,DEBILT_temp_max,DUSSELDORF_cloud_cover,DUSSELDORF_humidity,DUSSELDORF_pressure,DUSSELDORF_global_radiation,DUSSELDORF_precipitation,DUSSELDORF_sunshine,DUSSELDORF_temp_mean,DUSSELDORF_temp_min,DUSSELDORF_temp_max,HEATHROW_cloud_cover,HEATHROW_humidity,HEATHROW_pressure,HEATHROW_global_radiation,HEATHROW_precipitation,HEATHROW_sunshine,HEATHROW_temp_mean,HEATHROW_temp_min,HEATHROW_temp_max,KASSEL_cloud_cover,KASSEL_humidity,KASSEL_pressure,KASSEL_global_radiation,KASSEL_precipitation,KASSEL_sunshine,KASSEL_temp_mean,KASSEL_temp_min,KASSEL_temp_max,LJUBLJANA_cloud_cover,LJUBLJANA_humidity,LJUBLJANA_pressure,LJUBLJANA_global_radiation,LJUBLJANA_precipitation,LJUBLJANA_sunshine,LJUBLJANA_temp_mean,LJUBLJANA_temp_min,LJUBLJANA_temp_max,MAASTRICHT_cloud_cover,MAASTRICHT_humidity,MAASTRICHT_pressure,MAASTRICHT_global_radiation,MAASTRICHT_precipitation,MAASTRICHT_sunshine,MAASTRICHT_temp_mean,MAASTRICHT_temp_min,MAASTRICHT_temp_max,MADRID_cloud_cover,MADRID_humidity,MADRID_pressure,MADRID_global_radiation,MADRID_precipitation,MADRID_sunshine,MADRID_temp_mean,MADRID_temp_min,MADRID_temp_max,MUNCHENB_cloud_cover,MUNCHENB_humidity,MUNCHENB_pressure,MUNCHENB_global_radiation,MUNCHENB_precipitation,MUNCHENB_sunshine,MUNCHENB_temp_mean,MUNCHENB_temp_min,MUNCHENB_temp_max,OSLO_cloud_cover,OSLO_humidity,OSLO_pressure,OSLO_global_radiation,OSLO_precipitation,OSLO_sunshine,OSLO_temp_mean,OSLO_temp_min,OSLO_temp_max,SONNBLICK_cloud_cover,SONNBLICK_humidity,SONNBLICK_pressure,SONNBLICK_global_radiation,SONNBLICK_precipitation,SONNBLICK_sunshine,SONNBLICK_temp_mean,SONNBLICK_temp_min,SONNBLICK_temp_max,STOCKHOLM_cloud_cover,STOCKHOLM_humidity,STOCKHOLM_pressure,STOCKHOLM_global_radiation,STOCKHOLM_precipitation,STOCKHOLM_sunshine,STOCKHOLM_temp_mean,STOCKHOLM_temp_min,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,7,0.85,1.0180,0.32,0.09,0.7,6.5,0.8,10.9,1,0.81,1.0195,0.88,0.00,7.0,3.7,-0.9,7.9,4,0.67,1.017,0.44,0.01,2.3,2.4,-0.4,5.1,7,0.85,1.0032,0.07,0.25,0.0,9.3,7.4,11.0,8,0.83,1.0161,0.12,0.08,0.0,10.0,7.0,11.5,7,0.91,1.0010,0.13,0.22,0.0,10.6,9.4,8.3,8,0.82,1.0094,0.28,0.48,1.6,7.9,3.9,9.4,8,1.00,1.0173,0.20,0.00,0.0,-0.6,-1.9,0.5,7,0.83,1.0063,0.22,0.32,1.0,9.5,8.5,11.1,6,0.92,1.0260,0.53,0.00,1.4,7.6,4.4,10.8,5,0.67,1.0304,0.20,0.10,0.0,6.9,1.1,10.4,8,0.98,0.9978,0.04,1.14,0.0,4.9,3.8,5.9,4,0.73,1.0304,0.48,0.01,2.3,-5.9,-8.5,-3.2,5,0.98,1.0114,0.05,0.32,0.0,4.2,2.2,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,6,0.84,1.0180,0.36,1.05,1.1,6.1,3.3,10.1,6,0.84,1.0172,0.25,0.00,0.0,2.9,2.2,4.4,4,0.67,1.017,0.18,0.31,0.0,2.3,1.4,3.1,8,0.90,1.0056,0.14,0.06,0.1,7.7,6.4,8.3,8,0.89,1.0161,0.18,0.66,0.5,8.2,7.4,11.0,7,0.98,1.0051,0.13,0.23,0.0,6.1,3.9,10.6,6,0.86,1.0086,0.12,0.27,0.0,7.7,6.8,9.1,6,0.94,1.0173,0.56,0.13,3.2,2.1,-1.3,5.5,8,0.92,1.0062,0.17,1.34,0.4,8.6,7.5,9.9,7,0.86,1.0254,0.46,0.00,0.9,9.8,7.4,12.2,6,0.72,1.0292,0.61,0.30,5.1,6.2,4.2,10.2,8,0.62,1.0139,0.04,0.00,0.0,3.4,2.8,4.9,6,0.97,1.0292,0.21,0.61,0.0,-9.5,-10.5,-8.5,5,0.62,1.0114,0.05,0.06,0.0,4.0,3.0,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,8,0.90,1.0180,0.18,0.30,0.0,8.5,5.1,9.9,6,0.77,1.0179,0.67,0.00,3.5,3.1,-0.5,6.4,4,0.67,1.017,0.30,0.00,0.6,2.7,1.7,5.3,6,0.92,1.0165,0.28,0.01,3.0,6.8,4.6,9.9,7,0.95,1.0161,0.12,0.07,0.0,7.1,6.9,9.1,8,0.96,1.0166,0.15,0.07,0.1,8.4,6.1,12.2,8,0.91,1.0129,0.12,0.60,0.0,6.5,6.0,8.0,8,0.96,1.0173,0.20,0.12,0.0,4.6,0.9,6.3,7,0.97,1.0167,0.12,0.46,0.0,6.9,5.5,9.9,5,0.90,1.0287,0.63,0.00,2.3,8.6,6.4,10.8,6,0.91,1.0320,0.20,0.30,0.0,5.8,4.0,8.0,8,0.69,1.0234,0.04,0.08,0.0,1.9,0.6,3.1,8,0.93,1.0320,0.21,3.20,0.0,-9.5,-10.0,-8.9,5,0.69,1.0114,0.05,0.02,0.0,2.4,1.3,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,3,0.92,1.0180,0.58,0.00,4.1,6.3,3.8,10.6,8,0.93,1.0268,0.25,0.00,0.0,2.0,-2.0,3.0,4,0.67,1.017,0.19,0.00,0.0,2.0,0.4,4.4,8,0.95,1.0265,0.08,0.09,0.0,6.7,3.6,10.1,8,0.86,1.0161,0.12,0.02,0.0,6.8,3.6,8.0,8,0.98,1.0230,0.13,0.00,0.0,9.4,6.7,8.9,6,0.87,1.0290,0.12,0.00,0.0,5.8,5.2,6.5,6,0.94,1.0173,0.49,0.00,2.2,3.2,1.0,7.0,7,0.89,1.0277,0.16,0.00,0.3,7.0,3.0,10.0,0,0.75,1.0281,1.16,0.00,8.7,10.3,4.5,16.1,6,0.90,1.0443,0.20,0.01,0.0,3.9,3.2,5.4,8,0.98,1.0244,0.04,0.35,0.0,3.0,0.4,4.9,5,0.93,1.0443,0.22,1.10,0.0,-11.5,-12.9,-10.0,5,0.98,1.0114,0.05,0.00,0.0,1.2,0.4,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,6,0.95,1.0180,0.65,0.14,5.4,3.0,-0.7,6.0,8,0.99,1.0286,0.25,0.06,0.0,2.0,0.7,2.8,4,0.67,1.017,0.19,0.00,0.0,2.5,1.1,5.3,6,0.90,1.0243,0.04,0.39,0.0,8.0,2.4,11.2,7,0.92,1.0161,0.12,0.62,0.0,7.7,6.2,11.0,5,0.84,1.0275,0.30,0.00,2.1,8.9,8.9,7.2,7,0.86,1.0262,0.13,0.71,0.0,5.4,3.7,6.0,7,0.94,1.0173,0.20,0.00,0.0,3.6,0.4,4.8,7,0.92,1.0259,0.12,0.56,0.0,8.1,2.5,11.1,2,0.64,1.0269,1.10,0.00,7.8,12.1,8.2,16.0,5,0.85,1.0430,0.65,0.96,5.6,1.8,-3.0,6.0,8,0.96,1.0092,0.05,0.26,0.0,3.7,2.9,4.9,2,0.75,1.0430,0.72,0.01,6.1,-9.3,-12.0,-6.5,5,0.96,1.0114,0.05,1.32,0.0,3.3,0.8,4.3,3,0.80,1.0328,0.46,0.00,5.7,5.7,3.0,8.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,1,0.79,1.0248,1.34,0.22,7.7,15.9,11.4,21.4,2,0.68,1.0278,1.57,0.18,5.7,18.2,12.1,24.4,4,0.67,1.017,1.41,0.14,5.4,11.7,7.9,16.2,8,0.84,1.0190,1.13,0.22,2.8,15.7,12.8,19.4,8,0.75,1.0214,1.13,0.20,6.4,17.8,13.6,21.4,5,0.87,1.0140,1.18,0.16,1.9,16.4,11.9,18.9,4,0.77,1.0161,1.14,0.19,4.0,9.1,5.4,13.1,4,0.80,1.0289,1.35,0.37,5.9,14.7,12.1,21.1,8,0.67,1.0201,1.17,0.20,5.3,18.6,14.1,22.6,8,0.52,1.0204,1.89,0.12,5.3,20.0,16.2,23.9,2,0.76,1.0263,1.37,0.26,9.7,14.3,8.3,22.2,8,0.98,1.0139,1.06,0.21,0.1,9.7,5.8,12.0,2,0.84,1.0263,1.56,0.47,4.7,0.6,-1.4,2.6,5,0.98,1.0161,1.11,0.14,3.2,11.5,8.2,14.2,5,0.82,1.0142,1.13,0.41,3.4,10.7,7.9,13.5
22946,6,0.77,1.0244,1.34,0.22,5.4,16.7,14.3,21.9,0,0.68,1.0295,1.57,0.18,5.7,15.9,10.6,21.2,4,0.67,1.017,1.41,0.14,5.4,11.7,7.9,16.2,8,0.84,1.0177,1.13,0.22,3.5,16.0,10.3,20.5,7,0.71,1.0186,1.13,0.20,4.9,19.4,15.4,23.9,4,0.82,1.0152,1.18,0.16,4.2,15.8,12.7,21.8,3,0.77,1.0161,1.14,0.19,4.0,9.1,5.4,13.1,3,0.82,1.0291,1.35,0.37,4.5,12.9,9.8,19.8,7,0.70,1.0190,1.17,0.20,5.0,18.9,15.8,23.5,8,0.51,1.0221,1.89,0.12,3.9,19.1,14.7,23.5,6,0.70,1.0263,1.37,0.26,7.7,16.1,8.9,26.1,8,1.00,1.0107,1.06,0.21,0.0,10.9,8.8,11.7,5,0.84,1.0263,1.56,0.47,4.7,2.3,0.6,4.0,5,1.00,1.0122,1.11,0.14,0.8,12.5,11.0,14.3,5,0.82,1.0142,1.13,0.41,3.4,10.7,7.9,13.5
22947,4,0.76,1.0227,1.34,0.22,6.1,16.7,13.1,22.4,2,0.68,1.0278,1.57,0.18,5.7,13.4,8.6,18.2,4,0.67,1.017,1.41,0.14,5.4,11.7,7.9,16.2,8,0.86,1.0174,1.13,0.22,3.3,15.8,9.3,21.1,8,0.73,1.0196,1.13,0.20,4.0,18.2,13.4,22.0,7,0.85,1.0138,1.18,0.16,4.2,16.5,11.2,17.0,3,0.77,1.0161,1.14,0.19,4.0,9.1,5.4,13.1,3,0.81,1.0270,1.35,0.37,5.1,13.2,10.2,20.7,8,0.69,1.0183,1.17,0.20,3.2,18.2,13.7,24.3,8,0.46,1.0186,1.89,0.12,8.1,19.0,15.4,22.6,7,0.64,1.0263,1.37,0.26,6.8,17.4,11.2,26.2,3,0.85,1.0082,1.06,0.21,6.8,9.7,7.7,14.2,3,0.84,1.0263,1.56,0.47,4.7,3.3,2.1,4.5,5,0.85,1.0059,1.11,0.14,6.9,13.1,12.1,14.4,5,0.82,1.0142,1.13,0.41,3.4,10.7,7.9,13.5
22948,5,0.80,1.0212,1.34,0.22,5.8,15.4,11.6,21.1,1,0.68,1.0238,1.57,0.18,5.7,15.0,9.1,20.9,4,0.67,1.017,1.41,0.14,5.4,11.7,7.9,16.2,8,0.87,1.0174,1.13,0.22,6.0,14.4,10.3,20.2,7,0.73,1.0176,1.13,0.20,6.9,16.7,11.9,21.1,5,0.86,1.0147,1.18,0.16,0.6,15.2,13.4,17.5,3,0.77,1.0161,1.14,0.19,4.0,9.1,5.4,13.1,3,0.77,1.0238,1.35,0.37,5.7,14.0,10.0,23.1,8,0.73,1.0181,1.17,0.20,6.8,16.3,12.8,21.4,5,0.66,1.0186,1.89,0.12,3.1,15.7,13.1,18.3,6,0.75,1.0263,1.37,0.26,8.3,14.5,9.2,23.5,5,0.94,1.0150,1.06,0.21,2.9,5.9,2.1,8.1,3,0.84,1.0263,1.56,0.47,4.7,3.4,2.7,4.1,5,0.94,1.0160,1.11,0.14,8.4,7.5,5.1,12.4,5,0.82,1.0142,1.13,0.41,3.4,10.7,7.9,13.5


In [4]:
# Import 'pleasant weather' data
df_answers = pd.read_csv(os.path.join(path, '02 Data', 'Pleasant_Weather_Prediction_Answers.csv'))
df_answers.drop(columns=['DATE'], inplace = True) # Assuming we don't need dates for this
# Check
df_answers

Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22946,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22947,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22948,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## 2. Reshape Data for Bayesian Optimisation

Bayesian Optimisation requires a slightly different form for both data sets before they are split:
- X shape will need to be (22950, 15, 9)
- y shape will need to be (22950,)

In [5]:
# Start with renaming and turning data into arrays
X = np.array(df_weather)
y = np.array(df_answers)

# Check current shapes
print(f'X shape is currently: {X.shape}')
print(f'y shape is currently: {y.shape}')

X shape is currently: (22950, 135)
y shape is currently: (22950, 15)


In [6]:
# Change shape of X
X = X.reshape(-1, 15, 9)
X.shape # verify

(22950, 15, 9)

In [7]:
from sklearn.utils.multiclass import type_of_target
type_of_target(y)

'multilabel-indicator'

In [8]:
# Change shape of y
y = np.argmax(y, axis = 1)
y # look at data to see if it seems correct

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [9]:
# Verify X and y shapes
print(f'X shape is currently: {X.shape}')
print(f'y shape is currently: {y.shape}')

X shape is currently: (22950, 15, 9)
y shape is currently: (22950,)


Fantastic

In [10]:
# Check unique values in y
unique_classes = np.unique(y)
print(unique_classes)
print(len(unique_classes)) 

[ 0  1  2  3  4  5  6  7  8  9 10 11 13 14]
14


Perhaps there is an error with Sonnblick (having only one type of value)

## 3. Split the Data

In [11]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42) # Can also control size with train_size before random_state

In [12]:
# Check sizes
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(17212, 15, 9) (17212,)
(5738, 15, 9) (5738,)


## 4. Convolution Neural Network and Bayesian Optimiser Function

In [13]:
# Setup essential/non-changing parameters
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = 15 # number of weather stations
# Make scorer accuracy
score_acc = make_scorer(accuracy_score)

In [14]:
# Create function

def bay_area(neurons, activation, kernel, optimizer, learning_rate, batch_size, epochs,
              layers1, layers2, normalization, dropout, dropout_rate): 
    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl','SGD']
    optimizerD = {
                    'Adam': Adam(learning_rate=learning_rate),
                    'SGD': SGD(learning_rate=learning_rate),
                    'RMSprop': RMSprop(learning_rate=learning_rate),
                    'Adadelta': Adadelta(learning_rate=learning_rate),
                    'Adagrad': Adagrad(learning_rate=learning_rate),
                    'Adamax': Adamax(learning_rate=learning_rate),
                    'Nadam': Nadam(learning_rate=learning_rate),
                    'Ftrl': Ftrl(learning_rate=learning_rate)
                 }
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
                   'elu', 'exponential', LeakyReLU,'relu']
    
    neurons = round(neurons)
    kernel = round(kernel)
    activation = activationL[round(activation)]  #optimizerD[optimizerL[round(optimizer)]]
    optimizer = optimizerL[round(optimizer)]
    batch_size = round(batch_size)
    
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)
    
    def cnn_model():
        model = Sequential()
        model.add(Conv1D(neurons, kernel_size=kernel,activation=activation, input_shape=(timesteps, input_dim)))
        #model.add(Conv1D(32, kernel_size=1,activation='relu', input_shape=(timesteps, input_dim)))
        
        if normalization > 0.5:
            model.add(BatchNormalization())
        for i in range(layers1):
            model.add(Dense(neurons, activation=activation)) #(neurons, activation=activation))
        if dropout > 0.5:
            model.add(Dropout(dropout_rate, seed=123))
        for i in range(layers2):
            model.add(Dense(neurons, activation=activation))
        model.add(MaxPooling1D())
        model.add(Flatten())
        model.add(Dense(n_classes, activation='softmax')) #sigmoid softmax
        #model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy']) #categorical_crossentropy
        model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) #categorical_crossentropy
        return model
    es = EarlyStopping(monitor='accuracy', mode='max', verbose=2, patience=20)
    nn = KerasClassifier(build_fn=cnn_model, epochs=epochs, batch_size=batch_size, verbose=2)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
    score = cross_val_score(nn, X_train, y_train, scoring=score_acc, cv=kfold, fit_params={'callbacks':[es]}).mean()
    return score

In [15]:
start = time.time()
params ={
    'neurons': (10, 100),
    'kernel': (1, 3),
    'activation':(0, 9), 
    'optimizer':(0,7),
    'learning_rate':(0.01, 1),
    'batch_size': (200, 1000), 
    'epochs':(20, 50),
    'layers1':(1,3),
    'layers2':(1,3),
    'normalization':(0,1),
    'dropout':(0,1),
    'dropout_rate':(0,0.3)
}
# Run Bayesian Optimization
nn_opt = BayesianOptimization(bay_area, params, random_state=42)
nn_opt.maximize(init_points=15, n_iter=4) 
print('Search took %s minutes' % ((time.time() - start)/60))

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  kernel   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Epoch 1/25
15/15 - 4s - 244ms/step - accuracy: 0.5993 - loss: 2.7131
Epoch 2/25
15/15 - 1s - 34ms/step - accuracy: 0.6440 - loss: 2.7004
Epoch 3/25
15/15 - 0s - 31ms/step - accuracy: 0.6440 - loss: 2.6971
Epoch 4/25
15/15 - 0s - 31ms/step - accuracy: 0.6440 - loss: 2.6942
Epoch 5/25
15/15 - 0s - 31ms/step - accuracy: 0.6440 - loss: 2.6917
Epoch 6/25
15/15 - 0s - 31ms/step - accuracy: 0.6440 - loss: 2.6894
Epoch 7/25
15/15 - 0s - 31ms/step - accuracy: 0.6440 - loss: 2.6873
Epoch 8/25
15/15 - 0s - 31ms/step - accuracy: 0.6440 - loss: 2.6853
Epoch 9/25
15/15 - 0s - 32ms/step - accuracy: 0.6440 - loss: 2.6834
Epoch 10/25
15/15 - 0s - 32ms/step - accuracy: 

ValueError: Input y contains NaN.

In [16]:
optimum = nn_opt.max['params']
learning_rate = optimum['learning_rate']

activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'exponential', LeakyReLU, 'relu']
optimum['activation'] = activationL[round(optimum['activation'])]

optimum['batch_size'] = round(optimum['batch_size'])
optimum['epochs'] = round(optimum['epochs'])
optimum['layers1'] = round(optimum['layers1'])
optimum['layers2'] = round(optimum['layers2'])
optimum['neurons'] = round(optimum['neurons'])

optimizerL = ['Adam', 'SGD', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl', 'Adam']
optimizerD = {
    'Adam': Adam(learning_rate=learning_rate),
    'SGD': SGD(learning_rate=learning_rate),
    'RMSprop': RMSprop(learning_rate=learning_rate),
    'Adadelta': Adadelta(learning_rate=learning_rate),
    'Adagrad': Adagrad(learning_rate=learning_rate),
    'Adamax': Adamax(learning_rate=learning_rate),
    'Nadam': Nadam(learning_rate=learning_rate),
    'Ftrl': Ftrl(learning_rate=learning_rate)
}
optimum['optimizer'] = optimizerD[optimizerL[round(optimum['optimizer'])]]
optimum

{'activation': 'softsign',
 'batch_size': 460,
 'dropout': 0.7296061783380641,
 'dropout_rate': 0.19126724140656393,
 'epochs': 47,
 'kernel': 1.9444298503238986,
 'layers1': 1,
 'layers2': 2,
 'learning_rate': 0.7631771981307285,
 'neurons': 61,
 'normalization': 0.770967179954561,
 'optimizer': <keras.src.optimizers.adadelta.Adadelta at 0x2ba8eec4410>}

## 5. Running CNN with Hyperparameters from Bayesian Optimisation

In [13]:
# Put the y_test set back into a one-hot configuration
y_train_one_hot = to_categorical(y_train, num_classes=15)

In [14]:
print(f'y_train_one_hot shape: {y_train_one_hot.shape}')

y_train_one_hot shape: (17212, 15)


In [37]:
# Construct model with optimised hyperparameters
epochs = 47
batch_size = 460

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train_one_hot[0])

layers1 = 1
layers2 = 2
activation = 'softsign'
kernel = int(round(1.9444298503238986))  # Rounded kernel size for Conv1D
neurons = 61
normalization = 0.770967179954561
dropout = 0.7296061783380641
dropout_rate = 0.19126724140656393
optimizer = Adadelta(learning_rate=0.7631771981307285)  # Instantiate RMSprop with learning rate

model = Sequential()
model.add(Conv1D(neurons, kernel_size=kernel, activation=activation, input_shape=(timesteps, input_dim)))

if normalization > 0.5:
    model.add(BatchNormalization())

for i in range(layers1):
    model.add(Dense(neurons, activation=activation))

if dropout > 0.5:
    model.add(Dropout(dropout_rate))

for i in range(layers2):
    model.add(Dense(neurons, activation=activation))

model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) 

model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [38]:
model.summary()

In [39]:
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [40]:
model.fit(X_train, y_train_one_hot, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/47
38/38 - 3s - 83ms/step - accuracy: 0.5916 - loss: 1.4315
Epoch 2/47
38/38 - 1s - 13ms/step - accuracy: 0.7031 - loss: 0.8945
Epoch 3/47
38/38 - 0s - 13ms/step - accuracy: 0.7271 - loss: 0.8063
Epoch 4/47
38/38 - 1s - 13ms/step - accuracy: 0.7340 - loss: 0.7691
Epoch 5/47
38/38 - 1s - 13ms/step - accuracy: 0.7571 - loss: 0.7189
Epoch 6/47
38/38 - 1s - 15ms/step - accuracy: 0.7684 - loss: 0.6770
Epoch 7/47
38/38 - 1s - 14ms/step - accuracy: 0.7793 - loss: 0.6446
Epoch 8/47
38/38 - 1s - 14ms/step - accuracy: 0.7888 - loss: 0.6166
Epoch 9/47
38/38 - 0s - 13ms/step - accuracy: 0.7983 - loss: 0.5865
Epoch 10/47
38/38 - 1s - 14ms/step - accuracy: 0.8040 - loss: 0.5660
Epoch 11/47
38/38 - 0s - 13ms/step - accuracy: 0.8112 - loss: 0.5436
Epoch 12/47
38/38 - 1s - 13ms/step - accuracy: 0.8170 - loss: 0.5266
Epoch 13/47
38/38 - 1s - 13ms/step - accuracy: 0.8266 - loss: 0.4992
Epoch 14/47
38/38 - 0s - 13ms/step - accuracy: 0.8310 - loss: 0.4858
Epoch 15/47
38/38 - 0s - 13ms/step - accura

<keras.src.callbacks.history.History at 0x1c94c45a290>

## 6. Confusion Matrix

In [41]:
# Set up 'label key' for confusion matrix (similar to activities in HAR Data)
stations = {
    0: 'BASEL', 
    1: 'BELGRADE', 
    2: 'BUDAPEST', 
    3: 'DEBILT', 
    4: 'DUSSELDORF', 
    5: 'HEATHROW', 
    6: 'KASSEL', 
    7: 'LJUBLJANA', 
    8: 'MAASTRICHT', 
    9: 'MADRID', 
    10: 'MUNCHENB', 
    11: 'OSLO', 
    12: 'SONNBLICK', 
    13: 'STOCKHOLM', 
    14: 'VALENTIA'
}

In [42]:
def confusion_matrix(y_true, y_pred, stations):
    # Check if y_true and y_pred are one-hot encoded or already class indices
    if y_true.ndim == 1:
        y_true_labels = y_true
    else:
        y_true_labels = np.argmax(y_true, axis=1)
    
    if y_pred.ndim == 1:
        y_pred_labels = y_pred
    else:
        y_pred_labels = np.argmax(y_pred, axis=1)
        
    # Map numeric labels to activity names
    y_true_series = pd.Series([stations[y] for y in y_true_labels])
    y_pred_series = pd.Series([stations[y] for y in y_pred_labels])
    
    return pd.crosstab(y_true_series, y_pred_series, rownames=['True'], colnames=['Pred'])

In [43]:
y_pred = model.predict(X_test)

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


In [44]:
# Create and show confusion matrix

print(confusion_matrix(y_test, y_pred, stations))

Pred        BASEL  BELGRADE  BUDAPEST  DEBILT  DUSSELDORF  HEATHROW  KASSEL  \
True                                                                          
BASEL        3547        98         6       1           4         1       0   
BELGRADE      193       899         0       0           0         0       0   
BUDAPEST       38        56       120       0           0         0       0   
DEBILT         22        10        14      36           0         0       0   
DUSSELDORF      9         3         4       6           5         2       0   
HEATHROW       16         6         3       4           7        45       0   
KASSEL          2         2         1       0           2         1       3   
LJUBLJANA      15         5         4       0           0         4       0   
MAASTRICHT      7         1         0       0           0         0       0   
MADRID         41        28        15       2           5        20       1   
MUNCHENB        4         3         0       0       