# 2.4 Evaluating Hyperparameters - CNN

## Contents:

1. Import Libraries
2. Import Datasets
3. Data Wrangling
4. Reshaping the Model
5. Data Split
6. Bayesian Hyperparameter Optimisation
7. Running CNN with Optimised Search Parameters
8. Creating Confusion Matrix

## 1. Import Libraries

In [71]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import time
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from numpy import unique
from numpy import reshape
from keras.models import Sequential
from sklearn.model_selection import cross_val_score
from keras.layers import Conv1D, Conv2D, Dense, Dropout, BatchNormalization, Flatten, MaxPooling1D
from tensorflow.keras.utils import to_categorical
from keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
from keras.callbacks import EarlyStopping, ModelCheckpoint
from scikeras.wrappers import KerasClassifier
from math import floor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import make_scorer, accuracy_score
from bayes_opt import BayesianOptimization
from sklearn.model_selection import StratifiedKFold
from keras.layers import LeakyReLU
LeakyReLU = LeakyReLU(negative_slope=0.1)

import warnings
warnings.filterwarnings('ignore')

pd.set_option("display.max_columns", None)

## 2. Import Datasets

In [73]:
# Define path to dataset
path = r'/Users/analazarevska/Documents/CAREER FOUNDRY/Data Analytics Program/Machine Learning/A1, Basics of Machine Learning for Analysts/ClimateWins/Data Sets/'

# Import dataset
df = pd.read_csv(os.path.join(path, 'df_cleaned_with_date.csv'))
df_pleasant = pd.read_csv(os.path.join(path, 'df_pleasant_with_date.csv'))

In [74]:
df.shape

(22950, 137)

In [75]:
df_pleasant.shape

(22950, 16)

## 3. Data Wrangling

In [81]:
# Drop unnecessary columns

df.drop(['DATE', 'MONTH'], axis=1, inplace=True)
df_pleasant.drop(columns = 'DATE', inplace = True)

## 4. Reshaping the Model

In [87]:
# Turn X and answers from a df to arrays

X = np.array(df)
y = np.array(df_pleasant)

In [89]:
X = X.reshape(-1,15,9)

In [93]:
# Use argmax to transform y

y =  np.argmax(y, axis = 1)
y

array([0, 0, 0, ..., 0, 0, 0])

In [97]:
# Check y layout

from sklearn.utils.multiclass import type_of_target
type_of_target(y)

'multiclass'

## 5. Data Split

In [101]:
# Split data into train and test sets

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 42)

In [103]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(17212, 15, 9) (17212,)
(5738, 15, 9) (5738,)


## 6. Bayesian Hyperparameter Optimisation

In [107]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = 15 # Number of weather stations
# Make scorer accuracy
score_acc = make_scorer(accuracy_score)

In [109]:
# Create function

def bay_area(neurons, activation, kernel, optimizer, learning_rate, batch_size, epochs,
              layers1, layers2, normalization, dropout, dropout_rate): 
    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl','SGD']
    #optimizerD= {'Adam':Adam(lr=learning_rate), 'SGD':SGD(lr=learning_rate),
                 #'RMSprop':RMSprop(lr=learning_rate), 'Adadelta':Adadelta(lr=learning_rate),
                 #'Adagrad':Adagrad(lr=learning_rate), 'Adamax':Adamax(lr=learning_rate),
                 #'Nadam':Nadam(lr=learning_rate), 'Ftrl':Ftrl(lr=learning_rate)}
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
                   'elu', 'exponential', LeakyReLU,'relu']
    
    neurons = round(neurons)
    kernel = round(kernel)
    activation = activationL[round(activation)]  #optimizerD[optimizerL[round(optimizer)]]
    optimizer = optimizerL[round(optimizer)]
    batch_size = round(batch_size)
    
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)
    
    def cnn_model():
        model = Sequential()
        model.add(Conv1D(neurons, kernel_size=kernel,activation=activation, input_shape=(timesteps, input_dim)))
        #model.add(Conv1D(32, kernel_size=1,activation='relu', input_shape=(timesteps, input_dim)))
        
        if normalization > 0.5:
            model.add(BatchNormalization())
        for i in range(layers1):
            model.add(Dense(neurons, activation=activation)) #(neurons, activation=activation))
        if dropout > 0.5:
            model.add(Dropout(dropout_rate, seed=123))
        for i in range(layers2):
            model.add(Dense(neurons, activation=activation))
        model.add(MaxPooling1D())
        model.add(Flatten())
        model.add(Dense(n_classes, activation='softmax')) #sigmoid softmax
        #model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy']) #categorical_crossentropy
        model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) #categorical_crossentropy
        return model
    es = EarlyStopping(monitor='accuracy', mode='max', verbose=2, patience=20)
    nn = KerasClassifier(build_fn=cnn_model, epochs=epochs, batch_size=batch_size, verbose=2)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
    score = cross_val_score(nn, X_train, y_train, scoring=score_acc, cv=kfold, fit_params={'callbacks':[es]}).mean()
    return score

In [111]:
start = time.time()
params ={
    'neurons': (10, 100),
    'kernel': (1, 3),
    'activation':(0, 9), 
    'optimizer':(0,7),
    'learning_rate':(0.01, 1),
    'batch_size': (200, 1000), 
    'epochs':(20, 50),
    'layers1':(1,3),
    'layers2':(1,3),
    'normalization':(0,1),
    'dropout':(0,1),
    'dropout_rate':(0,0.3)
}
# Run Bayesian Optimization
nn_opt = BayesianOptimization(bay_area, params, random_state=42)
nn_opt.maximize(init_points=15, n_iter=4) 
print('Search took %s minutes' % ((time.time() - start)/60))

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  kernel   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Epoch 1/25
15/15 - 1s - 82ms/step - accuracy: 0.6014 - loss: 2.7097
Epoch 2/25
15/15 - 1s - 44ms/step - accuracy: 0.6440 - loss: 2.7004
Epoch 3/25
15/15 - 1s - 46ms/step - accuracy: 0.6440 - loss: 2.6971
Epoch 4/25
15/15 - 1s - 49ms/step - accuracy: 0.6440 - loss: 2.6942
Epoch 5/25
15/15 - 1s - 44ms/step - accuracy: 0.6440 - loss: 2.6917
Epoch 6/25
15/15 - 1s - 44ms/step - accuracy: 0.6440 - loss: 2.6895
Epoch 7/25
15/15 - 1s - 44ms/step - accuracy: 0.6440 - loss: 2.6873
Epoch 8/25
15/15 - 1s - 44ms/step - accuracy: 0.6440 - loss: 2.6854
Epoch 9/25
15/15 - 1s - 44ms/step - accuracy: 0.6440 - loss: 2.6835
Epoch 10/25
15/15 - 1s - 44ms/step - accuracy: 0

ValueError: Input y contains NaN.

In [113]:
optimum = nn_opt.max['params']
learning_rate = optimum['learning_rate']

activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'exponential', LeakyReLU, 'relu']
optimum['activation'] = activationL[round(optimum['activation'])]

optimum['batch_size'] = round(optimum['batch_size'])
optimum['epochs'] = round(optimum['epochs'])
optimum['layers1'] = round(optimum['layers1'])
optimum['layers2'] = round(optimum['layers2'])
optimum['neurons'] = round(optimum['neurons'])

optimizerL = ['Adam', 'SGD', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl', 'Adam']
optimizerD = {
    'Adam': Adam(learning_rate=learning_rate),
    'SGD': SGD(learning_rate=learning_rate),
    'RMSprop': RMSprop(learning_rate=learning_rate),
    'Adadelta': Adadelta(learning_rate=learning_rate),
    'Adagrad': Adagrad(learning_rate=learning_rate),
    'Adamax': Adamax(learning_rate=learning_rate),
    'Nadam': Nadam(learning_rate=learning_rate),
    'Ftrl': Ftrl(learning_rate=learning_rate)
}
optimum['optimizer'] = optimizerD[optimizerL[round(optimum['optimizer'])]]
optimum

{'activation': 'softsign',
 'batch_size': 460,
 'dropout': 0.7296061783380641,
 'dropout_rate': 0.19126724140656393,
 'epochs': 47,
 'kernel': 1.9444298503238986,
 'layers1': 1,
 'layers2': 2,
 'learning_rate': 0.7631771981307285,
 'neurons': 61,
 'normalization': 0.770967179954561,
 'optimizer': <keras.src.optimizers.adadelta.Adadelta at 0x36a171dc0>}

## 7. Running CNN with Optimised Search Parameters

In [116]:
# Set the model with optimised hyperparameters

epochs = 47
batch_size = 460

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = 15

layers1 = 1
layers2 = 2
activation = 'softsign'
kernel = int(round(1.9444298503238986))  # Rounded kernel size for Conv1D
neurons = 61
normalization = 0.770967179954561
dropout = 0.7296061783380641
dropout_rate = 0.19126724140656393
optimizer = Adadelta(learning_rate=0.7631771981307285)  # Instantiate RMSprop with learning rate

model = Sequential()
model.add(Conv1D(neurons, kernel_size=kernel, activation=activation, input_shape=(timesteps, input_dim)))

if normalization > 0.5:
    model.add(BatchNormalization())

for i in range(layers1):
    model.add(Dense(neurons, activation=activation))

if dropout > 0.5:
    model.add(Dropout(dropout_rate))

for i in range(layers2):
    model.add(Dense(neurons, activation=activation))

model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) 

model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [118]:
model.summary()

In [120]:
# Put the y_test set back into a one-hot configuration

y_train_one_hot = to_categorical(y_train, num_classes=15)

In [122]:
# Check shapes

print(f'X_train shape: {X_train.shape}')
print(f'y_train_one_hot shape: {y_train_one_hot.shape}')

X_train shape: (17212, 15, 9)
y_train_one_hot shape: (17212, 15)


In [124]:
# Compile the model with categorical_crossentropy

model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [126]:
# Fit the model to the data

model.fit(X_train, y_train_one_hot, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/47
38/38 - 1s - 27ms/step - accuracy: 0.5849 - loss: 1.3933
Epoch 2/47
38/38 - 1s - 14ms/step - accuracy: 0.6995 - loss: 0.8895
Epoch 3/47
38/38 - 1s - 13ms/step - accuracy: 0.7304 - loss: 0.8005
Epoch 4/47
38/38 - 1s - 13ms/step - accuracy: 0.7421 - loss: 0.7566
Epoch 5/47
38/38 - 1s - 14ms/step - accuracy: 0.7610 - loss: 0.7132
Epoch 6/47
38/38 - 1s - 14ms/step - accuracy: 0.7682 - loss: 0.6835
Epoch 7/47
38/38 - 1s - 14ms/step - accuracy: 0.7771 - loss: 0.6605
Epoch 8/47
38/38 - 1s - 18ms/step - accuracy: 0.7870 - loss: 0.6293
Epoch 9/47
38/38 - 1s - 14ms/step - accuracy: 0.7931 - loss: 0.6045
Epoch 10/47
38/38 - 1s - 14ms/step - accuracy: 0.7982 - loss: 0.5841
Epoch 11/47
38/38 - 1s - 14ms/step - accuracy: 0.8054 - loss: 0.5563
Epoch 12/47
38/38 - 1s - 15ms/step - accuracy: 0.8159 - loss: 0.5362
Epoch 13/47
38/38 - 1s - 14ms/step - accuracy: 0.8237 - loss: 0.5127
Epoch 14/47
38/38 - 1s - 14ms/step - accuracy: 0.8316 - loss: 0.4907
Epoch 15/47
38/38 - 1s - 14ms/step - accura

<keras.src.callbacks.history.History at 0x36b68cf20>

## 8. Creating Confusion Matrix

In [129]:
# Define list of stations names

stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'
}

In [131]:
def confusion_matrix(y_true, y_pred, stations):
    # Check if y_true and y_pred are one-hot encoded or already class indices
    if y_true.ndim == 1:
        y_true_labels = y_true
    else:
        y_true_labels = np.argmax(y_true, axis=1)
    
    if y_pred.ndim == 1:
        y_pred_labels = y_pred
    else:
        y_pred_labels = np.argmax(y_pred, axis=1)
        
    # Map numeric labels to activity names
    y_true_series = pd.Series([stations[y] for y in y_true_labels])
    y_pred_series = pd.Series([stations[y] for y in y_pred_labels])
    
    return pd.crosstab(y_true_series, y_pred_series, rownames=['True'], colnames=['Pred'])

In [133]:
y_pred = model.predict(X_test)

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 644us/step


In [135]:
# Evaluate

print(confusion_matrix(y_test, y_pred, stations))

Pred        BASEL  BELGRADE  BUDAPEST  DEBILT  DUSSELDORF  HEATHROW  KASSEL  \
True                                                                          
BASEL        3528        48        16       2           8         9       1   
BELGRADE       98       991         1       0           0         0       0   
BUDAPEST       20        20       172       2           0         0       0   
DEBILT         10         4        15      53           0         0       0   
DUSSELDORF      3         0         1       8           8         9       0   
HEATHROW        6         1         2       3           4        65       0   
KASSEL          1         2         1       0           1         0       4   
LJUBLJANA       6         5         4       0           0         7       1   
MAASTRICHT      3         0         0       1           0         1       0   
MADRID         12        13        15       2           3        13       0   
MUNCHENB        5         1         0       0       