# 2.4.2 Evaluating hyperparameters - Keras - CNN
### The following script contains the following:

#### 1. Import data libraries, additional requirements
#### 2. Import data
#### 3. Data preprocessing
#### 4. Bayesian optimization function
#### 5. Build and run CNN keras model
#### 6. Run confusion matrix
        - check accuracy and loss
---------------------------------------------------------------------------------------------------------------------------
## 1. Import data libraries, additional requirements
---------------------------------------------------------------------------------------------------------------------------

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import time
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from numpy import unique
from numpy import reshape
import tensorflow.keras as keras
from keras.models import Sequential
from sklearn.model_selection import cross_val_score
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout
from tensorflow.keras.utils import to_categorical
from keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
from keras.callbacks import EarlyStopping, ModelCheckpoint
from scikeras.wrappers import KerasClassifier
from math import floor
from sklearn.metrics import make_scorer, accuracy_score
from bayes_opt import BayesianOptimization
from sklearn.model_selection import StratifiedKFold
from keras.layers import LeakyReLU
LeakyReLU = LeakyReLU(negative_slope=0.1)
import warnings

In [3]:
# Turning off warning feature
warnings.filterwarnings('ignore')

# Set display options to show all columns without truncation
pd.set_option('display.max_columns', None)

---------------------------------------------------------------------------------------------------------------------------
## 2. Import data

In [7]:
# Create a path for importing
path = r'C:\Users\shrav\_Data_Analysis_CF\ML_ClimateWins_Project'

In [9]:
weather = pd.read_csv(os.path.join(path, '02 Data', 'Unsupervised', 'weather-data-cleaned.csv'))
weather

Unnamed: 0,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,BELGRADE_cloud_cover,BELGRADE_humidity,BELGRADE_pressure,BELGRADE_global_radiation,BELGRADE_precipitation,BELGRADE_sunshine,BELGRADE_temp_mean,BELGRADE_temp_min,BELGRADE_temp_max,BUDAPEST_cloud_cover,BUDAPEST_humidity,BUDAPEST_pressure,BUDAPEST_global_radiation,BUDAPEST_precipitation,BUDAPEST_sunshine,BUDAPEST_temp_mean,BUDAPEST_temp_min,BUDAPEST_temp_max,DEBILT_cloud_cover,DEBILT_humidity,DEBILT_pressure,DEBILT_global_radiation,DEBILT_precipitation,DEBILT_sunshine,DEBILT_temp_mean,DEBILT_temp_min,DEBILT_temp_max,DUSSELDORF_cloud_cover,DUSSELDORF_humidity,DUSSELDORF_pressure,DUSSELDORF_global_radiation,DUSSELDORF_precipitation,DUSSELDORF_sunshine,DUSSELDORF_temp_mean,DUSSELDORF_temp_min,DUSSELDORF_temp_max,HEATHROW_cloud_cover,HEATHROW_humidity,HEATHROW_pressure,HEATHROW_global_radiation,HEATHROW_precipitation,HEATHROW_sunshine,HEATHROW_temp_mean,HEATHROW_temp_min,HEATHROW_temp_max,KASSEL_humidity,KASSEL_pressure,KASSEL_global_radiation,KASSEL_precipitation,KASSEL_sunshine,KASSEL_temp_mean,KASSEL_temp_min,KASSEL_temp_max,LJUBLJANA_cloud_cover,LJUBLJANA_humidity,LJUBLJANA_pressure,LJUBLJANA_global_radiation,LJUBLJANA_precipitation,LJUBLJANA_sunshine,LJUBLJANA_temp_mean,LJUBLJANA_temp_min,LJUBLJANA_temp_max,MAASTRICHT_cloud_cover,MAASTRICHT_humidity,MAASTRICHT_pressure,MAASTRICHT_global_radiation,MAASTRICHT_precipitation,MAASTRICHT_sunshine,MAASTRICHT_temp_mean,MAASTRICHT_temp_min,MAASTRICHT_temp_max,MADRID_cloud_cover,MADRID_humidity,MADRID_pressure,MADRID_global_radiation,MADRID_precipitation,MADRID_sunshine,MADRID_temp_mean,MADRID_temp_min,MADRID_temp_max,MUNCHENB_cloud_cover,MUNCHENB_humidity,MUNCHENB_global_radiation,MUNCHENB_precipitation,MUNCHENB_sunshine,MUNCHENB_temp_mean,MUNCHENB_temp_min,MUNCHENB_temp_max,OSLO_cloud_cover,OSLO_humidity,OSLO_pressure,OSLO_global_radiation,OSLO_precipitation,OSLO_sunshine,OSLO_temp_mean,OSLO_temp_min,OSLO_temp_max,SONNBLICK_cloud_cover,SONNBLICK_humidity,SONNBLICK_pressure,SONNBLICK_global_radiation,SONNBLICK_precipitation,SONNBLICK_sunshine,SONNBLICK_temp_mean,SONNBLICK_temp_min,SONNBLICK_temp_max,STOCKHOLM_cloud_cover,STOCKHOLM_pressure,STOCKHOLM_global_radiation,STOCKHOLM_precipitation,STOCKHOLM_sunshine,STOCKHOLM_temp_mean,STOCKHOLM_temp_min,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max,KASSEL_cloud_cover,MUNCHENB_pressure,STOCKHOLM_humidity
0,7,0.85,1.0180,0.32,0.09,0.7,6.5,0.8,10.9,1,0.81,1.0195,0.88,0.00,7.0,3.7,-0.9,7.9,4,0.67,1.017,0.44,0.01,2.3,2.4,-0.4,5.1,7,0.85,1.0032,0.07,0.25,0.0,9.3,7.4,11.0,8,0.83,1.0161,0.12,0.08,0.0,10.0,7.0,11.5,7,0.91,1.0010,0.13,0.22,0.0,10.6,9.4,8.3,0.82,1.0094,0.28,0.48,1.6,7.9,3.9,9.4,8,1.00,1.0173,0.20,0.00,0.0,-0.6,-1.9,0.5,7,0.83,1.0063,0.22,0.32,1.0,9.5,8.5,11.1,6,0.92,1.0260,0.53,0.00,1.4,7.6,4.4,10.8,5,0.67,0.20,0.10,0.0,6.9,1.1,10.4,8,0.98,0.9978,0.04,1.14,0.0,4.9,3.8,5.9,4,0.73,1.0304,0.48,0.01,2.3,-5.9,-8.5,-3.2,5,1.0114,0.05,0.32,0.0,4.2,2.2,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9,8,1.0304,0.98
1,6,0.84,1.0180,0.36,1.05,1.1,6.1,3.3,10.1,6,0.84,1.0172,0.25,0.00,0.0,2.9,2.2,4.4,4,0.67,1.017,0.18,0.31,0.0,2.3,1.4,3.1,8,0.90,1.0056,0.14,0.06,0.1,7.7,6.4,8.3,8,0.89,1.0161,0.18,0.66,0.5,8.2,7.4,11.0,7,0.98,1.0051,0.13,0.23,0.0,6.1,3.9,10.6,0.86,1.0086,0.12,0.27,0.0,7.7,6.8,9.1,6,0.94,1.0173,0.56,0.13,3.2,2.1,-1.3,5.5,8,0.92,1.0062,0.17,1.34,0.4,8.6,7.5,9.9,7,0.86,1.0254,0.46,0.00,0.9,9.8,7.4,12.2,6,0.72,0.61,0.30,5.1,6.2,4.2,10.2,8,0.62,1.0139,0.04,0.00,0.0,3.4,2.8,4.9,6,0.97,1.0292,0.21,0.61,0.0,-9.5,-10.5,-8.5,5,1.0114,0.05,0.06,0.0,4.0,3.0,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1,6,1.0292,0.62
2,8,0.90,1.0180,0.18,0.30,0.0,8.5,5.1,9.9,6,0.77,1.0179,0.67,0.00,3.5,3.1,-0.5,6.4,4,0.67,1.017,0.30,0.00,0.6,2.7,1.7,5.3,6,0.92,1.0165,0.28,0.01,3.0,6.8,4.6,9.9,7,0.95,1.0161,0.12,0.07,0.0,7.1,6.9,9.1,8,0.96,1.0166,0.15,0.07,0.1,8.4,6.1,12.2,0.91,1.0129,0.12,0.60,0.0,6.5,6.0,8.0,8,0.96,1.0173,0.20,0.12,0.0,4.6,0.9,6.3,7,0.97,1.0167,0.12,0.46,0.0,6.9,5.5,9.9,5,0.90,1.0287,0.63,0.00,2.3,8.6,6.4,10.8,6,0.91,0.20,0.30,0.0,5.8,4.0,8.0,8,0.69,1.0234,0.04,0.08,0.0,1.9,0.6,3.1,8,0.93,1.0320,0.21,3.20,0.0,-9.5,-10.0,-8.9,5,1.0114,0.05,0.02,0.0,2.4,1.3,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9,8,1.0320,0.69
3,3,0.92,1.0180,0.58,0.00,4.1,6.3,3.8,10.6,8,0.93,1.0268,0.25,0.00,0.0,2.0,-2.0,3.0,4,0.67,1.017,0.19,0.00,0.0,2.0,0.4,4.4,8,0.95,1.0265,0.08,0.09,0.0,6.7,3.6,10.1,8,0.86,1.0161,0.12,0.02,0.0,6.8,3.6,8.0,8,0.98,1.0230,0.13,0.00,0.0,9.4,6.7,8.9,0.87,1.0290,0.12,0.00,0.0,5.8,5.2,6.5,6,0.94,1.0173,0.49,0.00,2.2,3.2,1.0,7.0,7,0.89,1.0277,0.16,0.00,0.3,7.0,3.0,10.0,0,0.75,1.0281,1.16,0.00,8.7,10.3,4.5,16.1,6,0.90,0.20,0.01,0.0,3.9,3.2,5.4,8,0.98,1.0244,0.04,0.35,0.0,3.0,0.4,4.9,5,0.93,1.0443,0.22,1.10,0.0,-11.5,-12.9,-10.0,5,1.0114,0.05,0.00,0.0,1.2,0.4,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6,6,1.0443,0.98
4,6,0.95,1.0180,0.65,0.14,5.4,3.0,-0.7,6.0,8,0.99,1.0286,0.25,0.06,0.0,2.0,0.7,2.8,4,0.67,1.017,0.19,0.00,0.0,2.5,1.1,5.3,6,0.90,1.0243,0.04,0.39,0.0,8.0,2.4,11.2,7,0.92,1.0161,0.12,0.62,0.0,7.7,6.2,11.0,5,0.84,1.0275,0.30,0.00,2.1,8.9,8.9,7.2,0.86,1.0262,0.13,0.71,0.0,5.4,3.7,6.0,7,0.94,1.0173,0.20,0.00,0.0,3.6,0.4,4.8,7,0.92,1.0259,0.12,0.56,0.0,8.1,2.5,11.1,2,0.64,1.0269,1.10,0.00,7.8,12.1,8.2,16.0,5,0.85,0.65,0.96,5.6,1.8,-3.0,6.0,8,0.96,1.0092,0.05,0.26,0.0,3.7,2.9,4.9,2,0.75,1.0430,0.72,0.01,6.1,-9.3,-12.0,-6.5,5,1.0114,0.05,1.32,0.0,3.3,0.8,4.3,3,0.80,1.0328,0.46,0.00,5.7,5.7,3.0,8.4,7,1.0430,0.96
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,1,0.79,1.0248,1.34,0.22,7.7,15.9,11.4,21.4,2,0.68,1.0278,1.57,0.18,5.7,18.2,12.1,24.4,4,0.67,1.017,1.41,0.14,5.4,11.7,7.9,16.2,8,0.84,1.0190,1.13,0.22,2.8,15.7,12.8,19.4,8,0.75,1.0214,1.13,0.20,6.4,17.8,13.6,21.4,5,0.87,1.0140,1.18,0.16,1.9,16.4,11.9,18.9,0.77,1.0161,1.14,0.19,4.0,9.1,5.4,13.1,4,0.80,1.0289,1.35,0.37,5.9,14.7,12.1,21.1,8,0.67,1.0201,1.17,0.20,5.3,18.6,14.1,22.6,8,0.52,1.0204,1.89,0.12,5.3,20.0,16.2,23.9,2,0.76,1.37,0.26,9.7,14.3,8.3,22.2,8,0.98,1.0139,1.06,0.21,0.1,9.7,5.8,12.0,2,0.84,1.0263,1.56,0.47,4.7,0.6,-1.4,2.6,5,1.0161,1.11,0.14,3.2,11.5,8.2,14.2,5,0.82,1.0142,1.13,0.41,3.4,10.7,7.9,13.5,4,1.0263,0.98
22946,6,0.77,1.0244,1.34,0.22,5.4,16.7,14.3,21.9,0,0.68,1.0295,1.57,0.18,5.7,15.9,10.6,21.2,4,0.67,1.017,1.41,0.14,5.4,11.7,7.9,16.2,8,0.84,1.0177,1.13,0.22,3.5,16.0,10.3,20.5,7,0.71,1.0186,1.13,0.20,4.9,19.4,15.4,23.9,4,0.82,1.0152,1.18,0.16,4.2,15.8,12.7,21.8,0.77,1.0161,1.14,0.19,4.0,9.1,5.4,13.1,3,0.82,1.0291,1.35,0.37,4.5,12.9,9.8,19.8,7,0.70,1.0190,1.17,0.20,5.0,18.9,15.8,23.5,8,0.51,1.0221,1.89,0.12,3.9,19.1,14.7,23.5,6,0.70,1.37,0.26,7.7,16.1,8.9,26.1,8,1.00,1.0107,1.06,0.21,0.0,10.9,8.8,11.7,5,0.84,1.0263,1.56,0.47,4.7,2.3,0.6,4.0,5,1.0122,1.11,0.14,0.8,12.5,11.0,14.3,5,0.82,1.0142,1.13,0.41,3.4,10.7,7.9,13.5,3,1.0263,1.00
22947,4,0.76,1.0227,1.34,0.22,6.1,16.7,13.1,22.4,2,0.68,1.0278,1.57,0.18,5.7,13.4,8.6,18.2,4,0.67,1.017,1.41,0.14,5.4,11.7,7.9,16.2,8,0.86,1.0174,1.13,0.22,3.3,15.8,9.3,21.1,8,0.73,1.0196,1.13,0.20,4.0,18.2,13.4,22.0,7,0.85,1.0138,1.18,0.16,4.2,16.5,11.2,17.0,0.77,1.0161,1.14,0.19,4.0,9.1,5.4,13.1,3,0.81,1.0270,1.35,0.37,5.1,13.2,10.2,20.7,8,0.69,1.0183,1.17,0.20,3.2,18.2,13.7,24.3,8,0.46,1.0186,1.89,0.12,8.1,19.0,15.4,22.6,7,0.64,1.37,0.26,6.8,17.4,11.2,26.2,3,0.85,1.0082,1.06,0.21,6.8,9.7,7.7,14.2,3,0.84,1.0263,1.56,0.47,4.7,3.3,2.1,4.5,5,1.0059,1.11,0.14,6.9,13.1,12.1,14.4,5,0.82,1.0142,1.13,0.41,3.4,10.7,7.9,13.5,3,1.0263,0.85
22948,5,0.80,1.0212,1.34,0.22,5.8,15.4,11.6,21.1,1,0.68,1.0238,1.57,0.18,5.7,15.0,9.1,20.9,4,0.67,1.017,1.41,0.14,5.4,11.7,7.9,16.2,8,0.87,1.0174,1.13,0.22,6.0,14.4,10.3,20.2,7,0.73,1.0176,1.13,0.20,6.9,16.7,11.9,21.1,5,0.86,1.0147,1.18,0.16,0.6,15.2,13.4,17.5,0.77,1.0161,1.14,0.19,4.0,9.1,5.4,13.1,3,0.77,1.0238,1.35,0.37,5.7,14.0,10.0,23.1,8,0.73,1.0181,1.17,0.20,6.8,16.3,12.8,21.4,5,0.66,1.0186,1.89,0.12,3.1,15.7,13.1,18.3,6,0.75,1.37,0.26,8.3,14.5,9.2,23.5,5,0.94,1.0150,1.06,0.21,2.9,5.9,2.1,8.1,3,0.84,1.0263,1.56,0.47,4.7,3.4,2.7,4.1,5,1.0160,1.11,0.14,8.4,7.5,5.1,12.4,5,0.82,1.0142,1.13,0.41,3.4,10.7,7.9,13.5,3,1.0263,0.94


In [11]:
outcomes = pd.read_csv(os.path.join(path, '02 Data', 'Unsupervised', 'outcomes-cleaned.csv'))
outcomes

Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22946,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22947,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22948,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [13]:
outcomes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22950 entries, 0 to 22949
Data columns (total 15 columns):
 #   Column                       Non-Null Count  Dtype
---  ------                       --------------  -----
 0   BASEL_pleasant_weather       22950 non-null  int64
 1   BELGRADE_pleasant_weather    22950 non-null  int64
 2   BUDAPEST_pleasant_weather    22950 non-null  int64
 3   DEBILT_pleasant_weather      22950 non-null  int64
 4   DUSSELDORF_pleasant_weather  22950 non-null  int64
 5   HEATHROW_pleasant_weather    22950 non-null  int64
 6   KASSEL_pleasant_weather      22950 non-null  int64
 7   LJUBLJANA_pleasant_weather   22950 non-null  int64
 8   MAASTRICHT_pleasant_weather  22950 non-null  int64
 9   MADRID_pleasant_weather      22950 non-null  int64
 10  MUNCHENB_pleasant_weather    22950 non-null  int64
 11  OSLO_pleasant_weather        22950 non-null  int64
 12  SONNBLICK_pleasant_weather   22950 non-null  int64
 13  STOCKHOLM_pleasant_weather   22950 non-null  i

---------------------------------------------------------------------------------------------------------------------------
## 3. Data preprocessing

In [16]:
# List of weather stations prefixes/ need help defining these as we have 135 total observations of 15 weather station groups
#by sets of 9 observations per group.
observations = [
    'BASEL_',
    'BELGRADE_',
    'BUDAPEST_',
    'DEBILT_',
    'DUSSELDORF_',
    'HEATHROW_',
    'KASSEL_',
    'LJUBLJANA_',
    'MAASTRICHT_',
    'MADRID_',
    'MUNCHENB_',
    'OSLO_',
    'SONNBLICK_',
    'STOCKHOLM_',
    'VALENTIA_'
]

In [18]:
# Dictionary mapping outcomes to weather stations for confusion matrix
labels = {
    0: 'BASEL_pleasant_weather',
    1: 'BELGRADE_pleasant_weather',
    2: 'BUDAPEST_pleasant_weather',
    3: 'DEBILT_pleasant_weather',
    4: 'DUSSELDORF_pleasant_weather',
    5: 'HEATHROW_pleasant_weather',
    6: 'KASSEL_pleasant_weather',
    7: 'LJUBLJANA_pleasant_weather',
    8: 'MAASTRICHT_pleasant_weather',
    9: 'MADRID_pleasant_weather',
    10: 'MUNCHENB_pleasant_weather',
    11: 'OSLO_pleasant_weather',
    12: 'SONNBLICK_pleasant_weather',
    13: 'STOCKHOLM_pleasant_weather',
    14: 'VALENTIA_pleasant_weather'
}

In [20]:
# Load data, process it, and format it appropriately for training a machine learning model.

In [22]:
# Ensure y is cleaned of NaN values
y_cleaned = outcomes[~np.isnan(outcomes).any(axis=1)]
X_cleaned = weather.loc[y_cleaned.index]  # Ensure X aligns with cleaned y

In [24]:
# Check the shape of the cleaned y dataset
print("Shape of cleaned y dataset:", y_cleaned.shape)


Shape of cleaned y dataset: (22950, 15)


In [26]:
outcomes[~np.isnan(outcomes).any(axis=1)]

Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22945,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22946,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22947,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
22948,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [28]:
# Reshape X dataset
X = weather.values.reshape(-1, 15, 9)

# Reshape y dataset 
y = y_cleaned.values.reshape(-1, 15)

# Turn into 1D array
###y = np.argmax(outcomes, axis = 1)

X_train, X_test, y2_train, y2_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [30]:
print(X_train.shape, y2_train.shape)
print(X_test.shape, y2_test.shape)

(18360, 15, 9) (18360, 15)
(4590, 15, 9) (4590, 15)


In [32]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [34]:
X_train

array([[[ 7.0000e+00,  7.2000e-01,  1.0204e+00, ...,  2.1200e+01,
          1.7600e+01,  2.5800e+01],
        [ 3.0000e+00,  4.9000e-01,  1.0182e+00, ...,  2.3000e+01,
          1.3800e+01,  2.8300e+01],
        [ 7.0000e+00,  6.4000e-01,  1.0173e+00, ...,  1.9500e+01,
          1.5500e+01,  2.3200e+01],
        ...,
        [ 1.0329e+00,  1.6500e+00,  8.2000e-01, ...,  5.3000e+00,
          6.0000e+00,  1.0090e+00],
        [ 1.8000e+00,  1.1000e-01,  5.6000e+00, ...,  7.0000e+00,
          8.8000e-01,  1.0138e+00],
        [ 1.3900e+00,  1.0000e-02,  1.7000e+00, ...,  3.0000e+00,
          1.0329e+00,  6.5000e-01]],

       [[ 2.0000e+00,  6.2000e-01,  1.0248e+00, ...,  1.8500e+01,
          1.0700e+01,  2.5600e+01],
        [ 0.0000e+00,  4.8000e-01,  1.0259e+00, ...,  1.9000e+01,
          1.4000e+01,  2.4000e+01],
        [ 3.0000e+00,  5.2000e-01,  1.0256e+00, ...,  1.8300e+01,
          1.2800e+01,  2.3200e+01],
        ...,
        [ 1.0404e+00,  3.7600e+00,  0.0000e+00, ...,  

In [36]:
len(X_train[0])

15

In [38]:
len(X_train[0][0])

9

In [40]:
y2_train

array([[0, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       ...,
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0]], dtype=int64)

In [42]:
from sklearn.utils.multiclass import type_of_target
type_of_target(y2_train)

'multilabel-indicator'

In [44]:
#Use argmax to get rid of one-hot encoding and supply the numerical value.
y_train = np.argmax(y2_train, axis = 1)
print(y_train.shape)
y_train

#Turn y_test into one-hot format
#y_train=np_utils.to_categorical(y_test)
#print(y_train)

(18360,)


array([1, 0, 2, ..., 1, 9, 1], dtype=int64)

In [46]:
type_of_target(y_train)

'multiclass'

---------------------------------------------------------------------------------------------------------------------------
## 4. Bayesian optimization function

In [49]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = 15 #_count_classes(y_train)
# Make scorer accuracy
score_acc = make_scorer(accuracy_score)

In [51]:
# Create function
def bay_area(neurons, activation, kernel, optimizer, learning_rate, batch_size, epochs,
              layers1, layers2, normalization, dropout, dropout_rate): 
    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl','SGD']
    optimizerD= {'Adam':Adam(learning_rate=learning_rate), 'SGD':SGD(learning_rate=learning_rate),
                 'RMSprop':RMSprop(learning_rate=learning_rate), 'Adadelta':Adadelta(learning_rate=learning_rate),
                 'Adagrad':Adagrad(learning_rate=learning_rate), 'Adamax':Adamax(learning_rate=learning_rate),
                 'Nadam':Nadam(learning_rate=learning_rate), 'Ftrl':Ftrl(learning_rate=learning_rate)}
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
                   'elu', 'exponential', LeakyReLU,'relu']
    
    neurons = round(neurons)
    kernel = round(kernel)
    activation = activationL[round(activation)]
    optimizer = optimizerD[optimizerL[round(optimizer)]]
    batch_size = round(batch_size)
    
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)
    
    def cnn_model():
        model = Sequential()
        model.add(Conv1D(neurons, kernel_size=kernel,activation=activation, input_shape=(timesteps, input_dim)))
        #model.add(Conv1D(32, kernel_size=1,activation='relu', input_shape=(timesteps, input_dim)))
        
        if normalization > 0.5:
            model.add(BatchNormalization())
        for i in range(layers1):
            model.add(Dense(neurons, activation=activation)) #(neurons, activation=activation))
        if dropout > 0.5:
            model.add(Dropout(dropout_rate, seed=123))
        for i in range(layers2):
            model.add(Dense(neurons, activation=activation))
        model.add(MaxPooling1D())
        model.add(Flatten())
        model.add(Dense(n_classes, activation='softmax')) #sigmoid softmax
        #model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy']) #categorical_crossentropy
        model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) #categorical_crossentropy
        return model
    es = EarlyStopping(monitor='accuracy', mode='max', verbose=2, patience=20)
    nn = KerasClassifier(build_fn=cnn_model, epochs=epochs, batch_size=batch_size, verbose=2)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=123)
    score = cross_val_score(nn, X_train, y_train, scoring=score_acc, cv=kfold, fit_params={'callbacks':[es]}).mean()
    return score

In [53]:
start = time.time()
params ={
    'neurons': (10, 50),            # Reduced upper limit
    'kernel': (1, 3),
    'activation':(0, 9), 
    'optimizer':(0,7), 
    'learning_rate':(0.001, 0.1),   # Reduced range
    'batch_size': (200, 500),       # Reduced upper limit
    'epochs':(20, 50),              # Reduced upper limit
    'layers1':(1,2),                # Simplified to 1 or 2 layers
    'layers2':(1,2),                # Simplified to 1 or 2 layers
    'normalization':(0,1),
    'dropout':(0,1),
    'dropout_rate':(0,0.3)
}
# Run Bayesian Optimization
nn_opt = BayesianOptimization(bay_area, params, random_state=42)
nn_opt.maximize(init_points=15, n_iter=100)
print('Search took %s minutes' % ((time.time() - start)/60))

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  kernel   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Epoch 1/25
31/31 - 2s - 61ms/step - accuracy: 0.6227 - loss: 1.6853
Epoch 2/25
31/31 - 0s - 5ms/step - accuracy: 0.6433 - loss: 1.1951
Epoch 3/25
31/31 - 0s - 5ms/step - accuracy: 0.6433 - loss: 1.1771
Epoch 4/25
31/31 - 0s - 5ms/step - accuracy: 0.6433 - loss: 1.1715
Epoch 5/25
31/31 - 0s - 5ms/step - accuracy: 0.6433 - loss: 1.1692
Epoch 6/25
31/31 - 0s - 5ms/step - accuracy: 0.6433 - loss: 1.1676
Epoch 7/25
31/31 - 0s - 5ms/step - accuracy: 0.6433 - loss: 1.1666
Epoch 8/25
31/31 - 0s - 5ms/step - accuracy: 0.6433 - loss: 1.1656
Epoch 9/25
31/31 - 0s - 5ms/step - accuracy: 0.6433 - loss: 1.1654
Epoch 10/25
31/31 - 0s - 5ms/step - accuracy: 0.6433 - l

ValueError: Input y contains NaN.

In [55]:
# Display the best parameters
optimum = nn_opt.max['params']
learning_rate = optimum['learning_rate']
activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
               'elu', 'exponential', LeakyReLU,'relu']
optimum['activation'] = activationL[round(optimum['activation'])]
optimum['batch_size'] = round(optimum['batch_size']) 
optimum['epochs'] = round(optimum['epochs'])
optimum['layers1'] = round(optimum['layers1'])
optimum['layers2'] = round(optimum['layers2'])
optimum['neurons'] = round(optimum['neurons'])
optimizerL = ['Adam', 'SGD', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl','Adam']
optimizerD= {'Adam':Adam(learning_rate=learning_rate), 'SGD':SGD(learning_rate=learning_rate),
             'RMSprop':RMSprop(learning_rate=learning_rate), 'Adadelta':Adadelta(learning_rate=learning_rate),
             'Adagrad':Adagrad(learning_rate=learning_rate), 'Adamax':Adamax(learning_rate=learning_rate),
             'Nadam':Nadam(learning_rate=learning_rate), 'Ftrl':Ftrl(learning_rate=learning_rate)}
optimum['optimizer'] = optimizerD[optimizerL[round(optimum['optimizer'])]]
optimum

{'activation': 'softsign',
 'batch_size': 485,
 'dropout': 0.7319939418114051,
 'dropout_rate': 0.17959754525911098,
 'epochs': 25,
 'kernel': 1.3119890406724053,
 'layers1': 1,
 'layers2': 2,
 'learning_rate': 0.06051038616257767,
 'neurons': 38,
 'normalization': 0.020584494295802447,
 'optimizer': <keras.src.optimizers.ftrl.Ftrl at 0x1ff8767f810>}

---------------------------------------------------------------------------------------------------------------------------
## 5. Build and run CNN keras model

In [58]:
# Adjust model hyperparameters
epochs = 38
batch_size = 435
#n_hidden = 256

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = 15 #_count_classes(y_train)
layers1 = 2
layers2 = 1
learning_rate = 0.00744
activation = 'softsign'
kernel = 1
neurons = 48
normalization = 0.9656
dropout = 0.1997
dropout_rate = 0.1543
optimizer = 'Ftrl'

# Implement complex layers
model = Sequential()
model.add(Conv1D(neurons, kernel_size=kernel, activation=activation, input_shape=(timesteps, input_dim)))
if normalization > 0.5:
    model.add(BatchNormalization())
for i in range(layers1):
    model.add(Dense(neurons, activation=activation))
if dropout > 0.5:
    model.add(Dropout(dropout_rate, seed=123))
for i in range(layers2):
    model.add(Dense(neurons, activation=activation))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) #softmax sigmoid

# Build the model
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) #binary_crossentropy

In [60]:
# Run the model
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/38
43/43 - 2s - 49ms/step - accuracy: 0.6294 - loss: 2.7031
Epoch 2/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6923
Epoch 3/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6860
Epoch 4/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6807
Epoch 5/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6760
Epoch 6/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6716
Epoch 7/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6674
Epoch 8/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6631
Epoch 9/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6587
Epoch 10/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6540
Epoch 11/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6487
Epoch 12/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6427
Epoch 13/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6356
Epoch 14/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - loss: 2.6273
Epoch 15/38
43/43 - 0s - 6ms/step - accuracy: 0.6433 - l

<keras.src.callbacks.history.History at 0x1ffdd368050>

- **Notes:**
- In the optimum parameters the highest accuracy score was around 97% with loss converging to a minimum but these optimal values were not printed out due to stop  iteration.
  
- Lower loss but stagnant accuracy can indicate **overfitting** of the data.

- Stop iteration occurs for init_point=3 and n_iterations=100 as well; optimum values are shown and losses are not all NANS.
- The input data shape is correct for both data sets.

---------------------------------------------------------------------------------------------------------------------------
## 6. Run confusion matrix
        - check accuracy and loss

In [63]:
def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([labels[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([labels[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

In [64]:
# Evaluate
print(confusion_matrix(y2_test, model.predict(X_test)))

[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Pred                         BASEL_pleasant_weather
True                                               
BASEL_pleasant_weather                         2955
BELGRADE_pleasant_weather                       879
BUDAPEST_pleasant_weather                       162
DEBILT_pleasant_weather                          64
DUSSELDORF_pleasant_weather                      25
HEATHROW_pleasant_weather                        67
KASSEL_pleasant_weather                           9
LJUBLJANA_pleasant_weather                       46
MAASTRICHT_pleasant_weather                       7
MADRID_pleasant_weather                         360
MUNCHENB_pleasant_weather                         8
OSLO_pleasant_weather                             4
STOCKHOLM_pleasant_weather                        3
VALENTIA_pleasant_weather                         1


In [65]:
model.summary()

------------------------------------------