# Importing libraries

In [77]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import time
import warnings
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", None)
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import (cross_val_score, StratifiedKFold)
from sklearn.metrics import make_scorer, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (Conv1D, Conv2D, Dense, Dropout, BatchNormalization, Flatten, MaxPooling1D, LeakyReLU)
from tensorflow.keras.optimizers import (Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl)
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from scikeras.wrappers import KerasClassifier
from bayes_opt import BayesianOptimization
from math import floor
from numpy import unique, reshape
from sklearn.model_selection import train_test_split, TimeSeriesSplit

# Importing data

In [78]:
path = r'C:\Users\thoma\OneDrive\Dokumente\data analytics\ML_Ach\ClimateWins'

In [79]:
path

'C:\\Users\\thoma\\OneDrive\\Dokumente\\data analytics\\ML_Ach\\ClimateWins'

In [80]:
Weather2 = pd.read_csv(os.path.join(path, 'Data', 'Prepared Data', 'Weather_unsc_clean.csv'))
Pleasant_weather = pd.read_pickle(os.path.join(path, 'Data', 'Prepared Data', 'Pleasant_weather.pkl'))

In [81]:
Weather2.shape

(22950, 136)

In [82]:
# Dropping 'Unnamed: 0' column
Weather2 = Weather2.drop(['Unnamed: 0'], axis=1)

In [83]:
Weather2.shape

(22950, 135)

In [84]:
Weather2.columns

Index(['BASEL_cloud_cover', 'BASEL_humidity', 'BASEL_pressure',
       'BASEL_global_radiation', 'BASEL_precipitation', 'BASEL_sunshine',
       'BASEL_temp_mean', 'BASEL_temp_min', 'BASEL_temp_max',
       'BELGRADE_cloud_cover',
       ...
       'VALENTIA_pressure', 'VALENTIA_global_radiation',
       'VALENTIA_precipitation', 'VALENTIA_sunshine', 'VALENTIA_temp_mean',
       'VALENTIA_temp_min', 'VALENTIA_temp_max', 'KASSEL_cloud_cover',
       'MUNCHENB_pressure', 'STOCKHOLM_humidity'],
      dtype='object', length=135)

In [85]:
station_cols = Weather2.columns

In [86]:
col_info = (pd.Series(station_cols).str.split('_', n=1, expand=True).rename(columns={0: 'station', 1: 'obs'}))

station_order = sorted(col_info['station'].unique())
print(len(station_order), station_order)

15 ['BASEL', 'BELGRADE', 'BUDAPEST', 'DEBILT', 'DUSSELDORF', 'HEATHROW', 'KASSEL', 'LJUBLJANA', 'MAASTRICHT', 'MADRID', 'MUNCHENB', 'OSLO', 'SONNBLICK', 'STOCKHOLM', 'VALENTIA']


In [87]:
# Creating a NumPy array for X
X = Weather2.to_numpy()
X.shape

(22950, 135)

In [88]:
# Reshaping
X = X.reshape(-1, 15, 9)
X.shape

(22950, 15, 9)

In [89]:
# Creating a NumPy array for y
y = Pleasant_weather.to_numpy()
y.shape

(22950, 15)

In [90]:
# Using argmax to get rid of one-hot encoding
y = np.argmax(y, axis = 1)
print(y.shape)
y

(22950,)


array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [91]:
# Splitting the data, while preserving the temporal aspect/dependancy by 'not shuffling'
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [92]:
from sklearn.utils.multiclass import type_of_target
type_of_target(y_train)

'multiclass'

# BAYESIAN SEARCH OPTIMIZATION

In [93]:
timesteps = X_train.shape[1]
input_dim = X_train.shape[2]
n_classes = int(y_train.max() + 1)
score_acc = make_scorer(accuracy_score)

In [94]:
# Create function
def bay_area(neurons, activation, kernel, optimizer, learning_rate, batch_size, epochs,
              layers1, layers2, normalization, dropout, dropout_rate): 
    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl','SGD']
    optimizerD= {'Adam':Adam(learning_rate=learning_rate), 'SGD':SGD(learning_rate=learning_rate),
                 'RMSprop':RMSprop(learning_rate=learning_rate), 'Adadelta':Adadelta(learning_rate=learning_rate),
                 'Adagrad':Adagrad(learning_rate=learning_rate), 'Adamax':Adamax(learning_rate=learning_rate),
                 'Nadam':Nadam(learning_rate=learning_rate), 'Ftrl':Ftrl(learning_rate=learning_rate)}
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
                   'elu', 'exponential']

    
    neurons = round(neurons)
    kernel = round(kernel)
    activation = activationL[round(activation)]
    optimizer = optimizerD[optimizerL[round(optimizer)]]
    batch_size = round(batch_size)
    
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)
    
    def cnn_model():
        model = Sequential()
        model.add(Conv1D(neurons, kernel_size=kernel,activation=activation, input_shape=(timesteps, input_dim)))
        #model.add(Conv1D(32, kernel_size=1,activation='relu', input_shape=(timesteps, input_dim)))
        
        if normalization > 0.5:
            model.add(BatchNormalization())
        for i in range(layers1):
            model.add(Dense(neurons, activation=activation)) #(neurons, activation=activation))
        if dropout > 0.5:
            model.add(Dropout(dropout_rate, seed=123))
        for i in range(layers2):
            model.add(Dense(neurons, activation=activation))
        model.add(MaxPooling1D())
        model.add(Flatten())
        model.add(Dense(n_classes, activation='softmax')) #sigmoid softmax
        #model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy']) #categorical_crossentropy
        model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) #categorical_crossentropy
        return model
    es = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
    nn = KerasClassifier(build_fn=cnn_model, epochs=epochs, batch_size=batch_size, verbose=2)
    kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=123)
    score = cross_val_score(nn, X_train, y_train, scoring=score_acc, cv=kfold, fit_params={'callbacks':[es]}).mean()
    if np.isnan(score) or np.isinf(score):
        return -1e10
    return float(score)

In [95]:
# Hyperparameter setup
start = time.time()
params ={
    'neurons': (10, 50),
    'kernel': (1, 3),
    'activation':(0, 7),
    'optimizer':(0,7),
    'learning_rate':(0.0001, 0.01),
    'batch_size': (30, 100),
    'epochs':(10, 50),
    'layers1':(1,3),
    'layers2':(1,3),
    'normalization':(0,1),
    'dropout':(0,1),
    'dropout_rate':(0,0.3)
}
# Running Bayesian Optimization
nn_opt = BayesianOptimization(bay_area, params, random_state=23)
nn_opt.maximize(init_points=10, n_iter=3)
print('Search took %s minutes' % ((time.time() - start)/60))

|   iter    |  target   |  neurons  |  kernel   | activa... | optimizer | learni... | batch_... |  epochs   |  layers1  |  layers2  | normal... |  dropout  | dropou... |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Epoch 1/17
157/157 - 5s - 31ms/step - accuracy: 0.6893 - loss: 1.0001
Epoch 2/17
157/157 - 1s - 8ms/step - accuracy: 0.7422 - loss: 0.7487
Epoch 3/17
157/157 - 1s - 8ms/step - accuracy: 0.7618 - loss: 0.6867
Epoch 4/17
157/157 - 1s - 8ms/step - accuracy: 0.7676 - loss: 0.6520
Epoch 5/17
157/157 - 1s - 8ms/step - accuracy: 0.7742 - loss: 0.6261
Epoch 6/17
157/157 - 1s - 8ms/step - accuracy: 0.7783 - loss: 0.6077
Epoch 7/17
157/157 - 2s - 15ms/step - accuracy: 0.7852 - loss: 0.5898
Epoch 8/17
157/157 - 2s - 10ms/step - accuracy: 0.7927 - loss: 0.5775
Epoch 9/17
157/157 - 1s - 6ms/step - accuracy: 0.7961 - loss: 0.5649
Epoch 10/17
157/157 - 1s - 5ms/step 

In [96]:
# Looking for the best hyperparameters
optimum = nn_opt.max["params"]

learning_rate = float(optimum["learning_rate"])

activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu',
               'elu', 'exponential']
act_i = int(np.clip(round(optimum["activation"]), 0, len(activationL)-1))
optimum["activation"] = activationL[act_i]

optimum["batch_size"] = int(round(optimum["batch_size"]))
optimum["epochs"]     = int(round(optimum["epochs"]))
optimum["layers1"]    = int(round(optimum["layers1"]))
optimum["layers2"]    = int(round(optimum["layers2"]))
optimum["neurons"]    = int(round(optimum["neurons"]))
optimum["kernel"]     = int(round(optimum["kernel"]))  # don’t forget kernel

optimizerL = ['Adam', 'SGD', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl']
opt_i = int(np.clip(round(optimum["optimizer"]), 0, len(optimizerL)-1))
opt_name = optimizerL[opt_i]

optimizerD = {
    'Adam': Adam(learning_rate=learning_rate),
    'SGD': SGD(learning_rate=learning_rate),
    'RMSprop': RMSprop(learning_rate=learning_rate),
    'Adadelta': Adadelta(learning_rate=learning_rate),
    'Adagrad': Adagrad(learning_rate=learning_rate),
    'Adamax': Adamax(learning_rate=learning_rate),
    'Nadam': Nadam(learning_rate=learning_rate),
    'Ftrl': Ftrl(learning_rate=learning_rate),
}
optimum["optimizer"] = optimizerD[opt_name]

# optional: make flags booleans and clip dropout_rate
optimum["normalization"] = optimum["normalization"] > 0.5
optimum["dropout"] = optimum["dropout"] > 0.5
optimum["dropout_rate"] = float(np.clip(optimum["dropout_rate"], 0.0, 0.3))

optimum

{'neurons': 31,
 'kernel': 3,
 'activation': 'selu',
 'optimizer': <keras.src.optimizers.rmsprop.RMSprop at 0x1e43d4ad280>,
 'learning_rate': 0.0022883490962903605,
 'batch_size': 78,
 'epochs': 17,
 'layers1': 2,
 'layers2': 2,
 'normalization': False,
 'dropout': False,
 'dropout_rate': 0.26520965471163144}

# -> These are the best hyperparameters within this specific search configuration, with regards to:
# The search space bounds
# The number of Bayesian iterations (13 trials)
# 3-fold cross-validation

In [97]:
# Final model including the best hyperparameters
neurons = 31
kernel = 3
activation = 'selu'
learning_rate = 0.0022883490962903605
batch_size = 78
epochs = 17
layers1 = 2
layers2 = 2
normalization = False
dropout = False
dropout_rate = 0.26520965471163144

timesteps = X_train.shape[1]
input_dim = X_train.shape[2]
n_classes = int(y_train.max() + 1)

optimizer = RMSprop(learning_rate=learning_rate)

model = Sequential()
model.add(Conv1D(neurons, kernel_size=kernel, activation=activation, input_shape=(timesteps, input_dim)))

if normalization:
    model.add(BatchNormalization())

for _ in range(layers1):
    model.add(Dense(neurons, activation=activation))

if dropout:
    model.add(Dropout(dropout_rate, seed=123))

for _ in range(layers2):
    model.add(Dense(neurons, activation=activation))

model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax'))

model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()

In [98]:
# Training the model
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)
loss, acc = model.evaluate(X_test, y_test, verbose=0)
print("Opt_loss:", loss, "Opt_accuracy:", acc)

Epoch 1/17
236/236 - 3s - 12ms/step - accuracy: 0.6996 - loss: 0.8991
Epoch 2/17
236/236 - 1s - 5ms/step - accuracy: 0.7621 - loss: 0.6859
Epoch 3/17
236/236 - 1s - 5ms/step - accuracy: 0.7768 - loss: 0.6288
Epoch 4/17
236/236 - 1s - 5ms/step - accuracy: 0.7870 - loss: 0.5876
Epoch 5/17
236/236 - 1s - 5ms/step - accuracy: 0.7949 - loss: 0.5705
Epoch 6/17
236/236 - 1s - 5ms/step - accuracy: 0.8083 - loss: 0.5409
Epoch 7/17
236/236 - 1s - 5ms/step - accuracy: 0.8159 - loss: 0.5185
Epoch 8/17
236/236 - 1s - 5ms/step - accuracy: 0.8276 - loss: 0.4897
Epoch 9/17
236/236 - 1s - 5ms/step - accuracy: 0.8304 - loss: 0.4705
Epoch 10/17
236/236 - 1s - 5ms/step - accuracy: 0.8391 - loss: 0.4504
Epoch 11/17
236/236 - 1s - 5ms/step - accuracy: 0.8469 - loss: 0.4352
Epoch 12/17
236/236 - 1s - 5ms/step - accuracy: 0.8496 - loss: 0.4204
Epoch 13/17
236/236 - 1s - 5ms/step - accuracy: 0.8541 - loss: 0.4089
Epoch 14/17
236/236 - 1s - 5ms/step - accuracy: 0.8626 - loss: 0.3882
Epoch 15/17
236/236 - 1s - 5

# Confusion matrix

In [99]:
# Converting y_test to one-hot encoded for creating the confusion matrix
ty = to_categorical(np.asarray(y_test).ravel(), num_classes=15)
print(ty.shape)

(4590, 15)


In [100]:
# Predicting
y_pred_probs = model.predict(X_test)

[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


In [101]:
def confusion_matrix_onehot(y_true_onehot, y_pred_probs, class_names=None):
    y_true = np.argmax(y_true_onehot, axis=1)
    y_pred = np.argmax(y_pred_probs, axis=1)

    if class_names is None:
        return pd.crosstab(pd.Series(y_true, name="True"),
                           pd.Series(y_pred, name="Pred"))
    else:
        true_names = pd.Series([class_names[i] for i in y_true], name="True")
        pred_names = pd.Series([class_names[i] for i in y_pred], name="Pred")
        cm = pd.crosstab(true_names, pred_names, rownames=["True"], colnames=["Pred"])
        return cm.reindex(index=class_names, columns=class_names, fill_value=0)

In [102]:
# Confusion matrix
cm = confusion_matrix_onehot(ty, y_pred_probs, class_names=station_order)
print(cm)

Pred        BASEL  BELGRADE  BUDAPEST  DEBILT  DUSSELDORF  HEATHROW  KASSEL  \
True                                                                          
BASEL        2827        73         8       4           0         1       0   
BELGRADE      204       697         6       1           0         0       0   
BUDAPEST       25        28        87       2           0         0       0   
DEBILT         21         9         9      34           1         0       0   
DUSSELDORF     12         3         2       3           5         1       0   
HEATHROW       20         7        10       4           1        21       0   
KASSEL          1         0         0       0           0         0       0   
LJUBLJANA      17         5         1       0           1         0       0   
MAASTRICHT      4         1         0       0           1         0       0   
MADRID         42        23        11       4           0         4       0   
MUNCHENB        8         1         0       0       

In [103]:
# Evaluate
print(confusion_matrix_onehot(ty, model.predict(X_test)))

[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Pred    0    1   2   3   4   5   7   8    9   10
True                                            
0     2827   73   8   4   0   1  10   2   32   0
1      204  697   6   1   0   0   0   0    1   0
2       25   28  87   2   0   0   1   0    1   0
3       21    9   9  34   1   0   0   0    0   0
4       12    3   2   3   5   1   3   1    3   0
5       20    7  10   4   1  21   2   0   20   0
6        1    0   0   0   0   0   0   0    0   0
7       17    5   1   0   1   0  17   0    4   0
8        4    1   0   0   1   0   0   2    1   0
9       42   23  11   4   0   4   7   0  229   0
10       8    1   0   0   0   0   0   0    0   0
11       1    0   0   0   0   0   0   0    1   0
13       0    0   0   0   0   0   0   0    1   1
