# 2.4 Evaluating Hyperparameters - RNN

## Contents:

1. Import Libraries
2. Import Datasets
3. Data Wrangling
4. Reshaping the Model
5. Data Split
6. Simple RNN Model

## 1. Import Libraries

In [141]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from numpy import unique
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense, Dropout
from tensorflow.keras.layers import SimpleRNN

In [142]:
# import additional libraries

from sklearn import datasets
from sklearn import metrics
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

## 2. Import Datasets

In [144]:
# Define path to dataset
path = r'/Users/analazarevska/Documents/CAREER FOUNDRY/Data Analytics Program/Machine Learning/A1, Basics of Machine Learning for Analysts/ClimateWins/Data Sets/'

# Import dataset
df_station = pd.read_csv(os.path.join(path, 'df_cleaned_with_date.csv'))
df_pleasant = pd.read_csv(os.path.join(path, 'df_pleasant_with_date.csv'))

##  3. Data Wrangling

In [146]:
df_station.head()

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,...,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,...,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,19600102,1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,...,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,19600103,1,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,...,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,19600104,1,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,...,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,19600105,1,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,...,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [147]:
#drop columns in dataframe that are not in the pleasant weather dataframes, and DATE and MONTH columns
df_match= df_station.drop(['MONTH'], axis = 1)

In [148]:
df_match.head()

Unnamed: 0,DATE,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,...,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,10.9,...,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,19600102,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,10.1,...,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,19600103,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,9.9,...,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,19600104,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,10.6,...,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,19600105,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,6.0,...,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [149]:
df_match.shape

(22950, 136)

In [150]:
# Create X and y matrices
X = df_match.drop(['DATE'], axis=1)
y = df_pleasant.drop(['DATE'], axis=1)

In [151]:
print(X.shape)
print(y.shape)

(22950, 135)
(22950, 15)


## 4. Reshaping the Model

In [153]:
# Reshape
X = X.values.reshape(-1,15,9)

In [154]:
print(X.shape)

(22950, 15, 9)


In [155]:
# Reshape
y = y.values.reshape(-1,15)

In [156]:
print(y.shape)

(22950, 15)


## 5. Data Split

In [158]:
# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [159]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(17212, 15, 9)
(5738, 15, 9)
(17212, 15)
(5738, 15)


In [160]:
len(X_train[0])

15

In [161]:
len(X_train[0][0])

9

In [162]:
len(y_train[0])

15

## 6. Simple RNN Model

In [164]:
model = Sequential()
model.add(SimpleRNN(64, input_shape=(15, 9), activation='relu'))
model.add(Dense(15, activation='sigmoid'))
model.compile (loss='binary_crossentropy', optimizer='adam', metrics=[keras.metrics.TruePositives(name='tp'),
                                                                      keras.metrics.FalsePositives(name='fp'),
                                                                      keras.metrics.TrueNegatives(name='tn'),
                                                                      keras.metrics.FalseNegatives(name='fn'),
                                                                      keras.metrics.BinaryAccuracy(name='accuracy'),
                                                                      keras.metrics.Precision(name='precision'),
                                                                      keras.metrics.Recall(name='recall'),
                                                                      keras.metrics.AUC(name='auc')])

  super().__init__(**kwargs)


In [165]:
model.fit(X_train,
          y_train,
          batch_size=32,
          validation_data=(X_test, y_test),
          epochs=47)

Epoch 1/47
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.8233 - auc: 0.8586 - fn: 10806.5547 - fp: 9300.9912 - loss: 0.4198 - precision: 0.5993 - recall: 0.5769 - tn: 92291.8359 - tp: 17199.5020 - val_accuracy: 0.8876 - val_auc: 0.9452 - val_fn: 5814.0000 - val_fp: 3858.0000 - val_loss: 0.2397 - val_precision: 0.7627 - val_recall: 0.6807 - val_tn: 64001.0000 - val_tp: 12397.0000
Epoch 2/47
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8949 - auc: 0.9510 - fn: 7189.0000 - fp: 6284.2637 - loss: 0.2286 - precision: 0.7663 - recall: 0.7404 - tn: 95347.7578 - tp: 20777.8672 - val_accuracy: 0.9029 - val_auc: 0.9584 - val_fn: 4566.0000 - val_fp: 3790.0000 - val_loss: 0.2094 - val_precision: 0.7826 - val_recall: 0.7493 - val_tn: 64069.0000 - val_tp: 13645.0000
Epoch 3/47
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9047 - auc: 0.9598 - fn: 6432.4194 - fp: 5887.8125 

<keras.src.callbacks.history.History at 0x302aa25d0>

In [166]:
y_pred = model.predict(X_test)

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 587us/step


In [167]:
def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([stations[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([stations[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

In [168]:
stations = {
    0: 'BASSEL',
    1: 'BELGRADE',
    2: 'BUDAPEST',
    3: 'DEBILT',
    4: 'DUSSELDORF',
    5: 'HEATHROW',
    6: 'KASSEL',
    7: 'LJUBLJANA',
    8: 'MAASTRICHT',
    9: 'MADRID',
    10: 'MUNCHENB',
    11: 'OSLO',
    12: 'SONNBLICK',
    13: 'STOCKHOLM',
    14: 'VALENTIA'
}

In [169]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 385us/step
Pred        BASSEL  BELGRADE  BUDAPEST  DEBILT  DUSSELDORF  HEATHROW  KASSEL  \
True                                                                           
BASSEL        1234       553       324      19          54        34      16   
BELGRADE         1       802        87      22          15         6       8   
BUDAPEST         2         1       129       2          10         0       5   
DEBILT           0         0         0      15          18         4       7   
DUSSELDORF       1         0         0       0           4         3       0   
HEATHROW         0         1         0       0           0        17       0   
KASSEL           0         0         0       0           0         0       5   
LJUBLJANA        1         1         0       0           0         0       0   
MAASTRICHT       0         0         0       0           0         0       0   
MADRID           0         1         0     