In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from numpy import unique
from numpy import reshape
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout
from keras.layers import LSTM
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings("ignore")

  if not hasattr(np, "object"):


In [2]:
# Set display options to show all columns

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
path = '~/Desktop/CareerFoundry/3.1./'
y = pd.read_csv(os.path.join(path, 'Data/Original/Dataset-Answers-Weather_Prediction_Pleasant_Weather.csv'))
X = pd.read_pickle(os.path.join(path, 'Data/Clean/cleaned_for_keras.pkl'))

Data wrangling

In [4]:
y.drop(columns='DATE', inplace=True)

In [5]:

X = np.array(X)
y = np.array(y)

In [6]:
X = X.reshape(-1,15,9)
X.shape

(22950, 15, 9)

In [7]:
y.shape

(22950, 15)

Split Data

In [8]:

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 42)

In [9]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(17212, 15, 9) (17212, 15)
(5738, 15, 9) (5738, 15)


Keras Model

In [10]:
epochs = 30
batch_size = 16
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(MaxPooling1D())
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='tanh')) # Don't use relu here!

In [11]:
model.summary()

Compile and Run

In [12]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [13]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 3s - 3ms/step - accuracy: 0.0246 - loss: 24.7998
Epoch 2/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0216 - loss: 25.4556
Epoch 3/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0240 - loss: 24.9250
Epoch 4/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0048 - loss: 24.8432
Epoch 5/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0168 - loss: 25.0481
Epoch 6/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0114 - loss: 24.7503
Epoch 7/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0094 - loss: 24.6486
Epoch 8/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0101 - loss: 24.7235
Epoch 9/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0084 - loss: 24.6141
Epoch 10/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0069 - loss: 24.7078
Epoch 11/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0132 - loss: 24.5572
Epoch 12/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0205 - loss: 25.0086
Epoch 13/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0368 - loss: 24.7837
Epoch 14/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0351 - l

<keras.src.callbacks.history.History at 0x3462fb380>

In [14]:
# Define list of stations names

stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'

}

In [15]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [16]:
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Pred        DUSSELDORF  MUNCHENB
True                            
BASEL             3673         9
BELGRADE          1092         0
BUDAPEST           214         0
DEBILT              82         0
DUSSELDORF          29         0
HEATHROW            82         0
KASSEL              11         0
LJUBLJANA           61         0
MAASTRICHT           9         0
MADRID             458         0
MUNCHENB             8         0
OSLO                 5         0
STOCKHOLM            4         0
VALENTIA             1         0


REtrials unit convergence

In [17]:
epochs = 30
batch_size = 16
n_hidden = 4

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) 

In [18]:
model.summary()

In [19]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [20]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30


1076/1076 - 1s - 1ms/step - accuracy: 0.1216 - loss: 1014.8308
Epoch 2/30
1076/1076 - 0s - 463us/step - accuracy: 0.1300 - loss: 9937.6514
Epoch 3/30
1076/1076 - 0s - 456us/step - accuracy: 0.1210 - loss: 31142.5449
Epoch 4/30
1076/1076 - 1s - 484us/step - accuracy: 0.1158 - loss: 63808.0430
Epoch 5/30
1076/1076 - 0s - 454us/step - accuracy: 0.1126 - loss: 113437.1562
Epoch 6/30
1076/1076 - 0s - 463us/step - accuracy: 0.1139 - loss: 172974.2812
Epoch 7/30
1076/1076 - 0s - 451us/step - accuracy: 0.1174 - loss: 251439.4688
Epoch 8/30
1076/1076 - 0s - 464us/step - accuracy: 0.1152 - loss: 339663.2188
Epoch 9/30
1076/1076 - 0s - 463us/step - accuracy: 0.1167 - loss: 446331.0000
Epoch 10/30
1076/1076 - 1s - 506us/step - accuracy: 0.1185 - loss: 564601.7500
Epoch 11/30
1076/1076 - 1s - 491us/step - accuracy: 0.1190 - loss: 712740.3125
Epoch 12/30
1076/1076 - 1s - 507us/step - accuracy: 0.1193 - loss: 873148.7500
Epoch 13/30
1076/1076 - 1s - 476us/step - accuracy: 0.1230 - loss: 1043880.8750


<keras.src.callbacks.history.History at 0x34a259950>

In [23]:
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 329us/step
Pred        BUDAPEST  DEBILT  DUSSELDORF  HEATHROW  KASSEL  LJUBLJANA  \
True                                                                    
BASEL            877       3          27      1225       1         40   
BELGRADE         375       0           0       371       0          0   
BUDAPEST          52       0           0       108       0          0   
DEBILT            18       0           0        42       0          0   
DUSSELDORF         3       0           0        20       0          0   
HEATHROW           5       0           0        57       0          0   
KASSEL             3       0           0         3       0          0   
LJUBLJANA         17       0           0        17       0          0   
MAASTRICHT         1       0           0         2       0          0   
MADRID            33       0           0       216       0          0   
MUNCHENB           4       0           0       

In [24]:
epochs = 30
batch_size = 16
n_hidden = 128

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='tanh')) # Options: sigmoid, tanh, softmax, relu

In [25]:
model.summary()

In [26]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [27]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0090 - loss: 25.3278
Epoch 2/30
1076/1076 - 1s - 789us/step - accuracy: 0.1116 - loss: 23.2679
Epoch 3/30
1076/1076 - 1s - 744us/step - accuracy: 0.1474 - loss: 22.6676
Epoch 4/30
1076/1076 - 1s - 762us/step - accuracy: 0.1972 - loss: 26.4387
Epoch 5/30
1076/1076 - 1s - 780us/step - accuracy: 0.2204 - loss: 22.5299
Epoch 6/30
1076/1076 - 1s - 769us/step - accuracy: 0.2138 - loss: 22.5299
Epoch 7/30
1076/1076 - 1s - 806us/step - accuracy: 0.2090 - loss: 22.5299
Epoch 8/30
1076/1076 - 1s - 760us/step - accuracy: 0.2038 - loss: 22.5299
Epoch 9/30
1076/1076 - 1s - 786us/step - accuracy: 0.1982 - loss: 22.5299
Epoch 10/30
1076/1076 - 1s - 751us/step - accuracy: 0.1948 - loss: 22.5299
Epoch 11/30
1076/1076 - 1s - 803us/step - accuracy: 0.1886 - loss: 22.5299
Epoch 12/30
1076/1076 - 1s - 741us/step - accuracy: 0.1861 - loss: 22.5299
Epoch 13/30
1076/1076 - 1s - 728us/step - accuracy: 0.1812 - loss: 22.5299
Epoch 14/30
1076/1076 - 1s - 741us/s

<keras.src.callbacks.history.History at 0x34a25a710>

In [28]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])
    

In [29]:
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 611us/step
Pred        BASEL  BELGRADE  BUDAPEST  HEATHROW  KASSEL  MUNCHENB  OSLO  \
True                                                                      
BASEL         591       247       914       362      89       302     0   
BELGRADE      236       262       548         8      16         5     0   
BUDAPEST       29        36       148         1       0         0     0   
DEBILT         22        18        42         0       0         0     0   
DUSSELDORF      6         6        15         2       0         0     0   
HEATHROW        5         9        63         4       0         1     0   
KASSEL          2         3         6         0       0         0     0   
LJUBLJANA       1         3        54         3       0         0     0   
MAASTRICHT      0         0         4         4       1         0     0   
MADRID         10        13       306        70      19        28     1   
MUNCHENB        0      

In [30]:
epochs = 30
batch_size = 16
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='tanh')) # Options: sigmoid, tanh, softmax, relu

In [31]:
model.summary()

In [32]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [33]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 2s - 1ms/step - accuracy: 0.0153 - loss: 18.5507
Epoch 2/30
1076/1076 - 1s - 771us/step - accuracy: 0.0235 - loss: 18.0809
Epoch 3/30
1076/1076 - 1s - 742us/step - accuracy: 0.0346 - loss: 18.0837
Epoch 4/30
1076/1076 - 1s - 717us/step - accuracy: 0.0435 - loss: 18.0968
Epoch 5/30
1076/1076 - 1s - 714us/step - accuracy: 0.1365 - loss: 20.2260
Epoch 6/30
1076/1076 - 1s - 729us/step - accuracy: 0.5813 - loss: 30.2809
Epoch 7/30
1076/1076 - 1s - 737us/step - accuracy: 0.5676 - loss: 30.0505
Epoch 8/30
1076/1076 - 1s - 750us/step - accuracy: 0.5677 - loss: 30.2743
Epoch 9/30
1076/1076 - 1s - 729us/step - accuracy: 0.5676 - loss: 30.2743
Epoch 10/30
1076/1076 - 1s - 731us/step - accuracy: 0.5676 - loss: 30.2743
Epoch 11/30
1076/1076 - 1s - 750us/step - accuracy: 0.5676 - loss: 30.2743
Epoch 12/30
1076/1076 - 1s - 727us/step - accuracy: 0.5676 - loss: 30.2743
Epoch 13/30
1076/1076 - 1s - 737us/step - accuracy: 0.5676 - loss: 30.2743
Epoch 14/30
1076/1076 - 1s - 725us/s

<keras.src.callbacks.history.History at 0x345d2b490>

In [34]:
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 618us/step
Pred        BASEL  BELGRADE  HEATHROW  OSLO  STOCKHOLM  VALENTIA
True                                                            
BASEL        3247        22        14    21          2       376
BELGRADE     1090         0         1     0          0         1
BUDAPEST      214         0         0     0          0         0
DEBILT         82         0         0     0          0         0
DUSSELDORF     29         0         0     0          0         0
HEATHROW       82         0         0     0          0         0
KASSEL         11         0         0     0          0         0
LJUBLJANA      61         0         0     0          0         0
MAASTRICHT      9         0         0     0          0         0
MADRID        456         0         0     0          1         1
MUNCHENB        8         0         0     0          0         0
OSLO            5         0         0     0          0         0
STOCKHOLM    

In [35]:
epochs = 30
batch_size = 16
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='sigmoid')) # Options: sigmoid, tanh, softmax, relu

In [36]:
model.summary()

In [37]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [38]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 2s - 1ms/step - accuracy: 0.6229 - loss: 10172.7842
Epoch 2/30
1076/1076 - 1s - 734us/step - accuracy: 0.6433 - loss: 112837.8359
Epoch 3/30
1076/1076 - 1s - 748us/step - accuracy: 0.6434 - loss: 374424.4375
Epoch 4/30
1076/1076 - 1s - 704us/step - accuracy: 0.6434 - loss: 793151.6875
Epoch 5/30
1076/1076 - 1s - 715us/step - accuracy: 0.6434 - loss: 1400674.8750
Epoch 6/30
1076/1076 - 1s - 728us/step - accuracy: 0.6434 - loss: 2229848.0000
Epoch 7/30
1076/1076 - 1s - 723us/step - accuracy: 0.6434 - loss: 3312063.5000
Epoch 8/30
1076/1076 - 1s - 767us/step - accuracy: 0.6434 - loss: 4602696.0000
Epoch 9/30
1076/1076 - 1s - 719us/step - accuracy: 0.6436 - loss: 6167870.0000
Epoch 10/30
1076/1076 - 1s - 697us/step - accuracy: 0.6436 - loss: 7995916.5000
Epoch 11/30
1076/1076 - 1s - 703us/step - accuracy: 0.6436 - loss: 10237489.0000
Epoch 12/30
1076/1076 - 1s - 692us/step - accuracy: 0.6436 - loss: 12592687.0000
Epoch 13/30
1076/1076 - 1s - 699us/step - accuracy: 0.

<keras.src.callbacks.history.History at 0x34a2363f0>

In [39]:
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 590us/step
Pred        BASEL  VALENTIA
True                       
BASEL        3678         4
BELGRADE     1092         0
BUDAPEST      214         0
DEBILT         82         0
DUSSELDORF     29         0
HEATHROW       82         0
KASSEL         11         0
LJUBLJANA      61         0
MAASTRICHT      9         0
MADRID        458         0
MUNCHENB        8         0
OSLO            5         0
STOCKHOLM       4         0
VALENTIA        1         0
