In [187]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from numpy import unique
from numpy import reshape
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout
from keras.layers import LSTM
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import warnings
warnings.filterwarnings("ignore")

In [188]:
path = '~/Desktop/CareerFoundry/3.1./'
pleasant = pd.read_csv(os.path.join(path, 'Data/Original/Dataset-Answers-Weather_Prediction_Pleasant_Weather.csv'))
unscaled = pd.read_csv(os.path.join(path, 'Data/Original/Dataset-weather-prediction-dataset-processed.csv'))
unscaled.head()

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_wind_speed,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_snow_depth,BASEL_sunshine,...,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_snow_depth,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,2.1,0.85,1.018,0.32,0.09,0,0.7,...,5,0.88,1.0003,0.45,0.34,0,4.7,8.5,6.0,10.9
1,19600102,1,6,2.1,0.84,1.018,0.36,1.05,0,1.1,...,7,0.91,1.0007,0.25,0.84,0,0.7,8.9,5.6,12.1
2,19600103,1,8,2.1,0.9,1.018,0.18,0.3,0,0.0,...,7,0.91,1.0096,0.17,0.08,0,0.1,10.5,8.1,12.9
3,19600104,1,3,2.1,0.92,1.018,0.58,0.0,0,4.1,...,7,0.86,1.0184,0.13,0.98,0,0.0,7.4,7.3,10.6
4,19600105,1,6,2.1,0.95,1.018,0.65,0.14,0,5.4,...,3,0.8,1.0328,0.46,0.0,0,5.7,5.7,3.0,8.4


Data Wrangle

In [189]:
# Remove weather stations not included in "pleasant weather" answers
unscaled = unscaled.drop(['GDANSK_cloud_cover', 'GDANSK_humidity', 'GDANSK_precipitation', 'GDANSK_snow_depth', 'GDANSK_temp_mean', 'GDANSK_temp_min', 'GDANSK_temp_max',
                        'ROMA_cloud_cover', 'ROMA_wind_speed', 'ROMA_humidity', 'ROMA_pressure', 'ROMA_sunshine', 'ROMA_temp_mean',
                        'TOURS_wind_speed', 'TOURS_humidity', 'TOURS_pressure', 'TOURS_global_radiation', 'TOURS_precipitation', 'TOURS_temp_mean', 'TOURS_temp_min', 'TOURS_temp_max'], axis=1)

In [190]:
# Remove 2 observation types missing multiple years
unscaled['YEAR'] = unscaled['DATE'].astype(str).str[:4].astype(int)


In [191]:
long = unscaled.melt(
    id_vars=['YEAR'],
    var_name='observation',
    value_name='value'
)


In [192]:

years_per_obs = (
    long
    .groupby(['observation', 'YEAR'])['value']
    .apply(lambda x: x.isna().all())
    .reset_index(name='all_missing')
)
missing_years = (
    years_per_obs
    .groupby('observation')['all_missing']
    .sum()
    .sort_values(ascending=True)
)
missing_years
# No stations with missing years where found

observation
BASEL_cloud_cover            0
MADRID_sunshine              0
MADRID_temp_max              0
MADRID_temp_mean             0
MADRID_temp_min              0
                            ..
DUSSELDORF_wind_speed        0
HEATHROW_cloud_cover         0
HEATHROW_global_radiation    0
HEATHROW_precipitation       0
VALENTIA_temp_min            0
Name: all_missing, Length: 149, dtype: int64

In [193]:
unscaled.shape

(22950, 150)

In [194]:
unscaled.isna().sum().sort_values(ascending=True)

DATE                         0
MADRID_precipitation         0
MADRID_sunshine              0
MADRID_temp_mean             0
MADRID_temp_min              0
                            ..
DUSSELDORF_temp_max          0
HEATHROW_cloud_cover         0
HEATHROW_humidity            0
HEATHROW_global_radiation    0
YEAR                         0
Length: 150, dtype: int64

In [195]:
pleasant.isna().sum().sort_values(ascending=True)

DATE                           0
BASEL_pleasant_weather         0
BELGRADE_pleasant_weather      0
BUDAPEST_pleasant_weather      0
DEBILT_pleasant_weather        0
DUSSELDORF_pleasant_weather    0
HEATHROW_pleasant_weather      0
KASSEL_pleasant_weather        0
LJUBLJANA_pleasant_weather     0
MAASTRICHT_pleasant_weather    0
MADRID_pleasant_weather        0
MUNCHENB_pleasant_weather      0
OSLO_pleasant_weather          0
SONNBLICK_pleasant_weather     0
STOCKHOLM_pleasant_weather     0
VALENTIA_pleasant_weather      0
dtype: int64

In [196]:
unscaled.columns.tolist()


['DATE',
 'MONTH',
 'BASEL_cloud_cover',
 'BASEL_wind_speed',
 'BASEL_humidity',
 'BASEL_pressure',
 'BASEL_global_radiation',
 'BASEL_precipitation',
 'BASEL_snow_depth',
 'BASEL_sunshine',
 'BASEL_temp_mean',
 'BASEL_temp_min',
 'BASEL_temp_max',
 'BELGRADE_cloud_cover',
 'BELGRADE_humidity',
 'BELGRADE_pressure',
 'BELGRADE_global_radiation',
 'BELGRADE_precipitation',
 'BELGRADE_sunshine',
 'BELGRADE_temp_mean',
 'BELGRADE_temp_min',
 'BELGRADE_temp_max',
 'BUDAPEST_cloud_cover',
 'BUDAPEST_humidity',
 'BUDAPEST_pressure',
 'BUDAPEST_global_radiation',
 'BUDAPEST_precipitation',
 'BUDAPEST_sunshine',
 'BUDAPEST_temp_mean',
 'BUDAPEST_temp_min',
 'BUDAPEST_temp_max',
 'DEBILT_cloud_cover',
 'DEBILT_wind_speed',
 'DEBILT_humidity',
 'DEBILT_pressure',
 'DEBILT_global_radiation',
 'DEBILT_precipitation',
 'DEBILT_sunshine',
 'DEBILT_temp_mean',
 'DEBILT_temp_min',
 'DEBILT_temp_max',
 'DUSSELDORF_cloud_cover',
 'DUSSELDORF_wind_speed',
 'DUSSELDORF_humidity',
 'DUSSELDORF_pressure',
 

In [197]:
unscaled.shape

(22950, 150)

In [198]:
from collections import Counter

Counter(col.split('_')[0] for col in unscaled.columns)


Counter({'BASEL': 11,
         'DUSSELDORF': 11,
         'OSLO': 11,
         'DEBILT': 10,
         'HEATHROW': 10,
         'LJUBLJANA': 10,
         'MAASTRICHT': 10,
         'MADRID': 10,
         'SONNBLICK': 10,
         'VALENTIA': 10,
         'BELGRADE': 9,
         'BUDAPEST': 9,
         'KASSEL': 9,
         'MUNCHENB': 9,
         'STOCKHOLM': 8,
         'DATE': 1,
         'MONTH': 1,
         'YEAR': 1})

In [199]:
BASEL = [col for col in unscaled.columns if col.split('_')[0] == 'BASEL']
BASEL

['BASEL_cloud_cover',
 'BASEL_wind_speed',
 'BASEL_humidity',
 'BASEL_pressure',
 'BASEL_global_radiation',
 'BASEL_precipitation',
 'BASEL_snow_depth',
 'BASEL_sunshine',
 'BASEL_temp_mean',
 'BASEL_temp_min',
 'BASEL_temp_max']

In [200]:
DUSSELDORF= [col for col in unscaled.columns if col.split('_')[0] == 'DUSSELDORF']
DUSSELDORF

['DUSSELDORF_cloud_cover',
 'DUSSELDORF_wind_speed',
 'DUSSELDORF_humidity',
 'DUSSELDORF_pressure',
 'DUSSELDORF_global_radiation',
 'DUSSELDORF_precipitation',
 'DUSSELDORF_snow_depth',
 'DUSSELDORF_sunshine',
 'DUSSELDORF_temp_mean',
 'DUSSELDORF_temp_min',
 'DUSSELDORF_temp_max']

In [201]:
BELGRADE = [col for col in unscaled.columns if col.split('_')[0] == 'BELGRADE']
BELGRADE

['BELGRADE_cloud_cover',
 'BELGRADE_humidity',
 'BELGRADE_pressure',
 'BELGRADE_global_radiation',
 'BELGRADE_precipitation',
 'BELGRADE_sunshine',
 'BELGRADE_temp_mean',
 'BELGRADE_temp_min',
 'BELGRADE_temp_max']

In [202]:
BUDAPEST= [col for col in unscaled.columns if col.split('_')[0] == 'BUDAPEST']
BUDAPEST

['BUDAPEST_cloud_cover',
 'BUDAPEST_humidity',
 'BUDAPEST_pressure',
 'BUDAPEST_global_radiation',
 'BUDAPEST_precipitation',
 'BUDAPEST_sunshine',
 'BUDAPEST_temp_mean',
 'BUDAPEST_temp_min',
 'BUDAPEST_temp_max']

In [203]:
STOCKHOLM = [col for col in unscaled.columns if col.split('_')[0] == 'STOCKHOLM']
STOCKHOLM

['STOCKHOLM_cloud_cover',
 'STOCKHOLM_pressure',
 'STOCKHOLM_global_radiation',
 'STOCKHOLM_precipitation',
 'STOCKHOLM_sunshine',
 'STOCKHOLM_temp_mean',
 'STOCKHOLM_temp_min',
 'STOCKHOLM_temp_max']

In [204]:

observation_types = ['cloud_cover', 'wind_speed', 'humidity', 'pressure',
                     'global_radiation', 'precipitation', 'snow_depth', 
                     'sunshine', 'temp_mean', 'temp_min', 'temp_max']

In [205]:
# Create a dictionary to store the count of stations for each observation type
station_counts = {}

for obs in observation_types:
    columns = [col for col in unscaled.columns if col.endswith(obs)]
    
    station_counts[obs] = len(columns)

print("Number of stations covered by each observation type:")
for obs, count in station_counts.items():
    print(f"{obs}: {count} stations")

Number of stations covered by each observation type:
cloud_cover: 14 stations
wind_speed: 9 stations
humidity: 14 stations
pressure: 14 stations
global_radiation: 15 stations
precipitation: 15 stations
snow_depth: 6 stations
sunshine: 15 stations
temp_mean: 15 stations
temp_min: 15 stations
temp_max: 15 stations


Drop wind_speed and snow_depth

In [206]:
cols_to_drop = [col for col in unscaled.columns if '_wind_speed' in col or '_snow_depth' in col]
unscaled = unscaled.drop(cols_to_drop, axis=1)
unscaled.shape

(22950, 135)

In [207]:
all_columns = unscaled.columns.to_list()
all_columns

['DATE',
 'MONTH',
 'BASEL_cloud_cover',
 'BASEL_humidity',
 'BASEL_pressure',
 'BASEL_global_radiation',
 'BASEL_precipitation',
 'BASEL_sunshine',
 'BASEL_temp_mean',
 'BASEL_temp_min',
 'BASEL_temp_max',
 'BELGRADE_cloud_cover',
 'BELGRADE_humidity',
 'BELGRADE_pressure',
 'BELGRADE_global_radiation',
 'BELGRADE_precipitation',
 'BELGRADE_sunshine',
 'BELGRADE_temp_mean',
 'BELGRADE_temp_min',
 'BELGRADE_temp_max',
 'BUDAPEST_cloud_cover',
 'BUDAPEST_humidity',
 'BUDAPEST_pressure',
 'BUDAPEST_global_radiation',
 'BUDAPEST_precipitation',
 'BUDAPEST_sunshine',
 'BUDAPEST_temp_mean',
 'BUDAPEST_temp_min',
 'BUDAPEST_temp_max',
 'DEBILT_cloud_cover',
 'DEBILT_humidity',
 'DEBILT_pressure',
 'DEBILT_global_radiation',
 'DEBILT_precipitation',
 'DEBILT_sunshine',
 'DEBILT_temp_mean',
 'DEBILT_temp_min',
 'DEBILT_temp_max',
 'DUSSELDORF_cloud_cover',
 'DUSSELDORF_humidity',
 'DUSSELDORF_pressure',
 'DUSSELDORF_global_radiation',
 'DUSSELDORF_precipitation',
 'DUSSELDORF_sunshine',
 'DUSS

In [208]:
observation_types = ['cloud_cover', 'humidity', 'pressure']
stations = set(col.split("_")[0] for col in all_columns)
missing = {}
for obs in observation_types:
    columns = [col for col in unscaled.columns if col.endswith(obs)]
    station_names = set([col.replace(f'_{obs}', '') for col in columns])
    missing_stations = stations - station_names
    missing[obs] = missing_stations

for obs, missing_stations in missing.items():
    print(f"\nMissing from {obs}:")
    if missing_stations:
        for station in missing_stations:
            print(station)
    else:
        print("None")


Missing from cloud_cover:
DATE
KASSEL
MONTH
YEAR

Missing from humidity:
DATE
MONTH
STOCKHOLM
YEAR

Missing from pressure:
DATE
MONTH
MUNCHENB
YEAR


Forcing the right column order

In [209]:
unscaled.columns.get_loc('HEATHROW_temp_max')

55

In [210]:
unscaled.columns.get_loc('STOCKHOLM_cloud_cover')

117

In [211]:
unscaled.columns.get_loc('MUNCHENB_cloud_cover')

91

In [212]:
# Insert new columns into "unscaled" at specific positions.
# The data for these new columns is taken from weather stations they are close to

unscaled.insert(56,'KASSEL_cloud_cover', unscaled['DUSSELDORF_cloud_cover'])
unscaled.insert(119, 'STOCKHOLM_humidity', unscaled['OSLO_humidity'])
unscaled.insert(94,'MUNCHENB_pressure',unscaled['BASEL_pressure'])

In [213]:
unscaled.columns.tolist()


['DATE',
 'MONTH',
 'BASEL_cloud_cover',
 'BASEL_humidity',
 'BASEL_pressure',
 'BASEL_global_radiation',
 'BASEL_precipitation',
 'BASEL_sunshine',
 'BASEL_temp_mean',
 'BASEL_temp_min',
 'BASEL_temp_max',
 'BELGRADE_cloud_cover',
 'BELGRADE_humidity',
 'BELGRADE_pressure',
 'BELGRADE_global_radiation',
 'BELGRADE_precipitation',
 'BELGRADE_sunshine',
 'BELGRADE_temp_mean',
 'BELGRADE_temp_min',
 'BELGRADE_temp_max',
 'BUDAPEST_cloud_cover',
 'BUDAPEST_humidity',
 'BUDAPEST_pressure',
 'BUDAPEST_global_radiation',
 'BUDAPEST_precipitation',
 'BUDAPEST_sunshine',
 'BUDAPEST_temp_mean',
 'BUDAPEST_temp_min',
 'BUDAPEST_temp_max',
 'DEBILT_cloud_cover',
 'DEBILT_humidity',
 'DEBILT_pressure',
 'DEBILT_global_radiation',
 'DEBILT_precipitation',
 'DEBILT_sunshine',
 'DEBILT_temp_mean',
 'DEBILT_temp_min',
 'DEBILT_temp_max',
 'DUSSELDORF_cloud_cover',
 'DUSSELDORF_humidity',
 'DUSSELDORF_pressure',
 'DUSSELDORF_global_radiation',
 'DUSSELDORF_precipitation',
 'DUSSELDORF_sunshine',
 'DUSS

In [214]:
unscaled.shape


(22950, 138)

In [215]:
unscaled.drop(columns=["DATE", "MONTH", "YEAR"], inplace=True)

In [216]:
unscaled.shape

(22950, 135)

In [217]:
pleasant.head()

Unnamed: 0,DATE,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,19600101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19600102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,19600103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,19600104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,19600105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [221]:
pleasant.drop(columns='DATE', inplace=True)
pleasant.shape

(22950, 15)

export

In [223]:
unscaled.to_pickle(os.path.join(path, 'Data/Clean/cleaned_for_keras.pkl'))

Data reshape

In [225]:
X = np.array(unscaled)
X = X.reshape(-1,15,9)
X.shape


(22950, 15, 9)

In [226]:
y = np.array(pleasant)
y.shape

(22950, 15)

Data spliting

In [227]:
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=42)

In [228]:
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(17212, 15, 9) (5738, 15, 9)
(17212, 15) (5738, 15)


Keras Model

In [None]:
epochs = 30
batch_size = 16
n_hidden = 32

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax'))

In [230]:
model.summary()

Compile and run the model

In [231]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [232]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 1s - 1ms/step - accuracy: 0.1054 - loss: 4755.2681
Epoch 2/30
1076/1076 - 1s - 609us/step - accuracy: 0.1158 - loss: 49042.5039
Epoch 3/30
1076/1076 - 1s - 585us/step - accuracy: 0.1226 - loss: 172064.7812
Epoch 4/30
1076/1076 - 1s - 601us/step - accuracy: 0.1269 - loss: 375147.0000
Epoch 5/30
1076/1076 - 1s - 596us/step - accuracy: 0.1272 - loss: 667304.8125
Epoch 6/30
1076/1076 - 1s - 552us/step - accuracy: 0.1277 - loss: 1088974.6250
Epoch 7/30
1076/1076 - 1s - 587us/step - accuracy: 0.1297 - loss: 1627901.5000
Epoch 8/30
1076/1076 - 1s - 565us/step - accuracy: 0.1335 - loss: 2303127.5000
Epoch 9/30
1076/1076 - 1s - 626us/step - accuracy: 0.1323 - loss: 3050171.5000
Epoch 10/30
1076/1076 - 1s - 554us/step - accuracy: 0.1315 - loss: 3930186.2500
Epoch 11/30
1076/1076 - 1s - 550us/step - accuracy: 0.1310 - loss: 5021212.0000
Epoch 12/30
1076/1076 - 1s - 569us/step - accuracy: 0.1306 - loss: 6245888.0000
Epoch 13/30
1076/1076 - 1s - 544us/step - accuracy: 0.1317 

<keras.src.callbacks.history.History at 0x3b2e5c6e0>

Confusion Matrix

In [233]:
stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'

}

In [234]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [235]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 613us/step
Pred        BASEL  BELGRADE  BUDAPEST  DEBILT  HEATHROW  KASSEL  LJUBLJANA  \
True                                                                         
BASEL         785       765        26       2         6      36        592   
BELGRADE        1       425         0       0         4       3        302   
BUDAPEST        0        64         0       0         1       0         33   
DEBILT          0        15         0       0         0       0         27   
DUSSELDORF      0         3         0       0         0       0          6   
HEATHROW        0         8         0       0         1       0          6   
KASSEL          0         2         0       0         0       0          5   
LJUBLJANA       1        23         0       0         0       0          4   
MAASTRICHT      0         2         0       0         0       0          1   
MADRID         28        73         0       0         5       3  

retrials

In [244]:
epochs = 30
batch_size = 16
n_hidden = 4

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # Options: sigmoid, tanh, softmax, relu

In [245]:
model.summary()

In [246]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [247]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 1s - 1ms/step - accuracy: 0.1039 - loss: 971.4531
Epoch 2/30
1076/1076 - 1s - 489us/step - accuracy: 0.1076 - loss: 10839.1240
Epoch 3/30
1076/1076 - 1s - 514us/step - accuracy: 0.1120 - loss: 37130.5156
Epoch 4/30
1076/1076 - 1s - 513us/step - accuracy: 0.1085 - loss: 82062.8828
Epoch 5/30
1076/1076 - 1s - 498us/step - accuracy: 0.1093 - loss: 141866.3750
Epoch 6/30
1076/1076 - 1s - 470us/step - accuracy: 0.1072 - loss: 212041.3281
Epoch 7/30
1076/1076 - 1s - 478us/step - accuracy: 0.1079 - loss: 293013.1250
Epoch 8/30
1076/1076 - 0s - 455us/step - accuracy: 0.1107 - loss: 386800.2812
Epoch 9/30
1076/1076 - 1s - 471us/step - accuracy: 0.1070 - loss: 490288.4688
Epoch 10/30
1076/1076 - 1s - 499us/step - accuracy: 0.1067 - loss: 616099.6250
Epoch 11/30
1076/1076 - 0s - 456us/step - accuracy: 0.1095 - loss: 747878.5000
Epoch 12/30
1076/1076 - 1s - 468us/step - accuracy: 0.1083 - loss: 902231.5000
Epoch 13/30
1076/1076 - 0s - 464us/step - accuracy: 0.1108 - loss: 10

<keras.src.callbacks.history.History at 0x3b3b15310>

In [249]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [250]:
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 326us/step
Pred        BASEL  BELGRADE  DUSSELDORF  HEATHROW  KASSEL  LJUBLJANA  MADRID  \
True                                                                           
BASEL         186         4           2      3023       3        407      52   
BELGRADE      176         0           0       596       2        318       0   
BUDAPEST       19         0           0       146       0         49       0   
DEBILT          3         0           0        74       0          5       0   
DUSSELDORF      1         0           0        25       0          3       0   
HEATHROW        2         0           0        75       0          5       0   
KASSEL          1         0           0         9       0          1       0   
LJUBLJANA       4         0           0        39       0         18       0   
MAASTRICHT      0         0           0         9       0          0       0   
MADRID         18         0           0    

In [251]:
epochs = 30
batch_size = 16
n_hidden = 128

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='tanh')) # Options: sigmoid, tanh, softmax, relu

In [252]:
model.summary()

In [253]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [254]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1730 - loss: 24.1348
Epoch 2/30
1076/1076 - 1s - 1ms/step - accuracy: 0.2459 - loss: 23.4243
Epoch 3/30
1076/1076 - 1s - 1ms/step - accuracy: 0.2451 - loss: 23.3382
Epoch 4/30
1076/1076 - 1s - 1ms/step - accuracy: 0.1963 - loss: 24.5030
Epoch 5/30
1076/1076 - 1s - 882us/step - accuracy: 0.0348 - loss: 25.8937
Epoch 6/30
1076/1076 - 1s - 862us/step - accuracy: 0.2883 - loss: 26.2383
Epoch 7/30
1076/1076 - 1s - 812us/step - accuracy: 0.3512 - loss: 26.2401
Epoch 8/30
1076/1076 - 1s - 840us/step - accuracy: 0.3521 - loss: 26.2410
Epoch 9/30
1076/1076 - 1s - 880us/step - accuracy: 0.3448 - loss: 23.9496
Epoch 10/30
1076/1076 - 1s - 1ms/step - accuracy: 0.3689 - loss: 22.5505
Epoch 11/30
1076/1076 - 1s - 1ms/step - accuracy: 0.3840 - loss: 22.5505
Epoch 12/30
1076/1076 - 1s - 905us/step - accuracy: 0.3891 - loss: 22.5505
Epoch 13/30
1076/1076 - 1s - 810us/step - accuracy: 0.3900 - loss: 22.5505
Epoch 14/30
1076/1076 - 1s - 884us/step - accu

<keras.src.callbacks.history.History at 0x3b2e31480>

In [255]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [256]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 606us/step
Pred        BASEL  BUDAPEST  KASSEL  LJUBLJANA  STOCKHOLM  VALENTIA
True                                                               
BASEL        2138      1341      62         14          6       121
BELGRADE      859       232       0          0          0         1
BUDAPEST      178        36       0          0          0         0
DEBILT         80         2       0          0          0         0
DUSSELDORF     27         2       0          0          0         0
HEATHROW       73         9       0          0          0         0
KASSEL         10         1       0          0          0         0
LJUBLJANA      52         9       0          0          0         0
MAASTRICHT      4         5       0          0          0         0
MADRID        239       219       0          0          0         0
MUNCHENB        4         4       0          0          0         0
OSLO            5         0       0    

In [257]:
epochs = 30
batch_size = 16
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='tanh')) # Options: sigmoid, tanh, softmax, relu

In [258]:
model.summary()

In [259]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [260]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 2s - 2ms/step - accuracy: 0.2783 - loss: 22.6661
Epoch 2/30
1076/1076 - 1s - 961us/step - accuracy: 0.4027 - loss: 24.5268
Epoch 3/30
1076/1076 - 1s - 988us/step - accuracy: 0.4210 - loss: 24.6088
Epoch 4/30
1076/1076 - 1s - 962us/step - accuracy: 0.4292 - loss: 24.6088
Epoch 5/30
1076/1076 - 1s - 923us/step - accuracy: 0.4218 - loss: 24.6088
Epoch 6/30
1076/1076 - 1s - 987us/step - accuracy: 0.4560 - loss: 24.6088
Epoch 7/30
1076/1076 - 1s - 933us/step - accuracy: 0.5135 - loss: 24.6088
Epoch 8/30
1076/1076 - 1s - 972us/step - accuracy: 0.5134 - loss: 24.6088
Epoch 9/30
1076/1076 - 1s - 1ms/step - accuracy: 0.5136 - loss: 24.6088
Epoch 10/30
1076/1076 - 1s - 951us/step - accuracy: 0.5140 - loss: 24.6088
Epoch 11/30
1076/1076 - 1s - 859us/step - accuracy: 0.5148 - loss: 24.6088
Epoch 12/30
1076/1076 - 1s - 869us/step - accuracy: 0.5710 - loss: 24.6098
Epoch 13/30
1076/1076 - 1s - 912us/step - accuracy: 0.6118 - loss: 24.6116
Epoch 14/30
1076/1076 - 1s - 951us/ste

<keras.src.callbacks.history.History at 0x3b2e31a70>

In [261]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [262]:
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 620us/step
Pred        BASEL  BUDAPEST  KASSEL
True                               
BASEL        3535       146       1
BELGRADE     1092         0       0
BUDAPEST      214         0       0
DEBILT         82         0       0
DUSSELDORF     29         0       0
HEATHROW       82         0       0
KASSEL         11         0       0
LJUBLJANA      61         0       0
MAASTRICHT      9         0       0
MADRID        458         0       0
MUNCHENB        8         0       0
OSLO            5         0       0
STOCKHOLM       4         0       0
VALENTIA        1         0       0


In [263]:
epochs = 30
batch_size = 16
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='sigmoid')) # Options: sigmoid, tanh, softmax, relu

In [264]:
model.summary()

In [265]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [266]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 2s - 2ms/step - accuracy: 0.6174 - loss: 10603.2217
Epoch 2/30
1076/1076 - 1s - 1ms/step - accuracy: 0.6434 - loss: 110153.0703
Epoch 3/30
1076/1076 - 1s - 917us/step - accuracy: 0.6434 - loss: 353464.5938
Epoch 4/30
1076/1076 - 1s - 918us/step - accuracy: 0.6434 - loss: 782497.9375
Epoch 5/30
1076/1076 - 1s - 940us/step - accuracy: 0.6435 - loss: 1404018.8750
Epoch 6/30
1076/1076 - 1s - 985us/step - accuracy: 0.6435 - loss: 2197726.0000
Epoch 7/30
1076/1076 - 1s - 883us/step - accuracy: 0.6436 - loss: 3246425.0000
Epoch 8/30
1076/1076 - 1s - 959us/step - accuracy: 0.6436 - loss: 4442433.5000
Epoch 9/30
1076/1076 - 1s - 919us/step - accuracy: 0.6436 - loss: 5968460.0000
Epoch 10/30
1076/1076 - 1s - 928us/step - accuracy: 0.6436 - loss: 7824326.0000
Epoch 11/30
1076/1076 - 1s - 958us/step - accuracy: 0.6437 - loss: 9676803.0000
Epoch 12/30
1076/1076 - 1s - 957us/step - accuracy: 0.6437 - loss: 12251461.0000
Epoch 13/30
1076/1076 - 1s - 936us/step - accuracy: 0.643

<keras.src.callbacks.history.History at 0x3b706eb10>

In [268]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [269]:
# Evaluate

print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 593us/step
Pred        BASEL  VALENTIA
True                       
BASEL        3679         3
BELGRADE     1092         0
BUDAPEST      214         0
DEBILT         82         0
DUSSELDORF     29         0
HEATHROW       82         0
KASSEL         11         0
LJUBLJANA      61         0
MAASTRICHT      9         0
MADRID        458         0
MUNCHENB        8         0
OSLO            5         0
STOCKHOLM       4         0
VALENTIA        1         0


In [270]:
epochs = 15
batch_size = 4
n_hidden = 4

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='relu')) # Options: sigmoid, tanh, softmax, relu

In [271]:
model.summary()

In [272]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [273]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/15
4303/4303 - 4s - 816us/step - accuracy: 0.1453 - loss: 21.7605
Epoch 2/15
4303/4303 - 3s - 590us/step - accuracy: 0.6407 - loss: nan
Epoch 3/15
4303/4303 - 3s - 614us/step - accuracy: 0.6440 - loss: nan
Epoch 4/15
4303/4303 - 2s - 554us/step - accuracy: 0.6440 - loss: nan
Epoch 5/15
4303/4303 - 3s - 630us/step - accuracy: 0.6440 - loss: nan
Epoch 6/15
4303/4303 - 2s - 548us/step - accuracy: 0.6440 - loss: nan
Epoch 7/15
4303/4303 - 2s - 532us/step - accuracy: 0.6440 - loss: nan
Epoch 8/15
4303/4303 - 2s - 542us/step - accuracy: 0.6440 - loss: nan
Epoch 9/15
4303/4303 - 2s - 537us/step - accuracy: 0.6440 - loss: nan
Epoch 10/15
4303/4303 - 2s - 546us/step - accuracy: 0.6440 - loss: nan
Epoch 11/15
4303/4303 - 2s - 527us/step - accuracy: 0.6440 - loss: nan
Epoch 12/15
4303/4303 - 2s - 518us/step - accuracy: 0.6440 - loss: nan
Epoch 13/15
4303/4303 - 2s - 512us/step - accuracy: 0.6440 - loss: nan
Epoch 14/15
4303/4303 - 2s - 533us/step - accuracy: 0.6440 - loss: nan
Epoch 15/15

<keras.src.callbacks.history.History at 0x3b76e49e0>

In [274]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [275]:
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 498us/step
Pred        BASEL
True             
BASEL        3682
BELGRADE     1092
BUDAPEST      214
DEBILT         82
DUSSELDORF     29
HEATHROW       82
KASSEL         11
LJUBLJANA      61
MAASTRICHT      9
MADRID        458
MUNCHENB        8
OSLO            5
STOCKHOLM       4
VALENTIA        1
