# Keras Layered Model (CNN)

## Script Contents

1. Importing Libraries and Data Sets
2. Data Wrangling and Preparation
3. Keras Model
4. Compiling and Running Model
5. Confusion Matrix
6. Model Testing (changing hyperparameters)

## 1. Importing Libraries and Data Sets

In [4]:
# Import libraries and Data
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from numpy import unique
from numpy import reshape
from keras.models import Sequential
from keras.layers import Conv1D, Conv2D, Dense, BatchNormalization, Flatten, MaxPooling1D, Dropout
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [5]:
# Create a path where Data is stored
path = r"C:\Users\Andre\OneDrive\Desktop\ML with Python A2"

In [6]:
# Import the Data Set unscaled
unscaled= pd.read_csv(os.path.join(path, "Dataset-weather-prediction-dataset-processed.csv"))
unscaled.head()

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_wind_speed,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_snow_depth,BASEL_sunshine,...,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_snow_depth,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,2.1,0.85,1.018,0.32,0.09,0,0.7,...,5,0.88,1.0003,0.45,0.34,0,4.7,8.5,6.0,10.9
1,19600102,1,6,2.1,0.84,1.018,0.36,1.05,0,1.1,...,7,0.91,1.0007,0.25,0.84,0,0.7,8.9,5.6,12.1
2,19600103,1,8,2.1,0.9,1.018,0.18,0.3,0,0.0,...,7,0.91,1.0096,0.17,0.08,0,0.1,10.5,8.1,12.9
3,19600104,1,3,2.1,0.92,1.018,0.58,0.0,0,4.1,...,7,0.86,1.0184,0.13,0.98,0,0.0,7.4,7.3,10.6
4,19600105,1,6,2.1,0.95,1.018,0.65,0.14,0,5.4,...,3,0.8,1.0328,0.46,0.0,0,5.7,5.7,3.0,8.4


In [7]:
unscaled.shape

(22950, 170)

In [8]:
# Import Data Set pleasantweather
pleasantweather = pd.read_csv(os.path.join(path, "Dataset-Answers-Weather_Prediction_Pleasant_Weather.csv"))
pleasantweather.head()

Unnamed: 0,DATE,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,19600101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19600102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,19600103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,19600104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,19600105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [9]:
pleasantweather.shape

(22950, 16)

## 2. Data Wrangling and Preparation

In [11]:
# Remove weather stations not included in "pleasant weather" answers
unscaled = unscaled.drop(['GDANSK_cloud_cover', 'GDANSK_humidity', 'GDANSK_precipitation', 'GDANSK_snow_depth', 'GDANSK_temp_mean', 'GDANSK_temp_min', 'GDANSK_temp_max',
                        'ROMA_cloud_cover', 'ROMA_wind_speed', 'ROMA_humidity', 'ROMA_pressure', 'ROMA_sunshine', 'ROMA_temp_mean',
                        'TOURS_wind_speed', 'TOURS_humidity', 'TOURS_pressure', 'TOURS_global_radiation', 'TOURS_precipitation', 'TOURS_temp_mean', 'TOURS_temp_min', 'TOURS_temp_max'], axis=1)

In [12]:
unscaled.isnull().sum()

DATE                   0
MONTH                  0
BASEL_cloud_cover      0
BASEL_wind_speed       0
BASEL_humidity         0
                      ..
VALENTIA_snow_depth    0
VALENTIA_sunshine      0
VALENTIA_temp_mean     0
VALENTIA_temp_min      0
VALENTIA_temp_max      0
Length: 149, dtype: int64

In [13]:
pleasantweather.isnull().sum()

DATE                           0
BASEL_pleasant_weather         0
BELGRADE_pleasant_weather      0
BUDAPEST_pleasant_weather      0
DEBILT_pleasant_weather        0
DUSSELDORF_pleasant_weather    0
HEATHROW_pleasant_weather      0
KASSEL_pleasant_weather        0
LJUBLJANA_pleasant_weather     0
MAASTRICHT_pleasant_weather    0
MADRID_pleasant_weather        0
MUNCHENB_pleasant_weather      0
OSLO_pleasant_weather          0
SONNBLICK_pleasant_weather     0
STOCKHOLM_pleasant_weather     0
VALENTIA_pleasant_weather      0
dtype: int64

In [14]:
# Extract the different observation types

observation_types = ['cloud_cover', 'wind_speed', 'humidity', 'pressure',
                     'global_radiation', 'precipitation', 'snow_depth', 
                     'sunshine', 'temp_mean', 'temp_min', 'temp_max']

In [15]:
# Create a dictionary to store the count of stations for each observation type
station_counts = {}

for obs in observation_types:
    # Select columns related to the current observation type
    columns = [col for col in unscaled.columns if col.endswith(obs)]
    
    # Count the number of stations (i.e., the number of columns) for the current observation type
    station_counts[obs] = len(columns)

# Print the count of stations for each observation type
print("Number of stations covered by each observation type:")
for obs, count in station_counts.items():
    print(f"{obs}: {count} stations")


Number of stations covered by each observation type:
cloud_cover: 14 stations
wind_speed: 9 stations
humidity: 14 stations
pressure: 14 stations
global_radiation: 15 stations
precipitation: 15 stations
snow_depth: 6 stations
sunshine: 15 stations
temp_mean: 15 stations
temp_min: 15 stations
temp_max: 15 stations


**Will drop Columns wind_speed and snow_depth as they have missing values**

In [17]:
# Get a list of columns containing 'wind_speed' or 'snow_depth'
cols_to_drop = [col for col in unscaled.columns if '_wind_speed' in col or '_snow_depth' in col]

# Drop the columns
unscaled = unscaled.drop(cols_to_drop, axis=1)

In [18]:
unscaled.shape

(22950, 134)

In [19]:
# Find the stations with the above entries missing
# Get all column names
all_columns = unscaled.columns.tolist()
# Exclude 'DATE' and 'MONTH' columns
all_columns = [col for col in all_columns if col not in ['DATE', 'MONTH']]  
# Extract unique weather station names
weather_stations = set()  # Use a set to automatically store only unique values
for col in all_columns:
    station_name = col.split('_')[0]  # Split the column name at the underscore and take the first part
    weather_stations.add(station_name)

# Print the list of weather stations
print(weather_stations)

{'BELGRADE', 'MAASTRICHT', 'VALENTIA', 'BUDAPEST', 'OSLO', 'DEBILT', 'HEATHROW', 'MUNCHENB', 'KASSEL', 'MADRID', 'STOCKHOLM', 'DUSSELDORF', 'SONNBLICK', 'BASEL', 'LJUBLJANA'}


In [20]:
# Find stations missing observation types
observation_types = ['cloud_cover', 'humidity', 'pressure']

missing_stations_by_observation = {}

for obs in observation_types:
    # Select columns related to the current observation type
    columns = [col for col in unscaled.columns if col.endswith(obs)]
    
    # Extract station names by removing the observation type from the column names
    station_names = set([col.replace(f'_{obs}', '') for col in columns])
    
    # Identify stations that are in all_stations but missing from the current observation type
    missing_stations = weather_stations - station_names
    
    # Store the missing station names in the dictionary
    missing_stations_by_observation[obs] = missing_stations

# Print the missing station names for each observation type
for obs, missing_stations in missing_stations_by_observation.items():
    print(f"\nStations missing from {obs}:")
    if missing_stations:
        for station in missing_stations:
            print(station)
    else:
        print("None")


Stations missing from cloud_cover:
KASSEL

Stations missing from humidity:
STOCKHOLM

Stations missing from pressure:
MUNCHENB


In [21]:
# Cloud cover is the start of a stations data, Kassel is next to Heathrow, find the position of Heathrow_temp_max for the insertion of Kassel_cloud_cover
unscaled.columns.get_loc('HEATHROW_temp_max')

55

In [22]:
# Find the position for insertion of Stockholm humidity
unscaled.columns.get_loc('STOCKHOLM_cloud_cover') #humidity is 1 after cloud cover so (result +1)

117

In [23]:
# Find position for Munchenb pressure
unscaled.columns.get_loc('MUNCHENB_cloud_cover') # pressure is 2 after cloud cover so (result +2)

91

In [24]:
# Insert new columns into "unscaled" at specific positions.
# The data for these new columns is taken from weather stations they are close to

unscaled.insert(56,'KASSEL_cloud_cover', unscaled['DUSSELDORF_cloud_cover'])
unscaled.insert(119, 'STOCKHOLM_humidity', unscaled['OSLO_humidity'])
unscaled.insert(94,'MUNCHENB_pressure',unscaled['BASEL_pressure'])

In [25]:
unscaled.columns.tolist()

['DATE',
 'MONTH',
 'BASEL_cloud_cover',
 'BASEL_humidity',
 'BASEL_pressure',
 'BASEL_global_radiation',
 'BASEL_precipitation',
 'BASEL_sunshine',
 'BASEL_temp_mean',
 'BASEL_temp_min',
 'BASEL_temp_max',
 'BELGRADE_cloud_cover',
 'BELGRADE_humidity',
 'BELGRADE_pressure',
 'BELGRADE_global_radiation',
 'BELGRADE_precipitation',
 'BELGRADE_sunshine',
 'BELGRADE_temp_mean',
 'BELGRADE_temp_min',
 'BELGRADE_temp_max',
 'BUDAPEST_cloud_cover',
 'BUDAPEST_humidity',
 'BUDAPEST_pressure',
 'BUDAPEST_global_radiation',
 'BUDAPEST_precipitation',
 'BUDAPEST_sunshine',
 'BUDAPEST_temp_mean',
 'BUDAPEST_temp_min',
 'BUDAPEST_temp_max',
 'DEBILT_cloud_cover',
 'DEBILT_humidity',
 'DEBILT_pressure',
 'DEBILT_global_radiation',
 'DEBILT_precipitation',
 'DEBILT_sunshine',
 'DEBILT_temp_mean',
 'DEBILT_temp_min',
 'DEBILT_temp_max',
 'DUSSELDORF_cloud_cover',
 'DUSSELDORF_humidity',
 'DUSSELDORF_pressure',
 'DUSSELDORF_global_radiation',
 'DUSSELDORF_precipitation',
 'DUSSELDORF_sunshine',
 'DUSS

In [50]:
# Exporting Data Set
pd.save_csv(os.path.join(path, "weather_predict_cleaned.csv"))

AttributeError: module 'pandas' has no attribute 'save_csv'

In [27]:
# Drop unnecessary columns
unscaled.drop(['DATE', 'MONTH'], axis=1, inplace=True)

In [28]:
unscaled.shape

(22950, 135)

In [29]:
# drop unneeded column from pleasantweather dataset
pleasantweather.drop(columns = 'DATE', inplace = True)

In [30]:
# Export cleaned dataset
unscaled.to_csv(os.path.join(path, 'weather_cleaned.csv'), index=False)

### Reshaping

In [32]:
# Creat an 'X' matrix by reloading and naming our data 'X'
X = pd.read_csv(os.path.join(path,'weather_cleaned.csv'), index_col=False)

In [33]:
# Making pleasantweather into y
y = pleasantweather

In [34]:
# Turn X and y into arrays
X = np.array(X)
y = np.array(y)
X

array([[ 7.    ,  0.85  ,  1.018 , ...,  8.5   ,  6.    , 10.9   ],
       [ 6.    ,  0.84  ,  1.018 , ...,  8.9   ,  5.6   , 12.1   ],
       [ 8.    ,  0.9   ,  1.018 , ..., 10.5   ,  8.1   , 12.9   ],
       ...,
       [ 4.    ,  0.76  ,  1.0227, ..., 10.7   ,  7.9   , 13.5   ],
       [ 5.    ,  0.8   ,  1.0212, ..., 10.7   ,  7.9   , 13.5   ],
       [ 5.    ,  0.84  ,  1.0193, ..., 10.7   ,  7.9   , 13.5   ]])

In [35]:
X = X.reshape(-1,15,9)

In [36]:
# Verify Shape
X.shape

(22950, 15, 9)

In [37]:
# Verify Shape
y.shape

(22950, 15)

In [38]:
X

array([[[  7.    ,   0.85  ,   1.018 , ...,   6.5   ,   0.8   ,
          10.9   ],
        [  1.    ,   0.81  ,   1.0195, ...,   3.7   ,  -0.9   ,
           7.9   ],
        [  4.    ,   0.67  ,   1.017 , ...,   2.4   ,  -0.4   ,
           5.1   ],
        ...,
        [  4.    ,   0.73  ,   1.0304, ...,  -5.9   ,  -8.5   ,
          -3.2   ],
        [  5.    ,   0.98  ,   1.0114, ...,   4.2   ,   2.2   ,
           4.9   ],
        [  5.    ,   0.88  ,   1.0003, ...,   8.5   ,   6.    ,
          10.9   ]],

       [[  6.    ,   0.84  ,   1.018 , ...,   6.1   ,   3.3   ,
          10.1   ],
        [  6.    ,   0.84  ,   1.0172, ...,   2.9   ,   2.2   ,
           4.4   ],
        [  4.    ,   0.67  ,   1.017 , ...,   2.3   ,   1.4   ,
           3.1   ],
        ...,
        [  6.    ,   0.97  ,   1.0292, ...,  -9.5   , -10.5   ,
          -8.5   ],
        [  5.    ,   0.62  ,   1.0114, ...,   4.    ,   3.    ,
           5.    ],
        [  7.    ,   0.91  ,   1.0007, ...,   8.

### Splitting into Training and Test Sets

In [40]:
# Split data into train and test sets

X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 42)

In [41]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(17212, 15, 9) (17212, 15)
(5738, 15, 9) (5738, 15)


## 3. Keras Model

In [43]:
epochs = 30
batch_size = 16
n_hidden = 32

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # Options: sigmoid, tanh, softmax, relu

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [44]:
model.summary()

## 4. Compiling and Running Model

In [46]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [47]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 3s - 3ms/step - accuracy: 0.1271 - loss: 5254.6147
Epoch 2/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1274 - loss: 51978.3945
Epoch 3/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1299 - loss: 168649.0781
Epoch 4/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1321 - loss: 363015.5312
Epoch 5/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1301 - loss: 654539.8125
Epoch 6/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1283 - loss: 1032579.4375
Epoch 7/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1254 - loss: 1517628.0000
Epoch 8/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1251 - loss: 2082556.8750
Epoch 9/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1261 - loss: 2806438.7500
Epoch 10/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1244 - loss: 3575487.2500
Epoch 11/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1234 - loss: 4542874.5000
Epoch 12/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1233 - loss: 5677352.0000
Epoch 13/30
1076/1076 - 2s - 2ms/step - accuracy: 0.1245 - loss: 6914595.5000
Epo

<keras.src.callbacks.history.History at 0x225ab19b6b0>

## 5. Confusion Matrix

In [49]:
# Define list of stations names

stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'

}

In [50]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [51]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Pred        BASEL  BELGRADE  BUDAPEST  DEBILT  DUSSELDORF  HEATHROW  KASSEL  \
True                                                                          
BASEL          46       376        48       2         188         5     119   
BELGRADE       17        93         0       0           0         0      19   
BUDAPEST        4        10         0       0           1         0       3   
DEBILT          1         0         0       0           4         0       5   
DUSSELDORF      0         0         0       0           0         0       0   
HEATHROW        0         1         0       0           4         0       5   
KASSEL          0         0         0       0           0         0       2   
LJUBLJANA       3         1         0       0           1         0       1   
MAASTRICHT      0         0         0       0           0         0       0   
MADRID          5        10         0       0           

## 6. Model Testing (changing hyperparameters)

### Test 2

In [57]:
epochs = 64
batch_size = 16
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='sigmoid')) # Options: sigmoid, tanh, softmax, relu

In [59]:
model.summary()

In [61]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [63]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/64
1076/1076 - 3s - 3ms/step - accuracy: 0.6208 - loss: 9452.5283
Epoch 2/64
1076/1076 - 2s - 2ms/step - accuracy: 0.6431 - loss: 97559.9844
Epoch 3/64
1076/1076 - 2s - 2ms/step - accuracy: 0.6431 - loss: 315781.4062
Epoch 4/64
1076/1076 - 2s - 2ms/step - accuracy: 0.6432 - loss: 729695.6250
Epoch 5/64
1076/1076 - 2s - 2ms/step - accuracy: 0.6435 - loss: 1314527.5000
Epoch 6/64
1076/1076 - 2s - 2ms/step - accuracy: 0.6436 - loss: 2016507.7500
Epoch 7/64
1076/1076 - 2s - 2ms/step - accuracy: 0.6436 - loss: 2985702.5000
Epoch 8/64
1076/1076 - 2s - 2ms/step - accuracy: 0.6437 - loss: 4113752.2500
Epoch 9/64
1076/1076 - 2s - 2ms/step - accuracy: 0.6437 - loss: 5541029.0000
Epoch 10/64
1076/1076 - 2s - 2ms/step - accuracy: 0.6437 - loss: 7254104.5000
Epoch 11/64
1076/1076 - 2s - 2ms/step - accuracy: 0.6437 - loss: 9146992.0000
Epoch 12/64
1076/1076 - 2s - 2ms/step - accuracy: 0.6437 - loss: 11470026.0000
Epoch 13/64
1076/1076 - 2s - 2ms/step - accuracy: 0.6436 - loss: 14044894.0000


<keras.src.callbacks.history.History at 0x225b7cd1d30>

In [65]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [67]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Pred        BASEL  VALENTIA
True                       
BASEL        3679         3
BELGRADE     1092         0
BUDAPEST      214         0
DEBILT         82         0
DUSSELDORF     29         0
HEATHROW       82         0
KASSEL         11         0
LJUBLJANA      61         0
MAASTRICHT      9         0
MADRID        458         0
MUNCHENB        8         0
OSLO            5         0
STOCKHOLM       4         0
VALENTIA        1         0


### Test 3

In [71]:
epochs = 12
batch_size = 16
n_hidden = 128

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='sigmoid')) # Options: sigmoid, tanh, softmax, relu

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [73]:
model.summary()

In [75]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [77]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/12
1076/1076 - 2s - 2ms/step - accuracy: 0.6269 - loss: 20389.4492
Epoch 2/12
1076/1076 - 2s - 2ms/step - accuracy: 0.6430 - loss: 200724.7344
Epoch 3/12
1076/1076 - 2s - 2ms/step - accuracy: 0.6431 - loss: 675278.3125
Epoch 4/12
1076/1076 - 2s - 2ms/step - accuracy: 0.6434 - loss: 1482045.0000
Epoch 5/12
1076/1076 - 2s - 2ms/step - accuracy: 0.6434 - loss: 2699009.7500
Epoch 6/12
1076/1076 - 2s - 2ms/step - accuracy: 0.6434 - loss: 4174502.2500
Epoch 7/12
1076/1076 - 2s - 2ms/step - accuracy: 0.6434 - loss: 6177989.5000
Epoch 8/12
1076/1076 - 2s - 2ms/step - accuracy: 0.6434 - loss: 8585678.0000
Epoch 9/12
1076/1076 - 2s - 2ms/step - accuracy: 0.6434 - loss: 11501721.0000
Epoch 10/12
1076/1076 - 2s - 2ms/step - accuracy: 0.6435 - loss: 14897217.0000
Epoch 11/12
1076/1076 - 2s - 2ms/step - accuracy: 0.6436 - loss: 18958682.0000
Epoch 12/12
1076/1076 - 2s - 2ms/step - accuracy: 0.6436 - loss: 23423556.0000


<keras.src.callbacks.history.History at 0x225b8082990>

In [79]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [81]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
Pred        BASEL  VALENTIA
True                       
BASEL        3678         4
BELGRADE     1092         0
BUDAPEST      214         0
DEBILT         82         0
DUSSELDORF     29         0
HEATHROW       82         0
KASSEL         11         0
LJUBLJANA      61         0
MAASTRICHT      9         0
MADRID        458         0
MUNCHENB        8         0
OSLO            5         0
STOCKHOLM       4         0
VALENTIA        1         0


### Test 4

In [84]:
epochs = 12
batch_size = 16
n_hidden = 128

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # Options: sigmoid, tanh, softmax, relu

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [86]:
model.summary()

In [88]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [90]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/12
1076/1076 - 3s - 3ms/step - accuracy: 0.1272 - loss: 15539.7930
Epoch 2/12
1076/1076 - 2s - 2ms/step - accuracy: 0.1271 - loss: 159291.8281
Epoch 3/12
1076/1076 - 2s - 2ms/step - accuracy: 0.1255 - loss: 542179.5000
Epoch 4/12
1076/1076 - 2s - 2ms/step - accuracy: 0.1265 - loss: 1215436.8750
Epoch 5/12
1076/1076 - 2s - 2ms/step - accuracy: 0.1319 - loss: 2164441.7500
Epoch 6/12
1076/1076 - 2s - 2ms/step - accuracy: 0.1300 - loss: 3509716.2500
Epoch 7/12
1076/1076 - 2s - 2ms/step - accuracy: 0.1317 - loss: 5125835.5000
Epoch 8/12
1076/1076 - 2s - 2ms/step - accuracy: 0.1380 - loss: 7193802.0000
Epoch 9/12
1076/1076 - 2s - 2ms/step - accuracy: 0.1304 - loss: 9640264.0000
Epoch 10/12
1076/1076 - 2s - 2ms/step - accuracy: 0.1329 - loss: 12693249.0000
Epoch 11/12
1076/1076 - 3s - 2ms/step - accuracy: 0.1340 - loss: 16016984.0000
Epoch 12/12
1076/1076 - 2s - 2ms/step - accuracy: 0.1335 - loss: 19791762.0000


<keras.src.callbacks.history.History at 0x225b7f437a0>

In [92]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [94]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Pred        BASEL  BELGRADE  BUDAPEST  DEBILT  HEATHROW  KASSEL  LJUBLJANA  \
True                                                                         
BASEL         176      1081        92      97         3      23          3   
BELGRADE       33       762         0       3         0       0          0   
BUDAPEST       23       134         0       1         0       0          0   
DEBILT         12        33         0       0         0       0          0   
DUSSELDORF      4         7         0       0         0       0          0   
HEATHROW       23        25         0       0         0       0          0   
KASSEL          1         5         0       1         0       0          0   
LJUBLJANA       9        38         0       0         0       0          0   
MAASTRICHT      1         6         0       0         0       0          0   
MADRID         52       158         0       4         0       0    

### Test 5

In [97]:
epochs = 30
batch_size = 16
n_hidden = 32

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu', input_shape=(timesteps, input_dim)))
model.add(Dense(16, activation='relu'))
model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='tanh')) # Options: sigmoid, tanh, softmax, relu

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [99]:
model.summary()

In [101]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [103]:
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0211 - loss: 25.4613
Epoch 2/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0109 - loss: 25.9611
Epoch 3/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0090 - loss: 25.9601
Epoch 4/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0089 - loss: 25.9601
Epoch 5/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0094 - loss: 25.9601
Epoch 6/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0159 - loss: 25.9592
Epoch 7/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0243 - loss: 25.9583
Epoch 8/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0276 - loss: 25.9611
Epoch 9/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0278 - loss: 25.9620
Epoch 10/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0278 - loss: 25.9620
Epoch 11/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0278 - loss: 25.9620
Epoch 12/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0278 - loss: 25.9620
Epoch 13/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0278 - loss: 25.9620
Epoch 14/30
1076/1076 - 2s - 2ms/step - accuracy: 0.0278 - l

<keras.src.callbacks.history.History at 0x225b8307620>

In [105]:
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [107]:
# Evaluate
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
Pred        BASEL  BELGRADE  HEATHROW  KASSEL  LJUBLJANA  MAASTRICHT  OSLO  \
True                                                                         
BASEL          95        11      2051     452        640           9   411   
BELGRADE        2        25       555      39        415           6    49   
BUDAPEST        0         5       115       8         80           1     5   
DEBILT          0         0        61       2         19           0     0   
DUSSELDORF      0         0        15       1         13           0     0   
HEATHROW        0         1        49       6         24           0     2   
KASSEL          0         0        10       1          0           0     0   
LJUBLJANA       0         0        34       1         19           0     7   
MAASTRICHT      0         0         6       1          1           0     1   
MADRID         10         3       170      56        112         