# 1. Importing necessary libraries

In [190]:
# Data manipulation and analysis
import pandas as pd       # for handling tabular data (CSV, DataFrames)
import numpy as np        # for numerical computations and array operations

# Visualization
import seaborn as sns     # for statistical data visualization (e.g., heatmaps, confusion matrices)
import matplotlib.pyplot as plt  # for plotting graphs and visualizing training history

# System utilities
import os                 # for handling file paths (important when loading CSVs)
import operator           # provides functions for comparisons (rarely used, but handy)

# Deep learning frameworks
import tensorflow as tf   # main deep learning framework
from tensorflow import keras  # high-level API for building neural networks

# Numpy helpers
from numpy import unique  # for finding unique values in arrays
from numpy import reshape # for reshaping arrays (important for preparing data for models)

# Keras model components
from keras.models import Sequential        # for building sequential neural network models
from keras.layers import Conv1D, Conv2D    # convolutional layers (1D for sequences, 2D for images)
from keras.layers import Dense             # fully connected (dense) layer
from keras.layers import BatchNormalization # for normalizing layer outputs to speed training
from keras.layers import Flatten           # flattening 2D/3D inputs into 1D
from keras.layers import MaxPooling1D      # pooling layer to reduce dimensionality (for 1D CNN)
from keras.layers import Dropout           # dropout layer to prevent overfitting
from keras.layers import LSTM              # Long Short-Term Memory layer (RNN variant)

# Keras utilities
from keras.utils import to_categorical     # convert labels to one-hot encoding

# Scikit-learn utilities
from sklearn.model_selection import train_test_split  # split data into training and testing sets
from sklearn.preprocessing import LabelEncoder        # encode categorical labels into numeric values

In [192]:
# Define the file path to the folder where the dataset is stored.  
path = r'C:\Users\Jacques\OneDrive\Documents\Data Analytics course\Data Specialization\Part 2\Exercise 2.2\Datasets'

In [194]:
# Load the processed weather prediction dataset into a pandas DataFrame.  
# 'os.path.join()' safely combines the folder path with the CSV filename.  
# The data is read without any scaling applied (hence stored in 'df_unscaled'). 
df_unscaled = pd.read_csv(os.path.join(path, 'Dataset-weather-prediction-dataset-processed.csv'))

In [195]:
# Import the "pleasant weather" labels dataset into a DataFrame.  
# This file contains the target variable (pleasant vs. not pleasant) for the prediction task.  
df_pleasant = pd.read_csv(os.path.join(path, 'Dataset-Answers-Weather_Prediction_Pleasant_Weather.csv'))

# 2. Data preprocessing for deep learning

In [199]:
# Display the first 5 rows of the unscaled dataset.  
# This is a quick check to confirm the data loaded correctly and to get an initial look at its structure and values.  
df_unscaled.head()

Unnamed: 0,DATE,MONTH,BASEL_cloud_cover,BASEL_wind_speed,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_snow_depth,BASEL_sunshine,...,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_snow_depth,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,19600101,1,7,2.1,0.85,1.018,0.32,0.09,0,0.7,...,5,0.88,1.0003,0.45,0.34,0,4.7,8.5,6.0,10.9
1,19600102,1,6,2.1,0.84,1.018,0.36,1.05,0,1.1,...,7,0.91,1.0007,0.25,0.84,0,0.7,8.9,5.6,12.1
2,19600103,1,8,2.1,0.9,1.018,0.18,0.3,0,0.0,...,7,0.91,1.0096,0.17,0.08,0,0.1,10.5,8.1,12.9
3,19600104,1,3,2.1,0.92,1.018,0.58,0.0,0,4.1,...,7,0.86,1.0184,0.13,0.98,0,0.0,7.4,7.3,10.6
4,19600105,1,6,2.1,0.95,1.018,0.65,0.14,0,5.4,...,3,0.8,1.0328,0.46,0.0,0,5.7,5.7,3.0,8.4


In [201]:
# Display the first 5 rows of the pleasant weather dataset.  
# This helps verify the data loaded correctly and gives a preview of the features and labels used for model training.  
df_pleasant.head()

Unnamed: 0,DATE,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,19600101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,19600102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,19600103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,19600104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,19600105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [203]:
# Remove the 'DATE' and 'MONTH' columns from the unscaled dataset.  
# These columns are not useful for deep learning in this context, since the model should focus on weather features rather than raw time identifiers. Display the first 5 rows after dropping them.  
df_unscaled = df_unscaled.drop(columns=['DATE', 'MONTH'], axis=1)
df_unscaled.head()

Unnamed: 0,BASEL_cloud_cover,BASEL_wind_speed,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_snow_depth,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,...,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_snow_depth,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,7,2.1,0.85,1.018,0.32,0.09,0,0.7,6.5,0.8,...,5,0.88,1.0003,0.45,0.34,0,4.7,8.5,6.0,10.9
1,6,2.1,0.84,1.018,0.36,1.05,0,1.1,6.1,3.3,...,7,0.91,1.0007,0.25,0.84,0,0.7,8.9,5.6,12.1
2,8,2.1,0.9,1.018,0.18,0.3,0,0.0,8.5,5.1,...,7,0.91,1.0096,0.17,0.08,0,0.1,10.5,8.1,12.9
3,3,2.1,0.92,1.018,0.58,0.0,0,4.1,6.3,3.8,...,7,0.86,1.0184,0.13,0.98,0,0.0,7.4,7.3,10.6
4,6,2.1,0.95,1.018,0.65,0.14,0,5.4,3.0,-0.7,...,3,0.8,1.0328,0.46,0.0,0,5.7,5.7,3.0,8.4


In [205]:
# Remove the 'DATE' column from the pleasant weather dataset.  
# The exact date is not needed for model training, as it does not contribute meaningful predictive power. Show the first 5 rows after dropping the column to confirm the change.  
df_pleasant = df_pleasant.drop(columns=['DATE'], axis=1)
df_pleasant.head()

Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [207]:
# Identify and drop all columns related to the weather stations. Gdansk, Roma, and Tours from the unscaled dataset. These stations are not present in the pleasant weather dataset, and keeping them would cause mismatches between the two datasets.  
# The list comprehension finds all column names starting with those station names.  
cols_to_drop = [col for col in df_unscaled.columns if col.startswith(('GDANSK', 'ROMA', 'TOURS'))]
df_unscaled = df_unscaled.drop(columns=cols_to_drop)

In [209]:
# Display all column names of the cleaned unscaled dataset
# This helps to verify which weather stations and observation types remain
df_unscaled.columns

Index(['BASEL_cloud_cover', 'BASEL_wind_speed', 'BASEL_humidity',
       'BASEL_pressure', 'BASEL_global_radiation', 'BASEL_precipitation',
       'BASEL_snow_depth', 'BASEL_sunshine', 'BASEL_temp_mean',
       'BASEL_temp_min',
       ...
       'VALENTIA_cloud_cover', 'VALENTIA_humidity', 'VALENTIA_pressure',
       'VALENTIA_global_radiation', 'VALENTIA_precipitation',
       'VALENTIA_snow_depth', 'VALENTIA_sunshine', 'VALENTIA_temp_mean',
       'VALENTIA_temp_min', 'VALENTIA_temp_max'],
      dtype='object', length=147)

In [211]:
# Count the number of different observation/measurement types for each weather station
# This helps identify which measurements (e.g., temperature, humidity, pressure) are available for each location and ensures consistency across the dataset before deep learning.
# Extract location names 
locations = set([col.split('_')[0] for col in df_unscaled.columns])

# Create a dictionary to store measurement counts for each location
measurement_counts = {location: {} for location in locations}

# Count occurrences of each measurement type for each location
for col in df_unscaled.columns:
    parts = col.split('_') 
    location = parts[0] 
    measurement = '_'.join(parts[1:])  # Join remaining parts if there are more than two

    if measurement not in measurement_counts[location]:
        measurement_counts[location][measurement] = 1
    else:
        measurement_counts[location][measurement] += 1

# Print the measurement counts for each location
for location, measurements in measurement_counts.items():
    print(f"Location: {location}")
    for measurement, count in measurements.items():
        print(f"  - {measurement}: {count}")
    print()

Location: KASSEL
  - wind_speed: 1
  - humidity: 1
  - pressure: 1
  - global_radiation: 1
  - precipitation: 1
  - sunshine: 1
  - temp_mean: 1
  - temp_min: 1
  - temp_max: 1

Location: BELGRADE
  - cloud_cover: 1
  - humidity: 1
  - pressure: 1
  - global_radiation: 1
  - precipitation: 1
  - sunshine: 1
  - temp_mean: 1
  - temp_min: 1
  - temp_max: 1

Location: BUDAPEST
  - cloud_cover: 1
  - humidity: 1
  - pressure: 1
  - global_radiation: 1
  - precipitation: 1
  - sunshine: 1
  - temp_mean: 1
  - temp_min: 1
  - temp_max: 1

Location: MUNCHENB
  - cloud_cover: 1
  - humidity: 1
  - global_radiation: 1
  - precipitation: 1
  - snow_depth: 1
  - sunshine: 1
  - temp_mean: 1
  - temp_min: 1
  - temp_max: 1

Location: BASEL
  - cloud_cover: 1
  - wind_speed: 1
  - humidity: 1
  - pressure: 1
  - global_radiation: 1
  - precipitation: 1
  - snow_depth: 1
  - sunshine: 1
  - temp_mean: 1
  - temp_min: 1
  - temp_max: 1

Location: VALENTIA
  - cloud_cover: 1
  - humidity: 1
  - press

The maximum number of measurements recorded at a single location is 11, which include:

cloud_cover: 1

wind_speed: 1

humidity: 1

pressure: 1

global_radiation: 1

precipitation: 1

snow_depth: 1

sunshine: 1

temp_mean: 1

temp_min: 1

temp_max: 1

Missing measurements by location:

Maastricht: snow_depth

Madrid: snow_depth

Heathrow: wind_speed

Kassel: cloud_cover, snow_depth

Belgrade: wind_speed, snow_depth

Valentia: wind_speed

Budapest: wind_speed, snow_depth

Ljubljana: snow_depth

Sonnblick: snow_depth

Stockholm: wind_speed, humidity, snow_depth

Debilt: snow_depth

Munchenb: wind_speed, pressure

In [214]:
# Remove columns for wind_speed and snow_depth since these measurements are frequently missing across multiple locations.
# First, we create a list of all columns that end with 'wind_speed' or 'snow_depth'.
# Then, we drop these columns from df_unscaled to clean the dataset for modeling.

cols_to_drop = [col for col in df_unscaled.columns if col.endswith(('wind_speed', 'snow_depth'))]
df_unscaled = df_unscaled.drop(columns=cols_to_drop)

In [216]:
# Display the first few rows of the cleaned dataset to verify that the wind_speed and snow_depth columns have been removed
df_unscaled.head()

Unnamed: 0,BASEL_cloud_cover,BASEL_humidity,BASEL_pressure,BASEL_global_radiation,BASEL_precipitation,BASEL_sunshine,BASEL_temp_mean,BASEL_temp_min,BASEL_temp_max,BELGRADE_cloud_cover,...,STOCKHOLM_temp_max,VALENTIA_cloud_cover,VALENTIA_humidity,VALENTIA_pressure,VALENTIA_global_radiation,VALENTIA_precipitation,VALENTIA_sunshine,VALENTIA_temp_mean,VALENTIA_temp_min,VALENTIA_temp_max
0,7,0.85,1.018,0.32,0.09,0.7,6.5,0.8,10.9,1,...,4.9,5,0.88,1.0003,0.45,0.34,4.7,8.5,6.0,10.9
1,6,0.84,1.018,0.36,1.05,1.1,6.1,3.3,10.1,6,...,5.0,7,0.91,1.0007,0.25,0.84,0.7,8.9,5.6,12.1
2,8,0.9,1.018,0.18,0.3,0.0,8.5,5.1,9.9,6,...,4.1,7,0.91,1.0096,0.17,0.08,0.1,10.5,8.1,12.9
3,3,0.92,1.018,0.58,0.0,4.1,6.3,3.8,10.6,8,...,2.3,7,0.86,1.0184,0.13,0.98,0.0,7.4,7.3,10.6
4,6,0.95,1.018,0.65,0.14,5.4,3.0,-0.7,6.0,8,...,4.3,3,0.8,1.0328,0.46,0.0,5.7,5.7,3.0,8.4


In [218]:
# Fill in missing measurements for specific locations (Kassel, Stockholm, Munchenb) using data from nearby stations
# Ensures the new columns are inserted in the correct order based on the measurement type
# After processing, verify that the columns for the target locations have been added correctly

# Define relationships between locations
location_pairs = {
    'KASSEL': 'LJUBLJANA',
    'STOCKHOLM': 'OSLO',
    'MUNCHENB': 'SONNBLICK'
}

# Define the desired order of measurements
measurement_order = ['cloud_cover', 'humidity', 'pressure', 'global_radiation', 
                     'precipitation', 'sunshine', 'temp_mean', 'temp_min', 'temp_max']

# Function to fill missing values and insert in correct position
def fill_missing_values(df_unscaled, location, measurement, neighbor):
    """
    Fills missing values for a given location and measurement using data from a neighbor location.
    Inserts the new column in the correct position based on the measurement order.
    """
    source_col = f'{neighbor}_{measurement}'
    target_col = f'{location}_{measurement}'

    # Determine the insertion index 
    if measurement == measurement_order[0]:  
        location_columns = [col for col in df_unscaled.columns if col.startswith(location)]
        insert_index = df_unscaled.columns.get_loc(location_columns[0]) if location_columns else 0
    else:
        insert_index = df_unscaled.columns.get_loc(f'{location}_{measurement_order[measurement_order.index(measurement) - 1]}') + 1 

    # Create the new column with missing values and insert it at the correct position
    df_unscaled.insert(insert_index, target_col, np.nan) 

    # Fill missing values in the new column
    df_unscaled[target_col].fillna(df_unscaled[source_col], inplace=True) 

    return df_unscaled

# Fill missing values for each location and measurement
for location, neighbor in location_pairs.items():
    for measurement in measurement_order:
        if f'{location}_{measurement}' not in df_unscaled.columns:  
            df_unscaled = fill_missing_values(df_unscaled, location, measurement, neighbor)

# Checking new columns for existence and location
selected_columns = [col for col in df_unscaled.columns if col.startswith(('KASSEL', 'STOCKHOLM', 'MUNCHENB'))]
print(df_unscaled[selected_columns])

       KASSEL_cloud_cover  KASSEL_humidity  KASSEL_pressure  \
0                     8.0             0.82           1.0094   
1                     6.0             0.86           1.0086   
2                     8.0             0.91           1.0129   
3                     6.0             0.87           1.0290   
4                     7.0             0.86           1.0262   
...                   ...              ...              ...   
22945                 4.0             0.77           1.0161   
22946                 3.0             0.77           1.0161   
22947                 3.0             0.77           1.0161   
22948                 3.0             0.77           1.0161   
22949                 3.0             0.77           1.0161   

       KASSEL_global_radiation  KASSEL_precipitation  KASSEL_sunshine  \
0                         0.28                  0.48              1.6   
1                         0.12                  0.27              0.0   
2                       

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_unscaled[target_col].fillna(df_unscaled[source_col], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_unscaled[target_col].fillna(df_unscaled[source_col], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the inte

In [220]:
# Verify that the new columns for Munchenb have been successfully added and are in the correct order within the DataFrame
selected_columns = [col for col in df_unscaled.columns if col.startswith('MUNCHENB')]
print(df_unscaled[selected_columns])

       MUNCHENB_cloud_cover  MUNCHENB_humidity  MUNCHENB_pressure  \
0                         5               0.67             1.0304   
1                         6               0.72             1.0292   
2                         6               0.91             1.0320   
3                         6               0.90             1.0443   
4                         5               0.85             1.0430   
...                     ...                ...                ...   
22945                     2               0.76             1.0263   
22946                     6               0.70             1.0263   
22947                     7               0.64             1.0263   
22948                     6               0.75             1.0263   
22949                     5               0.83             1.0263   

       MUNCHENB_global_radiation  MUNCHENB_precipitation  MUNCHENB_sunshine  \
0                           0.20                    0.10                0.0   
1            

In [222]:
# Check the overall shape of the DataFrame after cleaning and adding missing columns
# This shows the total number of rows and columns currently in df_unscaled
df_unscaled.shape

(22950, 135)

In [224]:
# Check the shape of the pleasant weather DataFrame
# This shows the total number of rows and columns in df_pleasant, ensuring it matches expectations
df_pleasant.shape

(22950, 15)

In [226]:
# Export the cleaned weather dataset to a CSV file
# This allows us to save the preprocessed data for future use in modeling without repeating the cleaning steps
df_unscaled.to_csv(os.path.join(path, 'weather_clean.csv'), index=False)

# 3. Reshape data for deep learning models

In [228]:
# Assign feature data to X and target labels to y for model training
X = df_unscaled
y = df_pleasant

In [229]:
# Convert the feature (X) and target (y) dataFrames into NumPy arrays for compatibility with deep learning models
X = np.array(X)
y = np.array(y)

In [233]:
# Display the X array to verify its structure and contents before reshaping for the model
X

array([[ 7.    ,  0.85  ,  1.018 , ...,  8.5   ,  6.    , 10.9   ],
       [ 6.    ,  0.84  ,  1.018 , ...,  8.9   ,  5.6   , 12.1   ],
       [ 8.    ,  0.9   ,  1.018 , ..., 10.5   ,  8.1   , 12.9   ],
       ...,
       [ 4.    ,  0.76  ,  1.0227, ..., 10.7   ,  7.9   , 13.5   ],
       [ 5.    ,  0.8   ,  1.0212, ..., 10.7   ,  7.9   , 13.5   ],
       [ 5.    ,  0.84  ,  1.0193, ..., 10.7   ,  7.9   , 13.5   ]])

In [235]:
# Display the y array to inspect the target values and ensure they align with the features in X
y

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [237]:
# Reshape X into a 3D array with dimensions (samples, timesteps, features). 
# This is required for deep learning models like CNNs and LSTMs which expect 3D input.
X = X.reshape(-1,15,9)

In [239]:
# Check the new shape of X after reshaping to confirm it has the expected 3D structure (samples, timesteps, features)
X.shape

(22950, 15, 9)

In [241]:
# Check the shape of y to confirm it matches the number of samples in X and is ready for model training
y.shape

(22950, 15)

# 4. Divide data into training and test sets

In [244]:
# Split the dataset into training and testing sets to evaluate model performance later. 
# The random_state ensures reproducibility of the split.
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state = 42)

# 5. Build and configure keras neural network

In [247]:
# Import the Input layer from Keras to explicitly define the input shape for a Sequential model
from keras.layers import Input

# Set training parameters
epochs = 20
batch_size = 16
n_hidden = 16

# Determine input dimensions
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

# Build the LSTM model using an explicit Input layer
model = Sequential()
model.add(Input(shape=(timesteps, input_dim)))   # Explicitly define input shape
model.add(LSTM(n_hidden))                        # LSTM layer with n_hidden units
model.add(Dropout(0.5))                          # Dropout layer to reduce overfitting
model.add(Dense(n_classes, activation='sigmoid'))# Output layer with sigmoid for multi-class prediction

# Show model summary
model.summary()

In [248]:
# Display a summary of the model architecture, including layer types, output shapes, and number of parameters
model.summary()

# 6. Compile and train the RNN model

In [252]:
# Compile the LSTM model by specifying:
# - Loss function as 'categorical_crossentropy' for multi-class classification.
# - Optimizer as 'rmsprop' to update weights during training
# - Metric as 'accuracy' to monitor model performance during training
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [254]:
# Train the LSTM model using the training data (X_train, y_train) with:
# - Specified batch_size and epochs
# - Validation on the test set (X_test, y_test) to monitor performance during training
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=(X_test, y_test),
          epochs=epochs)

Epoch 1/20
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.2017 - loss: 9.8511 - val_accuracy: 0.1793 - val_loss: 8.4478
Epoch 2/20
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.1173 - loss: 9.8348 - val_accuracy: 0.0786 - val_loss: 8.6875
Epoch 3/20
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.0992 - loss: 10.1578 - val_accuracy: 0.0798 - val_loss: 9.0776
Epoch 4/20
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.0949 - loss: 10.3656 - val_accuracy: 0.0816 - val_loss: 9.4756
Epoch 5/20
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.0926 - loss: 10.4164 - val_accuracy: 0.0795 - val_loss: 9.7752
Epoch 6/20
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.0837 - loss: 10.5371 - val_accuracy: 0.0795 - val_loss: 10.0456
Epoch 7/20

<keras.src.callbacks.history.History at 0x2079b2f9220>

# 7. Evaluate Model Performance with a Confusion Matrix

In [256]:
# Create a dictionary mapping numeric class indices to weather station names for easier interpretation of predictions.
stations = {
0: 'BASEL',
1: 'BELGRADE',
2: 'BUDAPEST',
3: 'DEBILT',
4: 'DUSSELDORF',
5: 'HEATHROW',
6: 'KASSEL',
7: 'LJUBLJANA',
8: 'MAASTRICHT',
9: 'MADRID',
10: 'MUNCHENB',
11: 'OSLO',
12: 'SONNBLICK',
13: 'STOCKHOLM',
14: 'VALENTIA'
}

In [257]:
# Define a function to create a confusion matrix
# - Converts one-hot encoded true and predicted labels to station names.
# - Uses np.argmax to get the predicted class index for each sample.
# - Returns a cross-tabulation showing counts of true vs predicted station classifications
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [258]:
# Evaluate the trained model on the test set
# - Generate predictions for X_test
# - Use the custom confusion_matrix function to display how well the model classified each station
# - Print the resulting confusion matrix for inspection
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step
Pred        BASEL  BUDAPEST  MADRID
True                               
BASEL        3676         0       6
BELGRADE     1084         1       7
BUDAPEST      210         1       3
DEBILT         82         0       0
DUSSELDORF     29         0       0
HEATHROW       79         1       2
KASSEL         11         0       0
LJUBLJANA      58         0       3
MAASTRICHT      9         0       0
MADRID        423         2      33
MUNCHENB        6         0       2
OSLO            5         0       0
STOCKHOLM       4         0       0
VALENTIA        1         0       0


# 8. Rebuilding the model with updated hyperparameters

In [260]:
# Set training parameters
epochs = 30           # Number of training cycles
batch_size = 16       # Number of samples per gradient update
n_hidden = 32         # Number of units in the LSTM layer

# Determine input dimensions from training data
timesteps = len(X_train[0])     # Number of time steps in each input sequence
input_dim = len(X_train[0][0])  # Number of features per time step
n_classes = len(y_train[0])     # Number of output classes

# Build the LSTM model
model = Sequential()

# 1) Explicit Input layer to define input shape and avoid warnings
model.add(Input(shape=(timesteps, input_dim)))

# 2) LSTM layer with n_hidden units
model.add(LSTM(n_hidden))

# 3) Dropout layer (50%) to reduce overfitting
model.add(Dropout(0.5))

# 4) Dense output layer with n_classes units and sigmoid activation
#    for multi-class prediction
model.add(Dense(n_classes, activation='sigmoid'))

# Display the model summary
model.summary()

In [261]:
# Display a summary of the model architecture, showing each layer, its output shape, and the number of trainable parameters
model.summary()

In [262]:
# Compile the LSTM model with the following settings:
# - loss='categorical_crossentropy': suitable for multi-class classification
# - optimizer='rmsprop': optimizer for adjusting weights during training
# - metrics=['accuracy']: track model accuracy during training and evaluation
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [263]:
# Train the LSTM model on the training data (X_train, y_train)
# - batch_size=batch_size: number of samples per gradient update
# - validation_data=(X_test, y_test): evaluate model performance on test data after each epoch
# - epochs=epochs: number of complete passes through the training dataset
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=(X_test, y_test),
          epochs=epochs)

Epoch 1/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 7ms/step - accuracy: 0.0620 - loss: 10.6743 - val_accuracy: 0.0488 - val_loss: 9.0007
Epoch 2/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.0904 - loss: 10.7994 - val_accuracy: 0.0350 - val_loss: 9.6656
Epoch 3/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.0872 - loss: 11.3067 - val_accuracy: 0.0427 - val_loss: 10.1002
Epoch 4/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.0897 - loss: 11.4730 - val_accuracy: 0.0474 - val_loss: 10.5760
Epoch 5/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6ms/step - accuracy: 0.0944 - loss: 11.9577 - val_accuracy: 0.0406 - val_loss: 10.8661
Epoch 6/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 6ms/step - accuracy: 0.0825 - loss: 11.7376 - val_accuracy: 0.0410 - val_loss: 10.7021
Epoch

<keras.src.callbacks.history.History at 0x207864f98b0>

In [264]:
# Define a function to create a confusion matrix for model predictions
# - Converts one-hot encoded y_true and y_pred arrays to station names using the stations dictionary
# - np.argmax(..., axis=1) finds the predicted class index for each sample
# - pd.crosstab generates a table showing counts of true vs predicted station labels
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [265]:
# Evaluate the trained LSTM model on the test set
# - model.predict(X_test) generates predictions for the test inputs
# - confusion_matrix() compares predicted labels with true labels
# - Prints a table showing how well the model predicted each station
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Pred        BASEL  MADRID
True                     
BASEL        3682       0
BELGRADE     1091       1
BUDAPEST      214       0
DEBILT         82       0
DUSSELDORF     29       0
HEATHROW       82       0
KASSEL         11       0
LJUBLJANA      60       1
MAASTRICHT      9       0
MADRID        458       0
MUNCHENB        8       0
OSLO            5       0
STOCKHOLM       4       0
VALENTIA        1       0


The model’s accuracy seems to drop while the loss rises.

# 9. Modifying hyperparameters and updating activation function

In [268]:
# Set updated training parameters and rebuild the LSTM model with new hyperparameters
epochs = 30
batch_size = 16
n_hidden = 64

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

# Build the model using an explicit Input layer
model = Sequential()
model.add(Input(shape=(timesteps, input_dim)))  # Explicit input layer to avoid warning
model.add(LSTM(n_hidden))                        # LSTM layer with n_hidden units
model.add(Dropout(0.5))                          # Dropout layer to reduce overfitting
model.add(Dense(n_classes, activation='tanh'))   # Dense output layer with 'tanh' activation

# Display model summary
model.summary()

In [269]:
# Display the model summary
model.summary()

In [270]:
# Compile the LSTM model
# - loss='categorical_crossentropy': appropriate loss function for multi-class classification
# - optimizer='rmsprop': adaptive optimizer suitable for RNNs/LSTMs
# - metrics=['accuracy']: track accuracy during training and evaluation
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [271]:
# Train the LSTM model on the training data
# - X_train, y_train: training features and labels
# - batch_size: number of samples per gradient update
# - validation_data: evaluate model performance on the test set after each epoch
# - epochs: number of complete passes through the training data
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=(X_test, y_test),
          epochs=epochs)

Epoch 1/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 7ms/step - accuracy: 0.0279 - loss: 24.9604 - val_accuracy: 0.0850 - val_loss: 22.6357
Epoch 2/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.0643 - loss: 25.0337 - val_accuracy: 0.0763 - val_loss: 25.9383
Epoch 3/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.0698 - loss: 24.7253 - val_accuracy: 0.1596 - val_loss: 19.9886
Epoch 4/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7ms/step - accuracy: 0.0781 - loss: 24.8576 - val_accuracy: 0.0396 - val_loss: 27.3011
Epoch 5/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7ms/step - accuracy: 0.1026 - loss: 24.9044 - val_accuracy: 0.2529 - val_loss: 24.9915
Epoch 6/30
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.1630 - loss: 24.4794 - val_accuracy: 0.0861 - val_loss: 25.6886
Epo

<keras.src.callbacks.history.History at 0x20788c132f0>

In [272]:
# Define a function to create a confusion matrix
# - y_true: true labels (one-hot encoded)
# - y_pred: predicted labels (one-hot encoded)
# The function converts one-hot arrays to station names, then computes a crosstab showing
# how often each true label was predicted as each possible class, helping evaluate model performance.
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [273]:
# Evaluate the model's performance on the test set
# - model.predict(X_test) generates predictions for the test data
# - confusion_matrix() compares predicted vs. true labels and prints a table
#   showing how often each true station was classified as each predicted station
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step
Pred        BELGRADE  MUNCHENB  SONNBLICK
True                                     
BASEL            237         0       3445
BELGRADE           3         0       1089
BUDAPEST           0         1        213
DEBILT             0         0         82
DUSSELDORF         0         0         29
HEATHROW           0         0         82
KASSEL             0         0         11
LJUBLJANA          0         0         61
MAASTRICHT         0         0          9
MADRID             0         0        458
MUNCHENB           0         0          8
OSLO               0         0          5
STOCKHOLM          0         0          4
VALENTIA           0         0          1


# 10. Tuning hyperparameters, incorporating convolution & pooling layers, and modifying the optimizer

In [275]:
# Set training parameters
epochs = 25
batch_size = 16
n_hidden = 8

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

# Build the CNN-LSTM model using an explicit Input layer
model = Sequential()
model.add(Input(shape=(timesteps, input_dim)))         # Explicit Input layer
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu'))  # Conv1D layer
model.add(MaxPooling1D())                              # MaxPooling layer
model.add(LSTM(n_hidden))                              # LSTM layer
model.add(Dropout(0.5))                                # Dropout to reduce overfitting
model.add(Dense(n_classes, activation='tanh'))        # Output layer

In [276]:
# Display the model summary
model.summary()

In [277]:
# Compile the CNN-LSTM model
# - loss='categorical_crossentropy': appropriate for multi-class classification
# - optimizer='adam': adaptive optimizer that adjusts learning rates during training
# - metrics=['accuracy']: evaluate model performance using accuracy during training
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [278]:
# Train the CNN-LSTM model on the training data
# - X_train, y_train: training features and labels
# - batch_size: number of samples per gradient update
# - validation_data: evaluate model performance on test set after each epoch
# - epochs: number of complete passes through the training dataset
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=(X_test, y_test),
          epochs=epochs)

Epoch 1/25
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - accuracy: 0.0334 - loss: 23.5550 - val_accuracy: 0.1417 - val_loss: 16.2461
Epoch 2/25
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.0752 - loss: 23.4301 - val_accuracy: 0.0106 - val_loss: 24.1315
Epoch 3/25
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.0654 - loss: 24.1617 - val_accuracy: 0.0105 - val_loss: 28.6567
Epoch 4/25
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.0280 - loss: 25.0710 - val_accuracy: 0.0105 - val_loss: 27.3515
Epoch 5/25
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - accuracy: 0.0283 - loss: 23.9793 - val_accuracy: 0.0105 - val_loss: 24.0425
Epoch 6/25
[1m1076/1076[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.0294 - loss: 24.4323 - val_accuracy: 0.0106 - val_loss: 23.4993
Epoc

<keras.src.callbacks.history.History at 0x207898d6a20>

In [279]:
# Define a function to create a confusion matrix for model predictions
# - y_true: true labels in one-hot encoded format
# - y_pred: predicted labels from the model (probabilities)
# Steps:
# 1) Convert one-hot encoded true and predicted labels to their corresponding station names.
# 2) Use np.argmax to get the index of the maximum value (predicted class).
# 3) Convert indices to station names using the 'stations' dictionary.
# 4) Generate a confusion matrix as a pandas crosstab with rows as True labels and columns as Predicted labels.
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])
    
    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [280]:
# Evaluate the model's performance on the test set
# - Use the confusion_matrix function to compare true labels (y_test) with model predictions
# - model.predict(X_test) generates predicted probabilities for each class
# - The printed crosstab shows how well the model predicted each station, highlighting misclassifications
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Pred        BELGRADE  HEATHROW  LJUBLJANA  STOCKHOLM
True                                                
BASEL           3657        16          1          8
BELGRADE        1092         0          0          0
BUDAPEST         214         0          0          0
DEBILT            82         0          0          0
DUSSELDORF        29         0          0          0
HEATHROW          82         0          0          0
KASSEL            11         0          0          0
LJUBLJANA         61         0          0          0
MAASTRICHT         9         0          0          0
MADRID           458         0          0          0
MUNCHENB           8         0          0          0
OSLO               5         0          0          0
STOCKHOLM          4         0          0          0
VALENTIA           1         0          0          0


# 11. Reverting to the standard keras model with updated hyperparameters

In [282]:
# Set updated training parameters
epochs = 10
batch_size = 4
n_hidden = 4

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = len(y_train[0])

# Build a Sequential LSTM model using an explicit Input layer
model = Sequential()
model.add(Input(shape=(timesteps, input_dim)))  # Explicitly define input shape
model.add(LSTM(n_hidden))                        # LSTM layer with n_hidden units
model.add(Dropout(0.5))                          # Dropout layer to reduce overfitting
model.add(Dense(n_classes, activation='sigmoid'))# Dense output layer for multi-class prediction

In [283]:
# Display the model summary
model.summary()

In [284]:
# Compile the LSTM model:
# - loss='categorical_crossentropy': appropriate for multi-class classification tasks
# - optimizer='rmsprop': efficient optimizer for RNNs/LSTMs
# - metrics=['accuracy']: track the model's accuracy during training and validation
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [285]:
# Train the LSTM model on the training data:
# - X_train, y_train: input features and target labels for training
# - batch_size: number of samples per gradient update
# - validation_data=(X_test, y_test): evaluate performance on test data after each epoch
# - epochs: number of full passes through the training dataset
model.fit(X_train,
          y_train,
          batch_size=batch_size,
          validation_data=(X_test, y_test),
          epochs=epochs)

Epoch 1/10
[1m4303/4303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 5ms/step - accuracy: 0.1282 - loss: 9.0498 - val_accuracy: 0.0378 - val_loss: 8.8656
Epoch 2/10
[1m4303/4303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 5ms/step - accuracy: 0.1076 - loss: 9.3239 - val_accuracy: 0.0798 - val_loss: 9.5099
Epoch 3/10
[1m4303/4303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 5ms/step - accuracy: 0.0632 - loss: 9.9609 - val_accuracy: 0.0798 - val_loss: 10.1758
Epoch 4/10
[1m4303/4303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 5ms/step - accuracy: 0.0610 - loss: 10.6423 - val_accuracy: 0.0798 - val_loss: 10.7949
Epoch 5/10
[1m4303/4303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 5ms/step - accuracy: 0.0584 - loss: 11.2828 - val_accuracy: 0.0798 - val_loss: 11.2829
Epoch 6/10
[1m4303/4303[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 5ms/step - accuracy: 0.0613 - loss: 11.9882 - val_accuracy: 0.0798 - val_loss: 11.8291
Epo

<keras.src.callbacks.history.History at 0x207a5941730>

In [286]:
# Define a function to create a confusion matrix for model predictions:
# - y_true: true labels in one-hot encoded format
# - y_pred: predicted labels in one-hot encoded format
# Steps:
# 1) Convert one-hot encoded vectors to station names using the `stations` dictionary.
# 2) Use pandas `crosstab` to generate a confusion matrix comparing true vs predicted labels.
# 3) Rows represent true labels, columns represent predicted labels.
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [287]:
# Evaluate the model's predictions by generating a confusion matrix:
# - `model.predict(X_test)` produces predicted outputs for the test set.
# - `confusion_matrix()` converts predictions and true labels to station names and compares them.
# - The resulting table shows how often each true station was predicted correctly or misclassified.
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Pred        BASEL  MADRID
True                     
BASEL        1618    2064
BELGRADE     1041      51
BUDAPEST      212       2
DEBILT         82       0
DUSSELDORF     27       2
HEATHROW       74       8
KASSEL         11       0
LJUBLJANA      57       4
MAASTRICHT      6       3
MADRID        233     225
MUNCHENB        7       1
OSLO            4       1
STOCKHOLM       4       0
VALENTIA        0       1


# 12. CNN model

In [331]:
# Set training hyperparameters
epochs = 10         # Number of full passes through the training dataset
batch_size = 4      # Number of samples per gradient update
n_hidden = 4        # Number of filters/units in Conv1D and Dense layers

# Determine input dimensions from training data
timesteps = len(X_train[0])       # Number of time steps per input sequence
input_dim = len(X_train[0][0])    # Number of features per time step
n_classes = len(y_train[0])       # Number of output classes

# Build the CNN model using Sequential API
model = Sequential()
model.add(Input(shape=(timesteps, input_dim)))  # Explicit Input layer to avoid warnings
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu'))  # Conv1D layer for feature extraction
model.add(Dense(16, activation='relu'))        # Dense layer for non-linear feature transformation
model.add(MaxPooling1D())                       # MaxPooling to downsample sequence and reduce overfitting
model.add(Flatten())                            # Flatten 3D output to 1D vector for Dense output
model.add(Dense(n_classes, activation='softmax')) # Output layer for multi-class classification

In [332]:
# Display a summary of the CNN model architecture
# - Shows each layer's type, output shape, and number of parameters
# - Provides total, trainable, and non-trainable parameters for the model
# - Useful for quickly inspecting the network structure and verifying the model setup
model.summary()

In [333]:
# Compile the CNN model for training
# - loss='categorical_crossentropy': appropriate for multi-class classification tasks
# - optimizer='adam': adaptive optimizer for efficient gradient descent
# - metrics=['accuracy']: track accuracy during training and validation
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [340]:
# Train the CNN model on the training data
# - X_train, y_train: input features and corresponding labels for training
# - batch_size=batch_size: number of samples per gradient update
# - epochs=epochs: number of complete passes through the training dataset
# - verbose=2: display training progress with one line per epoch
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/10
4303/4303 - 8s - 2ms/step - accuracy: 0.1210 - loss: 12694.1055
Epoch 2/10
4303/4303 - 7s - 2ms/step - accuracy: 0.1388 - loss: 118999.8594
Epoch 3/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1393 - loss: 388314.1562
Epoch 4/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1398 - loss: 888389.5625
Epoch 5/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1344 - loss: 1666531.1250
Epoch 6/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1323 - loss: 2757510.5000
Epoch 7/10
4303/4303 - 7s - 2ms/step - accuracy: 0.1331 - loss: 4243373.5000
Epoch 8/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1299 - loss: 6188330.0000
Epoch 9/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1296 - loss: 8656065.0000
Epoch 10/10
4303/4303 - 6s - 1ms/step - accuracy: 0.1284 - loss: 11634360.0000


<keras.src.callbacks.history.History at 0x207afde3110>

In [350]:
# Define a function to generate a confusion matrix for model predictions
# - y_true: true labels in one-hot encoded format
# - y_pred: predicted labels in one-hot encoded format
# Steps:
# 1) Convert one-hot encoded arrays to station names using the 'stations' dictionary.
# 2) Use np.argmax to get the predicted class index for each sample.
# 3) Convert the indices to Pandas Series of station names for true and predicted labels.
# 4) Return a crosstab (confusion matrix) comparing true vs predicted station names.
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [352]:
# Evaluate the trained model's performance on the test set
# - Generate predictions for X_test using model.predict()
# - Compare predicted labels to true labels y_test using the confusion_matrix function
# - Print the confusion matrix to see how accurately the model classified each station
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Pred        BASEL  BELGRADE  BUDAPEST  DEBILT  HEATHROW  KASSEL  LJUBLJANA  \
True                                                                         
BASEL        1315       164        24      23        84      40       1290   
BELGRADE      319       160         0       0         0       6        575   
BUDAPEST       79        19         0       0         0       0        110   
DEBILT         30        13         0       0         0       0         36   
DUSSELDORF     18         1         0       0         0       0          9   
HEATHROW       55         1         0       0         0       1         20   
KASSEL          4         2         0       0         0       1          3   
LJUBLJANA      25         2         0       0         0       0         34   
MAASTRICHT      5         0         0       0         0       0          4   
MADRID        255         9         0       0         0       9    

# 13. CNN model reset

In [355]:
# Set training hyperparameters
epochs = 15         # Number of full passes through the training dataset
batch_size = 8      # Number of samples per gradient update
n_hidden = 8        # Number of filters/units in Conv1D and Dense layers

# Determine input dimensions from training data
timesteps = len(X_train[0])       # Number of time steps per input sequence
input_dim = len(X_train[0][0])    # Number of features per time step
n_classes = len(y_train[0])       # Number of output classes

# Build the CNN model using Sequential API
model = Sequential()
model.add(Input(shape=(timesteps, input_dim)))  # Explicit Input layer to avoid warnings
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu'))  # Conv1D layer for feature extraction
model.add(Dense(16, activation='relu'))        # Dense layer for non-linear feature transformation
model.add(MaxPooling1D())                       # MaxPooling to downsample sequence and reduce overfitting
model.add(Flatten())                            # Flatten 3D output to 1D vector for Dense output
model.add(Dense(n_classes, activation='softmax')) # Output layer for multi-class classification

In [357]:
# Display a summary of the CNN model architecture
# - Shows each layer's type, output shape, and number of parameters
# - Provides total, trainable, and non-trainable parameters for the model
# - Useful for quickly inspecting the network structure and verifying the model setup
model.summary()

In [359]:
# Compile the CNN model for training
# - loss='categorical_crossentropy': appropriate for multi-class classification tasks
# - optimizer='adam': adaptive optimizer for efficient gradient descent
# - metrics=['accuracy']: track accuracy during training and validation
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [361]:
# Train the CNN model on the training data
# - X_train, y_train: input features and corresponding labels for training
# - batch_size=batch_size: number of samples per gradient update
# - epochs=epochs: number of complete passes through the training dataset
# - verbose=2: display training progress with one line per epoch
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/15
2152/2152 - 8s - 4ms/step - accuracy: 0.1145 - loss: 8836.0020
Epoch 2/15
2152/2152 - 5s - 3ms/step - accuracy: 0.1233 - loss: 86944.3516
Epoch 3/15
2152/2152 - 3s - 2ms/step - accuracy: 0.1265 - loss: 279511.5312
Epoch 4/15
2152/2152 - 5s - 2ms/step - accuracy: 0.1292 - loss: 587722.8750
Epoch 5/15
2152/2152 - 4s - 2ms/step - accuracy: 0.1321 - loss: 1053675.1250
Epoch 6/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1292 - loss: 1687300.0000
Epoch 7/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1253 - loss: 2564875.5000
Epoch 8/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1269 - loss: 3627280.2500
Epoch 9/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1297 - loss: 4941169.5000
Epoch 10/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1270 - loss: 6549894.5000
Epoch 11/15
2152/2152 - 4s - 2ms/step - accuracy: 0.1281 - loss: 8438362.0000
Epoch 12/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1287 - loss: 10617704.0000
Epoch 13/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1297 - loss: 13278048.0000


<keras.src.callbacks.history.History at 0x207b8ee3da0>

In [362]:
# Define a function to generate a confusion matrix for model predictions
# - y_true: true labels in one-hot encoded format
# - y_pred: predicted labels in one-hot encoded format
# Steps:
# 1) Convert one-hot encoded arrays to station names using the 'stations' dictionary.
# 2) Use np.argmax to get the predicted class index for each sample.
# 3) Convert the indices to Pandas Series of station names for true and predicted labels.
# 4) Return a crosstab (confusion matrix) comparing true vs predicted station names.
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [363]:
# Evaluate the trained model's performance on the test set
# - Generate predictions for X_test using model.predict()
# - Compare predicted labels to true labels y_test using the confusion_matrix function
# - Print the confusion matrix to see how accurately the model classified each station
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
Pred        BASEL  BELGRADE  BUDAPEST  HEATHROW  KASSEL  LJUBLJANA  \
True                                                                 
BASEL         102      1955        10        40      12          3   
BELGRADE        0       986         0         1       0          0   
BUDAPEST        0       197         0         0       0          0   
DEBILT          0        57         0         0       0          0   
DUSSELDORF      0        21         0         0       0          0   
HEATHROW        0        58         0         0       0          0   
KASSEL          0         7         0         0       0          0   
LJUBLJANA       0        54         0         0       0          0   
MAASTRICHT      0         5         0         0       0          0   
MADRID          5       281         0         0       0          0   
MUNCHENB        0         8         0         0       0          0   
OSLO           

# 14. CNN final test

In [365]:
# Set training hyperparameters
epochs = 15         # Number of full passes through the training dataset
batch_size = 8      # Number of samples per gradient update
n_hidden = 8        # Number of filters/units in Conv1D and Dense layers

# Determine input dimensions from training data
timesteps = len(X_train[0])       # Number of time steps per input sequence
input_dim = len(X_train[0][0])    # Number of features per time step
n_classes = len(y_train[0])       # Number of output classes

# Build the CNN model using Sequential API
model = Sequential()
model.add(Input(shape=(timesteps, input_dim)))  # Explicit Input layer to avoid warnings
model.add(Conv1D(n_hidden, kernel_size=2, activation='relu'))  # Conv1D layer for feature extraction
model.add(Dense(16, activation='relu'))        # Dense layer for non-linear feature transformation
model.add(MaxPooling1D())                       # MaxPooling to downsample sequence and reduce overfitting
model.add(Flatten())                            # Flatten 3D output to 1D vector for Dense output
model.add(Dense(n_classes, activation='tanh')) # Output layer for multi-class classification

In [366]:
# Display a summary of the CNN model architecture
# - Shows each layer's type, output shape, and number of parameters
# - Provides total, trainable, and non-trainable parameters for the model
# - Useful for quickly inspecting the network structure and verifying the model setup
model.summary()

In [367]:
# Compile the CNN model for training
# - loss='categorical_crossentropy': appropriate for multi-class classification tasks
# - optimizer='adam': adaptive optimizer for efficient gradient descent
# - metrics=['accuracy']: track accuracy during training and validation
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [368]:
# Train the CNN model on the training data
# - X_train, y_train: input features and corresponding labels for training
# - batch_size=batch_size: number of samples per gradient update
# - epochs=epochs: number of complete passes through the training dataset
# - verbose=2: display training progress with one line per epoch
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2)

Epoch 1/15
2152/2152 - 5s - 2ms/step - accuracy: 0.0759 - loss: 30.4218
Epoch 2/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1073 - loss: 27.3191
Epoch 3/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1239 - loss: 30.2883
Epoch 4/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1321 - loss: 30.2846
Epoch 5/15
2152/2152 - 4s - 2ms/step - accuracy: 0.1391 - loss: 30.2808
Epoch 6/15
2152/2152 - 5s - 2ms/step - accuracy: 0.1509 - loss: 30.2809
Epoch 7/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1510 - loss: 30.2808
Epoch 8/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1514 - loss: 30.2799
Epoch 9/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1513 - loss: 30.2780
Epoch 10/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1513 - loss: 30.2780
Epoch 11/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1511 - loss: 30.2771
Epoch 12/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1511 - loss: 30.2771
Epoch 13/15
2152/2152 - 3s - 1ms/step - accuracy: 0.1503 - loss: 30.2752
Epoch 14/15
2152/2152 - 4s - 2ms/step - accuracy: 0.1501 - l

<keras.src.callbacks.history.History at 0x207b9458d40>

In [372]:
# Define a function to generate a confusion matrix for model predictions
# - y_true: true labels in one-hot encoded format
# - y_pred: predicted labels in one-hot encoded format
# Steps:
# 1) Convert one-hot encoded arrays to station names using the 'stations' dictionary.
# 2) Use np.argmax to get the predicted class index for each sample.
# 3) Convert the indices to Pandas Series of station names for true and predicted labels.
# 4) Return a crosstab (confusion matrix) comparing true vs predicted station names.
def confusion_matrix(y_true, y_pred):
    y_true = pd.Series([stations[y] for y in np.argmax(y_true, axis=1)])
    y_pred = pd.Series([stations[y] for y in np.argmax(y_pred, axis=1)])

    return pd.crosstab(y_true, y_pred, rownames=['True'], colnames=['Pred'])

In [374]:
# Evaluate the trained model's performance on the test set
# - Generate predictions for X_test using model.predict()
# - Compare predicted labels to true labels y_test using the confusion_matrix function
# - Print the confusion matrix to see how accurately the model classified each station
print(confusion_matrix(y_test, model.predict(X_test)))

[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Pred        BELGRADE  DUSSELDORF  HEATHROW  LJUBLJANA  MAASTRICHT  MADRID  \
True                                                                        
BASEL           2856         101        24         23         323      27   
BELGRADE         804          82         1          1         128       2   
BUDAPEST         137          20         1          2          37       2   
DEBILT            59          10         0          2          10       0   
DUSSELDORF        19           4         1          0           5       0   
HEATHROW          43           5         1          2          27       1   
KASSEL             7           3         0          0           1       0   
LJUBLJANA         37           2         2          1          12       2   
MAASTRICHT         2           0         1          0           4       0   
MADRID           297           4         0         18         105      16   
M