In [5]:
import tensorflow as tf
import os

# Collect the saved-model directories under ./model, in sorted name order.
# Only real directories are kept: os.listdir() also returns plain files
# (e.g. a stray .DS_Store), which would otherwise be handed to the model
# loader and shift the position-based pairing with the datasets.
model_dirs = [
    path
    for path in (
        os.path.join(os.curdir, "model", name)
        for name in sorted(os.listdir(os.path.join(os.curdir, "model")))
    )
    if os.path.isdir(path)
]
model_dirs


['./model/BASEL',
 './model/BUDAPEST',
 './model/DE',
 './model/DRESDEN',
 './model/DUSSELDORF',
 './model/HEATHROW',
 './model/KASSEL',
 './model/LJUBLJANA',
 './model/MAASTRICHT',
 './model/MALMO',
 './model/MONTELIMAR',
 './model/MUENCHEN',
 './model/OSLO',
 './model/PERPIGNAN',
 './model/ROMA',
 './model/SONNBLICK',
 './model/STOCKHOLM',
 './model/TOURS']

In [7]:
# Load one Keras model per directory, preserving the order of model_dirs.
models = list(map(tf.keras.models.load_model, model_dirs))

# Show the architecture of the second loaded model as a sanity check.
models[1].summary()

Model: "BUDAPEST_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 6, 7)]            0         
                                                                 
 lstm_2 (LSTM)               (None, 6, 16)             1536      
                                                                 
 lstm_3 (LSTM)               (None, 16)                2112      
                                                                 
 dense_1 (Dense)             (None, 1)                 17        
                                                                 
Total params: 3,665
Trainable params: 3,665
Non-trainable params: 0
_________________________________________________________________


In [8]:
import pandas as pd

# Folder containing the per-location input files.
CSV_DIR = os.path.join(os.curdir, "dataset", "sorted")

# Sort the file names so the frames come out in a stable, alphabetical order.
csv_filenames = os.listdir(CSV_DIR)
csv_filenames.sort()

# Parse every listed file into its own DataFrame.
csv_dataset = list(
    map(lambda filename: pd.read_csv(os.path.join(CSV_DIR, filename)), csv_filenames)
)
print(len(csv_dataset))

18


In [9]:

# Row index passed to get_dataset as the end of the training range and the
# start of the validation range.
# NOTE(review): 2922 = 8 * 365 + 2, presumably eight years of daily rows — confirm.
TRAIN_END_INDEX = 2922
# Row index passed to get_dataset as the end of the validation range and the
# start of the test range (366 rows after TRAIN_END_INDEX).
VALIDATION_END_INDEX = 3288



def get_dataset(
    dataset: pd.DataFrame,
    sequence_length,
    batch_size,
    train_end_index,
    validation_end_index,
):
    """Build windowed train/validation/test datasets for next-step prediction.

    The first column whose name contains "temp_mean" is used as the target.
    All columns except "DATE" (the target column included) are standardised
    to zero mean / unit variance and used as features.

    The features are truncated by ``sequence_length`` rows at the end and the
    target is shifted forward by ``sequence_length`` rows. Keras pairs
    ``targets[i]`` with the window starting at row ``i``, so every window of
    ``sequence_length`` consecutive rows is labelled with the target value of
    the row immediately after the window.

    NOTE(review): mean and std are computed over *all* rows, including the
    validation and test ranges. This is kept unchanged so the inputs stay
    consistent with however the saved models were trained — confirm whether
    train-only statistics were intended.

    Args:
        dataset: Frame holding the feature columns, a "temp_mean" column and
            (optionally) a "DATE" column.
        sequence_length: Number of consecutive rows per input window.
        batch_size: Number of windows per batch.
        train_end_index: Row index ending the training range and starting the
            validation range.
        validation_end_index: Row index ending the validation range and
            starting the test range, which runs to the end of the data.

    Returns:
        Tuple ``(train_dataset, validation_dataset, test_dataset)`` of
        datasets yielding ``(windows, targets)`` batches.

    Raises:
        ValueError: If no column name contains "temp_mean".
    """
    # Locate the target explicitly so a missing column fails with a clear
    # message instead of an unbound local variable further down.
    target_column = next(
        (col for col in dataset.columns if "temp_mean" in str(col)), None
    )
    if target_column is None:
        raise ValueError(
            'dataset has no column containing "temp_mean" to use as the target'
        )
    target = dataset[[target_column]]

    # DATE is an identifier, not a feature; tolerate frames that lack it.
    features = dataset.drop(columns=["DATE"], errors="ignore")

    # A constant column has std == 0; dividing by it would fill the column
    # with NaN. Using 1 instead leaves such a column at 0 after centring.
    feature_std = features.std(axis=0).replace(0, 1)
    scaled_data = (features - features.mean(axis=0)) / feature_std

    # Offset features and target against each other so each window is paired
    # with the value that follows it.
    windows = scaled_data[:-sequence_length]
    labels = target[sequence_length:]

    def make_split(start_index, end_index=None):
        # One windowed dataset over rows [start_index, end_index).
        return tf.keras.utils.timeseries_dataset_from_array(
            windows,
            targets=labels,
            sequence_length=sequence_length,
            batch_size=batch_size,
            start_index=start_index,
            end_index=end_index,
        )

    train_dataset = make_split(0, train_end_index)
    validation_dataset = make_split(train_end_index, validation_end_index)
    test_dataset = make_split(validation_end_index)
    return (train_dataset, validation_dataset, test_dataset)


# Number of time steps per input window.
sequence_length = 6

# Build one (train, validation, test) triple per loaded CSV frame.
batch_dataset = []
for frame in csv_dataset:
    splits = get_dataset(
        frame,
        sequence_length=sequence_length,
        batch_size=32,
        train_end_index=TRAIN_END_INDEX,
        validation_end_index=VALIDATION_END_INDEX,
    )
    batch_dataset.append(splits)

print(len(batch_dataset))
print(batch_dataset[1])

# Peek at the first window and its target in the first frame's training split.
for data, label in batch_dataset[0][0].take(1):
    print(data[0])
    print(label[0])


18
(<BatchDataset element_spec=(TensorSpec(shape=(None, None, 7), dtype=tf.float64, name=None), TensorSpec(shape=(None, 1), dtype=tf.float64, name=None))>, <BatchDataset element_spec=(TensorSpec(shape=(None, None, 7), dtype=tf.float64, name=None), TensorSpec(shape=(None, 1), dtype=tf.float64, name=None))>, <BatchDataset element_spec=(TensorSpec(shape=(None, None, 7), dtype=tf.float64, name=None), TensorSpec(shape=(None, 1), dtype=tf.float64, name=None))>)
tf.Tensor(
[[ 1.34424888  1.34682202 -1.20851298 -0.38199123 -1.07646018 -1.09549112
  -0.80998751 -1.33429094]
 [ 1.15869866  1.7487146  -1.15505695 -0.43793346 -1.07646018 -1.00108475
  -0.64465741 -1.23109558]
 [ 0.602048    1.69847803 -0.8877768  -0.43793346 -0.22197883 -1.18989749
  -1.03543765 -1.23109558]
 [ 0.41649778  1.04540259 -0.74879112  0.2147259   0.51703206 -0.96062487
  -0.97531762 -0.92150951]
 [ 1.43702399  0.8444563  -0.87708559 -0.30740159 -0.22197883 -0.67740576
  -0.47932731 -0.79538185]
 [ 0.97314844  0.8193380

In [16]:
# Models and datasets are paired purely by position (both lists come from
# sorted directory listings). zip() would silently drop the surplus if the two
# lists differed in length, so fail loudly instead.
if len(models) != len(batch_dataset):
    raise ValueError(
        f"got {len(models)} models but {len(batch_dataset)} datasets; "
        "cannot pair them by position"
    )

# Map each model's name to its loss and MAE.
model_losses = {}
for model, batch_data in zip(models, batch_dataset):
    # batch_data is (train, validation, test); index 1 is the validation split.
    # NOTE(review): confirm validation (not the held-out test split at index 2)
    # is the intended evaluation set.
    loss, mae = model.evaluate(batch_data[1])
    model_losses[model.name] = {"loss": loss, "MAE": mae}
model_losses



{'BASEL_model': {'loss': 2.9261019229888916, 'MAE': 1.3783124685287476},
 'BUDAPEST_model': {'loss': 3.340862274169922, 'MAE': 1.4148924350738525},
 'DE_model': {'loss': 3.0563292503356934, 'MAE': 1.4133985042572021},
 'DRESDEN_model': {'loss': 4.355435848236084, 'MAE': 1.629381537437439},
 'DUSSELDORF_model': {'loss': 2.771543025970459, 'MAE': 1.3507333993911743},
 'HEATHROW_model': {'loss': 2.3024299144744873, 'MAE': 1.198699712753296},
 'KASSEL_model': {'loss': 3.183945417404175, 'MAE': 1.404491662979126},
 'LJUBLJANA_model': {'loss': 4.263371467590332, 'MAE': 1.6330246925354004},
 'MAASTRICHT_model': {'loss': 3.3704922199249268, 'MAE': 1.451335072517395},
 'MALMO_model': {'loss': 2.6106021404266357, 'MAE': 1.244170069694519},
 'MONTELIMAR_model': {'loss': 3.397240161895752, 'MAE': 1.4352141618728638},
 'MUENCHEN_model': {'loss': 3.982684373855591, 'MAE': 1.5770020484924316},
 'OSLO_model': {'loss': 3.0948243141174316, 'MAE': 1.3713586330413818},
 'PERPIGNAN_model': {'loss': 4.02078

In [18]:
# Tabulate the metrics: one column per model, one row each for "loss" and "MAE".
model_loss_df = pd.DataFrame.from_dict(model_losses, orient="columns")
model_loss_df

Unnamed: 0,BASEL_model,BUDAPEST_model,DE_model,DRESDEN_model,DUSSELDORF_model,HEATHROW_model,KASSEL_model,LJUBLJANA_model,MAASTRICHT_model,MALMO_model,MONTELIMAR_model,MUENCHEN_model,OSLO_model,PERPIGNAN_model,ROMA_model,SONNBLICK_model,STOCKHOLM_model,TOURS_model
loss,2.926102,3.340862,3.056329,4.355436,2.771543,2.30243,3.183945,4.263371,3.370492,2.610602,3.39724,3.982684,3.094824,4.020787,2.666489,3.582275,2.999295,3.995615
MAE,1.378312,1.414892,1.413399,1.629382,1.350733,1.1987,1.404492,1.633025,1.451335,1.24417,1.435214,1.577002,1.371359,1.549892,1.211921,1.451918,1.367729,1.601398


In [19]:
# Average each metric (row) across all model columns.
model_loss_df.mean(axis="columns")

loss    3.328907
MAE     1.426937
dtype: float64