In [10]:
import tensorflow as tf


import neptune
from neptune.integrations.tensorflow_keras import NeptuneCallback

import os

import numpy as np
import pandas as pd
from window_generator_categorical import WindowGenerator

from run_automation_utils import Params, init_model

from typing import Dict

In [54]:
def prep_window_generator(station_id):
    df = pd.read_feather(f"../../data/pollution/processed/categorised/pm10/{station_id}.feather")
    df.set_index("timestamp", inplace=True)
    
    timestamp_s = df.index.map(pd.Timestamp.timestamp)

    day = 24 * 60 * 60
    year = (365.2425) * day

    df["Day sin"] = np.sin(timestamp_s * (2 * np.pi / day))
    df["Day cos"] = np.cos(timestamp_s * (2 * np.pi / day))
    df["Year sin"] = np.sin(timestamp_s * (2 * np.pi / year))
    df["Year cos"] = np.cos(timestamp_s * (2 * np.pi / year))
    df.reset_index(inplace=True)

    # Convert to radians.
    wd_rad = df.pop("winddirection_10m") * np.pi / 180
    df["winddirection_10m_sin"] = np.sin(wd_rad)
    df["winddirection_10m_cos"] = np.cos(wd_rad)

    df["kategorie"] = df["kategorie"].astype(np.int32)

    return WindowGenerator(input_width=24 * 7, label_width=24, shift=24, df=df.copy(), label_columns=["kategorie"])

In [55]:
window_generator_538 = prep_window_generator(538)

## TODO: is that needed? train one with and one without (cnn_lstm_model.add(tf.keras.layers.Lambda(lambda x: x[:, -CONV_WIDTH:, :]))))


In [7]:
def build_model(include_lambda: bool = False):
    CONV_WIDTH = 3
    OUT_STEPS = 24  # 14 days prediction
    pred_categories = 6  # TODO: ??? num_features is 11 but must be one for our prediction

    cnn_lstm_model = tf.keras.models.Sequential()
    # Shape [batch, time, features] => [batch, CONV_WIDTH, features]
    if include_lambda:
        cnn_lstm_model.add(tf.keras.layers.Lambda(lambda x: x[:, -CONV_WIDTH:, :]))
    # Shape => [batch, 1, conv_units]
    cnn_lstm_model.add(tf.keras.layers.Conv1D(256, activation="relu", kernel_size=(CONV_WIDTH)))
    cnn_lstm_model.add(tf.keras.layers.MaxPooling1D())
    cnn_lstm_model.add(tf.keras.layers.BatchNormalization())  # TODO: useful to put it here?
    cnn_lstm_model.add(tf.keras.layers.Conv1D(512, activation="relu", kernel_size=(CONV_WIDTH)))
    cnn_lstm_model.add(tf.keras.layers.MaxPooling1D())
    cnn_lstm_model.add(tf.keras.layers.BatchNormalization())  # TODO: useful to put it here?
    cnn_lstm_model.add(tf.keras.layers.Conv1D(512, activation="relu", kernel_size=(CONV_WIDTH)))

    #cnn_lstm_model.add(tf.keras.layers.Flatten())
    # LSTM
    cnn_lstm_model.add(tf.keras.layers.LSTM(32, return_sequences=True))
    cnn_lstm_model.add(tf.keras.layers.LSTM(32, return_sequences=False))

    cnn_lstm_model.add(tf.keras.layers.Dense(512))
    cnn_lstm_model.add(tf.keras.layers.Dropout(0.4))
    # Shape => [batch, out_steps, features] -> for each prediction step one neuron
    # Anpassung für die Klassifizierung
    cnn_lstm_model.add(tf.keras.layers.Dense(OUT_STEPS * pred_categories, activation="softmax"))

    # Shape => [batch, out_steps, pred_categories]
    cnn_lstm_model.add(tf.keras.layers.Reshape([OUT_STEPS, pred_categories]))
    return cnn_lstm_model

In [8]:

model = build_model(False)

loss = tf.keras.losses.MeanSquaredError()
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), metrics=["accuracy"])

run = neptune.init_run(
    project="data-mining-team2/model-tests",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI4Mzg2ZWZmYi05YzRlLTQ3ODYtOWE1NC1mNDM4OTM1ZjNlOTkifQ==",
    custom_run_id="classification_without_lambda",
    source_files=["./model_tests.ipynb"],
)

run["parameters"] = {
    "include_lambda": False
}

neptune_cbk = NeptuneCallback(run=run, base_namespace="training")


for epoch_count_factor in range(1, 5):
    epoch_count = 50 * epoch_count_factor
    history = model.fit(
        window_generator_538.train,
        epochs=50,
        validation_data=window_generator_538.val,
        callbacks=[neptune_cbk],
    )

    eval_metrics = model.evaluate(window_generator_538.test, verbose=0)
    for j, metric in enumerate(eval_metrics):
        run[f"eval/epoch_{epoch_count}/{model.metrics_names[j]}"] = metric

    name = f"cnn_lstm_7days_history_1days_pred_{epoch_count}_epochs_without_lambda"

    model.save("../models/" + name)

    run["model_names/" + str(epoch_count) + "epochs"] = name
    
run.stop()




https://app.neptune.ai/data-mining-team2/model-tests/e/MOD-6
(None, 24, 1)
(None, 24, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
(None, 24, 1)




INFO:tensorflow:Assets written to: ../models/cnn_lstm_7days_history_1days_pred_50_epochs_without_lambda\assets


INFO:tensorflow:Assets written to: ../models/cnn_lstm_7days_history_1days_pred_50_epochs_without_lambda\assets


(None, 24, 1)
(None, 24, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
(None, 24, 1)




INFO:tensorflow:Assets written to: ../models/cnn_lstm_7days_history_1days_pred_100_epochs_without_lambda\assets


INFO:tensorflow:Assets written to: ../models/cnn_lstm_7days_history_1days_pred_100_epochs_without_lambda\assets


(None, 24, 1)
(None, 24, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
(None, 24, 1)




INFO:tensorflow:Assets written to: ../models/cnn_lstm_7days_history_1days_pred_150_epochs_without_lambda\assets


INFO:tensorflow:Assets written to: ../models/cnn_lstm_7days_history_1days_pred_150_epochs_without_lambda\assets


(None, 24, 1)
(None, 24, 1)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
(None, 24, 1)




INFO:tensorflow:Assets written to: ../models/cnn_lstm_7days_history_1days_pred_200_epochs_without_lambda\assets


INFO:tensorflow:Assets written to: ../models/cnn_lstm_7days_history_1days_pred_200_epochs_without_lambda\assets


Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
All 1 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/data-mining-team2/model-tests/e/MOD-6/metadata


In [9]:
model = build_model(True)

loss = tf.keras.losses.MeanSquaredError()
model.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), metrics=["accuracy"])

run = neptune.init_run(
    project="data-mining-team2/model-tests",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI4Mzg2ZWZmYi05YzRlLTQ3ODYtOWE1NC1mNDM4OTM1ZjNlOTkifQ==",
    custom_run_id="classification_with_lambda",
    source_files=["./model_tests.ipynb"],
)

run["parameters"] = {
    "include_lambda": True
}

neptune_cbk = NeptuneCallback(run=run, base_namespace="training")


for epoch_count_factor in range(1, 5):
    epoch_count = 50 * epoch_count_factor
    history = model.fit(
        window_generator_538.train,
        epochs=50,
        validation_data=window_generator_538.val,
        callbacks=[neptune_cbk],
    )

    eval_metrics = model.evaluate(window_generator_538.test, verbose=0)
    for j, metric in enumerate(eval_metrics):
        run[f"eval/epoch_{epoch_count}/{model.metrics_names[j]}"] = metric

    name = f"cnn_lstm_7days_history_1days_pred_{epoch_count}_epochs_with_lambda"

    model.save("../models/" + name)

    run["model_names/" + str(epoch_count) + "epochs"] = name
    
run.stop()

https://app.neptune.ai/data-mining-team2/model-tests/e/MOD-7
(None, 24, 1)
(None, 24, 1)
Epoch 1/50


ValueError: in user code:

    File "e:\FHBielefeld\Master\DataMining\feinstaubprojekt-polen\.env\lib\site-packages\keras\engine\training.py", line 1284, in train_function  *
        return step_function(self, iterator)
    File "e:\FHBielefeld\Master\DataMining\feinstaubprojekt-polen\.env\lib\site-packages\keras\engine\training.py", line 1268, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "e:\FHBielefeld\Master\DataMining\feinstaubprojekt-polen\.env\lib\site-packages\keras\engine\training.py", line 1249, in run_step  **
        outputs = model.train_step(data)
    File "e:\FHBielefeld\Master\DataMining\feinstaubprojekt-polen\.env\lib\site-packages\keras\engine\training.py", line 1050, in train_step
        y_pred = self(x, training=True)
    File "e:\FHBielefeld\Master\DataMining\feinstaubprojekt-polen\.env\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "e:\FHBielefeld\Master\DataMining\feinstaubprojekt-polen\.env\lib\site-packages\keras\backend.py", line 6523, in pool2d
        x = tf.compat.v1.nn.max_pool(

    ValueError: Exception encountered when calling layer 'max_pooling1d_2' (type MaxPooling1D).
    
    Negative dimension size caused by subtracting 2 from 1 for '{{node sequential_1/max_pooling1d_2/MaxPool}} = MaxPool[T=DT_FLOAT, data_format="NHWC", explicit_paddings=[], ksize=[1, 2, 1, 1], padding="VALID", strides=[1, 2, 1, 1]](sequential_1/max_pooling1d_2/ExpandDims)' with input shapes: [?,1,1,256].
    
    Call arguments received by layer 'max_pooling1d_2' (type MaxPooling1D):
      • inputs=tf.Tensor(shape=(None, 1, 256), dtype=float32)


In [67]:
cnn_lstm_model = tf.keras.models.load_model("../models/cnn_lstm_7days_history_1days_pred_100_epochs_without_lambda")

In [66]:
pred = cnn_lstm_model.predict(window_generator_538.train.take(1))

(None, 24, 1)


ValueError: in user code:

    File "e:\FHBielefeld\Master\DataMining\feinstaubprojekt-polen\.env\lib\site-packages\keras\engine\training.py", line 2169, in predict_function  *
        return step_function(self, iterator)
    File "e:\FHBielefeld\Master\DataMining\feinstaubprojekt-polen\.env\lib\site-packages\keras\engine\training.py", line 2155, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "e:\FHBielefeld\Master\DataMining\feinstaubprojekt-polen\.env\lib\site-packages\keras\engine\training.py", line 2143, in run_step  **
        outputs = model.predict_step(data)
    File "e:\FHBielefeld\Master\DataMining\feinstaubprojekt-polen\.env\lib\site-packages\keras\engine\training.py", line 2111, in predict_step
        return self(x, training=False)
    File "e:\FHBielefeld\Master\DataMining\feinstaubprojekt-polen\.env\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "e:\FHBielefeld\Master\DataMining\feinstaubprojekt-polen\.env\lib\site-packages\keras\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_1" is incompatible with the layer: expected shape=(None, 168, 11), found shape=(None, 336, 11)


In [57]:
window_generator_538.plot(cnn_lstm_model, plot_col="kategorie", offset=500, plot_version="train")

(3, 24, 1)


ValueError: Input 0 of layer "sequential" is incompatible with the layer: expected shape=(None, 168, 11), found shape=(3, 336, 11)