## CSCE636 Deep Learning - Dr. Anxiao Jiang
- Changcheng Yuan
- UIN: 53500517

Check out the Jupyter notebook for Chapter 13 at https://github.com/fchollet/deep-learning-with-python-notebooks/blob/master/chapter13_timeseries-forecasting.ipynb . It has tried 6 methods for the temperature prediction problem: Try 1 (A common-sense, non-machine learning baseline method), Try 2 (A fully connected neural network), Try 3 (1-d convolutional neural networks), Try 4 (LSTM with recurrent dropout), Try 5 (stacking RNN layers), Try 6 (Bidirectional RNN).

Your task: use the above 6 methods to predict the temperature in 48 hours (instead of 24 hours). In the Jupyter notebook, include your code as well as the performance of the 6 methods.  

In [None]:
!cd ../data/chapter13 && wget https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip
!cd ../data/chapter13 && unzip jena_climate_2009_2016.csv.zip

: 

In [1]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import tensorflow as tf

# List all GPUs
gpus = tf.config.list_physical_devices('GPU')
print("GPUs found:", gpus)

# Quick check
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("GPU available:", len(gpus) > 0)
# Or get the default device name
print("Default GPU device:", tf.test.gpu_device_name())



GPUs found: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Built with CUDA: True
GPU available: True
Default GPU device: /device:GPU:0


In [2]:
datapath = '../data/chapter13/jena_climate_2009_2016.csv'

In [3]:
import numpy as np
from tensorflow import keras

with open(datapath) as f:
    data = f.read().strip().split("\n")

header = data[0].split(",")
lines = data[1:]
num_features = len(header) - 1

raw_data = np.zeros((len(lines), num_features), dtype=np.float32)
temperature = np.zeros((len(lines),), dtype=np.float32)
for idx, line in enumerate(lines):
    values = [float(x) for x in line.split(",")[1:]]
    temperature[idx] = values[1]
    raw_data[idx] = values

num_train_samples = int(0.5 * len(raw_data))
num_val_samples = int(0.25 * len(raw_data))
num_test_samples = len(raw_data) - num_train_samples - num_val_samples

mean = raw_data[:num_train_samples].mean(axis=0)
std = raw_data[:num_train_samples].std(axis=0)
std[std == 0] = 1.0
raw_data = (raw_data - mean) / std

sampling_rate = 6
time_steps = 120
forecast_horizon_hours = 48
# Offset between the last input time step and the prediction target
sequence_delay = sampling_rate * (time_steps + forecast_horizon_hours - 1)
batch_size = 256

train_dataset = keras.utils.timeseries_dataset_from_array(
    raw_data[:-sequence_delay],
    targets=temperature[sequence_delay:],
    sampling_rate=sampling_rate,
    sequence_length=time_steps,
    shuffle=True,
    batch_size=batch_size,
    start_index=0,
    end_index=num_train_samples,
)

val_dataset = keras.utils.timeseries_dataset_from_array(
    raw_data[:-sequence_delay],
    targets=temperature[sequence_delay:],
    sampling_rate=sampling_rate,
    sequence_length=time_steps,
    shuffle=False,
    batch_size=batch_size,
    start_index=num_train_samples,
    end_index=num_train_samples + num_val_samples,
)

test_dataset = keras.utils.timeseries_dataset_from_array(
    raw_data[:-sequence_delay],
    targets=temperature[sequence_delay:],
    sampling_rate=sampling_rate,
    sequence_length=time_steps,
    shuffle=False,
    batch_size=batch_size,
    start_index=num_train_samples + num_val_samples,
)

print(f"Loaded {len(lines)} timesteps across {num_features} sensors")
print(f"Train/val/test samples: {num_train_samples}/{num_val_samples}/{num_test_samples}")
print(f"Forecast horizon: {forecast_horizon_hours} hours (delay steps: {sequence_delay})")

Loaded 420451 timesteps across 14 sensors
Train/val/test samples: 210225/105112/105114
Forecast horizon: 48 hours (delay steps: 1002)


### Try 1

Simple non-linear regression

In [5]:
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

history_hours = 48
max_train_samples = 60000


def collect_numpy(dataset, limit=None):
    features, targets = [], []
    seen = 0
    for samples, target in dataset:
        samples_np = np.asarray(samples)
        target_np = np.asarray(target)
        temp_history = samples_np[:, -history_hours:, 1]
        features.append(temp_history)
        targets.append(target_np)
        seen += len(target_np)
        if limit and seen >= limit:
            break
    X = np.concatenate(features, axis=0)
    y = np.concatenate(targets, axis=0)
    if limit:
        X = X[:limit]
        y = y[:limit]
    return X, y


def evaluate_mae(model, dataset):
    total_abs_err = 0.0
    samples_seen = 0
    for samples, target in dataset:
        features = np.asarray(samples)[:, -history_hours:, 1]
        preds = model.predict(features)
        target_np = np.asarray(target)
        total_abs_err += np.sum(np.abs(preds - target_np))
        samples_seen += len(target_np)
    return total_abs_err / samples_seen


train_X, train_y = collect_numpy(train_dataset, limit=max_train_samples)

model = Pipeline(
    [
        ("poly", PolynomialFeatures(degree=2, include_bias=False)),
        ("scaler", StandardScaler()),
        ("regressor", Ridge(alpha=100.0)),
    ]
)
model.fit(train_X, train_y)

train_mae = mean_absolute_error(train_y, model.predict(train_X))
val_mae = evaluate_mae(model, val_dataset)
test_mae = evaluate_mae(model, test_dataset)

print(f"Train samples used: {len(train_X)}")
print(f"Train MAE: {train_mae:.2f} °C")
print(f"Validation MAE: {val_mae:.2f} °C")
print(f"Test MAE: {test_mae:.2f} °C")


Train samples used: 60000
Train MAE: 3.32 °C
Validation MAE: 3.06 °C
Test MAE: 3.28 °C


### Try 2

Simple dense network using MSE for training and MAE for human readable result. 

In [4]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks


def flatten_dataset(dataset):
    return dataset.map(lambda x, y: (tf.reshape(x, (tf.shape(x)[0], -1)), y))


flattened_train = flatten_dataset(train_dataset).prefetch(tf.data.AUTOTUNE)
flattened_val = flatten_dataset(val_dataset).prefetch(tf.data.AUTOTUNE)
flattened_test = flatten_dataset(test_dataset).prefetch(tf.data.AUTOTUNE)

input_dim = time_steps * num_features

dense_model = models.Sequential(
    [
        layers.Input(shape=(input_dim,)),
        layers.Dense(256, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dense(128, activation="relu"),
        layers.Dense(64, activation="relu"),
        layers.Dense(1),
    ]
)

dense_model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-3),
    loss="mse",
    metrics=["mse", "mae"],
)

early_stopping = callbacks.EarlyStopping(
    monitor="val_mae",
    patience=3,
    restore_best_weights=True,
)

history = dense_model.fit(
    flattened_train,
    validation_data=flattened_val,
    epochs=20,
    callbacks=[early_stopping],
)

test_loss, test_mse, test_mae = dense_model.evaluate(flattened_test)
print(f"Train MAE: {history.history['mae'][-1]:.2f} °C")
print(f"Validation MAE: {history.history['val_mae'][-1]:.2f} °C")
print(f"Test MAE: {test_mae:.2f} °C")


Epoch 1/20


I0000 00:00:1761168554.035713   43245 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20

KeyboardInterrupt: 

### Try 3

1D convolutional neural network

In [7]:
import tensorflow as tf
from tensorflow.keras import Input, Model, callbacks, layers, optimizers

conv_train = train_dataset.prefetch(tf.data.AUTOTUNE)
conv_val = val_dataset.prefetch(tf.data.AUTOTUNE)
conv_test = test_dataset.prefetch(tf.data.AUTOTUNE)

inputs = Input(shape=(time_steps, num_features))
x = layers.Conv1D(8, 24, activation="relu")(inputs)
x = layers.MaxPooling1D(2)(x)
x = layers.Conv1D(8, 12, activation="relu")(x)
x = layers.MaxPooling1D(2)(x)
x = layers.Conv1D(8, 6, activation="relu")(x)
x = layers.GlobalAveragePooling1D()(x)
outputs = layers.Dense(1)(x)
conv_model = Model(inputs, outputs)

conv_model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-3),
    loss="mse",
    metrics=["mae"],
)

early_stopping = callbacks.EarlyStopping(
    monitor="val_mae",
    patience=5,
    restore_best_weights=True,
)

history = conv_model.fit(
    conv_train,
    validation_data=conv_val,
    epochs=20,
    callbacks=[early_stopping],
)

test_loss, test_mae = conv_model.evaluate(conv_test)
print(f"Train MAE: {history.history['mae'][-1]:.2f} °C")
print(f"Validation MAE: {history.history['val_mae'][-1]:.2f} °C")
print(f"Test MAE: {test_mae:.2f} °C")


Epoch 1/20
Epoch 2/20
Epoch 2/20
Epoch 3/20
Epoch 3/20
Epoch 4/20
Epoch 4/20
Epoch 5/20
Epoch 5/20
Epoch 6/20
Epoch 6/20
Epoch 7/20
Epoch 7/20
Epoch 8/20
Epoch 8/20
Epoch 9/20
Epoch 9/20
Epoch 10/20
Epoch 10/20
Train MAE: 2.53 °C
Validation MAE: 3.86 °C
Test MAE: 3.72 °C
Train MAE: 2.53 °C
Validation MAE: 3.86 °C
Test MAE: 3.72 °C


### Try 4

LSTM with recurrent dropout

In [11]:
import tensorflow as tf
from tensorflow.keras import Input, Model, callbacks, layers, optimizers

lstm_train = train_dataset.prefetch(tf.data.AUTOTUNE)
lstm_val = val_dataset.prefetch(tf.data.AUTOTUNE)
lstm_test = test_dataset.prefetch(tf.data.AUTOTUNE)

debug_batches = None # disable debug
if debug_batches:
    lstm_train = lstm_train.take(debug_batches)
    lstm_val = lstm_val.take(max(1, debug_batches // 4))

inputs = Input(shape=(time_steps, num_features))
x = layers.LSTM(32, recurrent_dropout=0.25)(inputs)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1)(x)
lstm_model = Model(inputs, outputs)

lstm_model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-3),
    loss="mse",
    metrics=["mae"],
)

early_stopping = callbacks.EarlyStopping(
    monitor="val_mae",
    patience=2,
    restore_best_weights=True,
)

history = lstm_model.fit(
    lstm_train,
    validation_data=lstm_val,
    epochs=20,
    callbacks=[early_stopping],
)

test_loss, test_mae = lstm_model.evaluate(lstm_test)
print(f"Train MAE: {history.history['mae'][-1]:.2f} °C")
print(f"Validation MAE: {history.history['val_mae'][-1]:.2f} °C")
print(f"Test MAE: {test_mae:.2f} °C")


Epoch 1/20
Epoch 1/20
Epoch 2/20
Epoch 2/20
Epoch 3/20
Epoch 3/20
Epoch 4/20
Epoch 4/20
Epoch 5/20
Epoch 5/20
Epoch 6/20
Epoch 6/20
Epoch 7/20
Epoch 7/20
Epoch 8/20
Epoch 8/20
Epoch 9/20
Epoch 9/20
Train MAE: 3.12 °C
Validation MAE: 3.12 °C
Test MAE: 3.33 °C
Train MAE: 3.12 °C
Validation MAE: 3.12 °C
Test MAE: 3.33 °C


### Try 5
stacking RNN layers

In [13]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

gru_train = train_dataset.prefetch(tf.data.AUTOTUNE)
gru_val = val_dataset.prefetch(tf.data.AUTOTUNE)
gru_test = test_dataset.prefetch(tf.data.AUTOTUNE)

# Stack GRU layers with dropout to regularize the recurrent model
inputs = keras.Input(shape=(time_steps, num_features))
x = layers.GRU(32, recurrent_dropout=0.5, return_sequences=True)(inputs)
x = layers.GRU(32, recurrent_dropout=0.5)(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1)(x)
stacked_gru_model = keras.Model(inputs, outputs)

callbacks = [
    keras.callbacks.EarlyStopping(
        monitor="val_mae",
        patience=2,
        restore_best_weights=True,
    )
]

stacked_gru_model.compile(optimizer="adam", loss="mse", metrics=["mae"])

history = stacked_gru_model.fit(
    gru_train,
    epochs=20,
    validation_data=gru_val,
    callbacks=callbacks,
)

train_mae = history.history["mae"][-1]
val_mae = min(history.history["val_mae"])

_, test_mae = stacked_gru_model.evaluate(gru_test)
print(f"Training MAE: {train_mae:.2f} °C")
print(f"Validation MAE: {val_mae:.2f} °C")
print(f"Test MAE: {test_mae:.2f} °C")



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Training MAE: 3.05 °C
Validation MAE: 3.15 °C
Test MAE: 3.31 °C


### Try 6

bi-directional RNN

In [14]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks

bidi_train = train_dataset.prefetch(tf.data.AUTOTUNE)
bidi_val = val_dataset.prefetch(tf.data.AUTOTUNE)
bidi_test = test_dataset.prefetch(tf.data.AUTOTUNE)

inputs = keras.Input(shape=(time_steps, num_features))
x = layers.Bidirectional(layers.LSTM(16))(inputs)
outputs = layers.Dense(1)(x)
bidirectional_model = keras.Model(inputs, outputs)

bidirectional_model.compile(optimizer="adam", loss="mse", metrics=["mae"])

early_stopping = callbacks.EarlyStopping(
    monitor="val_mae",
    patience=2,
    restore_best_weights=True,
)

history = bidirectional_model.fit(
    bidi_train,
    epochs=20,
    validation_data=bidi_val,
    callbacks=[early_stopping],
)

test_loss, test_mae = bidirectional_model.evaluate(bidi_test)
print(f"Train MAE: {history.history['mae'][-1]:.2f} °C")
print(f"Validation MAE: {min(history.history['val_mae']):.2f} °C")
print(f"Test MAE: {test_mae:.2f} °C")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Train MAE: 2.93 °C
Validation MAE: 3.15 °C
Test MAE: 3.41 °C
