# Jena Climate – Single‑Step Temperature Forecasting with LSTM
This notebook builds an LSTM model that predicts the temperature 1 hour ahead (i.e. 6 time‑steps) based on past climate sensor readings recorded at the Max Planck Institute for Biogeochemistry in Jena.

## 1. Imports & Download

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import keras, pathlib, urllib, zipfile

2025-04-27 11:29:20.166623: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-27 11:29:20.167095: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-27 11:29:20.169872: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-27 11:29:20.178014: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745753360.189505    9646 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745753360.19

## 2. Retrieve dataset

In [None]:
data_dir  = pathlib.Path("data/jena_climate")
data_dir.mkdir(exist_ok=True)
csv_path  = data_dir / "jena_climate_2009_2016.csv"
zip_path  = data_dir / "jena_climate_2009_2016.csv.zip"

# file download if not csv_path.exists():
if not csv_path.exists():
    if not zip_path.exists():
        url = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip"
        print("Downloading Jena Climate dataset …")
        keras.utils.get_file(origin=url, fname=zip_path.name, cache_dir=".", cache_subdir=str(data_dir))
    print("Extracting", zip_path)
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(data_dir)
    zip_path.unlink()

# CSV load
df = pd.read_csv(csv_path)
df = df.drop(columns=["Date Time"])
print("Loaded:", csv_path, "→ shape:", df.shape)


Loaded: jena_climate/jena_climate_2009_2016.csv → shape: (420551, 14)


## 3. Normalise

In [3]:
n = len(df)
train_df = df.iloc[:int(n*0.7)]
stats = train_df.describe().transpose()

eps = 1e-6
stats['std'] = stats['std'].replace(0, eps)   # avoid div by zero
df_norm = (df - stats['mean']) / stats['std']

# sanity check
values = df_norm.to_numpy(dtype=np.float32)   # ← dtype 강제
assert np.isfinite(values).all(), "NaN/Inf present after normalisation!"

## 4. Build windowed datasets

In [4]:
past_len, future_len = 72, 6
batch_size = 256

def make_ds(data):
    ds = tf.keras.preprocessing.timeseries_dataset_from_array(
        data=data,
        targets=data[:,0],
        sequence_length=past_len + future_len,
        sequence_stride=1,
        shuffle=True,
        batch_size=batch_size
    )
    ds = ds.map(lambda x,y:(x[:,:past_len,:],
                            tf.expand_dims(x[:,past_len+future_len-1,0],-1)))
    return ds

train_ds = make_ds(values[:int(n*0.7)])
val_ds   = make_ds(values[int(n*0.7):int(n*0.9)])
test_ds  = make_ds(values[int(n*0.9):])

# check one batch
bx, by = next(iter(train_ds))
tf.debugging.assert_all_finite(bx,"bad bx")
tf.debugging.assert_all_finite(by,"bad by")
print("Batch OK:", bx.shape, by.shape)


2025-04-27 11:29:22.728170: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Batch OK: (256, 72, 14) (256, 1)


## 5. Define LSTM model

In [5]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(past_len, df.shape[1])),
    tf.keras.layers.LSTM(64, return_sequences=True),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
opt = tf.keras.optimizers.Adam(1e-4, clipnorm=1.0)
model.compile(opt, loss='mse', metrics=[tf.keras.metrics.RootMeanSquaredError()])
model.summary()

## 6. Train

In [6]:
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True),
    tf.keras.callbacks.TerminateOnNaN()
]

history = model.fit(
    train_ds,
    epochs=5,
    validation_data=val_ds,
    callbacks=callbacks
)

Epoch 1/5
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 72ms/step - loss: 0.7889 - root_mean_squared_error: 0.8851 - val_loss: 0.5866 - val_root_mean_squared_error: 0.7659
Epoch 2/5
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 70ms/step - loss: 0.6112 - root_mean_squared_error: 0.7818 - val_loss: 0.5831 - val_root_mean_squared_error: 0.7636
Epoch 3/5
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 70ms/step - loss: 0.6088 - root_mean_squared_error: 0.7803 - val_loss: 0.5821 - val_root_mean_squared_error: 0.7630
Epoch 4/5
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 68ms/step - loss: 0.6088 - root_mean_squared_error: 0.7803 - val_loss: 0.5817 - val_root_mean_squared_error: 0.7627
Epoch 5/5
[1m1150/1150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 69ms/step - loss: 0.6085 - root_mean_squared_error: 0.7800 - val_loss: 0.5814 - val_root_mean_squared_error: 0.7625


## 7. Evaluate & inverse transform example predictions

In [7]:

test_rmse = model.evaluate(test_ds, verbose=0)[1]
print(f"Test RMSE (normalised): {test_rmse:.3f}")

temp_mean, temp_std = stats.loc['T (degC)', ['mean','std']]
for bx, by in test_ds.take(1):
    preds = model.predict(bx)
    print("\nPred  | True (°C)")
    for p,t in zip(preds[:10], by[:10]):
        print(f"{p[0]*temp_std+temp_mean:6.2f} | {t[0]*temp_std+temp_mean:6.2f}")


Test RMSE (normalised): 0.600
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step  

Pred  | True (°C)
 17.76 |  26.85
 17.76 |  31.40
 12.60 |  12.87
 17.56 |  17.79
  9.69 |   9.46
  9.11 |   2.97
  9.11 |   7.85
  9.11 |   6.23
 11.21 |  11.76
 17.59 |  17.65


2025-04-27 11:36:11.370262: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


## 8. Model Save & Load Model 

In [None]:
model_dir = "/model_/jena_climate_model.h5"
model.save(model_dir)
# Load the model
loaded_model = tf.keras.models.load_model(model_dir)
# Check the loaded model
loaded_model.summary()
