In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import keras

# **Loading the Dataset**

In [None]:
from zipfile import ZipFile

# Download the zipped Jena climate CSV (get_file returns the local path of the
# downloaded archive) and unpack it into the current working directory.
uri = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip"
zip_path = keras.utils.get_file(origin=uri, fname="jena_climate_2009_2016.csv.zip")

# Open the archive in a context manager so the file handle is closed as soon
# as extraction finishes instead of staying open for the life of the kernel.
with ZipFile(zip_path) as zip_file:
    zip_file.extractall()
csv_path = "jena_climate_2009_2016.csv"

# Raw, un-normalised readings; later cells read `df` but never modify it.
df = pd.read_csv(csv_path)

# **Data Visualization**

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# `titles`, `feature_keys` and `colors` are parallel lists: position i in each
# describes the same feature (human-readable title, CSV column name, line colour).
# Later cells select features by position, so the three lists must stay aligned.
titles = [
    "Pressure", "Temperature", "Temperature in Kelvin", "Temperature (dew point)",
    "Relative Humidity", "Saturation vapor pressure", "Vapor pressure",
    "Vapor pressure deficit", "Specific humidity", "Water vapor concentration",
    # "rho (g/m**3)" is the air density column; it was previously mislabelled "Airtight".
    "Air density", "Wind speed", "Maximum wind speed", "Wind direction in degrees",
]

# Column names exactly as they appear in the CSV header.
feature_keys = [
    "p (mbar)", "T (degC)", "Tpot (K)", "Tdew (degC)", "rh (%)",
    "VPmax (mbar)", "VPact (mbar)", "VPdef (mbar)", "sh (g/kg)",
    "H2OC (mmol/mol)", "rho (g/m**3)", "wv (m/s)", "max. wv (m/s)", "wd (deg)",
]

# One matplotlib colour per subplot (each subplot draws a single line).
colors = [
    "blue", "orange", "green", "red", "purple",
    "brown", "pink", "gray", "olive", "cyan", "magenta","maroon","maroon", "cyan"
]

# Name of the timestamp column used as the x-axis of the raw plots.
date_time_key = "Date Time"


def plot_single_feature(ax, title, key, color, time_data, data):
    """Draw one column of ``data`` against ``time_data`` on the given axes.

    Args:
        ax: Matplotlib axes to draw on.
        title: Human-readable feature name; used in the subplot title and legend.
        key: Column name in ``data`` to plot.
        color: Line colour passed through to the pandas plot call.
        time_data: Values used as the x-axis index; must have one entry per row.
        data: DataFrame holding the column ``key``.
    """
    # Work on a copy: assigning `.index` directly on the object returned by
    # `data[key]` could alter a Series that the caller's frame still holds on to.
    t_data = data[key].copy()
    t_data.index = time_data
    t_data.plot(ax=ax, color=color, title="{} - {}".format(title, key), rot=25)
    ax.legend([title])


def show_raw_visualization(data):
    """Plot every feature listed in ``feature_keys`` on a 7x2 grid of subplots.

    Args:
        data: DataFrame containing the ``date_time_key`` column and each
            column named in ``feature_keys``.
    """
    timestamps = data[date_time_key]
    _, grid = plt.subplots(
        nrows=7, ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k"
    )
    panel_specs = zip(titles, feature_keys, colors)
    for position, (title, key, color) in enumerate(panel_specs):
        # Fill the grid row by row: two panels per row.
        row, col = divmod(position, 2)
        plot_single_feature(grid[row, col], title, key, color, timestamps, data)
    plt.tight_layout()


# Plot each column listed in `feature_keys` from the loaded frame `df` against time.
show_raw_visualization(df)


# **Data Preprocessing**

In [None]:
import numpy as np

# --- Train / validation split -------------------------------------------
# Rows [0, train_split) form the training split; the rest is validation.
split_fraction = 0.715
train_split = int(len(df) * split_fraction)

# --- Windowing ------------------------------------------------------------
# `past` rows of history feed each sample, the label lies `future` rows after
# the window, and only every `step`-th row inside the window is kept.
past = 720
future = 72
step = 6

# --- Optimisation ---------------------------------------------------------
learning_rate = 1e-3
batch_size = 256
epochs = 10


def normalize_data(data, train_split):
    """Standardise ``data`` column-wise using statistics of the training rows only.

    The mean and standard deviation are computed over ``data[:train_split]``
    along axis 0 and then applied to all rows.

    Args:
        data: Array-like of shape (rows, features) (or 1-D) holding raw values.
        train_split: Number of leading rows that make up the training split.

    Returns:
        ``(data - mean) / std`` with the same shape as ``data``. Columns that
        are constant over the training rows are only mean-centred.
    """
    train_rows = data[:train_split]
    data_mean = np.mean(train_rows, axis=0)
    data_std = np.std(train_rows, axis=0)
    # A column with zero spread would divide by zero and fill the result with
    # NaN/inf; scale such columns by 1 so they come out as plain offsets.
    safe_std = np.where(data_std == 0, 1.0, data_std)
    return (data - data_mean) / safe_std


In [None]:
# Positions (into `titles` / `feature_keys`) of the features fed to the model.
# Defined once so the printed names and the selected columns cannot drift apart.
selected_indices = [0, 1, 5, 7, 8, 10, 11]

print(
    "The selected parameters are:",
    ", ".join(titles[i] for i in selected_indices),
)
selected_features = [feature_keys[i] for i in selected_indices]

# Normalise the raw values and wrap them in a fresh DataFrame. The new frame
# has default integer row and column labels, so later cells address the
# temperature column by label 1 and rows by position.
features = pd.DataFrame(normalize_data(df[selected_features].values, train_split))

# `.loc` slicing is label-based and inclusive, hence `train_split - 1`.
train_data = features.loc[0 : train_split - 1]
val_data = features.loc[train_split:]


# **Build the Training and Validation Datasets**

In [None]:
# Row offset between the first row of an input window and its label: the
# window spans `past` rows and the label lies `future` rows after its end.
start = past + future
end = start + train_split

# Requires `train_data`, `features`, `past`, `future` and `step` from earlier cells.

# Inputs must begin at row 0 of the training split while the labels begin at
# row `start`: timeseries_dataset_from_array pairs the window that starts at
# data[i] with targets[i], so this offset makes every window predict the
# temperature `past + future` rows after the window's first row. Slicing the
# inputs with the same `start:end` range as the labels would pair each window
# with the temperature of its own first row.
x_train = train_data.iloc[:, :7].values
y_train = features.iloc[start:end, 1].values.reshape(-1, 1)

# Number of rows kept per window once it is sub-sampled every `step` rows.
sequence_length = past // step

# Printing the shapes for verification
print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)
print("Sequence length:", sequence_length)


In [None]:
# NOTE: this cell recomputes the same training arrays as the cell before it.
# Row offset between the first row of an input window and its label.
start = past + future
end = start + train_split

# Requires `train_data`, `features`, `past`, `future` and `step` from earlier cells.

# Inputs start at row 0 of the training split and labels at row `start`:
# timeseries_dataset_from_array pairs the window starting at data[i] with
# targets[i], so the offset must live in the labels only. Offsetting the
# inputs as well would make each window's target its own first-row temperature.
x_train = train_data.iloc[:, :7].values
y_train = features.iloc[start:end, 1].values.reshape(-1, 1)

# Number of rows kept per window once it is sub-sampled every `step` rows.
sequence_length = past // step

In [None]:
# Turn the training arrays into batched (window, target) pairs: each window
# keeps `sequence_length` rows, taking every `step`-th row of the input.
dataset_train = keras.preprocessing.timeseries_dataset_from_array(
    data=x_train,
    targets=y_train,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)

In [None]:
# The last `past + future` validation rows are dropped from the inputs, and
# the labels start `past + future` rows into the validation split.
label_start = train_split + past + future
x_end = len(val_data) - past - future

feature_columns = list(range(7))
x_val = val_data.iloc[:x_end][feature_columns].values
# Column label 1 is the normalised temperature; kept as a one-column frame.
y_val = features.iloc[label_start:][[1]]

dataset_val = keras.preprocessing.timeseries_dataset_from_array(
    data=x_val,
    targets=y_val,
    sequence_length=sequence_length,
    sampling_rate=step,
    batch_size=batch_size,
)


# Pull a single batch from the training dataset; `inputs` and `targets` stay
# bound afterwards so the model cell can read the per-sample input shape.
for inputs, targets in dataset_train.take(1):
    pass

input_shape = inputs.numpy().shape
target_shape = targets.numpy().shape
print("Input shape:", input_shape)
print("Target shape:", target_shape)

# **Training**

In [None]:
# `inputs` still holds the sample batch taken from `dataset_train`; its second
# and third dimensions give the per-sample shape the network expects.
sample_shape = (inputs.shape[1], inputs.shape[2])

# From here on `inputs` is rebound to the symbolic model input.
inputs = keras.layers.Input(shape=sample_shape)
lstm_out = keras.layers.LSTM(32)(inputs)
outputs = keras.layers.Dense(1)(lstm_out)

# Single-output regression model trained with mean squared error.
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
model = keras.Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=optimizer, loss="mse")
model.summary()

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Requires `model`, `dataset_train`, `dataset_val` and `epochs` from earlier cells.

# Weights of the best epoch (lowest validation loss) are written here.
path_checkpoint = "model_checkpoint.weights.h5"

# The callbacks are taken from the same `keras` package that built the model,
# so the model and its callbacks always come from one Keras implementation
# rather than mixing `keras` with `tensorflow.keras`.

# Stop once validation loss has not improved for 7 consecutive epochs.
es_callback = keras.callbacks.EarlyStopping(
    monitor="val_loss", min_delta=0, patience=7
)

# Save weights only, and only when validation loss improves.
modelckpt_callback = keras.callbacks.ModelCheckpoint(
    monitor="val_loss",
    filepath=path_checkpoint,
    verbose=1,
    save_weights_only=True,
    save_best_only=True,
)

# `history.history` holds the per-epoch "loss" and "val_loss" read by the
# evaluation cell.
history = model.fit(
    dataset_train,
    epochs=epochs,
    validation_data=dataset_val,
    callbacks=[es_callback, modelckpt_callback],
)


# **Evaluate the Model**

In [None]:
import matplotlib.pyplot as plt

def visualize_loss(history, title):
    """Plot training and validation loss against the epoch number.

    Args:
        history: Object whose ``history`` attribute is a mapping containing
            the per-epoch lists ``"loss"`` and ``"val_loss"``.
        title: Title shown above the plot.
    """
    recorded = history.history
    train_curve = recorded["loss"]
    val_curve = recorded["val_loss"]
    # Epochs are numbered from 1 on the x-axis.
    epoch_axis = range(1, len(train_curve) + 1)

    curves = (
        (train_curve, "b", "Training loss"),
        (val_curve, "r", "Validation loss"),
    )
    for values, line_format, legend_label in curves:
        plt.plot(epoch_axis, values, line_format, label=legend_label)

    plt.title(title)
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

# Plot the per-epoch training and validation loss stored in `history`.
visualize_loss(history, "Training and Validation Loss")


# **Prediction**

In [None]:
def show_plot(plot_data, delta, title):
    """Plot a history curve followed by single-point markers.

    Args:
        plot_data: Sequence whose first entry is the history array (drawn as a
            line over negative time steps ending at -1). Each further entry is
            drawn as one marker; at most two further entries are supported.
        delta: Number of steps after the last history step at which the
            markers are placed; a falsy value places them on the last step.
        title: Title of the figure.
    """
    labels = ["History", "True Future", "Model Prediction"]
    marker = [".-", "rx", "go"]
    history_length = plot_data[0].shape[0]
    time_steps = list(range(-history_length, 0))

    # Treat a missing or zero delta as "no offset".
    offset = delta or 0

    plt.figure(figsize=(12, 6))
    plt.title(title)

    for position, series in enumerate(plot_data):
        if position == 0:
            # The first entry is the history window itself.
            plt.plot(time_steps, series.flatten(), marker[position], label=labels[position])
            continue
        # Every later entry is a single point placed `offset` steps ahead.
        plt.plot(
            time_steps[-1] + offset,
            series,
            marker[position],
            markersize=10,
            label=labels[position],
        )

    plt.legend()
    plt.xlim([time_steps[0], time_steps[-1] + offset + 5])
    plt.xlabel("Time-Step")
    plt.show()

# For five validation batches, plot the first sample of each: its history of
# input column 1, the true target, and the model's prediction.
for x, y in dataset_val.take(5):
    history_curve = x[0][:, 1].numpy()
    true_value = y[0].numpy()
    predicted_value = model.predict(x)[0]
    show_plot(
        [history_curve, true_value, predicted_value],
        12,
        "Single Step Prediction",
    )
