In [9]:
# source https://stackoverflow.com/questions/70361179/how-to-include-future-values-in-a-time-series-prediction-of-a-rnn-in-keras

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import (
    Input,
    Dense,
    LSTM,
    TimeDistributed,
    Concatenate,
    Add,
)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# define the inputs
target = ["date,entry_id,PM1,PM2.5,PM10,temp,pressure,humidity"]
features = ["date,entry_id,PM1,PM2.5,PM10,temp,pressure,humidity"]
sequence_length = 96

# import the data
df = pd.read_csv(
    "C:/Users/admin/Masters-s-project/masters_project/air_pollution_smog_1.csv",
    sep=";",
    header=0,
    low_memory=False,
    infer_datetime_format=True,
    parse_dates={"date": [0]},
    index_col=["date"],
)

# scale the data
target_scaler = StandardScaler().fit(df[target])
features_scaler = StandardScaler().fit(df[features])

df[target] = target_scaler.transform(df[target])
df[features] = features_scaler.transform(df[features])

# extract the input and output sequences
X_encoder = []  # past features and target values
X_decoder = []  # future features values
y = []  # future target values

for i in range(sequence_length, df.shape[0] - sequence_length):
    X_encoder.append(df[features + target].iloc[i - sequence_length : i])
    X_decoder.append(df[features].iloc[i : i + sequence_length])
    y.append(df[target].iloc[i : i + sequence_length])

X_encoder = np.array(X_encoder)
X_decoder = np.array(X_decoder)
y = np.array(y)

# define the encoder and decoder
def encoder(encoder_features):
    y = LSTM(units=100, return_sequences=True)(encoder_features)
    y = TimeDistributed(Dense(units=1))(y)
    return y


def decoder(decoder_features, encoder_outputs):
    x = Concatenate(axis=-1)([decoder_features, encoder_outputs])
    # x = Add()([decoder_features, encoder_outputs])
    y = TimeDistributed(Dense(units=100, activation="relu"))(x)
    y = TimeDistributed(Dense(units=1))(y)
    return y


# build the model
encoder_features = Input(shape=X_encoder.shape[1:])
decoder_features = Input(shape=X_decoder.shape[1:])
encoder_outputs = encoder(encoder_features)
decoder_outputs = decoder(decoder_features, encoder_outputs)
model = Model([encoder_features, decoder_features], decoder_outputs)

# train the model
model.compile(optimizer=Adam(learning_rate=0.001), loss="mse")
model.fit([X_encoder, X_decoder], y, epochs=100, batch_size=128)


# extract the last predicted sequence
y_true = target_scaler.inverse_transform(y[-1, :])
y_pred = target_scaler.inverse_transform(model.predict([X_encoder, X_decoder])[-1, :])

# plot the last predicted sequence
plt.plot(y_true.flatten(), label="actual")
plt.plot(y_pred.flatten(), label="predicted")
plt.show()

KeyError: "None of [Index(['temp'], dtype='object')] are in the [columns]"