# Time Series Forecasting using LSTM with Attention

This project predicts future values of a multivariate time series dataset
using Deep Learning models such as LSTM and Attention mechanisms.


# DATA IMPORTS

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Dense, Input, Attention


# Dataset creation (Synthetic – Cultus la sonnathu same)

In [7]:
np.random.seed(42)

dates = pd.date_range(start="2020-01-01", periods=1000, freq="D")

data = pd.DataFrame({
    "feature_1": np.sin(np.arange(1000)/50) + np.random.normal(0,0.1,1000),
    "feature_2": np.cos(np.arange(1000)/30),
    "feature_3": np.random.normal(0,1,1000),
    "feature_4": np.arange(1000)*0.001,
    "feature_5": np.sin(np.arange(1000)/100)
}, index=dates)

data.head()


Unnamed: 0,feature_1,feature_2,feature_3,feature_4,feature_5
2020-01-01,0.049671,1.0,1.399355,0.0,0.0
2020-01-02,0.006172,0.999444,0.924634,0.001,0.01
2020-01-03,0.104758,0.997779,0.05963,0.002,0.019999
2020-01-04,0.212267,0.995004,-0.646937,0.003,0.029996
2020-01-05,0.056499,0.991124,0.698223,0.004,0.039989


# Scaling

In [8]:
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)


# Create time series sequences

In [9]:
def create_sequences(data, seq_len=30):
    X, y = [], []
    for i in range(len(data)-seq_len):
        X.append(data[i:i+seq_len])
        y.append(data[i+seq_len][0])
    return np.array(X), np.array(y)

X, y = create_sequences(scaled_data)


# Train-test split (Time aware)

In [10]:
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]


# LSTM model

In [17]:
model_lstm = tf.keras.Sequential([
    LSTM(64, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(1)
])

model_lstm.compile(optimizer="adam", loss="mse")
model_lstm.fit(X_train, y_train, epochs=5, batch_size=32)


Epoch 1/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.1423
Epoch 2/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.0080
Epoch 3/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 0.0034
Epoch 4/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 0.0027
Epoch 5/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - loss: 0.0025


<keras.src.callbacks.history.History at 0x7fec3ca7bb90>

# LSTM + Attention

In [19]:
from tensorflow.keras.layers import Input, LSTM, Dense, Attention, GlobalAveragePooling1D
from tensorflow.keras.models import Model

inputs = Input(shape=(X_train.shape[1], X_train.shape[2]))

lstm_out = LSTM(64, return_sequences=True)(inputs)

attention = Attention()([lstm_out, lstm_out])

context = GlobalAveragePooling1D()(attention)

output = Dense(1)(context)

model_att = Model(inputs, output)
model_att.compile(optimizer="adam", loss="mse")

model_att.fit(X_train, y_train, epochs=20, batch_size=32)



Epoch 1/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 0.4206
Epoch 2/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 0.0432
Epoch 3/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.0150
Epoch 4/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.0119
Epoch 5/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.0103
Epoch 6/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.0093
Epoch 7/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.0083
Epoch 8/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - loss: 0.0070
Epoch 9/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0057
Epoch 10/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - loss: 0.0044

<keras.src.callbacks.history.History at 0x7fec3cec0500>

# Evaluation

In [20]:
pred_lstm = model_lstm.predict(X_test)
pred_att = model_att.predict(X_test)

rmse_lstm = np.sqrt(mean_squared_error(y_test, pred_lstm))
rmse_att = np.sqrt(mean_squared_error(y_test, pred_att))

print("LSTM RMSE:", rmse_lstm)
print("Attention RMSE:", rmse_att)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step
LSTM RMSE: 0.07307075460107747
Attention RMSE: 0.07921536523131088



## Conclusion
The attention-based LSTM model achieved performance comparable to the baseline LSTM.
This demonstrates that attention mechanisms can effectively capture temporal patterns
in multivariate time series data and can be further improved with tuning and larger datasets.
