In [1]:
import pandas as pd

df = pd.read_csv("../data/processed/aqi_weather_timeseries.csv")

# Convert timestamp to datetime
df["timestamp"] = pd.to_datetime(df["timestamp"])

# Sort by time
df = df.sort_values("timestamp")

df.head(), df.shape

(      city  predicted_aqi  temperature  humidity  wind_speed  \
 0  Silchar         56.015         18.8      64.0         3.6   
 1  Silchar         56.015         18.8      64.0         3.6   
 2  Silchar         56.015         18.8      64.0         3.6   
 3  Silchar         56.015         18.8      64.0         3.6   
 4  Silchar         56.015         18.8      64.0         3.6   
 
             timestamp  
 0 2026-01-18 18:21:29  
 1 2026-01-18 18:21:35  
 2 2026-01-18 18:21:38  
 3 2026-01-18 18:21:40  
 4 2026-01-18 18:21:43  ,
 (777, 6))

In [2]:
features = ["predicted_aqi", "temperature", "humidity", "wind_speed"]
data = df[features].values

In [3]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data)

In [4]:
import numpy as np

SEQ_LENGTH = 8   # last 8 timesteps
X, y = [], []

for i in range(len(data_scaled) - SEQ_LENGTH):
    X.append(data_scaled[i:i+SEQ_LENGTH])
    y.append(data_scaled[i+SEQ_LENGTH][0])  # predict AQI only

X = np.array(X)
y = np.array(y)

X.shape, y.shape

((769, 8, 4), (769,))

In [5]:
split = int(0.8 * len(X))

X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(SEQ_LENGTH, X.shape[2])),
    LSTM(32),
    Dense(1)
])

model.compile(optimizer="adam", loss="mse")
model.summary()

  super().__init__(**kwargs)


In [7]:
model.fit(X_train, y_train, epochs=30, batch_size=16, validation_data=(X_test, y_test))

Epoch 1/30
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 0.0473 - val_loss: 0.0246
Epoch 2/30
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0355 - val_loss: 0.0189
Epoch 3/30
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0326 - val_loss: 0.0174
Epoch 4/30
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0298 - val_loss: 0.0171
Epoch 5/30
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0295 - val_loss: 0.0145
Epoch 6/30
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0346 - val_loss: 0.0151
Epoch 7/30
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - loss: 0.0247 - val_loss: 0.0183
Epoch 8/30
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0245 - val_loss: 0.0113
Epoch 9/30
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x2613de3ed20>

In [8]:
model.save("../models/aqi_lstm_forecast.h5")

import joblib
joblib.dump(scaler, "../models/aqi_lstm_scaler.pkl")

print("LSTM model saved")



LSTM model saved
