In [None]:
%pip install numpy pandas matplotlib seaborn scikit-learn keras keras-tuner tensorflow keras-self-attention

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow import keras
from keras import layers, regularizers
from keras.models import Sequential
import keras_tuner as kt
from keras_tuner.tuners import BayesianOptimization
from keras_self_attention import SeqSelfAttention

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting keras-self-attention
  Downloading keras-self-attention-0.51.0.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Building wheels for collected packages: keras-self-attention
  Building wheel for keras-self-attention (setup.py) ... [?25l[?25hdone
  Created wheel for keras-self-attention: filename=keras_self_attention-0.51.0-py3-none-any.whl size=18895 sha256=13e5f45e8608c0fe8f49fa5b77d650a4198fe94f6496ae5c57e9d0af4ab0f0e2
  Stored in directory: /root/.cache/pip/wheels/46/f9/96/709295c836133071c12a300729fed4027757f889c01695feea
Suc

In [None]:
# Load and preprocess data
df = pd.read_csv('balanced_dataset.csv')

# Features are every column except 'City' and 'AQI'; 'AQI' is the regression target.
X = df.drop(columns=['City', 'AQI']).to_numpy()
y = df['AQI'].to_numpy()

# NOTE(review): both scalers are fit on the complete dataset, i.e. before the
# train/test split performed in a later cell, so test-set statistics influence
# the scaling. Confirm this is acceptable for the reported metrics.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# The target gets its own scaler so predictions can later be mapped back with
# y_scaler.inverse_transform. StandardScaler needs a 2-D column, hence the
# temporary extra axis that is removed again afterwards.
y_scaler = StandardScaler()
y_scaled = y_scaler.fit_transform(y[:, np.newaxis]).ravel()


In [None]:
# Windowed sequences
# Each sample is `window_size` consecutive rows of scaled features; its target
# is the scaled AQI of the row immediately after the window.
window_size = 5
n_windows = len(X_scaled) - window_size

X_seq = [X_scaled[start:start + window_size] for start in range(n_windows)]
y_seq = [y_scaled[start + window_size] for start in range(n_windows)]

X_lstm = np.array(X_seq)
# NOTE: this rebinds `y` (previously the raw AQI column) to the windowed,
# scaled targets that line up one-to-one with X_lstm.
y = np.array(y_seq)

# NOTE(review): windows are built over the rows in file order, without regard
# to the 'City' column that was dropped earlier — confirm rows are ordered so
# that a window never mixes unrelated series.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X_lstm,
    y,
    test_size=0.1,
    random_state=42,
)


In [None]:
# MODEL BUILDER
def build_model(hp):
    """Build and compile one candidate AQI regressor for the Keras Tuner search.

    The architecture family is itself a hyperparameter (``model_type``):

    * ``'lstm'``     - 1-2 stacked bidirectional LSTM layers, optional
      self-attention after each layer, per-layer dropout.
    * ``'gru'``      - a single GRU layer, optional self-attention, dropout.
    * ``'cnn_lstm'`` - Conv1D -> MaxPooling1D -> LSTM -> dropout.

    Every variant ends in a single linear unit and is compiled with Adam,
    MSE loss and an MAE metric.

    Args:
        hp: A ``keras_tuner.HyperParameters`` instance (supplied by the tuner
            during search, or the best set found when rebuilding the model).

    Returns:
        A compiled ``Sequential`` model whose input shape is taken from the
        module-level ``X_lstm`` array (its second and third dimensions).
    """
    model_type = hp.Choice('model_type', ['lstm', 'gru', 'cnn_lstm'])
    input_shape = (X_lstm.shape[1], X_lstm.shape[2])

    model = Sequential()
    # Declare the input once, up front, instead of passing `input_shape` as a
    # keyword to individual layers (which Keras warns about, and which was
    # being handed `None` for every layer after the first).
    model.add(layers.Input(shape=input_shape))

    if model_type == 'lstm':
        # Read loop-invariant hyperparameters once rather than on every pass.
        num_layers = hp.Int('num_layers', 1, 2)
        use_attention = hp.Boolean("attention", default=True)
        l2_strength = hp.Choice('l2', [0.0, 1e-4, 1e-3])

        for i in range(num_layers):
            is_last = i == num_layers - 1
            model.add(layers.Bidirectional(layers.LSTM(
                units=hp.Int(f'units_{i}', 32, 128, step=32),
                # SeqSelfAttention works on a per-timestep sequence, so the
                # recurrent layer must keep the time axis whenever attention
                # follows it - including on the last layer.
                return_sequences=use_attention or not is_last,
                kernel_regularizer=regularizers.l2(l2_strength)
            )))
            if use_attention:
                model.add(SeqSelfAttention(attention_activation='sigmoid'))
            model.add(layers.Dropout(hp.Float(f'dropout_{i}', 0.2, 0.5, step=0.1)))

        if use_attention:
            # Collapse the time axis so the head emits one value per sample
            # rather than one value per timestep.
            model.add(layers.GlobalAveragePooling1D())

    elif model_type == 'gru':
        use_attention = hp.Boolean("attention", default=True)
        model.add(layers.GRU(
            units=hp.Int('units_gru', 32, 128, step=32),
            # Keep the sequence only when attention needs it.
            return_sequences=use_attention
        ))
        if use_attention:
            model.add(SeqSelfAttention(attention_activation='sigmoid'))
            # Reduce the attended sequence back to one vector per sample.
            model.add(layers.GlobalAveragePooling1D())
        model.add(layers.Dropout(hp.Float('dropout_gru', 0.2, 0.5, step=0.1)))

    elif model_type == 'cnn_lstm':
        model.add(layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
        model.add(layers.MaxPooling1D(pool_size=2))
        model.add(layers.LSTM(64))
        model.add(layers.Dropout(hp.Float('dropout_cnn', 0.2, 0.5, step=0.1)))

    # Single linear output: the (scaled) AQI prediction.
    model.add(layers.Dense(1, activation='linear'))

    model.compile(
        optimizer=keras.optimizers.Adam(
            learning_rate=hp.Choice('lr', [1e-2, 1e-3, 1e-4])
        ),
        loss='mse',
        metrics=['mae']
    )
    return model

In [None]:
# Tuning
# Bayesian search over the space defined by build_model, minimising the
# validation MAE reported by Keras for each trial.
print("\n\n🔍 Starting Bayesian Hyperparameter Search...\n")
tuner = BayesianOptimization(
    build_model,
    objective='val_mae',
    max_trials=15,
    executions_per_trial=1,
    # Seed the search so the sequence of sampled trials is repeatable,
    # matching the fixed random_state=42 used for the data splits.
    seed=42,
    # Trial results are persisted under tuner_dir/aqi_model_all.
    directory='tuner_dir',
    project_name='aqi_model_all'
)

# validation_split=0.2 holds out part of X_train_full for scoring each trial;
# early stopping ends a trial once val_loss has not improved for 3 epochs.
tuner.search(X_train_full, y_train_full, epochs=20, validation_split=0.2, verbose=1,
             callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)])

# Keep the single best hyperparameter set for the K-fold training that follows.
best_hp = tuner.get_best_hyperparameters(1)[0]
print("\nBest Hyperparameters:", best_hp.values)

Trial 15 Complete [00h 00m 51s]
val_mae: 0.43466416001319885

Best val_mae So Far: 0.3312261998653412
Total elapsed time: 00h 14m 46s

Best Hyperparameters: {'model_type': 'lstm', 'num_layers': 2, 'units_0': 64, 'l2': 0.0001, 'attention': False, 'dropout_0': 0.30000000000000004, 'lr': 0.001, 'dropout_cnn': 0.2, 'units_1': 32, 'dropout_1': 0.2}


In [None]:
# K-Fold Training
# Retrain the best hyperparameter configuration on 5 shuffled folds of the
# training split, record per-fold metrics in original AQI units, and keep
# (and save) the fold model with the lowest validation MAE.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
histories, fold_metrics = [], []
best_model, best_val_mae = None, float('inf')

for fold, (train_idx, val_idx) in enumerate(kf.split(X_train_full, y_train_full)):
    print(f"\n--- Fold {fold+1} ---")
    X_train, X_val = X_train_full[train_idx], X_train_full[val_idx]
    y_train, y_val = y_train_full[train_idx], y_train_full[val_idx]

    # Fresh, untrained model for every fold.
    model = build_model(best_hp)

    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=50,
        # Falls back to 32 when the tuned set carries no 'batch_size' entry.
        batch_size=best_hp.values.get('batch_size', 32),
        callbacks=[
            keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
            keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6)
        ],
        verbose=1
    )
    histories.append(history)

    # Predictions come from the weights the model holds after training
    # (the ones EarlyStopping restored, if it restored any).
    y_pred = model.predict(X_val).flatten()
    y_val_inv = y_scaler.inverse_transform(y_val.reshape(-1, 1)).flatten()
    y_pred_inv = y_scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()

    mae = mean_absolute_error(y_val_inv, y_pred_inv)
    mse = mean_squared_error(y_val_inv, y_pred_inv)
    r2 = r2_score(y_val_inv, y_pred_inv)

    fold_metrics.append({'mae': mae, 'mse': mse, 'r2': r2, 'y_val': y_val_inv, 'y_pred': y_pred_inv})

    # Rank folds by the validation MAE of the model as it exists now, in the
    # same scaled units Keras reports as val_mae. The minimum val_mae in the
    # training history can belong to an epoch whose weights were discarded,
    # because weight restoration follows val_loss, not val_mae.
    fold_val_mae = mean_absolute_error(y_val, y_pred)
    if fold_val_mae < best_val_mae:
        best_val_mae = fold_val_mae
        best_model = model
        model.save('best_aqi_model_all.h5')



--- Fold 1 ---


  super().__init__(**kwargs)


Epoch 1/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 34ms/step - loss: 0.4510 - mae: 0.4401 - val_loss: 0.3819 - val_mae: 0.4061 - learning_rate: 0.0010
Epoch 2/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 16ms/step - loss: 0.3816 - mae: 0.4021 - val_loss: 0.3715 - val_mae: 0.4017 - learning_rate: 0.0010
Epoch 3/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 17ms/step - loss: 0.3559 - mae: 0.3859 - val_loss: 0.3595 - val_mae: 0.3964 - learning_rate: 0.0010
Epoch 4/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - loss: 0.3461 - mae: 0.3806 - val_loss: 0.3459 - val_mae: 0.3848 - learning_rate: 0.0010
Epoch 5/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - loss: 0.3462 - mae: 0.3833 - val_loss: 0.3450 - val_mae: 0.3767 - learning_rate: 0.0010
Epoch 6/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 16ms/step - loss: 0.3281 - mae: 0.




--- Fold 2 ---
Epoch 1/50


  super().__init__(**kwargs)


[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 19ms/step - loss: 0.4764 - mae: 0.4580 - val_loss: 0.4111 - val_mae: 0.4175 - learning_rate: 0.0010
Epoch 2/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 16ms/step - loss: 0.3815 - mae: 0.3986 - val_loss: 0.3870 - val_mae: 0.3977 - learning_rate: 0.0010
Epoch 3/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - loss: 0.3639 - mae: 0.3928 - val_loss: 0.3816 - val_mae: 0.3851 - learning_rate: 0.0010
Epoch 4/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - loss: 0.3436 - mae: 0.3840 - val_loss: 0.3711 - val_mae: 0.3897 - learning_rate: 0.0010
Epoch 5/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 16ms/step - loss: 0.3242 - mae: 0.3700 - val_loss: 0.3614 - val_mae: 0.3723 - learning_rate: 0.0010
Epoch 6/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - loss: 0.3310 - mae: 0.3697 - val_lo

  super().__init__(**kwargs)


[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 24ms/step - loss: 0.4822 - mae: 0.4552 - val_loss: 0.3399 - val_mae: 0.3819 - learning_rate: 0.0010
Epoch 2/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step - loss: 0.3630 - mae: 0.3917 - val_loss: 0.3297 - val_mae: 0.3812 - learning_rate: 0.0010
Epoch 3/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 21ms/step - loss: 0.3584 - mae: 0.3887 - val_loss: 0.3228 - val_mae: 0.3717 - learning_rate: 0.0010
Epoch 4/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - loss: 0.3619 - mae: 0.3921 - val_loss: 0.3148 - val_mae: 0.3789 - learning_rate: 0.0010
Epoch 5/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - loss: 0.3542 - mae: 0.3896 - val_loss: 0.3008 - val_mae: 0.3502 - learning_rate: 0.0010
Epoch 6/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step - loss: 0.3358 - mae: 0.3782 - val_los




--- Fold 4 ---
Epoch 1/50


  super().__init__(**kwargs)


[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 20ms/step - loss: 0.4704 - mae: 0.4517 - val_loss: 0.3736 - val_mae: 0.3920 - learning_rate: 0.0010
Epoch 2/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 17ms/step - loss: 0.3796 - mae: 0.3999 - val_loss: 0.3599 - val_mae: 0.3853 - learning_rate: 0.0010
Epoch 3/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - loss: 0.3629 - mae: 0.3844 - val_loss: 0.3455 - val_mae: 0.3733 - learning_rate: 0.0010
Epoch 4/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 21ms/step - loss: 0.3688 - mae: 0.3844 - val_loss: 0.3384 - val_mae: 0.3785 - learning_rate: 0.0010
Epoch 5/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 18ms/step - loss: 0.3496 - mae: 0.3847 - val_loss: 0.3373 - val_mae: 0.3771 - learning_rate: 0.0010
Epoch 6/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 17ms/step - loss: 0.3303 - mae: 0.3714 - val_

  super().__init__(**kwargs)


[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 19ms/step - loss: 0.4946 - mae: 0.4549 - val_loss: 0.3854 - val_mae: 0.3979 - learning_rate: 0.0010
Epoch 2/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 16ms/step - loss: 0.3840 - mae: 0.4026 - val_loss: 0.3716 - val_mae: 0.3801 - learning_rate: 0.0010
Epoch 3/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 19ms/step - loss: 0.3700 - mae: 0.3943 - val_loss: 0.3710 - val_mae: 0.3802 - learning_rate: 0.0010
Epoch 4/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 22ms/step - loss: 0.3717 - mae: 0.3927 - val_loss: 0.3455 - val_mae: 0.3767 - learning_rate: 0.0010
Epoch 5/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 17ms/step - loss: 0.3505 - mae: 0.3855 - val_loss: 0.3362 - val_mae: 0.3626 - learning_rate: 0.0010
Epoch 6/50
[1m315/315[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - loss: 0.3475 - mae: 0.3825 - val_l

In [None]:
# Final Evaluation
# Score the selected fold model on the held-out test split. Predictions and
# targets are both mapped back through y_scaler before the metrics are computed.
y_test_pred = best_model.predict(X_test).flatten()
y_test_pred_inv, y_test_inv = (
    y_scaler.inverse_transform(scaled_values.reshape(-1, 1)).flatten()
    for scaled_values in (y_test_pred, y_test)
)

print("\n🎯 Final Test Set Evaluation:")
test_metric_fns = (
    ("MAE:", mean_absolute_error),
    ("MSE:", mean_squared_error),
    ("R2:", r2_score),
)
for metric_label, metric_fn in test_metric_fns:
    print(metric_label, metric_fn(y_test_inv, y_test_pred_inv))

[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step

🎯 Final Test Set Evaluation:
MAE: 31.87061567763246
MSE: 2619.4853505288866
R2: 0.6795731260636935
