# Libraries

In [11]:
import numpy as np
import matplotlib.pyplot as mp
import pandas as pd
import seaborn as sns
from pyESN import ESN
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.multioutput import MultiOutputClassifier
from xgboost import XGBClassifier
import os
import itertools
from collections import Counter
import mlflow
import pickle

In [2]:
# Select the MLflow experiment that this notebook logs to.
mlflow.set_experiment(experiment_name="AQI_Prediction_Hybrid")

# Tag the run with the model variant and a short free-text description.
# NOTE(review): no run is opened explicitly in this cell; per the MLflow docs,
# set_tag starts a run implicitly when none is active -- confirm that is
# intended and that the run is ended somewhere later in the notebook.
mlflow.set_tag(key="version", value="lagged+pollutants")
mlflow.set_tag(key="notes", value="Using past AQI values + predicted pollutants")

2025/07/17 19:12:06 INFO mlflow.tracking.fluent: Experiment with name 'AQI_Prediction_Hybrid' does not exist. Creating a new experiment.


# Data prep

In [20]:
# Project-local helper that builds the (x, y) arrays for one city.
from preprocessing import prepare_xy

# Load the raw workbook into `data`.
# NOTE(review): this is an absolute, machine-specific path; consider deriving it
# from a configurable data directory so the notebook runs elsewhere.
data = pd.read_excel(
    io="/workspaces/Time-series-prediction-for-pollution-data/air_pollution_data.xlsx"
)

# Build the arrays for Ahmedabad without lag columns and without scaling.
# NOTE(review): `data` is not passed to prepare_xy, so presumably the helper
# loads its own copy of the dataset -- confirm, and drop `data` if it is unused.
x, y = prepare_xy("Ahmedabad", scale=False, use_lags=False)

In [21]:
# Display x, the first array returned by prepare_xy(..., use_lags=False) above.
x

array([[ 520.71,    2.38,   16.28, ...,   65.96,   72.13,    8.36],
       [1682.28,    7.71,   54.84, ...,  120.95,  154.53,   27.36],
       [1815.8 ,   16.54,   49.35, ...,  133.47,  172.63,   28.12],
       ...,
       [ 390.53,    0.  ,   12.34, ...,   18.04,   68.64,    6.46],
       [ 307.08,    0.  ,    8.74, ...,   18.49,   73.56,    3.86],
       [ 283.72,    0.  ,    6.51, ...,   12.62,   63.86,    2.66]],
      shape=(904, 8))

In [10]:
# NOTE(review): prepare_xy is also imported in the previous data-prep cell;
# the import is repeated here so this cell still runs on its own.
from preprocessing import prepare_xy

# Same city and scaling choice as the unlagged arrays, but with lag columns
# switched on (use_lags=True).
x_lagged, y_lagged = prepare_xy("Ahmedabad", scale=False, use_lags=True)

In [6]:
# Display x_lagged, the first array returned by prepare_xy(..., use_lags=True).
x_lagged

array([[2.18964e+03, 2.39200e+01, 5.89500e+01, ..., 5.00000e+00,
        5.00000e+00, 5.00000e+00],
       [3.84522e+03, 5.72200e+01, 1.37090e+02, ..., 5.00000e+00,
        5.00000e+00, 5.00000e+00],
       [2.88391e+03, 2.79400e+01, 9.87100e+01, ..., 5.00000e+00,
        5.00000e+00, 5.00000e+00],
       ...,
       [3.90530e+02, 0.00000e+00, 1.23400e+01, ..., 3.00000e+00,
        3.00000e+00, 3.00000e+00],
       [3.07080e+02, 0.00000e+00, 8.74000e+00, ..., 3.00000e+00,
        3.00000e+00, 3.00000e+00],
       [2.83720e+02, 0.00000e+00, 6.51000e+00, ..., 3.00000e+00,
        3.00000e+00, 3.00000e+00]], shape=(900, 12))

In [7]:
# Display y, the second array returned by prepare_xy(..., use_lags=False).
# NOTE(review): this cell's execution count (7) is lower than that of the cell
# assigning y (20), so the saved output may come from an earlier run -- re-run
# the notebook top to bottom to confirm.
y

array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5,
       4, 4, 4, 5, 5, 5, 5, 5, 4, 5, 4, 5, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5,
       5, 4, 4, 5, 5, 4, 5, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 5, 4, 4, 4,
       5, 4, 4, 5, 4, 4, 5, 5, 5, 4, 5, 5, 5, 5, 5, 5, 4, 5, 5, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2, 1, 2, 4, 5, 5, 5, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 3, 3, 2, 2, 2, 2, 1, 2, 3, 3, 4, 3, 3, 2, 2,
       2, 2, 5, 4, 3, 2, 4, 2, 2, 3, 2, 2, 4, 3, 4, 3, 3, 2, 2, 3, 3, 2,
       4, 5, 2, 3, 5, 5, 2, 2, 2, 2, 4, 4, 4, 3, 1, 3, 4, 4, 4, 4, 3, 3,
       2, 3, 4, 4, 4, 4, 3, 2, 2, 3, 2, 3, 2, 2, 3, 2, 2, 2, 2, 4, 1, 2,
       2, 3, 2, 2, 2, 3, 3, 5, 5, 2, 2, 2, 2, 4, 4, 5, 2, 2, 2, 2, 5, 5,
       2, 2, 2, 2, 2, 2, 3, 3, 4, 4, 5, 5, 5, 4, 2,

In [8]:
# Forecasting configuration.
# NOTE(review): the saved output of this cell reads "Loaded all ESN models."
# although nothing here prints or loads anything; the model-loading code seems
# to have been edited out -- restore it so `esn_models` exists on a fresh kernel.

# One prediction step per AQI head (6 heads).
steps_needed = 6

# Look-ahead distance; the previous notebook also looked ahead 20 timestamps.
forecast_horizon = 20

# Presumably the number of past timesteps used as input -- TODO confirm.
lookback = 5

Loaded all ESN models.


In [19]:
# Look up the entry stored under the 'co' key of esn_models.
# NOTE(review): esn_models is not defined in any cell visible in this part of
# the notebook; make sure the cell that loads it runs before this one.
esn_models['co']

<pyESN.ESN at 0x7fc77d717c80>