# Importing Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout,LSTM
from sklearn.metrics import mean_squared_error
from keras.models import load_model
from tqdm import tqdm

# Reading data from csv file

In [2]:
df =  pd.read_csv('D:/python project/gandhinagar/data.csv')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17254 entries, 0 to 17253
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0.1   17254 non-null  int64  
 1   Unnamed: 0     17254 non-null  int64  
 2   From Date      17254 non-null  object 
 3   PM2.5 (ug/m3)  17254 non-null  float64
 4   PM10 (ug/m3)   17254 non-null  float64
 5   SO2 (ug/m3)    17254 non-null  float64
 6   CO (mg/m3)     17254 non-null  float64
 7   Ozone (ug/m3)  17254 non-null  float64
 8   NO2 (ug/m3)    17254 non-null  float64
dtypes: float64(6), int64(2), object(1)
memory usage: 1.2+ MB


converting 'From Date' column to datetime object and setting it as index

In [4]:
df['From Date'] = pd.to_datetime(df['From Date'])
df.set_index('From Date', inplace=True)
columns = ['PM2.5 (ug/m3)','PM10 (ug/m3)', 'SO2 (ug/m3)', 'CO (mg/m3)', 'Ozone (ug/m3)', 'NO2 (ug/m3)']
df = df[columns]
df.head()

Unnamed: 0_level_0,PM2.5 (ug/m3),PM10 (ug/m3),SO2 (ug/m3),CO (mg/m3),Ozone (ug/m3),NO2 (ug/m3)
From Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01 02:00:00,53.64,94.12,14.14,0.8,2.29,11.55
2021-01-01 03:00:00,47.29,84.74,13.6,0.81,2.25,12.03
2021-01-01 04:00:00,46.46,74.6,11.74,0.8,2.28,12.1
2021-01-01 05:00:00,39.32,66.39,11.99,0.81,2.3,12.45
2021-01-01 06:00:00,39.66,64.98,12.2,0.81,2.3,12.4


Method to prepare dataset which contains previous 24 hours data as input and 25th hour data as output

In [5]:
def to_supervised(train):
  window_size = 24
  X = []
  Y = []
  for i in range(window_size, len(train)):
    X.append(train[i-window_size:i,:])
    Y.append(train[i,0:1])
    
  return X,Y

# Model Training

In [50]:
for i in tqdm(columns):
    # Create a DataFrame containing only the current column
    df_column = df[[i]]

    # Convert values to a NumPy array
    values = df_column.values

    # Scale the data
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_dataset = scaler.fit_transform(values)

    # Create supervised data
    X, Y = to_supervised(scaled_dataset)
    X = np.array(X)
    Y = np.array(Y)

    # Split the data into training and testing sets
    n_train = 24 * 365
    X_train, X_test = X[:-n_train, :], X[-n_train:, :]
    Y_train, Y_test = Y[:-n_train], Y[-n_train:]

    # Build and train the LSTM model
    model = Sequential()
    model.add(LSTM(units=64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(0.2))
    model.add(LSTM(units=64, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(units=64))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, Y_train, epochs=25, batch_size=32)

    subs = i.split()[0]
    # Save the trained model
    model.save(f'D:/python project/gandhinagar/model/model_{subs}.h5')

  0%|          | 0/6 [00:00<?, ?it/s]

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


 17%|█▋        | 1/6 [01:20<06:43, 80.62s/it]

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


 33%|███▎      | 2/6 [02:42<05:25, 81.41s/it]

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


 50%|█████     | 3/6 [03:58<03:57, 79.03s/it]

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


 67%|██████▋   | 4/6 [05:14<02:35, 77.62s/it]

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


 83%|████████▎ | 5/6 [06:26<01:15, 75.77s/it]

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


100%|██████████| 6/6 [07:38<00:00, 76.48s/it]


# Model Prediction

In [6]:
predict_subs = {}
for i in df.columns:
    data_columns = df[[i]]
    values = data_columns.values
    # Scale the data between 0 and 1
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_dataset = scaler.fit_transform(values)
    X, Y = to_supervised(scaled_dataset)
    X = np.array(X)
    Y = np.array(Y)
    n_train = 24 * 365
    X_train, X_test = X[:-n_train,], X[-n_train:,]
    Y_train, Y_test = Y[:-n_train,], Y[-n_train:,]

    
    num_hours_to_predict = 24
    subs = i.split()[0]
    model = load_model(f'D:/python project/gandhinagar/model/model_{subs}.h5')
    initial_input_sequence = X_test[-1]
    predicted_values = []

    for _ in range(num_hours_to_predict):
        next_value = model.predict(initial_input_sequence.reshape(1, X_train.shape[1], X_train.shape[2]))[0][0]
        predicted_values.append(next_value)
        initial_input_sequence = np.roll(initial_input_sequence, shift=-1, axis=0)
        initial_input_sequence[-1] = next_value

    # Convert predicted_values list to a NumPy array after the loop
    predicted_values = np.array(predicted_values)

    # Inverse transform the scaled predictions
    predicted_values = scaler.inverse_transform(predicted_values.reshape(-1, 1))

    # Add the predicted values to the dictionary
    predict_subs[f'prediction_{i}'] = predicted_values.flatten().tolist()



In [8]:
df_subs = pd.DataFrame(predict_subs)
df_subs

Unnamed: 0,prediction_PM2.5 (ug/m3),prediction_PM10 (ug/m3),prediction_SO2 (ug/m3),prediction_CO (mg/m3),prediction_Ozone (ug/m3),prediction_NO2 (ug/m3)
0,28.707945,49.802208,4.454166,0.685921,31.403561,13.272923
1,29.051159,55.317547,4.793204,0.658704,26.756123,14.60322
2,29.44545,61.307373,5.139691,0.647925,21.958176,15.706888
3,29.950275,67.113937,5.520945,0.650357,17.387518,16.624361
4,30.600393,72.151138,5.938702,0.661163,13.29855,17.355751
5,31.351585,76.089806,6.380577,0.675374,9.797647,17.918043
6,32.094566,78.788933,6.82359,0.687045,6.859224,18.322838
7,32.719582,80.319763,7.23346,0.692236,4.382643,18.579504
8,33.140102,80.887665,7.58583,0.689217,2.257475,18.709198
9,33.309483,80.750519,7.891171,0.678569,0.425449,18.722467


In [9]:
start_date = pd.Timestamp('2023-11-03 10:00:00')
num_hours = 24
future_dates = [start_date + pd.DateOffset(hours=i) for i in range(num_hours)]

# Convert the list of datetime stamps to a pandas DataFrame
future_dates_df = pd.DataFrame({'Future_Date': future_dates})
data_final = pd.concat([future_dates_df,df_subs],axis=1)
data_final

Unnamed: 0,Future_Date,prediction_PM2.5 (ug/m3),prediction_PM10 (ug/m3),prediction_SO2 (ug/m3),prediction_CO (mg/m3),prediction_Ozone (ug/m3),prediction_NO2 (ug/m3)
0,2023-11-03 10:00:00,28.707945,49.802208,4.454166,0.685921,31.403561,13.272923
1,2023-11-03 11:00:00,29.051159,55.317547,4.793204,0.658704,26.756123,14.60322
2,2023-11-03 12:00:00,29.44545,61.307373,5.139691,0.647925,21.958176,15.706888
3,2023-11-03 13:00:00,29.950275,67.113937,5.520945,0.650357,17.387518,16.624361
4,2023-11-03 14:00:00,30.600393,72.151138,5.938702,0.661163,13.29855,17.355751
5,2023-11-03 15:00:00,31.351585,76.089806,6.380577,0.675374,9.797647,17.918043
6,2023-11-03 16:00:00,32.094566,78.788933,6.82359,0.687045,6.859224,18.322838
7,2023-11-03 17:00:00,32.719582,80.319763,7.23346,0.692236,4.382643,18.579504
8,2023-11-03 18:00:00,33.140102,80.887665,7.58583,0.689217,2.257475,18.709198
9,2023-11-03 19:00:00,33.309483,80.750519,7.891171,0.678569,0.425449,18.722467


# Method to calculate AQI Index

In [10]:
def calculate_aqi(pollutant_name, concentration):
    # Define the AQI breakpoints and associated sub-index values for each pollutant
    breakpoints = {
        "PM2.5": [(0, 30), (31, 60), (61, 90), (91, 120), (121, 250), (251, 350)],
        "PM10": [(0,50),(51,100),(101,250),(251,350),(351,430),(431,500)],
        "Ozone": [(0,50),(51,100),(101,168),(169,208),(209,748),(749,1000)],
        "CO": [(0, 1.0), (1.1, 2.0), (2.1, 10), (10.1, 17), (17.1, 34),(34.1,50)],
        "NO2": [(0, 40), (41, 80), (81, 180), (181, 280), (281, 400),(401,500)],
        "SO2": [(0, 40), (41, 80), (81, 380), (381, 800), (801, 1600),(1601,2000)],
        "NH3": [(0,200),(201,400),(401,800),(801,1200),(1201,1800),(1801,2000)]
    }
    st_range=[(0,50),(51,100),(101,200),(201,300),(301,400),(401,500)]

    for i in range(len(breakpoints[pollutant_name])):
        low, high = breakpoints[pollutant_name][i]
        if low <= concentration <= high:
            aqi_low, aqi_high = st_range[i]
            aqi = ((aqi_high - aqi_low) / (high - low)) * (concentration - low) + aqi_low
            return aqi

In [11]:
data_final.set_index(data_final['Future_Date'],inplace=True)
data_final.head()

Unnamed: 0_level_0,Future_Date,prediction_PM2.5 (ug/m3),prediction_PM10 (ug/m3),prediction_SO2 (ug/m3),prediction_CO (mg/m3),prediction_Ozone (ug/m3),prediction_NO2 (ug/m3)
Future_Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-11-03 10:00:00,2023-11-03 10:00:00,28.707945,49.802208,4.454166,0.685921,31.403561,13.272923
2023-11-03 11:00:00,2023-11-03 11:00:00,29.051159,55.317547,4.793204,0.658704,26.756123,14.60322
2023-11-03 12:00:00,2023-11-03 12:00:00,29.44545,61.307373,5.139691,0.647925,21.958176,15.706888
2023-11-03 13:00:00,2023-11-03 13:00:00,29.950275,67.113937,5.520945,0.650357,17.387518,16.624361
2023-11-03 14:00:00,2023-11-03 14:00:00,30.600393,72.151138,5.938702,0.661163,13.29855,17.355751


In [12]:
data_new = data_final.iloc[:,1:]
data_new.columns

Index(['prediction_PM2.5 (ug/m3)', 'prediction_PM10 (ug/m3)',
       'prediction_SO2 (ug/m3)', 'prediction_CO (mg/m3)',
       'prediction_Ozone (ug/m3)', 'prediction_NO2 (ug/m3)'],
      dtype='object')

# calculating AQI value of each pollutant

In [13]:
data_new['aqi_pm2.5'] = data_new.apply(lambda row: calculate_aqi("PM2.5", row['prediction_PM2.5 (ug/m3)']), axis=1)

data_new['aqi_pm10'] = data_new.apply(lambda row: calculate_aqi("PM10", row['prediction_PM10 (ug/m3)']), axis=1)

data_new['aqi_so2'] = data_new.apply(lambda row: calculate_aqi("SO2", row['prediction_SO2 (ug/m3)']), axis=1)

data_new['aqi_co'] = data_new.apply(lambda row: calculate_aqi("CO", row['prediction_CO (mg/m3)']), axis=1)

data_new['aqi_ozone'] = data_new.apply(lambda row: calculate_aqi("Ozone", row['prediction_Ozone (ug/m3)']), axis=1)

data_new['aqi_no2'] = data_new.apply(lambda row: calculate_aqi("NO2", row['prediction_NO2 (ug/m3)']), axis=1)


#  Calculating AQI Index

In [14]:
def calculate_max_of_columns(row, columns):
    return row[columns].max()
column = ['aqi_pm2.5','aqi_pm10','aqi_so2','aqi_co','aqi_ozone','aqi_no2']
data_new['AQI'] = data_new.apply(lambda row: calculate_max_of_columns(row,column),axis=1)

# Save predicted value to csv file

In [15]:
data_new.to_csv('D:/python project/gandhinagar/final_gandhinagar.csv')