In [1]:
import pandas as pd
import numpy as np
from keras.optimizers import Adam
from keras.models import load_model
from joblib import load
from keras import backend as K

In [2]:
# Load the saved model
loaded_ann_model = load_model('F:/Works/Application of ML/Final/ANN_trained_model.h5')



In [3]:
# Compile the loaded ANN model with the custom optimizer
loaded_ann_model.compile(optimizer=Adam(learning_rate=0.0001 ), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [4]:
def mse(y_true, y_pred):
    return K.mean(K.square(y_pred - y_true), axis=-1)

In [5]:
# Load the saved model with custom objects
loaded_lstm_model = load_model('F:/Works/Application of ML/Final/LSTM_trained_model.h5', custom_objects={'mse': mse})



In [6]:
# Load the scaler for LSTM
scaler_LSTM = load('F:/Works/Application of ML/Final/scaler_LSTM.joblib')

# Load the scaler for ANN
scaler_ANN = load('F:/Works/Application of ML/Final/scaler_ANN.joblib')

In [7]:
# Load the original data used during training
LSTM_cleaned_data = pd.read_csv('F:/Works/Application of ML/Final/LSTM_cleaned_data.csv')  # Load your original training data here
LSTM_cleaned_data

Unnamed: 0,PM2.5,PM10,SO2,NO2,CO,O3
0,16.665354,41.316808,2.818773,16.060654,0.430841,36.267782
1,17.288915,39.965232,2.423315,14.354335,0.430870,27.113356
2,9.280876,21.127849,1.855501,13.340066,0.410151,20.977514
3,22.639594,53.866619,2.739353,12.817808,0.403904,33.445877
4,24.576782,59.886589,3.123391,17.698004,0.488798,32.663070
...,...,...,...,...,...,...
2705,9.504518,15.533632,0.965425,13.304156,0.385762,29.021385
2706,11.450000,22.141246,1.118793,10.305741,0.343775,35.827846
2707,10.667974,18.257115,0.998958,9.099509,0.331951,37.641503
2708,12.035394,23.145436,1.186963,10.338576,0.314859,31.804717


In [8]:
# Fit the scaler with the original data
scaler_LSTM.fit(LSTM_cleaned_data)

In [10]:
# Define the range of dates for which you want to make predictions
start_date = pd.to_datetime('2024-06-01')
end_date = pd.to_datetime('2024-06-30')

In [11]:
# Define the window size and features
window_size = 10  # Adjust this based on your model architecture
num_features = 6  # Adjust this based on the number of features in your input data

# Initialize the input data with dummy values (you should replace this with your actual data)
input_data = np.random.rand(window_size, num_features)

# Create a sequence of future dates within the specified range
future_dates = pd.date_range(start=start_date, end=end_date, freq='D')

# Create an empty array to store the predicted values
predicted_values = []

# Predict the next value for each day in the specified range
for _ in range(len(future_dates)):
    # Reshape the input data to match the model input shape
    input_data_reshaped = np.reshape(input_data, (1, input_data.shape[0], input_data.shape[1]))
    
    # Predict the next value using the loaded LSTM model
    next_value = loaded_lstm_model.predict(input_data_reshaped)[0]
    
    # Append the predicted value to the list of predicted values
    predicted_values.append(next_value)
    
    # Update the input data by removing the first time step and appending the predicted value
    input_data = np.append(input_data[1:], [next_value], axis=0)

# Inverse transform the predicted values to get the original scale
predicted_values = scaler_LSTM.inverse_transform(predicted_values)

# Create a DataFrame to store the predicted values along with corresponding dates
predicted_df = pd.DataFrame(predicted_values, columns=['PM2.5', 'PM10', 'SO2', 'NO2', 'CO', 'O3'])
predicted_df['Date'] = future_dates

# Display the predicted DataFrame
predicted_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 315ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

Unnamed: 0,PM2.5,PM10,SO2,NO2,CO,O3,Date
0,22.090008,57.351091,2.057945,11.900409,-0.181051,32.063362,2024-06-01
1,20.316057,52.20809,2.026573,11.350058,-0.177466,31.266481,2024-06-02
2,19.037691,47.587899,1.983645,10.888183,-0.186128,30.218665,2024-06-03
3,17.87087,43.651716,1.933157,10.442662,-0.190738,29.27035,2024-06-04
4,16.91302,40.537258,1.901337,10.141656,-0.198077,28.382688,2024-06-05
5,15.915968,37.631847,1.880724,9.755081,-0.199818,27.564741,2024-06-06
6,15.101709,35.216765,1.836342,9.430283,-0.203116,26.992079,2024-06-07
7,14.359348,33.034406,1.807774,9.120653,-0.215274,26.458065,2024-06-08
8,13.812632,31.209857,1.764211,8.909091,-0.242009,25.478546,2024-06-09
9,13.330082,29.999948,1.739099,8.837189,-0.242074,24.904166,2024-06-10


In [12]:
# PM2.5 Sub-Index calculation
def get_PM25_subindex(x):
    if x <= 30:
        return x * 50 / 30
    elif x <= 60:
        return 50 + (x - 30) * 50 / 30
    elif x <= 90:
        return 100 + (x - 60) * 100 / 30
    elif x <= 120:
        return 200 + (x - 90) * 100 / 30
    elif x <= 250:
        return 300 + (x - 120) * 100 / 130
    elif x > 250:
        return 400 + (x - 250) * 100 / 130
    else:
        return 0

predicted_df["PM2.5_SubIndex"] = predicted_df["PM2.5"].apply(lambda x: get_PM25_subindex(x))

In [13]:
# PM10 Sub-Index calculation
def get_PM10_subindex(x):
    if x <= 50:
        return x
    elif x <= 100:
        return x
    elif x <= 250:
        return 100 + (x - 100) * 100 / 150
    elif x <= 350:
        return 200 + (x - 250)
    elif x <= 430:
        return 300 + (x - 350) * 100 / 80
    elif x > 430:
        return 400 + (x - 430) * 100 / 80
    else:
        return 0

predicted_df["PM10_SubIndex"] = predicted_df["PM10"].apply(lambda x: get_PM10_subindex(x))

In [14]:
# SO2 Sub-Index calculation
def get_SO2_subindex(x):
    if x <= 40:
        return x * 50 / 40
    elif x <= 80:
        return 50 + (x - 40) * 50 / 40
    elif x <= 380:
        return 100 + (x - 80) * 100 / 300
    elif x <= 800:
        return 200 + (x - 380) * 100 / 420
    elif x <= 1600:
        return 300 + (x - 800) * 100 / 800
    elif x > 1600:
        return 400 + (x - 1600) * 100 / 800
    else:
        return 0

predicted_df["SO2_SubIndex"] = predicted_df["SO2"].apply(lambda x: get_SO2_subindex(x))

In [15]:
# NO2 Sub-Index calculation
def get_NO2_subindex(x):
    if x <= 40:
        return x * 50 / 40
    elif x <= 80:
        return 50 + (x - 40) * 50 / 40
    elif x <= 180:
        return 100 + (x - 80) * 100 / 100
    elif x <= 280:
        return 200 + (x - 180) * 100 / 100
    elif x <= 400:
        return 300 + (x - 280) * 100 / 120
    elif x > 400:
        return 400 + (x - 400) * 100 / 120
    else:
        return 0

predicted_df["NO2_SubIndex"] = predicted_df["NO2"].apply(lambda x: get_NO2_subindex(x))

In [16]:
# CO Sub-Index calculation
def get_CO_subindex(x):
    if x <= 1:
        return x * 50 / 1
    elif x <= 2:
        return 50 + (x - 1) * 50 / 1
    elif x <= 10:
        return 100 + (x - 2) * 100 / 8
    elif x <= 17:
        return 200 + (x - 10) * 100 / 7
    elif x <= 34:
        return 300 + (x - 17) * 100 / 17
    elif x > 34:
        return 400 + (x - 34) * 100 / 17
    else:
        return 0

predicted_df["CO_SubIndex"] = predicted_df["CO"].apply(lambda x: get_CO_subindex(x))

In [17]:
# O3 Sub-Index calculation
def get_O3_subindex(x):
    if x <= 50:
        return x * 50 / 50
    elif x <= 100:
        return 50 + (x - 50) * 50 / 50
    elif x <= 168:
        return 100 + (x - 100) * 100 / 68
    elif x <= 208:
        return 200 + (x - 168) * 100 / 40
    elif x <= 748:
        return 300 + (x - 208) * 100 / 539
    elif x > 748:
        return 400 + (x - 400) * 100 / 539
    else:
        return 0

predicted_df["O3_SubIndex"] = predicted_df["O3"].apply(lambda x: get_O3_subindex(x))

In [18]:
predicted_df.head()

Unnamed: 0,PM2.5,PM10,SO2,NO2,CO,O3,Date,PM2.5_SubIndex,PM10_SubIndex,SO2_SubIndex,NO2_SubIndex,CO_SubIndex,O3_SubIndex
0,22.090008,57.351091,2.057945,11.900409,-0.181051,32.063362,2024-06-01,36.81668,57.351091,2.572431,14.875512,-9.052539,32.063362
1,20.316057,52.20809,2.026573,11.350058,-0.177466,31.266481,2024-06-02,33.860094,52.20809,2.533216,14.187572,-8.873289,31.266481
2,19.037691,47.587899,1.983645,10.888183,-0.186128,30.218665,2024-06-03,31.729486,47.587899,2.479556,13.610229,-9.3064,30.218665
3,17.87087,43.651716,1.933157,10.442662,-0.190738,29.27035,2024-06-04,29.784783,43.651716,2.416446,13.053328,-9.536891,29.27035
4,16.91302,40.537258,1.901337,10.141656,-0.198077,28.382688,2024-06-05,28.188367,40.537258,2.376671,12.67707,-9.903839,28.382688


In [19]:
# Assuming 'df' is your DataFrame
real_data = predicted_df.iloc[:, -6:]
real_data.head()

Unnamed: 0,PM2.5_SubIndex,PM10_SubIndex,SO2_SubIndex,NO2_SubIndex,CO_SubIndex,O3_SubIndex
0,36.81668,57.351091,2.572431,14.875512,-9.052539,32.063362
1,33.860094,52.20809,2.533216,14.187572,-8.873289,31.266481
2,31.729486,47.587899,2.479556,13.610229,-9.3064,30.218665
3,29.784783,43.651716,2.416446,13.053328,-9.536891,29.27035
4,28.188367,40.537258,2.376671,12.67707,-9.903839,28.382688


In [20]:
# Transform the new data using the loaded scaler
real_data_scaled = scaler_ANN.transform(real_data)
real_data_scaled

array([[-0.74496743, -0.68494418, -1.01582858, -0.84422611, -1.14995017,
        -0.53945691],
       [-0.77478398, -0.75173347, -1.01898648, -0.86995732, -1.14726298,
        -0.56013824],
       [-0.79627073, -0.81173331, -1.02330764, -0.89155186, -1.15375592,
        -0.58733203],
       [-0.81588265, -0.86285033, -1.02838973, -0.9123818 , -1.1572113 ,
        -0.61194349],
       [-0.83198218, -0.90329605, -1.03159273, -0.92645508, -1.16271237,
        -0.63498083],
       [-0.84874063, -0.94102701, -1.03366764, -0.94452906, -1.16401765,
        -0.65620888],
       [-0.86242667, -0.97239034, -1.03813507, -0.95971473, -1.16648927,
        -0.67107107],
       [-0.87490428, -1.00073141, -1.04101077, -0.9741912 , -1.17560298,
        -0.68493025],
       [-0.88409347, -1.02442581, -1.04539579, -0.98408259, -1.19564253,
        -0.71035155],
       [-0.89220416, -1.04013823, -1.0479235 , -0.98744431, -1.19569104,
        -0.72525834],
       [-0.90181525, -1.06130899, -1.05055022, -0.

In [21]:
# Make predictions using the loaded model
predictions = loaded_ann_model.predict(real_data_scaled)
predictions

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 443ms/step


array([[1.37839904e-02, 9.86209452e-01, 6.54512814e-06, 4.68453076e-13,
        2.55820105e-13, 1.10547405e-11],
       [1.57298833e-01, 8.42698514e-01, 2.65343738e-06, 1.64483810e-13,
        1.88961138e-13, 1.96450460e-11],
       [7.07399845e-01, 2.92599559e-01, 5.52954134e-07, 3.72039469e-14,
        7.57738260e-14, 1.51578819e-11],
       [9.28854108e-01, 7.11457506e-02, 9.53922310e-08, 6.70925311e-15,
        1.94946852e-14, 5.72249366e-12],
       [9.83776808e-01, 1.62231084e-02, 1.24122144e-08, 7.66128172e-16,
        3.20431590e-15, 1.54074355e-12],
       [9.90887642e-01, 9.11240932e-03, 4.40327153e-09, 1.94169915e-16,
        9.29813054e-16, 6.58512741e-13],
       [9.93894160e-01, 6.10580947e-03, 2.04672901e-09, 6.50586684e-17,
        3.40811568e-16, 3.19502020e-13],
       [9.95414972e-01, 4.58499044e-03, 1.13344722e-09, 2.62116645e-17,
        1.47163822e-16, 1.74661650e-13],
       [9.96028423e-01, 3.97164002e-03, 7.86160304e-10, 1.44440426e-17,
        8.46518441e-17, 

In [22]:
# Get the index of the class with the highest probability for each sample
predicted_labels = np.argmax(predictions, axis=1)
predicted_labels

array([1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [23]:
# Define a dictionary mapping encoded numbers to original class labels
class_mappings = {
    0: 'Good',
    1: 'Satisfactory',
    2: 'Moderate',
    3: 'Poor',
    4: 'Very Poor',
    5: 'Severe'
}

# Replace the encoded numbers with their original class labels
predicted_class_labels = [class_mappings[label] for label in predicted_labels]

# Create a DataFrame to display the predicted class labels
predicted_df_ann = pd.DataFrame({'AQI Prediction': predicted_class_labels})
predicted_df_ann

Unnamed: 0,AQI Prediction
0,Satisfactory
1,Satisfactory
2,Good
3,Good
4,Good
5,Good
6,Good
7,Good
8,Good
9,Good


In [24]:
first_six_columns_df = predicted_df.iloc[:, :7]
result_df = pd.concat([first_six_columns_df, predicted_df_ann], axis=1)
result_df

Unnamed: 0,PM2.5,PM10,SO2,NO2,CO,O3,Date,AQI Prediction
0,22.090008,57.351091,2.057945,11.900409,-0.181051,32.063362,2024-06-01,Satisfactory
1,20.316057,52.20809,2.026573,11.350058,-0.177466,31.266481,2024-06-02,Satisfactory
2,19.037691,47.587899,1.983645,10.888183,-0.186128,30.218665,2024-06-03,Good
3,17.87087,43.651716,1.933157,10.442662,-0.190738,29.27035,2024-06-04,Good
4,16.91302,40.537258,1.901337,10.141656,-0.198077,28.382688,2024-06-05,Good
5,15.915968,37.631847,1.880724,9.755081,-0.199818,27.564741,2024-06-06,Good
6,15.101709,35.216765,1.836342,9.430283,-0.203116,26.992079,2024-06-07,Good
7,14.359348,33.034406,1.807774,9.120653,-0.215274,26.458065,2024-06-08,Good
8,13.812632,31.209857,1.764211,8.909091,-0.242009,25.478546,2024-06-09,Good
9,13.330082,29.999948,1.739099,8.837189,-0.242074,24.904166,2024-06-10,Good


In [25]:
# Reorder columns with "Date" at the beginning
result_df = result_df[['Date', 'PM2.5', 'PM10', 'SO2', 'NO2', 'CO', 'O3', 'AQI Prediction']]


In [26]:
# Round numeric values to 2 decimal places
numeric_columns = ['PM2.5', 'PM10', 'SO2', 'NO2', 'CO', 'O3']
result_df[numeric_columns] = result_df[numeric_columns].round(2)
result_df

Unnamed: 0,Date,PM2.5,PM10,SO2,NO2,CO,O3,AQI Prediction
0,2024-06-01,22.09,57.35,2.06,11.9,-0.18,32.06,Satisfactory
1,2024-06-02,20.32,52.21,2.03,11.35,-0.18,31.27,Satisfactory
2,2024-06-03,19.04,47.59,1.98,10.89,-0.19,30.22,Good
3,2024-06-04,17.87,43.65,1.93,10.44,-0.19,29.27,Good
4,2024-06-05,16.91,40.54,1.9,10.14,-0.2,28.38,Good
5,2024-06-06,15.92,37.63,1.88,9.76,-0.2,27.56,Good
6,2024-06-07,15.1,35.22,1.84,9.43,-0.2,26.99,Good
7,2024-06-08,14.36,33.03,1.81,9.12,-0.22,26.46,Good
8,2024-06-09,13.81,31.21,1.76,8.91,-0.24,25.48,Good
9,2024-06-10,13.33,30.0,1.74,8.84,-0.24,24.9,Good


In [27]:
# Export DataFrame to a CSV file
result_df.to_csv('F:/Works/Application of ML/Final/prediction_ANN.csv', index=False)