**1.IMPORTING LIBRARIES**

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import cv2
import pandas as pd
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, BatchNormalization, Conv2D, Dense, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img ,array_to_img ,img_to_array
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

**2.LOAD THE IMAGE TRAINING AND VALIDATION DATASET**

In [None]:
# Folder that holds one sub-directory of source images per class.
original_images_dir = '/kaggle/input/air-pollution-image-dataset-from-india-and-nepal/Air Pollution Image Dataset/Air Pollution Image Dataset/Combined_Dataset/IND_and_NEP'

# Validation split description; later cells read its 'Filename' and
# 'AQI_Class' columns.
validation_df = pd.read_csv(
    '/kaggle/input/air-pollution-image-dataset-from-india-and-nepal/Dataset_for_AQI_Classification/Dataset_for_AQI_Classification/val_data.csv'
)


a.Creating a directory for augmented images

In [None]:

# Output folder that receives the augmented training images.
save_dir = '/kaggle/working/augmented_images'
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)


In [None]:
# Resolution every image is resized to before entering the network, and the
# number of images per generator batch.
IMG_WIDTH, IMG_HEIGHT = 224, 224
BATCH_SIZE = 128

In [None]:
# Root folder the training generator later reads from.
train_dataset_path = '/kaggle/working/augmented_images'

image_dir = '/kaggle/input/air-pollution-image-dataset-from-india-and-nepal/Air Pollution Image Dataset/Air Pollution Image Dataset/Combined_Dataset/IND_and_NEP'

# Collect the class sub-folders of the source dataset ...
directories = []
for entry in os.listdir(image_dir):
    if os.path.isdir(os.path.join(image_dir, entry)):
        directories.append(entry)

# ... and mirror each of them under the training folder.
for class_name in directories:
    class_output_dir = train_dataset_path + '/' + class_name
    if os.path.exists(class_output_dir):
        continue
    os.makedirs(class_output_dir)

b.Image Augmentation

In [None]:
# Offline augmentation: every source image is expanded into several randomly
# transformed copies that are written to <train_dataset_path>/<class>/.
datagen = ImageDataGenerator(
        rotation_range=40,          # Rotate images by up to 40 degrees
        width_shift_range=0.2,      # Shift width by up to 20% of image width
        height_shift_range=0.2,     # Shift height by up to 20% of image height
        shear_range=0.2,            # Apply shear transformation with intensity up to 20%
        zoom_range=0.2,             # Zoom in or out by up to 20%
        horizontal_flip=True,       # Flip images horizontally
        vertical_flip=True,         # Flip images vertically
        brightness_range=[0.5, 1.5],  # Adjust brightness
        channel_shift_range=10,     # Shift color channels
        fill_mode='nearest')        # Fill mode for points outside the input boundaries

# Number of augmented files written per source image. The previous counter
# (`i += 1; if i > 20: break`) pulled 21 batches from the generator, so 21 is
# kept to leave the amount of generated data per image unchanged.
AUGMENTED_COPIES_PER_IMAGE = 21

# File names reserved for validation/testing must not be augmented into the
# training folder, otherwise the reported metrics are inflated by leakage.
# NOTE(review): presumably the class folders below contain the same file names
# as the 'Filename' column of the split CSVs - confirm. If they do not overlap
# this filter is simply a no-op.
held_out_filenames = set(validation_df['Filename'])
held_out_filenames.update(
    pd.read_csv('/kaggle/input/air-pollution-image-dataset-from-india-and-nepal/Dataset_for_AQI_Classification/Dataset_for_AQI_Classification/testing_data.csv')['Filename']
)

image_dir='/kaggle/input/air-pollution-image-dataset-from-india-and-nepal/Air Pollution Image Dataset/Air Pollution Image Dataset/Combined_Dataset/IND_and_NEP'
directories = [d for d in os.listdir(image_dir) if os.path.isdir(os.path.join(image_dir, d))]
for dirr in directories:
    class_input_dir = os.path.join(image_dir, dirr)
    class_output_dir = os.path.join(train_dataset_path, dirr)
    # The generator writes into this folder, so make sure it exists.
    os.makedirs(class_output_dir, exist_ok=True)

    image_files = [
        os.path.join(class_input_dir, f)
        for f in sorted(os.listdir(class_input_dir))
        if f.endswith('.jpg') and f not in held_out_filenames
    ]

    for image_path in image_files:
        img = load_img(image_path)
        # flow() expects a batch, so add a leading batch axis of size 1.
        x = np.expand_dims(img_to_array(img), axis=0)

        # Prefix the augmented files with the source file's stem. The earlier
        # expression reversed the whole path before splitting on '/', which
        # yielded the file name spelled backwards (e.g. 'gpj.xyz').
        source_stem = os.path.splitext(os.path.basename(image_path))[0]

        augmented_flow = datagen.flow(
            x,
            batch_size=1,
            save_to_dir=class_output_dir,
            save_prefix=source_stem,
            save_format='jpg')
        # The iterator never ends on its own; pull a fixed number of batches.
        for _ in range(AUGMENTED_COPIES_PER_IMAGE):
            next(augmented_flow)
                     


In [None]:
# On-the-fly preprocessing for training: pixel values are scaled by 1/255 and
# each image additionally receives a random zoom / shift.
train_datagen = ImageDataGenerator(rescale=1.0/255,
                                  zoom_range=0.2,
                                  width_shift_range=0.2,
                                  height_shift_range=0.2,
                                  fill_mode='nearest')

# Keras expects target_size as (height, width). The two constants are equal
# today, but the correct order keeps this working if they ever diverge.
train_generator = train_datagen.flow_from_directory(train_dataset_path,
                                                   target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                   batch_size=BATCH_SIZE,
                                                   class_mode='categorical',
                                                   shuffle=True)

In [None]:
# Validation images are only rescaled - no random augmentation.
validation_datagen = ImageDataGenerator(rescale=1.0/255)

# NOTE(review): the class -> index mapping here is derived from the values of
# 'AQI_Class', while training derives it from folder names; presumably both
# yield the same mapping - confirm.
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=validation_df,
    directory='/kaggle/input/air-pollution-image-dataset-from-india-and-nepal/Air Pollution Image Dataset/Air Pollution Image Dataset/Combined_Dataset/All_img',
    x_col='Filename',
    y_col='AQI_Class',
    # Keras expects (height, width); equal today, but keep the order right.
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=True
)


c.Get the label mapping

In [None]:
# Invert the generator's {class name: index} table into {index: class name}
# so predicted indices can be turned back into readable names.
labels = dict(
    (class_index, class_name)
    for class_name, class_index in train_generator.class_indices.items()
)

print("Label Mappings for classes present in the training and validation datasets\n")
for class_index in labels:
    print(f"{class_index} : {labels[class_index]}")

**3.TRAINING A CNN MODEL**

a.Create a CNN Model

In [None]:
def create_model(num_classes=6):
    """Build the (uncompiled) CNN used for image classification.

    The network stacks three Conv2D -> ReLU -> MaxPooling -> BatchNormalization
    stages (128, 64 and 32 filters), then flattens into a 256-unit dense layer
    with 50% dropout and a softmax output layer.

    Args:
        num_classes: Number of units in the softmax output layer. Defaults to
            6, the value that was previously hard-coded.

    Returns:
        A `Sequential` model that still has to be compiled by the caller.
    """
    model = Sequential([
        # Keras image tensors are (height, width, channels).
        Conv2D(filters=128, kernel_size=(5, 5), padding='valid', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        BatchNormalization(),

        # The second and third conv stages carry a small L2 weight penalty.
        Conv2D(filters=64, kernel_size=(3, 3), padding='valid', kernel_regularizer=l2(0.00005)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        BatchNormalization(),

        Conv2D(filters=32, kernel_size=(3, 3), padding='valid', kernel_regularizer=l2(0.00005)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        BatchNormalization(),

        Flatten(),

        Dense(units=256, activation='relu'),
        Dropout(0.5),
        Dense(units=num_classes, activation='softmax')
    ])

    return model

In [None]:
# Instantiate the CNN; it is compiled in a later cell.
cnn_model = create_model()

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
cnn_model.summary()

In [None]:
# Callback watching 'val_loss' with a patience of 5 epochs; when triggered it
# multiplies the learning rate by sqrt(0.1).
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=np.sqrt(0.1),
    patience=5,
)

In [None]:
# Adam optimizer with an initial learning rate of 0.001.
optimizer = Adam(
    learning_rate=0.001,
)

In [None]:
# Categorical cross-entropy matches the one-hot targets produced by the
# generators (class_mode='categorical'); accuracy is tracked as a metric.
cnn_model.compile(
    optimizer=optimizer,
    loss=CategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# Train for 12 epochs, validating after each one; verbose=2 prints one line
# per epoch, and the plateau callback may lower the learning rate.
history = cnn_model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=12,
    callbacks=[reduce_lr],
    verbose=2,
)

**4.PLOTTING THE MODEL METRICS**

In [None]:
# Per-epoch curves recorded by fit().
training_log = history.history

train_accuracy = training_log['accuracy']
val_accuracy = training_log['val_accuracy']

train_loss = training_log['loss']
val_loss = training_log['val_loss']

# ReduceLROnPlateau logs the rate under 'learning_rate' in recent Keras
# releases and under 'lr' in older ones; accept either instead of failing.
learning_rate = training_log.get('learning_rate', training_log.get('lr'))
if learning_rate is None:
    raise KeyError("history has neither 'learning_rate' nor 'lr'; was the ReduceLROnPlateau callback used?")

In [None]:
# Three stacked panels: accuracy, loss and learning rate per epoch.
fig, ax = plt.subplots(nrows=3, ncols=1, figsize=(12, 10))

# This panel shows both curves, so the title names both.
ax[0].set_title('Training/Validation Accuracy vs. Epochs')
ax[0].plot(train_accuracy, 'o-', label='Train Accuracy')
ax[0].plot(val_accuracy, 'o-', label='Validation Accuracy')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Accuracy')
ax[0].legend(loc='best')

ax[1].set_title('Training/Validation Loss vs. Epochs')
ax[1].plot(train_loss, 'o-', label='Train Loss')
ax[1].plot(val_loss, 'o-', label='Validation Loss')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Loss')
ax[1].legend(loc='best')

ax[2].set_title('Learning Rate vs. Epochs')
ax[2].plot(learning_rate, 'o-', label='Learning Rate')
ax[2].set_xlabel('Epochs')
# The y-axis of this panel is the learning rate (it was mislabelled 'Loss').
ax[2].set_ylabel('Learning Rate')
ax[2].legend(loc='best')

plt.tight_layout()
plt.show()


**5.TESTING THE MODEL ON THE TEST SET**

In [None]:
# Test split description; later cells read its 'Filename' and 'AQI_Class'
# columns.
test_df = pd.read_csv(
    '/kaggle/input/air-pollution-image-dataset-from-india-and-nepal/Dataset_for_AQI_Classification/Dataset_for_AQI_Classification/testing_data.csv'
)

In [None]:

# Test images are only rescaled - no random augmentation.
test_datagen = ImageDataGenerator(rescale=1.0/255)

# shuffle=False keeps the batches in dataframe order, which later cells rely
# on when comparing predictions against test_generator.classes.
# NOTE(review): class indices here come from 'AQI_Class' values; presumably
# they match the training folder mapping - confirm.
test_generator = test_datagen.flow_from_dataframe(
                                                 dataframe=test_df,
                                                 directory='/kaggle/input/air-pollution-image-dataset-from-india-and-nepal/Air Pollution Image Dataset/Air Pollution Image Dataset/Combined_Dataset/All_img',
                                                 x_col='Filename',
                                                 y_col='AQI_Class',
                                                 shuffle=False,
                                                 batch_size=BATCH_SIZE,
                                                 # Keras expects (height, width); equal today, but keep the order right.
                                                 target_size = (IMG_HEIGHT, IMG_WIDTH),
                                                 class_mode='categorical')

**6.MODEL PREDICTION ON TEST**

In [None]:
# Run the model over the test generator; later cells take the argmax of each
# row of the result to obtain a predicted class index.
predictions = cnn_model.predict(test_generator)

In [None]:
# Show the first six test images with their predicted class as the title.
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(6, 6))

# Load the first batch once; indexing test_generator[0] inside the loop
# re-read and re-decoded the whole batch for every single panel.
first_batch_images = test_generator[0][0]

# ax.flat walks the grid row by row, i.e. in the same order as the former
# nested i/j loops.
for idx, panel in enumerate(ax.flat):
    panel.axis("off")
    if idx >= len(first_batch_images):
        # Fewer images than panels: leave the remaining panels blank.
        continue
    predicted_label = labels[np.argmax(predictions[idx])]
    panel.set_title(f"{predicted_label}")
    panel.imshow(first_batch_images[idx])

plt.tight_layout()
plt.suptitle("Test Dataset Predictions", fontsize=15)
plt.show()

In [None]:
# The generator already yields batches of BATCH_SIZE, and Keras documents that
# batch_size must not be passed for generator / Sequence inputs.
test_loss, test_accuracy = cnn_model.evaluate(test_generator)

**7.PLOTTING THE CLASSIFICATION METRICS**

a.Confusion Matrix

In [None]:
# Ground-truth class indices as stored on the test generator.
y_true = test_generator.classes
# Predicted class index = position of the largest score in each row.
y_pred = np.argmax(a=predictions, axis=1)

In [None]:
# Fix the class order explicitly so the matrix always has one row/column per
# known class, even if a class never occurs in y_true or y_pred.
class_ids = sorted(labels)
class_names = [labels[class_id] for class_id in class_ids]

cf_mtx = confusion_matrix(y_true, y_pred, labels=class_ids)

# Each cell is annotated with its absolute count and its share of all samples.
group_counts = ["{0:0.0f}".format(value) for value in cf_mtx.flatten()]
group_percentages = ["{0:.2%}".format(value) for value in cf_mtx.flatten()/np.sum(cf_mtx)]
box_labels = [f"{v1}\n({v2})" for v1, v2 in zip(group_counts, group_percentages)]
# Follow the matrix's real shape instead of a hard-coded (6, 6).
box_labels = np.asarray(box_labels).reshape(cf_mtx.shape)

plt.figure(figsize = (12, 12))
# seaborn is imported as `sn` in this notebook; the former `sns` alias was
# undefined and raised a NameError here.
sn.heatmap(cf_mtx, xticklabels=class_names, yticklabels=class_names,
           cmap="YlGnBu", fmt="", annot=box_labels)
plt.xlabel('Predicted Classes')
plt.ylabel('True Classes')
plt.show()

In [None]:
# Pass the class ids explicitly so target_names always lines up with the
# reported rows, even when a class is absent from y_true and y_pred.
report_class_ids = sorted(labels)
print(classification_report(
    y_true,
    y_pred,
    labels=report_class_ids,
    target_names=[labels[class_id] for class_id in report_class_ids],
))

**8.SAVING THE MODEL**


In [None]:
# Persist the trained model to the Kaggle working directory.
# NOTE(review): 'mmodel.h5' looks like a typo for 'model.h5' - confirm before
# renaming, since anything that loads the file depends on this exact name.
cnn_model.save('/kaggle/working/mmodel.h5')