## Download and Prepare the Dataset

In [None]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Session configuration controlling how this process uses GPU memory.
config = ConfigProto()
# Grow the GPU allocation on demand instead of reserving it all up front ...
config.gpu_options.allow_growth = True
# ... and limit this process to half of the GPU memory.
config.gpu_options.per_process_gpu_memory_fraction = 0.5
# Open an interactive TF1-compat session that uses the configuration above.
session = InteractiveSession(config=config)

In [None]:
# import the libraries as shown below

from tensorflow.keras.layers import Input, Lambda, Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.applications.inception_v3 import InceptionV3
#from keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img
from tensorflow.keras.models import Sequential
import numpy as np
from glob import glob
#import matplotlib.pyplot as plt

In [None]:
# Create the Kaggle config directory; -p makes the cell safe to re-run when it already exists
! mkdir -p ~/.kaggle

In [None]:
# Copy the Kaggle API token (kaggle.json in the working directory) into the Kaggle config directory
! cp kaggle.json ~/.kaggle/

In [None]:
# Restrict the token file to owner read/write only
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the dataset archive; the dataset is identified as <owner>/<dataset-slug> (no trailing slash)
! kaggle datasets download -d adarshrouniyar/air-pollution-image-dataset-from-india-and-nepal

In [None]:
import zipfile
import os

# Location of the downloaded dataset archive
zip_file_path = '/content/air-pollution-image-dataset-from-india-and-nepal.zip'

# Unpack every member of the archive into /content/
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path='/content/')

# Report what /content/ contains after extraction
extracted_files = os.listdir('/content/')
print("Files extracted successfully:", extracted_files)


In [None]:
  # Create a new directory for extraction
extraction_path = '/content/dataset'
# exist_ok=True lets this cell be re-run without failing on an existing directory
os.makedirs(extraction_path, exist_ok=True)

# Unpack the archive a second time, this time into the dedicated dataset directory
with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=extraction_path)

# Report the top-level entries that were extracted
extracted_files = os.listdir(extraction_path)
print("Files extracted successfully:", extracted_files)


In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Split every class folder of the original dataset into Train/ and Validation/
# copies (80% / 20%), keeping the per-class folder layout.

# Path to the original dataset (each sub-folder is treated as one class)
original_dataset_path = '/content/dataset/Air Pollution Image Dataset/Air Pollution Image Dataset/Combined_Dataset/IND_and_NEP'

# Paths to the new train and validation folders
output_train_path = '/content/dataset/Air Pollution Image Dataset/Air Pollution Image Dataset/Combined_Dataset/Train'
output_validation_path = '/content/dataset/Air Pollution Image Dataset/Air Pollution Image Dataset/Combined_Dataset/Validation'

# Create train and validation folders if they don't exist
os.makedirs(output_train_path, exist_ok=True)
os.makedirs(output_validation_path, exist_ok=True)

# Keep only sub-directories: a stray file in the dataset root would otherwise
# make os.listdir(folder_path) fail below. Sorted so the run is deterministic.
original_folders = sorted(
    entry for entry in os.listdir(original_dataset_path)
    if os.path.isdir(os.path.join(original_dataset_path, entry))
)

for folder in original_folders:
    folder_path = os.path.join(original_dataset_path, folder)

    # Collect the image files. The extension check is case-insensitive so that
    # e.g. '.JPG' is not silently dropped. The list is sorted because
    # os.listdir returns entries in arbitrary order: without a stable input
    # order, random_state alone does not make the split reproducible, and a
    # re-run could copy the same image into both Train and Validation.
    image_files = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )

    # train_test_split cannot produce two non-empty parts from fewer than
    # two samples; skip such folders instead of aborting the whole run.
    if len(image_files) < 2:
        print("Skipping folder (fewer than 2 images):", folder)
        continue

    # Split the images into train and validation sets (fixed seed)
    train_files, validation_files = train_test_split(image_files, test_size=0.2, random_state=42)

    # Create the per-class output folders
    output_train_folder = os.path.join(output_train_path, folder)
    output_validation_folder = os.path.join(output_validation_path, folder)
    os.makedirs(output_train_folder, exist_ok=True)
    os.makedirs(output_validation_folder, exist_ok=True)

    # Copy each subset into its destination folder
    for destination_folder, files in (
        (output_train_folder, train_files),
        (output_validation_folder, validation_files),
    ):
        for file in files:
            source_path = os.path.join(folder_path, file)
            destination_path = os.path.join(destination_folder, file)
            shutil.copyfile(source_path, destination_path)

print("Dataset split into train and validation successfully.")


## CNN model


In [None]:
import tensorflow as tf

# CNN classifier: four convolution + max-pooling stages followed by a dense head.
model = tf.keras.models.Sequential()

# Stage 1: the input is a 224x224 image with 3 colour channels
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

# Stage 2
model.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

# Stage 3
model.add(tf.keras.layers.Conv2D(512, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

# Stage 4
model.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

# Flatten the feature maps and apply dropout before the dense layers
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dropout(0.5))

# Hidden layer with 512 units, then a 6-way softmax output
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(6, activation='softmax'))

# Show the layer-by-layer architecture
model.summary()

In [None]:
# Configure training: RMSprop optimizer, categorical cross-entropy loss
# (matches the softmax output and one-hot labels), accuracy as the metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## ImageDataGenerator


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Directories holding the per-class image folders for each subset
TRAINING_DIR = "/content/dataset/Air Pollution Image Dataset/Air Pollution Image Dataset/Combined_Dataset/Train"
VALIDATION_DIR = "/content/dataset/Air Pollution Image Dataset/Air Pollution Image Dataset/Combined_Dataset/Validation"

# Training images: scale pixel values by 1/255 and apply random augmentation
# (rotation, shifts, shear, zoom, horizontal flip).
training_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation images: only the same 1/255 scaling, no augmentation
validation_datagen = ImageDataGenerator(rescale=1./255)

# Stream batches of 32 images resized to 224x224 with one-hot labels
train_generator = training_datagen.flow_from_directory(
    TRAINING_DIR,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
)

## Train the model and evaluate the results


In [None]:
# Fit the model for 30 epochs; each epoch runs one full pass over the training
# generator and is followed by one full pass over the validation generator.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=30,
    steps_per_epoch=len(train_generator),
    validation_steps=len(validation_generator),
    verbose=1,
)

In [None]:
import matplotlib.pyplot as plt

# Plot the training curves recorded by model.fit
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# One x-axis point per recorded epoch
epochs = range(len(acc))

# Figure 1: accuracy
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)

# Figure 2: loss. Previously this second figure was opened but left empty and
# the loss values above were never used.
plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend(loc=0)

plt.show()

In [None]:
from tensorflow.keras.models import load_model

# Persist the trained model to an HDF5 file in the working directory
model.save(filepath='aqidl.h5')