<font size="+3" color=blue><b> <center><u> Image Classification using CNNs </u></center></b></font>

<font color="green" size=+2.5><b>Objective</b></font>

The aim of this kernel is to classify outdoor scenes. It walks through almost all of the steps required to implement an image classification algorithm using deep learning on the Intel Scene Classification dataset.

<a id="4"></a>
<font color="green" size=+2.5><b>Import Libraries</b></font>


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt             
import tensorflow as tf
from tensorflow import keras 
import tensorflow.keras.models as Models
from tensorflow.keras.preprocessing.image import ImageDataGenerator   # generates batches of augmented data
from tensorflow.keras.preprocessing import image                      # functions for image preprocessing
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import cv2


In [None]:
# Size every image is resized to when it is loaded (passed as image_size / target_size).
IMAGE_SIZE = (228, 228)

# Number of images per batch produced by the datasets.
BATCH_SIZE = 32

<a id="5"></a>
<font color="green" size=+2.5><b>Loading the Data</b></font>

In [None]:
# Root folders of the training and test splits inside the Kaggle input mount.
train_dir='/kaggle/input/intel-image-classification/seg_train/seg_train'
test_dir='/kaggle/input/intel-image-classification/seg_test/seg_test'

In [None]:
# Training dataset: batches of (images, labels) read from train_dir.
train_ds = tf.keras.utils.image_dataset_from_directory(
  train_dir,
  seed=123,              # fixed random seed so the shuffling is reproducible
  image_size=IMAGE_SIZE, # every image is resized to IMAGE_SIZE on load
  batch_size=BATCH_SIZE) # number of images per batch

In [None]:
# Validation dataset: built the same way as train_ds, but read from test_dir.
val_ds = tf.keras.utils.image_dataset_from_directory(
  test_dir,
  seed=123,              # fixed random seed so the shuffling is reproducible
  image_size=IMAGE_SIZE, # every image is resized to IMAGE_SIZE on load
  batch_size=BATCH_SIZE) # number of images per batch

You can find the class names in the class_names attribute on these datasets.

In [None]:
# Label names exposed by the dataset; a label value is an index into this list
# (it is used that way when titling the sample images).
class_names = train_ds.class_names
print(class_names)

<a id="6"></a>
<font color="green" size=+2.5><b>Visualize the data</b></font>

In [None]:
# Show the first nine images of one training batch in a 3x3 grid,
# each titled with its class name.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):   # a single batch is enough
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)      # cell i + 1 of the 3x3 grid
        pixels = images[i].numpy().astype("uint8")  # tensor -> uint8 array for display
        ax.imshow(pixels)
        ax.set_title(class_names[labels[i]])
        ax.axis("off")                     # hide ticks and axis labels

<a id="7"></a>
<font color="green" size=+2.5><b>Beginner: Simple Model Creation</b></font>

## Steps are:

1. Build the model,
2. Compile the model,
3. Train / fit the data to the model,
4. Evaluate the model on the testing set,

- Conv2D: (32 filters of size 3 by 3) The features will be "extracted" from the image.
- MaxPooling2D: Halves the height and width of the feature maps.
- Flatten: Transforms the 3-D feature maps produced by the last pooling layer into a 1-D vector (5 × 5 × 64 = 1600 values for 228 × 228 inputs) that the dense layers can consume.
- Relu : given a value x, returns max(x, 0).
- Softmax: 6 neurons, probability that the image belongs to one of the classes.

In [None]:
# Number of distinct labels; used as the width of the model's output layer.
num_classes = len(class_names)

In [None]:
# Plain CNN: five Conv2D + MaxPooling2D stages followed by a dense classifier head.
#
# The input shape is derived from IMAGE_SIZE so the network always matches the
# size the datasets resize images to, instead of repeating the literal 228.
#
# NOTE: there is no rescaling layer, so the network consumes the pixel values
# exactly as the datasets deliver them (raw [0, 255] range). Anything passed
# to model.predict() must use the same range.
model = Models.Sequential([
    # Feature extraction: 3x3 convolutions, each followed by 2x2 max pooling
    # that halves the spatial dimensions of the feature maps.
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu',
                           input_shape=IMAGE_SIZE + (3,)),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    # Classifier head: flatten the feature maps, then two dense layers with
    # dropout (rate 0.2) as regularisation against overfitting.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    # One unit per class; softmax turns the outputs into class probabilities.
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the model before training.
model.summary()

In [None]:
# Configure the learning process before training.
model.compile(
  optimizer='adam',  # adaptive moment estimation
  # Sparse loss because the labels are integer class indices. The final layer
  # already applies softmax, so the model outputs probabilities, not logits:
  # from_logits must be False, otherwise softmax is effectively applied twice.
  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
  metrics=['accuracy'])  # report classification accuracy

In [None]:

# Stop training once val_loss has not improved for 5 consecutive epochs.
earlystopping = EarlyStopping(
    monitor='val_loss',
    patience=5,   # epochs without improvement tolerated before stopping
    verbose=1,    # log a message when training is stopped
    mode='min',   # "improvement" means a lower val_loss
)
# Keep only the best model seen so far on disk. The '.keras' suffix is needed
# because Keras 3 rejects whole-model checkpoint paths without a recognised
# extension; older versions accept it as well.
checkpointer = ModelCheckpoint(filepath='bestvalue.keras', verbose=0, save_best_only=True)
callback_list = [checkpointer, earlystopping]  # passed to model.fit()

In [None]:
# Train on train_ds and validate on val_ds; the returned History object holds
# the per-epoch metrics.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=35,               # upper bound; the EarlyStopping callback may end training sooner
    callbacks=callback_list   # checkpointing and early stopping
)

# Model Summary

In [None]:
# Print the layer-by-layer summary of the trained model.
model.summary()

In [None]:
# Loss per epoch for the training data and for the validation data.
for metric_key, curve_label in (('loss', 'training set'), ('val_loss', 'test set')):
    plt.plot(history.history[metric_key], label=curve_label)
plt.xlabel('Epoch Number')
plt.ylabel('Loss')
plt.legend()

In [None]:
# Accuracy per epoch for the training data and for the validation data.
for metric_key, curve_label in (('accuracy', 'training set'), ('val_accuracy', 'test set')):
    plt.plot(history.history[metric_key], label=curve_label)
plt.xlabel('Epoch Number')
plt.ylabel('Accuracy')
plt.legend()

# Prediction on unseen image data

In [None]:
# Collect the files to run predictions on.
def getImagePaths(path):
    """Return the full paths of all files found under *path*, recursively.

    The result is sorted so that slices such as ``paths[:9]`` pick the same
    files on every run; ``os.walk`` itself gives no ordering guarantee.
    A directory that does not exist yields an empty list, because
    ``os.walk`` ignores listing errors by default.
    """
    return sorted(
        os.path.join(dirname, filename)
        for dirname, _, filenames in os.walk(path)
        for filename in filenames
    )

# Absolute path, like train_dir and test_dir: a relative '../input/...' path
# only resolves when the working directory is /kaggle/working, and a wrong
# location would silently produce an empty list.
pred_dir = '/kaggle/input/intel-image-classification/seg_pred/seg_pred'

images_paths = getImagePaths(pred_dir)
len(images_paths)    # total number of images

In [None]:
# Load the first nine prediction images into one numpy array.
# Each image is read with keras' image.load_img, resized to IMAGE_SIZE and
# given a leading batch axis, so file_array[i] can be passed to model.predict().
#
# The pixel values are deliberately left in the raw [0, 255] range: the model
# is trained on train_ds, which is not rescaled, so dividing by 255 here would
# feed the network inputs on a different scale than it was trained on.
file_array = np.array([
    np.expand_dims(
        image.img_to_array(image.load_img(file, target_size=IMAGE_SIZE)),
        axis=0,
    )
    for file in images_paths[:9]
])

In [None]:
# Class names of the training dataset; predict_image() looks up the predicted
# index in this list.
classes = train_ds.class_names
print(classes)

In [None]:
def predict_image(filename, model):
    """Display the image at *filename*, titled with the class *model* predicts.

    The image is resized to IMAGE_SIZE and passed to the model with its raw
    [0, 255] pixel values. It is not divided by 255, because the model is
    trained on train_ds, which is not rescaled either; scaling only at
    prediction time would put the inputs on a different scale than the
    training data.

    Args:
        filename: Path of the image file to classify.
        model: Trained Keras model whose output is one score per entry of
            the module-level ``classes`` list.
    """
    img_ = image.load_img(filename, target_size=IMAGE_SIZE)
    img_array = image.img_to_array(img_)               # pixel values as a float array
    img_processed = np.expand_dims(img_array, axis=0)  # add the batch axis expected by predict()

    prediction = model.predict(img_processed)  # one score per class
    index = np.argmax(prediction)              # index of the highest score

    plt.title("Prediction - {}".format(str(classes[index]).title()), size=18, color='red')
    # imshow expects floats in [0, 1] or integers in [0, 255]; the array holds
    # floats in [0, 255], so convert to uint8 for display.
    plt.imshow(img_array.astype("uint8"))

In [None]:
# Classify and display a single image from the prediction folder.
predict_image('/kaggle/input/intel-image-classification/seg_pred/seg_pred/2138.jpg',model)