<a href="https://colab.research.google.com/github/Ananda-2/6-Age-Calculator/blob/main/Indoor_Scene_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Indoor Scene Recognition

The dataset has a collection of about 15000+ labeled images belonging to 67 categories. I am selecting only below 10 categories ⚰

airport_inside, auditorium, bakery, bathroom, bookstore, casino, church_inside, grocerystore, operating_room, warehouse

Objective is to create a model that will able to classify images into these 10 categories.


## Importing required libraries


In [1]:
# Importing required libraries
import os
import pathlib
import math
import matplotlib.pyplot as plt
import cv2
from google.colab.patches import cv2_imshow
import numpy as np
np.random.seed(42)
import random as rn
rn.seed(42)
from keras import backend as K
import tensorflow as tf
tf.random.set_seed(42)
import keras
from keras.models import Sequential, Model
from keras.layers import Input, Lambda, Dense, Flatten, GlobalMaxPool3D, BatchNormalization, Dropout, Activation
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras import optimizers
from keras.regularizers import l2
from keras.models import load_model
from keras.applications.inception_v3 import InceptionV3
from keras.applications.xception import Xception
from keras.applications.inception_resnet_v2 import InceptionResNetV2

In [2]:
# Checking available Nvidia GPU
!nvidia-smi

Tue Mar 18 05:46:27 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   35C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

## Preparing data

In [None]:
# # Unzip files
# from zipfile import ZipFile

# with ZipFile('/content/drive/MyDrive/colab_data/indoorCVPR/data_10_cats.zip', 'r') as zipobj:
#     zipobj.extractall('/content/drive/MyDrive/colab_data/indoorCVPR')
#     print('Files are unzipped')

from google.colab import drive
drive.mount('/content/drive')

I have considered below 10 categories for classification:
airport_inside, auditorium, bakery, bathroom, bookstore, casino, church_inside, grocerystore, operating_room, warehouse

### Spliting data for training and validation

In [None]:
# Training dataset path
data_path = '/content/drive/My Drive/archive/indoorCVPR_09/Images'
train_data_dir = pathlib.Path(data_path)

# Counting the number of images
image_count = len(list(train_data_dir.glob('*/*.jpg')))
print(f"Total images: {image_count}")

In [None]:
# Setting image height, width and batch size
img_height= 150
img_width= 150
batch_size= 64

In [None]:
# Training dataset
train_ds= tf.keras.preprocessing.image_dataset_from_directory(train_data_dir, seed= 42, validation_split= 0.2,
                                                              subset= 'training',
                                                              batch_size= batch_size,
                                                              image_size= (img_height, img_width)
                                                              )

In [None]:
# Validation dataset
val_ds= tf.keras.preprocessing.image_dataset_from_directory(train_data_dir, seed= 42, validation_split= 0.2,
                                                              subset= 'validation',
                                                              batch_size= batch_size,
                                                              image_size= (img_height, img_width)
                                                              )

### Vizualizing training data

In [None]:
# Checking all class names
class_names = train_ds.class_names
print(class_names)

In [None]:
# Checking training data
plt.figure(figsize=(9, 9))
for images, labels in train_ds.take(1):
  for i in range(16):
    ax = plt.subplot(4, 4, i + 1)
    plt.imshow(images[i].numpy().astype('uint8'))
    plt.title(class_names[labels[i]])
    plt.axis('off')

### Data Augmentation

Now, creating data augmentation layer. I have selected only relevant data augmentation techniques for current scenario.

In [None]:
# ## Creating data augmentation layer
augmentation_layer= Sequential(
  [tf.keras.layers.InputLayer(input_shape= (img_height, img_width, 3)),
   tf.keras.layers.RandomFlip('horizontal', seed= 42),
   tf.keras.layers.RandomRotation(0.2, fill_mode= 'wrap', seed= 42),
   tf.keras.layers.RandomZoom(0.2, seed= 42),
   tf.keras.layers.RandomTranslation(.2, .2, fill_mode='wrap', interpolation='bilinear', seed= 42),
   tf.keras.layers.RandomContrast(0.2, seed= 42)]
   )

In [None]:
# Plotting different augmented version of a random image from training dataset
plt.figure(figsize=(9, 9))
a= np.random.randint(42)
for images, labels in train_ds.take(1):
    for i in range(12):
      aug_img= augmentation_layer(images)
      ax = plt.subplot(3, 4, i + 1)
      plt.imshow(aug_img[a].numpy().astype(np.int32))
      plt.axis('off')

## Model building

### InceptionV3 Finetuning

First trying transfer learning using InceptionV3 architecture with imagenet pretrained weights. I have removed the default output softmax layer of InceptionV3. I have kept First 249 layers weights as it is. Trained layer 249 to last layer on training dataset. Added a Flatten layer, Dropout layers, 2 hidden dense layers and output dense layer with 10 neurons and softmax as activation.

I will use label encoding instead of one hot encoding to optimize memory utilization. So my loss function will be: sparse_categorical_entropy and my metric will be sparse_categorical_accuracy.

In [None]:
# Creating function to fine tune InceptionV3
def inceptionv3(inp_shape, dropout_rate, train_layers_after):
    inceptionv3= InceptionV3(weights= 'imagenet', include_top= False)
    for layer in inceptionv3.layers[:train_layers_after]:
      layer.trainable= False
    input_layer= Input(shape= inp_shape)
    data_aug_layer= augmentation_layer(input_layer)
    norm_layer= tf.keras.layers.Rescaling(1./255)(data_aug_layer)
    cnn_layers= inceptionv3(norm_layer)
    flatten_layer= Flatten()(cnn_layers)
    dropout_layer1= Dropout(dropout_rate)(flatten_layer)
    dense_layer= Dense(1024, activation= 'relu', kernel_initializer= 'he_normal', kernel_regularizer='l2')(dropout_layer1)
    dropout_layer2= Dropout(dropout_rate)(dense_layer)
    dense_layer_1= Dense(512, activation= 'relu', kernel_initializer= 'he_normal', kernel_regularizer='l2')(dropout_layer2)
    dropout_layer3= Dropout(dropout_rate)(dense_layer_1)
    output_layer= Dense(10, activation= 'softmax')(dropout_layer3)
    model= Model(input_layer, output_layer)
    model.compile(optimizer= 'adam', loss= 'sparse_categorical_crossentropy', metrics= ['sparse_categorical_accuracy'])
    return model

In [None]:
# Creating model
inp_shape= (150, 150, 3)
dropout_rate= .25
train_layers_after= 249
incv3_model= inceptionv3(inp_shape, dropout_rate, train_layers_after)
incv3_model.summary()

In [None]:
# Loading data into cache to overcome data bottleneck during training.
AUTOTUNE= tf.data.AUTOTUNE
AUTOTUNE= tf.data.experimental.AUTOTUNE

# Shuffling data before starting of each epoch
train_ds= train_ds.cache().shuffle(1000).prefetch(buffer_size= AUTOTUNE)
val_ds= val_ds.cache().prefetch(buffer_size= AUTOTUNE)

In [None]:
# Setting callbacks

base_path= '/content/drive/MyDrive/colab_data/indoorCVPR/models/'

filepath= base_path + 'model-{epoch:05d}-{loss:.5f}-{sparse_categorical_accuracy:.5f}-{val_loss:.5f}-{val_sparse_categorical_accuracy:.5f}.h5'

checkpoint= ModelCheckpoint(filepath, monitor= 'val_sparse_categorical_accuracy', verbose= 1,
                            save_best_only= True, save_weights_only= False, mode= 'auto')

LR= ReduceLROnPlateau(monitor= 'val_loss', factor= 0.1, patience= 30, verbose= 1)
callbacks_list= [checkpoint, LR]

In [None]:
# Model training
epochs= 200
steps_per_epoch= math.ceil(2652/batch_size)
validation_steps= math.ceil(663/batch_size)

history= incv3_model.fit(train_ds, validation_data= val_ds, steps_per_epoch= steps_per_epoch, epochs= epochs, callbacks= callbacks_list, validation_steps= validation_steps)

In [None]:
acc= history.history['sparse_categorical_accuracy']
val_acc= history.history['val_sparse_categorical_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.show()

- Best Epoch: 109 (model-00109-0.08159-0.99170-0.67942-0.85068.h5)
- **Training: loss: .08 - categorical_accuracy: 0.99**
- **Validation: val_loss: .68 - val_sparse_categorical_accuracy: 0.85**


### Xception Finetunning

Used almost same architecture as previous one, only used Xception as our cnn architecture instead of InceptionV3. Removed output softmax layer of Xception architecture and finetunned Xception from layer 114 to end layer. Then I have added custom layers as previous.

In [None]:
# Creating function to fine tune Xception
def xception_cnn(inp_shape, dropout_rate, train_layers_after):
    xcp= Xception(weights= 'imagenet', include_top= False)
    for layer in xcp.layers[:train_layers_after]:
      layer.trainable= False
    input_layer= Input(shape= inp_shape)
    data_aug_layer= augmentation_layer(input_layer)
    norm_layer= tf.keras.layers.Rescaling(1./255)(data_aug_layer)
    cnn_layers= xcp(norm_layer)
    flatten_layer= Flatten()(cnn_layers)
    dropout_layer1= Dropout(dropout_rate)(flatten_layer)
    dense_layer= Dense(1024, activation= 'relu', kernel_initializer= 'he_normal', kernel_regularizer='l2')(dropout_layer1)
    dropout_layer2= Dropout(dropout_rate)(dense_layer)
    dense_layer_1= Dense(512, activation= 'relu', kernel_initializer= 'he_normal', kernel_regularizer='l2')(dropout_layer2)
    dropout_layer3= Dropout(dropout_rate)(dense_layer_1)
    output_layer= Dense(10, activation= 'softmax')(dropout_layer3)
    model= Model(input_layer, output_layer)
    model.compile(optimizer= 'adam', loss= 'sparse_categorical_crossentropy', metrics= ['sparse_categorical_accuracy'])
    return model

In [None]:
# Creating model
inp_shape= (150, 150, 3)
dropout_rate= .25
train_layers_after= 114
xcp_model= xception_cnn(inp_shape, dropout_rate, train_layers_after)
xcp_model.summary()

In [None]:
# Model training
epochs= 10
steps_per_epoch= math.ceil(2652/batch_size)
validation_steps= math.ceil(663/batch_size)

history= xcp_model.fit(train_ds, validation_data= val_ds, steps_per_epoch= steps_per_epoch, epochs= epochs, callbacks= callbacks_list, validation_steps= validation_steps)

In [None]:
acc= history.history['sparse_categorical_accuracy']
val_acc= history.history['val_sparse_categorical_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.show()

- Best Epoch: 88 (model-00088-0.07750-0.99321-0.49403-0.89140.h5)
- **Training: loss: .775 - categorical_accuracy: 0.993**
- **Validation: val_loss: .494 - val_sparse_categorical_accuracy: 0.891**


## Inference Script

In [None]:
# Combining both the models for inference
final_model1= tf.keras.models.load_model('/content/drive/MyDrive/colab_data/models/model-00088-0.07750-0.99321-0.49403-0.89140.h5')
final_model2=  tf.keras.models.load_model('/content/drive/MyDrive/colab_data/models/model-00109-0.08159-0.99170-0.67942-0.85068.h5')
models= [final_model1, final_model2]

In [None]:
# Single inference function
def single_inference(path, models):
  classes = ['auditorium', 'bathroom', 'bedroom', 'elevator', 'gym', 'kitchen', 'library', 'office', 'restaurant', 'staircase']
  img= cv2.imread(path, cv2.IMREAD_UNCHANGED)
  cv2_imshow(img)
  img= cv2.resize(img, (150, 150), interpolation = cv2.INTER_AREA)
  img= img.reshape((1,150,150,3))
  pred1= models[0].predict(img)
  pred2= models[1].predict(img)
  pred= (pred1+pred2)/2
  print ('Predicted class:', classes[np.argmax(pred)])

In [None]:
# Predicting a random image from internet
single_inference('/content/test1.jpg', models)

In [None]:
# Predicting a random image from internet
single_inference('/content/test2.jpg', models)

In [None]:
# Predicting a random image from internet
single_inference('/content/test3.jpg', models)

In [None]:
# Predicting a random image from internet
single_inference('/content/test4.jpg', models)