### Importing the necessary libraries

In [None]:
import math
import numpy as np
import scipy
import h5py
from tensorflow.keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization
from tensorflow.keras.layers import Flatten, Conv2D, AveragePooling2D, MaxPooling2D
from tensorflow.keras.layers import Conv2D, GlobalAveragePooling2D, GlobalMaxPooling2D, Multiply, Lambda, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from keras.callbacks import ModelCheckpoint
from matplotlib.pyplot import imshow
import tensorflow as tf
import keras

In [None]:
import os
# Expose only CUDA device 0 to TensorFlow.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
physical_devices = tf.config.list_physical_devices('GPU')
# Turn on memory growth for the visible GPU. The guard keeps this cell from
# raising IndexError when the runtime has no GPU (the list is then empty).
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [None]:
def channel_attention(x, ratio=8):
    """
    Channel-attention gate: re-weight the channels of a feature map

    A two-layer MLP (shared between both branches) is applied to the
    globally average-pooled and the globally max-pooled descriptors of
    ``x``. The two results are summed, squashed with a sigmoid and
    multiplied back onto ``x``.

    Parameters
    ----------
    x : tensor
        input feature map whose last axis is the channel axis
        (GlobalAveragePooling2D / GlobalMaxPooling2D are applied to it)
    ratio : integer, optional
        reduction factor of the MLP's hidden layer. The default is 8.

    Returns
    -------
    features : tensor
        ``x`` multiplied by the per-channel sigmoid gate
    """
    # Only the channel count is needed; reading the last axis avoids the
    # unused batch variable of a full shape unpack.
    channel = x.shape[-1]

    # Never let the bottleneck collapse to zero units when channel < ratio.
    hidden_units = max(channel // ratio, 1)

    # Shared MLP: the same two Dense layers serve both pooling branches.
    shared_reduce = Dense(hidden_units, activation="relu", use_bias=False)
    shared_expand = Dense(channel, use_bias=False)

    # Average-pooled branch
    avg_descriptor = GlobalAveragePooling2D()(x)
    avg_descriptor = shared_expand(shared_reduce(avg_descriptor))

    # Max-pooled branch
    max_descriptor = GlobalMaxPooling2D()(x)
    max_descriptor = shared_expand(shared_reduce(max_descriptor))

    # Merge with an Add layer (as the residual blocks do), then gate x.
    gate = Add()([avg_descriptor, max_descriptor])
    gate = Activation("sigmoid")(gate)
    features = Multiply()([x, gate])
    return features

In [None]:
def spatial_attention(x):
    """
    Spatial-attention gate: re-weight the spatial positions of a feature map

    The channel-wise mean and channel-wise max of ``x`` are concatenated
    into a two-channel map, passed through a 7x7 convolution with a sigmoid
    activation, and the resulting single-channel mask is multiplied back
    onto ``x``.

    Parameters
    ----------
    x : tensor
        input feature map whose last axis is the channel axis

    Returns
    -------
    features : tensor
        ``x`` multiplied by the spatial sigmoid mask
    """
    # Channel-wise average pooling; keepdims retains a size-1 channel axis.
    avg_map = tf.reduce_mean(x, axis=-1, keepdims=True)

    # Channel-wise max pooling. This must be reduce_max: using the mean here
    # as well would make both concatenated maps identical.
    max_map = tf.reduce_max(x, axis=-1, keepdims=True)

    # Stack the two descriptors along the channel axis
    pooled = Concatenate()([avg_map, max_map])

    # 7x7 convolution producing the single-channel attention mask
    mask = Conv2D(1, kernel_size=7, padding='same', activation='sigmoid')(pooled)
    features = Multiply()([x, mask])

    return features

In [None]:
def cbam(x):
    """
    Attention block: channel attention, then spatial attention, plus a skip

    Parameters
    ----------
    x : tensor
        input feature map

    Returns
    -------
    tensor
        ``x`` added to the output of spatial_attention(channel_attention(x))
    """
    channel_refined = channel_attention(x)
    fully_refined = spatial_attention(channel_refined)
    # Residual connection around the two attention stages.
    residual_sum = x + fully_refined
    return residual_sum

In [None]:
def identity_block(X, f, filters, stage, block):
    """
    Bottleneck residual block whose shortcut is the unmodified input

    The main path is 1x1 conv -> BN -> ReLU -> fxf conv -> BN -> ReLU ->
    1x1 conv -> BN; the input is then added back and a final ReLU applied.

    Parameters
    ----------
    X : tensor
        input tensor; batch normalisation is applied over axis 3
    f : integer
        kernel size of the middle convolution of the main path
    filters : list
        three integers, the number of filters of the three convolutions
    stage : integer
        used, together with block, to build the layer names
    block : str
        used, together with stage, to build the layer names

    Returns
    -------
    X : tensor
        output of the block after the shortcut addition and ReLU
    """

    # Layer-name prefixes, e.g. 'res2b_branch' / 'bn2b_branch'
    stage_tag = str(stage) + block
    conv_name_base = 'res' + stage_tag + '_branch'
    bn_name_base = 'bn' + stage_tag + '_branch'

    n_first, n_middle, n_last = filters

    # Keep a handle on the input for the skip connection.
    shortcut = X

    # (name suffix, filters, kernel size, padding, followed by ReLU?)
    main_path = (
        ('2a', n_first, (1, 1), 'valid', True),
        ('2b', n_middle, (f, f), 'same', True),
        ('2c', n_last, (1, 1), 'valid', False),
    )
    for suffix, n_filters, kernel, pad_mode, with_relu in main_path:
        X = Conv2D(filters=n_filters, kernel_size=kernel, strides=(1, 1),
                   padding=pad_mode, name=conv_name_base + suffix,
                   kernel_initializer=glorot_uniform(seed=0))(X)
        X = BatchNormalization(axis=3, name=bn_name_base + suffix)(X)
        if with_relu:
            X = Activation('relu')(X)

    # Merge the skip connection, then apply the closing non-linearity.
    X = Add()([X, shortcut])
    return Activation('relu')(X)

In [None]:
def convolutional_block(X, f, filters, stage, block, s=2):
    """
    Bottleneck residual block with a projection (1x1 conv + BN) shortcut

    The main path is 1x1 conv (stride s) -> BN -> ReLU -> fxf conv -> BN ->
    ReLU -> 1x1 conv -> BN. The shortcut passes the input through its own
    1x1 conv (stride s) and BN so that both branches can be added.

    Parameters
    ----------
    X : tensor
        input tensor; batch normalisation is applied over axis 3
    f : integer
        kernel size of the middle convolution of the main path
    filters : list
        three integers, the number of filters of the three main-path
        convolutions; the last one is also used by the shortcut convolution
    stage : integer
        used, together with block, to build the layer names
    block : str
        used, together with stage, to build the layer names
    s : integer, optional
        stride of the first main-path convolution and of the shortcut
        convolution. The default is 2.

    Returns
    -------
    X : tensor
        output of the block after the shortcut addition and ReLU
    """

    # Layer-name prefixes, e.g. 'res3a_branch' / 'bn3a_branch'
    stage_tag = str(stage) + block
    conv_name_base = 'res' + stage_tag + '_branch'
    bn_name_base = 'bn' + stage_tag + '_branch'

    n_first, n_middle, n_last = filters

    # Keep a handle on the input for the projection shortcut.
    shortcut = X

    # (name suffix, filters, kernel size, strides, padding, followed by ReLU?)
    main_path = (
        ('2a', n_first, (1, 1), (s, s), 'valid', True),
        ('2b', n_middle, (f, f), (1, 1), 'same', True),
        ('2c', n_last, (1, 1), (1, 1), 'valid', False),
    )
    for suffix, n_filters, kernel, stride, pad_mode, with_relu in main_path:
        X = Conv2D(filters=n_filters, kernel_size=kernel, strides=stride,
                   padding=pad_mode, name=conv_name_base + suffix,
                   kernel_initializer=glorot_uniform(seed=0))(X)
        X = BatchNormalization(axis=3, name=bn_name_base + suffix)(X)
        if with_relu:
            X = Activation('relu')(X)

    # Projection shortcut: same stride and output width as the main path.
    shortcut = Conv2D(filters=n_last, kernel_size=(1, 1), strides=(s, s),
                      padding='valid', name=conv_name_base + '1',
                      kernel_initializer=glorot_uniform(seed=0))(shortcut)
    shortcut = BatchNormalization(axis=3, name=bn_name_base + '1')(shortcut)

    # Merge both branches, then apply the closing non-linearity.
    X = Add()([X, shortcut])
    return Activation('relu')(X)

In [None]:
def ResNet18(input_shape, outputClasses):
    """
    Build the residual classifier with an attention block after stage 5

    Architecture, as assembled below:
    ZEROPAD -> CONV2D -> BATCHNORM -> RELU -> MAXPOOL
    -> (CONVBLOCK -> IDBLOCK*2) for each of stages 2, 3, 4 and 5
    -> CBAM -> AVGPOOL -> FLATTEN -> softmax DENSE

    Note that, despite the function name, every stage is made of
    three-convolution bottleneck blocks (one convolutional block followed
    by two identity blocks).

    Parameters
    ----------
    input_shape : tuple
        shape of the input image, passed to Input().
    outputClasses : integer
        number of classes, i.e. units of the final softmax layer.

    Returns
    -------
    model : object
        a Model() instance in Keras, named 'ResNet18'
    """

    # Input placeholder followed by 3-pixel zero padding
    X_input = Input(input_shape)
    net = ZeroPadding2D((3, 3))(X_input)

    # Stage 1: 7x7/2 stem convolution, BN, ReLU and 3x3/2 max pooling
    net = Conv2D(64, (7, 7), strides=(2, 2), name='conv1', kernel_initializer=glorot_uniform(seed=0))(net)
    net = BatchNormalization(axis=3, name='bn_conv1')(net)
    net = Activation('relu')(net)
    net = MaxPooling2D((3, 3), strides=(2, 2))(net)

    # Stages 2-5: (stage number, bottleneck filter widths, stride of block 'a')
    stage_plan = (
        (2, [64, 64, 256], 1),
        (3, [128, 128, 512], 2),
        (4, [256, 256, 1024], 2),
        (5, [512, 512, 2048], 2),
    )
    for stage_id, widths, stride in stage_plan:
        net = convolutional_block(net, f=3, filters=widths, stage=stage_id, block='a', s=stride)
        for block_tag in ('b', 'c'):
            net = identity_block(net, 3, widths, stage=stage_id, block=block_tag)

    # Attention block on the stage-5 output
    net = cbam(net)

    # 2x2 average pooling
    net = AveragePooling2D(pool_size=(2, 2), padding='same')(net)

    # Classifier head
    net = Flatten()(net)
    net = Dense(outputClasses, activation='softmax', name='fc' + str(outputClasses),
                kernel_initializer=glorot_uniform(seed=0))(net)

    # Wrap the graph in a Model
    return Model(inputs=X_input, outputs=net, name='ResNet18')

<h1><center> Implementation </center></h1>

### Loading and Pre-processing data

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset archive and the model
# checkpoint path used in this notebook both live under that mount point.
drive.mount('/content/drive')

In [None]:
# Extract the dataset archive stored on Drive. No -d option is given, so
# unzip writes into the current working directory.
!unzip "/content/drive/MyDrive/Datasets/MITIndoorDataset/indoor_CVPR_09_augmented.zip"

In [None]:
from sklearn.model_selection import train_test_split
import os
from PIL import Image
import numpy as np
import random

In [None]:
# Mini-batch size and (square) image height/width in pixels.
batch_size = 32
img_height = img_width = 224

In [None]:
# Root folder of the extracted dataset; load_dataset() treats every
# sub-directory of it as one class.
DATADIR = "/content/content/drive/MyDrive/Datasets/MITIndoorDataset/indoor_CVPR_09_augmented"
# os.listdir() returns entries in arbitrary order and also lists plain files,
# so keep directories only and sort them for a stable category list.
CATEGORIES = sorted(
    entry for entry in os.listdir(DATADIR)
    if os.path.isdir(os.path.join(DATADIR, entry))
)

In [None]:
import cv2
from sklearn.model_selection import train_test_split

In [None]:
def load_dataset(dataset_path, test_size=0.2, random_state=42, target_size=None):
    """
    Load an image-folder dataset into memory and split it into train / test

    Every sub-directory of ``dataset_path`` is treated as one class and its
    name is used as the label of all files it contains.

    Parameters
    ----------
    dataset_path : str
        directory containing one sub-directory per class
    test_size : float, optional
        forwarded to train_test_split. The default is 0.2.
    random_state : integer, optional
        forwarded to train_test_split. The default is 42.
    target_size : tuple, optional
        (width, height) every image is resized to. The default is None,
        which keeps the images at their stored size.

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy arrays
        image arrays and their string labels for both partitions
    """
    X = []
    y = []

    # os.listdir() order is arbitrary; sorting makes the sample order, and
    # therefore the seeded split below, reproducible across runs.
    for category in sorted(os.listdir(dataset_path)):
        category_path = os.path.join(dataset_path, category)
        if not os.path.isdir(category_path):
            continue

        for file_name in sorted(os.listdir(category_path)):
            image_path = os.path.join(category_path, file_name)
            # The context manager closes the file handle right away instead
            # of leaving one open per image.
            with Image.open(image_path) as img:
                # Force 3 channels so grayscale / RGBA / palette files do not
                # yield arrays with a different channel count.
                img = img.convert("RGB")
                if target_size is not None:
                    img = img.resize(target_size)
                X.append(np.array(img))
            y.append(category)

    X = np.array(X)
    y = np.array(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    return X_train, X_test, y_train, y_test

In [None]:
# Load all images and split them into training and testing partitions
X_train, X_test, y_train, y_test = load_dataset(DATADIR)

# Report the shape of every partition
for _label, _array in (("X_train", X_train), ("y_train", y_train),
                       ("X_test", X_test), ("y_test", y_test)):
    print(f"Shape of {_label}:", _array.shape)

In [None]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Category names found on disk (kept for reference).
target_labels = CATEGORIES

# Fit ONE encoder on the labels of both partitions. Re-fitting on the test
# labels (fit_transform twice) can assign different integers to the same
# class whenever a class is missing from one partition, and would also
# change classes_ after the training labels were already encoded.
label_encoder = LabelEncoder()
label_encoder.fit(np.concatenate([y_train, y_test]))

# Encode both partitions with the same label -> integer mapping
encoded_labels_train = label_encoder.transform(y_train)
encoded_labels_test = label_encoder.transform(y_test)

# Convert encoded labels to one-hot encoded format
y_train = to_categorical(encoded_labels_train, num_classes=len(label_encoder.classes_))
y_test = to_categorical(encoded_labels_test, num_classes=len(label_encoder.classes_))


In [None]:
# Shapes again, now that the labels are one-hot encoded
for _label, _array in (("X_train", X_train), ("y_train", y_train),
                       ("X_test", X_test), ("y_test", y_test)):
    print(f"Shape of {_label}:", _array.shape)

### Model creation and compilation

In [None]:
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import ModelCheckpoint
# Learning-rate schedule driven by val_accuracy: factor 0.6, patience of
# 3 epochs, lower bound 1e-6.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.6,
    patience=3,
    min_lr=1e-6,
    verbose=1,
)

# Checkpoint that only keeps the model with the highest val_accuracy.
checkpoint_filepath1 = '/content/drive/MyDrive/Trained Models/resnet18_with_cbam_after_stage_5_MIT_val_accuracy.h5'
checkpoint1 = ModelCheckpoint(
    filepath=checkpoint_filepath1,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Build the network from the notebook's image-size constants and from the
# width of the one-hot labels, so the input shape and the number of output
# units cannot drift from the data that is actually fed to fit().
resnet18 = ResNet18(input_shape=(img_height, img_width, 3), outputClasses=y_train.shape[1])
resnet18.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
resnet18.summary()

In [None]:
# Number of epochs handed to resnet18.fit() below.
num_epochs = 65

In [None]:
# Train the model; validation_split=0.2 reserves a fraction of the training
# arrays for validation, and both callbacks monitor val_accuracy.
resnet_teacher_history = resnet18.fit(
    x=X_train,
    y=y_train,
    epochs=num_epochs,
    batch_size=32,
    validation_split=0.2,
    callbacks=[learning_rate_reduction, checkpoint1],
    verbose=1,
)