## Imports and data segregation

In [1]:
import os
import shutil
from tqdm import tqdm
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import cv2
import zipfile
import shutil
import random
import pandas as pd
import csv
import os
# Paths to data and image lists
data_path = '/kaggle/input/indoor-scenes-cvpr-2019/indoorCVPR_09/Images/'
train_list_path = '/kaggle/input/indoor-scenes-cvpr-2019/TrainImages.txt'
test_list_path = '/kaggle/input/indoor-scenes-cvpr-2019/TestImages.txt'

# Output directories
output_dir = '/kaggle/working/split_data'
train_dir = os.path.join(output_dir, 'train')
val_dir = os.path.join(output_dir, 'validation')
test_dir = os.path.join(output_dir, 'test')

# Create directories for train, validation, and test splits
for folder in [train_dir, val_dir, test_dir]:
    os.makedirs(folder, exist_ok=True)

# Read train and test image lists
with open(train_list_path, 'r') as file:
    train_images = set(file.read().splitlines())

with open(test_list_path, 'r') as file:
    test_images = set(file.read().splitlines())

# Get all available images in the dataset
all_images = []
for root, _, files in os.walk(data_path):
    for file in files:
        if file.endswith(('.jpg', '.png')):
            all_images.append(os.path.join(root, file))

# Initialize counters
train_count, val_count, test_count = 0, 0, 0

# Copy images to their respective directories
for img_path in tqdm(all_images, desc="Processing images"):
    # Extract relative image path (folder_name/image_name)
    relative_path = os.path.relpath(img_path, data_path).replace("\\", "/")

    # Determine the split (train, test, or validation)
    if relative_path in train_images:
        dest_dir = train_dir
        train_count += 1
    elif relative_path in test_images:
        dest_dir = test_dir
        test_count += 1
    else:
        dest_dir = val_dir
        val_count += 1

    # Create subdirectory for the class if it doesn't exist
    class_dir = os.path.join(dest_dir, os.path.dirname(relative_path))
    os.makedirs(class_dir, exist_ok=True)

    # Copy the image
    shutil.copy(img_path, os.path.join(class_dir, os.path.basename(img_path)))

print(f"Data split complete!")
print(f"Training images: {train_count}")
print(f"Validation images: {val_count}")
print(f"Testing images: {test_count}")


Processing images: 100%|██████████| 15614/15614 [02:18<00:00, 112.62it/s]

Data split complete!
Training images: 5360
Validation images: 8914
Testing images: 1340





## Data preprocessing

In [2]:

#Set data augmentation techniques
train_datagen = keras.preprocessing.image.ImageDataGenerator(horizontal_flip=True,vertical_flip=True
                                                             ,zoom_range=0.2,rotation_range=360
                                                             ,width_shift_range=0.1,height_shift_range=0.1
                                                             ,channel_shift_range=50
                                                             ,brightness_range=(0,1.2)
                                                             ,preprocessing_function=keras.applications.imagenet_utils.preprocess_input)

test_datagen = keras.preprocessing.image.ImageDataGenerator(preprocessing_function=keras.applications.imagenet_utils.preprocess_input)


In [3]:
#Create Data augmentation techniques
batch_size=15
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(512, 512),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)
validation_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(512, 512),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

Found 5360 images belonging to 67 classes.
Found 1340 images belonging to 67 classes.


## Evaluation metrics defination

In [4]:
import keras
import tensorflow as tf
from sklearn.metrics import confusion_matrix
import csv
import json
import numpy as np
import matplotlib.pyplot as plt
from keras import backend as K

keras.backend.clear_session() #clear backend

# Helper function to calculate F1 Score, Sensitivity, and Specificity
def calculate_metrics(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    precisions, recalls, f1_scores, specificities = [], [], [], []

    for i in range(cm.shape[0]):
        tp = cm[i, i]
        fn = cm[i, :].sum() - tp
        fp = cm[:, i].sum() - tp
        tn = cm.sum() - (tp + fn + fp)

        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1_score)
        specificities.append(specificity)

    return {
        'f1_score': np.mean(f1_scores),
        'sensitivity': np.mean(recalls),
        'specificity': np.mean(specificities)
    }

# Custom callback to save metrics and checkpoints
class SaveMetricsAndCheckpoints(keras.callbacks.Callback):
    def __init__(self, validation_data, log_file_path='/kaggle/working/metrics_log.csv', save_interval=10):
        super().__init__()
        self.validation_data = validation_data
        self.log_file_path = log_file_path
        self.save_interval = save_interval

        # Initialize the CSV file with headers
        with open(self.log_file_path, mode='w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([
                'epoch',
                'train_loss', 'train_accuracy', 'train_precision', 'train_recall', 
                'train_top_1_accuracy', 'train_top_5_accuracy',
                'val_loss', 'val_accuracy', 'val_precision', 'val_recall',
                'val_top_1_accuracy', 'val_top_5_accuracy',
                'f1_score', 'sensitivity', 'specificity'
            ])
        print(f"Metrics will be logged to: {self.log_file_path}")

    def on_epoch_end(self, epoch, logs=None):
        # Extract validation data
        val_images, val_labels = self.validation_data
        y_pred = self.model.predict(val_images)
        y_pred_labels = tf.argmax(y_pred, axis=1).numpy()
        y_true_labels = tf.argmax(val_labels, axis=1).numpy()

        # Calculate additional metrics
        additional_metrics = calculate_metrics(y_true_labels, y_pred_labels)

        # Save metrics to the CSV file
        with open(self.log_file_path, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([
                epoch + 1,
                logs.get('loss'), logs.get('accuracy'), logs.get('precision'), logs.get('recall'),
                logs.get('top_1_accuracy'), logs.get('top_5_accuracy'),
                logs.get('val_loss'), logs.get('val_accuracy'), logs.get('val_precision'), logs.get('val_recall'),
                logs.get('val_top_1_accuracy'), logs.get('val_top_5_accuracy'),
                additional_metrics['f1_score'], additional_metrics['sensitivity'], additional_metrics['specificity']
            ])
        
        print(f"Epoch {epoch + 1}: Metrics logged.")

        # Save model checkpoint every nth epoch
        if (epoch + 1) % self.save_interval == 0:
            checkpoint_filepath = (
                f"/kaggle/working/model-"
                f"{epoch + 1:02d}-"
                f"val_acc_{logs['val_accuracy']:.4f}.keras"
            )
            self.model.save(checkpoint_filepath)
            print(f"Model checkpoint saved at: {checkpoint_filepath}")

# Validation data
validation_data = next(iter(validation_generator))

# Initialize callback
metrics_and_checkpoint_callback = SaveMetricsAndCheckpoints(
    validation_data=(validation_data[0], validation_data[1]),
    log_file_path='/kaggle/working/metrics_log.csv',
    save_interval=10
)


Metrics will be logged to: /kaggle/working/metrics_log.csv


## Model defining and compilation

In [5]:
keras.backend.clear_session() #clear backend
shape=(512,512,3)
input_tensor=keras.Input(shape=shape)
base_model=keras.applications.<model_name>(input_tensor=input_tensor,weights='imagenet',include_top=False)
# Replace the model name with the models which you want to evaluate DenseNet169, ResNet50, EfficientNetB0, Xception and MobileNetV2
avg=keras.layers.AveragePooling2D(3,padding='valid')(base_model.output)
depthw=keras.layers.DepthwiseConv2D(5,
                                      depthwise_initializer=keras.initializers.RandomNormal(mean=0.0,stddev=0.01),
                                      bias_initializer=keras.initializers.Zeros(),depthwise_constraint=keras.constraints.NonNeg())(avg)
model=keras.Model(inputs=base_model.input, outputs=preds)  

##################################
for layer in model.layers:
  layer.trainable = True
#Determine adaptive learning rate with an initialization value of 0.045 and decay of 0.94 every two epochs.
lr_schedule =keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.045,
    decay_steps=2*int(len(train_generator.filenames)/batch_size),
    decay_rate=0.94,
    staircase=True)
optimizer=keras.optimizers.SGD(momentum=0.9,learning_rate=lr_schedule)
# Compile the model
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.TopKCategoricalAccuracy(k=1, name='top_1_accuracy'),
        tf.keras.metrics.TopKCategoricalAccuracy(k=5, name='top_5_accuracy')
    ]
)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


## Model Fitting

In [6]:
hist=model.fit(
    train_generator, 
    epochs=130,
    validation_data=validation_generator,
    shuffle=True,
    callbacks=metrics_and_checkpoint_callback) #start training


Epoch 1/130


  self._warn_if_super_not_called()
I0000 00:00:1734156888.000264     106 service.cc:145] XLA service 0x79c078003ce0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1734156888.000329     106 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1734156888.000335     106 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
2024-12-14 06:15:13.524603: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng4{} for conv (f32[15,96,257,257]{3,2,1,0}, u8[0]{0}) custom-call(f32[15,96,259,259]{3,2,1,0}, f32[96,1,3,3]{3,2,1,0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, feature_group_count=96, custom_call_target="__cudnn$convForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0}} is taking a while...
2024-12-14 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/stepstep - accuracy: 0.1315 - loss: 3.6599 - precision: 0.4087 - recall: 0.0193 - top_1_accuracy: 0.1315 - top_5_accuracy: 0.332
Epoch 1: Metrics logged.
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m476s[0m 1s/step - accuracy: 0.1316 - loss: 3.6590 - precision: 0.4090 - recall: 0.0193 - top_1_accuracy: 0.1316 - top_5_accuracy: 0.3326 - val_accuracy: 0.2485 - val_loss: 3.0686 - val_precision: 0.4190 - val_recall: 0.1351 - val_top_1_accuracy: 0.2485 - val_top_5_accuracy: 0.5813
Epoch 2/130
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step/step - accuracy: 0.2961 - loss: 2.6856 - precision: 0.6126 - recall: 0.1206 - top_1_accuracy: 0.2961 - top_5_accuracy: 0.629
Epoch 2: Metrics logged.
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m357s[0m 977ms/step - accuracy: 0.2961 - loss: 2.6855 - precision: 0.6126 - recall: 0.1207 - top_1_accuracy: 0.2961 - top_5_accuracy: 0.6291 - val

KeyboardInterrupt: 

In [7]:
import kagglehub

kagglehub.login()

# Replace with path to directory containing model files.
LOCAL_MODEL_DIR = 'path to the last checkpoint you want to save'

MODEL_SLUG = 'my_model' # Replace with model slug.

# Learn more about naming model variations at
# https://www.kaggle.com/docs/models#name-model.
VARIATION_SLUG = 'default' # Replace with variation slug.

kagglehub.model_upload(
  handle = f"use you handle info to save the checkpoint directly on your kaggle ID",
  local_model_dir = LOCAL_MODEL_DIR,
  version_notes = 'version update dates')

VBox(children=(HTML(value='<center> <img\nsrc=https://www.kaggle.com/static/images/site-logo.png\nalt=\'Kaggle…

Uploading Model https://www.kaggle.com/models/kartikgarg74/my_model/keras/default ...
Model 'my_model' does not exist or access is forbidden for user 'kartikgarg74'. Creating or handling Model...
Model 'my_model' Created.
Starting upload for file /kaggle/working/model-100-val_acc_0.7134.keras


Uploading: 100%|██████████| 33.8M/33.8M [00:00<00:00, 34.1MB/s]

Upload successful: /kaggle/working/model-100-val_acc_0.7134.keras (32MB)





Your model instance has been created.
Files are being processed...
See at: https://www.kaggle.com/models/kartikgarg74/my_model/keras/default


In [5]:
import tensorflow as tf
# Path to the checkpoint and metrics log
checkpoint_path = 'path to the last saved checkpoint'
# Load the model from the checkpoint
model = keras.models.load_model(checkpoint_path)

validation_data = next(iter(validation_generator))
# Resume training
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=130,  # Train for the remaining epochs
    callbacks=[metrics_and_checkpoint_callback],
    initial_epoch=# last saved epoch
)


Epoch 101/130


  self._warn_if_super_not_called()
I0000 00:00:1734233839.056444     107 service.cc:145] XLA service 0x7f2330006ed0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1734233839.056512     107 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1734233839.056517     107 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
2024-12-15 03:37:46.750352: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng4{} for conv (f32[15,96,257,257]{3,2,1,0}, u8[0]{0}) custom-call(f32[15,96,259,259]{3,2,1,0}, f32[96,1,3,3]{3,2,1,0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, feature_group_count=96, custom_call_target="__cudnn$convForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0}} is taking a while...
2024-12-15 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/stepstep - accuracy: 0.9897 - loss: 0.0387 - precision: 0.9975 - recall: 0.9896 - top_1_accuracy: 0.9897 - top_5_accuracy: 0.994
Epoch 101: Metrics logged.
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m523s[0m 1s/step - accuracy: 0.9897 - loss: 0.0387 - precision: 0.9975 - recall: 0.9896 - top_1_accuracy: 0.9897 - top_5_accuracy: 0.9941 - val_accuracy: 0.7149 - val_loss: 1.4478 - val_precision: 0.7555 - val_recall: 0.6963 - val_top_1_accuracy: 0.7149 - val_top_5_accuracy: 0.9261
Epoch 102/130
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/stepep - accuracy: 0.9882 - loss: 0.0484 - precision: 0.9969 - recall: 0.9870 - top_1_accuracy: 0.9882 - top_5_accuracy: 0.993
Epoch 102: Metrics logged.
[1m358/358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m395s[0m 1s/step - accuracy: 0.9882 - loss: 0.0483 - precision: 0.9969 - recall: 0.9870 - top_1_accuracy: 0.9882 - top_5_accuracy: 0.9934 - val

In [8]:
import kagglehub

kagglehub.login()

# Replace with path to directory containing model files.
LOCAL_MODEL_DIR = '/kaggle/working/model-130-val_acc_0.7142.keras'

MODEL_SLUG = 'my_model' # Replace with model slug.

# Learn more about naming model variations at
# https://www.kaggle.com/docs/models#name-model.
VARIATION_SLUG = 'default' # Replace with variation slug.

kagglehub.model_upload(
  handle = f"kartikgarg74/{MODEL_SLUG}/keras/{VARIATION_SLUG}",
  local_model_dir = LOCAL_MODEL_DIR,
  version_notes = 'Update 2024-12-14')

VBox(children=(HTML(value='<center> <img\nsrc=https://www.kaggle.com/static/images/site-logo.png\nalt=\'Kaggle…

Uploading Model https://www.kaggle.com/models/kartikgarg74/my_model/keras/default ...
Starting upload for file /kaggle/working/model-130-val_acc_0.7142.keras


Uploading: 100%|██████████| 33.8M/33.8M [00:00<00:00, 53.0MB/s]

Upload successful: /kaggle/working/model-130-val_acc_0.7142.keras (32MB)





Your model instance version has been created.
Files are being processed...
See at: https://www.kaggle.com/models/kartikgarg74/my_model/keras/default
