In [2]:
import os
import time
import math
import random
import subprocess

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm
from glob import glob
from pathlib import Path
from tabulate import tabulate
from shutil import copy, copytree
from typing import Optional, Dict

import tensorflow as tf
from keras import layers
import keras.backend as K
from keras.layers import *
from keras.models import Model
from keras.optimizers import Adam
from keras.utils import plot_model
from keras.preprocessing import image
from keras.applications import DenseNet121, ConvNeXtSmall
from keras.metrics import Precision, Recall, AUC
from keras.callbacks import LearningRateScheduler, EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

2024-07-20 10:13:19.641079: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-20 10:13:19.641179: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-20 10:13:19.763575: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
def download_and_extract(download_url, download_path, output_dir):
    try:
        check_dependencies()

        # Download the file without progress display
        subprocess.run(["wget", "-q", download_url, "-O", download_path], check=True)

        # Check if the download was successful before proceeding
        if os.path.exists(download_path):
            # Create the output directory
            os.makedirs(output_dir, exist_ok=True)

            # Extract the contents
            extract_command = ["tar", "-xf", download_path, "-C", output_dir]
            subprocess.run(extract_command, check=True)

            # Remove the tar file after extraction
            os.remove(download_path)
            print("Download and extraction completed successfully.")
        else:
            print("Error: Failed to download the dataset.")
    except subprocess.CalledProcessError as e:
        print(f"Error: {str(e)}")
        
def check_dependencies():
    try:
        subprocess.run(["wget", "--version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
        subprocess.run(["tar", "--version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error: Dependency check failed. {e}")
        raise

In [4]:
# Variables
download_url = "https://data.caltech.edu/records/nyy15-4j048/files/256_ObjectCategories.tar"
download_path = "/kaggle/working/256_ObjectCategories.tar"
# download_path = "/content/256_ObjectCategories.tar" # colab
output_dir = "/kaggle/working/data"
# output_dir = "/content/data" # colab

# Function call
download_and_extract(download_url, download_path, output_dir)

Download and extraction completed successfully.


In [5]:
data_path = Path(r"/kaggle/working/data/256_ObjectCategories") # for kaggle
# data_path = Path(r"/content/data/256_ObjectCategories") # for colab


In [6]:
def count_images_per_class(path):
    """
    Prints the number of images in each class in the dataset.

    Parameters:
    -----------
    path: str
        A string representing the path to the data.

    Returns:
    --------
    None. Prints the number of images in each class to console.
    """

    # Printing the header for the output.
    print(f'{"Classes":>22} | {"Images":^6}')
    print("="*40)

    # Looping through each folder in the data path.
    for folder in os.listdir(path):

        # Counting the number of files in the current folder.
        sample_size = len(os.listdir(path / folder))

        # Printing the folder name and the number of files in it.
        print(f"{folder.strip():<23s}|{sample_size}")

# Call the function with the data path as an argument
count_images_per_class(data_path)

               Classes | Images
048.conch              |103
140.menorah-101        |89
130.license-plate      |91
171.refrigerator       |84
250.zebra              |96
139.megaphone          |86
051.cowboy-hat         |114
165.pram               |88
153.palm-pilot         |93
146.mountain-bike      |82
076.football-helmet    |84
084.giraffe            |84
210.syringe            |111
249.yo-yo              |100
147.mushroom           |202
022.buddha-101         |97
169.radio-telescope    |92
006.basketball-hoop    |90
054.diamond-ring       |118
097.harmonica          |89
254.greyhound          |95
068.fern               |110
205.superman           |87
233.tuning-fork        |100
142.microwave          |107
199.spoon              |105
236.unicorn            |97
132.light-house        |190
106.horseshoe-crab     |87
007.bat                |106
160.pez-dispenser      |83
239.washing-machine    |84
092.grapes             |201
101.head-phones        |138
109.hot-tub            |156
072.fire

In [7]:
 def prepare_data(path: str, b_size: int) -> tuple:
    """Prepares training, validation, and test dataframes along with their respective steps.

    Args:
        path (str): The path to the directory containing the data.
        b_size (int): The batch size for training.

    Returns:
        tuple: A tuple containing the training dataframe, validation dataframe, test dataframe,
        training steps, validation steps, and test steps.
    """
    file_paths = []
    labels = []

    for claass in sorted(os.listdir(path)):
        c_pth = os.path.join(path, claass)

        for file in os.listdir(c_pth):
            file_path = os.path.join(c_pth, file)
            file_paths.append(file_path)
            labels.append(claass)

    print(f"Files: {len(file_paths)}\nLabels: {len(labels)}\n")

    files_series = pd.Series(file_paths, name="file_paths")
    labels_series = pd.Series(labels, name="labels")

    d = pd.concat([files_series, labels_series], axis=1)

    train_d, test_data_d = train_test_split(d, test_size=.2, stratify=d.labels, random_state=81)
    train_d, valid_d = train_test_split(train_d, test_size=.2, stratify=train_d.labels, random_state=81)

    print(f"Training Data: {train_d.shape[0]} samples\nTesting Data: {test_data_d.shape[0]} samples\nValidation Data: {valid_d.shape[0]} samples\n")

    return d, labels, train_d, valid_d, test_data_d

BATCH_SIZE = 32
df, labels, train_df, valid_df, test_df = prepare_data(data_path, BATCH_SIZE)

Files: 30609
Labels: 30609

Training Data: 19589 samples
Testing Data: 6122 samples
Validation Data: 4898 samples



In [8]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def load_data(df, b_size=BATCH_SIZE, shuffle=True, random_state=81, workers=1):
    generator = ImageDataGenerator(
        rescale=1 / 255.,
    )

    data = generator.flow_from_dataframe(
        df,
        x_col="file_paths",
        y_col="labels",
        target_size=(224, 224),
        class_mode="categorical",
        shuffle=shuffle,
        batch_size=b_size,
        seed=random_state,
        workers=workers
    )

    # Print a warning for invalid filenames
    invalid_filenames = df.loc[~df['file_paths'].isin(data.filenames), 'file_paths']
    if not invalid_filenames.empty:
        print("Warning: Invalid filenames found and will be ignored:", invalid_filenames.tolist())

    return data

# Load the training, validation, and test data with consistent random_state
train_data = load_data(train_df)
valid_data = load_data(valid_df, shuffle=False)
test_data = load_data(test_df, shuffle=False)

Found 19588 validated image filenames belonging to 257 classes.
Found 4898 validated image filenames belonging to 257 classes.
Found 6121 validated image filenames belonging to 257 classes.




In [9]:
model = ConvNeXtSmall(weights='imagenet', include_top=False)
model.trainable = True

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/convnext/convnext_small_notop.h5
[1m198551472/198551472[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step


In [12]:

layer_name_1 = 'convnext_small_stage_3_block_1_pointwise_conv_2'

selected_layer_1_output = model.get_layer(layer_name_1).output
x = MaxMinPooling2D(pool_size=(5, 5), name="maxmin_pooling")(selected_layer_1_output)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = Conv2D(48, (3, 3), activation='relu', padding='same')(x)
x = GlobalAveragePooling2D()(x)

layer_name_2 = 'layer_normalization'

selected_layer_2_output = model.get_layer(layer_name_2).output
selected_layer_2_output = GlobalAveragePooling2D()(selected_layer_2_output)

concatenated_output = tf.keras.layers.concatenate([selected_layer_2_output, x], axis=-1)

dense_output = tf.keras.layers.Dense(257, activation='softmax')(concatenated_output)

new_model = tf.keras.Model(inputs=model.input, outputs=dense_output)

new_model.summary()

In [13]:
import tensorflow.keras.backend as K

def create_metrics():
    """
    Creates instances of various metrics for evaluating model performance.
    """
    @tf.function
    def f1_score(y_true, y_pred):
        """Calculates the F1 score."""
        tp = true_positive(y_true, y_pred)
        fp = false_positive(y_true, y_pred)
        fn = false_negative(y_true, y_pred)

        precision = tp / (tp + fp + K.epsilon())
        recall = tp / (tp + fn + K.epsilon())

        return 2 * (precision * recall) / (precision + recall + K.epsilon())

    @tf.function
    def specificity(y_true, y_pred):
        """Calculates the specificity."""
        tn = true_negative(y_true, y_pred)
        fp = false_positive(y_true, y_pred)

        return tn / (tn + fp + K.epsilon())

    @tf.function
    def sensitivity(y_true, y_pred):
        """Calculates the sensitivity."""
        tp = true_positive(y_true, y_pred)
        fn = false_negative(y_true, y_pred)

        return tp / (tp + fn + K.epsilon())

    @tf.function
    def mcc(y_true, y_pred):
        """
        Calculates the Matthews correlation coefficient (MCC).
        
        This approach allows for a nuanced assessment of the model's ability to distinguish
        between different classes, making it particularly valuable in scenarios where classes
        may have varying levels of significance.
        
        """
        tp = true_positive(y_true, y_pred)
        tn = true_negative(y_true, y_pred)
        fp = false_positive(y_true, y_pred)
        fn = false_negative(y_true, y_pred)

        numerator = (tp * tn - fp * fn)
        denominator = K.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

        return numerator / (denominator + K.epsilon())

    @tf.function
    def true_positive(y_true, y_pred):
        """Calculates the number of true positives."""
        y_pred_pos, _, y_pos, _ = calculate_confusion_matrix(y_true, y_pred)
        return K.sum(y_pos * y_pred_pos)

    @tf.function
    def false_positive(y_true, y_pred):
        """Calculates the number of false positives."""
        y_pred_pos, _, _, y_neg = calculate_confusion_matrix(y_true, y_pred)
        return K.sum(y_neg * y_pred_pos)

    @tf.function
    def false_negative(y_true, y_pred):
        """Calculates the number of false negatives."""
        _, y_pred_neg, y_pos, _ = calculate_confusion_matrix(y_true, y_pred)
        return K.sum(y_pos * y_pred_neg)

    @tf.function
    def true_negative(y_true, y_pred):
        """Calculates the number of true negatives."""
        _, y_pred_neg, _, y_neg = calculate_confusion_matrix(y_true, y_pred)
        return K.sum(y_neg * y_pred_neg)

    @tf.function
    def calculate_confusion_matrix(y_true, y_pred):
        """Calculates the components of the confusion matrix."""
        y_pred_pos = K.round(K.clip(y_pred, 0, 1))
        y_pred_neg = 1 - y_pred_pos
        y_pos = K.round(K.clip(y_true, 0, 1))
        y_neg = 1 - y_pos
        return y_pred_pos, y_pred_neg, y_pos, y_neg

    precision_metric = Precision()
    recall_metric = Recall()
    f1_score_metric = f1_score
    specificity_metric = specificity
    sensitivity_metric = sensitivity
    mcc_metric = mcc
    auc_metric = AUC()

    return (
        precision_metric,
        recall_metric,
        f1_score_metric,
        specificity_metric,
        sensitivity_metric,
        mcc_metric,
        auc_metric,
    )

custom_metrics = create_metrics()

# precision_metric, recall_metric, f1_score_metric, specificity_metric, sensitivity_metric, mcc_metric, auc_metric = custom_metrics

In [14]:
class TimeCallback(tf.keras.callbacks.Callback):
    """Custom callback to record epoch times and total training time."""

    def on_train_begin(self, logs=None):
        """Initialize training start time and epoch times."""
        self.train_start_time = time.perf_counter()
        self.epoch_times = []

    def on_epoch_begin(self, epoch, logs=None):
        """Record start time of each epoch."""
        self.epoch_start_time = time.perf_counter()

    def on_epoch_end(self, epoch, logs=None):
        """Calculate and store epoch time."""
        epoch_time = time.perf_counter() - self.epoch_start_time
        self.epoch_times.append(epoch_time)

    def on_train_end(self, logs=None):
        """Calculate total training time."""
        self.total_train_time = time.perf_counter() - self.train_start_time

time_callback = TimeCallback()

In [15]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [16]:
from keras.callbacks import LearningRateScheduler

learningRate = 1e-4
decayRate = 0.97

def lr_schedule(epoch):
    if epoch < 2:
        return learningRate
    else:
        return learningRate * decayRate ** epoch

learning_callback = LearningRateScheduler(lr_schedule)

In [18]:
import warnings
warnings.filterwarnings("ignore")

custom_metrics = list(create_metrics())
custom_metrics.append("accuracy")

new_model.compile(
    optimizer=Adam(learning_rate=learningRate),
    loss='categorical_crossentropy',
    metrics=custom_metrics
)

history = new_model.fit(
    train_data,
    validation_data=valid_data,
    epochs=30,
    callbacks=[time_callback, early_stopping]
)

Epoch 1/30


I0000 00:00:1721470913.587718     122 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1721470913.684758     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721470913.685236     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721470913.685699     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721470913.686826     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721470913.687247     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721470913.687690     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721470913.688132     122 graph_launch.cc:671] Fallback to op-by-op mode because m

[1m613/613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 516ms/step - accuracy: 0.2431 - auc_2: 0.7367 - f1_score: 0.1530 - loss: 4.2608 - mcc: 0.2061 - precision_2: 0.7878 - recall_2: 0.1020 - sensitivity: 0.1018 - specificity: 1.0000

W0000 00:00:1721471238.031537     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721471238.031924     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721471238.032346     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721471238.033336     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721471238.033697     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721471238.034043     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721471238.034386     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721471238.034727     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m613/613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m442s[0m 570ms/step - accuracy: 0.2435 - auc_2: 0.7370 - f1_score: 0.1534 - loss: 4.2584 - mcc: 0.2065 - precision_2: 0.7880 - recall_2: 0.1023 - sensitivity: 0.1021 - specificity: 1.0000 - val_accuracy: 0.7834 - val_auc_2: 0.9826 - val_f1_score: 0.7678 - val_loss: 0.9941 - val_mcc: 0.7812 - val_precision_2: 0.9390 - val_recall_2: 0.6511 - val_sensitivity: 0.6532 - val_specificity: 0.9998
Epoch 2/30
[1m613/613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m298s[0m 486ms/step - accuracy: 0.8570 - auc_2: 0.9930 - f1_score: 0.8472 - loss: 0.6238 - mcc: 0.8522 - precision_2: 0.9472 - recall_2: 0.7690 - sensitivity: 0.7690 - specificity: 0.9998 - val_accuracy: 0.8340 - val_auc_2: 0.9882 - val_f1_score: 0.8425 - val_loss: 0.6855 - val_mcc: 0.8458 - val_precision_2: 0.9247 - val_recall_2: 0.7742 - val_sensitivity: 0.7756 - val_specificity: 0.9998
Epoch 3/30
[1m613/613[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m299s[0m 487ms/st

In [19]:
execution_time_parts = []
avg_time_parts = []

for unit in [(3600, 'hours'), (60, 'minutes'), (1, 'seconds'), (0.001, 'milliseconds')]:
    total_value = int(time_callback.total_train_time // unit[0])
    avg_value = int(np.mean(time_callback.epoch_times) // unit[0])

    if total_value > 0 or unit[0] == 0.001:
        execution_time_parts.append((total_value, unit[1]))
    if avg_value > 0 or unit[0] == 0.001:
        avg_time_parts.append((avg_value, unit[1]))

    time_callback.total_train_time -= total_value * unit[0]
    time_callback.epoch_times = [time - avg_value * unit[0] for time in time_callback.epoch_times]

execution_time_string = ", ".join([
    f"{value:02d} {unit_str}" for value, unit_str in execution_time_parts if value > 0
])

avg_time_string = ", ".join([
    f"{value:02d} {unit_str}" for value, unit_str in avg_time_parts if value > 0
])

print(f"Model training took {execution_time_string}")
print(f"Average time per epoch: {avg_time_string}")

Model training took 32 minutes, 15 seconds, 380 milliseconds
Average time per epoch: 05 minutes, 22 seconds, 422 milliseconds


In [20]:
_ = new_model.evaluate(test_data)

[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 164ms/step - accuracy: 0.8604 - auc_2: 0.9897 - f1_score: 0.8691 - loss: 0.5469 - mcc: 0.8705 - precision_2: 0.9217 - recall_2: 0.8240 - sensitivity: 0.8240 - specificity: 0.9997


In [None]:
import numpy as np
from sklearn.metrics import classification_report

# Assuming model is your trained model
# Predict model for each class
y_pred_prob = new_model.predict(test_data)

# Convert probabilities to class labels
y_pred = np.argmax(y_pred_prob, axis=1)

# Convert y_test from one-hot encoded to categorical labels if needed
y_true = np.argmax(test_data, axis=1)

# Generate classification report
report = classification_report(y_true, y_pred)

# Print the report
print(report)


[1m  2/192[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m23s[0m 121ms/step

W0000 00:00:1721472795.210017     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721472795.210329     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721472795.210772     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721472795.211887     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721472795.212266     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721472795.212618     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721472795.212936     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1721472795.213252     122 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m192/192[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 155ms/step


In [None]:
def plot_metrics(history, metric_name_list, figure_size=(10, 6)):
    """
    Plots the given metrics for the training and validation sets.

    Args:
        history: a Keras History object containing the training history
        metric_name_list: a list of strings representing the names of the metrics to plot
        figure_size: a tuple specifying the size of the figure (width, height)

    Returns:
        None
    """
    # Check if history is a valid Keras History object
    if not isinstance(history, type(tf.keras.callbacks.History())):
        raise ValueError("Invalid Keras History object provided.")

    num_metrics = len(metric_name_list)

    # Create subplots
    figure, axes = plt.subplots(num_metrics, figsize=(figure_size[0], figure_size[1] * num_metrics))

    # Define x-axis range
    rng = range(1, len(history.history[metric_name_list[0]]) + 1)

    for ax, metric_name in zip(axes, metric_name_list):
        # Check if the metric exists in the history
        if metric_name not in history.history:
            raise ValueError(f"Metric '{metric_name}' not found in the training history.")

        metric = history.history[metric_name]
        v_metric = history.history.get(f"val_{metric_name}", None)

        # Plot training metric
        ax.plot(rng, metric, label=metric_name)

        # Plot validation metric if available
        if v_metric is not None:
            ax.plot(rng, v_metric, label=f"val_{metric_name}")

        ax.legend()
        ax.set_xlabel("Epochs")

        # Set y-axis label and title
        ylabel = metric_name.upper() if metric_name in ("auc", "mcc") else metric_name.capitalize()
        ax.set_ylabel(ylabel)
        ax.set_title(f"{ylabel} vs Epochs")

        # Set y-axis limits
        max_loss = max(max(metric), max(v_metric)) if v_metric is not None else max(metric)
        min_loss = min(min(metric), min(v_metric)) if v_metric is not None else min(metric)
        y_max = math.ceil(max_loss)

        if min_loss > 0 or max_loss > 1:
            ax.set_ylim(0, y_max)
        else:
            ax.set_ylim(min_loss, y_max)

        ax.grid(True, linestyle='--', alpha=0.5)
        ax.set_xlim(1, len(metric))

    plt.tight_layout()
    plt.show()

metric_names = ["loss", "accuracy", "precision_1", "recall_1", "f1_score",
                "specificity", "sensitivity", "mcc", "auc_1"]
plot_metrics(history, metric_names)