In [3]:
import pandas as pd
import os
import tensorflow as tf
import time
from sklearn.metrics import classification_report
from tensorflow.keras.applications import ResNet101V2
from tensorflow.keras import layers, models
from tensorflow.keras import regularizers

In [5]:

# Set base path for uploaded files
base_path = "/home/samng/MADS-wsl/Deep_learning/Assignment2/Bird dataset/CUB_200_2011/"

image_dir = f"{base_path}images"  # folder containing all class folders and images


# Load metadata files
images_df = pd.read_csv(base_path + "images.txt", sep=" ", header=None, names=["image_id", "file_path"])
labels_df = pd.read_csv(base_path + "image_class_labels.txt", sep=" ", header=None, names=["image_id", "class_id"])
split_df = pd.read_csv(base_path + "train_test_split.txt", sep=" ", header=None, names=["image_id", "is_train"])

# Merge into one master DataFrame
metadata_df = images_df.merge(labels_df, on="image_id")
metadata_df = metadata_df.merge(split_df, on="image_id")

# Preview result
metadata_df.head()


Unnamed: 0,image_id,file_path,class_id,is_train
0,1,001.Black_footed_Albatross/Black_Footed_Albatr...,1,0
1,2,001.Black_footed_Albatross/Black_Footed_Albatr...,1,1
2,3,001.Black_footed_Albatross/Black_Footed_Albatr...,1,0
3,4,001.Black_footed_Albatross/Black_Footed_Albatr...,1,1
4,5,001.Black_footed_Albatross/Black_Footed_Albatr...,1,1


In [8]:

# Step 5: Add full image path
metadata_df["full_path"] = metadata_df["file_path"].apply(lambda x: f"{image_dir}/{x}")

# Step 6: Split into train and test sets
train_df  = metadata_df[metadata_df["is_train"] == 1].reset_index(drop=True)
test_df  = metadata_df[metadata_df["is_train"] == 0].reset_index(drop=True)

metadata_df.head()


Unnamed: 0,image_id,file_path,class_id,is_train,full_path
0,1,001.Black_footed_Albatross/Black_Footed_Albatr...,1,0,/home/samng/MADS-wsl/Deep_learning/Assignment2...
1,2,001.Black_footed_Albatross/Black_Footed_Albatr...,1,1,/home/samng/MADS-wsl/Deep_learning/Assignment2...
2,3,001.Black_footed_Albatross/Black_Footed_Albatr...,1,0,/home/samng/MADS-wsl/Deep_learning/Assignment2...
3,4,001.Black_footed_Albatross/Black_Footed_Albatr...,1,1,/home/samng/MADS-wsl/Deep_learning/Assignment2...
4,5,001.Black_footed_Albatross/Black_Footed_Albatr...,1,1,/home/samng/MADS-wsl/Deep_learning/Assignment2...


In [9]:
print("Train set size:", len(train_df ))
print("Test set size:", len(test_df ))

Train set size: 5994
Test set size: 5794


In [45]:
# Step 4.1: Set hyperparameters for the experiment
learning_rate = 0.01
momentum = 0.0
weight_decay = 0.0001  # Not used directly unless you apply kernel_regularizer
num_epochs = 30    # paper used 300 (fine tuning)
batch_size = 32   # paper use 256 (colab out of memory when using 256)

# steps_per_epoch = 3 # Not use here , only used for small trtain set

In [46]:



# Step 2A: Image loading and preprocessing function
def load_and_preprocess_image(path, label):
    # This line loads the image from Google Drive
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)

    # Resize (shorter side = 256) — using resize_with_pad to preserve aspect
    image = tf.image.resize_with_pad(image, target_height=256, target_width=256)

    # Random crop to 224×224
    image = tf.image.random_crop(image, size=[224, 224, 3])

    # Data augmentation (to match paper)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.1)

    # Normalize with ImageNet mean/std
    image = tf.cast(image, tf.float32) / 255.0
    image = (image - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]

    return image, label



In [47]:
def load_and_preprocess_image_val(path, label):
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)

    # Resize and center crop
    image = tf.image.resize_with_pad(image, 256, 256)
    image = tf.image.central_crop(image, central_fraction=0.875)  # ~224/256 = 0.875

    # Normalize
    image = tf.cast(image, tf.float32) / 255.0
    image = (image - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]

    return image, label



In [48]:
# Convert Pandas train_df  into TensorFlow dataset
# Step 2B: Create TensorFlow dataset from train_df
train_paths = train_df ["full_path"].tolist()
train_labels = train_df ["class_id"].astype(int) - 1  # shift to 0-based index

train_dataset = tf.data.Dataset.from_tensor_slices((train_paths, train_labels))
train_dataset = train_dataset.map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

# Add caching and prefetching to optimize performance
train_dataset = train_dataset.cache().shuffle(1000).batch(batch_size).prefetch(tf.data.AUTOTUNE)


In [49]:
test_paths = test_df ["full_path"].tolist()
test_labels = test_df ["class_id"].astype(int) - 1

test_dataset = tf.data.Dataset.from_tensor_slices((test_paths, test_labels))
test_dataset = test_dataset.map(load_and_preprocess_image_val, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


In [50]:
# Step 4: Load and modify pretrained ResNet-101-V2

# Load base model
base_model = ResNet101V2(include_top=False, weights='imagenet', input_shape=(224, 224, 3), pooling='avg')

x = layers.Dense(
    200,
    activation='softmax',
    kernel_regularizer=regularizers.l2(weight_decay)  # L2 = weight decay
)(base_model.output)

model = models.Model(inputs=base_model.input, outputs=x)

2025-05-17 22:26:27.114063: W tensorflow/core/kernels/data/cache_dataset_ops.cc:916] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [51]:
# Step 5: Compile the model with optimizer and hyperparameters
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [52]:
# Skip for now: full set of images

start = time.time()

# Train model
training_metrics = model.fit(
    train_dataset,
    validation_data= test_dataset,
    epochs= num_epochs,
    # steps_per_epoch= steps_per_epoch    # Not use for full training image set
)

print(f"Time spent: {time.time() - start} seconds")

Epoch 1/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 356ms/step - accuracy: 0.1176 - loss: 4.5853 - val_accuracy: 0.1205 - val_loss: 4.9322
Epoch 2/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 276ms/step - accuracy: 0.4907 - loss: 2.4277 - val_accuracy: 0.2901 - val_loss: 3.1002
Epoch 3/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 288ms/step - accuracy: 0.7696 - loss: 1.2207 - val_accuracy: 0.4360 - val_loss: 2.2894
Epoch 4/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 279ms/step - accuracy: 0.9182 - loss: 0.6114 - val_accuracy: 0.4676 - val_loss: 2.1550
Epoch 5/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 285ms/step - accuracy: 0.9777 - loss: 0.3014 - val_accuracy: 0.5136 - val_loss: 1.9519
Epoch 6/30
[1m188/188[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 279ms/step - accuracy: 0.9945 - loss: 0.1838 - val_accuracy: 0.5381 - val_loss: 1.8487
Epoch 7/30

In [54]:
# Skip for now
start = time.time()

# Full evaluation on test set
y_true, y_pred = [], []
for images, labels in test_dataset:
    preds = model.predict(images, verbose=0)
    y_true.extend(labels.numpy())
    y_pred.extend(tf.argmax(preds, axis=1).numpy())

print(f"Time spent: {time.time() - start} seconds")

Time spent: 27.503771781921387 seconds


Top-1 accuracy =
Number of correct predictions (or total # of TP) /
Total predictions
​


Top-1 accuracy measures sample-level performance,

avg_accuracy measures class-level performance.


In [55]:



# Classification report
from sklearn.metrics import classification_report
class_map_df = pd.read_csv(base_path +"classes.txt", sep=" ", header=None, names=["class_id", "class_name"])
class_map = dict(zip(class_map_df["class_id"] - 1, class_map_df["class_name"]))
unique_labels = sorted(list(set(y_true)))
target_names = [class_map[i] for i in unique_labels]
report_dict = classification_report(y_true, y_pred, labels=unique_labels, target_names=target_names, output_dict=True, zero_division=0)

report_df = pd.DataFrame(report_dict).transpose()
report_df = report_df.loc[target_names].reset_index()
report_df.rename(columns={"index": "class_name"}, inplace=True)
class_id_map = {v: k for k, v in class_map.items()}
report_df.insert(0, "class_id", report_df["class_name"].map(class_id_map))
report_df = report_df[["class_id", "class_name", "precision", "recall", "f1-score", "support"]]

# Experiment settings
report_df["learning_rate"] = learning_rate
report_df["momentum"] = momentum
report_df["weight_decay"] = weight_decay
report_df["epochs"] = num_epochs
# report_df["steps_per_epoch"] = steps_per_epoch
report_df["Top1_accuracy"] = round(model.evaluate(test_dataset, verbose=0)[1], 3)
report_df["Top1_error"] = round(1.0 - report_df["Top1_accuracy"], 3)
report_df["ELR"] = round(learning_rate / (1 - momentum), 6)

# Macro metrics
macro_metrics = report_dict.get("macro avg", {})
report_df["macro_precision"] = round(macro_metrics.get("precision", 0.0), 3)
report_df["macro_recall"] = round(macro_metrics.get("recall", 0.0), 3)
report_df["macro_f1"] = round(macro_metrics.get("f1-score", 0.0), 3)

# Train/val loss
report_df["train_loss"] = round(training_metrics.history["loss"][-1], 3)
report_df["val_loss"] = round(training_metrics.history["val_loss"][-1], 3)

# Final report
report_df


Unnamed: 0,class_id,class_name,precision,recall,f1-score,support,learning_rate,momentum,weight_decay,epochs,Top1_accuracy,Top1_error,ELR,macro_precision,macro_recall,macro_f1,train_loss,val_loss
0,0,001.Black_footed_Albatross,0.548387,0.566667,0.557377,30.0,0.01,0.0,0.0001,30,0.579,0.421,0.01,0.596,0.581,0.582,0.043,1.774
1,1,002.Laysan_Albatross,0.700000,0.466667,0.560000,30.0,0.01,0.0,0.0001,30,0.579,0.421,0.01,0.596,0.581,0.582,0.043,1.774
2,2,003.Sooty_Albatross,0.466667,0.500000,0.482759,28.0,0.01,0.0,0.0001,30,0.579,0.421,0.01,0.596,0.581,0.582,0.043,1.774
3,3,004.Groove_billed_Ani,0.583333,0.700000,0.636364,30.0,0.01,0.0,0.0001,30,0.579,0.421,0.01,0.596,0.581,0.582,0.043,1.774
4,4,005.Crested_Auklet,0.588235,0.714286,0.645161,14.0,0.01,0.0,0.0001,30,0.579,0.421,0.01,0.596,0.581,0.582,0.043,1.774
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,195,196.House_Wren,0.365854,0.500000,0.422535,30.0,0.01,0.0,0.0001,30,0.579,0.421,0.01,0.596,0.581,0.582,0.043,1.774
196,196,197.Marsh_Wren,0.480000,0.400000,0.436364,30.0,0.01,0.0,0.0001,30,0.579,0.421,0.01,0.596,0.581,0.582,0.043,1.774
197,197,198.Rock_Wren,0.461538,0.600000,0.521739,30.0,0.01,0.0,0.0001,30,0.579,0.421,0.01,0.596,0.581,0.582,0.043,1.774
198,198,199.Winter_Wren,0.538462,0.700000,0.608696,30.0,0.01,0.0,0.0001,30,0.579,0.421,0.01,0.596,0.581,0.582,0.043,1.774
