In [None]:
from google.colab import drive
import os

# Mount Google Drive so the challenge files are reachable from the Colab VM.
drive.mount('/content/drive')

base_dir = "/content/drive/MyDrive/challenge_deep"

# Resolve every project path relative to the shared Drive folder.
AGREED_BOXES_CSV, BACKGROUND_BOXES_CSV, IMAGES_FOLDER = (
    os.path.join(base_dir, leaf)
    for leaf in ("agreed_boxes.csv", "random_background_boxes.csv", "data")
)

# Echo the resolved locations as a quick sanity check.
for _name, _path in (
    ("AGREED_BOXES_CSV", AGREED_BOXES_CSV),
    ("BACKGROUND_BOXES_CSV", BACKGROUND_BOXES_CSV),
    ("IMAGES_FOLDER", IMAGES_FOLDER),
):
    print(f"{_name}:", _path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
AGREED_BOXES_CSV: /content/drive/MyDrive/challenge_deep/agreed_boxes.csv
BACKGROUND_BOXES_CSV: /content/drive/MyDrive/challenge_deep/random_background_boxes.csv
IMAGES_FOLDER: /content/drive/MyDrive/challenge_deep/data


In [None]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
import cv2



AGREED_BOXES_CSV = "/content/drive/MyDrive/challenge_deep/agreed_boxes.csv"
BACKGROUND_BOXES_CSV = "/content/drive/MyDrive/challenge_deep/random_background_boxes.csv"

# "/content/data" is presumably a local copy of the images made for faster
# reads (TODO confirm where it is created). Nothing in this notebook creates
# it, so fall back to the Drive folder when it is missing instead of failing
# on every image read after a runtime restart.
LOCAL_IMAGES_FOLDER = "/content/data"
DRIVE_IMAGES_FOLDER = "/content/drive/MyDrive/challenge_deep/data"
IMAGES_FOLDER = (
    LOCAL_IMAGES_FOLDER if os.path.isdir(LOCAL_IMAGES_FOLDER) else DRIVE_IMAGES_FOLDER
)

# Class id given to the randomly sampled background boxes.
BACKGROUND_LABEL = 8
BOX_COLUMNS = ["image_name", "label", "x_center", "y_center", "width", "height"]

# Annotated boxes; the background table below is reshaped to the same columns.
df_species = pd.read_csv(AGREED_BOXES_CSV)

# Background boxes: align the column names with the species table, tag them
# with the background class and keep only the shared columns.
df_bg = (
    pd.read_csv(BACKGROUND_BOXES_CSV)
    .rename(columns={
        "bg_x_center": "x_center",
        "bg_y_center": "y_center",
        "bg_width": "width",
        "bg_height": "height",
    })
    .assign(label=BACKGROUND_LABEL)
    [BOX_COLUMNS]
)

# Merge both tables, force integer labels and shuffle with a fixed seed so the
# row order is reproducible across runs.
df_all = (
    pd.concat([df_species, df_bg], ignore_index=True)
    .astype({"label": int})
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

# Every sample is used for training; no validation split is held out here.
train_df = df_all.copy()

print(f"Total samples: {len(train_df)}")
print("\nTrain set label distribution:")
print(train_df["label"].value_counts())

Total samples: 1479

Train set label distribution:
label
0    414
8    282
1    192
5    128
2    109
7     96
3     94
6     91
4     73
Name: count, dtype: int64


In [None]:
# Show the combined, shuffled annotation table (pandas truncates the middle rows).
df_all

Unnamed: 0,image_name,label,x_center,y_center,width,height
0,0.40408057173709120.058779774002236440.1312640...,5,0.607747,0.606934,0.247396,0.224609
1,0.221066600649218240.98588868631582330.7887187...,6,0.434733,0.402100,0.246419,0.137207
2,0.0577500065339129340.52497339482277390.202119...,5,0.313965,0.399414,0.483398,0.566406
3,0.78122926470508450.0200100097668706180.931817...,5,0.542643,0.462891,0.270182,0.737305
4,0.77589813861635630.125271074186480980.8868120...,1,0.604492,0.802734,0.266276,0.343750
...,...,...,...,...,...,...
1474,0.77255618222900340.034430017735025630.8625841...,0,0.722656,0.512451,0.349609,0.332520
1475,0.59682257034479440.69402763007951150.59091281...,8,0.499837,0.254150,0.157227,0.206543
1476,0.101879714163278460.052416208709034850.978952...,5,0.825684,0.152832,0.347982,0.273438
1477,0.55996703273279180.93325592670723860.48582311...,8,0.776530,0.398926,0.187174,0.200195


In [None]:
import cv2
import numpy as np

# Number of leading rows of df_all used to estimate the typical box size.
N_SAMPLE_BOXES = 100

df_bb = df_all.head(N_SAMPLE_BOXES)
pixel_widths = []
pixel_heights = []

# Convert each relative box size to pixels using the size of its own image.
for image_name, w_rel, h_rel in zip(df_bb["image_name"], df_bb["width"], df_bb["height"]):
    image_path = os.path.join(IMAGES_FOLDER, image_name)

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file is missing
        # or unreadable; skip the row but report it.
        print(f"Warning: Couldn't load image {image_path}")
        continue

    img_h, img_w = img.shape[:2]

    pixel_widths.append(w_rel * img_w)
    pixel_heights.append(h_rel * img_h)

if pixel_widths:
    # Compute the average pixel width and height
    avg_width = np.mean(pixel_widths)
    avg_height = np.mean(pixel_heights)

    print("Average bounding box pixel width:", avg_width)
    print("Average bounding box pixel height:", avg_height)
else:
    # np.mean([]) would emit a RuntimeWarning and print nan; say what happened
    # instead (most likely IMAGES_FOLDER does not point at the images).
    avg_width = avg_height = float("nan")
    print("No images could be loaded; average bounding box size is undefined.")


Average bounding box pixel width: 753.5
Average bounding box pixel height: 802.79


In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50

class F1Macro(tf.keras.metrics.Metric):
    """Streaming macro-averaged F1 score for sparse integer labels.

    Per-class true-positive, false-positive and false-negative counts are
    accumulated across batches; ``result`` turns them into per-class F1 and
    returns the unweighted mean over all ``num_classes`` classes. Classes
    that have no counts yet contribute an F1 of 0 to that mean.

    Args:
        num_classes: Number of classes, i.e. the size of the last axis of
            ``y_pred``.
        name: Metric name shown in the training logs.
        **kwargs: Forwarded to ``tf.keras.metrics.Metric``.
    """

    def __init__(self, num_classes, name='f1_macro', **kwargs):
        super().__init__(name=name, **kwargs)
        self.num_classes = num_classes
        self.tp = self.add_weight(name='tp', shape=(num_classes,), initializer='zeros', dtype=tf.float32)
        self.fp = self.add_weight(name='fp', shape=(num_classes,), initializer='zeros', dtype=tf.float32)
        self.fn = self.add_weight(name='fn', shape=(num_classes,), initializer='zeros', dtype=tf.float32)

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate confusion counts for one batch.

        Args:
            y_true: Integer class ids, shape ``(batch,)`` or ``(batch, 1)``.
            y_pred: Per-class scores, shape ``(batch, num_classes)``.
            sample_weight: Optional scalar or per-sample weights applied to
                the counts.
        """
        # Flatten both sides: a (batch, 1) label tensor would otherwise
        # broadcast against the (batch, C) predictions to (batch, batch, C)
        # and silently corrupt the counts.
        y_pred = tf.reshape(tf.argmax(y_pred, axis=-1), [-1])
        y_true = tf.reshape(tf.cast(y_true, tf.int64), [-1])

        y_true_one_hot = tf.one_hot(y_true, depth=self.num_classes)
        y_pred_one_hot = tf.one_hot(y_pred, depth=self.num_classes)

        tp = y_true_one_hot * y_pred_one_hot
        fp = (1 - y_true_one_hot) * y_pred_one_hot
        fn = y_true_one_hot * (1 - y_pred_one_hot)

        if sample_weight is not None:
            # Column vector so one weight scales every class entry of a sample.
            weights = tf.reshape(tf.cast(sample_weight, tf.float32), [-1, 1])
            tp, fp, fn = tp * weights, fp * weights, fn * weights

        self.tp.assign_add(tf.reduce_sum(tp, axis=0))
        self.fp.assign_add(tf.reduce_sum(fp, axis=0))
        self.fn.assign_add(tf.reduce_sum(fn, axis=0))

    def result(self):
        """Return the mean of the per-class F1 scores accumulated so far."""
        # divide_no_nan yields 0 where a class has no predictions or labels.
        precision = tf.math.divide_no_nan(self.tp, self.tp + self.fp)
        recall = tf.math.divide_no_nan(self.tp, self.tp + self.fn)
        f1 = tf.math.divide_no_nan(2 * precision * recall, precision + recall)
        return tf.reduce_mean(f1)

    def reset_state(self):
        """Zero all accumulated counts (called by Keras between epochs)."""
        for var in (self.tp, self.fp, self.fn):
            var.assign(tf.zeros((self.num_classes,), dtype=tf.float32))

    # Older tf.keras releases call the plural name; keep it as an alias.
    reset_states = reset_state

    def get_config(self):
        """Include ``num_classes`` so the metric can be re-created from config."""
        config = super().get_config()
        config["num_classes"] = self.num_classes
        return config

In [None]:
# Size (in pixels) every crop is resized/padded to, and the network input size.
# Presumably chosen from the average box size measured earlier — TODO confirm.
TARGET_HEIGHT = 800
TARGET_WIDTH  = 800
# Labels 0-7 come from the annotated boxes; 8 is assigned to background boxes.
NUM_CLASSES = 9  

def decode_and_crop(image_path, label, x_center, y_center, w_rel, h_rel):
    """Load a JPEG, crop one bounding box and resize it to the target size.

    The box is given in relative, centre-based coordinates: ``x_center`` and
    ``w_rel`` are fractions of the image width, ``y_center`` and ``h_rel``
    fractions of the image height.

    Args:
        image_path: Scalar string tensor with the path of a JPEG file.
        label: Class id; returned unchanged.
        x_center, y_center: Relative centre of the box.
        w_rel, h_rel: Relative width and height of the box.

    Returns:
        ``(crop, label)`` where ``crop`` is a float32 tensor of shape
        ``(TARGET_HEIGHT, TARGET_WIDTH, 3)`` scaled to ``[0, 1]``; the aspect
        ratio is kept and the remainder is zero-padded.
    """
    x_center = tf.cast(x_center, tf.float32)
    y_center = tf.cast(y_center, tf.float32)
    w_rel    = tf.cast(w_rel, tf.float32)
    h_rel    = tf.cast(h_rel, tf.float32)

    image = tf.io.decode_jpeg(tf.io.read_file(image_path), channels=3)

    shape = tf.shape(image)
    img_h, img_w = shape[0], shape[1]
    img_h_f = tf.cast(img_h, tf.float32)
    img_w_f = tf.cast(img_w, tf.float32)

    # Box corners in pixels, rounded to the nearest integer position.
    x1 = tf.cast(tf.math.round((x_center - w_rel / 2.0) * img_w_f), tf.int32)
    y1 = tf.cast(tf.math.round((y_center - h_rel / 2.0) * img_h_f), tf.int32)
    x2 = tf.cast(tf.math.round((x_center + w_rel / 2.0) * img_w_f), tf.int32)
    y2 = tf.cast(tf.math.round((y_center + h_rel / 2.0) * img_h_f), tf.int32)

    # Clip each corner independently so a box that overhangs the left/top
    # edge is truncated rather than shifted into the image.
    x1 = tf.clip_by_value(x1, 0, img_w - 1)
    y1 = tf.clip_by_value(y1, 0, img_h - 1)
    # Force at least one pixel in each direction: crop_to_bounding_box
    # rejects empty crops, which tiny or degenerate boxes would produce.
    x2 = tf.clip_by_value(x2, x1 + 1, img_w)
    y2 = tf.clip_by_value(y2, y1 + 1, img_h)

    cropped = tf.image.crop_to_bounding_box(image, y1, x1, y2 - y1, x2 - x1)
    cropped = tf.image.resize_with_pad(cropped, TARGET_HEIGHT, TARGET_WIDTH)
    cropped = cropped / 255.0
    return cropped, label

def augment_image(image, label):
    """Randomly mirror an image horizontally, then vertically.

    Args:
        image: Image tensor to augment.
        label: Class id; passed through untouched.

    Returns:
        ``(augmented_image, label)``.
    """
    flipped = tf.image.random_flip_up_down(
        tf.image.random_flip_left_right(image)
    )
    return flipped, label

def build_dataset(df, images_dir, batch_size=4, shuffle=True):
    """Build the batched ``tf.data`` pipeline of augmented box crops.

    Args:
        df: DataFrame with columns ``image_name``, ``label``, ``x_center``,
            ``y_center``, ``width`` and ``height``.
        images_dir: Directory that the ``image_name`` values are relative to.
        batch_size: Number of crops per batch.
        shuffle: When true, reshuffle all rows at the start of every epoch.

    Returns:
        A dataset yielding ``(images, labels)`` batches.
    """
    full_paths = df["image_name"].apply(lambda name: os.path.join(images_dir, name)).values
    box_columns = tuple(
        df[col].values
        for col in ("label", "x_center", "y_center", "width", "height")
    )

    ds = tf.data.Dataset.from_tensor_slices((full_paths,) + box_columns)
    if shuffle:
        # Buffer covers the whole table, so the shuffle is a full permutation.
        ds = ds.shuffle(len(df), reshuffle_each_iteration=True)

    # Each element is a 6-tuple, which map() unpacks into the six arguments
    # of decode_and_crop.
    ds = ds.map(decode_and_crop, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.map(augment_image, num_parallel_calls=tf.data.AUTOTUNE)
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Shuffled, augmented training pipeline over every row of train_df, in batches of 4.
train_ds = build_dataset(train_df, IMAGES_FOLDER, batch_size=4, shuffle=True)

In [None]:
# ImageNet-pretrained ResNet50 backbone without its classification head.
base_model = ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(TARGET_HEIGHT, TARGET_WIDTH, 3),
)

# Classification head: pooled features -> 512-unit hidden layer -> softmax.
x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(512, activation="relu")(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(NUM_CLASSES, activation="softmax")(x)

model = tf.keras.Model(inputs=base_model.input, outputs=outputs)

# Integer labels, hence the sparse cross-entropy; no layer is frozen, so the
# whole network is fine-tuned.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=[F1Macro(num_classes=NUM_CLASSES)],
)

# NOTE(review): there is no validation data, so the reported f1_macro is
# measured on the training set only.
EPOCHS = 10
history = model.fit(train_ds, epochs=EPOCHS)

Epoch 1/10
[1m370/370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 121ms/step - f1_macro: 0.3631 - loss: 1.5616
Epoch 2/10
[1m370/370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 71ms/step - f1_macro: 0.5645 - loss: 0.9376
Epoch 3/10
[1m370/370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 70ms/step - f1_macro: 0.6801 - loss: 0.7509
Epoch 4/10
[1m370/370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 70ms/step - f1_macro: 0.7669 - loss: 0.5655
Epoch 5/10
[1m370/370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 70ms/step - f1_macro: 0.7975 - loss: 0.5218
Epoch 6/10
[1m370/370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 70ms/step - f1_macro: 0.7923 - loss: 0.4755
Epoch 7/10
[1m370/370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 70ms/step - f1_macro: 0.8083 - loss: 0.4447
Epoch 8/10
[1m370/370[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 70ms/step - f1_macro: 0.8348 - loss: 0.3759
Epoch 9/10
[1m370/370

In [None]:
import os
import tensorflow as tf
import numpy as np
import pandas as pd

# Folder on Drive that holds the test JPEGs to classify.
TEST_FOLDER = "/content/drive/MyDrive/challenge_deep/datatest"

# Same values as the crop size used for the training pipeline; the model's
# input shape is built from them, so they must stay in sync.
TARGET_HEIGHT = 800
TARGET_WIDTH  = 800

def load_and_preprocess_image(filepath):
    """Read a JPEG and prepare the whole image as model input.

    Args:
        filepath: Path of the JPEG file.

    Returns:
        A float32 tensor of shape ``(TARGET_HEIGHT, TARGET_WIDTH, 3)`` with
        values in ``[0, 1]``; the aspect ratio is kept and the rest is padded.
    """
    raw = tf.io.read_file(filepath)
    decoded = tf.io.decode_jpeg(raw, channels=3)
    padded = tf.image.resize_with_pad(decoded, TARGET_HEIGHT, TARGET_WIDTH)
    return padded / 255.0

# Images per inference batch.
PREDICT_BATCH_SIZE = 8

# Sort the listing: os.listdir returns entries in arbitrary order, which would
# make the row order of the CSV differ between runs.
test_files = sorted(
    os.path.join(TEST_FOLDER, f)
    for f in os.listdir(TEST_FOLDER)
    if f.lower().endswith('.jpg')
)
print(f"Found {len(test_files)} test images.")

if test_files:
    # One batched predict() call over a tf.data pipeline instead of one call
    # per image: much faster and prints a single progress bar.
    test_ds = (
        tf.data.Dataset.from_tensor_slices(test_files)
        .map(load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(PREDICT_BATCH_SIZE)
        .prefetch(tf.data.AUTOTUNE)
    )
    pred_labels = np.argmax(model.predict(test_ds), axis=1)
else:
    # predict() raises on an empty dataset; still write a header-only CSV.
    pred_labels = np.array([], dtype=int)

# The file name without its extension is the row id; the predicted class goes
# in the 'gt' column.
predictions = [
    {'idx': os.path.splitext(os.path.basename(filepath))[0], 'gt': int(pred_label)}
    for filepath, pred_label in zip(test_files, pred_labels)
]

pred_df = pd.DataFrame(predictions, columns=['idx', 'gt'])
pred_df.to_csv("predictions_v0.csv", index=False)
print("Saved predictions_v0.csv")


Found 1344 test images.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m