# Aerial Object Classification — Minimal Training Notebook

This notebook trains a **Bird vs Drone** classifier with a **single, consistent preprocessing path**:

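- Inputs are **RGB** and resized to **224×224** inside the model via `layers.Resizing(224, 224)`, so the saved model accepts images of any size (the data generators below already load images at 224×224).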
- EfficientNetB0 preprocessing is **baked into the model**.
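- We avoid `rescale=1./255` in the generators: EfficientNetB0 expects raw pixel values in `[0, 255]` and normalizes them internally, so rescaling would break train/inference parity.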
- The best model is saved as **`final_model_rgb224.keras`** alongside **`label_map.json`** and **`inference_meta.json`**.


In [1]:
import os, json, random, math
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Record the library versions this run used.
print("TF:", tf.__version__)
print("Keras:", keras.__version__)

# Seed every random number generator the notebook touches
# (Python's `random`, NumPy, and TensorFlow) with the same value.
SEED = 42
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(SEED)


TF: 2.19.0
Keras: 3.10.0


In [2]:
from google.colab import drive

# Mount Google Drive so the dataset folder is reachable from the Colab VM.
MOUNT_POINT = "/content/drive"
drive.mount(MOUNT_POINT)

# Project root on Drive; adjust to your folder.
# The data-loading cell reads these sub-folders (one directory per class inside each):
#   BASE_PATH/classification_dataset/train/
#   BASE_PATH/classification_dataset/valid/
#   BASE_PATH/classification_dataset/test/
BASE_PATH = "/content/drive/MyDrive/aerial_project"

Mounted at /content/drive


In [4]:
BASE_PATH = "/content/drive/MyDrive/aerial_project"  # adjust to your folder

# Dataset layout: BASE_PATH/classification_dataset/{train,valid,test}/<class_name>/
train_dir = os.path.join(BASE_PATH, "classification_dataset", "train")
val_dir   = os.path.join(BASE_PATH, "classification_dataset", "valid")
test_dir  = os.path.join(BASE_PATH, "classification_dataset", "test")

IMG_SIZE = (224, 224)
BATCH_SIZE = 32

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Light geometric augmentation for the training split only.
# No `rescale` here: pixel values are passed to the model unscaled.
train_datagen = ImageDataGenerator(
    rotation_range=8,
    width_shift_range=0.05,
    height_shift_range=0.05,
    horizontal_flip=True,
    fill_mode="nearest"
)
val_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()

# Options shared by all three splits. `flow_from_directory` resizes with
# nearest-neighbour interpolation by default; request bilinear explicitly so
# images loaded here are resized the same way the model's Resizing layer
# resizes raw images at inference time.
flow_kwargs = dict(
    target_size=IMG_SIZE,
    color_mode="rgb",
    batch_size=BATCH_SIZE,
    class_mode="binary",
    interpolation="bilinear",
)

# Training data is shuffled (seeded for reproducibility); validation and test
# keep directory order so predictions line up with `<gen>.classes`.
train_gen = train_datagen.flow_from_directory(train_dir, seed=SEED, **flow_kwargs)
val_gen = val_datagen.flow_from_directory(val_dir, shuffle=False, **flow_kwargs)
test_gen = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_kwargs)

# All splits must share one label mapping, otherwise metrics are meaningless.
assert val_gen.class_indices == train_gen.class_indices, "valid/ class folders differ from train/"
assert test_gen.class_indices == train_gen.class_indices, "test/ class folders differ from train/"

print("Class indices:", train_gen.class_indices)

# Persist the class-name -> index mapping next to the model for inference code.
with open("label_map.json", "w") as f:
    json.dump(train_gen.class_indices, f)
print("Saved label_map.json")


Found 2662 images belonging to 2 classes.
Found 442 images belonging to 2 classes.
Found 215 images belonging to 2 classes.
Class indices: {'bird': 0, 'drone': 1}
Saved label_map.json


In [5]:
from collections import Counter

# Balanced class weights: weight_c = total / (2 * n_c), so the less frequent
# class contributes proportionally more to the loss.
counts = Counter(train_gen.classes)
n0 = counts.get(0, 0)
n1 = counts.get(1, 0)
total = (n0 + n1) or 1  # fall back to 1 when no samples were found
class_weight = {
    label: total / (2.0 * max(1, n_samples))  # max(1, ...) guards against an empty class
    for label, n_samples in ((0, n0), (1, n1))
}
print("Class counts:", dict(counts))
print("Class weights:", class_weight)


Class counts: {np.int32(0): 1414, np.int32(1): 1248}
Class weights: {0: 0.9413012729844413, 1: 1.0665064102564104}


In [6]:
from tensorflow.keras.applications import efficientnet

# Start from a clean Keras state so re-running this cell does not pile up
# layer-name suffixes from previous builds.
keras.backend.clear_session()

# Variable-size RGB input; the model itself brings every image to 224x224.
inputs = keras.Input(shape=(None, None, 3), name="input_rgb")
resized = layers.Resizing(224, 224, interpolation="bilinear", name="resize_224")(inputs)
preprocessed = efficientnet.preprocess_input(resized)

# ImageNet-pretrained backbone without its classification head, frozen for
# the first training phase.
base = efficientnet.EfficientNetB0(
    include_top=False,
    weights="imagenet",
    input_tensor=preprocessed,
)
base.trainable = False

# Binary classification head: pooled features -> 256-unit ReLU -> dropout -> sigmoid.
pooled = layers.GlobalAveragePooling2D()(base.output)
hidden = layers.Dense(256, activation="relu")(pooled)
hidden = layers.Dropout(0.4)(hidden)
outputs = layers.Dense(1, activation="sigmoid")(hidden)

model = keras.Model(inputs=inputs, outputs=outputs, name="bird_drone_efficientnet_b0")
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy", keras.metrics.AUC(name="auc")],
)
model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
ckpt_path = "final_model_rgb224.keras"

# Stop once val_loss has not improved for 6 epochs and roll back to the best
# weights; independently keep the best-val_loss model on disk.
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=6,
    restore_best_weights=True,
)
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath=ckpt_path,
    monitor="val_loss",
    save_best_only=True,
)
callbacks = [early_stop, checkpoint]

# Phase 1: train only the classification head on top of the frozen backbone.
fit_options = dict(
    validation_data=val_gen,
    epochs=20,
    class_weight=class_weight,
    callbacks=callbacks,
)
history = model.fit(train_gen, **fit_options)
print("Best checkpoint saved to:", ckpt_path)


  self._warn_if_super_not_called()


Epoch 1/20
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m670s[0m 8s/step - accuracy: 0.9264 - auc: 0.9758 - loss: 0.1904 - val_accuracy: 0.9796 - val_auc: 0.9984 - val_loss: 0.0501
Epoch 2/20
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m277s[0m 3s/step - accuracy: 0.9914 - auc: 0.9998 - loss: 0.0265 - val_accuracy: 0.9796 - val_auc: 0.9962 - val_loss: 0.0567
Epoch 3/20
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 3s/step - accuracy: 0.9981 - auc: 1.0000 - loss: 0.0151 - val_accuracy: 0.9842 - val_auc: 0.9966 - val_loss: 0.0572
Epoch 4/20
[1m81/84[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m8s[0m 3s/step - accuracy: 0.9985 - auc: 1.0000 - loss: 0.0101 

In [None]:
# Phase 2: unfreeze the last 40 backbone layers for fine-tuning.
# BatchNormalization layers are deliberately left frozen: making them trainable
# would also make them update their moving mean/variance on this small dataset,
# which can undo the pretrained features (see the Keras fine-tuning guidance).
for layer in base.layers[-40:]:
    if not isinstance(layer, layers.BatchNormalization):
        layer.trainable = True

# Changes to `trainable` only take effect after compiling again.
# A much smaller learning rate than in phase 1 keeps the pretrained weights
# from being disrupted.
model.compile(optimizer=keras.optimizers.Adam(1e-5),
              loss="binary_crossentropy",
              metrics=["accuracy", keras.metrics.AUC(name="auc")])

# Same data, class weights and callbacks as the frozen-backbone phase.
history_ft = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    class_weight=class_weight,
    callbacks=callbacks
)
print("After fine-tuning, best checkpoint saved to:", ckpt_path)


In [None]:
# Reload the checkpoint from disk so evaluation uses exactly what was saved.
best = keras.models.load_model(ckpt_path, compile=False)
print("Loaded model input shape:", best.input_shape)

# Smoke test: push one all-zero 224x224 RGB image through the loaded model.
y0 = best.predict(np.zeros((1, 224, 224, 3), dtype=np.float32), verbose=0)
print("Zero image score:", y0.ravel()[0])

from sklearn.metrics import roc_curve, roc_auc_score

# Scores are paired with `val_gen.classes` by position, which relies on the
# validation generator not shuffling.
val_scores = best.predict(val_gen, verbose=0).ravel()
val_true = val_gen.classes
assert len(val_scores) == len(val_true), "prediction/label count mismatch on the validation set"

auc = roc_auc_score(val_true, val_scores)
fpr, tpr, thr = roc_curve(val_true, val_scores)

# Pick the threshold maximising Youden's J = TPR - FPR.
# thr[0] is an artificial "predict nothing" threshold (np.inf in recent
# scikit-learn, max score + 1 in older releases); it is never a usable decision
# threshold and np.inf would be written to JSON as the non-standard `Infinity`,
# so it is excluded from the search.
youden = int(np.argmax((tpr - fpr)[1:])) + 1
best_thr = float(thr[youden])
print(f"AUC={auc:.4f} | Youden threshold={best_thr:.4f}")

# Persist the decision threshold for inference code.
with open("inference_meta.json", "w") as f:
    json.dump({"threshold": best_thr}, f, indent=2)
print("Saved inference_meta.json")


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Score the held-out test split and binarise with the validation-derived threshold.
test_scores = best.predict(test_gen, verbose=0).ravel()
test_true = test_gen.classes
y_pred = (test_scores >= best_thr).astype(int)

# Fix the label set and name order explicitly (names sorted by class index).
# Without `labels`, a class missing from both truth and predictions would
# shrink the confusion matrix and make classification_report raise because
# `target_names` no longer matches the number of classes found.
class_indices = train_gen.class_indices
label_ids = sorted(class_indices.values())
target_names = sorted(class_indices, key=class_indices.get)

print("Confusion matrix:\n", confusion_matrix(test_true, y_pred, labels=label_ids))
print("\nClassification report:\n", classification_report(test_true, y_pred, labels=label_ids, target_names=target_names))
