In [None]:

# Upgrade protobuf, then TensorFlow, with pip from inside the notebook.
# NOTE(review): neither package is version-pinned, so reruns may pull different releases.
!pip install --upgrade protobuf
!pip install tensorflow --upgrade

In [None]:

# Boolean switches read by the cells below to turn optional behaviour on or off.
FEATURE_FLAGS = dict(
    # --- Model architecture ---
    USE_CBAM_ATTENTION=True,           # CBAM attention blocks in the backbone
    USE_SPP_MODULE=True,               # Spatial Pyramid Pooling before the head
    USE_RESIDUAL_BLOCKS=True,          # skip connections inside residual blocks
    USE_MISH_ACTIVATION=False,         # Mish in place of LeakyReLU

    # --- Training ---
    USE_DATA_AUGMENTATION=True,        # flip / brightness augmentation
    USE_MIXED_PRECISION=False,         # mixed precision training
    USE_GRADIENT_CLIPPING=True,        # clip gradients in the optimizer

    # --- Loss function ---
    USE_FOCAL_LOSS=False,              # focal loss for the objectness term
    USE_GIOU_LOSS=False,               # GIoU loss instead of MSE for boxes
    USE_LABEL_SMOOTHING=True,          # smooth the one-hot class targets

    # --- Post-processing ---
    USE_SOFT_NMS=False,                # Soft-NMS instead of standard NMS
    USE_MULTISCALE_INFERENCE=False,    # test-time augmentation with multiple scales

    # --- Monitoring & debug ---
    VERBOSE_LOGGING=True,              # extra log output
    SAVE_INTERMEDIATE_OUTPUTS=False,   # save feature maps for visualization
    EARLY_STOPPING_ENABLED=True,       # add the EarlyStopping callback
    SAVE_BEST_ONLY=True,               # checkpoint only the best model
)


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model
import xml.etree.ElementTree as ET
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import cv2

In [None]:
# Optionally install the 'mixed_float16' global dtype policy. This cell runs
# before the cell that builds the model further down.
if FEATURE_FLAGS['USE_MIXED_PRECISION']:
    # Imported here because it is only needed when the flag is on.
    from tensorflow.keras import mixed_precision
    policy = mixed_precision.Policy('mixed_float16')
    mixed_precision.set_global_policy(policy)
    print("Mixed precision enabled")

# Report the TensorFlow version and the GPUs TensorFlow can see.
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

In [None]:
# Hyperparameters shared by the data generator, model builder, loss and decoder.
CONFIG = {
    'IMAGE_SIZE': 416,      # images are resized to IMAGE_SIZE x IMAGE_SIZE
    'GRID_SIZE': 13,        # output grid side; the detector has five stride-2 convs (416 -> 13)
    'NUM_CLASSES': 4,
    'NUM_ANCHORS': 5,       # the decoder indexes ANCHORS with range(NUM_ANCHORS)
    'BATCH_SIZE': 8,
    'EPOCHS': 100,
    'LEARNING_RATE': 1e-4,
    # Passed to Adam as `clipvalue`; None when gradient clipping is switched off.
    'GRADIENT_CLIP_VALUE': 1.0 if FEATURE_FLAGS['USE_GRADIENT_CLIPPING'] else None,
}

# Class labels in index order. The annotation parser lower-cases names and
# removes spaces before looking them up in CLASS_TO_IDX.
CLASS_NAMES = ['trafficlight', 'stop', 'speedlimit', 'crosswalk']
CLASS_TO_IDX = {name: idx for idx, name in enumerate(CLASS_NAMES)}

# Anchor boxes (width, height) normalized
# Box sizes are expressed as a fraction of the image, like the normalized bboxes.
ANCHORS = np.array([
    [0.1, 0.2], [0.2, 0.4], [0.3, 0.3], [0.4, 0.6], [0.5, 0.5]
], dtype=np.float32)

# Dataset paths - UPDATE THESE FOR YOUR ENVIRONMENT
IMAGES_DIR = '/kaggle/input/road-sign-detection/images'
ANNOTATIONS_DIR = '/kaggle/input/road-sign-detection/annotations'

print("Configuration loaded!")
print("Active Feature Flags:")
# List only the switches that are turned on.
for flag, value in FEATURE_FLAGS.items():
    if value:
        # The check mark was mis-encoded (UTF-8 bytes read as Latin-1) in the original string.
        print(f"  ✓ {flag}")

In [None]:
def parse_voc_annotation(xml_path):
    """Parse a PASCAL VOC XML annotation file.

    Args:
        xml_path: Path or file object, handed straight to ``ET.parse``.

    Returns:
        ``(objects, width, height)``. ``width``/``height`` are the image size
        read from ``<size>``. ``objects`` is a list of dicts with
        ``'class_idx'`` (looked up in ``CLASS_TO_IDX``) and ``'bbox'``
        (``[xmin, ymin, xmax, ymax]`` divided by the image size and clamped
        to [0, 1]). Objects whose class is not in ``CLASS_TO_IDX`` or whose
        clamped box has no area are dropped.

    Raises:
        ValueError: If the image size is not positive or a numeric field
            cannot be parsed.
        AttributeError: If a required element (``size``, ``name``,
            ``bndbox``, ...) is missing.
    """
    def _as_int(text):
        # Some annotation tools write "12.0" or "12.5"; int() alone rejects those.
        return int(float(text))

    root = ET.parse(xml_path).getroot()

    size = root.find('size')
    width = _as_int(size.find('width').text)
    height = _as_int(size.find('height').text)
    if width <= 0 or height <= 0:
        # Fail with a clear message instead of a ZeroDivisionError further down.
        raise ValueError(f"Invalid image size {width}x{height} in annotation")

    objects = []
    for obj in root.findall('object'):
        # Normalize the label: lower-case and drop every whitespace character.
        raw_name = obj.find('name').text or ''
        name = ''.join(raw_name.split()).lower()
        if name not in CLASS_TO_IDX:
            continue

        bbox = obj.find('bndbox')
        xmin = _as_int(bbox.find('xmin').text)
        ymin = _as_int(bbox.find('ymin').text)
        xmax = _as_int(bbox.find('xmax').text)
        ymax = _as_int(bbox.find('ymax').text)

        # Normalize coordinates and clamp them into the image.
        xmin_n = max(0, min(1, xmin / width))
        ymin_n = max(0, min(1, ymin / height))
        xmax_n = max(0, min(1, xmax / width))
        ymax_n = max(0, min(1, ymax / height))

        # Keep only boxes that still have a positive area.
        if xmax_n > xmin_n and ymax_n > ymin_n:
            objects.append({
                'class_idx': CLASS_TO_IDX[name],
                'bbox': [xmin_n, ymin_n, xmax_n, ymax_n]
            })

    return objects, width, height

In [None]:
def load_dataset(images_dir, annotations_dir):
    """Pair every parsable annotation file with its image.

    Args:
        images_dir: Directory holding the image files.
        annotations_dir: Directory holding the ``.xml`` annotation files.

    Returns:
        List of dicts with ``'image_path'``, ``'objects'`` and ``'orig_size'``
        (``(width, height)``), ordered by annotation file name. Empty when a
        directory is missing. Annotations that fail to parse, contain no
        usable object, or have no matching image are skipped.
    """
    data = []

    if not os.path.isdir(annotations_dir):
        print(f"Error: Annotations directory not found: {annotations_dir}")
        return data

    if not os.path.isdir(images_dir):
        print(f"Error: Images directory not found: {images_dir}")
        return data

    verbose = FEATURE_FLAGS['VERBOSE_LOGGING']
    image_exts = ('.png', '.jpg', '.jpeg', '.PNG', '.JPG', '.JPEG')

    # os.listdir() order is filesystem-dependent. Sorting keeps the sample order
    # stable, so the seeded train/val split picks the same samples everywhere.
    ann_files = sorted(
        f for f in os.listdir(annotations_dir) if f.lower().endswith('.xml')
    )

    for ann_file in ann_files:
        ann_path = os.path.join(annotations_dir, ann_file)
        try:
            objects, orig_w, orig_h = parse_voc_annotation(ann_path)
        except Exception as e:
            # Best effort: one broken annotation must not abort the whole load.
            if verbose:
                print(f"Error parsing {ann_file}: {e}")
            continue

        if not objects:
            continue

        base_name = os.path.splitext(ann_file)[0]
        candidates = (os.path.join(images_dir, base_name + ext) for ext in image_exts)
        img_path = next((c for c in candidates if os.path.exists(c)), None)

        if img_path is None:
            if verbose:
                print(f"No image found for {ann_file}; skipping")
            continue

        data.append({
            'image_path': img_path,
            'objects': objects,
            'orig_size': (orig_w, orig_h)
        })

    print(f"Loaded {len(data)} samples")
    return data

In [None]:
def create_target_tensor(objects, grid_size, num_anchors, num_classes, anchors):
    """Create a YOLO-style target tensor for one image.

    Args:
        objects: Iterable of dicts with ``'bbox'`` (normalized
            ``[xmin, ymin, xmax, ymax]``) and ``'class_idx'``.
        grid_size: Number of grid cells per side.
        num_anchors: Number of anchor slots per cell.
        num_classes: Number of classes.
        anchors: Sequence of ``(width, height)`` anchor sizes in the same
            normalized units as the boxes.

    Returns:
        ``float32`` array of shape ``(grid_size, grid_size, num_anchors,
        5 + num_classes)``. For the cell containing a box centre and the
        best-matching anchor the last axis holds ``[x offset in cell,
        y offset in cell, log(w / anchor_w), log(h / anchor_h), 1.0,
        class vector...]``; everything else stays zero. If several objects
        map to the same cell and anchor, the last one wins.
    """
    target = np.zeros((grid_size, grid_size, num_anchors, 5 + num_classes), dtype=np.float32)
    use_smoothing = FEATURE_FLAGS['USE_LABEL_SMOOTHING']
    smooth = 0.1

    for obj in objects:
        xmin, ymin, xmax, ymax = obj['bbox']
        class_idx = obj['class_idx']

        cx = (xmin + xmax) / 2
        cy = (ymin + ymax) / 2
        w = xmax - xmin
        h = ymax - ymin

        if w <= 0 or h <= 0:
            continue

        # A centre exactly on the right/bottom edge would land in cell `grid_size`.
        grid_x = min(int(cx * grid_size), grid_size - 1)
        grid_y = min(int(cy * grid_size), grid_size - 1)

        # Find best anchor by IoU of the sizes only (boxes treated as co-centred).
        # Only the first `num_anchors` rows are considered so the index always
        # fits the tensor.
        best_iou = 0
        best_anchor = 0
        for i, anchor in enumerate(anchors[:num_anchors]):
            anchor_w, anchor_h = anchor
            if anchor_w <= 0 or anchor_h <= 0:
                continue
            intersection = min(w, anchor_w) * min(h, anchor_h)
            union = w * h + anchor_w * anchor_h - intersection
            iou = intersection / (union + 1e-8)
            if iou > best_iou:
                best_iou = iou
                best_anchor = i

        slot = target[grid_y, grid_x, best_anchor]  # view into `target`
        slot[0] = cx * grid_size - grid_x
        slot[1] = cy * grid_size - grid_y
        slot[2] = np.log(w / (anchors[best_anchor][0] + 1e-8) + 1e-8)
        slot[3] = np.log(h / (anchors[best_anchor][1] + 1e-8) + 1e-8)
        slot[4] = 1.0

        # Build the class vector from scratch and assign it. Accumulating with
        # `+=` would leave a multi-hot / over-smoothed label when a second
        # object lands in the same cell and anchor.
        class_vec = np.zeros(num_classes, dtype=np.float32)
        if use_smoothing:
            class_vec[class_idx] = 1.0 - smooth
            class_vec += smooth / num_classes
        else:
            class_vec[class_idx] = 1.0
        slot[5:] = class_vec

    return target

In [None]:
class ObjectDetectionDataset(keras.utils.Sequence):
    """Batch generator yielding ``(images, targets)`` for the detector.

    Images are read with OpenCV, converted to RGB, resized to
    ``config['IMAGE_SIZE']`` and scaled to [0, 1]. Targets come from
    ``create_target_tensor``.

    Args:
        data: List of sample dicts with ``'image_path'`` and ``'objects'``.
        config: Dict providing ``BATCH_SIZE``, ``IMAGE_SIZE``, ``GRID_SIZE``,
            ``NUM_ANCHORS`` and ``NUM_CLASSES``.
        anchors: Anchor sizes forwarded to ``create_target_tensor``.
        augment: Request flip/brightness augmentation; only effective when
            ``FEATURE_FLAGS['USE_DATA_AUGMENTATION']`` is also on.
        shuffle: Reshuffle the sample order after every epoch. Defaults to
            the value of ``augment``, so the training generator is shuffled
            even when augmentation is disabled through the feature flag.
        **kwargs: Forwarded to the Keras base class.
    """

    def __init__(self, data, config, anchors, augment=False, shuffle=None, **kwargs):
        # The Keras 3 base class expects its own __init__ to run.
        super().__init__(**kwargs)
        self.data = data
        self.config = config
        self.anchors = anchors
        self.augment = augment and FEATURE_FLAGS['USE_DATA_AUGMENTATION']
        self.shuffle = bool(augment) if shuffle is None else bool(shuffle)
        self.indices = np.arange(len(data))

    def __len__(self):
        # Ceiling division so the final partial batch is not silently dropped.
        batch_size = self.config['BATCH_SIZE']
        return max(1, (len(self.data) + batch_size - 1) // batch_size)

    def _load_image(self, path):
        """Read ``path`` as an RGB uint8 image resized to IMAGE_SIZE x IMAGE_SIZE.

        Falls back to an all-black image when the file cannot be read.
        """
        size = self.config['IMAGE_SIZE']
        img = cv2.imread(path)
        if img is None:
            if FEATURE_FLAGS['VERBOSE_LOGGING']:
                print(f"Warning: could not read image {path}; using a blank image")
            return np.zeros((size, size, 3), dtype=np.uint8)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return cv2.resize(img, (size, size))

    @staticmethod
    def _augment(img, objects):
        """Random horizontal flip plus brightness jitter; updates ``objects`` in place."""
        if np.random.random() > 0.5:
            img = np.fliplr(img).copy()
            for obj in objects:
                xmin, ymin, xmax, ymax = obj['bbox']
                # Mirror the x-range; xmin/xmax swap roles after the flip.
                obj['bbox'] = [1 - xmax, ymin, 1 - xmin, ymax]

        # Brightness: scale by a factor in [0.8, 1.2] and clip back to uint8.
        img = img.astype(np.float32)
        img *= np.random.uniform(0.8, 1.2)
        return np.clip(img, 0, 255).astype(np.uint8)

    def __getitem__(self, idx):
        batch_size = self.config['BATCH_SIZE']
        start_idx = idx * batch_size
        # Slicing clamps at the end of the array, so the last batch may be shorter.
        batch_indices = self.indices[start_idx:start_idx + batch_size]

        images = []
        targets = []

        for i in batch_indices:
            sample = self.data[i]
            img = self._load_image(sample['image_path'])

            # Copy the dicts so augmentation never alters the stored annotations.
            objects = [obj.copy() for obj in sample['objects']]

            if self.augment and len(objects) > 0:
                img = self._augment(img, objects)

            img = img.astype(np.float32) / 255.0

            target = create_target_tensor(
                objects, self.config['GRID_SIZE'],
                self.config['NUM_ANCHORS'], self.config['NUM_CLASSES'],
                self.anchors
            )

            images.append(img)
            targets.append(target)

        return np.array(images), np.array(targets)

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

In [None]:
def conv_block(x, filters, kernel_size=3, strides=1, use_bn=True, activation='leaky'):
    """Conv2D -> optional BatchNorm -> optional activation.

    Args:
        x: Input tensor.
        filters: Number of convolution filters.
        kernel_size: Convolution kernel size.
        strides: Convolution stride.
        use_bn: Add BatchNormalization. The conv bias is dropped when it is on.
        activation: ``'leaky'`` (LeakyReLU 0.1, or Mish when
            ``FEATURE_FLAGS['USE_MISH_ACTIVATION']`` is on), ``'mish'``
            (always Mish). Any other value, including ``None``, applies no
            activation.

    Returns:
        The output tensor.
    """
    x = layers.Conv2D(filters, kernel_size, strides=strides, padding='same', use_bias=not use_bn)(x)
    if use_bn:
        x = layers.BatchNormalization()(x)

    # An explicit 'mish' request is honoured even when the global flag is off;
    # previously that combination ended up with no activation at all.
    use_mish = activation == 'mish' or (
        activation == 'leaky' and FEATURE_FLAGS['USE_MISH_ACTIVATION']
    )
    if use_mish:
        # Built-in Mish (x * tanh(softplus(x))) instead of a Lambda wrapping a
        # Python lambda, so the saved .keras model does not embed bytecode.
        x = layers.Activation('mish')(x)
    elif activation == 'leaky':
        x = layers.LeakyReLU(0.1)(x)

    return x

def residual_block(x, filters):
    """1x1 bottleneck (``filters // 2``) followed by a 3x3 conv (``filters``).

    With ``FEATURE_FLAGS['USE_RESIDUAL_BLOCKS']`` on, the block input is added
    back to the conv output and the sum goes through LeakyReLU(0.1). The input
    is first projected by a bias-only 1x1 conv when its channel count differs
    from ``filters``. With the flag off, only the two convs are applied.
    """
    block_input = x

    branch = conv_block(block_input, filters // 2, kernel_size=1)
    branch = conv_block(branch, filters, kernel_size=3)

    if not FEATURE_FLAGS['USE_RESIDUAL_BLOCKS']:
        return branch

    skip = block_input
    if skip.shape[-1] != filters:
        # Channel mismatch: project the skip path so the Add is well-defined.
        skip = conv_block(skip, filters, kernel_size=1, use_bn=False, activation=None)

    merged = layers.Add()([skip, branch])
    return layers.LeakyReLU(0.1)(merged)

def spatial_attention(x):
    """Scale ``x`` by a per-location sigmoid mask.

    The mask is a 7x7 conv over the channel-wise mean and max maps of ``x``.
    """
    pooled_maps = [
        layers.Lambda(lambda t: tf.reduce_mean(t, axis=-1, keepdims=True))(x),
        layers.Lambda(lambda t: tf.reduce_max(t, axis=-1, keepdims=True))(x),
    ]
    stacked = layers.Concatenate()(pooled_maps)
    mask = layers.Conv2D(1, 7, padding='same', activation='sigmoid')(stacked)
    return layers.Multiply()([x, mask])

def channel_attention(x, ratio=8):
    """Scale each channel of ``x`` by a sigmoid gate.

    Global average- and max-pooled descriptors go through the same two Dense
    layers. Their outputs are summed, squashed with a sigmoid and broadcast
    over the spatial dimensions.

    Args:
        x: Input feature map; its last dimension must be statically known.
        ratio: Reduction factor for the hidden Dense layer.

    Returns:
        Tensor with the same shape as ``x``.
    """
    channels = x.shape[-1]
    # Guard against channels < ratio, which would ask for a zero-unit Dense layer.
    hidden_units = max(1, channels // ratio)

    # Use Keras pooling layers
    avg_pool = layers.GlobalAveragePooling2D()(x)
    max_pool = layers.GlobalMaxPooling2D()(x)

    # One pair of Dense layers, reused for both descriptors (shared weights).
    shared_dense1 = layers.Dense(hidden_units, activation='relu')
    shared_dense2 = layers.Dense(channels)

    avg_out = shared_dense2(shared_dense1(avg_pool))
    max_out = shared_dense2(shared_dense1(max_pool))

    attention = layers.Add()([avg_out, max_out])
    attention = layers.Activation('sigmoid')(attention)
    attention = layers.Reshape((1, 1, channels))(attention)
    return layers.Multiply()([x, attention])

def cbam_block(x, ratio=8):
    """Convolutional Block Attention Module: channel attention, then spatial attention.

    Returns ``x`` untouched when ``FEATURE_FLAGS['USE_CBAM_ATTENTION']`` is off.
    """
    if FEATURE_FLAGS['USE_CBAM_ATTENTION']:
        return spatial_attention(channel_attention(x, ratio))
    return x

In [None]:
def build_detector(config, anchors):
    """Build the single-scale detector network.

    Args:
        config: Dict providing ``IMAGE_SIZE``, ``GRID_SIZE``, ``NUM_ANCHORS``
            and ``NUM_CLASSES``.
        anchors: Accepted for call-site symmetry; not used while building.

    Returns:
        A Keras ``Model`` mapping ``(IMAGE_SIZE, IMAGE_SIZE, 3)`` images to raw
        predictions of shape ``(GRID_SIZE, GRID_SIZE, NUM_ANCHORS,
        5 + NUM_CLASSES)``.

    Raises:
        ValueError: If ``GRID_SIZE`` does not match the spatial size produced
            by the five stride-2 convolutions.
    """
    # Each stride-2 'same' conv yields ceil(size / 2). Check up front so a
    # mismatch does not surface as an opaque Reshape error.
    expected_grid = config['IMAGE_SIZE']
    for _ in range(5):
        expected_grid = -(-expected_grid // 2)
    if expected_grid != config['GRID_SIZE']:
        raise ValueError(
            f"GRID_SIZE={config['GRID_SIZE']} does not match IMAGE_SIZE={config['IMAGE_SIZE']}: "
            f"the backbone produces a {expected_grid}x{expected_grid} grid"
        )

    inputs = layers.Input(shape=(config['IMAGE_SIZE'], config['IMAGE_SIZE'], 3))

    # Stem
    x = conv_block(inputs, 32, kernel_size=3, strides=1)
    x = conv_block(x, 64, kernel_size=3, strides=2)  # 208x208

    # Stage 1
    x = residual_block(x, 64)
    x = conv_block(x, 128, kernel_size=3, strides=2)  # 104x104

    # Stage 2
    for _ in range(2):
        x = residual_block(x, 128)
    x = cbam_block(x)
    x = conv_block(x, 256, kernel_size=3, strides=2)  # 52x52

    # Stage 3
    for _ in range(4):
        x = residual_block(x, 256)
    x = cbam_block(x)
    x = conv_block(x, 512, kernel_size=3, strides=2)  # 26x26

    # Stage 4
    for _ in range(4):
        x = residual_block(x, 512)
    x = cbam_block(x)
    x = conv_block(x, 1024, kernel_size=3, strides=2)  # 13x13

    # Stage 5
    for _ in range(2):
        x = residual_block(x, 1024)
    x = cbam_block(x)

    # SPP Module (optional): stride-1 max pools at three scales, concatenated
    # with the input and fused back to 1024 channels.
    if FEATURE_FLAGS['USE_SPP_MODULE']:
        pool1 = layers.MaxPooling2D(5, strides=1, padding='same')(x)
        pool2 = layers.MaxPooling2D(9, strides=1, padding='same')(x)
        pool3 = layers.MaxPooling2D(13, strides=1, padding='same')(x)
        x = layers.Concatenate()([x, pool1, pool2, pool3])
        x = conv_block(x, 1024, kernel_size=1)

    # Detection Head
    x = conv_block(x, 512, kernel_size=1)
    x = conv_block(x, 1024, kernel_size=3)
    x = conv_block(x, 512, kernel_size=1)
    x = conv_block(x, 1024, kernel_size=3)

    # The final layers are pinned to float32 so predictions (and the loss
    # computed from them) stay in full precision under mixed precision.
    # With the default float32 policy this changes nothing.
    output_channels = config['NUM_ANCHORS'] * (5 + config['NUM_CLASSES'])
    outputs = layers.Conv2D(output_channels, 1, padding='same', dtype='float32')(x)
    outputs = layers.Reshape(
        (config['GRID_SIZE'], config['GRID_SIZE'],
         config['NUM_ANCHORS'], 5 + config['NUM_CLASSES']),
        dtype='float32'
    )(outputs)

    model = Model(inputs, outputs, name='CustomDetector')
    return model

# Build model
# `model` is a module-level name; the later cells compile, train and save it.
model = build_detector(CONFIG, ANCHORS)
print("\nModel built successfully!")
print(f"Total parameters: {model.count_params():,}")

In [None]:
class DetectionLoss(keras.losses.Loss):
    """YOLO-style detection loss over ``(batch, grid, grid, anchors, 5 + classes)`` tensors.

    The last axis of targets and raw predictions is laid out as
    ``[x, y, w, h, objectness, class scores...]``. Predicted x, y, objectness
    and class scores go through a sigmoid; w and h are compared raw against
    the targets. All terms are summed and divided by the batch size.

    ``FEATURE_FLAGS['USE_FOCAL_LOSS']`` switches the objectness terms from
    squared error to a focal-weighted binary cross-entropy. The
    ``USE_GIOU_LOSS`` flag is not consulted here.

    Args:
        config: Configuration dict, stored and returned by ``get_config``.
        anchors: Anchor sizes, stored as a float32 constant.
        **kwargs: Forwarded to ``keras.losses.Loss``.
    """

    def __init__(self, config, anchors, **kwargs):
        super().__init__(**kwargs)
        self.config = config
        # Plain-list copy kept for get_config(); the tensor is for graph use.
        self._anchors_list = np.asarray(anchors, dtype=np.float32).tolist()
        self.anchors = tf.constant(anchors, dtype=tf.float32)
        self.lambda_coord = 5.0
        self.lambda_noobj = 0.5
        self.lambda_obj = 1.0
        self.lambda_class = 1.0

    def get_config(self):
        """Return constructor arguments so a saved compiled model can rebuild the loss."""
        base = super().get_config()
        base.update({'config': dict(self.config), 'anchors': self._anchors_list})
        return base

    def call(self, y_true, y_pred):
        # Compute in float32 whatever dtype policy produced the predictions.
        y_true = tf.cast(y_true, tf.float32)
        y_pred = tf.cast(y_pred, tf.float32)

        true_xy = y_true[..., :2]
        true_wh = y_true[..., 2:4]
        true_obj = y_true[..., 4:5]
        true_class = y_true[..., 5:]

        pred_xy = tf.sigmoid(y_pred[..., :2])
        pred_wh = y_pred[..., 2:4]
        pred_obj = tf.sigmoid(y_pred[..., 4:5])
        pred_class = tf.sigmoid(y_pred[..., 5:])

        # Objectness targets are 0/1, so they double as masks.
        obj_mask = true_obj
        noobj_mask = 1 - obj_mask

        xy_loss = obj_mask * tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1, keepdims=True)
        wh_loss = obj_mask * tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1, keepdims=True)
        coord_loss = self.lambda_coord * tf.reduce_sum(xy_loss + wh_loss)

        if FEATURE_FLAGS['USE_FOCAL_LOSS']:
            alpha = 0.25
            gamma = 2.0
            eps = 1e-7
            # Element-wise focal BCE, kept at shape (..., 1) to line up with the
            # masks. keras.losses.binary_crossentropy averages away the last
            # axis, which made the old product broadcast to (..., A, A).
            p = tf.clip_by_value(pred_obj, eps, 1.0 - eps)
            # p_t is the probability given to the correct label, so
            # (1 - p_t)^gamma down-weights easy positives and easy negatives.
            p_t = true_obj * p + (1.0 - true_obj) * (1.0 - p)
            obj_focal = -alpha * tf.pow(1.0 - p_t, gamma) * tf.math.log(p_t)
            obj_loss = self.lambda_obj * tf.reduce_sum(obj_mask * obj_focal)
            noobj_loss = self.lambda_noobj * tf.reduce_sum(noobj_mask * obj_focal)
        else:
            obj_loss = self.lambda_obj * tf.reduce_sum(obj_mask * tf.square(1 - pred_obj))
            noobj_loss = self.lambda_noobj * tf.reduce_sum(noobj_mask * tf.square(pred_obj))

        class_loss = self.lambda_class * tf.reduce_sum(
            obj_mask * tf.reduce_sum(tf.square(true_class - pred_class), axis=-1, keepdims=True)
        )

        total_loss = coord_loss + obj_loss + noobj_loss + class_loss
        batch_size = tf.cast(tf.shape(y_true)[0], tf.float32)

        return total_loss / (batch_size + 1e-8)

In [None]:
def decode_predictions(predictions, config, anchors, conf_threshold=0.5, nms_threshold=0.4):
    """Turn raw network output into per-image box arrays.

    Args:
        predictions: Raw output indexed as ``[batch, gy, gx, anchor, channel]``.
        config: Dict providing ``GRID_SIZE`` and ``NUM_ANCHORS``.
        anchors: Array of anchor ``(width, height)`` pairs, indexed ``[a, 0/1]``.
        conf_threshold: Minimum objectness, and minimum objectness * class
            score, for a box to be kept.
        nms_threshold: Threshold forwarded to the NMS routine.

    Returns:
        One array per image with rows ``[xmin, ymin, xmax, ymax, confidence,
        class_idx]`` in normalized coordinates; shape ``(0, 6)`` when nothing
        passes the thresholds.
    """
    def squash(logit):
        # Sigmoid with the logit clamped to [-10, 10].
        return 1 / (1 + np.exp(-np.clip(logit, -10, 10)))

    grid_size = config['GRID_SIZE']
    per_image = []

    for b in range(predictions.shape[0]):
        pred = predictions[b]
        candidates = []

        for gy in range(grid_size):
            for gx in range(grid_size):
                for a in range(config['NUM_ANCHORS']):
                    cell = pred[gy, gx, a]

                    objectness = squash(cell[4])
                    if objectness < conf_threshold:
                        continue

                    class_probs = squash(cell[5:])
                    class_idx = np.argmax(class_probs)
                    confidence = objectness * class_probs[class_idx]
                    if confidence < conf_threshold:
                        continue

                    # Centre: cell index plus in-cell offset, scaled to [0, 1].
                    cx = (gx + squash(cell[0])) / grid_size
                    cy = (gy + squash(cell[1])) / grid_size
                    # Size: anchor scaled by exp of the (clamped) raw output.
                    w = anchors[a, 0] * np.exp(np.clip(cell[2], -10, 10))
                    h = anchors[a, 1] * np.exp(np.clip(cell[3], -10, 10))

                    candidates.append([
                        max(0, cx - w / 2),
                        max(0, cy - h / 2),
                        min(1, cx + w / 2),
                        min(1, cy + h / 2),
                        confidence,
                        class_idx,
                    ])

        if not candidates:
            per_image.append(np.array([]).reshape(0, 6))
            continue

        stacked = np.array(candidates)
        if FEATURE_FLAGS['USE_SOFT_NMS']:
            per_image.append(apply_soft_nms(stacked, nms_threshold))
        else:
            per_image.append(apply_nms(stacked, nms_threshold))

    return per_image

def apply_nms(boxes, threshold):
    """Greedy non-maximum suppression (class-agnostic).

    Args:
        boxes: Array with rows ``[xmin, ymin, xmax, ymax, score, ...]``.
        threshold: A box is dropped when its IoU with an already kept,
            higher-scoring box is not below this value.

    Returns:
        Array of the kept rows, highest score first. An empty input is
        returned as is.
    """
    if len(boxes) == 0:
        return boxes

    # Highest score first.
    ranked = boxes[np.argsort(boxes[:, 4])[::-1]]

    survivors = []
    while len(ranked) > 0:
        top, rest = ranked[0], ranked[1:]
        survivors.append(top)
        if len(rest) == 0:
            break

        # Intersection rectangle between the top box and every remaining box.
        left = np.maximum(top[0], rest[:, 0])
        upper = np.maximum(top[1], rest[:, 1])
        right = np.minimum(top[2], rest[:, 2])
        lower = np.minimum(top[3], rest[:, 3])
        overlap = np.maximum(0, right - left) * np.maximum(0, lower - upper)

        top_area = (top[2] - top[0]) * (top[3] - top[1])
        rest_areas = (rest[:, 2] - rest[:, 0]) * (rest[:, 3] - rest[:, 1])
        iou = overlap / (top_area + rest_areas - overlap + 1e-8)

        ranked = rest[iou < threshold]

    return np.array(survivors)

def apply_soft_nms(boxes, threshold, sigma=0.5):
    """Soft-NMS with Gaussian score decay.

    Repeatedly takes the highest-scoring box, multiplies every other score by
    ``exp(-iou**2 / sigma)`` and discards boxes whose score is no longer
    above ``threshold * 0.1``.

    Args:
        boxes: Array with rows ``[xmin, ymin, xmax, ymax, score, ...]``;
            not modified.
        threshold: Scaled by 0.1 to form the score cut-off.
        sigma: Width of the Gaussian decay.

    Returns:
        Array of selected rows in selection order, carrying their decayed
        scores. An empty input is returned as is.
    """
    if len(boxes) == 0:
        return boxes

    pool = boxes.copy()
    selected = []

    while len(pool) > 0:
        best = pool[:, 4].argmax()
        winner = pool[best]
        selected.append(winner)

        if len(pool) == 1:
            break

        # np.delete returns a copy, so decaying its scores leaves `winner` intact.
        rest = np.delete(pool, best, axis=0)

        left = np.maximum(winner[0], rest[:, 0])
        upper = np.maximum(winner[1], rest[:, 1])
        right = np.minimum(winner[2], rest[:, 2])
        lower = np.minimum(winner[3], rest[:, 3])
        overlap = np.maximum(0, right - left) * np.maximum(0, lower - upper)

        winner_area = (winner[2] - winner[0]) * (winner[3] - winner[1])
        rest_areas = (rest[:, 2] - rest[:, 0]) * (rest[:, 3] - rest[:, 1])
        iou = overlap / (winner_area + rest_areas - overlap + 1e-8)

        rest[:, 4] *= np.exp(-(iou ** 2) / sigma)
        pool = rest[rest[:, 4] > threshold * 0.1]

    return np.array(selected)

In [None]:
def visualize_predictions(image, boxes, class_names, save_path=None):
    """Draw labelled boxes on an image.

    Args:
        image: Image array; its first two dimensions are height and width.
        boxes: Iterable of ``[xmin, ymin, xmax, ymax, conf, class_idx]`` with
            normalized coordinates.
        class_names: Class labels indexed by ``class_idx``.
        save_path: When given, the figure is also written to this path.
    """
    fig, ax = plt.subplots(1, figsize=(12, 8))
    ax.imshow(image)

    colors = ['red', 'blue', 'green', 'yellow']

    h, w = image.shape[:2]
    for box in boxes:
        xmin, ymin, xmax, ymax, conf, class_idx = box
        class_idx = int(class_idx)
        # Cycle through the palette so more than four classes cannot raise IndexError.
        color = colors[class_idx % len(colors)]

        # Boxes are normalized; scale to pixel coordinates for drawing.
        rect = patches.Rectangle(
            (xmin * w, ymin * h), (xmax - xmin) * w, (ymax - ymin) * h,
            linewidth=2, edgecolor=color, facecolor='none'
        )
        ax.add_patch(rect)

        label = f"{class_names[class_idx]}: {conf:.2f}"
        ax.text(xmin * w, ymin * h - 5, label,
                color='white', fontsize=10, weight='bold',
                bbox=dict(boxstyle='round', facecolor=color, alpha=0.8))

    ax.axis('off')

    if save_path:
        fig.savefig(save_path, bbox_inches='tight', dpi=150)
    plt.show()
    # Release the figure so repeated calls do not pile up open figures.
    plt.close(fig)

In [None]:
def visualize_sample(data, idx):
    """Show sample ``data[idx]`` with its ground-truth boxes.

    Raises:
        FileNotFoundError: If the sample's image cannot be read.
    """
    sample = data[idx]
    img = cv2.imread(sample['image_path'])
    if img is None:
        # cv2.imread signals failure with None; fail with a readable message.
        raise FileNotFoundError(f"Could not read image: {sample['image_path']}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Ground truth is drawn as boxes with confidence 1.0.
    boxes = [obj['bbox'] + [1.0, obj['class_idx']] for obj in sample['objects']]

    visualize_predictions(img, boxes, CLASS_NAMES)

In [None]:
print("Loading dataset...")
data = load_dataset(IMAGES_DIR, ANNOTATIONS_DIR)

if len(data) == 0:
    print("\n" + "="*60)
    print("ERROR: No data found!")
    print("="*60)
    print("\nPlease update paths in CELL 3")
    # Everything below needs train/val data. Stop here with a clear error
    # rather than a NameError on `train_data` a few lines later.
    raise RuntimeError(
        f"No samples loaded from {IMAGES_DIR!r} / {ANNOTATIONS_DIR!r}"
    )

train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)
print(f"\nTrain: {len(train_data)}, Validation: {len(val_data)}")

# Create data generators
train_gen = ObjectDetectionDataset(train_data, CONFIG, ANCHORS, augment=True)
val_gen = ObjectDetectionDataset(val_data, CONFIG, ANCHORS, augment=False)

# Compile model (a single optimizer instance, with clipping only when enabled)
if FEATURE_FLAGS['USE_GRADIENT_CLIPPING']:
    optimizer = keras.optimizers.Adam(
        learning_rate=CONFIG['LEARNING_RATE'],
        clipvalue=CONFIG['GRADIENT_CLIP_VALUE']
    )
else:
    optimizer = keras.optimizers.Adam(learning_rate=CONFIG['LEARNING_RATE'])
loss_fn = DetectionLoss(CONFIG, ANCHORS)
model.compile(optimizer=optimizer, loss=loss_fn)

# Callbacks
callbacks = [
    # Halve the learning rate after 5 epochs without val_loss improvement.
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=5, min_lr=1e-7, verbose=1
    ),
    keras.callbacks.ModelCheckpoint(
        'best_detector.keras', monitor='val_loss', save_best_only=FEATURE_FLAGS['SAVE_BEST_ONLY'], verbose=1
    ),
]
if FEATURE_FLAGS['EARLY_STOPPING_ENABLED']:
    callbacks.append(
        keras.callbacks.EarlyStopping(
            monitor='val_loss', patience=15, restore_best_weights=True, verbose=1
        )
    )

# Train
print("\n" + "="*60)
print("Starting training...")
print("="*60)

In [None]:
# Train with the generators and callbacks prepared in the previous cell.
# `history` keeps the object returned by fit().
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=CONFIG['EPOCHS'],
    callbacks=callbacks,
    verbose=1
)
print("Training completed!")

In [None]:
# Save the model as it stands after fit(), in a separate file from the
# 'best_detector.keras' checkpoint written during training.
model.save("final_detector.keras")
print("Final model saved.")

In [None]:
import random, math

def predict_and_show_grid(model, data, n=9, conf=0.1):
    """Plot predictions for ``n`` randomly chosen samples in a 3-column grid.

    Args:
        model: Model whose ``predict`` output is decoded with
            ``decode_predictions``.
        data: List of sample dicts with ``'image_path'``.
        n: Number of samples to draw; capped at ``len(data)``.
        conf: Confidence threshold passed to ``decode_predictions``.
    """
    # random.sample raises ValueError when asked for more items than exist.
    n = min(n, len(data))
    if n <= 0:
        print("No samples to display.")
        return

    # sample indices instead of samples
    indices = random.sample(range(len(data)), n)

    cols = 3
    rows = math.ceil(n / cols)
    fig, axes = plt.subplots(rows, cols, figsize=(14, 10))
    axes = axes.flatten()

    for ax, idx in zip(axes, indices):
        sample = data[idx]
        ax.axis('off')

        img = cv2.imread(sample['image_path'])
        if img is None:
            # Unreadable file: label the panel and move on instead of crashing.
            ax.set_title(f"Index {idx}: image not readable")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        resized = cv2.resize(img, (CONFIG['IMAGE_SIZE'], CONFIG['IMAGE_SIZE']))
        input_tensor = resized.astype(np.float32) / 255.0
        input_tensor = np.expand_dims(input_tensor, axis=0)

        preds = model.predict(input_tensor, verbose=0)
        boxes = decode_predictions(preds, CONFIG, ANCHORS,
                                   conf_threshold=conf,
                                   nms_threshold=0.45)[0]

        ax.imshow(resized)
        h, w = resized.shape[:2]

        for box in boxes:
            xmin, ymin, xmax, ymax, score, cls = box
            cls = int(cls)

            # Boxes are normalized; scale to pixels of the resized image.
            rect = patches.Rectangle(
                (xmin * w, ymin * h),
                (xmax - xmin) * w,
                (ymax - ymin) * h,
                linewidth=2,
                edgecolor='red',
                facecolor='none'
            )
            ax.add_patch(rect)

            ax.text(xmin * w, ymin * h - 5,
                    f"{CLASS_NAMES[cls]} {score:.2f}",
                    color='white', fontsize=8,
                    bbox=dict(facecolor='red', alpha=0.7))

        # Show index on the image
        ax.text(5, 20, f"Index: {idx}",
                color='yellow', fontsize=11, weight='bold',
                bbox=dict(facecolor='black', alpha=0.7))

    # Hide grid cells that have no sample.
    for ax in axes[len(indices):]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()
# Spot-check the model on 9 random validation samples with a low confidence threshold.
predict_and_show_grid(model, val_data, n=9, conf=0.1)

In [None]:
def compare_gt_vs_prediction_fixed(model, data, indices, conf=0.1):
    """Plot ground truth (left) next to predictions (right) for chosen samples.

    Args:
        model: Model whose ``predict`` output is decoded with
            ``decode_predictions``.
        data: List of sample dicts with ``'image_path'`` and ``'objects'``.
        indices: Positions in ``data`` to show, one row each.
        conf: Confidence threshold passed to ``decode_predictions``.

    The figure is also written to ``comparison_results.png``.
    """
    def _draw_boxes(ax, boxes, color, labelled):
        # Boxes are normalized; `w`/`h` come from the enclosing loop iteration.
        for xmin, ymin, xmax, ymax, score, cls in boxes:
            ax.add_patch(patches.Rectangle(
                (xmin * w, ymin * h),
                (xmax - xmin) * w,
                (ymax - ymin) * h,
                linewidth=2, edgecolor=color, facecolor='none'
            ))
            if labelled:
                ax.text(xmin * w, ymin * h - 4,
                        f"{CLASS_NAMES[int(cls)]} {score:.2f}",
                        color='white', fontsize=8,
                        bbox=dict(facecolor=color, alpha=0.7))

    n = len(indices)
    if n == 0:
        print("No indices to compare.")
        return

    # squeeze=False keeps `axes` two-dimensional even for a single row.
    fig, axes = plt.subplots(n, 2, figsize=(10, 4*n), squeeze=False)

    for row, idx in enumerate(indices):
        sample = data[idx]
        gt_ax, pred_ax = axes[row, 0], axes[row, 1]
        gt_ax.axis('off')
        pred_ax.axis('off')

        # ---- Load image ----
        img = cv2.imread(sample['image_path'])
        if img is None:
            # Unreadable file: label the row and continue with the next one.
            gt_ax.set_title(f"Index {idx}: image not readable")
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(img, (CONFIG['IMAGE_SIZE'], CONFIG['IMAGE_SIZE']))
        h, w = resized.shape[:2]

        # ---- Prediction ----
        input_tensor = resized.astype(np.float32) / 255.0
        input_tensor = np.expand_dims(input_tensor, axis=0)
        preds = model.predict(input_tensor, verbose=0)

        pred_boxes = decode_predictions(preds, CONFIG, ANCHORS,
                                        conf_threshold=conf,
                                        nms_threshold=0.45)[0]

        # ---- Ground Truth ----
        gt_boxes = [obj['bbox'] + [1.0, obj['class_idx']] for obj in sample['objects']]

        # ---- Draw GT ----
        gt_ax.imshow(resized)
        gt_ax.set_title(f"Ground Truth | Index {idx}")
        _draw_boxes(gt_ax, gt_boxes, 'green', labelled=False)

        # ---- Draw Prediction ----
        pred_ax.imshow(resized)
        pred_ax.set_title("Prediction")
        _draw_boxes(pred_ax, pred_boxes, 'red', labelled=True)

    plt.tight_layout()
    plt.savefig("comparison_results.png", dpi=150, bbox_inches='tight')
    plt.show()

# Hand-picked validation samples to compare. Positions outside val_data are
# dropped so a smaller validation split does not raise IndexError.
indices = [140,112,14,143,29,24,106,100,126,168,144,136,137]
indices = [i for i in indices if 0 <= i < len(val_data)]
if indices:
    compare_gt_vs_prediction_fixed(model, val_data, indices, conf=0.1)
else:
    print("None of the requested indices exist in val_data; nothing to compare.")