In [0]:
import urllib3
import os
# Intended to suppress TensorFlow's native (C++) log output. The variable has
# to be in the environment before `tensorflow` is first imported.
# NOTE(review): a later cell runs `%restart_python`; if that restarts the
# interpreter, everything set in this cell is presumably lost before
# tensorflow is imported. Confirm, and consider moving this cell after the
# install cell.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Sets a `default_pool_maxsize` attribute of 50 on urllib3's HTTP and HTTPS
# connection classes.
# NOTE(review): confirm urllib3 actually reads this attribute; pool size is
# normally supplied as `maxsize` when a connection pool / PoolManager is built.
urllib3.connection.HTTPConnection.default_pool_maxsize = 50
urllib3.connection.HTTPSConnection.default_pool_maxsize = 50

In [0]:
%pip install opencv-python
%restart_python

In [0]:
import tensorflow as tf
import json
import numpy as np
import cv2
from pathlib import Path

In [0]:
def create_dataset(
    image_paths: list,
    batch_size: int = 32,
    img_shape=(224, 224),
    use_random=True,
    max_objects=30
    ):
    """Build a batched ``tf.data`` pipeline of images and padded detection labels.

    Each image is read with OpenCV, resized to ``img_shape`` and divided by 255.
    Its annotation is read from the JSON file that has the same path as the
    image but a ``.json`` suffix; the file must contain ``objects.bbox`` (one
    4-value box per object) and ``objects.category`` (one integer per object).

    Args:
        image_paths: Image file paths (``str`` or ``pathlib.Path``).
        batch_size: Number of examples per batch. Incomplete final batches are
            dropped.
        img_shape: Target ``(height, width)`` of the resized images.
        use_random: If true, examples are reshuffled on every pass.
        max_objects: Fixed number of label slots per image. Extra objects are
            truncated; unused slots hold all-zero boxes and category ``-1``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, labels)`` where
        ``features = {'images': float32 [batch, H, W, 3]}`` and
        ``labels = {'bboxes': float32 [batch, max_objects, 4],
        'classes': int32 [batch, max_objects]}``.

    Raises:
        FileNotFoundError: Raised inside the pipeline (surfaced by TensorFlow
            while iterating) when an image cannot be read or its annotation
            file is missing.
    """
    # Accept Path objects as well as strings.
    path_strings = [str(p) for p in image_paths]

    # A plain in-memory list needs no Python generator; slicing a string tensor
    # also gives the dataset a known cardinality. The explicit dtype keeps an
    # empty list valid.
    dataset = tf.data.Dataset.from_tensor_slices(
        tf.constant(path_strings, dtype=tf.string)
    )

    def _load_and_process_py(image_path_tensor):
        """Eager Python loader: returns (image, padded boxes, padded categories)."""
        image_path = image_path_tensor.numpy().decode('utf-8')

        # cv2.imread signals failure by returning None instead of raising.
        v_data = cv2.imread(image_path)
        if v_data is None:
            raise FileNotFoundError(f"cv2.imread could not read image: {image_path}")
        # NOTE(review): cv2.imread yields channels in BGR order. Confirm the
        # model expects BGR; otherwise convert with cv2.cvtColor.
        # cv2.resize takes (width, height), hence the swapped indices.
        v_data = cv2.resize(v_data, (img_shape[1], img_shape[0])) / 255.0
        v_data = v_data.astype(np.float32)  # HWC

        # Swap only the file suffix; str.replace would also rewrite any
        # '.png' that happens to appear in a directory name.
        annotation_path = Path(image_path).with_suffix('.json')
        with open(annotation_path, 'r') as f:
            ann = json.load(f)

        # reshape keeps an empty annotation as (0, 4) rather than (0,), which
        # could not be assigned into the padded array below.
        bboxes = np.asarray(ann['objects']['bbox'], dtype=np.float32).reshape(-1, 4)
        categories = np.asarray(ann['objects']['category'], dtype=np.int32).reshape(-1)

        # Fixed-size label arrays so examples can be batched together.
        padded_bboxes = np.zeros((max_objects, 4), dtype=np.float32)
        padded_categories = np.full((max_objects,), -1, dtype=np.int32)

        num_objects = min(len(bboxes), max_objects)
        padded_bboxes[:num_objects] = bboxes[:num_objects]
        padded_categories[:num_objects] = categories[:num_objects]

        return v_data, padded_bboxes, padded_categories

    def _process_path(image_path):
        """Graph-side wrapper that runs the loader and restores static shapes."""
        image, bboxes, categories = tf.py_function(
            _load_and_process_py,
            [image_path],
            [tf.float32, tf.float32, tf.int32]
        )

        # py_function outputs carry no static shape; pin them for batching.
        image = tf.ensure_shape(image, (img_shape[0], img_shape[1], 3))
        bboxes = tf.ensure_shape(bboxes, (max_objects, 4))
        categories = tf.ensure_shape(categories, (max_objects,))

        return {'images': tf.cast(image, tf.float32, name='images')}, {
            'bboxes': tf.cast(bboxes, tf.float32, name='bboxes'),
            'classes': tf.cast(categories, tf.int32, name='classes')
        }

    dataset = dataset.map(_process_path, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.cache()

    if use_random:
        # Shuffle AFTER the cache: a cache placed downstream of shuffle records
        # the first epoch's order and replays it unchanged on every later epoch.
        # Trade-off: the buffer spans the whole dataset, so the first batch is
        # only produced once every image has been loaded.
        dataset = dataset.shuffle(buffer_size=max(len(path_strings), 1))

    dataset = dataset.batch(batch_size, drop_remainder=True)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

In [0]:
from pathlib import Path

# Directory that is scanned for the .png training images.
DATA_DIR = Path('/Volumes/shm/default/cppe5/')

# Sort the matches: glob order depends on the filesystem, and a stable list
# makes runs comparable.
image_paths = sorted(str(x) for x in DATA_DIR.glob('*.png'))
if not image_paths:
    # Fail here with a clear message rather than deep inside the input pipeline.
    raise FileNotFoundError(f"No .png images found in {DATA_DIR}")
dataset = create_dataset(image_paths)

In [0]:
# Sanity check: pull a single batch and report the shape of each tensor.
for batch in dataset.take(1):
    features, targets = batch
    images = features['images']
    bboxes, categories = targets['bboxes'], targets['classes']
    print(f"Input shape: {images.shape}")
    print(f"Bounding boxes shape: {bboxes.shape}")
    print(f"Categories shape: {categories.shape}")

In [0]:
# `model` is a project-local module that is not part of this notebook.
# NOTE(review): the architecture and the input/label names it expects are not
# visible here. Confirm they match the 'images' / 'bboxes' / 'classes' keys
# produced by create_dataset.
from model import build_object_detection_model
model = build_object_detection_model()

In [0]:
%%time
# we can adjust steps per epoch to avoid running out at the end, which speeds computation significantly
steps_per_epoch = len(image_paths) // 32
model.fit(dataset, epochs=1, steps_per_epoch=steps_per_epoch)