In [0]:
import tensorflow as tf
import json
import os

def parse_annotation(annotation_path, image_dir):
    """Read one JSON annotation file and return its image path, boxes and classes.

    Args:
        annotation_path: Path to a JSON file containing a ``file_name`` entry
            and an ``objects`` mapping with ``bbox`` and ``category`` lists.
        image_dir: Directory that ``file_name`` is joined onto.

    Returns:
        A tuple ``(image_path, boxes, classes)``. ``boxes`` is a float32 tensor
        built from ``objects['bbox']`` and ``classes`` is an int32 tensor built
        from ``objects['category']``. When the annotation lists no boxes,
        ``boxes`` has shape ``(0, 4)`` so it still matches the ``(None, 4)``
        shape that ``create_dataset`` declares for its generator.

    Raises:
        KeyError: If ``file_name``, ``objects``, ``bbox`` or ``category`` is
            missing from the annotation.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(annotation_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    objects = data['objects']
    image_path = os.path.join(image_dir, data['file_name'])

    bboxes = objects['bbox']
    if len(bboxes) == 0:
        # An empty list would convert to shape (0,), which is not a valid
        # "N boxes of 4 values" tensor; build the empty (0, 4) tensor explicitly.
        boxes = tf.zeros((0, 4), dtype=tf.float32)
    else:
        boxes = tf.convert_to_tensor(bboxes, dtype=tf.float32)
    classes = tf.convert_to_tensor(objects['category'], dtype=tf.int32)

    return image_path, boxes, classes

def load_and_preprocess_image(image_path):
    """Load an image file as a 300x300, 3-channel float32 tensor.

    The file is read, decoded with ``tf.image.decode_png`` forcing three
    channels, resized to 300x300, and divided by 255 so that 8-bit pixel
    values land in [0, 1].

    Args:
        image_path: Path of the image file to read.

    Returns:
        A float32 tensor holding the resized, rescaled image.
    """
    target_size = [300, 300]  # fixed height/width fed to the model
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_png(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, target_size)
    return tf.cast(resized, tf.float32) / 255.0

def create_example(annotation_path, image_dir):
    """Build one ``(image, (boxes, classes))`` sample from an annotation file.

    The annotation is parsed with ``parse_annotation`` and the referenced image
    is loaded with ``load_and_preprocess_image``.

    NOTE(review): the boxes are passed through exactly as parsed, while the
    image is resized to 300x300 by the loader, so the box coordinates are not
    rescaled to the resized image here.

    Args:
        annotation_path: Path to the JSON annotation file.
        image_dir: Directory containing the image named in the annotation.

    Returns:
        A tuple ``(image, (boxes, classes))``.
    """
    path, bboxes, labels = parse_annotation(annotation_path, image_dir)
    return load_and_preprocess_image(path), (bboxes, labels)

In [0]:
def create_dataset(annotation_dir, image_dir, batch_size, max_examples=100,
                   shuffle_buffer_size=100):
    """Build a shuffled, padded-batch ``tf.data.Dataset`` of detection samples.

    Args:
        annotation_dir: Directory scanned for ``*.json`` annotation files.
        image_dir: Directory containing the images the annotations refer to.
        batch_size: Number of samples per batch.
        max_examples: Maximum number of annotation files to use, taken in
            sorted filename order. ``None`` uses every file. Defaults to 100.
        shuffle_buffer_size: Size of the shuffle buffer. Defaults to 100.

    Returns:
        A dataset yielding ``(images, (boxes, classes))`` batches. Within a
        batch, ``boxes`` and ``classes`` are padded to the largest object count
        in that batch: padded box rows are all zeros and padded class entries
        are ``-1``.
    """
    # os.listdir returns entries in arbitrary order; sort so that the selected
    # subset is the same on every run and on every machine.
    annotation_files = sorted(
        os.path.join(annotation_dir, name)
        for name in os.listdir(annotation_dir)
        if name.endswith('.json')
    )
    if max_examples is not None:
        annotation_files = annotation_files[:max_examples]

    def generator():
        for annotation_file in annotation_files:
            yield create_example(annotation_file, image_dir)

    dataset = tf.data.Dataset.from_generator(
        generator,
        output_signature=(
            tf.TensorSpec(shape=(300, 300, 3), dtype=tf.float32),
            (
                tf.TensorSpec(shape=(None, 4), dtype=tf.float32),
                tf.TensorSpec(shape=(None,), dtype=tf.int32),
            ),
        ),
    )

    dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    # Each image carries a different number of objects, so a plain batch()
    # cannot stack the per-image box/class tensors. Pad them instead, using -1
    # for classes so that padding is distinguishable from a real class id.
    dataset = dataset.padded_batch(
        batch_size,
        padded_shapes=((300, 300, 3), ((None, 4), (None,))),
        padding_values=(
            tf.constant(0.0, dtype=tf.float32),
            (
                tf.constant(0.0, dtype=tf.float32),
                tf.constant(-1, dtype=tf.int32),
            ),
        ),
    )

    return dataset
  
# Build the training dataset from the cppe5 annotation and image folders.
train_dataset = create_dataset(
    annotation_dir='/Volumes/shm/default/cppe5/annotations/',
    image_dir='/Volumes/shm/default/cppe5/images/',
    batch_size=10,
)

In [0]:
import tensorflow as tf

def create_object_detection_model(input_shape, num_classes):
    """Build a small CNN with a classification head and a 4-value box head.

    The backbone is three Conv2D (3x3, ReLU, same padding) + MaxPooling2D
    stages with 32, 64 and 128 filters, followed by Flatten and a 256-unit
    ReLU dense layer.

    Args:
        input_shape: Shape of one input image, passed to ``tf.keras.Input``.
        num_classes: Number of units in the softmax ``class_output`` head.

    Returns:
        A ``tf.keras.Model`` whose outputs are ``[class_output, bbox_output]``:
        softmax class probabilities and a linear 4-unit box prediction.
    """
    layers = tf.keras.layers
    inputs = tf.keras.Input(shape=input_shape)

    # Convolutional backbone: one conv + pool stage per filter count.
    features = inputs
    for filters in (32, 64, 128):
        features = layers.Conv2D(filters, 3, activation='relu', padding='same')(features)
        features = layers.MaxPooling2D()(features)

    # Shared dense representation feeding both heads.
    features = layers.Flatten()(features)
    features = layers.Dense(256, activation='relu')(features)

    # Output heads.
    class_output = layers.Dense(num_classes, activation='softmax', name='class_output')(features)
    bbox_output = layers.Dense(4, name='bbox_output')(features)

    return tf.keras.Model(inputs=inputs, outputs=[class_output, bbox_output])

# Instantiate the detector for 300x300 three-channel inputs and five classes.
model = create_object_detection_model(input_shape=(300, 300, 3), num_classes=5)

In [0]:
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
# The model's `class_output` head already applies softmax, so the loss must
# treat its input as probabilities, not logits.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

@tf.function
def train_step(images, labels):
    """Run one optimisation step on the classification head.

    Args:
        images: Batch of input images.
        labels: Integer class id per image, shape ``(batch,)``. Negative ids
            are treated as "no target" and contribute nothing to the loss.

    Returns:
        The scalar classification loss for this batch.
    """
    # Mask out negative labels (e.g. a padding sentinel) instead of feeding an
    # out-of-range class id to the loss.
    sample_weight = tf.cast(labels >= 0, tf.float32)
    safe_labels = tf.maximum(labels, 0)
    with tf.GradientTape() as tape:
        # The model returns [class_output, bbox_output]; only the class
        # probabilities belong in the classification loss.
        class_probs, _bbox_pred = model(images, training=True)
        loss = loss_fn(safe_labels, class_probs, sample_weight=sample_weight)
    gradients = tape.gradient(loss, model.trainable_variables)
    # NOTE(review): the bbox head is not part of this loss, so its variables
    # get no gradient; skip them rather than passing None to the optimizer.
    grads_and_vars = [
        (grad, var)
        for grad, var in zip(gradients, model.trainable_variables)
        if grad is not None
    ]
    optimizer.apply_gradients(grads_and_vars)
    return loss

epochs = 10
for epoch in range(epochs):
    # Average over the epoch; this also keeps the print valid for an empty dataset.
    epoch_loss = tf.keras.metrics.Mean()
    for images, (boxes, classes) in train_dataset:
        if classes.shape[1] == 0:
            # No annotated object anywhere in this batch: nothing to learn from.
            continue
        # The model predicts a single class per image while the dataset holds
        # one class per object, so train on the first annotated object.
        loss = train_step(images, classes[:, 0])
        epoch_loss.update_state(loss)
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss.result().numpy()}")

In [0]:
# Sanity check: print the number of images in each batch of the training dataset.
for images, (boxes, classes) in train_dataset:
    print(len(images))