In [2]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Dataset to pull from TFDS (COCO by default; swap in another name such as "voc" if needed)
dataset_name = "coco/2017"

# Class whose bounding box is used as the regression target (change to another class if needed)
object_to_detect = "person"

# Slice the 'train' split into 80% train / 10% validation / 10% test portions
splits, info = tfds.load(
    dataset_name,
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
    shuffle_files=True,
    with_info=True,
)
train_ds, val_ds, test_ds = splits

# Function to preprocess the images
def preprocess_data(example):
    """Convert one TFDS detection example into an ``(image, bbox)`` training pair.

    The image is resized to 128x128 and scaled to [0, 1]. The target is the
    first bounding box whose label equals ``object_to_detect``; if the image
    contains no object of that class the target is all zeros.

    Args:
        example: Feature dict providing ``example['image']`` and
            ``example['objects']`` with parallel ``'label'`` and ``'bbox'``
            tensors (one row per annotated object).

    Returns:
        Tuple ``(image, bbox)``. ``image`` is the resized float image.
        ``bbox`` is a 4-element float tensor ordered
        ``[xmin, ymin, xmax, ymax]``, the order the model head and the
        plotting helper in this notebook use.
    """
    # Resize to a fixed size so examples can be batched, then scale to [0, 1]
    image = tf.image.resize(example['image'], (128, 128))
    image = image / 255.0

    labels = example['objects']['label']
    boxes = example['objects']['bbox']

    # Integer class id of the object we want to localize
    target_id = info.features['objects']['label'].names.index(object_to_detect)
    is_target = tf.equal(labels, tf.cast(target_id, labels.dtype))

    # Keep only boxes of the target class. Taking boxes[0] unconditionally would
    # return whatever object happens to be annotated first, even another class.
    target_boxes = tf.boolean_mask(boxes, is_target)

    # Append an all-zero row so index 0 always exists; it is only selected when
    # the image contains no object of the target class.
    fallback = tf.zeros((1, 4), dtype=boxes.dtype)
    bbox = tf.concat([target_boxes, fallback], axis=0)[0]

    # TFDS encodes boxes as [ymin, xmin, ymax, xmax]; reorder to
    # [xmin, ymin, xmax, ymax] (the zero fallback is unaffected).
    bbox = tf.gather(bbox, [1, 0, 3, 2])

    return image, bbox

def _batched_pipeline(ds):
    """Map ``preprocess_data`` over ``ds``, then batch by 32 and prefetch."""
    mapped = ds.map(preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)
    return mapped.batch(32).prefetch(tf.data.AUTOTUNE)


# Run every split through the same preprocessing / batching pipeline
train_ds = _batched_pipeline(train_ds)
val_ds = _batched_pipeline(val_ds)
test_ds = _batched_pipeline(test_ds)

In [None]:
# Build a simple CNN model for object detection
def create_model():
    """Build and compile a small CNN that regresses one bounding box per image.

    Four Conv2D + MaxPooling2D stages feed a 512-unit dense layer and a
    4-unit sigmoid head, so every predicted coordinate lies in [0, 1].

    Returns:
        A compiled ``tf.keras.Sequential`` model taking 128x128x3 inputs,
        trained with Adam on mean squared error and reporting mean absolute
        error as its metric.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(128, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(256, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(4, activation='sigmoid')  # Output layer for bounding box coordinates (xmin, ymin, xmax, ymax)
    ])

    # Classification accuracy is not meaningful for continuous box coordinates;
    # mean absolute error reports the average per-coordinate deviation instead.
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Initialize the model
model = create_model()

# Train the model
history = model.fit(train_ds, validation_data=val_ds, epochs=10)

# Evaluate the model on the test set; evaluate() returns [loss, mae] in compile order
test_loss, test_mae = model.evaluate(test_ds)
print(f'Test MSE (loss): {test_loss:.4f}')
print(f'Test MAE: {test_mae:.4f}')

# Example of visualizing a prediction
def visualize_prediction(image, bbox):
    """Show ``image`` with ``bbox`` drawn on top as a red rectangle outline.

    Args:
        image: Image array accepted by ``Axes.imshow``; ``image.shape[0]`` is
            used as the height and ``image.shape[1]`` as the width.
        bbox: Four values ``[xmin, ymin, xmax, ymax]`` expressed as fractions
            of the image width / height. May be a list, tuple or array.
    """
    height, width = image.shape[0], image.shape[1]

    # Coerce to an ndarray first: multiplying a plain list/tuple by a list
    # raises TypeError, whereas arrays broadcast element-wise.
    scale = np.array([width, height, width, height], dtype=float)
    xmin, ymin, xmax, ymax = np.asarray(bbox, dtype=float) * scale

    fig, ax = plt.subplots(1)
    ax.imshow(image)
    # Rectangle takes the top-left corner plus width and height in pixels
    rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                         linewidth=1, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
    plt.show()

In [None]:
# Predict a box for the first image of one test batch and plot it
for images, true_bboxes in test_ds.take(1):
    first_image = images[0]
    # Add a leading batch axis so the single image matches the model's input rank
    predicted = model.predict(first_image[tf.newaxis, ...])
    visualize_prediction(first_image.numpy(), predicted[0])