In [1]:
!pip install -q tensorflow opencv-python pycocotools

In [2]:
import os

os.makedirs("data", exist_ok=True)

!wget -q http://images.cocodataset.org/zips/train2017.zip
!unzip -q train2017.zip -d data/

!wget -q http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!unzip -q annotations_trainval2017.zip -d data/

In [3]:
import cv2
import numpy as np
from pycocotools.coco import COCO
import random

IMG_SIZE = 256

images_dir = "data/train2017"
ann_file = "data/annotations/instances_train2017.json"

coco = COCO(ann_file)

person_cat_id = coco.getCatIds(catNms=["person"])[0]
img_ids = coco.getImgIds(catIds=[person_cat_id])

print("Total person images:", len(img_ids))

def load_image_and_mask(img_id):
    img_info = coco.loadImgs(img_id)[0]
    img_path = os.path.join(images_dir, img_info["file_name"])

    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    mask = np.zeros((img_info["height"], img_info["width"]), dtype=np.uint8)

    ann_ids = coco.getAnnIds(imgIds=[img_id], catIds=[person_cat_id])
    anns = coco.loadAnns(ann_ids)

    for ann in anns:
        m = coco.annToMask(ann)
        mask = np.maximum(mask, m)

    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    mask = cv2.resize(mask, (IMG_SIZE, IMG_SIZE))

    img = img / 255.0
    mask = (mask > 0).astype(np.float32)
    mask = np.expand_dims(mask, axis=-1)

    return img, mask

loading annotations into memory...
Done (t=18.56s)
creating index...
index created!
Total person images: 64115


In [4]:
import tensorflow as tf

MAX_SAMPLES = 10000
random.shuffle(img_ids)
img_ids = img_ids[:MAX_SAMPLES]

def gen():
    for img_id in img_ids:
        img, mask = load_image_and_mask(img_id)
        yield img, mask

dataset = tf.data.Dataset.from_generator(
    gen,
    output_signature=(
        tf.TensorSpec(shape=(IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(IMG_SIZE, IMG_SIZE, 1), dtype=tf.float32),
    ),
)

BATCH_SIZE = 8
dataset = dataset.shuffle(100).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Train / Val split
train_size = int(0.9 * MAX_SAMPLES / BATCH_SIZE)
train_ds = dataset.take(train_size)
val_ds = dataset.skip(train_size)

In [5]:
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV3Small

def conv_block(x, filters):
    x = layers.Conv2D(filters, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    x = layers.Conv2D(filters, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    return x

def resize_like(x, ref):
    return layers.Resizing(ref.shape[1], ref.shape[2], interpolation="bilinear")(x)

def build_model():
    inputs = layers.Input((IMG_SIZE, IMG_SIZE, 3))

    base = MobileNetV3Small(input_tensor=inputs, include_top=False, weights="imagenet")

    # Pick feature maps by depth (stable across versions)
    e1 = base.layers[20].output   # ~128x128
    e2 = base.layers[40].output   # ~64x64
    e3 = base.layers[80].output   # ~32x32
    e4 = base.layers[120].output  # ~16x16
    e5 = base.layers[-1].output   # ~8x8 (deepest)

    # Decoder
    x = conv_block(e5, 256)

    x = resize_like(x, e4)
    x = conv_block(layers.Concatenate()([x, e4]), 128)

    x = resize_like(x, e3)
    x = conv_block(layers.Concatenate()([x, e3]), 64)

    x = resize_like(x, e2)
    x = conv_block(layers.Concatenate()([x, e2]), 32)

    x = resize_like(x, e1)
    x = conv_block(layers.Concatenate()([x, e1]), 16)

    # Final resize to full image size
    x = layers.Resizing(IMG_SIZE, IMG_SIZE)(x)

    outputs = layers.Conv2D(1, 1, activation="sigmoid")(x)

    model = models.Model(inputs, outputs)
    return model

model = build_model()
model.summary()

  return MobileNetV3(


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v3/weights_mobilenet_v3_small_224_1.0_float_no_top_v2.h5
[1m4334752/4334752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [6]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

In [7]:
EPOCHS = 25

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS
)

Epoch 1/25
   1125/Unknown [1m175s[0m 95ms/step - accuracy: 0.6591 - loss: 0.5982



[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m265s[0m 176ms/step - accuracy: 0.6593 - loss: 0.5981 - val_accuracy: 0.8201 - val_loss: 0.4729
Epoch 2/25
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 154ms/step - accuracy: 0.8929 - loss: 0.3464 - val_accuracy: 0.8279 - val_loss: 0.4618
Epoch 3/25
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 152ms/step - accuracy: 0.9037 - loss: 0.2703 - val_accuracy: 0.8075 - val_loss: 0.4261
Epoch 4/25
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 154ms/step - accuracy: 0.9113 - loss: 0.2305 - val_accuracy: 0.8348 - val_loss: 0.4352
Epoch 5/25
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m173s[0m 153ms/step - accuracy: 0.9179 - loss: 0.2058 - val_accuracy: 0.8284 - val_loss: 0.4865
Epoch 6/25
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 153ms/step - accuracy: 0.

In [8]:
model.save("model.keras")
print("Saved model.keras")

from google.colab import files
files.download("model.keras")

Saved model.keras


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>