In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
import os
from datetime import datetime
from tqdm import tqdm

print(tf.__version__)

2.17.1


In [2]:
# Parameters
IMAGE_SIZE = (256, 256)
BATCH_SIZE = 16
COLOR_TO_SEGMENT = [255, 0, 0]
LOG_DIR = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")
CHECKPOINT_DIR = "/kaggle/working/checkpoints/"
EPOCHS = 5

IMAGE_DIR = '/kaggle/input/indian-driving-dataset-segmentation-all-level1id/idd/train/images'
MASK_DIR = '/kaggle/input/indian-driving-dataset-segmentation-all-level1id/idd/train/rgb_labels'
TEST_IMAGE_DIR = '/kaggle/input/indian-driving-dataset-segmentation-all-level1id/idd/val/images'
TEST_MASK_DIR = '/kaggle/input/indian-driving-dataset-segmentation-all-level1id/idd/val/rgb_labels'


In [3]:
from tqdm import tqdm

def preprocess_image(img_path):
    img = load_img(img_path, target_size=IMAGE_SIZE)
    img = img_to_array(img) / 255.0
    return img


def preprocess_mask(mask_path):
    mask = load_img(mask_path, target_size=IMAGE_SIZE)
    mask = img_to_array(mask)
    binary_mask = np.all(mask == COLOR_TO_SEGMENT, axis=-1).astype(np.float32)
    return binary_mask[..., np.newaxis]


def data_generator(image_dir, mask_dir, batch_size):
    image_paths = sorted([os.path.join(image_dir, f) for f in os.listdir(image_dir)])
    mask_paths = sorted([os.path.join(mask_dir, f) for f in os.listdir(mask_dir)])

    while True:  # Infinite loop over the data
        images = []
        masks = []
        for i in range(len(image_paths)):
            img = preprocess_image(image_paths[i])
            mask = preprocess_mask(mask_paths[i])

            images.append(img)
            masks.append(mask)

            if len(images) == batch_size:
                yield np.array(images), np.array(masks)  # Return a batch
                images = []  # Reset for the next batch
                masks = []

In [4]:
def build_deeplabv3plus_model():
    # Use MobileNetV2 as the base model
    base_model = tf.keras.applications.MobileNetV2(input_shape=(*IMAGE_SIZE, 3), include_top=False)
    base_model.trainable = False  # Freeze the base model
    
    inputs = tf.keras.Input(shape=(*IMAGE_SIZE, 3))
    x = base_model(inputs, training=False)
    x = tf.keras.layers.Conv2D(256, 3, padding='same', activation='relu')(x)  # Convolutional layer for processing
    
    # Add transpose convolution layers to upscale the feature map to the original size
    x = tf.keras.layers.Conv2DTranspose(128, 3, strides=2, padding='same', activation='relu')(x)  # Upsample to 16x16
    x = tf.keras.layers.Conv2DTranspose(128, 3, strides=2, padding='same', activation='relu')(x)  # Upsample to 32x32
    x = tf.keras.layers.Conv2DTranspose(128, 3, strides=2, padding='same', activation='relu')(x)  # Upsample to 64x64
    x = tf.keras.layers.Conv2DTranspose(128, 3, strides=2, padding='same', activation='relu')(x)  # Upsample to 128x128
    x = tf.keras.layers.Conv2DTranspose(128, 3, strides=2, padding='same', activation='relu')(x)  # Upsample to 256x256
    
    # Add a final 1x1 convolution to predict the binary mask
    x = tf.keras.layers.Conv2D(1, 1, activation='sigmoid')(x)  # Output layer for segmentation mask
    
    model = tf.keras.Model(inputs, x)
    return model


In [5]:
# Data generator for training
train_gen = data_generator(IMAGE_DIR, MASK_DIR, BATCH_SIZE)
# Data generator for testing (used as validation)
test_gen = data_generator(TEST_IMAGE_DIR, TEST_MASK_DIR, BATCH_SIZE)

In [6]:
# Calculate steps per epoch (number of batches in one epoch)
train_steps = len(os.listdir(IMAGE_DIR)) // BATCH_SIZE
test_steps = len(os.listdir(TEST_IMAGE_DIR)) // BATCH_SIZE

train_steps, test_steps

(804, 124)

In [7]:
# Build the model
model = build_deeplabv3plus_model()
model.summary()

  base_model = tf.keras.applications.MobileNetV2(input_shape=(*IMAGE_SIZE, 3), include_top=False)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [8]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [9]:
# TensorBoard callback
tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR)
# Model checkpoint callback
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(CHECKPOINT_DIR, 'model_weights.weights.h5'),
                                                    save_weights_only=True,
                                                    save_best_only=True,
                                                    monitor='loss',
                                                    mode='min')
    
# Training
model.fit(train_gen, steps_per_epoch=train_steps, epochs=EPOCHS, 
            validation_data=test_gen, validation_steps=test_steps,
            callbacks=[tensorboard_cb, checkpoint_cb])


print(f"Training complete. Weights saved in {CHECKPOINT_DIR}")

Epoch 1/5
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1026s[0m 1s/step - accuracy: 0.8978 - loss: 0.3011 - val_accuracy: 0.9417 - val_loss: 0.2174
Epoch 2/5
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m707s[0m 881ms/step - accuracy: 0.9442 - loss: 0.2035 - val_accuracy: 0.9444 - val_loss: 0.1778
Epoch 3/5
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m697s[0m 868ms/step - accuracy: 0.9501 - loss: 0.1509 - val_accuracy: 0.9455 - val_loss: 0.1330
Epoch 4/5
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m682s[0m 849ms/step - accuracy: 0.9544 - loss: 0.1079 - val_accuracy: 0.9447 - val_loss: 0.1309
Epoch 5/5
[1m804/804[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m682s[0m 849ms/step - accuracy: 0.9577 - loss: 0.0999 - val_accuracy: 0.9417 - val_loss: 0.1437
Training complete. Weights saved in /kaggle/working/checkpoints/


In [10]:
import matplotlib.pyplot as plt
import cv2

print(cv2.__version__)



4.10.0


In [11]:

def preprocess_image_for_prediction(img_path):
    img = load_img(img_path, target_size=IMAGE_SIZE)
    img_array = img_to_array(img) / 255.0
    return img_array[np.newaxis, ...]  # Add batch dimension


# def crop_segmented_area(original_image, mask):
#     # BOUNDING BOX CUTTING
#     # Threshold the mask to binary format
#     binary_mask = (mask[0, ..., 0] > 0.5).astype(np.uint8)  # Shape: (256, 256)
    
#     # Find contours in the binary mask
#     contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
#     if len(contours) == 0:
#         print("No segmented area detected.")
#         return None
    
#     # Find the bounding box for the largest contour
#     largest_contour = max(contours, key=cv2.contourArea)
#     x, y, w, h = cv2.boundingRect(largest_contour)
    
#     # Crop the original image to the bounding box
#     original_image_resized = cv2.resize(original_image, IMAGE_SIZE)  # Resize to match the model's input size
#     cropped_image = original_image_resized[y:y + h, x:x + w]
#     return cropped_image

# def crop_segmented_area(original_image, mask):
#     # EXACT CUTTING
#     # Threshold the mask to binary format
#     binary_mask = (mask[0, ..., 0] > 0.5).astype(np.uint8)  # Shape: (256, 256)

#     # Ensure the mask is the same size as the original image
#     binary_mask_resized = cv2.resize(binary_mask, (original_image.shape[1], original_image.shape[0]))

#     # Create an empty mask the same size as the input image
#     segmentation_mask = np.zeros_like(binary_mask_resized)

#     # Fill the mask with the largest segmented area
#     contours, _ = cv2.findContours(binary_mask_resized, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

#     if len(contours) == 0:
#         print("No segmented area detected.")
#         return None
    
#     cv2.drawContours(segmentation_mask, contours, -1, (255), thickness=cv2.FILLED)

#     # Convert original image to uint8 if not already
#     original_image_uint8 = np.uint8(original_image * 255) if original_image.max() <= 1 else np.uint8(original_image)

#     # Use the mask to keep only the segmented area of the image
#     segmented_image = cv2.bitwise_and(original_image_uint8, original_image_uint8, mask=segmentation_mask)
    
#     # Resize to match the model's input size (if needed)
#     segmented_image_resized = cv2.resize(segmented_image, (IMAGE_SIZE[1], IMAGE_SIZE[0]))

#     return segmented_image_resized


def crop_segmented_area_and_bounding_box(original_image, mask, image_size=(256, 256)):
    # Threshold the mask to binary format (use the mask shape)
    binary_mask = (mask[0, ..., 0] > 0.5).astype(np.uint8)  # Shape: (256, 256)
    
    # Resize the binary mask to match the original image size
    binary_mask_resized = cv2.resize(binary_mask, (original_image.shape[1], original_image.shape[0]))

    # Find contours in the binary mask to get the segmented areas
    contours, _ = cv2.findContours(binary_mask_resized, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if len(contours) == 0:
        print("No segmented area detected.")
        return None  # Return None if no contours are found
    
    # Find the bounding box surrounding the largest contour
    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)

    # Optionally, visualize the bounding box if needed:
    cv2.rectangle(original_image, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Now, crop the original image using the bounding box directly (without resizing)
    cropped_image = original_image[y:y+h, x:x+w]

    # Optionally, resize the cropped image to match the model's input size (e.g., 256x256)
    cropped_image_resized = cv2.resize(cropped_image, image_size)

    return cropped_image_resi


def visualize_and_save_cropped_result(original_image, cropped_image, save_path=CROPPED_SAVE_PATH):
    if cropped_image is None:
        print("No segmented area to crop and save.")
        return
    
    # Save the cropped result
    plt.figure(figsize=(6, 6))
    plt.title("Cropped Segmented Area")
    plt.imshow(cropped_image)
    plt.axis("off")
    #plt.savefig(save_path)
    plt.show()
    print(f"Cropped segmented area saved at {save_path}")

def load_and_predict(image_path):
    # Load model architecture
    model = build_deeplabv3plus_model()
    
    # Load weights
    model.load_weights(CHECKPOINT_DIR+'model_weights.weights.h5')
    print(f"Weights loaded from {CHECKPOINT_DIR}")
    
    # Preprocess the input image
    img = preprocess_image_for_prediction(image_path)
    
    # Predict mask
    mask = model.predict(img)
    
    # Load the original image (not resized)
    original_img = img_to_array(load_img(image_path)) / 255.0  # Original scale
    
    # Crop segmented area
    cropped_image = crop_segmented_area(original_img, mask)
    visualize_and_save_cropped_result(original_img, cropped_image)


NameError: name 'CROPPED_SAVE_PATH' is not defined

In [None]:
IMAGE_PATH = '/kaggle/input/indian-driving-dataset-segmentation-all-level1id/idd/test/images/0000266_leftImg8bit.jpg'
load_and_predict(IMAGE_PATH)