# Mask Extraction
### Import Libraries & Prepare the Segment Anything Model (SAM)

In [1]:
import os
import shutil
import cv2
import numpy as np
import yaml

# Model SAM
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator, SamPredictor
# Registry key and the checkpoint file that is loaded for it
model_type = "vit_h"
sam_checkpoint = "../models/sam_vit_h_4b8939.pth"

# Build the model from the checkpoint, then wrap it in a predictor
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
predictor = SamPredictor(sam)

  from .autonotebook import tqdm as notebook_tqdm


### Prepare essential functions

In [2]:
# Function to turn a mask into a coloured image
def show_mask(mask, color):
    """Paint a mask with a single colour.

    Args:
        mask: Array whose last two axes are (height, width); it must hold
            exactly height * width elements so it can be reshaped.
        color: Sequence of channel values applied wherever the mask is set.

    Returns:
        A ``uint8`` array of shape (height, width, len(color)): ``color``
        scaled by the mask value at each pixel.
    """
    height, width = mask.shape[-2:]
    plane = mask.reshape(height, width, 1)
    tinted = plane * np.array(color)
    return tinted.astype(np.uint8)

# Function for displaying bounding box and text in an image
def show_box(image, box, label, conf_score, color):
    """Draw a labelled bounding box on ``image`` in place.

    Args:
        image: Image array handed to the OpenCV drawing calls; it is modified.
        box: ``(xmin, ymin, xmax, ymax)`` in pixels; values are truncated to int.
        label: Class name shown next to the box.
        conf_score: Confidence, rendered with two decimals after the label.
        color: Colour used for both the rectangle and the text.

    The label normally sits 10 px above the box. When that would push the
    text past the top edge of the image, it is drawn just inside the box
    instead so it stays readable.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    thickness = 2

    x0, y0 = int(box[0]), int(box[1])
    x1, y1 = int(box[2]), int(box[3])
    cv2.rectangle(image, (x0, y0), (x1, y1), color, 2)

    label_text = f'{label} {conf_score:.2f}'
    # putText anchors the text at its bottom-left corner, so the glyphs extend
    # text_height pixels above label_y.
    (_, text_height), _ = cv2.getTextSize(label_text, font, font_scale, thickness)
    label_y = y0 - 10
    if label_y - text_height < 0:
        label_y = max(y0, 0) + text_height + 10
    cv2.putText(image, label_text, (x0, label_y), font, font_scale, color, thickness)

# Parse the dataset YAML and keep its 'names' entry as the class-name lookup
with open('../yolov9/data/coco.yaml', 'r') as yaml_stream:
    coco_data = yaml.safe_load(yaml_stream)

class_names = coco_data['names']

### Prepare data

In [6]:
# Define the desired classes. These ids index class_names
# (presumably umbrella and potted plant in the 80-class COCO list; confirm
# against coco.yaml).
desired_classes = [25, 58]

# Seeded, local generator so every run assigns the same colour to each class;
# the colours end up in the saved images, so they should be reproducible.
COLOR_SEED = 0
color_rng = np.random.default_rng(COLOR_SEED)

# One random colour per desired class, as a tuple of three ints in [0, 255]
color_map = {
    class_id: tuple(color_rng.integers(0, 256, 3).tolist())
    for class_id in desired_classes
}

# Directory where the images are located
images_dir = '../images'

# Directory where the tags (detections) are located
project_path = '/data/notebook'
trial_name = 'mardi'
if not trial_name:
    # An empty name would make the result path equal to project_path, and the
    # cleanup below would then delete the whole project directory.
    raise ValueError("trial_name must not be empty")
yolo_result_path = os.path.join(project_path, trial_name)
labels_dir = os.path.join(yolo_result_path, 'labels')

# Remove results of a previous run; presumably needed so detect.py (run with
# exist_ok=False) writes into this exact folder again. TODO confirm.
if os.path.isdir(yolo_result_path):
    shutil.rmtree(yolo_result_path)

# Directory for saving processed images
output_dir = 'mask_extraction'
os.makedirs(output_dir, exist_ok=True)

# Get the list of image file names in the directory: regular files only,
# sorted so the processing order is the same on every run
image_files = sorted(
    name for name in os.listdir(images_dir)
    if os.path.isfile(os.path.join(images_dir, name))
)

# Detection using the gelan-c model, run as a shell command over every image
# in images_dir. Results go under {project_path}/{trial_name}.
# --save-txt and --save-conf are required by the mask-generation cell: it reads
# one .txt label file per image from labels_dir and expects the confidence as
# the sixth field of every line.
!python ../yolov9/detect.py --weights ../yolov9/weights/gelan-c.pt --conf 0.1 --source {images_dir} --project {project_path} --name {trial_name} --device 0 --save-txt --save-conf

[34m[1mdetect: [0mweights=['../yolov9/weights/gelan-c.pt'], source=../images, data=../yolov9/data/coco128.yaml, imgsz=[640, 640], conf_thres=0.1, iou_thres=0.45, max_det=1000, device=0, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=/data/notebook, name=mardi, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 1e33dbb Python-3.10.8 torch-1.13.1 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 5938MiB)

Fusing layers... 
Model summary: 467 layers, 25472640 parameters, 0 gradients, 102.8 GFLOPs
image 1/5 /data/images/week_1.jpg: 480x640 2 traffic lights, 1 potted plant, 1 vase, 18.0ms
image 2/5 /data/images/week_2.jpg: 480x640 2 traffic lights, 1 umbrella, 1 potted plant, 1 vase, 17.9ms
image 3/5 /data/images/week_3.jpg: 480x640 2 traffic lights, 1 potted plant, 1 cell phone, 1 vase, 17.9ms
image 4/5 /data/ima

### Generate masked data

In [7]:
def _read_detections(label_path, image_width, image_height, wanted_classes):
    """Parse one label file into pixel-space detections.

    Each non-blank line must read ``class cx cy w h conf`` with the box given
    as a normalized centre/size in [0, 1].

    Args:
        label_path: Path of the label text file.
        image_width: Width in pixels used to scale x and w.
        image_height: Height in pixels used to scale y and h.
        wanted_classes: Class ids to keep; other lines are dropped.

    Returns:
        List of ``(class_id, (xmin, ymin, xmax, ymax), confidence)`` tuples.

    Raises:
        ValueError: If a non-blank line has fewer than six fields or a field
            is not numeric.
    """
    detections = []
    with open(label_path, 'r') as label_stream:
        for line in label_stream:
            components = line.split()
            if not components:
                # Blank line (e.g. trailing newline): nothing to parse.
                continue
            if len(components) < 6:
                raise ValueError(
                    f"Malformed line in {label_path}: expected "
                    f"'class cx cy w h conf', got {line!r}"
                )

            class_id = int(components[0])
            if class_id not in wanted_classes:
                continue
            cx, cy, w, h = (float(value) for value in components[1:5])
            confidence = float(components[5])

            # Convert from normalized [0, 1] to image scale
            cx *= image_width
            cy *= image_height
            w *= image_width
            h *= image_height

            # Convert center x, y, width and height to xmin, ymin, xmax, ymax
            bbox = (cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2)
            detections.append((class_id, bbox, confidence))
    return detections


# Iterate on images
for image_file in image_files:
    # Build the complete image path
    image_path = os.path.join(images_dir, image_file)

    # Read the image (OpenCV returns the channels in BGR order)
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Unable to load image {image_path}")
        continue

    image_height, image_width = image.shape[:2]

    # Construct the full path to the label file for this image
    label_file = os.path.splitext(image_file)[0] + '.txt'
    label_path = os.path.join(labels_dir, label_file)

    if not os.path.exists(label_path):
        print(f"Warning: Tag file not found for {image_file}")
        continue

    # Keep only the detections of the desired classes
    detections = _read_detections(label_path, image_width, image_height, desired_classes)

    # Union of all object masks for this image; True marks pixels to keep
    aggregate_mask = np.zeros((image_height, image_width), dtype=bool)

    if detections:
        # Computing the image embedding is the expensive step, so it is only
        # done when there is something to segment. The predictor assumes RGB
        # unless told otherwise, hence the explicit BGR. It must run before
        # show_box draws on the image.
        predictor.set_image(image, image_format="BGR")

    # Iterate on each detection and process it into the image
    for class_id, bbox, conf_score in detections:
        # NOTE: show_box draws on `image` in place, so the box outline and
        # label become part of the pixels composited below.
        show_box(image, bbox, class_names[class_id], conf_score, color_map[class_id])

        # Generate and accumulate masks for each bounding box
        input_box = np.array(bbox).reshape(1, 4)
        masks, _, _ = predictor.predict(
            point_coords=None,
            point_labels=None,
            box=input_box,
            multimask_output=False,
        )
        aggregate_mask |= masks[0].astype(bool)

    # Keep the original pixels inside the mask and paint everything else white
    new_image = np.where(
        aggregate_mask[..., np.newaxis], image, np.uint8(255)
    ).astype(np.uint8)

    # Save the processed image with white background in the output directory
    output_image_path = os.path.join(output_dir, image_file)
    if cv2.imwrite(output_image_path, new_image):
        print(f"Processed image saved in {output_image_path}")
    else:
        # imwrite reports failure through its return value
        print(f"Error: Unable to save image {output_image_path}")

print("Image processing and saving completed.")

Processed image saved in mask_extraction/week_1.jpg
Processed image saved in mask_extraction/week_2.jpg
Processed image saved in mask_extraction/week_3.jpg
Processed image saved in mask_extraction/week_4.jpg
Processed image saved in mask_extraction/week_5.jpg
Image processing and saving completed.
