In [2]:
import torch
import torchvision
print("PyTorch version:", torch.__version__)
print("Torchvision version:", torchvision.__version__)
print("CUDA is available:", torch.cuda.is_available())
import sys
!{sys.executable} -m pip install opencv-python matplotlib
!{sys.executable} -m pip install 'git+https://github.com/facebookresearch/segment-anything.git'

!mkdir images
!wget -P images https://raw.githubusercontent.com/facebookresearch/segment-anything/main/notebooks/images/truck.jpg
!wget -P images https://raw.githubusercontent.com/facebookresearch/segment-anything/main/notebooks/images/groceries.jpg


PyTorch version: 2.1.0
Torchvision version: 0.16.0
CUDA is available: True
Collecting opencv-python
  Using cached opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting matplotlib
  Using cached matplotlib-3.8.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Using cached contourpy-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.8 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Using cached fonttools-4.49.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (159 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Using cached kiwisolver-1.4.5-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (6.4 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Using cached pyparsing-3.1.1-py3-none

In [3]:
import numpy as np
import matplotlib.pyplot as plt

def show_mask(mask, ax, random_color=False, color=None):
    """
    Draw `mask` on `ax` as a translucent single-colour overlay.

    Parameters:
    - mask: Mask whose last two dimensions are (height, width); it is reshaped
      to (height, width, 1) and multiplied by an RGBA colour.
    - ax: Object exposing `imshow` (e.g. a matplotlib Axes).
    - random_color: When True, a random RGB colour is used and `color` is ignored.
    - color: Optional RGB sequence or array; an alpha of 0.6 is appended to it.
      When None or empty, a default blue is used.

    NOTE: `show_mask` is defined a second time further down in this cell; the
    later definition is the one in effect once the cell has run.
    """
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    elif color is not None and np.ndim(color) > 0 and np.size(color) > 0:
        # An explicit check is used because `elif color:` raises
        # "truth value of an array is ambiguous" for NumPy-array colours.
        color = np.concatenate([np.array(color), np.array([0.6])], axis=0)
    else:
        color = np.array([30/255, 144/255, 255/255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)

    ax.imshow(mask_image)

def show_box(box, ax):
    """Outline `box` (indexed as [x0, y0, x1, y1]) on `ax` with an unfilled green rectangle."""
    left, top = box[0], box[1]
    width = box[2] - box[0]
    height = box[3] - box[1]
    outline = plt.Rectangle((left, top), width, height, edgecolor='green', facecolor=(0,0,0,0), lw=2)
    ax.add_patch(outline)

def show_boxes_on_image(raw_image, boxes):
    """Open a 10x10 figure showing `raw_image` and outline every box in `boxes` on it."""
    plt.figure(figsize=(10,10))
    plt.imshow(raw_image)
    # imshow has already created the axes, so gca() hands back the same object every time.
    axis = plt.gca()
    for current_box in boxes:
        show_box(current_box, axis)
    plt.axis('on')
    plt.show()

def show_points_on_image(raw_image, input_points, input_labels=None):
    """
    Open a 10x10 figure showing `raw_image` with the prompt points drawn on top.

    `input_points` is converted with np.array and indexed as [:, 0], so it must be
    two-dimensional. When `input_labels` is None every point gets label 1.
    """
    plt.figure(figsize=(10,10))
    plt.imshow(raw_image)
    points = np.array(input_points)
    labels = np.ones_like(points[:, 0]) if input_labels is None else np.array(input_labels)
    show_points(points, labels, plt.gca())
    plt.axis('on')
    plt.show()

def show_points_and_boxes_on_image(raw_image, boxes, input_points, input_labels=None):
    """
    Open a 10x10 figure showing `raw_image` with both prompt points and boxes drawn on top.

    Parameters:
    - raw_image: Image accepted by `plt.imshow`.
    - boxes: Iterable of boxes, each passed to `show_box`.
    - input_points: Points converted with np.array and indexed as [:, 0], so the
      array must be two-dimensional.
    - input_labels: Optional per-point labels; when None every point gets label 1.
    """
    plt.figure(figsize=(10,10))
    plt.imshow(raw_image)
    input_points = np.array(input_points)
    if input_labels is None:
        labels = np.ones_like(input_points[:, 0])
    else:
        labels = np.array(input_labels)
    show_points(input_points, labels, plt.gca())
    for box in boxes:
        show_box(box, plt.gca())
    plt.axis('on')
    plt.show()


def show_points(coords, labels, ax, marker_size=375):
    """
    Scatter prompt points on `ax` as stars: label 1 in green, label 0 in red.

    `coords` is indexed with boolean masks built from `labels` and then as
    [:, 0] / [:, 1] for the x and y values.
    """
    # Select both groups up front, then draw positives before negatives.
    groups = [
        (coords[labels == 1], 'green'),
        (coords[labels == 0], 'red'),
    ]
    for pts, colour in groups:
        ax.scatter(pts[:, 0], pts[:, 1], color=colour, marker='*', s=marker_size, edgecolor='white', linewidth=1.25)

def show_mask(mask, ax, random_color=False, color=None):
    """
    Draw `mask` on `ax` as a translucent single-colour overlay.

    Parameters:
    - mask: Mask whose last two dimensions are (height, width); it is reshaped
      to (height, width, 1) and multiplied by an RGBA colour.
    - ax: Object exposing `imshow` (e.g. a matplotlib Axes).
    - random_color: When True, a random RGB colour is used and `color` is ignored.
    - color: Optional RGB sequence or array; an alpha of 0.6 is appended to it.
      When None or empty, a default blue is used.

    NOTE: this cell also defines `show_mask` earlier on; this later definition
    replaces it.
    """
    if random_color:
        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
    elif color is not None and np.ndim(color) > 0 and np.size(color) > 0:
        # An explicit check is used because `elif color:` raises
        # "truth value of an array is ambiguous" for NumPy-array colours.
        color = np.concatenate([np.array(color), np.array([0.6])], axis=0)
    else:
        color = np.array([30/255, 144/255, 255/255, 0.6])
    h, w = mask.shape[-2:]
    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)

    ax.imshow(mask_image)

def show_masks_on_image(raw_image, masks, scores):
    """
    Show each predicted mask over `raw_image` in its own subplot, titled with its score.

    Parameters:
    - raw_image: Image convertible with np.array and accepted by `imshow`.
    - masks: Tensor of masks; a 4-D input is squeezed first. Each mask must
      support `.cpu().detach()`.
    - scores: Tensor of scores; squeezed when its first dimension is 1. Each
      score must support `.item()`.
    """
    if len(masks.shape) == 4:
        masks = masks.squeeze()
    if scores.shape[0] == 1:
        scores = scores.squeeze()

    # With a single prediction the squeezes above collapse the masks to one
    # 2-D mask and the scores to a 0-d value; restore the leading axis so the
    # loop below still iterates over whole masks.
    if len(masks.shape) == 2:
        masks = masks[None]
    if len(scores.shape) == 0:
        scores = scores.reshape(1)

    nb_predictions = scores.shape[-1]
    # squeeze=False keeps `axes` a 2-D array even for one subplot, so it can
    # always be indexed; take the single row.
    fig, axes = plt.subplots(1, nb_predictions, figsize=(15, 15), squeeze=False)
    axes = axes[0]

    for i, (mask, score) in enumerate(zip(masks, scores)):
        mask = mask.cpu().detach()
        axes[i].imshow(np.array(raw_image))
        show_mask(mask, axes[i])
        axes[i].title.set_text(f"Mask {i+1}, Score: {score.item():.3f}")
        axes[i].axis("off")
    plt.show()

In [16]:
import os
import glob

# Source dataset (YOLO bounding-box labels) and destination dataset (YOLO segmentation labels).
bbox_dataset = 'datasets/Batch_6/LABELLED'
seg_dataset = 'datasets/Batch_6/CONVERTED'

# Only the top level of bbox_dataset is scanned: the pattern has no "**"
# component, so glob's `recursive` flag would have no effect here.
image_files = glob.glob(f"./{bbox_dataset}/*.jpg")
label_files = glob.glob(f"./{bbox_dataset}/*.txt")

print("Number of images:", len(image_files))
print("Number of labels:", len(label_files))

# Pair every image with the path its label file is expected at.
# NOTE: the name `image_lables` (sic) and the loop variable `imgPath` are read
# by later cells, so both are kept as they are.
image_lables = []
for imgPath in image_files:
    # Swap only the extension; str.replace(".jpg", ".txt") would also rewrite
    # a ".jpg" occurring earlier in the path.
    labelPath = os.path.splitext(imgPath)[0] + ".txt"
    # YOLO layout: ".../images/..." maps to ".../labels/..." (no-op for a flat folder).
    labelPath = labelPath.replace("images", "labels")
    image_lables.append((imgPath, labelPath))

# Size of every label file; an empty file is a label file with no annotations.
sizes = [(lable_path, os.path.getsize(lable_path)) for lable_path in label_files]
count_empty = sum(1 for _, size in sizes if size == 0)
print("Empty Files: ", count_empty)
# Show only the smallest files instead of dumping the whole list.
print("Smallest label files (path, bytes):", sorted(sizes, key=lambda x: x[1])[:10])

if image_lables:
    print(image_lables[0])

Number of images: 15100
Number of labels: 13727
Empty Files:  0
('./datasets/Batch_6/LABELLED/Image_0000034716_1667328130_428679000.jpg', './datasets/Batch_6/LABELLED/Image_0000034716_1667328130_428679000.txt')


In [46]:
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import os
import shutil
import numpy as np
import cv2
import torch

def show_box(box, ax):
    """Add an unfilled green rectangle to `ax` for `box`, indexed as [x0, y0, x1, y1]."""
    origin = (box[0], box[1])
    span_x, span_y = box[2] - box[0], box[3] - box[1]
    ax.add_patch(
        plt.Rectangle(origin, span_x, span_y, edgecolor='green', facecolor=(0,0,0,0), lw=2)
    )
def show_boxes_on_image(raw_image, boxes):
    """Display `raw_image` in a 10x10 figure with every box in `boxes` outlined."""
    plt.figure(figsize=(10,10))
    plt.imshow(raw_image)
    # The axes exist once imshow has run; fetch them once and reuse them.
    target_axes = plt.gca()
    for single_box in boxes:
        show_box(single_box, target_axes)
    plt.axis('on')
    plt.show()


def getLabels(labelPath):
    """
    Read a label file into a list of lines.

    Returns one string per line with trailing whitespace removed, or None when
    `labelPath` does not exist.
    """
    if os.path.exists(labelPath):
        with open(labelPath) as handle:
            # One entry per annotation line (bounding boxes in this notebook).
            return [row.rstrip() for row in handle]
    # Missing file: signalled with None, not an empty list.
    return None

def readLabelBB(labels, w, h):
    """
    Parse label lines of the form "class x_center y_center width height".

    The four numeric fields are multiplied by `w` / `h` and truncated to int.

    Parameters:
    - labels: List of label lines; blank lines are skipped.
    - w, h: Scale factors applied to the x/width and y/height fields.

    Returns:
    - parsedLabels: List of (x_center, y_center, box_width, box_height) tuples.
    - objClass: Class field (as a string) of the LAST parsed line, or None when
      no line was parsed.
    """
    parsedLabels = []
    objClass = None  # stays None for empty input instead of being unbound at return
    for label in labels:
        bb_current = label.split()
        if not bb_current:
            continue
        objClass = bb_current[0]
        x_center, y_center = int(float(bb_current[1]) * w), int(float(bb_current[2]) * h)
        box_width, box_height = int(float(bb_current[3]) * w), int(float(bb_current[4]) * h)
        parsedLabels.append((x_center, y_center, box_width, box_height))
    return parsedLabels, objClass
def getConvertedBoxes(labels, image_width, image_height):
    """
    Convert "class x_center y_center width height" label lines into corner boxes.

    The centre/size fields are treated as fractions of the image size, turned
    into top-left / bottom-right corners, scaled by the image dimensions and
    truncated to int.

    Parameters:
    - labels: List of label lines; blank lines are skipped.
    - image_width, image_height: Image size used for scaling.

    Returns:
    - class_ids: List of int class IDs, one per parsed line.
    - converted_boxes: List of [x0, y0, x1, y1] lists, aligned with class_ids.
    """
    converted_boxes = []
    class_ids = []
    for label in labels:
        bb_current = label.split()
        if not bb_current:
            # Blank line: skip it rather than fail on bb_current[0].
            continue
        class_id = int(bb_current[0])
        x_center, y_center = float(bb_current[1]), float(bb_current[2])
        box_width, box_height = float(bb_current[3]), float(bb_current[4])

        # Convert to top left and bottom right coordinates
        x0 = int((x_center - box_width / 2) * image_width)
        y0 = int((y_center - box_height / 2) * image_height)
        x1 = int((x_center + box_width / 2) * image_width)
        y1 = int((y_center + box_height / 2) * image_height)
        class_ids.append(class_id)
        converted_boxes.append([x0, y0, x1, y1])
    return  class_ids, converted_boxes


def save_all_masks_on_one_image(raw_image, masks, save_dir, save_filename="all_masks_overlay"):
    """
    Blend every mask onto a copy of `raw_image` in semi-transparent red and save the result.

    Parameters:
    - raw_image: Image convertible with np.array to a float (H, W, 3) array.
    - masks: Iterable of masks with values in {0, 1}. Each mask may be a NumPy
      array or an object with `.detach().cpu().numpy()` (e.g. a torch tensor),
      and may carry leading singleton dimensions such as (1, H, W).
    - save_dir: Output directory; created when missing.
    - save_filename: File name inside `save_dir`. The image format follows its
      extension; ".png" is appended when the name has no extension.
    """
    overlay_image = np.array(raw_image, dtype=np.float32)  # float copy for blending

    os.makedirs(save_dir, exist_ok=True)

    # Fixed colour for visibility and debugging: red at 153/255 = 60 % opacity.
    mask_rgb = np.array([255.0, 0.0, 0.0], dtype=np.float32)
    mask_opacity = 153.0 / 255.0

    for mask in masks:
        if hasattr(mask, "detach"):
            # Tensor-like mask: bring it to host memory as a NumPy array.
            mask = mask.detach().cpu().numpy()
        mask = np.asarray(mask, dtype=np.float32)
        # Drop leading singleton dimensions; raises if more than one mask is packed in.
        mask = mask.reshape(mask.shape[-2:])

        # Per-pixel alpha, shaped (H, W, 1) so it broadcasts over the colour channels.
        alpha = (mask * mask_opacity)[..., None]
        overlay_image = (1 - alpha) * overlay_image + alpha * mask_rgb

    # Ensure the resulting image is in the correct data type and range
    overlay_image = np.clip(overlay_image, 0, 255).astype(np.uint8)

    # Without an extension the save call cannot determine an output format.
    if not os.path.splitext(save_filename)[1]:
        save_filename += ".png"
    save_path = os.path.join(save_dir, save_filename)
    Image.fromarray(overlay_image).save(save_path)
    print(f"Saved: {save_path}")


def parse_seg_label_file(seg_label_path, image_shape):
    """
    Parses a segmentation label file.

    Each non-blank line is read as a class ID followed by normalized
    "x y x y ..." polygon coordinates, which are rasterised into a mask.

    Parameters:
    - seg_label_path: Path to the segmentation label file.
    - image_shape: Tuple of (height, width) of the corresponding image.

    Returns:
    - masks: A list of uint8 mask arrays of shape image_shape (1 inside the polygon).
    - class_ids: A list of class IDs associated with each mask.

    Raises:
    - ValueError: If a line has an odd number of coordinate values (from reshape).
    """
    masks = []
    class_ids = []

    with open(seg_label_path, 'r') as file:
        for line in file:
            parts = line.split()
            if not parts:
                # Blank line: nothing to parse.
                continue
            class_id = int(parts[0])
            # Assuming the rest of the line is normalized mask coordinates
            coords = np.array([float(x) for x in parts[1:]]).reshape(-1, 2)
            # Un-normalize coordinates
            coords[:, 0] *= image_shape[1]  # Width
            coords[:, 1] *= image_shape[0]  # Height
            # Create a blank mask
            mask = np.zeros(image_shape, dtype=np.uint8)
            # Draw the polygon on the mask
            coords = np.array([coords], dtype=np.int32)  # cv2.fillPoly expects a 3D array
            cv2.fillPoly(mask, coords, 1)
            masks.append(mask)
            class_ids.append(class_id)

    return masks, class_ids

In [42]:
# Load the Segment Anything model and wrap it in a predictor.
import sys
sys.path.append("..")
import torch
from segment_anything import sam_model_registry, SamPredictor

# Checkpoint file and the matching model variant key in sam_model_registry.
sam_checkpoint = "sam_vit_h_4b8939.pth"
model_type = "vit_h"

# Use the GPU when one is visible; otherwise fall back to the CPU instead of
# failing in sam.to().
device = "cuda" if torch.cuda.is_available() else "cpu"

sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device=device)

predictor = SamPredictor(sam)


In [47]:
# Convert every (image, bounding-box label) pair in image_lables into a YOLO
# segmentation label: SAM predicts one mask per box, and the largest external
# contour of each mask is written as "class x1 y1 x2 y2 ..." with coordinates
# normalised by the image width/height.
#
# NOTE: label files produced by earlier versions of this cell repeated the
# class id before every coordinate; regenerate them if they are to be used
# for training.

# if you would like to plot and view the segmentation masks then set Objects list from the yaml file
Objects = []
# color rgb values for each class
color = []

# When True, an overlay image with all masks is saved next to the labels for visual checking.
SAVE_MASK_IMAGE = True

# Generate a random color for each object in the Objects list
for _ in Objects:
    color.append((np.random.randint(0,255),np.random.randint(0,255),np.random.randint(0,255)))

loopCount = 0  # Number of segmentation labels (polygons) produced so far


def _mask_to_polygon(binary_mask, width, height):
    """Return the largest external contour of binary_mask as a flat array of normalised x, y values, or None."""
    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None  # empty mask: nothing to outline
    largest_contour = max(contours, key=cv2.contourArea)
    points = largest_contour.reshape(-1, 2)
    if len(points) < 3:
        return None  # fewer than three points do not form a polygon
    return (points / np.array([width, height])).reshape(-1)


for imgPath, labelPath in image_lables:
    destination = f'{seg_dataset}/train'  # Default destination folder for training data
    if 'valid' in imgPath:  # Change destination if the image is for validation
        destination = f'{seg_dataset}/valid'
    # Always defined, so the resume branch below never reads an unset or stale value.
    mask_destination = f'{destination}/mask_validation'

    # File name without its extension; splitext keeps names that contain extra dots intact.
    label_file = os.path.splitext(os.path.basename(imgPath))[0]
    seg_label_path = os.path.join(destination, f'labels/{label_file}.txt')

    # Already converted: only (re)create the overlay image if it is missing.
    if os.path.exists(seg_label_path):
        print(f'{label_file} already exists in {destination}')
        mask_file = os.path.join(mask_destination, os.path.basename(imgPath))
        if SAVE_MASK_IMAGE and not os.path.exists(mask_file):
            raw_image = Image.open(imgPath).convert("RGB")
            h, w = np.array(raw_image).shape[:2]

            # Rebuild the masks from the stored polygons and draw them on the image.
            masks, class_ids = parse_seg_label_file(seg_label_path, (h, w))
            print("Saving Validation Mask...")
            save_all_masks_on_one_image(raw_image, masks, mask_destination, save_filename=os.path.basename(imgPath))
        continue

    labels = getLabels(labelPath)
    if labels is None:
        continue  # no bounding-box label file for this image

    image = cv2.imread(imgPath, cv2.IMREAD_COLOR)
    if image is None:
        print(f'could not read {imgPath}, skipping')
        continue
    h, w = image.shape[:2]

    # Bounding boxes in pixel [x0, y0, x1, y1] form, aligned with class_ids.
    class_ids, bounding_boxes = getConvertedBoxes(labels, w, h)

    label_lines = []
    binary_masks = []
    if bounding_boxes:
        # cv2.imread returns BGR channel order; tell the predictor so, since
        # set_image assumes RGB by default.
        predictor.set_image(image, image_format="BGR")

        input_boxes = torch.tensor(bounding_boxes, device=predictor.device)
        transformed_boxes = predictor.transform.apply_boxes_torch(input_boxes, image.shape[:2])

        # One mask per box (multimask_output=False).
        masks, _, _ = predictor.predict_torch(
            point_coords=None,
            point_labels=None,
            boxes=transformed_boxes,
            multimask_output=False,
        )

        # Drop the per-box mask dimension and move to NumPy: (boxes, H, W) of 0/1.
        binary_masks = masks.squeeze(1).cpu().numpy().astype(np.uint8)

        for class_id, binary_mask in zip(class_ids, binary_masks):
            polygon = _mask_to_polygon(binary_mask, w, h)
            if polygon is None:
                continue
            loopCount += 1
            # Class id once, then space-separated normalised coordinates.
            label_lines.append(f"{class_id} " + " ".join(f"{val:.6f}" for val in polygon))

    os.makedirs(os.path.join(destination, 'labels'), exist_ok=True)
    os.makedirs(os.path.join(destination, 'images'), exist_ok=True)

    if label_lines:
        print(f'writing {label_file} to {destination}')
        print(f"file number {loopCount}")

        # Written in one go, so an interrupted run does not leave a partial
        # label file that the resume check above would then skip.
        with open(seg_label_path, "w") as f:
            f.write("\n".join(label_lines) + "\n")

        if SAVE_MASK_IMAGE:
            # One overlay per image, built from the NumPy masks.
            raw_image = Image.open(imgPath).convert("RGB")
            save_all_masks_on_one_image(raw_image, binary_masks, mask_destination, save_filename=os.path.basename(imgPath))

    # Copy the source image next to its labels.
    shutil.copy(imgPath, f'{destination}/images')

Image_0000034716_1667328130_428679000 already exists in datasets/Batch_6/CONVERTED/train
./datasets/Batch_6/LABELLED/Image_0000034716_1667328130_428679000.jpg
Saving Validation Mask...
Saved: datasets/Batch_6/CONVERTED/train/mask_validation/Image_0000034716_1667328130_428679000.jpg
Image_0000024444_1667327616_828736000 already exists in datasets/Batch_6/CONVERTED/train
./datasets/Batch_6/LABELLED/Image_0000024444_1667327616_828736000.jpg
Saving Validation Mask...
Saved: datasets/Batch_6/CONVERTED/train/mask_validation/Image_0000024444_1667327616_828736000.jpg
Image_0000036329_1667328211_78707000 already exists in datasets/Batch_6/CONVERTED/train
./datasets/Batch_6/LABELLED/Image_0000036329_1667328211_78707000.jpg
Saving Validation Mask...
Saved: datasets/Batch_6/CONVERTED/train/mask_validation/Image_0000036329_1667328211_78707000.jpg
Image_0000004430_1667326616_128470000 already exists in datasets/Batch_6/CONVERTED/train
./datasets/Batch_6/LABELLED/Image_0000004430_1667326616_128470000

In [None]:
# Pack the contents of the seg_dataset folder into "<seg_dataset>.zip".
shutil.make_archive(base_name=seg_dataset, format='zip', root_dir=seg_dataset)

'/notebooks/Yolo_Seg.zip'

In [None]:
# TODO: convert all images under a given file path into a video (not yet implemented)

In [None]:
# from PIL import Image
# import os
# import shutil
# # if you would like to plot and view the segmentation masks then set Objects list from the yaml file
# Objects = []
# # color rgb values for each class
# color = []

# for objects in Objects:
#     # create a random color and add it to the color list
#     color.append((np.random.randint(0,255),np.random.randint(0,255),np.random.randint(0,255)))
# loopCount = 0
# for imgPath, labelPath in image_lables:
#     destination = f'{seg_dataset}/train'
#     if 'valid' in imgPath:
#         destination = f'{seg_dataset}/valid'
#     # if label file is in destination folder then skip
#     label_file = imgPath.split('/')[-1].split('.')[0]
#     seg_label_path = os.path.join(destination, f'labels/{label_file}.txt')
#     if os.path.exists(seg_label_path):
#         label_file = imgPath.split('/')[-1].split('.')[0]
#         print(f'{label_file} already exists in {destination}')
#         continue
#     labels = getLabels(labelPath)
#     image = cv2.imread(imgPath, cv2.IMREAD_COLOR)
#     predictor.set_image(image)
#     raw_image = Image.open(imgPath).convert("RGB")
#     h, w = image.shape[:2]
#     class_ids, bounding_boxes = getConvertedBoxes(labels, w, h)
#     # show_boxes_on_image(raw_image, bounding_boxes) 
#     input_boxes = torch.tensor(bounding_boxes, device=predictor.device)
#     transformed_boxes = predictor.transform.apply_boxes_torch(input_boxes, image.shape[:2])
#     masks, _, _ = predictor.predict_torch(
#         point_coords=None,
#         point_labels=None,
#         boxes=transformed_boxes,
#         multimask_output=False,
#     )
#     for i,mask in enumerate(masks):
#         binary_mask = masks[i].squeeze().cpu().numpy().astype(np.uint8)
#         contours, hierarchy = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#         try:
#             largest_contour = max(contours, key=cv2.contourArea)
#             segmentation = largest_contour.flatten().tolist()
#             mask = segmentation

#             # convert mask to numpy array of shape (N,2)
#             mask = np.array(mask).reshape(-1, 2)

#             # normalize the pixel coordinates
#             mask_norm = mask / np.array([w, h])
#             class_id = class_ids[i]
#             yolo = mask_norm.reshape(-1)
#             # show_mask(mask.cpu().numpy(), plt.gca(), random_color=False, color=color[class_id])
#             # check if train or valid in imagPath
        
#             # if folder does not exist, create it
#             if not os.path.exists(destination):
#                 os.makedirs(destination)
#         except Exception as e:
#             continue
#         # label file name
#         loopCount += 1

#         print(f'writing {label_file} to {destination}')
#         print(f"file number {loopCount}")
#         # create labels folder if it does not exist
#         if not os.path.exists(os.path.join(destination, 'labels')):
#             os.makedirs(os.path.join(destination, 'labels'))
#         with open(seg_label_path, "a") as f:
#             for val in yolo:
#                 f.write("{} {:.6f}".format(class_id,val))
#             f.write("\n")

#     # create images folder if it does not exist
#     if not os.path.exists(os.path.join(destination, 'images')):
#         os.makedirs(os.path.join(destination, 'images'))
#     # copy image to destination/images
#     shutil.copy(imgPath, f'{destination}/images')
#     # for box in input_boxes:
#     #     show_box(box.cpu().numpy(), plt.gca())
#     # plt.axis('off')
#     # plt.show()
#     # if loopCount == 10:
#     #     break