In [1]:
import torch
import torchvision
import matplotlib.pyplot as plt
import numpy as np
from skimage import io, segmentation, color
import networkx as nx
import cv2

In [50]:
def load_image(filepath):
    image_path = filepath
    image = io.imread(image_path)

    # If the image has 4 channels (RGBA), remove the alpha channel
    if image.shape[-1] == 4: image = image[..., :3]
        
    return image

In [51]:
def show_images(images):
    n = len(images)

    fig, axes = plt.subplots(1, n, figsize=(5 * n, 5))
    if n == 1: axes = [axes]

    for i, ax in enumerate(axes):
        if len(images[i].shape) == 3: ax.imshow(images[i])
        else: ax.imshow(images[i], cmap='gray')
        
        ax.axis('off')

    plt.show()

In [110]:
# Load YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

def isolate_objects(image):
    """
    Isolate objects in an image by detecting them using YOLO and removing the background.
    
    Args:
    - image: np.array, input image (RGB).
    
    Returns:
    - isolated_image: np.array, image with objects isolated and background removed.
    """
    # Perform object detection using YOLOv5
    results = model(image)

    # Convert results to pandas DataFrame for easy access to bounding boxes
    detections = results.pandas().xyxy[0]  # Bounding boxes: xmin, ymin, xmax, ymax

    # Create a mask for the background (initialize as all zeros)
    mask = np.zeros(image.shape[:2], dtype=np.uint8)

    # Loop through the detected objects and fill the mask
    for idx, row in detections.iterrows():
        xmin, ymin, xmax, ymax = int(row['xmin']), int(row['ymin']), int(row['xmax']), int(row['ymax'])
        
        # Set the object region in the mask to 255 (white)
        mask[ymin:ymax, xmin:xmax] = 255

    # Create a 3-channel mask to apply to the original image
    mask_3channel = cv2.merge([mask, mask, mask])

    # Apply the mask to the image (set background pixels to black)
    isolated_image = cv2.bitwise_and(image, mask_3channel)

    return isolated_image

Using cache found in /home/theo/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-10-11 Python-3.12.6 torch-2.4.1+cu121 CPU

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [113]:
# Load the image (replace 'path_to_image.jpg' with your actual image path)
image = cv2.imread('../COMP90086_2024_Project_train/train/173.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB

# Isolate objects and remove the background
isolated_image = isolate_objects(image)

# Display the original and isolated images
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.title('Original Image')
plt.imshow(image)

plt.subplot(1, 2, 2)
plt.title('Isolated Objects with Black Background')
plt.imshow(isolated_image)

plt.show()

  with amp.autocast(autocast):


In [114]:
img = load_image('../COMP90086_2024_Project_train/train/173.jpg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
isolated_img = isolate_objects(img)
print(isolated_img)
show_images([isolated_img])

[[[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 ...

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]

 [[0 0 0]
  [0 0 0]
  [0 0 0]
  ...
  [0 0 0]
  [0 0 0]
  [0 0 0]]]


  with amp.autocast(autocast):


In [6]:
import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Load YOLOv5 model
model = torch.hub.load('ultralytics/yolov5', 'yolov5m', pretrained=True)

def isolate_stacks(image):
    """
    Isolate the stacks in the image by detecting them using YOLO and removing the background.
    
    Args:
    - image: np.array, input image (RGB).
    
    Returns:
    - isolated_image: np.array, image with objects isolated and background removed.
    """
    # Perform object detection using YOLOv5
    results = model(image)

    # Convert results to pandas DataFrame for easy access to bounding boxes
    detections = results.pandas().xyxy[0]  # Bounding boxes: xmin, ymin, xmax, ymax

    # Print detections to verify
    print("YOLO Detections:")
    print(detections)

    if detections.empty:
        print("No objects detected.")
        return image  # Return the original image if no objects are detected

    # Create a mask for the background (initialize as all zeros)
    mask = np.zeros(image.shape[:2], dtype=np.uint8)

    # Loop through the detected objects and fill the mask
    for idx, row in detections.iterrows():
        xmin, ymin, xmax, ymax = int(row['xmin']), int(row['ymin']), int(row['xmax']), int(row['ymax'])
        
        # Set the object region in the mask to 255 (white)
        mask[ymin:ymax, xmin:xmax] = 255

    # Create a 3-channel mask to apply to the original image
    mask_3channel = cv2.merge([mask, mask, mask])

    # Apply the mask to the image (set background pixels to black)
    isolated_image = cv2.bitwise_and(image, mask_3channel)

    return isolated_image

# Load the image (replace with the correct image path)
image_path = '../COMP90086_2024_Project_train/train/173.jpg'
image = cv2.imread(image_path)

# Convert image to RGB (as OpenCV loads images in BGR)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Isolate the stacks
isolated_image = isolate_stacks(image)

for i in isolated_image: print(i)

Using cache found in /home/theo/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2024-10-11 Python-3.12.6 torch-2.4.1+cu121 CPU

Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients, 48.9 GFLOPs
Adding AutoShape... 
  with amp.autocast(autocast):


YOLO Detections:
        xmin       ymin       xmax        ymax  confidence  class  name
0  21.502163  52.418003  74.403114  163.827896    0.315938     33  kite
[[0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [0 0 0]
 [