In [3]:
import cv2
import torch
import numpy as np
from torchvision import models
import torchvision.transforms as T

# Load pre-trained Mask R-CNN model
def load_mask_rcnn():
    """Build torchvision's Mask R-CNN (ResNet-50 FPN) with pre-trained weights.

    Returns:
        The model, already switched to evaluation mode.
    """
    # nn.Module.eval() returns the module itself, so the call can be chained.
    return models.detection.maskrcnn_resnet50_fpn(pretrained=True).eval()

# Preprocess input image
def preprocess_image(image_path):
    """Read an image from disk and prepare it as model input.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A tuple ``(image, image_tensor)`` where ``image`` is the array returned
        by ``cv2.imread`` (BGR channel order) and ``image_tensor`` is the RGB
        version converted with torchvision's ``ToTensor`` and given a leading
        batch dimension via ``unsqueeze(0)``.

    Raises:
        FileNotFoundError: If OpenCV could not read the file (missing path or
            undecodable content).
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising, which
    # would otherwise surface later as an opaque cvtColor assertion error.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_tensor = T.ToTensor()(image_rgb).unsqueeze(0)
    return image, image_tensor

# Get segmentation mask
def get_segmentation_mask(model, image_tensor, threshold=0.5, target_label=18,
                          mask_threshold=0.5):
    """Run the detector and merge all confident target-class masks into one.

    Args:
        model: Callable returning a list of per-image dicts with ``'masks'``,
            ``'labels'`` and ``'scores'`` tensors (torchvision detection
            format; masks assumed to be soft masks of shape (N, 1, H, W)).
        image_tensor: Batched input; dimensions 2 and 3 are used as the
            height and width of the empty fallback mask.
        threshold: Minimum detection score for an instance to be kept.
        target_label: Class id to keep. Defaults to 18 (COCO "dog").
        mask_threshold: Per-pixel cutoff applied to each soft mask.

    Returns:
        A 2-D ``np.uint8`` array with 255 where any kept instance covers the
        pixel and 0 elsewhere. All zeros if no instance qualifies.
    """
    with torch.no_grad():
        predictions = model(image_tensor)

    # Only the first image of the batch is inspected.
    prediction = predictions[0]
    masks = prediction['masks']
    labels = prediction['labels']
    scores = prediction['scores']

    keep = (scores >= threshold) & (labels == target_label)
    if not bool(keep.any()):
        return np.zeros((image_tensor.shape[2], image_tensor.shape[3]), dtype=np.uint8)

    # Binarise each instance first, then take the union. Summing the soft
    # masks before thresholding would mark a pixel as foreground when several
    # instances are each only weakly active there (e.g. 0.3 + 0.3 > 0.5).
    selected = masks[keep][:, 0]
    union = (selected > mask_threshold).any(dim=0)
    return union.cpu().numpy().astype(np.uint8) * 255

# Generate foreground and background masks
def segment_foreground_background(image, mask):
    """Split an image into foreground and background using a 0/255 mask.

    Args:
        image: Image array; its first two dimensions give the target size.
        mask: Single-channel mask that is resized to the image's size.

    Returns:
        A tuple ``(fg, bg)``: the image with only masked pixels kept, and the
        image with only the complementary pixels kept.
    """
    height, width = image.shape[0], image.shape[1]
    # Nearest-neighbour resizing avoids introducing intermediate mask values.
    fitted_mask = cv2.resize(mask, (width, height), interpolation=cv2.INTER_NEAREST)
    inverse_mask = 255 - fitted_mask

    foreground = cv2.bitwise_and(image, image, mask=fitted_mask)
    background = cv2.bitwise_and(image, image, mask=inverse_mask)
    return foreground, background

# Main function
def main(image_path):
    """Run the Mask R-CNN dog segmentation pipeline on one image.

    Loads the model, segments the image at ``image_path``, and writes the
    results to ``foreground.jpg`` and ``background.jpg`` in the current
    working directory.

    Args:
        image_path: Path of the input image.

    Raises:
        IOError: If either output image could not be written.
    """
    # Load Mask R-CNN model
    model = load_mask_rcnn()

    # Load and preprocess the image
    image, image_tensor = preprocess_image(image_path)

    # Get segmentation mask for the dog
    mask = get_segmentation_mask(model, image_tensor)

    # Segment foreground and background
    fg, bg = segment_foreground_background(image, mask)

    # Save segmented images. cv2.imwrite reports failure through its return
    # value instead of raising, so check it before announcing success.
    for output_path, output_image in (("foreground.jpg", fg), ("background.jpg", bg)):
        if not cv2.imwrite(output_path, output_image):
            raise IOError(f"Failed to write {output_path}")
    print("Segmentation complete. Foreground and background saved.")



In [4]:
# Run segmentation on input image
if __name__ == "__main__":
    # Input source image.
    # NOTE(review): this is an absolute, machine-specific path; the cell will
    # only run where this exact file exists. Consider a configurable/relative path.
    image_path = "/home/neelraj-reddy/college/6th_sem/computer vision/project/A little survey on previous works/images/dog.jpeg"
    main(image_path)

Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /home/neelraj-reddy/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
100%|██████████| 170M/170M [06:49<00:00, 435kB/s]  


Segmentation complete. Foreground and background saved.


In [None]:
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
from segment_anything import sam_model_registry, SamPredictor

# Load SAM model
def load_sam_model(model_type="vit_b", checkpoint_path="sam_vit_b_01ec64.pth"):
    """Load a Segment Anything model and wrap it in a ``SamPredictor``.

    Args:
        model_type: Key into ``sam_model_registry`` selecting the architecture.
        checkpoint_path: Path to the SAM checkpoint file. It must match
            ``model_type``; the default is the ViT-B checkpoint file name, so
            pass the corresponding checkpoint when choosing another type.

    Returns:
        A ``SamPredictor`` built around the model in evaluation mode.
    """
    model = sam_model_registry[model_type](checkpoint=checkpoint_path)
    model.eval()
    return SamPredictor(model)

# Preprocess image and get SAM mask
def get_sam_mask(predictor, image_path):
    """Read an image and return SAM's largest candidate mask for it.

    Args:
        predictor: Object exposing ``set_image`` and ``predict`` (a
            ``SamPredictor``).
        image_path: Path of the image file to load.

    Returns:
        A tuple ``(image, mask)`` where ``image`` is the array returned by
        ``cv2.imread`` (BGR channel order) and ``mask`` is a ``np.uint8``
        array holding 255 on the selected mask and 0 elsewhere.

    Raises:
        FileNotFoundError: If OpenCV could not read the file (missing path or
            undecodable content).
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Set image for SAM predictor
    predictor.set_image(image_rgb)

    # NOTE(review): no point or box prompt is supplied here, so the result is
    # whatever SAM produces without guidance. This is not SAM's automatic mask
    # generator -- confirm that prompt-free prediction is intended.
    masks, _, _ = predictor.predict(
        point_coords=None,  # No specific point prompt
        point_labels=None,
        box=None,
        multimask_output=True
    )

    # Select the candidate covering the most pixels (largest mask is often
    # the foreground).
    best_mask = max(masks, key=np.sum)
    mask = best_mask.astype(np.uint8) * 255
    return image, mask

# Segment foreground and background
def segment_foreground_background(image, mask):
    """Split an image into foreground and background using a 0/255 mask.

    Args:
        image: Image array; its first two dimensions give the target size.
        mask: Single-channel mask that is resized to the image's size.

    Returns:
        A tuple ``(fg, bg)``: the image with only masked pixels kept, and the
        image with only the complementary pixels kept.
    """
    height, width = image.shape[0], image.shape[1]
    # Nearest-neighbour resizing avoids introducing intermediate mask values.
    fitted_mask = cv2.resize(mask, (width, height), interpolation=cv2.INTER_NEAREST)
    inverse_mask = 255 - fitted_mask

    foreground = cv2.bitwise_and(image, image, mask=fitted_mask)
    background = cv2.bitwise_and(image, image, mask=inverse_mask)
    return foreground, background

# Save and display results
def save_results(fg, bg):
    """Write the foreground and background images to the working directory.

    Args:
        fg: Foreground image, written to ``foreground.jpg``.
        bg: Background image, written to ``background.jpg``.

    Raises:
        IOError: If either image could not be written.
    """
    # cv2.imwrite reports failure through its return value instead of raising,
    # so check it before announcing success.
    for output_path, output_image in (("foreground.jpg", fg), ("background.jpg", bg)):
        if not cv2.imwrite(output_path, output_image):
            raise IOError(f"Failed to write {output_path}")
    print("Segmentation complete. Foreground and background saved as 'foreground.jpg' and 'background.jpg'.")

# Main function
def main(image_path):
    """Run the SAM segmentation pipeline end to end for one image.

    Args:
        image_path: Path of the input image.
    """
    sam_predictor = load_sam_model()

    # Mask first, then split the original image with it.
    source_image, sam_mask = get_sam_mask(sam_predictor, image_path)
    foreground, background = segment_foreground_background(source_image, sam_mask)

    save_results(foreground, background)

# Run segmentation on input image
if __name__ == "__main__":
    # Relative path: resolved against the kernel's current working directory.
    image_path = "dog.jpeg"  # Change to your image path
    main(image_path)
