In [3]:
pip install numpy pandas tensorflow torchvision matplotlib opencv-python

Collecting torchvision
  Downloading torchvision-0.20.1-cp312-cp312-win_amd64.whl.metadata (6.2 kB)
Collecting torch==2.5.1 (from torchvision)
  Downloading torch-2.5.1-cp312-cp312-win_amd64.whl.metadata (28 kB)
Collecting sympy==1.13.1 (from torch==2.5.1->torchvision)
  Downloading sympy-1.13.1-py3-none-any.whl.metadata (12 kB)
Downloading torchvision-0.20.1-cp312-cp312-win_amd64.whl (1.6 MB)
   ---------------------------------------- 0.0/1.6 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.6 MB ? eta -:--:--
   ------------- -------------------------- 0.5/1.6 MB 2.1 MB/s eta 0:00:01
   ---------------------------------------- 1.6/1.6 MB 4.0 MB/s eta 0:00:00
Downloading torch-2.5.1-cp312-cp312-win_amd64.whl (203.0 MB)
   ---------------------------------------- 0.0/203.0 MB ? eta -:--:--
    --------------------------------------- 3.1/203.0 MB 16.9 MB/s eta 0:00:12
   - -------------------------------------- 8.4/203.0 MB 20.0 MB/s eta 0:00:10
   -- -----------------

In [4]:
import os
import cv2
import numpy as np
import torch
from torchvision import models
from torchvision.transforms import functional as F
from PIL import Image
import matplotlib.pyplot as plt

In [5]:
# 1. Load a pre-trained Mask R-CNN model
# `pretrained=True` is deprecated in torchvision >= 0.13; the `weights` enum is
# the supported replacement. COCO_V1 is the checkpoint `pretrained=True` maps to.
model = models.detection.maskrcnn_resnet50_fpn(
    weights=models.detection.MaskRCNN_ResNet50_FPN_Weights.COCO_V1
)
# Switch to inference mode; the detection model only returns predictions
# (boxes / labels / scores / masks) when it is not in training mode.
model.eval()

Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to C:\Users\ASUS/.cache\torch\hub\checkpoints\maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
100%|███████████████████████████████████████████████████████████████████████████████| 170M/170M [00:06<00:00, 27.6MB/s]


MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(in

In [7]:
def segment_dogs(image_path, threshold=0.5):
    """Cut every confidently detected dog out of an image.

    Runs the module-level Mask R-CNN ``model`` on the image and, for each
    detection labelled as a dog whose score is at least ``threshold``, builds a
    copy of the image in which every pixel outside the predicted mask is zero.

    Args:
        image_path: Path of the image file to open.
        threshold: Minimum detection score a dog must reach to be kept.

    Returns:
        ``None`` if the model predicts no dog label at all (a message is
        printed in that case); otherwise a list of NumPy arrays, one per kept
        detection. The list is empty when dogs were predicted but none reached
        ``threshold``.
    """
    dog_class_id = 18  # 'dog' in the COCO label map used by the pre-trained model

    # Load and preprocess the image. The context manager closes the underlying
    # file as soon as convert() has produced its own in-memory copy, so file
    # handles do not pile up while iterating over a large dataset.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")
    image_tensor = F.to_tensor(image).unsqueeze(0)  # add the batch dimension

    # Get predictions (single image in the batch, hence [0])
    with torch.no_grad():
        predictions = model(image_tensor)[0]

    # Filter predictions for the 'dog' class
    is_dog = predictions['labels'] == dog_class_id
    if not is_dog.any():
        print(f"No dogs detected in {image_path}")
        return None

    masks = predictions['masks'][is_dog]
    scores = predictions['scores'][is_dog]

    # The pixel array is the same for every detection, so build it once.
    image_np = np.array(image)

    segmented_images = []
    for mask, score in zip(masks, scores):
        if score >= threshold:
            # Drop the leading channel axis and rescale the soft mask to 0-255.
            mask_np = mask.squeeze(0).mul(255).byte().cpu().numpy()

            # Create binary mask (roughly "mask probability above 0.5")
            binary_mask = mask_np > 128

            # Extract the dog region: zero out everything outside the mask
            segmented_images.append(image_np * binary_mask[:, :, None])

    return segmented_images

In [8]:
# 3. Define a function to save the segmented images
def save_segmented_images(segmented_images, output_dir, relative_path):
    """Write each segmented image to disk as a numbered PNG.

    Files are named ``<output_dir>/<relative_path>_segmented_<k>.png`` with
    ``k`` starting at 1. Missing parent directories are created.

    Args:
        segmented_images: Iterable of arrays accepted by ``Image.fromarray``.
        output_dir: Root directory for the results.
        relative_path: Path (without extension) of the source image relative
            to the dataset root; may contain sub-directories.
    """
    output_path = os.path.join(output_dir, relative_path)

    # dirname is empty when the joined path has no directory component
    # (e.g. output_dir == ""); os.makedirs("") would raise FileNotFoundError.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    for index, seg_image in enumerate(segmented_images, start=1):
        save_path = f"{output_path}_segmented_{index}.png"
        Image.fromarray(seg_image).save(save_path)

In [9]:
# 4. Process the dataset
# NOTE(review): absolute, machine-specific paths; adjust before running elsewhere.
input_dir = "C:/Users/ASUS/Desktop/dogs_project/test"
output_dir = "C:/Users/ASUS/Desktop/dogs_project/segmented_results"

# File names are lower-cased before matching so that images saved with an
# upper-case extension (e.g. "IMG_001.JPG") are processed as well.
image_extensions = ('.jpg', '.png', '.jpeg')

for root, _, files in os.walk(input_dir):
    for file_name in files:
        if not file_name.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(root, file_name)
        # Mirror the input folder layout below output_dir, minus the extension.
        relative_path = os.path.relpath(image_path, input_dir)
        base_name = os.path.splitext(relative_path)[0]

        segmented_images = segment_dogs(image_path)
        # None (no dog predicted) and [] (no dog above threshold) are both skipped.
        if segmented_images:
            save_segmented_images(segmented_images, output_dir, base_name)

print("Segmentation completed!")

No dogs detected in C:/Users/ASUS/Desktop/dogs_project/cropped_dogs_dataset\Rhodesian_ridgeback\n02087394_7459_1619.jpg
No dogs detected in C:/Users/ASUS/Desktop/dogs_project/cropped_dogs_dataset\Rhodesian_ridgeback\n02087394_7544_1624.jpg
No dogs detected in C:/Users/ASUS/Desktop/dogs_project/cropped_dogs_dataset\Rottweiler\n02106550_4129_14271.jpg
No dogs detected in C:/Users/ASUS/Desktop/dogs_project/cropped_dogs_dataset\Saint_Bernard\n02109525_6663_16473.jpg
No dogs detected in C:/Users/ASUS/Desktop/dogs_project/cropped_dogs_dataset\Saluki\n02091831_213_4589.jpg
No dogs detected in C:/Users/ASUS/Desktop/dogs_project/cropped_dogs_dataset\Saluki\n02091831_5384_4674.jpg
No dogs detected in C:/Users/ASUS/Desktop/dogs_project/cropped_dogs_dataset\Samoyed\n02111889_1264_18225.jpg
No dogs detected in C:/Users/ASUS/Desktop/dogs_project/cropped_dogs_dataset\Samoyed\n02111889_1994_18284.jpg
No dogs detected in C:/Users/ASUS/Desktop/dogs_project/cropped_dogs_dataset\Samoyed\n02111889_5075_183