In [1]:
pip install torch torchvision opencv-python pillow numpy


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [3]:
import torch
import torchvision
import cv2
import numpy as np
from PIL import Image
from torchvision import transforms

# Load DeepLabV3-ResNet101 with pretrained segmentation weights.
# `weights=` replaces the `pretrained=True` flag deprecated in torchvision 0.13.
# The checkpoint is pinned explicitly (rather than DEFAULT) because the class
# indices used downstream follow this checkpoint's Pascal VOC label set.
model = torchvision.models.segmentation.deeplabv3_resnet101(
    weights=torchvision.models.segmentation.DeepLabV3_ResNet101_Weights.COCO_WITH_VOC_LABELS_V1
)
# Inference only: put BatchNorm/Dropout layers into evaluation mode.
model.eval()

# Input pipeline: PIL image -> float tensor scaled to [0, 1] -> per-channel
# normalisation with the ImageNet mean/std the pretrained backbone expects.
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

def extract_person(image_path, output_path):
    """Cut the person out of a photo and save it on a white background.

    Runs the module-level DeepLabV3 ``model`` on the image, keeps every pixel
    whose highest-scoring class is "person" and paints all other pixels white.
    If no pixel is classified as a person the saved image is entirely white.

    Args:
        image_path: Path of the input image (anything PIL can open).
        output_path: Destination file; OpenCV picks the image format from
            the file extension.

    Raises:
        OSError: If OpenCV reports that ``output_path`` could not be written.
    """
    # Index of "person" in the Pascal VOC label set (21 classes, 0 = background)
    # used by the pretrained checkpoint.
    person_class_id = 15

    # Decode inside a context manager so the file handle is released promptly;
    # convert() returns an independent in-memory RGB copy.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    # Follow whatever device the model currently lives on (CPU or GPU).
    device = next(model.parameters()).device
    input_tensor = preprocess(image).unsqueeze(0).to(device)  # [1, 3, H, W]

    with torch.no_grad():
        output = model(input_tensor)["out"][0]  # [num_classes, H, W]

    # Boolean [H, W] mask: True where "person" is the arg-max class.
    mask = (output.argmax(0) == person_class_id).cpu().numpy()

    image_np = np.array(image)  # [H, W, 3], uint8, RGB

    # Broadcast the mask over the channel axis; the explicit uint8 fill value
    # keeps the result uint8, which cv2.cvtColor requires.
    extracted = np.where(mask[..., None], image_np, np.uint8(255))

    # OpenCV expects BGR channel order. imwrite signals failure by returning
    # False instead of raising, so check it before reporting success.
    bgr = cv2.cvtColor(extracted, cv2.COLOR_RGB2BGR)
    if not cv2.imwrite(str(output_path), bgr):
        raise OSError(f"Could not write image to {output_path}")
    print(f"Extracted person saved at {output_path}")

if __name__ == "__main__":
    # Photo to segment
    input_image = "image1.jpeg"
    # Where the cut-out is written
    output_image = "person.png"
    extract_person(image_path=input_image, output_path=output_image)


Downloading: "https://download.pytorch.org/models/deeplabv3_resnet101_coco-586e9e4e.pth" to /root/.cache/torch/hub/checkpoints/deeplabv3_resnet101_coco-586e9e4e.pth
100%|██████████| 233M/233M [00:01<00:00, 141MB/s]


Extracted person saved at person.png
