In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install opencv-python-headless



In [21]:
import os

video_path = '/content/drive/MyDrive/ring_video.mp4'
model_path = '/content/drive/MyDrive/mask_rcnn_ring_segmentation_weights.pth'
output_folder = '/content/drive/MyDrive/ring_outputs'

os.makedirs(output_folder, exist_ok=True)

In [22]:
from torchvision.models.detection import maskrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

model = maskrcnn_resnet50_fpn(weights=MaskRCNN_ResNet50_FPN_Weights.DEFAULT)


num_classes = 3
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

in_mask_features = model.roi_heads.mask_predictor.conv5_mask.in_channels
model.roi_heads.mask_predictor = MaskRCNNPredictor(in_mask_features, 256, num_classes)


model.load_state_dict(torch.load(f"{model_path}", map_location=device))
model.to(device)
model.eval()


MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(in

In [23]:
import cv2
from PIL import Image
import numpy as np

transform = T.Compose([T.ToTensor()])

cap = cv2.VideoCapture(video_path)
frame_count = 0
ring_id = 0

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    orig = frame.copy()
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image_pil = Image.fromarray(image)
    image_tensor = transform(image_pil).to(device)

    with torch.no_grad():
        outputs = model([image_tensor])[0]

    scores = outputs['scores'].cpu().numpy()
    masks = outputs['masks'].cpu().numpy()
    labels = outputs['labels'].cpu().numpy()

    for i, score in enumerate(scores):
        if score < 0.75:
            continue

        mask = masks[i, 0]
        mask = (mask > 0.5).astype(np.uint8) * 255


        white_bg = np.ones_like(orig) * 255


        masked = cv2.bitwise_and(orig, orig, mask=mask)
        inv_mask = cv2.bitwise_not(mask)
        white_bg = cv2.bitwise_and(white_bg, white_bg, mask=inv_mask)
        result = cv2.add(masked, white_bg)


        ys, xs = np.where(mask > 0)
        if len(xs) == 0 or len(ys) == 0:
            continue
        x_min, x_max = xs.min(), xs.max()
        y_min, y_max = ys.min(), ys.max()
        cropped = result[y_min:y_max, x_min:x_max]


        cv2.imwrite(f"{output_folder}/ring_{frame_count}_{ring_id}.png", cropped)
        ring_id += 1

    frame_count += 1

cap.release()
print("Done. All rings saved.")


Done. All rings saved.


In [24]:
import shutil
shutil.make_archive('/content/drive/MyDrive/ring_outputs', 'zip', output_folder)

from google.colab import files
files.download('/content/drive/MyDrive/ring_outputs.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>