In [8]:
import numpy as np
import tensorflow as tf
from mrcnn import model as modellib
from mrcnn.config import Config
import cv2

In [19]:
from tensorflow.python.client import device_lib

def get_available_devices():
    """Return the ``name`` of every device listed by ``device_lib.list_local_devices()``."""
    device_names = []
    for device in device_lib.list_local_devices():
        device_names.append(device.name)
    return device_names

# Print the device names reported by get_available_devices() for this environment.
print(get_available_devices())

['/device:CPU:0']


In [9]:
class InferenceConfig(Config):
    """Configuration overrides used when running the COCO model for inference."""

    # Name assigned to this configuration.
    NAME = "coco"

    # 80 COCO object classes plus 1 background class.
    NUM_CLASSES = 80 + 1

    # One image per GPU, on a single GPU.
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

In [10]:
def load_model(weights_path, model_dir='./'):
    """Build a Mask R-CNN model in inference mode and load weights into it.

    Args:
        weights_path: Path to the weights file handed to ``load_weights``.
        model_dir: Directory passed to ``modellib.MaskRCNN`` as ``model_dir``.
            Defaults to ``'./'``, the value that was previously hard-coded.

    Returns:
        The ``modellib.MaskRCNN`` instance after ``load_weights`` has run
        with ``by_name=True``.

    Raises:
        FileNotFoundError: If ``weights_path`` is not an existing file.
    """
    # Local import keeps this cell self-contained.
    import os

    # Fail fast with a clear message before the model is constructed,
    # instead of surfacing a lower-level error from the weight loader.
    if not os.path.isfile(weights_path):
        raise FileNotFoundError(
            "Mask R-CNN weights file not found: {}".format(weights_path)
        )

    config = InferenceConfig()
    model = modellib.MaskRCNN(mode="inference", model_dir=model_dir, config=config)
    model.load_weights(weights_path, by_name=True)
    return model

In [11]:
# Function to apply Mask R-CNN to each frame and overlay results
def process_frame(model, frame):
    """Run the detector on one frame and tint every detected instance mask.

    Args:
        model: Object exposing ``detect(images, verbose=0)`` that returns a
            list with one result dict per image; the dict's ``'masks'`` entry
            is indexed as ``[:, :, i]`` for instance ``i``.
        frame: 3-channel image array. It is assumed to be in OpenCV's BGR
            channel order (as delivered by ``cv2.VideoCapture``), so the
            channels are reversed before detection. The input is not
            modified.

    Returns:
        A new array with the same shape and dtype as ``frame``. Pixels
        covered by a mask are blended 50/50 with a random per-instance
        colour; all other pixels are returned unchanged. Where masks
        overlap, the later instance's colour wins.
    """
    # The detector is fed RGB; reversing the last axis converts BGR -> RGB.
    # ascontiguousarray avoids handing a negative-stride view downstream.
    rgb_frame = np.ascontiguousarray(frame[..., ::-1])
    results = model.detect([rgb_frame], verbose=0)
    masks = results[0]['masks']

    alpha = 0.5  # Transparency factor of the mask colour

    # Work in float on a copy so the caller's frame stays untouched and the
    # blend does not wrap around in a narrow integer dtype.
    blended = frame.astype(np.float32)

    for i in range(masks.shape[-1]):
        # Coerce to bool so a 0/1 integer mask is used as a mask rather than
        # as integer indices.
        mask = masks[:, :, i].astype(bool)
        # Upper bound is exclusive, so 256 is needed to make 255 reachable.
        color = np.random.randint(0, 256, (3,), dtype=int)
        # Blend against the original pixels only where the mask is set, so
        # the rest of the frame keeps its full brightness.
        blended[mask] = (1 - alpha) * frame[mask] + alpha * color

    if np.issubdtype(frame.dtype, np.integer):
        limits = np.iinfo(frame.dtype)
        blended = np.clip(np.rint(blended), limits.min, limits.max)
    return blended.astype(frame.dtype)

In [12]:
import cv2
import numpy as np

def process_and_display_video(input_video_path, model, max_duration_seconds=1,
                              inference_size=(1024, 1024)):
    """Segment the start of a video and replay it next to the original.

    Frames are read with ``cv2.VideoCapture``, resized to ``inference_size``,
    passed through ``process_frame``, resized back to the source frame's
    size and kept in memory. The stored frames are then shown side by side
    in a loop until ``q`` is pressed.

    Args:
        input_video_path: Path handed to ``cv2.VideoCapture``.
        model: Detector forwarded to ``process_frame``.
        max_duration_seconds: Length of video to process; the frame limit is
            ``int(fps * max_duration_seconds)``.
        inference_size: ``(width, height)`` passed to ``cv2.resize`` before
            inference. Defaults to the previously hard-coded 1024x1024.

    Raises:
        OSError: If the video cannot be opened.
    """
    # Used when the container reports no usable frame rate (0 or NaN).
    fallback_fps = 30.0

    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        cap.release()
        raise OSError("Could not open video: {}".format(input_video_path))

    # Lists to store original and segmented frames
    original_frames = []
    segmented_frames = []

    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:  # also true for NaN
            fps = fallback_fps
        max_frames = int(fps * max_duration_seconds)

        print("Processing video...")
        while len(original_frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            # Take the size from the decoded frame itself; capture metadata
            # can be missing or wrong.
            height, width = frame.shape[:2]

            # Resize frame for inference
            resized_frame = cv2.resize(frame, inference_size)
            segmented_frame = process_frame(model, resized_frame)

            # Resize segmented frame back to original dimensions
            segmented_frame = cv2.resize(segmented_frame, (width, height))

            # Store frames in memory
            original_frames.append(frame)
            segmented_frames.append(segmented_frame)
    finally:
        # Release the capture even if inference raises.
        cap.release()

    # Without this guard the replay loop below would spin forever on an
    # empty frame list.
    if not original_frames:
        print("No frames were read from the video; nothing to display.")
        return

    print("Processing complete! Displaying video...")

    # waitKey treats a delay <= 0 as "wait forever", so keep it at >= 1 ms.
    delay_ms = max(1, int(round(1000 / fps)))

    try:
        while True:
            for original, segmented in zip(original_frames, segmented_frames):
                # Combine the frames horizontally
                combined_frame = np.hstack((original, segmented))

                # Display the combined frame
                cv2.imshow("Original and Segmented Video", combined_frame)

                # Exit on pressing 'q'
                key = cv2.waitKey(delay_ms)
                if key & 0xFF == ord('q'):
                    print("Video display complete!")
                    return
    finally:
        # Release all OpenCV windows, including on interrupt or error.
        cv2.destroyAllWindows()
    

In [13]:
# Load the model
# weights_path is handed to load_model; `model` is reused by the video-processing cell.
weights_path = "mask_rcnn_coco.h5"  # Update with your path
model = load_model(weights_path)

In [14]:
# Process the input video
# max_duration_seconds is left at its default, so only the first second is processed.
# A display window opens afterwards; press 'q' in it to stop playback.
input_video_path = "videoplayback.mp4"
process_and_display_video(input_video_path, model)

Processing video...
Processing complete! Displaying video...
Video display complete!
