In [1]:
import numpy as np
import tensorflow as tf
from mrcnn import model as modellib
from mrcnn.config import Config
import cv2

Using TensorFlow backend.


In [2]:
from tensorflow.python.client import device_lib

def get_available_devices():
    """Return the names of every device TensorFlow reports for this machine."""
    device_names = []
    for device in device_lib.list_local_devices():
        device_names.append(device.name)
    return device_names

# Show which devices TensorFlow can see before building the model.
print(get_available_devices())

['/device:CPU:0']


In [3]:
class InferenceConfig(Config):
    """Mask R-CNN configuration used for inference with the COCO label set.

    Only the attributes below are overridden; everything else is inherited
    from the imported ``Config`` base class.
    """
    # Name given to this configuration.
    NAME = "coco"
    # One image per step: process_frame passes a single frame to model.detect().
    # NOTE(review): presumably the effective batch size is
    # GPU_COUNT * IMAGES_PER_GPU - confirm against mrcnn.config.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    NUM_CLASSES = 1 + 80  # COCO dataset has 80 classes + 1 background

In [4]:
def load_model(weights_path):
    """Build a Mask R-CNN model in inference mode and load its weights.

    Args:
        weights_path: Path to the weights file passed to ``load_weights``.

    Returns:
        The ``modellib.MaskRCNN`` instance with weights loaded by layer name.
    """
    inference_config = InferenceConfig()
    mask_rcnn = modellib.MaskRCNN(
        mode="inference",
        model_dir='./',
        config=inference_config,
    )
    mask_rcnn.load_weights(weights_path, by_name=True)
    return mask_rcnn

In [5]:
# Fixed palette of 81 colour triples; CATEGORY_COLORS maps each list index to its triple.
colours = [(68, 136, 229), (32, 67, 6), (169, 77, 36), (240, 81, 131), (164, 229, 229), (92, 21, 220), (189, 226, 125), (248, 19, 241), (15, 249, 52), (85, 150, 138), (148, 194, 1), (152, 143, 116), (217, 58, 224), (55, 178, 45), (243, 94, 206), (233, 51, 152), (132, 172, 38), (110, 253, 131), (45, 29, 43), (111, 100, 13), (37, 254, 130), (96, 61, 69), (88, 207, 28), (181, 120, 144), (20, 3, 192), (233, 102, 128), (156, 176, 34), (152, 170, 43), (78, 136, 191), (77, 118, 249), (173, 214, 194), (173, 118, 231), (185, 254, 208), (182, 58, 250), (158, 226, 80), (154, 160, 218), (53, 238, 212), (79, 212, 233), (38, 13, 25), (62, 165, 205), (144, 58, 51), (34, 98, 57), (110, 47, 230), (192, 45, 5), (6, 84, 105), (8, 176, 71), (56, 189, 201), (53, 71, 4), (44, 201, 184), (93, 150, 19), (135, 216, 229), (181, 96, 93), (104, 94, 68), (24, 239, 200), (102, 66, 48), (192, 136, 10), (207, 180, 13), (185, 226, 128), (51, 133, 65), (144, 4, 93), (121, 224, 191), (12, 95, 122), (226, 74, 204), (17, 190, 251), (233, 36, 103), (46, 224, 233), (214, 104, 188), (22, 35, 154), (85, 152, 137), (194, 204, 8), (174, 199, 102), (125, 102, 195), (39, 172, 16), (69, 112, 131), (191, 65, 22), (251, 66, 45), (154, 68, 24), (7, 3, 77), (232, 83, 131), (212, 33, 50), (16, 104, 28)]

In [None]:
# Map each palette index (0, 1, 2, ...) to its colour triple.
CATEGORY_COLORS = dict(enumerate(colours))

In [7]:
# Class id -> display name. Id 0 is the background class, ids 1..80 are the
# object categories, listed here in id order.
classnames = dict(enumerate([
    '__background__',                                              # 0
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',          # 1-5
    'bus', 'train', 'truck', 'boat', 'traffic light',              # 6-10
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', # 11-15
    'cat', 'dog', 'horse', 'sheep', 'cow',                         # 16-20
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',            # 21-25
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',           # 26-30
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',    # 31-35
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket',  # 36-39
    'bottle',                                                      # 40
    'wine glass', 'cup', 'fork', 'knife', 'spoon',                 # 41-45
    'bowl', 'banana', 'apple', 'sandwich', 'orange',               # 46-50
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',             # 51-55
    'cake', 'chair', 'couch', 'potted plant', 'bed',               # 56-60
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',             # 61-65
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',       # 66-70
    'toaster', 'sink', 'refrigerator', 'book', 'clock',            # 71-75
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',  # 76-80
]))

In [14]:
# Function to apply Mask R-CNN to each frame and overlay results
def process_frame(model, frame):
    """Run Mask R-CNN on one frame and blend the detected masks over it.

    Args:
        model: Object exposing ``detect(images, verbose=...)`` that returns a
            list of result dicts with ``'masks'`` and ``'class_ids'`` entries.
        frame: 3-channel image in OpenCV's BGR channel order (as returned by
            ``cv2.VideoCapture.read``).

    Returns:
        An image with the same shape as ``frame``: the input blended 50/50
        with a canvas holding one flat colour per detected mask. Because the
        canvas is black wherever nothing was detected, those pixels come out
        at half their original brightness.
    """
    # Mask R-CNN works on RGB images while OpenCV frames are BGR, so convert
    # only the copy that is fed to the detector; the overlay stays in BGR.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    detections = model.detect([rgb_frame], verbose=0)[0]
    masks = detections['masks']
    class_ids = detections['class_ids']

    # Create a blank canvas for segmented mask
    mask_canvas = np.zeros_like(frame)

    # Apply detected masks (the last axis of 'masks' indexes the detections)
    for i in range(masks.shape[-1]):
        # Boolean indexing below needs a bool mask, whatever dtype was returned.
        mask = masks[:, :, i].astype(bool)
        # Palette keys are zero-based while detected class ids start at 1.
        color = CATEGORY_COLORS.get(int(class_ids[i]) - 1)
        if color is None:
            # Only draw a random colour when the palette really has no entry.
            color = tuple(int(c) for c in np.random.randint(0, 256, size=3))
        mask_canvas[mask] = color

    # Overlay mask on the original frame
    alpha = 0.5  # Transparency factor
    return cv2.addWeighted(frame, 1 - alpha, mask_canvas, alpha, 0)

In [23]:
def create_legend_canvas(category_colors, canvas_height, max_rows_per_column=20):
    """
    Creates a canvas with a legend showing category colors and labels in multiple columns.

    Args:
        category_colors: Mapping of integer colour key -> colour triple. Keys
            follow the convention used by ``process_frame``: key ``k`` is the
            colour painted for class id ``k + 1``, so the label shown next to
            it is ``classnames[k + 1]``. Keys without a matching class name
            are left out of the legend.
        canvas_height: Height in pixels of the returned canvas.
        max_rows_per_column: Upper bound on legend rows per column. Fewer rows
            are used when the canvas is too short to show that many.

    Returns:
        A white ``uint8`` image of shape ``(canvas_height, 200 * columns, 3)``
        with one colour swatch and label per legend entry.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    thickness = 1
    box_height = 20
    line_height = 30
    box_width = 30
    text_offset = 10
    top_margin = 20      # Initial offset from the top
    left_margin = 10     # Initial offset from the left
    column_width = 200   # Adjust column width as needed

    # Resolve labels up front so unnamed keys neither raise nor take up a row.
    entries = []
    for color_key, color in category_colors.items():
        label = classnames.get(color_key + 1)
        if label is not None:
            entries.append((label, color))

    # Never put more rows in a column than fit on the canvas; OpenCV would
    # silently clip anything drawn below the bottom edge.
    rows_that_fit = (canvas_height - top_margin - box_height) // line_height + 1
    rows_per_column = max(1, min(max_rows_per_column, rows_that_fit))

    # Calculate the number of columns needed
    num_columns = -(-len(entries) // rows_per_column)  # Ceiling division
    canvas_width = num_columns * column_width

    # Create a white canvas
    canvas = np.full((canvas_height, canvas_width, 3), 255, dtype=np.uint8)

    # Draw the legend, filling each column top to bottom
    for index, (label, color) in enumerate(entries):
        column, row = divmod(index, rows_per_column)
        x_offset = left_margin + column * column_width
        y_offset = top_margin + row * line_height

        # Draw the color box. The triple is used exactly as process_frame
        # writes it into the frame, so swatch and overlay show the same colour.
        box_start = (x_offset, y_offset)
        box_end = (x_offset + box_width, y_offset + box_height)
        cv2.rectangle(canvas, box_start, box_end, tuple(int(c) for c in color), -1)

        # Draw the category label
        text_position = (x_offset + box_width + text_offset, y_offset + 15)
        cv2.putText(canvas, label, text_position, font, font_scale, (0, 0, 0), thickness, cv2.LINE_AA)

    return canvas

In [19]:
import cv2
import numpy as np

def process_and_display_video(input_video_path, model, category_colors, max_duration_seconds=1):
    """Segment the start of a video and replay it next to the original.

    Reads up to ``max_duration_seconds`` worth of frames, runs
    ``process_frame`` on a 1024x1024 resized copy of each one, then loops the
    result in an OpenCV window (original | segmented | legend) until ``q`` is
    pressed.

    Args:
        input_video_path: Path of the video handed to ``cv2.VideoCapture``.
        model: Detection model forwarded to ``process_frame``.
        category_colors: Colour mapping forwarded to ``create_legend_canvas``.
        max_duration_seconds: Length of the clip to process, in seconds.

    Raises:
        IOError: If the video cannot be opened.
    """
    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {input_video_path}")

    # Lists to store original and segmented frames
    original_frames = []
    segmented_frames = []

    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):
            # Some containers report 0 (or NaN); fall back to a common rate so
            # the frame budget and playback delay stay well defined.
            fps = 30.0
        max_frames = int(fps * max_duration_seconds)

        print("Processing video...")
        while len(original_frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            # Resize frame for inference
            resized_frame = cv2.resize(frame, (1024, 1024))
            segmented_frame = process_frame(model, resized_frame)

            # Resize segmented frame back to the size of the decoded frame so
            # it can be stacked next to it.
            frame_height, frame_width = frame.shape[:2]
            segmented_frame = cv2.resize(segmented_frame, (frame_width, frame_height))

            # Store frames in memory
            original_frames.append(frame)
            segmented_frames.append(segmented_frame)
    finally:
        # Release the capture even if inference fails part-way through.
        cap.release()

    if not original_frames:
        # Without this guard the replay loop below would spin forever.
        print("No frames could be read from the video; nothing to display.")
        return

    print("Processing complete! Displaying video...")

    # Create a legend canvas as tall as the frames so they can be stacked
    legend_canvas = create_legend_canvas(category_colors, original_frames[0].shape[0])

    window_title = "Original, Segmented Video, and Legend"
    frame_delay_ms = max(1, int(1000 / fps))  # waitKey(0) would block forever

    try:
        while True:
            for original, segmented in zip(original_frames, segmented_frames):
                # Original, segmented frame and legend side by side
                combined_frame = np.hstack((original, segmented, legend_canvas))

                # Display the combined frame
                cv2.imshow(window_title, combined_frame)

                # Exit on pressing 'q'
                key = cv2.waitKey(frame_delay_ms)
                if key & 0xFF == ord('q'):
                    print("Video display complete!")
                    return
    finally:
        # Release all OpenCV windows, also when the kernel is interrupted
        cv2.destroyAllWindows()

In [10]:
# Load the model
# Weights file is resolved relative to the working directory; change it to wherever your copy lives.
weights_path = "mask_rcnn_coco.h5"  # Update with your path
# Build the inference model once and reuse it for every frame.
model = load_model(weights_path)






Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
box_ind is deprecated, use box_indices instead


Instructions for updating:
Use `tf.cast` instead.







In [24]:
# Process the input video
input_video_path = "videoplayback.mp4"
# max_duration_seconds is left at its default of 1, so only the first second
# of the video is segmented; playback loops until 'q' is pressed.
process_and_display_video(input_video_path, model, CATEGORY_COLORS)

Processing video...
Processing complete! Displaying video...
Video display complete!
