<a href="https://colab.research.google.com/github/AlbinDavid/OCR/blob/master/webcam_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# install dependencies: 
!pip install pyyaml==5.1 pycocotools>=2.0.1

import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# opencv is pre-installed on colab

1.6.0+cu101 True
gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



In [2]:
# install detectron2: (Colab has CUDA 10.1 + torch 1.6)
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
# The wheel index used below is specific to cu101/torch1.6, so stop early if
# the runtime ships a different torch version.
assert torch.__version__.startswith("1.6")
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.6/index.html

Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.6/index.html


In [26]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode
import IPython
import time
import sys
import numpy as np
import cv2
import base64
import logging
from google.colab import output
from PIL import Image
from io import BytesIO
# Predicted class index -> label printed by run_algo; detections whose class
# index is not listed here are not reported.
# NOTE(review): presumably these are the COCO class indices of the model
# loaded in run_algo (67 looks like "cell phone") — confirm.
classes_dict={0:"person",67:"mobile"}

In [27]:
def data_uri_to_img(uri):
    """Decode a base64 data URI into a uint8 numpy array.

    The second comma-separated field of ``uri`` is strictly base64-decoded,
    opened as a PIL image and converted to an array.

    Returns:
        The decoded image as ``np.ndarray`` (dtype ``uint8``), or ``None`` if
        any step fails; the failure is logged with its traceback.
    """
    try:
        # the payload follows the "<header>," prefix of the data URI
        payload = uri.split(',')[1]
        raw_bytes = base64.b64decode(payload, validate=True)
        # wrap the bytes in a file-like object so PIL can parse them
        pil_image = Image.open(BytesIO(raw_bytes))
        return np.array(pil_image, dtype=np.uint8)
    except Exception as err:
        logging.exception(err)
        print('\n')
        return None

In [28]:
# Predictor shared by all run_algo calls; built on first use by _get_predictor().
_PREDICTOR = None


def _get_predictor():
    """Return the Mask R-CNN predictor, building it only on the first call.

    Building the config and DefaultPredictor constructs the model and loads
    the checkpoint, which is far too expensive to repeat for every frame.
    """
    global _PREDICTOR
    if _PREDICTOR is None:
        cfg = get_cfg()
        # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
        cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
        # Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
        _PREDICTOR = DefaultPredictor(cfg)
    return _PREDICTOR


def run_algo(imgB64):
    """
    Run detection on one webcam frame and print the detections of interest.

    In Colab, run_algo gets invoked by the JavaScript, which sends frames
    one at a time.

    params:
        imgB64: the frame as a base64 data URI (e.g. the result of
            canvas.toDataURL).

    returns:
        None. For every detection whose class index is a key of
        classes_dict, a "Detected ..." line with its box is printed.
        Failures are logged instead of raised so one bad frame does not
        break the capture loop.
    """
    image = data_uri_to_img(imgB64)
    if image is None:
        print("At run_algo(): image is None.")
        return

    try:
        predictor = _get_predictor()
        # data_uri_to_img decodes through PIL, which yields RGB channel order,
        # while DefaultPredictor expects its input image in BGR order.
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        outputs = predictor(image_bgr)
        instances = outputs["instances"]
        predicted_boxes = instances.pred_boxes
        for index, class_id in enumerate(instances.pred_classes.tolist()):
            predicted_class = classes_dict.get(class_id)
            if predicted_class is None:
                # not one of the classes this notebook reports
                continue
            print("Detected ",predicted_class," at ",predicted_boxes[index])
    except Exception as e:
        logging.exception(e)
        print('\n')


# Expose run_algo to the browser under the name 'notebook.run_algo', the name
# the capture JavaScript passes to google.colab.kernel.invokeFunction.
output.register_callback('notebook.run_algo', run_algo)

In [29]:



def take_photo(filename='photo.jpg', quality=0.8, num_frames=100, interval_ms=250):
    """Stream webcam frames from the browser to the 'notebook.run_algo' callback.

    Injects a JavaScript function into the cell output that opens the webcam,
    shows the live video, and for each captured frame sends a JPEG data URI to
    the Python callback registered as 'notebook.run_algo'.

    params:
        filename: unused; kept so existing calls keep working.
        quality: JPEG quality passed to canvas.toDataURL.
        num_frames: number of frames to capture before stopping.
        interval_ms: pause in milliseconds between two captures.

    returns:
        None.
    """
    js = Javascript('''
        async function takePhoto(quality, numFrames, intervalMs) {
    // create html elements
    const div = document.createElement('div');
    const video = document.createElement('video');
    video.style.display = 'block';

    // request the stream. This will ask for Permission to access
    // a connected Camera/Webcam
    const stream = await navigator.mediaDevices.getUserMedia({video: true});

    try {
        // show the HTML elements
        document.body.appendChild(div);
        div.appendChild(video);
        // display the stream
        video.srcObject = stream;
        await video.play();

        // Resize the output (of Colab Notebook Cell) to fit the video element.
        google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

        // capture numFrames frames
        for (let i = 0; i < numFrames; i++) {
            const canvas = document.createElement('canvas');
            canvas.width = video.videoWidth;
            canvas.height = video.videoHeight;
            canvas.getContext('2d').drawImage(video, 0, 0);
            const img = canvas.toDataURL('image/jpeg', quality);

            // Call the python function with this image and wait for it to
            // finish, so frames do not queue up faster than they are processed.
            try {
                await google.colab.kernel.invokeFunction('notebook.run_algo', [img], {});
            } catch (err) {
                // keep capturing even if one call fails
                console.error(err);
            }

            // wait intervalMs milliseconds before the next capture
            await new Promise(resolve => setTimeout(resolve, intervalMs));
        }
    } finally {
        // always release the camera, even if something above threw
        stream.getVideoTracks().forEach(track => track.stop());
    }
}
        ''')
    # make the provided JavaScript part of the cell
    display(js)
    # call the takePhoto() JavaScript function
    eval_js('takePhoto({}, {}, {})'.format(quality, int(num_frames), int(interval_ms)))

# Start the webcam capture with the default settings; every captured frame is
# sent to run_algo, which prints the detections below.
take_photo()

<IPython.core.display.Javascript object>

Detected  person  at  Boxes(tensor([[188.9810, 209.0856, 546.2219, 478.9264]], device='cuda:0'))
Detected  person  at  Boxes(tensor([[ 21.6475,   7.8983, 542.1993, 474.1400]], device='cuda:0'))
Detected  person  at  Boxes(tensor([[  4.4938, 192.3740, 354.2171, 466.8879]], device='cuda:0'))
Detected  person  at  Boxes(tensor([[  0.0000,  85.4691, 559.8842, 477.2065]], device='cuda:0'))
Detected  person  at  Boxes(tensor([[  0.0000, 100.4419, 553.7832, 473.3087]], device='cuda:0'))
Detected  person  at  Boxes(tensor([[1.5120e-01, 1.7525e+02, 6.2904e+02, 4.7453e+02]], device='cuda:0'))
Detected  person  at  Boxes(tensor([[  0.0000, 194.9566, 561.2238, 473.0497]], device='cuda:0'))
Detected  person  at  Boxes(tensor([[  2.0202,  68.2283, 578.2250, 480.0000]], device='cuda:0'))
Detected  person  at  Boxes(tensor([[  0.0000,  71.0537, 514.4730, 472.8480]], device='cuda:0'))
Detected  person  at  Boxes(tensor([[  2.6431,  65.6740, 497.6150, 474.5148]], device='cuda:0'))
Detected  person  at  