<a href="https://colab.research.google.com/github/SJCAAT/cv_workshop/blob/main/workshop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


In [None]:
!pip install ultralytics
from ultralytics import YOLO

In [None]:
from io import BytesIO
from PIL import Image as pil_img
from IPython.display import Image, display, Javascript, update_display
from google.colab.output import eval_js
from base64 import b64decode
import numpy as np

In [None]:
def take_photo(display_id: str, quality: float = 0.8) -> bytes:
    """Capture one webcam frame in the browser and return it as JPEG bytes.

    Args:
        display_id: Display id under which the JavaScript helper is emitted
            into the cell output.
        quality: Quality value forwarded to ``canvas.toDataURL('image/jpeg', ...)``.

    Returns:
        The decoded base64 payload of the data URL produced by the browser.

    Raises:
        Exception: Whatever ``eval_js`` raises when the browser side fails,
            e.g. no webcam is available or camera permission was denied.
    """
    # js snippet to capture frame from webcam
    js = Javascript('''
        async function takePhoto(quality) {
            const div = document.createElement('div');
            const video = document.createElement('video');
            const stream = await navigator.mediaDevices.getUserMedia({video: true});

            try {
                div.appendChild(video);
                video.srcObject = stream;
                await video.play();

                const canvas = document.createElement('canvas');
                canvas.width = video.videoWidth;
                canvas.height = video.videoHeight;
                canvas.getContext('2d').drawImage(video, 0, 0);
                return canvas.toDataURL('image/jpeg', quality);
            } finally {
                // always release the camera, even if playback or drawing failed
                stream.getTracks().forEach((track) => track.stop());
                div.remove();
            }
        }
    ''')

    # register the helper in the output frame, then call it and wait for the
    # returned data URL ("data:image/jpeg;base64,<payload>")
    display(js, display_id=display_id)
    data_url = eval_js(f'takePhoto({quality})')

    # everything after the first comma is the base64-encoded image
    encoded = data_url.split(',', 1)[1]
    return b64decode(encoded)

In [None]:
def infer_image(model: YOLO, binary_img: bytes) -> np.array:
    """Run the model on an encoded image and return the annotated frame.

    Args:
        model: Detection model; it is called directly on the decoded pixels.
        binary_img: Encoded image bytes in any format PIL can open.

    Returns:
        The first result's ``plot(pil=True)`` output.
    """
    # NOTE(review): with pil=True the plot call presumably yields a PIL image
    # rather than a NumPy array, and ``np.array`` is a function, not a type —
    # confirm against the installed ultralytics version before relying on the
    # return annotation.
    frame = pil_img.open(BytesIO(binary_img))
    pixels = np.array(frame)
    detections = model(pixels, verbose=False)
    first_detection = detections[0]
    return first_detection.plot(pil=True)

In [None]:
# Live demo: grab webcam frames, run detection on each and render the
# annotated frame in a single output slot until an error occurs or the
# cell is interrupted.

# update the same display
display_id = 'sample_display'
# load a pretrained yolov8 model
model = YOLO('yolov8n')

# display() always creates a new output, so only the first frame may use it;
# later frames must go through update_display() to replace it in place
shown = False

while True:
    try:
        # read a frame from the webcam
        binary_img = take_photo(display_id='sample_display_2')
        # run inference on the frame
        result = infer_image(model, binary_img=binary_img)

        # show the frame with the inference results
        if shown:
            update_display(result, display_id=display_id)
        else:
            display(result, display_id=display_id)
            shown = True
    except Exception as err:
        # show error if user does not have a webcam or did not grant page permission
        print(str(err))
        # these failures do not resolve on retry; stop instead of printing
        # the same error forever
        break