<a href="https://colab.research.google.com/github/Oleksii-Adamov/text-detection-recognition-project/blob/main/Oleksii_notebooks/keras_ocr_web_cam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installs

In [None]:
# Install the pinned TensorFlow release (2.7.0) used by this notebook.
!pip install tensorflow==2.7.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow==2.7.0
  Downloading https://us-python.pkg.dev/colab-wheels/public/tensorflow/tensorflow-2.7.0%2Bzzzcolab20220506150900-cp37-cp37m-linux_x86_64.whl (665.5 MB)
[K     |████████████████████████████████| 665.5 MB 23 kB/s 
Collecting tensorflow-estimator<2.8,~=2.7.0rc0
  Downloading tensorflow_estimator-2.7.0-py2.py3-none-any.whl (463 kB)
[K     |████████████████████████████████| 463 kB 36.2 MB/s 
Collecting keras<2.8,>=2.7.0rc0
  Downloading keras-2.7.0-py2.py3-none-any.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 59.5 MB/s 
Collecting gast<0.5.0,>=0.2.1
  Downloading gast-0.4.0-py3-none-any.whl (9.8 kB)
Installing collected packages: tensorflow-estimator, keras, gast, tensorflow
  Attempting uninstall: tensorflow-estimator
    Found existing installation: tensorflow-estimator 2.8.0
    Uninstalling tensorflow-estimator-2.8.0:
      Successfully unin

In [None]:
# Install keras-ocr from the v0.8.7 git tag, then upgrade opencv-python.
# NOTE(review): opencv-python is not pinned, so a re-run may pick up a different version.
!pip install -U git+https://github.com/faustomorales/keras-ocr.git@v0.8.7
!pip install -U opencv-python

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/faustomorales/keras-ocr.git@v0.8.7
  Cloning https://github.com/faustomorales/keras-ocr.git (to revision v0.8.7) to /tmp/pip-req-build-_dfvdksm
  Running command git clone -q https://github.com/faustomorales/keras-ocr.git /tmp/pip-req-build-_dfvdksm
  Running command git checkout -q c47646c43dc3cb71812db08bdb3843cfa783d8cc
Collecting essential_generators
  Downloading essential_generators-1.0-py3-none-any.whl (9.5 MB)
[K     |████████████████████████████████| 9.5 MB 20.6 MB/s 
Collecting validators
  Downloading validators-0.20.0.tar.gz (30 kB)
Collecting fonttools
  Downloading fonttools-4.37.1-py3-none-any.whl (957 kB)
[K     |████████████████████████████████| 957 kB 47.0 MB/s 
Collecting pyclipper
  Downloading pyclipper-1.3.0.post3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (604 kB)
[K     |████████████████████████████████| 604 kB 73.5 M

# Google Drive and imports

In [None]:
# Mount Google Drive when running on Colab and keep model files under
# `data_dir`; when google.colab cannot be imported, fall back to the
# current directory instead.
try:
  from google.colab import drive
  import os
  drive.mount('/content/drive')
  # Relative path: it only resolves inside the mount while the working
  # directory is /content — NOTE(review): confirm the kernel's cwd.
  data_dir = 'drive/My Drive/colab/keras-ocr'
  os.makedirs(data_dir, exist_ok=True)
except ImportError:
  data_dir = '.'

Mounted at /content/drive


In [None]:
import io
import json
import string
import timeit
from base64 import b64decode, b64encode

import cv2
import keras_ocr
import numpy as np
import PIL
import PIL.Image
import PIL.ImageDraw
import PIL.ImageFont
from google.colab.output import eval_js
from IPython.display import display, Javascript, Image

# Loading weights

In [None]:
# Character set the recognizer is built with, in this order: digits,
# Ukrainian letters, English letters, space, hyphen.
# NOTE(review): ukr_letters leaves out Cyrillic letters such as а, е, і, о, р, с, х
# — presumably because they look identical to Latin letters; confirm before editing.
ukr_letters = 'бвгґдєжзиїйклмнптуфцчшщьюяБГҐДЄЖЗИЇЙЛПУФЦЧШЩЬЮЯ'
en_letters = string.ascii_lowercase + string.ascii_uppercase
special_symbols = '-'
alphabet = ''.join([string.digits, ukr_letters, en_letters, ' ', special_symbols])

In [None]:
# Text detector created with the 'clovaai_general' pretrained weights.
detector = keras_ocr.detection.Detector(weights='clovaai_general')

Looking for /root/.keras-ocr/craft_mlt_25k.h5


In [None]:
# Recognizer built for the custom `alphabet`; its weights are then replaced
# with the checkpoint file stored in `data_dir`.
recognizer = keras_ocr.recognition.Recognizer(
   alphabet=alphabet
)
recognizer.model.load_weights(data_dir + '/32BatchSizeBlur300EpochsBest.h5')

Provided alphabet does not match pretrained alphabet. Using backbone weights only.
Looking for /root/.keras-ocr/crnn_kurapan_notop.h5


In [None]:
# Combine the detector and the recognizer into one pipeline object,
# used later via pipeline.recognize(...).
pipeline = keras_ocr.pipeline.Pipeline(detector=detector, recognizer=recognizer)

# Recognition on webcam

## Helper Functions

In [None]:
# Turn the data URL returned by the browser into an OpenCV image
def js_to_image(js_reply):
  """
  Decode a webcam frame sent from JavaScript.

  Params:
          js_reply: data-URL string; the part after the first comma is the
                    base64-encoded image
  Returns:
          img: image decoded by OpenCV as a 3-channel colour image
  """
  # everything after the "data:...;base64," header is the payload
  encoded_payload = js_reply.split(',')[1]
  raw_buffer = np.frombuffer(b64decode(encoded_payload), dtype=np.uint8)
  # IMREAD_COLOR is the named constant for flag value 1
  return cv2.imdecode(raw_buffer, flags=cv2.IMREAD_COLOR)

# Encode the RGBA overlay array as a PNG data URL that the browser can show on top of the video
def bbox_to_bytes(bbox_array):
  """
  Params:
          bbox_array: Numpy array (pixels) interpreted as an RGBA image to overlay on the video stream.
  Returns:
        bytes: 'data:image/png;base64,...' string holding the PNG-encoded overlay
  """
  overlay = PIL.Image.fromarray(bbox_array, 'RGBA')
  with io.BytesIO() as png_buffer:
    # serialise the overlay as PNG into memory
    overlay.save(png_buffer, format='png')
    encoded = b64encode(png_buffer.getvalue()).decode('utf-8')

  return 'data:image/png;base64,' + encoded

## Video stream JS

In [None]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """
  Inject the webcam helper script into the cell output.

  The script defines `stream_frame(label, imgData)` in the browser:
    * on first use it builds the UI (status label, <video>, overlay <img>,
      stop instruction) and opens the camera with facingMode "environment";
    * each call updates the label / overlay image, waits for the next
      animation frame and returns a dict with timing fields ('create',
      'show', 'capture') and 'img', a 640x640 JPEG data URL of the frame;
    * clicking the video, the overlay or the instruction text requests
      shutdown; stream_frame then releases the camera, removes the UI and
      returns '' so the Python loop can stop.
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 640);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 640; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      if (shutdown) {
        // Stop was clicked while this frame was pending: the animation loop
        // has already ended and no image was captured, so release the camera
        // now and report shutdown instead of returning an empty frame.
        removeDom();
        shutdown = false;
        return '';
      }

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  display(js)
  
def video_frame(label, bbox):
  """
  Ask the browser for the next webcam frame.

  Params:
          label: text/HTML shown in the status line above the video
          bbox: data-URL string of the overlay image ('' for no overlay)
  Returns:
          whatever `stream_frame` returns in the browser, as handed back by eval_js
  """
  # json.dumps produces a correctly escaped, double-quoted string literal that is
  # also valid JavaScript, so labels containing quotes, backslashes or newlines
  # no longer break the generated call.
  js_call = 'stream_frame({}, {})'.format(json.dumps(str(label)), json.dumps(str(bbox)))
  data = eval_js(js_call)
  return data

## Webcam

In [None]:
# Font used to render the recognized text on the overlay. Built from data_dir so
# it follows the same folder (Drive on Colab, '.' otherwise) as the model weights
# instead of a second, differently spelled hard-coded Drive path.
fontpath = data_dir + '/fonts/firasanscondensed/FiraSansCondensed-Regular.ttf'

In [None]:
# Fonts already loaded from disk, keyed by (path, size). get_font is called for
# every text box of every frame, so re-reading the TTF for each candidate size
# would dominate its cost.
_FONT_CACHE = {}


def _load_font(fontpath, size):
  """Return the TrueType font at `fontpath` in `size`, loading it at most once."""
  key = (fontpath, size)
  if key not in _FONT_CACHE:
    _FONT_CACHE[key] = PIL.ImageFont.truetype(fontpath, size=size)
  return _FONT_CACHE[key]


def _text_width(font, text):
  """Return the rendered pixel width of `text` in `font`."""
  # font.getsize() was removed in Pillow 10; getbbox() (Pillow >= 8) reports the
  # same horizontal extent. Keep getsize() as a fallback for older installs.
  if hasattr(font, 'getbbox'):
    left, _, right, _ = font.getbbox(text)
    return right - left
  return font.getsize(text)[0]


def get_font(fontpath, box, text, max_font_size=100):
  """
  Pick the largest font size at which `text` fits the width of `box`.

  Params:
          fontpath: path to a TrueType font file
          box: sequence of (x, y) points; the width is taken as the x distance
               between the first two points
          text: string that will be drawn
          max_font_size: largest size to try (sizes are searched downwards to 1)
  Returns:
          the font object whose rendered text width is <= the box width; if no
          size fits (e.g. a zero-width box) the size-1 font is returned instead
          of failing on an invalid size
  """
  box_width = box[1][0] - box[0][0]
  font = None
  # Linear search from the largest size down; return the first (largest) font
  # that fits rather than one size below it.
  for font_size in range(max(int(max_font_size), 1), 0, -1):
    font = _load_font(fontpath, font_size)
    if _text_width(font, text) <= box_width:
      break
  return font

In [None]:
# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialize bounding box overlay to empty
bbox = ''
while True:
    js_reply = video_frame(label_html, bbox)
    # An empty reply means the stream was shut down; an empty 'img' means the
    # stop click arrived while a frame was being captured. In both cases there
    # is nothing to decode, so leave the loop instead of failing in js_to_image.
    if not js_reply or not js_reply["img"]:
        break

    # convert JS response to OpenCV Image
    frame = js_to_image(js_reply["img"])

    # the OCR pipeline is fed RGB, while OpenCV decodes to BGR
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # create transparent RGBA overlay with the same height/width as the frame
    frame_height, frame_width = frame.shape[:2]
    bbox_array = np.zeros([frame_height, frame_width, 4], dtype=np.uint8)
    prediction = pipeline.recognize([frame_rgb])[0]

    for text, box in prediction:
        # outline of the detected text region
        bbox_array = cv2.polylines(
            img=bbox_array,
            pts=box[np.newaxis].astype("int32"),
            color=(255, 0, 0, 255),
            thickness=2,
            isClosed=True,
        )
        # draw the recognized text with PIL (TrueType font scaled to the box),
        # anchored at the top-left corner of the box's bounding rectangle
        bbox_array_pil = PIL.Image.fromarray(bbox_array)
        draw = PIL.ImageDraw.Draw(bbox_array_pil)
        text_origin = (box[:, 0].min().astype(int), box[:, 1].min().astype(int))
        draw.text(text_origin, text, font = get_font(fontpath, box, text), fill = (0, 0, 0, 255))
        bbox_array = np.array(bbox_array_pil)

    # convert overlay of bbox into bytes
    bbox_bytes = bbox_to_bytes(bbox_array)
    # update bbox so next frame gets new overlay
    bbox = bbox_bytes

<IPython.core.display.Javascript object>

KeyboardInterrupt: ignored