<a href="https://colab.research.google.com/github/Saman-Karim/PSL-translator/blob/main/PSL_Live_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset archives
# and the saved model files from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip '/content/drive/MyDrive/PSL_video/train.zip'
!unzip '/content/drive/MyDrive/PSL_video/validation.zip'
!unzip '/content/drive/MyDrive/PSL_video/test.zip'

# PSL Train, Validation and Test dataset is available at https://drive.google.com/drive/folders/1_-HvCXLZXSGESzYy8a7MFFJumPgYDhz5?usp=sharing 
# Developed Xception Model (model.json, model-weights.h5) is available at https://drive.google.com/drive/folders/1A3cVF7I6h0MLk8JVdI_OCHLMyEePYje5?usp=sharing

In [None]:
from PIL import Image
import numpy as np

import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)
import pandas as pd
import json
from skimage import transform
from matplotlib import pyplot as plt

from keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.applications import InceptionV3, Xception, InceptionResNetV2
from tensorflow.keras.applications.mobilenet import MobileNet
from keras.applications.mobilenet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from keras.models import model_from_json
from keras.callbacks import ModelCheckpoint
from keras.preprocessing import image
from tensorflow.keras.utils import plot_model

import cv2
from google.colab.patches import cv2_imshow
import os
import pathlib

In [None]:
# Directories the notebook treats as the train / validation / test image roots.
TRAINING_DATA_SET_PATH, VALIDATION_DATA_SET_PATH, TEST_DATA_SET_PATH = (
    '/content/train',
    '/content/validation',
    '/content/test',
)

# Training hyper-parameters.
EPOCHS, BATCH_SIZE = 20, 32

# Height and width (in pixels) that images are resized to before prediction.
IMAGE_HEIGHT = IMAGE_WIDTH = 224

# Number of sign classes; matches the 27 labels listed in getTopPredictions.
DATASET_CATEGORIES = 27

def loadModelfromJson(modelPath, weightPath):
    """Rebuild a model from its JSON architecture file and load its weights.

    Args:
        modelPath: Path of the JSON file holding the serialized architecture.
        weightPath: Path of the weights file handed to ``load_weights``.

    Returns:
        The object returned by ``model_from_json`` after its weights were loaded.

    Raises:
        OSError: If ``modelPath`` cannot be opened or read.
    """
    # The context manager closes the handle even when read() raises, which the
    # previous explicit open()/close() pair did not guarantee.
    with open(modelPath, 'r') as json_file:
        loaded_model_json = json_file.read()

    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights(weightPath)
    print("Model loaded from", modelPath)
    return loaded_model


def loadSingleImage(filename):
    """Load one image file as a model-ready batch of size one.

    Args:
        filename: Path of the image file to open with PIL.

    Returns:
        Array of shape (1, IMAGE_HEIGHT, IMAGE_WIDTH, 3) holding the image
        divided by 255 and resized with ``skimage.transform.resize``.
    """
    # convert('RGB') guarantees three channels so the (H, W, 3) resize target
    # matches; the context manager releases the file handle afterwards.
    with Image.open(filename) as pil_image:
        np_image = np.array(pil_image.convert('RGB')).astype('float32')
    # The original line read `img = img/255`, which referenced an undefined
    # name and never scaled np_image.
    np_image = np_image / 255
    np_image = transform.resize(np_image, (IMAGE_HEIGHT, IMAGE_WIDTH, 3))
    # Add the leading batch axis.
    np_image = np.expand_dims(np_image, axis=0)
    return np_image

def load_image(img_path, show=False):
    """Read an image from disk and return it as a scaled batch of one.

    Args:
        img_path: Path handed to ``image.load_img``.
        show: When true, also render the loaded image with matplotlib.

    Returns:
        Array with a leading batch axis, i.e. (1, height, width, channels),
        whose values have been divided by 255.
    """
    pil_img = image.load_img(img_path, target_size=(IMAGE_HEIGHT, IMAGE_WIDTH))

    # img_to_array yields (height, width, channels); the model expects
    # (batch_size, height, width, channels), hence the extra leading axis.
    batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
    # imshow expects values in the range [0, 1]
    batch /= 255.

    if not show:
        return batch

    plt.imshow(batch[0])
    plt.axis('off')
    plt.show()
    return batch


def showImageCV(preds, imagePath, letter):
    """Display an image with the predicted letter and its probability drawn on it.

    Args:
        preds: 2-D array of class probabilities; row 0 is used.
        imagePath: Path of the image to read with OpenCV and annotate.
        letter: Label text to print in front of the probability.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``imagePath``.
    """
    # Index of the class with the largest probability in the first row.
    best_index = preds.argmax(axis=1)[0]
    text = "{}: {:.2f}%".format(letter, preds[0][best_index] * 100)

    # cv2.imread signals failure by returning None instead of raising, so fail
    # here with a clear message rather than with an AttributeError on .copy().
    # The local is named `frame` to avoid shadowing the imported `image` module.
    frame = cv2.imread(imagePath)
    if frame is None:
        raise FileNotFoundError("Could not read image: {}".format(imagePath))

    # Draw on a copy so the decoded frame itself is left untouched.
    output = frame.copy()
    cv2.putText(output, text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                (0, 0, 255), 2)
    # show the output image
    cv2_imshow(output)
    cv2.waitKey(0)


def getTopPredictions(preds):
    """Pair every class score with its sign label and rank them.

    Args:
        preds: Indexable sequence with at least 27 scores; position ``i``
            belongs to the ``i``-th label listed below.

    Returns:
        Tuple ``(top_preds, all_preds)``. ``all_preds`` is a list of
        ``(label, score)`` tuples sorted by score, highest first;
        ``top_preds`` is a list with its first three entries.
    """
    # Label order defines which prediction index maps to which sign.
    labels = (
        'Aliph', 'Aliph_mad_aa', 'Bari_yeh', 'Bay', 'Choti_yeh', 'Daal',
        'Fay', 'Ghain', 'Hey', 'Jeem', 'Kaaf', 'Laam', 'Meem', 'Noon',
        'Pay', 'Quaaf', 'Ray', 'Say', 'Seen', 'Tey', 'Vao', 'Zaal', 'Zay',
        'aRay', 'del', 'nothing', 'space',
    )

    # Attach each label to the score stored at the same index.
    labelled = [(label, preds[position]) for position, label in enumerate(labels)]

    # Highest score first; equal scores keep their label order.
    ranked = sorted(labelled, key=lambda pair: pair[1], reverse=True)

    return ranked[:3], ranked


In [None]:
def predictSingleImage(filepath, model):
    """Run the model on a single image file.

    Args:
        filepath: Path of the image to classify.
        model: Object exposing ``predict``; called with a batch of one image.

    Returns:
        Whatever ``model.predict`` returns for the prepared batch.
    """
    # The context manager closes the file; convert('RGB') ensures three
    # channels even for grayscale or RGBA inputs. PIL's resize takes
    # (width, height), and the size now follows the notebook-wide constants
    # instead of a hard-coded (224, 224).
    with Image.open(filepath) as source_image:
        inputImage = source_image.convert('RGB').resize((IMAGE_WIDTH, IMAGE_HEIGHT))

    imageDataArray = image.img_to_array(inputImage)
    # Add the batch axis before handing the array to preprocessing.
    imageDataArray = np.expand_dims(imageDataArray, axis=0)
    # NOTE(review): preprocess_input is imported from the MobileNet module at
    # the top of this notebook, whereas load_image and the live loop feed the
    # model values scaled by other means - confirm which matches training.
    imageDataArray = preprocess_input(imageDataArray)

    predictions = model.predict(imageDataArray)
    return predictions

In [None]:
# Load the saved architecture and weights from Drive once; the live recognition
# loop at the end of the notebook calls finalModel.predict on every frame.
finalModel = loadModelfromJson('/content/drive/MyDrive/output/model.json', '/content/drive/MyDrive/output/model-weights.h5')

In [None]:
# import dependencies
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode, b64encode
import cv2
import numpy as np
import PIL
import io
import html
import time

In [None]:
def js_to_image(js_reply):
  """
  Turn a base64 data-URL string into an OpenCV image.

  Params:
          js_reply: string of the form "<header>,<base64 data>", such as the
                    JPEG data URL produced by the webcam capture canvas
  Returns:
          img: OpenCV BGR image as decoded by cv2.imdecode
  """
  # everything after the first comma is the base64 payload
  encoded_payload = js_reply.split(',')[1]
  # raw JPEG bytes viewed as a flat uint8 buffer
  raw_buffer = np.frombuffer(b64decode(encoded_payload), dtype=np.uint8)
  # flags=1 asks OpenCV for a colour image
  return cv2.imdecode(raw_buffer, flags=1)


In [None]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """Inject the webcam-streaming JavaScript into the notebook output.

  The script defines ``stream_frame(label, imgData)``, which ``video_frame``
  calls through ``eval_js``. On each call it lazily builds the preview DOM,
  updates the status label and optional overlay image, then resolves with one
  captured JPEG frame as a data URL. Clicking the video, the overlay or the
  caption requests shutdown, after which ``stream_frame`` tears the DOM down
  and returns an empty string.
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    // Stop the camera and drop every DOM node created by createDom.
    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

    // Runs once per animation frame; hands a captured frame to whoever is
    // waiting in stream_frame, or an empty string once shutdown was requested.
    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    // Build the preview UI and start the camera; a no-op when already built.
    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      labelElement.style.color = 'red';
      // The CSS property is fontSize; the previous style.size was ignored.
      labelElement.style.fontSize = '32px';

      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: black; font-weight: bold;">' +
          'Real time PSL Recognition</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }

    // Entry point called from Python: show label/overlay, return one frame.
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      if (shutdown) {
        // Stop was requested while this capture was pending. Previously the
        // flag was simply cleared here, which left the camera running with
        // the frame loop stopped and returned an empty frame. Tear down now
        // and report end-of-stream exactly like the early exit above.
        removeDom();
        shutdown = false;
        return '';
      }

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  display(js)
  
def video_frame(label, bbox):
  """Ask the browser for the next webcam frame.

  Params:
          label: text shown in the status label of the video widget
          bbox: overlay image as a data-URL string, or '' for no overlay
  Returns:
          data: whatever ``eval_js`` returns for ``stream_frame(label, bbox)``
  """
  # json.dumps emits a quoted, escaped string literal, so quotes, backslashes
  # or newlines in the arguments can no longer break or alter the generated
  # JavaScript call. str() keeps non-string arguments rendering as text, as
  # the old "{}" formatting did.
  js_call = 'stream_frame({}, {})'.format(json.dumps(str(label)), json.dumps(str(bbox)))
  data = eval_js(js_call)
  return data

In [None]:
# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# overlay image sent to the browser; an empty string means no overlay is drawn
bbox = ''
# minimum top-1 probability required before a letter replaces "Capturing..."
CONFIDENCE_THRESHOLD = 0.7

while True:
    js_reply = video_frame(label_html, bbox)
    # An empty reply means the stream was stopped from the browser. An empty
    # "img" means the stop arrived while a capture was pending; there is no
    # frame to decode then, so end the loop instead of failing in js_to_image.
    if not js_reply or not js_reply["img"]:
        break

    # convert JS response to OpenCV Image
    img = js_to_image(js_reply["img"])

    try:
        # js_to_image returns OpenCV's BGR channel order, while the other
        # inference helpers in this notebook (PIL / Keras loaders) give the
        # model RGB images, so reorder the channels to match them.
        rgb_frame = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # skimage's resize also converts the uint8 frame to floats in [0, 1]
        np_image = transform.resize(rgb_frame, (IMAGE_HEIGHT, IMAGE_WIDTH, 3))
        np_image = np.expand_dims(np_image, axis=0)
        predictions = finalModel.predict(np_image)
        top_three_preds, all_preds = getTopPredictions(predictions[0])

        best_label, best_score = top_three_preds[0]
        if best_score > CONFIDENCE_THRESHOLD:
            label_html = "{}: {:.2f}%".format(best_label, best_score * 100)
        else:
            label_html = "Capturing..."

    except Exception as err:
        # A frame that cannot be decoded, resized or classified is reported
        # and skipped so the stream keeps running.
        print(str(err))

Live recognition code inspired by: https://www.youtube.com/watch?v=YjWh7QvVH60
