In [1]:
# Mount Google Drive at /content/drive; the cascade file and the model loaded
# later in this notebook are read from paths under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import html
import io
import json
import time
from base64 import b64decode, b64encode

import cv2
import numpy as np
import PIL
from google.colab.output import eval_js
from IPython.display import display, Javascript, Image

In [3]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """
  Inject the browser-side webcam helpers into the notebook output.

  Displays a Javascript object that defines `stream_frame(label, imgData)`.
  On each call that function lazily builds the video UI (first call only),
  updates the status label, positions the overlay image over the video and
  resolves with one JPEG frame captured on the next animation frame.
  Clicking the video, the overlay or the instruction text requests a stop;
  `stream_frame` then tears the UI down and returns an empty string.

  Returns:
          None (the JavaScript is displayed as a side effect)
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;

    var pendingResolve = null;
    var shutdown = false;

    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       // The caption is attached to document.body rather than to `div`,
       // so it must be removed explicitly or it would stay on the page.
       var bboxLabel = document.getElementById('bboxLabel');
       if (bboxLabel) {
         bboxLabel.remove();
       }
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }

    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }

    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);

      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);

      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);

      const instruction = document.createElement('div');
      instruction.innerHTML =
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };

      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }

    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();

      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }

      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;

        // Add a label below the bounding box
        if (!document.getElementById('bboxLabel')) {
          const bboxLabel = document.createElement('div');
          bboxLabel.id = 'bboxLabel';
          bboxLabel.style.position = 'absolute';
          bboxLabel.style.zIndex = 2;
          bboxLabel.style.backgroundColor = 'rgba(0, 0, 0, 0.5)';
          bboxLabel.style.color = 'white';
          bboxLabel.style.padding = '2px 5px';
          bboxLabel.style.borderRadius = '3px';
          document.body.appendChild(bboxLabel);
        }
        const bboxLabel = document.getElementById('bboxLabel');
        bboxLabel.innerText = label;
        bboxLabel.style.top = (videoRect.bottom - 100) + "px"; // Position below the box
        bboxLabel.style.left = (videoRect.left + 100) + "px";
      }

      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      if (shutdown) {
        // A stop click arrived while waiting for the frame: the animation
        // loop has ended and `result` is empty, so tear down now and report
        // the stop instead of returning a reply that carries no image.
        removeDom();
        shutdown = false;
        return '';
      }

      return {'create': preShow - preCreate,
              'show': preCapture - preShow,
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  display(js)

def video_frame(label, bbox):
  """
  Ask the browser for the next webcam frame.

  Params:
          label: text shown in the status label of the video widget
          bbox: overlay image as a data-URL string ('' for no overlay)
  Returns:
          whatever `stream_frame` resolves to: '' once the user has stopped
          the demo, otherwise a dict with 'create', 'show', 'capture'
          timings and the captured frame under 'img'
  """
  # json.dumps produces fully escaped, double-quoted string literals, so a
  # label containing quotes, backslashes or newlines can neither break nor
  # inject code into the JavaScript expression that is evaluated.
  js_call = 'stream_frame({}, {})'.format(json.dumps(str(label)), json.dumps(str(bbox)))
  data = eval_js(js_call)
  return data

In [4]:
# turn the frame string returned by the JavaScript side into an OpenCV image
def js_to_image(js_reply):
  """
  Decode a base64 data-URL string into an OpenCV image.

  Params:
          js_reply: comma-separated data-URL string whose second field is the
                    base64-encoded image coming from the webcam
  Returns:
          img: OpenCV BGR image
  """
  # the base64 payload is the second comma-separated field of the string
  encoded_frame = js_reply.split(',')[1]
  # view the decoded bytes as a flat uint8 buffer for OpenCV
  raw_buffer = np.frombuffer(b64decode(encoded_frame), dtype=np.uint8)
  # flags=1 is cv2.IMREAD_COLOR
  return cv2.imdecode(raw_buffer, flags=1)

# serialise the RGBA overlay array as a base64 PNG data URL to be overlaid on the video stream
def bbox_to_bytes(bbox_array):
  """
  Encode the overlay array as a PNG data URL.

  Params:
          bbox_array: Numpy array (pixels) containing rectangle to overlay on video stream.
  Returns:
        bytes: Base64 image byte string, prefixed with 'data:image/png;base64,'
  """
  # interpret the array as an RGBA picture
  overlay = PIL.Image.fromarray(bbox_array, 'RGBA')
  # write the PNG into memory and base64-encode the resulting bytes
  with io.BytesIO() as png_buffer:
    overlay.save(png_buffer, format='png')
    encoded = b64encode(png_buffer.getvalue()).decode('utf-8')

  return 'data:image/png;base64,' + encoded

In [8]:
import cv2
import numpy as np
from tensorflow.keras.models import load_model

# Start streaming video from webcam
video_stream()

# Geometry of the captured frame; the JavaScript side draws every frame onto a
# 640x480 canvas, and the overlay must have the same size.
FRAME_WIDTH, FRAME_HEIGHT = 640, 480

# Size the face crop is resized to before it is passed to the CNN
MODEL_INPUT_SIZE = (222, 222)

# Font settings for the text drawn on the overlay
LABEL_FONT = cv2.FONT_HERSHEY_SIMPLEX
LABEL_SCALE = 0.7
LABEL_COLOR = (255, 255, 255)
LABEL_THICKNESS = 2

# Initialize variables
label_html = 'Capturing...'
bbox = ''

# Load the Haar Cascade XML file
cascade_path = '/content/drive/MyDrive/WEBCAM FACE DETECTION/haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_path)
# The constructor does not raise on a bad path; fail here with a clear message
# instead of failing later inside detectMultiScale.
if face_cascade.empty():
    raise IOError(f"Could not load Haar cascade from {cascade_path}")

# Load the pre-trained CNN model
model_path = '/content/drive/MyDrive/WEBCAM FACE DETECTION/Final_Model.h5'
cnn_model = load_model(model_path)


class_names = ['Rafsan', 'Munni', 'Akid', 'Shoily']

# Confidence threshold to filter predictions
CONFIDENCE_THRESHOLD = 0.7

while True:
    # Get frame from JavaScript interface
    js_reply = video_frame(label_html, bbox)
    # An empty reply means the user stopped the demo. A reply whose "img" is
    # empty carries no frame to decode, so it is treated as a stop as well.
    if not js_reply or not js_reply["img"]:
        break

    # Convert JS response to OpenCV Image
    img = js_to_image(js_reply["img"])

    # Create transparent overlay for bounding boxes
    bbox_array = np.zeros([FRAME_HEIGHT, FRAME_WIDTH, 4], dtype=np.uint8)

    # Grayscale image for face detection (js_to_image returns a BGR image)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Detect faces in the frame
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=10, minSize=(30, 30))

    # Labels recognised in this frame, one entry per accepted face
    frame_labels = []

    # Process each detected face
    for (x, y, w, h) in faces:
        # Draw a rectangle around the face
        bbox_array = cv2.rectangle(bbox_array, (x, y), (x + w, y + h), (255, 0, 0), 2)

        # Crop the face from the grayscale frame; the conversion is per-pixel,
        # so this equals converting the colour crop.
        face_roi_gray = gray[y:y + h, x:x + w]

        # Resize to match the CNN input size
        face_resized = cv2.resize(face_roi_gray, MODEL_INPUT_SIZE)

        # Normalize pixel values to [0, 1]
        face_normalized = face_resized / 255.0

        # Reshape to add batch and channel dimensions
        face_reshaped = np.expand_dims(face_normalized, axis=(0, -1))

        # Predict the class of the face
        prediction = cnn_model.predict(face_reshaped, verbose=0)
        predicted_class = class_names[np.argmax(prediction)]
        confidence = np.max(prediction)

        # Only display predictions above the confidence threshold
        if confidence > CONFIDENCE_THRESHOLD:
            frame_labels.append(f"{predicted_class} ({confidence:.2f})")

            # Draw the label and confidence under the bounding box. The text
            # goes on the overlay, because the overlay is what is sent back to
            # the browser; `img` itself is never displayed.
            cv2.putText(bbox_array, f"Predicted: {predicted_class}", (x, y + h + 20),
                        LABEL_FONT, LABEL_SCALE, LABEL_COLOR, LABEL_THICKNESS)
            cv2.putText(bbox_array, f"Confidence: {confidence:.2f}", (x, y + h + 40),
                        LABEL_FONT, LABEL_SCALE, LABEL_COLOR, LABEL_THICKNESS)

    # Status text for the next frame; several faces are separated by commas
    label_html = ', '.join(frame_labels)

    # Make every drawn pixel opaque and leave the rest of the overlay transparent
    bbox_array[:, :, 3] = (bbox_array.max(axis=2) > 0).astype(int) * 255

    # Convert overlay of bbox into bytes
    bbox_bytes = bbox_to_bytes(bbox_array)

    # Update bbox so the next frame gets the new overlay
    bbox = bbox_bytes


<IPython.core.display.Javascript object>

