<a href="https://colab.research.google.com/github/lee-euijin/AppleFarm/blob/main/Web/webcam_realtime_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
from tensorflow.keras import models
from tensorflow import keras

In [2]:
# Install MediaPipe and OpenCV
! pip install mediapipe opencv-python

Collecting mediapipe
[?25l  Downloading https://files.pythonhosted.org/packages/5a/c6/cb43b4d35257270a428a7c8e8c10565bb9719eaa4a3a5f34442d77e02678/mediapipe-0.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.7MB)
[K     |████████████████████████████████| 37.7MB 106kB/s 
Installing collected packages: mediapipe
Successfully installed mediapipe-0.8.5


In [3]:
from IPython.display import display, Javascript, Image
from google.colab.output import eval_js
from base64 import b64decode, b64encode
import cv2
import PIL
import io
import html
import time
import mediapipe as mp # Face, Body, Hand Pose Detection 라이브러리

In [4]:
# Mount Google Drive at /content/drive; the model file and the dataset folder
# referenced by later cells are read from / written to paths under this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Short alias for the MediaPipe drawing helpers (draw_landmarks is used by the capture loops)
mp_drawing = mp.solutions.drawing_utils
# Short alias for the MediaPipe Holistic solution (provides Holistic and HAND_CONNECTIONS)
mp_holistic = mp.solutions.holistic

In [7]:
# Load the previously saved Keras model (.h5) from the mounted Google Drive
model = tf.keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/model_weight/model_made_augmentation.h5')

In [8]:
# Convert the image string returned by the browser into an OpenCV image
def js_to_image(js_reply):
  """
  Decode a webcam frame received from JavaScript.

  Params:
          js_reply: string of the form "<prefix>,<base64 data>" (the data URL
                    produced by the capture canvas)
  Returns:
          img: image array produced by cv2.imdecode in colour mode (BGR order)
  """
  # the base64 payload is the piece that follows the first comma
  encoded = js_reply.split(',')[1]
  # view the decoded JPEG bytes as a flat uint8 buffer
  raw = np.frombuffer(b64decode(encoded), dtype=np.uint8)
  # IMREAD_COLOR has the value 1, i.e. the same flag the literal `1` selects
  return cv2.imdecode(raw, cv2.IMREAD_COLOR)

# Encode an RGBA overlay (bounding box + label) as a base64 PNG data URL for the video stream
def bbox_to_bytes(bbox_array):
  """
  Serialise an overlay image so the browser can draw it on top of the video.

  Params:
          bbox_array: Numpy array (pixels) containing rectangle to overlay on video stream.
                      It is interpreted as an RGBA image.
  Returns:
        bytes: 'data:image/png;base64,...' string holding the PNG-encoded overlay
  """
  # `import PIL` alone does not load the Image submodule; importing it here
  # guarantees PIL.Image exists even if no other library has imported it yet.
  import PIL.Image

  # convert array into PIL image
  bbox_PIL = PIL.Image.fromarray(bbox_array, 'RGBA')
  iobuf = io.BytesIO()
  # format bbox into png for return
  bbox_PIL.save(iobuf, format='png')
  # base64 output is plain ASCII, so decoding it as UTF-8 is lossless
  encoded = b64encode(iobuf.getvalue()).decode('utf-8')

  return 'data:image/png;base64,{}'.format(encoded)

In [9]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  """
  Display the JavaScript that drives the webcam preview in the cell output.

  The script defines `stream_frame(label, imgData)` together with its helpers:

  * `createDom` builds (once) a bordered container holding a status label, a
    <video> element fed by `getUserMedia`, an absolutely positioned <img> used
    as the overlay, and a red instruction line; it also creates a 640x480
    capture canvas and starts the animation-frame loop.
  * `onAnimationFrame` re-schedules itself while `shutdown` is false and, when a
    capture is pending, resolves it with the current frame as a JPEG data URL
    (quality 0.8) or with "" if `shutdown` is set.
  * `removeDom` stops the video track and removes the created elements.
  * `stream_frame` returns '' (after calling `removeDom`) when `shutdown` was
    set by a click on the video, overlay or instruction text; otherwise it
    updates the label / overlay when they are non-empty, waits for the next
    captured frame and returns an object with the keys 'create', 'show',
    'capture' (elapsed milliseconds) and 'img'.

  Returns:
          None. The Javascript object is shown with `display`.
  """
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;
    
    var pendingResolve = null;
    var shutdown = false;
    
    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }
    
    function onAnimationFrame() {
      if (!shutdown) { // 꺼진게 아니면 
        window.requestAnimationFrame(onAnimationFrame);  //  함수는 기본적으로는 1초에 60번, 보통은 모니터에 주사율에 맞추어 함수를 실행함.
      }
      if (pendingResolve) { // 보류중
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480); // 이미지를 그려
          result = captureCanvas.toDataURL('image/jpeg', 0.8)  // 캔버스에 그린 그림을 문자열 형태로.  0.8 = encoderOptions

        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }
    
    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);
      
      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);
           
      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});  //모바일 장치의 후면 카메라를 요청하기 위한 코드:
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);
      
      const instruction = document.createElement('div');
      instruction.innerHTML = 
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop this demo</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };
      
      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);

      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();
      
      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }
            
      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }
      
      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;
      
      return {'create': preShow - preCreate, 
              'show': preCapture - preShow, 
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  # Emit the script into the cell output so that eval_js can later call stream_frame
  display(js)
  
def video_frame(label, bbox):
  """
  Ask the browser for the next webcam frame via the `stream_frame` JS function.

  Params:
          label: text shown in the status line above the video ('' leaves it unchanged)
          bbox: overlay image as a data-URL string ('' leaves the overlay unchanged)
  Returns:
          data: whatever `stream_frame` returns - '' once the user has stopped the
                stream, otherwise an object with timing fields and the frame under 'img'
  """
  # Local import keeps this cell self-contained.
  import json

  # json.dumps emits properly quoted/escaped string literals, so quotes,
  # backslashes or newlines in the label can no longer break (or inject into)
  # the generated JavaScript call.
  call = 'stream_frame({}, {})'.format(json.dumps(str(label)), json.dumps(str(bbox)))
  data = eval_js(call)  # run the JavaScript function and wait for its result

  return data

## 데이터셋 저장

In [10]:
# Collect extra training data: render the right-hand landmarks of every webcam
# frame on a black canvas and save the rendering as a PNG.

# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# no overlay is drawn while collecting data
bbox = ''
# running index used in the saved file names
count = 1
# folder that receives the webcam frames used as additional training data
save_path = '/content/drive/My Drive/13/01_palm/'

# Build the Holistic pipeline once and reuse it for every frame: constructing
# it per frame is slow and prevents landmark tracking between frames.
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while True:
        js_reply = video_frame(label_html, bbox)
        # '' means the stream was shut down; an empty 'img' means the user
        # clicked "stop" while a capture was pending - stop in both cases.
        if not js_reply or not js_reply["img"]:
            break

        # convert JS response to OpenCV Image
        img = js_to_image(js_reply["img"])

        # MediaPipe is fed the RGB version of the OpenCV (BGR) frame
        results = holistic.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

        # black canvas that receives only the landmarks
        landmark_img = np.zeros(img.shape, np.uint8)

        # Right hand
        mp_drawing.draw_landmarks(landmark_img, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

        # Only save frames on which something was actually drawn
        if landmark_img.any():
            out_file = save_path + 'frame_13_10_{:04d}.png'.format(count)
            # cv2.imwrite reports failure through its return value; surface it
            # instead of silently losing frames.
            if not cv2.imwrite(out_file, landmark_img):
                raise IOError('Could not write {} - does the folder exist?'.format(out_file))
            count += 1

<IPython.core.display.Javascript object>

## Realtime model 실시간 코드

In [15]:
# Real-time gesture recognition: classify the right-hand landmark rendering of
# each webcam frame and overlay a bounding box with the predicted label.

# Load the saved model
model = tf.keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/model_weight/model_made_augmentation.h5')


labels = ['palm', 'l', 'fist', 'fist_moved', 'thumb', 'index', 'ok', 'palm_moved', 'c', 'down']
# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialze bounding box to empty
bbox = ''

# Build the Holistic pipeline once and reuse it for every frame: constructing
# it per frame is slow and prevents landmark tracking between frames.
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while True:
        js_reply = video_frame(label_html, bbox)
        # '' means the stream was shut down; an empty 'img' means the user
        # clicked "stop" while a capture was pending - stop in both cases.
        if not js_reply or not js_reply["img"]:
            break

        # convert JS response to OpenCV Image
        img = js_to_image(js_reply["img"])
        h, w, _ = img.shape

        # MediaPipe is fed the RGB version of the OpenCV (BGR) frame
        results = holistic.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        hand_landmarks = results.right_hand_landmarks

        # transparent RGBA overlay with the same size as the captured frame
        bbox_array = np.zeros([h, w, 4], dtype=np.uint8)

        # The label is only ever drawn next to a detected hand, so the model
        # is run only when a hand is present.
        if hand_landmarks:
            # black canvas that receives only the landmarks
            landmark_img = np.zeros(img.shape, np.uint8)
            mp_drawing.draw_landmarks(landmark_img, hand_landmarks, mp_holistic.HAND_CONNECTIONS)

            # model input: grayscale, 128x128, one channel, batch of one
            landmark_img = cv2.cvtColor(landmark_img, cv2.COLOR_BGR2GRAY)
            landmark_img = cv2.resize(landmark_img, (128, 128))
            X_test = landmark_img.reshape(1, 128, 128, 1)
            # verbose=0 keeps Keras from printing a progress bar per frame
            pred = model.predict(X_test, verbose=0)

            if np.max(pred) < 0.2:
                label = 'none'
            else:
                label = labels[np.argmax(pred)]  # predicted class name

            # landmark coordinates are normalised; scale them to pixels
            x_point = [lm.x * w for lm in hand_landmarks.landmark]
            y_point = [lm.y * h for lm in hand_landmarks.landmark]

            min_x = int(min(x_point))
            max_x = int(max(x_point))
            min_y = int(min(y_point))
            max_y = int(max(y_point))

            # draw the bounding box
            bbox_array = cv2.rectangle(bbox_array, (min_x, min_y), (max_x, max_y), (255, 0, 0), 2)
            # draw the label; keep its baseline inside the frame when the hand touches the top edge
            bbox_array = cv2.putText(bbox_array, label, (min_x, max(min_y - 10, 15)), cv2.FONT_ITALIC, 0.5, 255, 2)
            # make every drawn pixel opaque and leave the rest transparent
            bbox_array[:, :, 3] = (bbox_array.max(axis=2) > 0).astype(int) * 255

        # convert overlay of bbox into bytes; the next frame request sends it to the browser
        bbox = bbox_to_bytes(bbox_array)


<IPython.core.display.Javascript object>

## class 별 확률값 표시 
- augmentation 없는 데이터셋 이용

In [19]:
# Print the per-class probabilities for every webcam frame
# (model trained on the dataset without augmentation).

# Load the saved model
model = tf.keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/model_weight/model_made_121314.h5')

labels = ['palm', 'l', 'fist', 'fist_moved', 'thumb', 'index', 'ok', 'palm_moved', 'c', 'down']
# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialze bounding box to empty (no overlay is drawn in this cell)
bbox = ''

# Build the Holistic pipeline once and reuse it for every frame: constructing
# it per frame is slow and prevents landmark tracking between frames.
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while True:
        js_reply = video_frame(label_html, bbox)
        # '' means the stream was shut down; an empty 'img' means the user
        # clicked "stop" while a capture was pending - stop in both cases.
        if not js_reply or not js_reply["img"]:
            break

        # convert JS response to OpenCV Image
        img = js_to_image(js_reply["img"])

        # MediaPipe is fed the RGB version of the OpenCV (BGR) frame
        results = holistic.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

        # black canvas that receives only the landmarks
        landmark_img = np.zeros(img.shape, np.uint8)
        mp_drawing.draw_landmarks(landmark_img, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

        # model input: grayscale, 128x128, one channel, batch of one
        landmark_img = cv2.cvtColor(landmark_img, cv2.COLOR_BGR2GRAY)
        landmark_img = cv2.resize(landmark_img, (128, 128))
        X_test = landmark_img.reshape(1, 128, 128, 1)

        # Sequential.predict_proba was removed from Keras (TF 2.6); predict
        # returns the same output array. verbose=0 avoids a progress bar per frame.
        pred = model.predict(X_test, verbose=0)
        print(pred)

        if np.max(pred) < 0.4:
            label = 'none'
        else:
            # NOTE(review): the index is shifted by -1 here (so index 0 maps to
            # 'down'), unlike the cells that use the augmentation model.
            # Presumably this model was trained with a different class order -
            # TODO confirm against the training notebook.
            label = labels[np.argmax(pred)-1]  # predicted class name
            print(label)
  

<IPython.core.display.Javascript object>



[[0.10586174 0.10789999 0.1018554  0.09523472 0.0966622  0.08107451
  0.10035154 0.10360441 0.11996584 0.08748963]]
[[4.41469951e-03 8.18779971e-03 4.83315853e-05 9.86802518e-01
  6.09900781e-07 5.39151835e-04 1.38509995e-08 4.33009563e-06
  1.45990207e-08 2.40023928e-06]]
fist
[[2.36506807e-07 9.93219674e-01 6.75855437e-03 2.02557914e-07
  8.25872411e-13 1.35019870e-10 1.82462923e-09 2.09217942e-05
  4.52776106e-07 1.19970325e-14]]
palm
[[8.4122058e-07 9.9970156e-01 2.8520511e-04 6.4485434e-08 6.0877486e-13
  5.5976131e-12 1.4835049e-09 9.7455004e-06 2.6135674e-06 8.8560290e-14]]
palm
[[1.0778079e-07 9.9946874e-01 4.6891868e-04 1.3268337e-08 6.6930868e-13
  2.9781017e-12 1.5613532e-09 6.1455212e-05 7.4598682e-07 2.6116636e-13]]
palm
[[5.0702941e-04 8.9627934e-01 1.1404571e-03 2.1811202e-03 2.9988885e-07
  9.7460251e-10 2.0493606e-04 9.2947287e-03 9.0392075e-02 2.1097550e-08]]
palm
[[2.6087842e-13 3.7316129e-16 2.6085830e-11 2.6345756e-10 5.2612703e-10
  3.7417081e-22 5.0483129e-09 3.0

## class 별 확률값 표시
- augmentation dataset 이용

In [17]:
# Print the per-class probabilities for every webcam frame
# (model trained on the augmented dataset).

# Load the saved model
model = tf.keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/model_weight/model_made_augmentation.h5')


labels = ['palm', 'l', 'fist', 'fist_moved', 'thumb', 'index', 'ok', 'palm_moved', 'c', 'down']
# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialze bounding box to empty (no overlay is drawn in this cell)
bbox = ''

# Build the Holistic pipeline once and reuse it for every frame: constructing
# it per frame is slow and prevents landmark tracking between frames.
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while True:
        js_reply = video_frame(label_html, bbox)
        # '' means the stream was shut down; an empty 'img' means the user
        # clicked "stop" while a capture was pending - stop in both cases.
        if not js_reply or not js_reply["img"]:
            break

        # convert JS response to OpenCV Image
        img = js_to_image(js_reply["img"])

        # MediaPipe is fed the RGB version of the OpenCV (BGR) frame
        results = holistic.process(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

        # black canvas that receives only the landmarks
        landmark_img = np.zeros(img.shape, np.uint8)
        mp_drawing.draw_landmarks(landmark_img, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

        # model input: grayscale, 128x128, one channel, batch of one
        landmark_img = cv2.cvtColor(landmark_img, cv2.COLOR_BGR2GRAY)
        landmark_img = cv2.resize(landmark_img, (128, 128))
        X_test = landmark_img.reshape(1, 128, 128, 1)

        # Sequential.predict_proba was removed from Keras (TF 2.6); predict
        # returns the same output array. verbose=0 avoids a progress bar per frame.
        pred = model.predict(X_test, verbose=0)
        print(pred)

        if np.max(pred) < 0.4:
            label = 'none'
        else:
            label = labels[np.argmax(pred)]  # predicted class name
            print(label)
  

<IPython.core.display.Javascript object>



[[0.02877679 0.05945883 0.41088748 0.18281624 0.18487701 0.005234
  0.02663992 0.02947755 0.04866821 0.02316405]]
fist
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]]
fist
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]]
l
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
palm
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
palm
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
palm
[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]]
l
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]]
index
[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]]
l
[[0. 0. 0. 0. 0. 