https://github.com/kushalk7/Gesture-recognition-using-CNNLSTM/blob/master/webCam-CNNLSTM.py

https://github.com/ChunML/DeepLearning/blob/master/camera_test.py

In [1]:
import os
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import cv2
import time
from keras.models import load_model
import random
from collections import deque
import threading
# import argparse

# ap = argparse.ArgumentParser()
# ap.add_argument("-n", "--num-frames", type=int, default=100,help="# of frames to loop over for FPS test")

model_path = "model\\3DCNN+3LSTM_128_4_aug_v4.h5"

IMAGE_WIDTH = 640
IMAGE_HEIGHT = 480
classes = ['Jap','Hook','Uppercut','None']
frames = deque(maxlen=24)
status = 0 # 0:start, 1:generate pose, 2:predict
# previous = 0

class OutputFrame:
    def __init__(self):
        self.frame = np.zeros((IMAGE_HEIGHT,IMAGE_WIDTH,3))
        self.label = 'Warming Up...'
        self.frameno = 0
        self.pose = ''
        self.score = 0
        self.message = ''
    
    def randomPose(self):
        global status
        if status == 1:
            index = random.randint(0,2)
            self.pose = classes[index]
            status = 2
    
    def checkPose(self,confidence):
        global status
        if self.pose == self.label:
            if confidence >= 0.75:
                self.message = 'PERFECT!'
                self.score += 100
            else:
                self.message = 'GOOD!'
                self.score += 50
        else:
            print('MISS')
            self.message = 'MISS!'
            status = 1
                
class WebcamThread(threading.Thread):
    def __init__(self, name):
        threading.Thread.__init__(self)
        self.name = name
    def run(self):
        print("Starting " + self.name)
        self.get_frame()
        print("Exiting " + self.name)
    def get_frame(self):
        while not done:
            _, frame = cap.read()
            output_frame.frame = frame

class PredictorThread(threading.Thread):
    def __init__(self, name):
        threading.Thread.__init__(self)
        self.name = name
    def run(self):
        global model_path,status
        print("Starting " + self.name)
        print("[INFO] loading network...")
        self.model = load_model(model_path)
        print("[INFO] model loaded successfully...")
        status = 1
        self.predict()
        print("Exiting " + self.name)
    
    def predict(self):
        global frames, status
        while not done:
            _, image_np = cap.read()
            image_np = cv2.resize(image_np,(128,128),interpolation=cv2.INTER_AREA)
            image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
            frames.append(image_np)
            output_frame.frameno = len(frames)
            if len(frames) < 24:
                continue
            else:
                if status == 2:
                    output_frame.message = ''
                    np_frames = np.array(frames)
                    label, confidence = self.predict_label(np_frames)
                    for i in range(18):
                        frames.popleft()
                    print('pred: '+label)  
                    output_frame.label = label
                    output_frame.checkPose(confidence)
                    status = 1
                
    def predict_label(self, frames):  
        X_train = np.expand_dims(frames, axis=0)
        train_set = X_train.astype('float16')
        train_set -= 111.75
        train_set /= 143.2
        preds = self.model.predict(train_set)
        label = classes[np.argmax(preds,axis=1)[0]]
        confidence = np.max(preds,axis=1)[0]
        return label, confidence

if __name__ == "__main__":
    done = False

    cap = cv2.VideoCapture(0)
    cap.set(3, IMAGE_WIDTH)
    cap.set(4, IMAGE_HEIGHT)
    output_frame = OutputFrame()

    webcam_thread = WebcamThread("Webcam Thread")
    predictor_thread = PredictorThread("Predictor Thread")
    webcam_thread.start()
    predictor_thread.start()

    while True:
        to_show = output_frame.frame
        output_frame.randomPose()
        
        if status!=0:
            cv2.putText(to_show, str(output_frame.pose), (260, 400), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 0), 4)
        cv2.putText(to_show, str(output_frame.message), (250,250), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 4, 8)
        cv2.putText(to_show, "Score: {}".format(output_frame.score), (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        cv2.putText(to_show, "Label: {}".format(output_frame.label), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
        cv2.putText(to_show, str(output_frame.frameno), (580, 35), cv2.FONT_HERSHEY_SIMPLEX, 1.3, (200, 100, 0), 4)
        cv2.imshow('frame', to_show)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            done = True
            break

    cap.release()
    cv2.destroyAllWindows()

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


Starting Webcam Thread
Starting Predictor Thread
[INFO] loading network...
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
[INFO] model loaded successfully...
pred: Hook
pred: Jap
pred: Uppercut
pred: Uppercut
MISS
pred: Uppercut
pred: Uppercut
pred: Hook
pred: Uppercut
pred: Uppercut
MISS
pred: Uppercut
MISS
pred: Jap
pred: Uppercut
pred: Uppercut
MISS
pred: Jap
pred: Jap
pred: Hook
pred: Uppercut
MISS
pred: Hook
pred: Uppercut
MISS
pred: Jap
pred: Uppercut
pred: Uppercut
pred: Uppercut
MISS
pred: Hook
pred: Uppercut
pred: Uppercut
pred: Uppercut
pred: Uppercut
MISS
pred: Uppercut
pred: Uppercut
MISS
pred: Uppercut
pred: Uppercut
MISS
pred: Uppercut
pred: Uppercut
MISS
pred: None
MISS
pred: Uppercut
pred: Uppercut
pred: Uppercut
MISS
pred: Hook
pred: Uppercut
pred: Uppercut


In [5]:
import os
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import cv2
import time
from keras.models import load_model
import random
from collections import deque
import threading

font = cv2.FONT_HERSHEY_SIMPLEX
# img_rows,img_cols=128, 128
label = 'Warming Up...'
classes = ['Jap','Hook','Uppercut','None']
frames = deque(maxlen=24)
framecount = 0
# pose = 'Ready'

class PredictorThread(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        global label,frames
        print("[INFO] loading network...")
        model_path = "model\\3DCNN+3LSTM_128_4_aug_v4.h5"
        self.model = load_model(model_path)
        print("[INFO] model loaded successfully...")

        while (~(frame is None)):
#             rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
            print(len(frames))
#             cv2.putText(original, str(len(frames)), (500, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
            if len(frames) < 24:
                continue
            else:
                np_frames = np.array(frames)
                label, confidence = self.predict(np_frames)
                for i in range(12):
                    frames.popleft()
                print(label)
            
    def predict(self, frames):
        X_train = np.expand_dims(frames, axis=0)
#         print(X_train.shape)
        train_set = X_train.astype('float16')
        train_set -= 111.75
        train_set /= 143.2
        preds = self.model.predict(train_set)
        print(preds)
        label = classes[np.argmax(preds,axis=1)[0]]
        confidence = np.max(preds,axis=1)[0]
        return label, confidence

cap = cv2.VideoCapture(0)
# cv2.namedWindow('Original', cv2.WINDOW_NORMAL)
ret = cap.set(3,640)
ret = cap.set(4,480)
time.sleep(5)

keras_thread = PredictorThread()
keras_thread.start()

while True:
    ret, original = cap.read()
    original = cv2.flip(original, 3)
    frame = cv2.resize(original,(128,128),interpolation=cv2.INTER_AREA)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    
    framecount = framecount + 1
    print('framecount = '+str(framecount))
    # Display the predictions
#     cv2.putText(original, str(frameno), (500, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    cv2.putText(original, "Label: {}".format(label), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    cv2.imshow("Classification", original)

    if (cv2.waitKey(1) & 0xFF == ord('q')):
        break;

cap.release()
frame = None
cv2.destroyAllWindows()

[INFO] loading network...
framecount = 1
framecount = 2
framecount = 3
framecount = 4
framecount = 5
framecount = 6
framecount = 7
framecount = 8
framecount = 9
framecount = 10
framecount = 11
framecount = 12
framecount = 13
framecount = 14
framecount = 15
framecount = 16
framecount = 17
framecount = 18
framecount = 19
framecount = 20
framecount = 21
framecount = 22
framecount = 23
framecount = 24
framecount = 25
framecount = 26
framecount = 27
framecount = 28
framecount = 29
framecount = 30
framecount = 31
framecount = 32
framecount = 33
framecount = 34
framecount = 35
framecount = 36
framecount = 37
framecount = 38
framecount = 39
framecount = 40
framecount = 41
framecount = 42
framecount = 43
framecount = 44
framecount = 45
framecount = 46
framecount = 47
framecount = 48
framecount = 49
framecount = 50
framecount = 51
framecount = 52
framecount = 53
framecount = 54
framecount = 55
framecount = 56
framecount = 57
framecount = 58
framecount = 59
framecount = 60
framecount = 61
frameco

Exception in thread Thread-14:
TypeError: only size-1 arrays can be converted to Python scalars

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\dpadmin\Anaconda3\envs\pycv\lib\threading.py", line 916, in _bootstrap_inner
    self.run()
  File "<ipython-input-5-ac44aee1557c>", line 40, in run
    label, confidence = self.predict(np_frames)
  File "<ipython-input-5-ac44aee1557c>", line 48, in predict
    train_set = X_train.astype('float16')
ValueError: setting an array element with a sequence.



In [11]:
import os
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import cv2
import time
from keras.models import load_model
import random

model_path = "model\\3DCNN+3LSTM_128_4_aug_v4.h5"
model = load_model(model_path)

font = cv2.FONT_HERSHEY_SIMPLEX
img_rows,img_cols=128, 128
framecount = 0
fps = ""
start = time.time()
frames = []
confidence =''
pred=''
classes = ['Jap','Hook','Uppercut','None']

previous = 0
score =0
pose = 3
hit = False


cap = cv2.VideoCapture(0)
cv2.namedWindow('Original', cv2.WINDOW_NORMAL)
# set rt size as 640x480
ret = cap.set(3,640)
ret = cap.set(4,480)
time.sleep(2)

while True:
    ret, frame = cap.read()
    time.sleep(0.1)
    frame = cv2.flip(frame, 3)
    
    framecount = framecount + 1
    end  = time.time()
    timediff = (end - start)
    if( timediff >= 1):
        fps = 'FPS:%s' %(framecount)
        start = time.time()
        framecount = 0
    cv2.putText(frame,fps,(10,20), font, 0.7,(0,255,0),2,1)
    
    image=cv2.resize(frame,(img_rows,img_cols),interpolation=cv2.INTER_AREA)
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    frames.append(rgb)
    input=np.array(frames)
    
    if len(input)==10: # initiate instruction
        pose = random.randint(0,2)
        if previous == pose: # ensure different psoe is generated
            pose +=1
        if pose ==3:
            pose =0
        count =0
        hit = False
        
    if len(input)==24: # prediction
        frames = []
        X_train = np.expand_dims(input, axis=0)
        train_set = X_train.astype('float16')
        train_set -= 111.75
        train_set /= 143.2
        result = model.predict(train_set)
        num = np.argmax(result,axis =1)
        max = np.max(result,axis = 1)
        confidence = max[0]
        pred = str(classes[int(num[0])])
        input=[]    
        
        if pose==num:
            hit = True
            if confidence >= 0.8:
                score +=100
            elif confidence >= 0.4 and confidence < 0.8:
                score +=80
            else:
                score +=50
                
        print('pose: '+str(pose))
        print('pred: '+str(num))
        print('confidence: '+str(confidence))
        print(hit)
    
    if len(input) <10:  # show if hit
        if hit:
            if confidence >= 0.8:
                cv2.putText(frame, 'PERFECT!', (280,250), font, 2, (255, 255, 255), 4, 8)
            elif confidence >= 0.4 and confidence < 0.8:
                cv2.putText(frame, 'GOOD!', (280,250), font, 2, (255, 255, 255), 4, 8)
            else:
                cv2.putText(frame, 'HIT!', (280,250), font, 2, (255, 255, 255), 4, 8)
        else:
            cv2.putText(frame, 'MISS!', (280,250), font, 2, (0, 0, 255), 4, 8)
       
    # layout
    cv2.putText(frame, classes[pose] , (300,400), font, 2, (255, 255, 0), 2, 3)
    cv2.putText(frame, 'Score: ' + str(score), (580,20), font, 0.7, (0, 255, 0), 2, 1)
    cv2.putText(frame, 'Confidence: ' + str(confidence), (10, 100), font, 0.7, (0, 255, 0), 2, 1)
    cv2.putText(frame, 'Class: ' + pred, (10, 50), font, 0.7, (0, 255, 0), 2, 1)
    cv2.imshow('Original',frame)
    
    previous = pose
    
    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        break
        
cap.release()
cv2.destroyAllWindows()


pose: 0
pred: [1]
confidence: 0.97550464
False
pose: 2
pred: [2]
confidence: 0.9980812
True
pose: 0
pred: [2]
confidence: 0.8246604
False
pose: 1
pred: [1]
confidence: 0.68752617
True
pose: 2
pred: [2]
confidence: 0.99920076
True
pose: 0
pred: [2]
confidence: 0.9881832
False
pose: 1
pred: [3]
confidence: 0.99408275
False
pose: 2
pred: [2]
confidence: 0.78551674
True
pose: 0
pred: [2]
confidence: 0.9987186
False
pose: 1
pred: [1]
confidence: 0.5910543
True
pose: 2
pred: [2]
confidence: 0.9997181
True
pose: 1
pred: [1]
confidence: 0.5752121
True
pose: 0
pred: [2]
confidence: 0.9996437
False
pose: 1
pred: [2]
confidence: 0.99630046
False
pose: 2
pred: [2]
confidence: 0.6315495
True
pose: 0
pred: [3]
confidence: 0.9093354
False
pose: 1
pred: [0]
confidence: 0.92955494
False
pose: 0
pred: [2]
confidence: 0.99903584
False
pose: 1
pred: [2]
confidence: 0.85418904
False
pose: 2
pred: [2]
confidence: 0.9992848
True
pose: 0
pred: [0]
confidence: 0.5203523
True
pose: 1
pred: [2]
confidence: 0.601