In [326]:
#this script works with OpenCV4
import cv2
from helpers import *
from keras import models
import numpy as np 
from skimage.transform import resize
from sklearn.metrics import confusion_matrix,accuracy_score
from datetime import datetime
from keras.applications.mobilenet_v2 import preprocess_input

## Reading Video, tracking and classification

In [458]:
class_names = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y','Z', 'del', 'nothing', 'space' ]

In [459]:
## Loading our pretrained model
model = models.load_model('models/model_large_asl_mobilenet.h5')

In [460]:
vote = ['']*30 # Used for voting selection
prediction_per_frame = [] # Used for saving voted decisions

In [461]:
begin_time = datetime.now()

#Read Video
cap = cv2.VideoCapture('demos/Sayed_Demo3.mp4')

#Get Region of Interest
success, img = cap.read()
roi = segment_hand(img,0)

#Define Boundry Box
bbox = (roi[2], roi[0], roi[3]-roi[2], roi[1]-roi[0])

#Initialize Tracker
#tracker = cv2.TrackerCSRT_create()
tracker = cv2.TrackerMOSSE_create()
print('Begginning detection')
success = tracker.init(img, bbox)
print('Start Tracking')
hands= []
count = 0
prediction = '0'
while True:
    #Get current Frame
    success1, img = cap.read()
    
    ## No more frames in the video
    if not success1:
        cap.release()
        cv2.destroyAllWindows()
        break

    
    #count frames 
    count +=1
    n_frame_update = 60 #number of frames after which run the detection and reset the tracker

    #Run detection every n_frame_update
    if count%n_frame_update == 0:

        #Run detection 
        roi = segment_hand(img,0)
        print(roi)
        bbox = (roi[2], roi[0], roi[3]-roi[2], roi[1]-roi[0])
        
        #delete old tracker and create a new one
        del tracker
        tracker = cv2.TrackerMOSSE_create()
        success = tracker.init(img, bbox)

        #print on screen 
        cv2.putText(img, "Detecting", (100,20), cv2.FONT_HERSHEY_SIMPLEX, 0.75,(0,255,0),2)
    
    # Track otherwise
    else:
        #Update tracker
        success, bbox = tracker.update(img)
    
    #if Tracker succeded
    if success:
        #Draw Boundry Box
        start_point = (int(bbox[0]),int(bbox[1]))
        end_point = (int(bbox[0]+bbox[2]),int(bbox[1]+bbox[3]))
        cv2.rectangle(img, start_point, end_point, (255,0,0), 2)
        
        #Prediction
        # Remove exceptions from tracker
        if start_point[1] < 0:
            prediction_per_frame.append(prediction)
            continue
        # Crop the hand according to tracker and detector results    
        hand = img[start_point[1]:end_point[1],start_point[0]:end_point[0]]
        
        #hands.append(hand)
        
        # Resize Images to suit the model
        test = preprocess_input(hand)
        test = resize(test, (224,224,3))
        test = np.array(test , dtype='float32')
        test = test.reshape(1,224,224,3)
        # Predict
        pred_label = model.predict(test)
        pred_label = np.argmax(pred_label,axis=1)
        pred = class_names[int(pred_label[0])]
        
        #Vote
        vote.pop(0)
        voting = sum([pred == vot for vot in vote])
        vote.append(pred)
        # If the current prediction matches at least 3 of the last five update prediction
        if voting >= 7 or count <7:
            prediction = pred
            
            
        # Append in the Decision list anyway    
        prediction_per_frame.append(prediction)
        
        # Print Decision on preview 
        cv2.putText(img, prediction , (300,50), cv2.FONT_HERSHEY_SIMPLEX, 1.5,(0,0,255),3)

    else:
        #If Tracker Failed
        cv2.putText(img, "Tracking failure detected", (100,80), cv2.FONT_HERSHEY_SIMPLEX, 0.75,(0,0,255),2)
    
    #Show image
    cv2.imshow("Tracking", img)
    k = cv2.waitKey(1)
    if k == ord('q') or k == 27:
        cap.release()
        cv2.destroyAllWindows()
        break
        
time_taken = datetime.now() - begin_time

Begginning detection
Start Tracking
[290, 541, 210, 525]
[198, 479, 115, 417]
[274, 615, 216, 555]
[288, 615, 171, 499]
[202, 543, 230, 534]
[144, 403, 184, 457]
[116, 513, 132, 583]
[134, 543, 140, 516]


## Evaluation

### FPS

In [462]:
fps = len(prediction_per_frame)/time_taken.seconds
fps

12.465116279069768

### Confusion Matrix

In [463]:
## Confusion Matrix for Videos
def compute_confusion(video: int, prediction_per_frame):
    labels = []
    fps = len(prediction_per_frame)
    ## Sayed_Demo2
    if video == 1:
        for i in range(fps):
            if i >0.7*fps:
                labels.append('C')
            elif i >=0.23*fps:
                labels.append('A')                              
            else:
                labels.append('O')
    ## Sayed_Demo3
    if video == 2:
        for i in range(fps):
            if i >fps*0.69:
                labels.append('C')
            elif i >=fps*0.46:
                labels.append('O')                              
            else:
                labels.append('A')
    ## Asmar-Demo3
    if video == 3:
        for i in range(fps):
            if i>0.65:
                labels.append('O')
            elif i>=0.43:
                labels.append('C')
            elif i>=0.24:
                labels.append('R')
            else:
                labels.append('A')
    ## Demo_CV1
    if video == 4:
        for i in range(fps):
            if i>0.68:
                labels.append('C')
            elif i>=0.49:
                labels.append('R')
            elif i>=167:
                labels.append('Z')
            else:
                labels.append('A')
    # Salma_Demo1
    if video == 5:
        for i in range(fps):
            if i >fps*0.88:
                labels.append('A')
            elif i >=fps*0.77:
                labels.append('V')
            elif i >=fps*0.65:
                labels.append('U') 
            elif i >=fps*0.4:
                labels.append('C')  
            elif i >=fps*0.33:
                labels.append('B')
            else:
                labels.append('A')
    # Asmaa_Demo
    if video == 6:
        for i in range(fps):
            if i>0.84*fps:
                labels.append('N')
            elif i>=0.6*fps:
                labels.append('I')
            elif i>=0.41*fps:
                labels.append('B')
            elif i>=0.22*fps:
                labels.append('G')
            else:
                labels.append('B')
    # Sayed_Demo4
    if video == 7:
        for i in range(fps):
            if i>0.68*fps:
                labels.append('I')
            elif i>=0.53*fps:
                labels.append('V')
            elif i>=0.2167*fps:
                labels.append('N')
            else:
                labels.append('W')
    return labels, confusion_matrix(labels, prediction_per_frame)

In [471]:
labels, conf = compute_confusion(2, prediction_per_frame)
conf

array([[155,   1,   0,   1,  36,   0,   2,   1,  51],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0, 111,   0,  19,  36,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0],
       [ 24,   0,   0,   0,  85,  14,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0]])

In [472]:
accuracy_score(labels, prediction_per_frame)*100

65.48507462686567

In [477]:
truth_list = [labels[i] == prediction_per_frame[i] for i in range(len(labels))]
c = 0
correct = [0]
for i in range(len(labels)):
    if truth_list[i]:
        c +=1
    else:
        correct.append(c)
        c=0
        
max_correct_frames = max(correct)
max_correct_frames

63