In [1]:
from ultralytics import YOLO
import cv2
import torch
import os
import numpy as np
import math
from collections import deque


Importing dependecies and paths

In [2]:
DATA_path = r'D:\Datasets\fight'
actions = ['fighting','not_fighting']
fighting_path = r'D:\Datasets\fight\fighting'
not_fighting_path = r'D:\Datasets\fight\not_fighting'

Main functions to process and extract keypoints

In [21]:

def calc_distances(hands_dict,body_kp,head_kp):

    # creating a dictionary of distances between each keypoint (except of the same object) in the keypoint_dict
    dist_dict = {}
    keyhead = head_kp.keys()
    keysh = hands_dict.keys()
    keysb = body_kp.keys()

    # calculating distances between keypoints on each hand: left to left, right to left, left to right and right to right
    for i,keyi in enumerate(keysh,start =1):
        for j,keyj in enumerate(keysh,start =1):
            if j>=i:
                break
            distll = calc_euclid_dist(hands_dict[keyi][0],hands_dict[keyj][0])
            distlr = calc_euclid_dist(hands_dict[keyi][0],hands_dict[keyj][1])
            distrl = calc_euclid_dist(hands_dict[keyi][1],hands_dict[keyj][0])
            distrr = calc_euclid_dist(hands_dict[keyi][1],hands_dict[keyj][1])
            dist_dict[f'{keyi}'+f'{keyj}'] = list([distll,distlr,distrl,distrr])
 
    # calculating distances between hands and bodies
    for i,keyi in enumerate(keysh,start =1):
        for j,keyj in enumerate(keysb,start =1):
            if j>=i:
                break

            distlb = calc_euclid_dist(hands_dict[keyi][0],body_kp[keyj])
            
            distrb = calc_euclid_dist(body_kp[keyj],hands_dict[keyi][1])

            dist_dict[f'{keyi}'+f'{keyj}'].append(distlb)
            dist_dict[f'{keyi}'+f'{keyj}'].append(distrb)
                
    for i,keyi in enumerate(keysb,start =1):
        for j,keyj in enumerate(keysh,start =1):
            if j>=i:
                break

            distlb = calc_euclid_dist(hands_dict[keyj][0],body_kp[keyi])
            
            distrb = calc_euclid_dist(body_kp[keyi],hands_dict[keyj][1])
            

            dist_dict[f'{keyi}'+f'{keyj}'].append(distlb)
            dist_dict[f'{keyi}'+f'{keyj}'].append(distrb)


    # calculating distances between hands and heads
    for i,keyi in enumerate(keysh,start =1):
        for j,keyj in enumerate(keyhead,start =1):
            if j>=i:
                break

            distlh = calc_euclid_dist(hands_dict[keyi][0],head_kp[keyj])
            
            distrh = calc_euclid_dist(head_kp[keyj],hands_dict[keyi][1])
            
            dist_dict[f'{keyi}'+f'{keyj}'].append(distlh)
            dist_dict[f'{keyi}'+f'{keyj}'].append(distrh)



    for i,keyi in enumerate(keyhead,start =1):
        for j,keyj in enumerate(keysh,start =1):
            if j>=i:
                break

            distlh = calc_euclid_dist(hands_dict[keyj][0],head_kp[keyi])
            
            distrh = calc_euclid_dist(head_kp[keyi],hands_dict[keyj][1])
            
 
            dist_dict[f'{keyi}'+f'{keyj}'].append(distlh)
            dist_dict[f'{keyi}'+f'{keyj}'].append(distrh)



    # calculating distances between bodies
    for i,keyi in enumerate(keysb,start =1):
        for j,keyj in enumerate(keysb,start =1):
            if j>=i:
                break

            distbb = calc_euclid_dist(body_kp[keyi],body_kp[keyj])

            dist_dict[f'{keyi}'+f'{keyj}'].append(distbb)


    # calculating distances between heads
    for i,keyi in enumerate(keyhead,start =1):
        for j,keyj in enumerate(keyhead,start =1):
            if j>=i:
                break

            disthh = calc_euclid_dist(head_kp[keyi],head_kp[keyj])

            dist_dict[f'{keyi}'+f'{keyj}'].append(disthh)
    
    return dist_dict


def extract_hands_keypoints(results, threshold_class, threshold_keypoint):
    # creating a dictionary to collect keypoints to each object id as dictionary key
    existing_kp = {}
    for result,i_d in zip(results[0],results[0].boxes.id):
        # There results for bounding boxes, and confidence scores for general detect
        x1, y1, x2, y2,_, conf_for_detect, class_id_detected = (result.boxes.data.tolist())[0]
        # If the confidence score for general detect is lower than threshold, skip
        if conf_for_detect < threshold_class:
            continue
        # keypoints
        keys = (result.keypoints.data.tolist())[0]
        xl_key, yl_key, confl = keys[9]
        if confl > threshold_keypoint:
           l = [int(xl_key),int(yl_key)]
        else:
            l = []
        xr_key, yr_key, confr = keys[10]
        if confr > threshold_keypoint:
           r = [int(xr_key),int(yr_key)]
        else:
            r = []
        hands_coords = list([l,r])
        # Adding existing hand keypoints of an object in a frame to the dictionary   
        existing_kp[int(i_d)] = hands_coords
    return existing_kp

def extract_body_keypoints(results,threshold_class, threshold_keypoint):
    # creating a dictionary to collect keypoints to each object id as dictionary key
    existing_kp = {}
    for result,i_d in zip(results[0],results[0].boxes.id):
        # There results for bounding boxes, and confidence scores for general detect
        x1, y1, x2, y2,_, conf_for_detect, class_id_detected = (result.boxes.data.tolist())[0]
        # If the confidence score for general detect is lower than threshold, skip
        if conf_for_detect < threshold_class:
            continue
        # keypoints
        keys = (result.keypoints.data.tolist())[0]
        xl_key, yl_key, confl = keys[5]
        xr_key, yr_key, confr = keys[6]
        if (confl>threshold_keypoint) and (confr>threshold_keypoint):
            # Adding existing hand keypoints of an object in a frame to the dictionary   
            mid_point  = list([int((xr_key+xl_key)/2),int((yl_key+yr_key)/2)])
            
        else:
            mid_point = []

        existing_kp[int(i_d)] = mid_point

    return existing_kp

def extract_head_keypoints(results,threshold_class, threshold_keypoint):
    # creating a dictionary to collect keypoints to each object id as dictionary key
    existing_kp = {}
    for result,i_d in zip(results[0],results[0].boxes.id):
        # There results for bounding boxes, and confidence scores for general detect
        x1, y1, x2, y2,_, conf_for_detect, class_id_detected = (result.boxes.data.tolist())[0]
        # If the confidence score for general detect is lower than threshold, skip
        if conf_for_detect < threshold_class:
            continue
        # keypoints
        keys = (result.keypoints.data.tolist())[0]
        xh_key, yh_key, confh = keys[0]
        if confh>threshold_keypoint:
            # Adding existing hand keypoints of an object in a frame to the dictionary   
            mid_point  = list([int(xh_key),int(yh_key)])
        else:
            mid_point = []
        existing_kp[int(i_d)] = mid_point
    return existing_kp


def extract_keypoints(results, threshold_class):
    existing_kp = {}
    for result,i_d in zip(results[0],results[0].boxes.id):
        # There results for bounding boxes, and confidence scores for general detect
        x1, y1, x2, y2,_, conf_for_detect, class_id_detected = (result.boxes.data.tolist())[0]
        # If the confidence score for general detect is lower than threshold, skip
        if conf_for_detect < threshold_class:
            continue
        # keypoints
        keys = (result.keypoints.data.tolist())[0]
        keyp_arr = list()
        for key in keys:
            keyp_arr.append(key)
        # Adding existing hand keypoints of an object in a frame to the dictionary   
        existing_kp[int(i_d)] = keyp_arr
    return existing_kp

def calc_kp_to_kp_dist(keypoints_dict):
    # creating a dictionary of distances between each keypoint (except of the same object) in the keypoint_dict
    dist_dict = {}
    keys = keypoints_dict.keys()
    # calculating distances between keypoints 
    for l,keyi in enumerate(keys,start =1):
        for m,keyj in enumerate(keys,start =1):
            if m>=l:
                break  
            for i,p1 in enumerate(keypoints_dict[keyi]):
                for j,p2 in enumerate(keypoints_dict[keyj]):
                    dist = calc_euclid_dist(p1,p2)
                    dist_dict[f'{keyi}'+f'{keyj}'+f'{i}'+f'{j}'] = dist
    return dist_dict

def calc_euclid_dist(p1,p2):
    if (len(p1)>0) and (len(p2)>0):
        dist = int(math.sqrt((p1[0]-p2[0])*(p1[0]-p2[0]) + (p1[1]-p2[1])*(p1[1]-p2[1])))
        return dist
    else: 
        return np.nan
    
def calc_grad(dist_dict):
    return

Initializing dictionaries and parametres

In [4]:
text2 = "No suspicious activity"
text1 = "Suspicious activity"
text3 = "No people in sight"
color2 = (100, 200, 0)
color1 = (100, 0, 200)
color3 = (100, 100, 100)
font_scale = 1.6
thickness = 2

winsize = 40
all_keypoints = {}
distance_dict = {}
average_dist = {}
grad_dict = {}
outputs = [0,1]
nums_sequences = 0

Initializing YOLOv8 pose model and caption from file

In [5]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
modely = YOLO('yolov8l-pose.pt')  # load a pretrained YOLOv8n classification model
modely.to(device)
video_path = r"D:\videos\fight4.mp4"
vid_name = 'v4'
cap = cv2.VideoCapture(video_path)
# Get video properties
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fps = cap.get(cv2.CAP_PROP_FPS) # or number
# Create a VideoWriter object to save the output video
output_video_path = r"D:\videos_processed\fight4_processed.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

Data coleccting cycle. Each (winsize) frames you will have to press 'f' key if fight was on those frames and any other key if not

In [6]:
while cap.isOpened():
# Read a frame from the video
    success, frame = cap.read()
    if success:

        results = modely.track(frame, persist=True, retina_masks=True, boxes=True, show_conf=False, line_width=1,  conf=0.8, iou=0.5,  classes=0, show_labels=False, device=device,verbose = False,tracker="bytetrack.yaml")
        if results[0].boxes.id is not None:
            boxes = results[0].boxes.xyxy.cpu().numpy().astype(int)
            ids = results[0].boxes.id.cpu().numpy().astype(int)
            for box, i_d in zip(boxes, ids):
                x1, y1, x2, y2 = box[0], box[1], box[2], box[3]



                # Draw bounding box
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

                # Draw customer id on the frame above the bounding box
                text = f"{i_d}"
                font = cv2.FONT_HERSHEY_SIMPLEX
                font_scale = 0.6
                font_thickness = 1
                text_size = cv2.getTextSize(text, font, font_scale, font_thickness)[0]

                # Calculate the position to align the label with the top of the bounding box
                text_x = x1 + (x2 - x1 - text_size[0]) // 2
                text_y = y1 - 10  # Adjust this value for the desired vertical offset

                # Make sure the text_y position is within the frame's bounds
                if text_y < 0:
                    text_y = 0

                # Draw the label background rectangle
                cv2.rectangle(frame, (text_x - 5, text_y - text_size[1] - 5), (text_x + text_size[0] + 5, text_y + 5), (0, 0, 0), -1)

                #Draw the customer id text
                cv2.putText(
                    frame,
                    text,
                    (text_x, text_y),
                    font,
                    font_scale,
                    (255, 255, 255),  # White color
                    font_thickness,
                    lineType=cv2.LINE_AA
                )
    
            #extracting keypoints
            body_kp = extract_body_keypoints(results = results,threshold_class=0.4,threshold_keypoint=0.4)
            hands_kp = extract_hands_keypoints(results = results,threshold_class=0.4,threshold_keypoint=0.4)
            head_kp = extract_head_keypoints(results = results,threshold_class=0.4,threshold_keypoint=0.4)
            #calculating distances between keypoints

            dd = calc_distances(hands_kp,body_kp,head_kp)
            #appending distances dictionary and evaluating average distance and classification based on it
            for key in dd.keys():

                if key not in distance_dict.keys():
                    distance_dict[key] = deque(maxlen=40)

                distance_dict[key].append(dd[key])
                
                if len(distance_dict[key]) == winsize:
                    nums_sequences = nums_sequences + 1
                    print(f'Processing pair {key}.')
                    keypoints = np.array(distance_dict[key])
                    if cv2.waitKey(-1) & 0xFF == ord('f'):
                        if cv2.waitKey(-1) & 0xFF == ord('f'):
                            save_path = fighting_path   + f'\{vid_name}' +  f'{nums_sequences}'
                        else:
                            save_path = not_fighting_path  + f'\{vid_name}' +  f'{nums_sequences}'
                    else:
                        distance_dict[key].clear()
                        continue
                    np.save(save_path,keypoints)
                    distance_dict[key].clear()
            

        annotated_frame_show = cv2.resize(frame, (1080, 720))
        cv2.imshow("YOLOv8 Inference", annotated_frame_show)
        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
            
        
    else:
        # Break the loop if the end of the video is reached
        break

cap.release()
cv2.destroyAllWindows()



Processing pair 42.
Processing pair 41.
Processing pair 12.
Processing pair 92.
Processing pair 21.
Processing pair 12.
Processing pair 12.
Processing pair 228.
Processing pair 3528.
Processing pair 3628.
Processing pair 3928.
Processing pair 3936.
Processing pair 3628.
Processing pair 3628.
Processing pair 3628.
Processing pair 4436.
Processing pair 4536.
Processing pair 2836.
Processing pair 4636.
Processing pair 4836.
Processing pair 4936.
Processing pair 4948.
Processing pair 4836.
Processing pair 4936.
Processing pair 4948.
Processing pair 5036.
Processing pair 5048.
Processing pair 5049.
Processing pair 4836.
Processing pair 4936.
Processing pair 4948.
Processing pair 5036.
Processing pair 5048.
Processing pair 5049.
Processing pair 4836.
Processing pair 4936.
Processing pair 4948.
Processing pair 4836.
Processing pair 5036.
Processing pair 5048.
Processing pair 4836.
Processing pair 4836.
Processing pair 5848.
Processing pair 5948.
Processing pair 5958.
Processing pair 5948.
Pro

Preprocessing data

In [8]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [10]:
from math import sqrt

In [11]:
label_map = {label: num for num, label in enumerate(actions)}
sequences,labels = [],[]
for action in actions:
    for file in os.listdir(os.path.join(DATA_path,action)):
        sequences.append(np.load(os.path.join(DATA_path,action,file),allow_pickle=True))
        labels.append(label_map[action])


seq_data = np.array(sequences)
for i,row in enumerate(seq_data):
    for j,line in enumerate(row):
        nums = 0
        num_num = 0
        deviation = 0
        for num in line:
            if np.isnan(num) == False:        
                nums = nums + num
                num_num = num_num + 1

        if num_num == 0:
            line = np.nan_to_num(x = line,copy= False,nan = 0)
            continue

        mean = nums/num_num

        for num in line:
            if np.isnan(num) == False:        
                deviation = deviation + (num - mean)*(num-mean)

        std_dev = sqrt(deviation/num_num)

        for k,num in enumerate(line):
            if np.isnan(num) == False:  
                seq_data[i][j][k]= (num - mean)/std_dev

        line = np.nan_to_num(x = line,copy= False,nan = 0)

seq_labels = to_categorical(labels).astype(int)

X_train, X_test, y_train, y_test = train_test_split(seq_data,seq_labels,test_size=0.1) 

  seq_data[i][j][k]= (num - mean)/std_dev


In [12]:
X_test.shape
y_test.shape

(55, 2)

Building neural network and train

In [44]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Conv1D,BatchNormalization,Conv2D,Flatten, Dropout
from tensorflow.keras.callbacks import TensorBoard

log_dir = r"D:\Models\fight_rec\Logs"
tb_callback = TensorBoard(log_dir=log_dir)

In [45]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

[]

In [54]:
model_our  = Sequential()
model_our.add(Conv1D(32,20,activation = 'gelu',padding = 'same'))
model_our.add(BatchNormalization(axis = -1,input_shape = (winsize,32)))
model_our.add(Conv1D(64,20,activation = 'gelu',padding = 'same'))
model_our.add(BatchNormalization(axis = -1,input_shape = (winsize,64)))
model_our.add(Conv1D(128,20,activation = 'gelu',padding = 'same'))
model_our.add(BatchNormalization(axis = -1,input_shape = (winsize,128)))
model_our.add(Flatten())
model_our.add(Dense(128*40,activation = 'gelu'))
model_our.add(BatchNormalization(axis = -1,input_shape = (winsize,64*40)))
model_our.add(Dense(64*40,activation = 'gelu'))
model_our.add(BatchNormalization(axis = -1,input_shape = (winsize,64*40)))
model_our.add(Dense(32*40,activation = 'gelu'))
model_our.add(BatchNormalization(axis = -1,input_shape = (winsize,32*40)))
model_our.add(Dense(16*40,activation = 'gelu'))
model_our.add(BatchNormalization(axis = -1,input_shape = (winsize,32*40)))
model_our.add(Dense(8*40,activation = 'gelu'))
model_our.add(Dense(len(actions),activation = 'softmax'))


model_our.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),loss = 'categorical_crossentropy',metrics=['categorical_accuracy'])

In [55]:
model_our.fit(X_train,y_train,epochs = 50,callbacks=[tb_callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x20e22e5ac90>

In [56]:
def valuate_model(X_test,y_test,model):
    right_ans = 0
    wrong_ans = 0
    results = model.predict(X_test)
    for result,ans in zip(results,y_test):
        if np.argmax(result) == np.argmax(ans):
            right_ans = right_ans + 1
        else:
            wrong_ans = wrong_ans + 1
    accuracy = right_ans/(right_ans+wrong_ans)
    print(right_ans)
    print(wrong_ans)
    return accuracy

print(valuate_model(X_test,y_test,model_our))

39
16
0.7090909090909091


Testing on a real video

In [49]:
def preprocess_keypoints(keypoints):
    kepoint = np.array(keypoints)
    for i,line in enumerate(kepoint):
        nums = 0
        num_num = 0
        deviation = 0
        for num in line:
            if np.isnan(num) == False:        
                nums = nums + num
                num_num = num_num + 1

            if num_num == 0:
                line = np.nan_to_num(x = line,copy= False,nan = 0)
                continue

        mean = nums/num_num

        for num in line:
            if np.isnan(num) == False:        
                deviation = deviation + (num - mean)*(num-mean)

        std_dev = sqrt(deviation/num_num)

        for j,num in enumerate(line):
            if np.isnan(num) == False:  
                kepoint[i][j]= (num - mean)/std_dev

        line = np.nan_to_num(x = line,copy= False,nan = 0)
    return np.array([kepoint])



In [42]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
modely = YOLO('yolov8l-pose.pt')  # load a pretrained YOLOv8n classification model
modely.to(device)
video_path = r"D:\videos\hands3.mp4"
cap = cv2.VideoCapture(video_path)
# Get video properties
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

fps = cap.get(cv2.CAP_PROP_FPS) # or number
# Create a VideoWriter object to save the output video
output_video_path = r"D:\videos_processed\fight1_processed.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))


actions = ['fighting','not_fighting']

text2 = "No suspicious activity"
text1 = "Suspicious activity"
text3 = "No people in sight"
color_map = {'fighting': (200,100,0),'not_fighting': (0,100,200)}
font_scale = 1.6
thickness = 2

winsize = 40

distance_dict = {}


label_map = {num: label for num, label in enumerate(actions)}


In [43]:
ans = 'not_fighting'
while cap.isOpened():
# Read a frame from the video
    success, frame = cap.read()
    if success:

        results = modely.track(frame, persist=True, retina_masks=True, boxes=True, show_conf=False, line_width=1,  conf=0.6, iou=0.5,  classes=0, show_labels=False, device=device,verbose = False,tracker="bytetrack.yaml")




        if results[0].boxes.id is not None:
            
            #extracting keypoints
            body_kp = extract_body_keypoints(results = results,threshold_class=0.4,threshold_keypoint=0.4)
            hands_kp = extract_hands_keypoints(results = results,threshold_class=0.4,threshold_keypoint=0.4)
            head_kp = extract_head_keypoints(results = results,threshold_class=0.4,threshold_keypoint=0.4)
            #calculating distances between keypoints

            dd = calc_distances(hands_kp,body_kp,head_kp)
            #appending distances dictionary and evaluating average distance and classification based on it
            for key in dd.keys():

                if key not in distance_dict.keys():
                    distance_dict[key] = deque(maxlen=40)

                distance_dict[key].append(dd[key])
                
                if len(distance_dict[key]) == winsize:
                    nums_sequences = nums_sequences + 1
                    keypoints = preprocess_keypoints(distance_dict[key])
                    prediction = model_our.predict(keypoints)
                    print(prediction)
                    ans = label_map[np.argmax(prediction)]
                    distance_dict[key].clear()
                    if ans == 'fighting':
                        break
                    

            text_size, _ = cv2.getTextSize(ans, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
            text_position = (frame_width - text_size[0] - 10, text_size[1] + 10)
            cv2.rectangle(frame, (text_position[0] - 5, text_position[1] - text_size[1] - 5),
                                    (text_position[0] + text_size[0] + 5, text_position[1] + 5), color=(0, 0, 0),
                                    thickness=cv2.FILLED)
            cv2.putText(frame, ans, text_position, cv2.FONT_HERSHEY_SIMPLEX, font_scale, color_map[ans], thickness, cv2.LINE_AA)


                    
            

        annotated_frame_show = cv2.resize(frame, (1080, 720))
        out.write(frame)
        cv2.imshow("YOLOv8 Inference", annotated_frame_show)
        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
            
        
    else:
        # Break the loop if the end of the video is reached
        break
out.release()
cap.release()
cv2.destroyAllWindows()



[[ 8.1786e-26           1]]
[[ 7.2953e-06     0.99999]]
[[ 2.5642e-06           1]]
[[ 0.00039812      0.9996]]
[[          0           1]]
[[   0.023944     0.97606]]
[[   0.014966     0.98503]]
[[  0.0013765     0.99862]]
[[ 0.00036417     0.99964]]
[[ 0.00072461     0.99928]]
[[ 4.2748e-05     0.99996]]
[[   0.000117     0.99988]]
[[    0.99478   0.0052165]]
[[    0.99689   0.0031076]]
[[    0.99331   0.0066865]]
[[  0.0061053     0.99389]]
[[    0.25224     0.74776]]
[[ 3.3217e-11           1]]
[[ 5.0484e-09           1]]
[[    0.99992  8.1315e-05]]
