In [1]:
from env import SCIENTIST

In [2]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time

In [3]:
import importlib # for reloading .py files
# WARNING: updates of config may require restart of kernel if reload is unsuccessful

import config
# reload config without restarting the kernel
importlib.reload(config)
from config import actions, no_sequences, sequence_length, number_of_classes, DATA_PATH, WEIGHTS_PATH, facemesh_included, is_martin

import utils
# reload utils without restarting the kernel
importlib.reload(utils)
from utils import mediapipe_detection, extract_keypoints, draw_styled_landmarks, prob_viz


import model
# reload model without restarting the kernel
importlib.reload(model)
from model import create_model, build_which_model

# CHOOSE MODEL / WEIGHTS

In [4]:
# name of the weights file (without ".keras" ending) 
# also used as model name - dot not allowed, thus deleted, and file renamed
model_name = "model_19"
weights_name = model_name

# BUILD THE MODEL - CHRIS

In [5]:
facemesh_included = False

In [6]:
# OPTIONAL model() arguments
# available model_types: "LSTM", "Conv2D"
# tested activation functions: "tanh", "relu"; OTHERS: see below 
# tested activations for last neural layer: "softmax", "sigmoid"; OTHERS: "linear", "tanh", "relu", "softplus", "softsign", "selu", "elu", "exponential"
# neural multiplicator: multiply neurons per layer by this amount (default = 1); the higher NM, the longer the computation
# output will change according if facemesh_included = True or False

# reLU = Rectified Linear Activation Function: returns 0 if it receives any negative input, but for any positive value x it returns that value back.  It helps the model to account for non-linearity.
# Sigmoid: Outputs a value between 0 and 1, making it suitable for binary classification or multilabel classification tasks where each label is predicted independently.

importlib.reload(config)
from config import model_type, activation_function, activation, neural_factor, metrics
model = create_model(model_type, activation_function, activation, neural_factor, metrics)

  super().__init__(**kwargs)


KeyboardInterrupt: 

# CALL THE MODEL - Martin

In [7]:
if SCIENTIST == "Martin" or SCIENTIST == "Vero": 
    model = build_which_model(model_name)

In [8]:
model.predict

<bound method Model.predict of <keras.src.engine.sequential.Sequential object at 0x0000029B971FAB50>>

## Loading weights

In [9]:
model.load_weights(os.path.join(WEIGHTS_PATH, weights_name + ".keras"))

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'weights\model_19.keras', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

# PROBABILITY VIZUALIZATION - REAL TIME TESTING

In [10]:
import mediapipe as mp
from scipy.stats import mode 

In [11]:
from utils import mp_holistic, mp_drawing, set_camera_settings, default_fps
import datetime

In [24]:
### FPS FIXIN


In [12]:
import time
# 1. New detection variables
sequence = []
sentence = []
predictions = []
all_words = [] 
threshold = 0.4 # 40% 
fps_list = []
last_word = ""
num_of_words = 5

frame_counter = 0

frame_count_for_fps = 0
start_time = time.time()

length_of_sequence = 30
no_of_reps_to_accept = 5
silent_sign_name = "NoHands"

# picture of a static person using no hands as fall-back option for model
static_frame = np.load(os.path.join(DATA_PATH, "_.npy"))



cap = cv2.VideoCapture(0)  # Adjust device index as needed
actual_res, actual_fps = set_camera_settings(cap) # setting camera resolution and fps + printing the actual values
desired_fps = 7
frame_time = 1.0 / desired_fps
last_time = time.time()



#TODO: put recording in subfolder
# recording
timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
video_file_name = f"recording_{timestamp}.avi"
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter(video_file_name, fourcc, 10.0, (int(cap.get(3)), int(cap.get(4))))
# /recording


# Set mediapipe model
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
    while cap.isOpened():
        start_process_time = time.time()

        frame_counter += 1

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        print(results)

        # Draw landmarks
        draw_styled_landmarks(image, results)

        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        # dynamically adjust the length of the sequence using the variable from above
        sequence = sequence[-length_of_sequence:]

        if len(sequence) == length_of_sequence:

            zero_frame = np.zeros_like(sequence[0])  # Creates a zero array with the same shape as a frame

            # overwrites zero_frame with the static
            zero_frame = static_frame

            padding = [zero_frame] * (int((30 - len(sequence))/2))

            # pad the actual sequence from left and right
            padded_sequence = padding + sequence + padding

            # Ensure that the padded sequence is exactly 30 frames long
            padded_sequence = padded_sequence[:30]

            res = model.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)], round(max(res),2))
            predictions.append(np.argmax(res))
            #  my_predictions = list(dict.fromke<ys(predictions)) --> another way how to get rid of duplicates and keep the order of the elements in the list


        #3. Viz logic
            # if np.unique(predictions[-2:])[0]==np.argmax(res): <-- wrong line of code since .unique sorts the elements in the list alphabetically!!!
            
            # if mode(predictions[-3:])[0]==np.argmax(res):


            is_silent_sign = actions[np.argmax(res)] == silent_sign_name

            if (np.unique(predictions[-no_of_reps_to_accept:])[0]==np.argmax(res) 
                and (np.unique(predictions[-no_of_reps_to_accept:]).shape[0]==1) # additional condition to check if the returned unique list has only one element
                and not is_silent_sign
                ): 


                if (res[np.argmax(res)] > threshold) and (actions[np.argmax(res)] != last_word):
                    all_words.append(actions[np.argmax(res)]) 
                    if len(sentence) >= num_of_words:
                        time.sleep(3) 
                        sentence = []
                        sentence.append(actions[np.argmax(res)])
                        last_word = actions[np.argmax(res)]
                        frame_counter = 0
                    elif len(sentence) > 0: 
                        sentence.append(actions[np.argmax(res)])
                        last_word = actions[np.argmax(res)]
                        frame_counter = 0
                    elif len(sentence) == 0:
                        sentence.append(actions[np.argmax(res)])
                        last_word = actions[np.argmax(res)]
                        frame_counter = 0
                    # the same thing:
                    # else:
                    #     sentence.append(actions[np.argmax(res)])
                        
                        
            # if len(sentence) > 5:
            #     sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, threshold)
       
        if frame_counter == 15:
            sentence = []
            last_word = ""
            frame_counter = 0   

        # cv2.putText(image, f"{frame_counter}, last word {last_word}", (50,50),
        #             cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        cv2.putText(image, f"All detected words: {', '.join(all_words)}", (0,50),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        cv2.rectangle(image, (0, 430), (640, 480), (128, 128, 128), -1) # TODO : make it more transperant
        cv2.putText(image, ', '.join(sentence), (50,465),
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        
        # FPS fixin
        process_time = time.time() - start_process_time
        print(f"Processing time: {process_time:.2f}s")

        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        frame_count_for_fps += 1


        if frame_count_for_fps % 100 == 0:
            end_time = time.time()
            elapsed = end_time - start_time
            actual_fps = frame_count_for_fps / elapsed
            fps_list.append(actual_fps)
            print("Actual FPS:", actual_fps)
            # Reset timer and counter
            start_time = time.time()
            frame_count_for_fps = 0

        # write the frame in the video recording
        out.write(image)

        # FIXIN FPS
        # Calculate the time to wait
        elapsed = time.time() - last_time
        wait_time = max(0, frame_time - elapsed)
        print(f"Elapsed time: {elapsed:.2f}s, Waiting for: {wait_time:.2f}s")
        time.sleep(wait_time)
        last_time = time.time()


        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('t'):
            break
    
    #release the recording
    out.release()

    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)

Requested Resolution: 1280x720, Actual Resolution: 1280x720
Requested FPS: 30, Actual FPS: 30.0


I0000 00:00:1715682448.920888       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 83.1), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


<class 'mediapipe.python.solution_base.SolutionOutputs'>
Processing time: 0.15s
Elapsed time: 0.32s, Waiting for: 0.00s
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Processing time: 0.05s
Elapsed time: 0.09s, Waiting for: 0.05s
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Processing time: 0.05s
Elapsed time: 0.08s, Waiting for: 0.06s
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Processing time: 0.04s
Elapsed time: 0.07s, Waiting for: 0.07s
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Processing time: 0.04s
Elapsed time: 0.13s, Waiting for: 0.02s
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Processing time: 0.06s
Elapsed time: 0.08s, Waiting for: 0.06s
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Processing time: 0.06s
Elapsed time: 0.08s, Waiting for: 0.07s
<class 'mediapipe.python.solution_base.SolutionOutputs'>
Processing time: 0.09s
Elapsed time: 0.11s, Waiting for: 0.03s
<class 'mediapipe.python.solution_base.S

In [26]:
fps_list

[5.94730033049648, 6.813402378387349, 6.766079420852675]

: 

In [None]:
# print the detected words
print(f"All the words that have been detected are [{len(all_words)}] and they are: {', '.join(all_words)}")
print(f"The last words that have been detected and printed out are [{len(sentence)}] and they are: {', '.join(sentence)}")