In [1]:
!pip install dv_processing

Collecting dv_processing
  Obtaining dependency information for dv_processing from https://files.pythonhosted.org/packages/78/06/e6572ef771bf57102e807d7787a7c11d5b586b042dffd52e4ee761b883df/dv_processing-1.7.9-cp311-cp311-win_amd64.whl.metadata
  Using cached dv_processing-1.7.9-cp311-cp311-win_amd64.whl.metadata (238 bytes)
Using cached dv_processing-1.7.9-cp311-cp311-win_amd64.whl (16.0 MB)
Installing collected packages: dv_processing
Successfully installed dv_processing-1.7.9



[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from ultralytics import YOLO
import numpy as np
import dv_processing as dv  # Import the dv_processing library for DAVIS 346 access

cameras = dv.io.discoverDevices()

print(f"Device discovery: found {len(cameras)} devices.")
for camera_name in cameras:
    print(f"Detected device [{camera_name}]")


# Load a model
modelpose = YOLO('yolov8n-pose.pt') 

Device discovery: found 0 devices.


In [4]:
import os

# Path for exported data, numpy arrays
DATA_PATH = os.path.join('MP_Data') 

# Thirty videos worth of data
no_sequences = 46

# Videos are going to be 30 frames in length
sequence_length = 15

In [5]:
# Define path to the main folder containing class subfolders
data_path = "C:\\Users\\ag701\\Desktop\\lstm\\LSTM\\MP_Data"  # Replace with your actual path

# Define actions based on subfolder names (modify as needed)
actions = np.array(os.listdir(data_path))  # Get list of subfolders
print(actions)


['boxing' 'jumping' 'running' 'sitting' 'squat' 'standing' 'walking']


In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [7]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [8]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(15,34)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))

In [9]:
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [10]:
model.load_weights('action83.h5')

In [11]:
import cv2 

colors = [(245,117,16), (117,245,16), (16,117,245), (126, 249, 255), (255, 166, 255),(16,117,245), (40, 166, 133)]
def prob_viz(res, actions, input_frame, colors, keypoints):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
    top_left_x = int(keypoints[0][:, 0].min().item())  # Minimum x-coordinate across all keypoints
    top_left_y = int(keypoints[0][:, 1].min().item())-20  # Minimum y-coordinate across all keypoints
    bottom_right_x = int(keypoints[0][:, 0].max().item())  # Maximum x-coordinate across all keypoints
    bottom_right_y = int(keypoints[0][:, 1].max().item())+20 # Maximum y-coordinate across all keypoints

    # Define bounding box thickness and color
    thickness = 2
    color = (0, 255, 0)  # Green for bounding box

    # Draw the rectangle
    cv2.rectangle(output_frame, (top_left_x, top_left_y), (bottom_right_x, bottom_right_y), color, thickness)

    return output_frame



In [12]:
# # 1. New detection variables
# import cv2
# sequence = []
# sentence = []
# threshold = 0.6

# cap = cv2.VideoCapture("Dataset/jumping/6032918-hd_1920_1080_25fps.mp4")
# # Set mediapipe model 

# while cap.isOpened():

#     # Read feed
#     ret, frame = cap.read()

#     if not ret:
#         print("No frames left in video. Exiting...")
#         break
    
#     image = frame

#     # Make detections
#     results = modelpose.predict(frame)
#     # Flatten keypoints
#     # keypoints = np.array(results[0][0].keypoints.xy).flatten()
#     for r in results:
#         keypoints = np.array(r.keypoints.xy).flatten()  # Extract keypoints
        
    

#     sequence.append(keypoints)
#     sequence = sequence[-15:]
    
#     if len(sequence) == 15:
#         res = model.predict(np.expand_dims(sequence, axis=0))[0]
#         print(actions[np.argmax(res)])
        
        
#     #3. Viz logic
#         if res[np.argmax(res)] > threshold: 
#             if len(sentence) > 0: 
#                 if actions[np.argmax(res)] != sentence[-1]:
#                     sentence.append(actions[np.argmax(res)])
#             else:
#                 sentence.append(actions[np.argmax(res)])

#         if len(sentence) > 5: 
#             sentence = sentence[-5:]

        
#     cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
#     cv2.putText(image, ' '.join(sentence), (3,30), 
#                     cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    
#     # Show to screen
#     cv2.imshow('OpenCV Feed', image)

#     # Break gracefully
#     if cv2.waitKey(10) & 0xFF == ord('q'):
#         break
# cap.release()
# cv2.destroyAllWindows()

In [15]:
# 1. New detection variables
import cv2
import torch

sequence = []
sentence = []
threshold = 0.6

# cap = cv2.VideoCapture("C:\\Users\\ag701\\Desktop\\lstm\\LSTM\\jumping1.mp4")
# # Set mediapipe model 


# Use dv_processing for DAVIS 346 access
cap = dv.io.CameraCapture()


# Initiate a preview window
cv2.namedWindow("Preview", cv2.WINDOW_NORMAL)


while cap.isOpened():
    # Read feed
    ret, frame = cap.read()

    if not ret:
        print("No frames left in video. Exiting...")
        break
    
    image = frame

    # Make detections
    results = modelpose.predict(frame)
    
    keyframes = []
    # # Flatten keypoints
    keypointsn = np.array(results[0][0].keypoints.xyn).flatten()
    
    if len(keypointsn) == 0:
            keypointsn = torch.zeros(1, 17, 2)
            
    for r in results:
        keypoints=[]
        keypoints = r.keypoints.xy
        print("keypoints", keypoints.shape)
        # if len(keypoints) > 0:  # Check for missing keypoints (optional)
        #     keyframes.append(keypoints)
        if len(keypoints) == 0:
            keypoints = torch.zeros(1, 17, 2)
    # if len(keyframes) > 0:  # Check for empty frames (optional)
    #     keyframes = np.array(keyframes).flatten()
    #     sequence.append(keyframes[-1])
    #     sequence = sequence[-15:]

    sequence.append(keypointsn)
    sequence = sequence[-15:]
    
    if len(sequence) == 15:
        res = model.predict(np.expand_dims(sequence, axis=0))[0]
        print(actions[np.argmax(res)])
        
        
    #3. Viz logic
        if res[np.argmax(res)] > threshold: 
            if len(sentence) > 0: 
                if actions[np.argmax(res)] != sentence[-1]:
                    sentence.append(actions[np.argmax(res)])
            else:
                sentence.append(actions[np.argmax(res)])

        if len(sentence) > 5: 
            sentence = sentence[-5:]

        # Viz probabilities
        image = prob_viz(res, actions, image, colors, keypoints)
        
    cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
    cv2.putText(image, ' '.join(sentence), (3,30), 
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    
    # Show to screen
    cv2.imshow('OpenCV Feed', image)

    # Break gracefully
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()


0: 640x384 2 persons, 68.3ms
Speed: 9.9ms preprocess, 68.3ms inference, 12.8ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 2 persons, 92.1ms
Speed: 2.9ms preprocess, 92.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)



keypoints torch.Size([2, 17, 2])
keypoints torch.Size([2, 17, 2])


0: 640x384 1 person, 115.7ms
Speed: 3.0ms preprocess, 115.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 97.2ms
Speed: 0.0ms preprocess, 97.2ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)



keypoints torch.Size([1, 17, 2])
keypoints torch.Size([1, 17, 2])


0: 640x384 1 person, 98.0ms
Speed: 1.0ms preprocess, 98.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 90.5ms
Speed: 3.0ms preprocess, 90.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)



keypoints torch.Size([1, 17, 2])
keypoints torch.Size([1, 17, 2])


0: 640x384 1 person, 87.9ms
Speed: 0.0ms preprocess, 87.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 80.8ms
Speed: 0.0ms preprocess, 80.8ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)



keypoints torch.Size([1, 17, 2])
keypoints torch.Size([1, 17, 2])


0: 640x384 1 person, 92.8ms
Speed: 1.9ms preprocess, 92.8ms inference, 0.6ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 83.4ms
Speed: 0.0ms preprocess, 83.4ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)



keypoints torch.Size([1, 17, 2])
keypoints torch.Size([1, 17, 2])


0: 640x384 1 person, 82.0ms
Speed: 0.0ms preprocess, 82.0ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 91.8ms
Speed: 0.0ms preprocess, 91.8ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)



keypoints torch.Size([1, 17, 2])
keypoints torch.Size([1, 17, 2])


0: 640x384 1 person, 118.3ms
Speed: 0.0ms preprocess, 118.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 person, 93.4ms
Speed: 0.0ms preprocess, 93.4ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)



keypoints torch.Size([1, 17, 2])
keypoints torch.Size([1, 17, 2])


0: 640x384 1 person, 92.3ms
Speed: 3.3ms preprocess, 92.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])



0: 640x384 1 person, 80.1ms
Speed: 3.0ms preprocess, 80.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


boxing
keypoints torch.Size([1, 17, 2])





walking


0: 640x384 1 person, 91.7ms
Speed: 0.0ms preprocess, 91.7ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
walking



0: 640x384 1 person, 84.4ms
Speed: 0.0ms preprocess, 84.4ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
walking



0: 640x384 1 person, 69.6ms
Speed: 6.9ms preprocess, 69.6ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





walking


0: 640x384 1 person, 76.9ms
Speed: 0.0ms preprocess, 76.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
sitting



0: 640x384 1 person, 93.4ms
Speed: 0.0ms preprocess, 93.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





sitting


0: 640x384 1 person, 79.8ms
Speed: 0.0ms preprocess, 79.8ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





standing


0: 640x384 1 person, 98.6ms
Speed: 0.0ms preprocess, 98.6ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
standing



0: 640x384 1 person, 86.1ms
Speed: 0.0ms preprocess, 86.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
standing



0: 640x384 1 person, 91.9ms
Speed: 0.0ms preprocess, 91.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





standing


0: 640x384 1 person, 93.0ms
Speed: 0.0ms preprocess, 93.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





standing


0: 640x384 1 person, 81.9ms
Speed: 0.0ms preprocess, 81.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





standing


0: 640x384 1 person, 82.5ms
Speed: 0.0ms preprocess, 82.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





boxing


0: 640x384 1 person, 82.5ms
Speed: 0.0ms preprocess, 82.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





boxing


0: 640x384 1 person, 85.8ms
Speed: 0.0ms preprocess, 85.8ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
boxing



0: 640x384 1 person, 87.2ms
Speed: 5.8ms preprocess, 87.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





boxing


0: 640x384 1 person, 82.9ms
Speed: 0.0ms preprocess, 82.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
boxing



0: 640x384 1 person, 87.1ms
Speed: 0.0ms preprocess, 87.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
boxing



0: 640x384 1 person, 89.9ms
Speed: 0.0ms preprocess, 89.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





boxing


0: 640x384 2 persons, 80.1ms
Speed: 0.0ms preprocess, 80.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([2, 17, 2])
boxing



0: 640x384 1 person, 85.2ms
Speed: 0.0ms preprocess, 85.2ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
boxing



0: 640x384 1 person, 90.7ms
Speed: 0.0ms preprocess, 90.7ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
boxing



0: 640x384 1 person, 92.2ms
Speed: 0.0ms preprocess, 92.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





boxing


0: 640x384 1 person, 99.9ms
Speed: 0.0ms preprocess, 99.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





boxing


0: 640x384 1 person, 81.1ms
Speed: 0.0ms preprocess, 81.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





boxing


0: 640x384 1 person, 78.7ms
Speed: 0.0ms preprocess, 78.7ms inference, 10.2ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





boxing


0: 640x384 1 person, 106.0ms
Speed: 0.0ms preprocess, 106.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





walking


0: 640x384 1 person, 102.5ms
Speed: 0.0ms preprocess, 102.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





boxing


0: 640x384 1 person, 83.4ms
Speed: 0.0ms preprocess, 83.4ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





boxing


0: 640x384 1 person, 112.3ms
Speed: 0.0ms preprocess, 112.3ms inference, 15.6ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
boxing



0: 640x384 1 person, 92.6ms
Speed: 0.0ms preprocess, 92.6ms inference, 15.6ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])



0: 640x384 1 person, 91.6ms
Speed: 1.7ms preprocess, 91.6ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


boxing
keypoints torch.Size([1, 17, 2])
walking



0: 640x384 1 person, 76.7ms
Speed: 1.8ms preprocess, 76.7ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
walking



0: 640x384 1 person, 80.0ms
Speed: 0.0ms preprocess, 80.0ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





walking


0: 640x384 1 person, 79.9ms
Speed: 1.3ms preprocess, 79.9ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





walking


0: 640x384 1 person, 70.0ms
Speed: 5.7ms preprocess, 70.0ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





walking


0: 640x384 1 person, 109.3ms
Speed: 0.0ms preprocess, 109.3ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])



0: 640x384 1 person, 85.9ms
Speed: 3.4ms preprocess, 85.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


walking
keypoints torch.Size([1, 17, 2])
walking



0: 640x384 1 person, 108.1ms
Speed: 0.0ms preprocess, 108.1ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
sitting



0: 640x384 1 person, 105.8ms
Speed: 0.0ms preprocess, 105.8ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])



0: 640x384 1 person, 106.9ms
Speed: 0.0ms preprocess, 106.9ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 384)


standing
keypoints torch.Size([1, 17, 2])
standing



0: 640x384 1 person, 80.6ms
Speed: 2.5ms preprocess, 80.6ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





standing


0: 640x384 1 person, 97.9ms
Speed: 0.0ms preprocess, 97.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])



0: 640x384 1 person, 94.2ms
Speed: 8.4ms preprocess, 94.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


standing
keypoints torch.Size([1, 17, 2])
standing



0: 640x384 1 person, 86.9ms
Speed: 2.0ms preprocess, 86.9ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





standing


0: 640x384 1 person, 77.5ms
Speed: 0.0ms preprocess, 77.5ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
standing



0: 640x384 1 person, 86.4ms
Speed: 0.0ms preprocess, 86.4ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





standing


0: 640x384 1 person, 103.0ms
Speed: 0.0ms preprocess, 103.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





standing


0: 640x384 1 person, 94.1ms
Speed: 0.0ms preprocess, 94.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])





standing


0: 640x384 1 person, 80.8ms
Speed: 0.0ms preprocess, 80.8ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
standing



0: 640x384 1 person, 82.0ms
Speed: 0.0ms preprocess, 82.0ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
standing



0: 640x384 1 person, 91.7ms
Speed: 3.4ms preprocess, 91.7ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
boxing



0: 640x384 1 person, 94.4ms
Speed: 0.9ms preprocess, 94.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


keypoints torch.Size([1, 17, 2])
boxing
