In [1]:
import cv2
import matplotlib.pyplot as plt
import os
from ultralytics import YOLO
import pandas as pd
import numpy as np

# Pose model used to extract body keypoints from every training image.
model = YOLO('yolov8n-pose.pt')

directory_path = '/home/mind/projects/projects/action_recognition/data'

# Integer label of each class sub-directory (directory names are matched
# case-insensitively). Anything else found in `directory_path` is skipped.
CLASS_ENCODINGS = {'standing': 0, 'falling': 1, 'walking': 2}

# Keypoint order matches the index order of the pose model output
# (index 0 = nose ... index 16 = right ankle); each keypoint contributes an
# `_x` and a `_y` column, followed by the `Class` label column.
KEYPOINT_NAMES = ['Nose', 'Left-eye', 'Right-eye', 'Left-ear', 'Right-ear',
                  'Left-shoulder', 'Right-shoulder', 'Left-elbow', 'Right-elbow',
                  'Left-wrist', 'Right-wrist', 'Left-hip', 'Right-hip',
                  'Left-knee', 'Right-knee', 'Left-ankle', 'Right-ankle']
DATASET_COLUMNS = [f'{name}_{axis}' for name in KEYPOINT_NAMES for axis in ('x', 'y')] + ['Class']

rows = []            # one [x0, y0, ..., x16, y16, class] record per usable image
skipped_images = []  # images that could not be read or contained no usable pose

# Sorted so the row order does not depend on the file system's listing order.
for sub_directory in sorted(os.listdir(directory_path)):

    sub_directory_path = os.path.join(directory_path, sub_directory)
    class_encoding = CLASS_ENCODINGS.get(sub_directory.lower())
    if class_encoding is None or not os.path.isdir(sub_directory_path):
        # Unknown folder or stray file: there is no label for it, so it must
        # not inherit the label of the previously processed class.
        continue

    for image in sorted(os.listdir(sub_directory_path)):
        image_path = os.path.join(sub_directory_path, image)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None instead of raising for unreadable files.
            skipped_images.append(image_path)
            continue

        results = model(source=img,
                        show=False,
                        conf=0.3,
                        save=False,
                        verbose=False,
                        stream=True)

        for result in results:
            if result.keypoints is None:
                skipped_images.append(image_path)
                continue
            # .cpu() makes the conversion work when inference ran on a GPU.
            people = result.keypoints.xyn.cpu().numpy()
            # Only the first detected person is used. Images without a
            # detection yield an empty array, which is skipped here.
            if len(people) == 0 or people[0].shape != (17, 2):
                skipped_images.append(image_path)
                continue
            rows.append([*people[0].flatten().tolist(), class_encoding])

    print('----------------------------------------------------------------------------------')

if skipped_images:
    print(f'Skipped {len(skipped_images)} image(s) that were unreadable or had no detected pose')

# Build the frame once; appending with df.loc[len(df)] copies the data on every row.
df = pd.DataFrame(rows, columns=DATASET_COLUMNS)

----------------------------------------------------------------------------------
----------------------------------------------------------------------------------
----------------------------------------------------------------------------------


In [2]:
# Shuffle the rows so the classes are interleaved. A fixed seed keeps the
# order identical on every Restart & Run All.
df = df.sample(frac=1, ignore_index=True, random_state=42)

In [3]:
# Split the table into model inputs and target:
#   x_train - every column except 'Class' (the keypoint coordinates)
#   y_train - the 'Class' label column
is_feature_column = df.columns != 'Class'
x_train = df.loc[:, is_feature_column]
y_train = df.loc[:, 'Class']

In [None]:
# print(x_train.iloc[0, :])

In [4]:
from sklearn.svm import SVC

In [5]:
# RBF-kernel support vector classifier trained on the keypoint features.
# `fit` returns the estimator itself, so `clf` and `svc` are the same fitted object.
svc = SVC(
    C=0.1,
    kernel='rbf',
    degree=3,
    gamma=0.1,
)
svc.fit(x_train, y_train)
clf = svc

## Single person image

In [None]:
# Run pose estimation on one test image and classify the first detected person.
outputs = model(source='test_data/standing0.jpg',
                show=False,
                conf=0.3,
                save=False,
                verbose=False,
                stream=True,
        )

output_keypoints = None
for output in outputs:
    if output.keypoints is not None:
        # .cpu() makes the conversion work when inference ran on a GPU.
        output_keypoints = output.keypoints.xyn.cpu().numpy()

# The classifier was trained on the 34 coordinates of a single person, so only
# the first detection is passed on. Flattening every detection at once would
# produce the wrong number of features for zero or several people.
prediction = None
if (output_keypoints is not None
        and len(output_keypoints) > 0
        and output_keypoints[0].shape == (17, 2)):
    prediction = clf.predict(output_keypoints[0].reshape(1, -1))
else:
    print('No person with a complete set of keypoints was detected in the image')

# Last expression of the cell, so the predicted class is displayed.
prediction

## Single person video

In [None]:
# Classify the action of the first detected person on every frame of a video
# and show the label on a live preview. Press 'q' in the preview window to stop.

# Class index predicted by `clf` -> text drawn on the frame.
ACTION_LABELS = {0: 'standing', 1: 'falling', 2: 'walking'}

cap = cv2.VideoCapture('/home/mind/projects/projects/action_recognition/test_data/production_id_5136708(720p).mp4')

if not cap.isOpened():
    print('Error while trying to read video. Please check path again')

try:
    # isOpened must be called; the bare method object is always truthy.
    while cap.isOpened():

        ret, frame = cap.read()
        if not ret:
            # End of the video (or a read error).
            break

        outputs = model(source=frame,
                        show=True,
                        conf=0.6,
                        save=False,
                        verbose=False,
                        stream=True,
                )

        output_keypoints = None
        for output in outputs:
            if output.keypoints is not None:
                # .cpu() makes the conversion work when inference ran on a GPU.
                output_keypoints = output.keypoints.xyn.cpu().numpy()

        # Leave the label empty unless the first detection carries all 17
        # keypoints. The length check also covers frames where nothing was
        # detected and the array is empty.
        text = ''
        if (output_keypoints is not None
                and len(output_keypoints) > 0
                and output_keypoints[0].shape == (17, 2)):
            prediction = clf.predict(output_keypoints[0].reshape(1, -1))
            text = ACTION_LABELS.get(int(prediction[0]), '')

        cv2.putText(frame, text, (425, 100), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1.5, color=[0, 0, 255], thickness=2, lineType=cv2.LINE_AA)

        cv2.imshow('output', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the windows even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()

## Multiple people video

In [6]:
import warnings
warnings.filterwarnings("ignore")

In [7]:
# Classify the action of every detected person on each frame, draw the label
# at the centre of that person's bounding box, show a live preview and save
# the annotated video. Press 'q' in the preview window to stop early.

# Class index predicted by `clf` -> text drawn on the frame.
ACTION_LABELS = {0: 'standing', 1: 'falling', 2: 'walking'}

cap = cv2.VideoCapture('/home/mind/projects/projects/action_recognition/test_data/pexels-gabby-k-6220073(720p).mp4')

if not cap.isOpened():
    print('Error while trying to read video. Please check path again')

# Every frame is resized to this fixed size before inference and writing.
# NOTE(review): this ignores the source aspect ratio - confirm it is intended.
frame_width = 720
frame_height = 1080

# Keep the source frame rate as a float: truncating e.g. 29.97 to 29 makes the
# written video play slower than the original.
fps = cap.get(cv2.CAP_PROP_FPS)

video_writer = cv2.VideoWriter("/home/mind/projects/projects/action_recognition/test_data/outputs/output_pexels-gabby-k-6220073(720p).mp4",
                               cv2.VideoWriter_fourcc(*'mp4v'),
                               fps,
                               (frame_width, frame_height))

if not video_writer.isOpened():
    # VideoWriter does not raise when it cannot open the target file.
    print('Error while trying to open the output video. Please check the output path and codec')

frame_count = 0

try:
    while cap.isOpened():

        ret, frame = cap.read()
        if not ret:
            # End of the video (or a read error).
            break

        frame = cv2.resize(frame, (frame_width, frame_height))
        frame_count += 1

        outputs = model(source=frame,
                        show=False,
                        conf=0.5,
                        save=False,
                        verbose=False,
                        stream=False,
                )

        for output in outputs:
            if output.keypoints is None:
                continue
            # .cpu() makes the conversion work when inference ran on a GPU.
            output_keypoints = output.keypoints.xyn.cpu().numpy()
            output_boxes = output.boxes.xywh.cpu().numpy()

            # zip stops at the shorter array, so frames without detections
            # simply draw nothing.
            for person_keypoints, box in zip(output_keypoints, output_boxes):
                if person_keypoints.shape != (17, 2):
                    continue
                prediction = clf.predict(person_keypoints.reshape(1, -1))
                text = ACTION_LABELS.get(int(prediction[0]), '')
                # xywh boxes are centre-based, so the label is anchored at the box centre.
                cv2.putText(frame, text, (int(box[0]), int(box[1])), fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=[0, 0, 255], thickness=2, lineType=cv2.LINE_AA)

        # Write and show once per frame, after all labels are drawn, so frames
        # without detections are still previewed and the frame on which 'q' is
        # pressed is not lost.
        video_writer.write(frame)
        cv2.imshow('output', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Finalise the output file and free the capture even if a frame raised.
    video_writer.release()
    cap.release()
    cv2.destroyAllWindows()