In [106]:
import mediapipe as mp
from mediapipe.tasks.python.components.containers import landmark as mpLandmark
from mediapipe.tasks.python import vision
import pandas as pd
import cv2
from pathlib import Path
import time
import os
import numpy as np




In [2]:
parentDirectory = Path(os.path.abspath(""))
out_path = r'C:\Users\mteer\OneDrive\Desktop\ikky\output'
inputDirectory = parentDirectory.joinpath("Videos")
outputFile = parentDirectory.joinpath("image output" + ".xlsx")
modelDirectory = parentDirectory.joinpath("Model")
handModel = modelDirectory.joinpath("hand_landmarker.task")
poseModel = modelDirectory.joinpath("pose_landmarker_full.task")

In [3]:
supportsExtension = ["*/*.mp4", "*/*.mov"]
sample = 5 #Save frame every n frame
frameBuffer = 10
processes = []
processesCount = 10

In [4]:
poseColumnNameList = ["nose", "left eye (inner)", "left eye", "left eye (outer)", "right eye (inner)",
                      "right eye", "right eye (outer)", "left ear", "right ear", "mouth (left)",
                      "mouth (right)", "left shoulder", "right shoulder", "left elbow", "right elbow",
                      "left wrist", "right wrist", "left pinky", "right pinky", "left index",
                      "right index","left thumb","right thumb","left hip","right hip"]
handColumnNameList = ["wrist", "thumb cmc", "thumb mcp", "thumb ip", "thumb tip",
                      "index finger mcp", "index finger pip", "index finger dip", "index finger tip", "middle finger mcp",
                      "middle finger pip", "middle finger dip", "middle finger tip", "ring finger mcp", "ring finger pip",
                      "ring finger dip", "ring finger tip", "pinky mcp", "pinky pip", "pinky dip",
                      "pinky tip"]
columnNames = ["Label"]

for i in range(frameBuffer):
    for columnName in poseColumnNameList:
        columnNames.append(f"{columnName}_{i}")
    for columnName in handColumnNameList:
        columnNames.append(f"right_{columnName}_{i}")
    for columnName in handColumnNameList:
        columnNames.append(f"left_{columnName}_{i}")
df = pd.DataFrame(columns=columnNames)

labelList = {"กรอบ": 0,     "กระเพรา": 1,    "ขา": 2,       "ข้าว": 3,
             "ไข่": 4,       "คะน้า": 5,      "เค็ม": 6,       "โจ๊ก": 7,
             "แดง": 8,      "ต้ม": 9,        "แตงโม": 10,    "น้ำพริกเผา": 11,
             "บะหมี่": 12,    "เปรี้ยว": 13,    "ผัด": 14,       "ฝรั่ง": 15,
             "พริกแกง": 16,  "มะม่วง": 17,    "ม้า": 18,       "มาม่า": 19,
             "ลูกชิ้นปลา": 20, "เลือด": 21,     "สับ": 22,       "เส้นเล็ก": 23,
             "เส้นใหญ่": 24,  "หมู": 25,       "หวาน": 26,     "องุ่น": 27,
             "แอปเปิ้ล": 28}

In [5]:
#mediapipe config
minPoseConfidence = 0.5
minHandConfidence = 0.5

baseOptions = mp.tasks.BaseOptions
poseLandmarker = vision.PoseLandmarker
poseLandmarkerOptions = vision.PoseLandmarkerOptions
handLandmarker = vision.HandLandmarker
handLandmarkerOptions = vision.HandLandmarkerOptions
visionRunningMode = vision.RunningMode

In [6]:
#create the landmarker object
poseOption = poseLandmarkerOptions(base_options=baseOptions(model_asset_path=poseModel),
                                   running_mode=vision.RunningMode.IMAGE,
                                   min_pose_detection_confidence=minPoseConfidence)
handOption = handLandmarkerOptions(base_options=baseOptions(model_asset_path=handModel),
                                   running_mode=visionRunningMode.IMAGE,
                                   min_hand_detection_confidence=minHandConfidence,
                                   num_hands=2)
poseLandmarker = poseLandmarker.create_from_options(poseOption)
handLandmarker = handLandmarker.create_from_options(handOption)


I0000 00:00:1711379905.237361    1310 task_runner.cc:85] GPU suport is not available: INTERNAL: ; RET_CHECK failure (mediapipe/gpu/gl_context_egl.cc:84) egl_initializedUnable to initialize EGL





INFO: Created TensorFlow Lite XNNPACK delegate for CPU.

I0000 00:00:1711379905.325346    1310 task_runner.cc:85] GPU suport is not available: INTERNAL: ; RET_CHECK failure (mediapipe/gpu/gl_context_egl.cc:84) egl_initializedUnable to initialize EGL







In [7]:
def getFilePATHS(directory):
    videoPATHs = []
    for extension in supportsExtension: #Collect file that has mp4 and mov file extension
        for file in directory.glob(extension):
            videoPATHs.append(file)
    print(f"total files: {len(videoPATHs)}")
    return videoPATHs

In [8]:
def splitList(list):
    step = len(list) // processesCount
    remain = len(list) % processesCount
    for i in range(0, len(list), step):
        yield list[i:i + step + remain] #return multiple 1D list
        remain = 0

In [120]:
#Re-written of addLandMark and toDataFrame
def addLandmarks(coordinates, array):
    if type(coordinates[0]) == mpLandmark.NormalizedLandmark:
        for landmark in coordinates:
            value = np.array([landmark.x, landmark.y, landmark.z], dtype=np.float16)
            array = np.vstack([array, value])
    else:
        for filler in coordinates:
            array = np.vstack([array, filler])
    return array

def generateFrameLandmarks(frame):
    frame = mp.Image.create_from_file(frame)
    poseResult = poseLandmarker.detect(frame)
    handResult = handLandmarker.detect(frame)
    poseCoordinates = poseResult.pose_landmarks
    handedness = handResult.handedness 
    handCoordinates = handResult.hand_landmarks

    coordinatesArray = np.empty((3, ), dtype=np.float16)
    coordinatesArray = addLandmarks(poseCoordinates[0][:25], coordinatesArray)
    coordinatesArray = np.delete(coordinatesArray, 0, axis=0) #remove the first element that got create when declaire the empty array
    for index, category in enumerate(handedness):
        if len(handCoordinates) < 2: #check if mp detect only one hand
            filler = np.zeros(shape=(len(handColumnNameList), 3), dtype=np.float16)
            if category[index].index == 0: #detect right
                coordinatesArray = addLandmarks(handCoordinates[index], coordinatesArray)
                coordinatesArray = addLandmarks(filler, coordinatesArray)
            else: #detect left
                coordinatesArray = addLandmarks(filler, coordinatesArray)
                coordinatesArray = addLandmarks(handCoordinates[index], coordinatesArray)
            break
        else:
            coordinatesArray = addLandmarks(handCoordinates[index], coordinatesArray)
    print(coordinatesArray)
    print(len(coordinatesArray))
    return coordinatesArray #return np array

bothIMG = parentDirectory.joinpath('Images/กรอบ/IMG_0189_50.png')
leftIMG = parentDirectory.joinpath('Images/กระเพรา/IMG_0199_25.png')
rightIMG =parentDirectory.joinpath('Images/เปรี้ยว/VID_20240123200118_0.png')

path = rightIMG
label = path.parent
generateFrameLandmarks(str(path), str(label))

25
[[ 0.756    1.15    -0.0951 ]
 [ 0.5254   0.503   -1.14   ]
 [ 0.5737   0.468   -1.061  ]
 [ 0.5967   0.4683  -1.061  ]
 [ 0.6196   0.4688  -1.062  ]
 [ 0.4858   0.4692  -1.039  ]
 [ 0.457    0.4707  -1.039  ]
 [ 0.434    0.473   -1.039  ]
 [ 0.6562   0.4888  -0.5728 ]
 [ 0.416    0.4946  -0.4294 ]
 [ 0.5728   0.5444  -0.9653 ]
 [ 0.4775   0.5483  -0.9272 ]
 [ 0.8477   0.7065  -0.396  ]
 [ 0.2568   0.702   -0.2817 ]
 [ 0.9556   0.944   -0.4546 ]
 [-0.02509  0.9375  -1.118  ]
 [ 0.988    1.193   -0.7783 ]
 [ 0.2366   0.7554  -2.38   ]
 [ 1.009    1.272   -0.894  ]
 [ 0.3198   0.701   -2.627  ]
 [ 0.9355   1.262   -0.921  ]
 [ 0.3481   0.6787  -2.506  ]
 [ 0.912    1.235   -0.806  ]
 [ 0.3403   0.6934  -2.367  ]
 [ 0.756    1.15    -0.0951 ]
 [ 0.329    1.155    0.1009 ]]
26


In [23]:
def saveFrames(videoPATHs, nextrow):
    for videoPATH in videoPATHs:
        flipping = True #don't even know what to name this
        frames = []
        label = videoPATH.parent.name
        print(f"Adding: {videoPATH.name} to dataframe as: {label} to index: {str(len(df))}")

        cap = cv2.VideoCapture(str(videoPATH))
        currentFrame = 0

        while cap.isOpened:
            ret, frame = cap.read()
            if ret:
                if currentFrame / sample - currentFrame // sample == 0: #check if the current frame is the sample frame
                    frames.append(frame)
                    #cv2.imshow(f"name: {videoPATH} frame: {currentFrame}", frame)
                currentFrame += 1
            else:
                break
        while len(frames) > frameBuffer: #remove frame untile the frames list contain only 10 frame
            if flipping:
                frames.pop(0)
            else:
                frames.pop()
            flipping = not flipping
        cap.release()
        toDataFrame(frames, label, nextrow)
        nextrow += 1

startTime = time.perf_counter()

In [24]:
print(inputDirectory)
videoPATHs = getFilePATHS(inputDirectory)
#videoPATHsList = list(splitList(videoPATHs))
#del videoPATHs #hopefully it will freeup some memory

saveFrames(videoPATHs, 0)

#nextRow = 0 #don't even know what to name this
#for i in range(processesCount): #initiate process
#    p = multiprocessing.Process(target=saveFrames, args=[videoPATHsList[i], nextRow])
#    p.start()
#    processes.append(p)
#    nextRow += len(videoPATHsList[i])
#for process in processes: #wait for process to end
#    process.join()
#
#del videoPATHsList
#del processes

df = df.sample(frac=1) #Shuffle dataframe
print("Output dataframe:")
print(df)
df.to_excel(outputFile, index=False)

finishTime = time.perf_counter()
print(f"total time: {finishTime - startTime}")

/home/page/code-save/Signa-Link/Source/Matrix converter/Videos
total files: 513
Adding: VID_20240122202046.mp4 to dataframe as: โจ๊ก to index: 0
Adding: VID_20240122202231.mp4 to dataframe as: โจ๊ก to index: 1
Adding: VID_20240122201845.mp4 to dataframe as: โจ๊ก to index: 2
Adding: VID_20240122201749.mp4 to dataframe as: โจ๊ก to index: 3
Adding: VID_20240122202207.mp4 to dataframe as: โจ๊ก to index: 4
Adding: VID_20240122202146.mp4 to dataframe as: โจ๊ก to index: 5
Adding: VID_20240122202117.mp4 to dataframe as: โจ๊ก to index: 6
Adding: VID_20240122202022.mp4 to dataframe as: โจ๊ก to index: 7
Adding: VID_20240122201948.mp4 to dataframe as: โจ๊ก to index: 8
Adding: VID_20240122201812.mp4 to dataframe as: โจ๊ก to index: 9
Adding: VID_20240122193559.mp4 to dataframe as: กรอบ to index: 10
Adding: VID_20240122193231.mp4 to dataframe as: กรอบ to index: 11
Adding: VID_20240122193532.mp4 to dataframe as: กรอบ to index: 12
Adding: VID_20240122193155.mp4 to dataframe as: กรอบ to index: 13
Adding

KeyboardInterrupt: 

In [7]:
test_path = r'C:\Users\mteer\OneDrive\Desktop\ikky'

os.chdir(out_path)
for file in os.listdir(test_path):

    if (os.path.splitext(file))[1] == '.mp4':
        cap = cv2.VideoCapture(fr"{test_path}\{file}")
        while cap.isOpened():
            ret, img = cap.read()

            if not ret:
                break
            cv2.imshow('frame', img)
            '''value = [[0, 0, 0]] * len(columnNames)
            value[0] = labelList["หมู"] 
            i = 1
            for index, frame in enumerate(img):
                i = 1 + (len(poseColumnNameList) + len(handColumnNameList) * 2) * index
                frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=(frame))
                poseResult = poseLandmarker.detect(frame)
                handResult = HandLandmarker.detect(frame)
                poseCoordinates = poseResult.pose_landmarks
                handCoordinates = handResult.hand_landmarks
                if poseCoordinates != []:
                    cv2.imwrite(img)
            '''
            cap.release()

    #except Exception:
        #print('not a video')

In [8]:
'''img = mp.Image.create_from_file(r"C:\Users\mteer\OneDrive\Desktop\hum.png")

value = [[0, 0, 0]] * len(columnNames)
value[0] = labelList["หมู"] 
poseResult = poseLandmarker.detect(img)
handResult = HandLandmarker.detect(img)
#if poseResult.pose_landmarks != []:
#    print('pass')
poseCoordinates = poseResult.pose_landmarks
handCoordinates = handResult.hand_landmarks

print(handResult.handedness[0][0].category_name)'''

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 36-37: truncated \UXXXXXXXX escape (2582673475.py, line 12)

: 