In [1]:
import cv2
import mediapipe as mp
import time
import pandas as pd
import tensorflow as tf
import numpy as np
import json

2023-07-19 23:11:58.720323: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:

pd.set_option('display.max_rows', None)
mp_drawing = mp.solutions.drawing_utils
mp_holistic = mp.solutions.holistic

interpreter = tf.lite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()
model = interpreter.get_signature_runner()

selected_colomns = pd.read_json("inference_args.json", orient="index")
selected_colomns = selected_colomns.transpose().values
selected_colomns = np.reshape(selected_colomns, (164,))


# Read Character to Ordinal Encoding Mapping
with open('character_to_prediction_index.json') as json_file:
    CHAR2ORD = json.load(json_file)

# Ordinal to Character Mapping
ORD2CHAR = {j:i for i,j in CHAR2ORD.items()}

# Output Predictions to string
def outputs2phrase(outputs):
    if outputs.ndim == 2:
        outputs = np.argmax(outputs, axis=1)

    return ''.join([ORD2CHAR.get(s, '') for s in outputs])

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [3]:
def get_right_hand_landmark_indexes(hand, frame):
    index_list = ['WRIST', 'THUMB_CMC', 'THUMB_MCP', 'THUMB_IP', 'THUMB_TIP', 'INDEX_FINGER_MCP', 'INDEX_FINGER_PIP',
                  'INDEX_FINGER_DIP', 'INDEX_FINGER_TIP', 'MIDDLE_FINGER_MCP', 'MIDDLE_FINGER_PIP', 'MIDDLE_FINGER_DIP',
                  'MIDDLE_FINGER_TIP', 'RING_FINGER_MCP', 'RING_FINGER_PIP', 'RING_FINGER_DIP', 'RING_FINGER_TIP',
                  'PINKY_MCP', 'PINKY_PIP', 'PINKY_DIP', 'PINKY_TIP']

    right_hand_landmarks = []
    for part in index_list:
        if hand.right_hand_landmarks:
            right_hand_landmarks.append([frame, f"{frame}-right_hand-{index_list.index(part)}", "right_hand", index_list.index(part), hand.right_hand_landmarks.landmark[mp_holistic.HandLandmark(index_list.index(part))].x, hand.right_hand_landmarks.landmark[mp_holistic.HandLandmark(index_list.index(part))].y, hand.right_hand_landmarks.landmark[mp_holistic.HandLandmark(index_list.index(part))].z])

        else:
            right_hand_landmarks.append([frame, f"{frame}-right_hand-{index_list.index(part)}", "right_hand",index_list.index(part),  0, 0, 0])

    return right_hand_landmarks

In [4]:
def get_left_hand_landmark_indexes(hand, frame):
    index_list = ['WRIST', 'THUMB_CMC', 'THUMB_MCP', 'THUMB_IP', 'THUMB_TIP', 'INDEX_FINGER_MCP', 'INDEX_FINGER_PIP',
                  'INDEX_FINGER_DIP', 'INDEX_FINGER_TIP', 'MIDDLE_FINGER_MCP', 'MIDDLE_FINGER_PIP', 'MIDDLE_FINGER_DIP',
                  'MIDDLE_FINGER_TIP', 'RING_FINGER_MCP', 'RING_FINGER_PIP', 'RING_FINGER_DIP', 'RING_FINGER_TIP',
                  'PINKY_MCP', 'PINKY_PIP', 'PINKY_DIP', 'PINKY_TIP']

    left_hand_landmarks = []
    for part in index_list:
        if hand.left_hand_landmarks:
            left_hand_landmarks.append([frame, f"{frame}-left_hand-{index_list.index(part)}", "left_hand", index_list.index(part), hand.left_hand_landmarks.landmark[mp_holistic.HandLandmark(index_list.index(part))].x, hand.left_hand_landmarks.landmark[mp_holistic.HandLandmark(index_list.index(part))].y, hand.left_hand_landmarks.landmark[mp_holistic.HandLandmark(index_list.index(part))].z])

        else:
            left_hand_landmarks.append([frame, f"{frame}-left_hand-{index_list.index(part)}", "left_hand",index_list.index(part),  0, 0, 0])

    return left_hand_landmarks

In [5]:
def get_post_landmark_indexes(pose, frame):
    index_list = ['NOSE', 'LEFT_EYE_INNER', 'LEFT_EYE', 'LEFT_EYE_OUTER', 'RIGHT_EYE_INNER', 'RIGHT_EYE', 'RIGHT_EYE_OUTER', 'LEFT_EAR', 'RIGHT_EAR', 'MOUTH_LEFT', 'MOUTH_RIGHT', 'LEFT_SHOULDER', 'RIGHT_SHOULDER', 'LEFT_ELBOW', 'RIGHT_ELBOW', 'LEFT_WRIST', 'RIGHT_WRIST', 'LEFT_PINKY', 'RIGHT_PINKY', 'LEFT_INDEX', 'RIGHT_INDEX', 'LEFT_THUMB', 'RIGHT_THUMB', 'LEFT_HIP', 'RIGHT_HIP', 'LEFT_KNEE', 'RIGHT_KNEE', 'LEFT_ANKLE', 'RIGHT_ANKLE', 'LEFT_HEEL', 'RIGHT_HEEL', 'LEFT_FOOT_INDEX', 'RIGHT_FOOT_INDEX']


    pose_landmarks = []
    for part in index_list:
        if pose.pose_landmarks:
            pose_landmarks.append([frame, f"{frame}-pose-{index_list.index(part)}", "pose", index_list.index(part), pose.pose_landmarks.landmark[mp_holistic.PoseLandmark(index_list.index(part))].x, pose.pose_landmarks.landmark[mp_holistic.PoseLandmark(index_list.index(part))].y, pose.pose_landmarks.landmark[mp_holistic.PoseLandmark(index_list.index(part))].z])

        else:
            pose_landmarks.append([frame, f"{frame}-pose-{index_list.index(part)}", "pose",index_list.index(part),  0, 0, 0])

    return pose_landmarks

In [6]:
def get_face_landmark_indexes(face, frame):
    face_landmarks = []
    if face.face_landmarks:
        for i in range(len(face.face_landmarks.landmark)):
            face_landmarks.append([frame, f"{frame}-face-{i}", "face", i, face.face_landmarks.landmark[i].x,
                                   face.face_landmarks.landmark[i].y, face.face_landmarks.landmark[i].z])
    else:
        for i in range(0, 468):
            face_landmarks.append([frame, f"{frame}-face-{i}", "face", i, 0, 0, 0])
    return face_landmarks

In [7]:
def Mediapipe_holistic():
    count = 0
    data = []
    cap = cv2.VideoCapture("WAIT.mp4")
    #cap = cv2.VideoCapture(0)
    with mp_holistic.Holistic(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as hands:

        while cap.isOpened():

            success, image = cap.read()

            start = time.time()

            if image is not None:
                img_h, img_w, img_c = image.shape

            # Convert the BGR image to RGB.
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # To improve performance, optionally mark the image as not writeable to
            # pass by reference.
                image.flags.writeable = False

            # Process the image and find hands
                results = hands.process(image)

                image.flags.writeable = True

            # Draw the hand annotations on the image.
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)


                mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS)
                mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
                mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)

                end = time.time()
                totalTime = end - start

                fps = 1 / totalTime
                count = count + 1

                data.append(get_right_hand_landmark_indexes(results, count))
                data.append(get_left_hand_landmark_indexes(results,count))
                data.append(get_post_landmark_indexes(results, count))
                data.append(get_face_landmark_indexes(results, count))
                cv2.putText(image, f'FPS: {int(fps)}', (20,70), cv2.FONT_HERSHEY_SIMPLEX, 2, (0,255,0), 2)

                #cv2.imshow('MediaPipe Hands', image)

                #if cv2.waitKey(5) & 0xFF == 27:
                  #break
            else:
                cap.release()

            #for live video
            # cv2.putText(image, f'FPS: {int(fps)}', (20,70), cv2.FONT_HERSHEY_SIMPLEX, 2, (0,255,0), 2)
            #
            # cv2.imshow('MediaPipe Hands', image)
            #
            # if cv2.waitKey(5) & 0xFF == 27:
            #   break
    return data

In [8]:
data = Mediapipe_holistic()
flat_list = [item for sublist in data for item in sublist]
df = pd.DataFrame(flat_list, columns=['frame', 'row_id', 'type', 'landmark_index', 'x', 'y', 'z'])

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [9]:
df.head()

Unnamed: 0,frame,row_id,type,landmark_index,x,y,z
0,1,1-right_hand-0,right_hand,0,0.0,0.0,0.0
1,1,1-right_hand-1,right_hand,1,0.0,0.0,0.0
2,1,1-right_hand-2,right_hand,2,0.0,0.0,0.0
3,1,1-right_hand-3,right_hand,3,0.0,0.0,0.0
4,1,1-right_hand-4,right_hand,4,0.0,0.0,0.0


In [10]:
newdf = pd.read_parquet("one_video.parquet")

In [11]:
newdf.columns.tolist()
with open('newdf_columns.txt', 'w') as file:
    file.write('\n'.join(newdf.columns.tolist()))

In [12]:
#newdf.head()

In [13]:
# Reshape the DataFrame using pivot_table
df = pd.DataFrame({'landmark_index': pd.to_numeric(df['landmark_index'], errors='coerce'),
                   'frame': pd.to_numeric(df['frame'], errors='coerce'),
                   'type': df['type'].astype(str),
                   'x': df['x'],
                   'y': df['y'],
                   'z': df['z']})

In [14]:
df.columns

Index(['landmark_index', 'frame', 'type', 'x', 'y', 'z'], dtype='object')

In [15]:
df_pivot = df.pivot_table(index='frame', columns=['type', 'landmark_index'])

In [16]:
df_pivot

Unnamed: 0_level_0,x,x,x,x,x,x,x,x,x,x,...,z,z,z,z,z,z,z,z,z,z
type,face,face,face,face,face,face,face,face,face,face,...,right_hand,right_hand,right_hand,right_hand,right_hand,right_hand,right_hand,right_hand,right_hand,right_hand
landmark_index,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,17,18,19,20
frame,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
1,0.53906,0.541514,0.539884,0.537,0.541945,0.541833,0.540893,0.501844,0.540905,0.541368,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.53979,0.542298,0.540561,0.537582,0.542723,0.542572,0.541435,0.50157,0.541297,0.54172,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.539625,0.541992,0.540317,0.537236,0.542394,0.542212,0.541031,0.5016,0.540848,0.541256,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.539331,0.541843,0.540116,0.537124,0.542256,0.542101,0.540976,0.501365,0.540857,0.541281,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.539128,0.54171,0.539934,0.536963,0.54213,0.541964,0.540805,0.501048,0.54069,0.541127,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.538897,0.541431,0.539663,0.536635,0.541836,0.541654,0.540438,0.500599,0.540266,0.540673,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.538514,0.541069,0.539323,0.53637,0.54148,0.541325,0.540187,0.500574,0.540058,0.540478,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.538284,0.540875,0.539094,0.53615,0.541291,0.541125,0.539964,0.500365,0.539846,0.540284,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.538213,0.540914,0.539048,0.536184,0.541347,0.541167,0.539961,0.500201,0.539843,0.540292,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,0.538083,0.540733,0.538885,0.536011,0.541163,0.540985,0.539787,0.500027,0.539649,0.540087,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
['{}_{}_{}'.format(pos, type_val, point) for pos, type_val, point in df_pivot.columns]

['x_face_0',
 'x_face_1',
 'x_face_2',
 'x_face_3',
 'x_face_4',
 'x_face_5',
 'x_face_6',
 'x_face_7',
 'x_face_8',
 'x_face_9',
 'x_face_10',
 'x_face_11',
 'x_face_12',
 'x_face_13',
 'x_face_14',
 'x_face_15',
 'x_face_16',
 'x_face_17',
 'x_face_18',
 'x_face_19',
 'x_face_20',
 'x_face_21',
 'x_face_22',
 'x_face_23',
 'x_face_24',
 'x_face_25',
 'x_face_26',
 'x_face_27',
 'x_face_28',
 'x_face_29',
 'x_face_30',
 'x_face_31',
 'x_face_32',
 'x_face_33',
 'x_face_34',
 'x_face_35',
 'x_face_36',
 'x_face_37',
 'x_face_38',
 'x_face_39',
 'x_face_40',
 'x_face_41',
 'x_face_42',
 'x_face_43',
 'x_face_44',
 'x_face_45',
 'x_face_46',
 'x_face_47',
 'x_face_48',
 'x_face_49',
 'x_face_50',
 'x_face_51',
 'x_face_52',
 'x_face_53',
 'x_face_54',
 'x_face_55',
 'x_face_56',
 'x_face_57',
 'x_face_58',
 'x_face_59',
 'x_face_60',
 'x_face_61',
 'x_face_62',
 'x_face_63',
 'x_face_64',
 'x_face_65',
 'x_face_66',
 'x_face_67',
 'x_face_68',
 'x_face_69',
 'x_face_70',
 'x_face_71',
 '

In [18]:

# Flatten the MultiIndex column names
#df_pivot.columns = ['{}_{}_{}'.format(pos, type_val, point) for pos, type_val, point in df_pivot.columns]

In [19]:
df_pivot.columns

MultiIndex([('x',       'face',  0),
            ('x',       'face',  1),
            ('x',       'face',  2),
            ('x',       'face',  3),
            ('x',       'face',  4),
            ('x',       'face',  5),
            ('x',       'face',  6),
            ('x',       'face',  7),
            ('x',       'face',  8),
            ('x',       'face',  9),
            ...
            ('z', 'right_hand', 11),
            ('z', 'right_hand', 12),
            ('z', 'right_hand', 13),
            ('z', 'right_hand', 14),
            ('z', 'right_hand', 15),
            ('z', 'right_hand', 16),
            ('z', 'right_hand', 17),
            ('z', 'right_hand', 18),
            ('z', 'right_hand', 19),
            ('z', 'right_hand', 20)],
           names=[None, 'type', 'landmark_index'], length=1629)

In [20]:
# Create the final DataFrame
final_df = pd.DataFrame(df_pivot.to_records(), columns=['{}_{}_{}'.format(pos, type_val, point) for pos, type_val, point in df_pivot.columns])

In [21]:
final_df

Unnamed: 0,x_face_0,x_face_1,x_face_2,x_face_3,x_face_4,x_face_5,x_face_6,x_face_7,x_face_8,x_face_9,...,z_right_hand_11,z_right_hand_12,z_right_hand_13,z_right_hand_14,z_right_hand_15,z_right_hand_16,z_right_hand_17,z_right_hand_18,z_right_hand_19,z_right_hand_20
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
7,,,,,,,,,,,...,,,,,,,,,,
8,,,,,,,,,,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,,,,


In [22]:
common_columns = final_df.columns.intersection(newdf.columns)


In [23]:
final_df

Unnamed: 0,x_face_0,x_face_1,x_face_2,x_face_3,x_face_4,x_face_5,x_face_6,x_face_7,x_face_8,x_face_9,...,z_right_hand_11,z_right_hand_12,z_right_hand_13,z_right_hand_14,z_right_hand_15,z_right_hand_16,z_right_hand_17,z_right_hand_18,z_right_hand_19,z_right_hand_20
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
7,,,,,,,,,,,...,,,,,,,,,,
8,,,,,,,,,,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,,,,


In [24]:
columns_to_remove = list(set(final_df.columns) - set(newdf.columns))
with open('removed_columns.txt', 'w') as file:
    file.write('\n'.join(columns_to_remove))

In [25]:
file = open("removed_columns.txt")

In [26]:
with open('removed_columns.txt', 'r') as file:
    removed_columns = [column.strip() for column in file.readlines()]

# Print the removed columns
len(removed_columns)

1465

In [33]:
removed_columns2 = []

In [34]:
[removed_columns2 for i in removed_columns if i == 'frame']

[]

In [27]:
final_df.drop(columns=removed_columns, inplace=True)

In [28]:
final_df

Unnamed: 0,x_face_0,x_face_13,x_face_14,x_face_17,x_face_37,x_face_39,x_face_40,x_face_61,x_face_78,x_face_80,...,y_right_hand_11,y_right_hand_12,y_right_hand_13,y_right_hand_14,y_right_hand_15,y_right_hand_16,y_right_hand_17,y_right_hand_18,y_right_hand_19,y_right_hand_20
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
7,,,,,,,,,,,...,,,,,,,,,,
8,,,,,,,,,,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,,,,


In [29]:
#cast to float32
final_df = final_df.astype('float32')

In [30]:
demo_output = model(inputs=final_df.values)['outputs']
print(f'phrase decoded: {outputs2phrase(demo_output)}')

phrase decoded: 2113 colon
