In [2]:
import numpy as np 
import matplotlib.pyplot as plt 
import cv2
import os
import mediapipe as mp

In [3]:
import gzip
import pickle

# Root folder that the per-sample video paths are built under.
path = ".\\videos"

# The annotations are stored as a gzip-compressed pickle.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only open archives from a trusted source.
with gzip.open("phoenix14t.pami0.test.annotations_only.gzip", 'rb') as archive:
    annotations = pickle.load(archive)

# Show the first record to see which fields are available.
print(annotations[0])

{'name': 'test/25October_2010_Monday_tagesschau-17', 'signer': 'Signer01', 'gloss': 'REGEN SCHNEE REGION VERSCHWINDEN NORD REGEN KOENNEN REGION STERN KOENNEN SEHEN', 'text': 'regen und schnee lassen an den alpen in der nacht nach im norden und nordosten fallen hier und da schauer sonst ist das klar .'}


In [4]:
# Build two parallel arrays from the annotations: the video file path of each
# sample (x_train) and its 'text' field (y_train).
x_train, y_train = [], []

for annot in annotations:
    # Sample names use '/' separators; rewrite them in place with backslashes
    # before joining them onto the video root folder.
    annot['name'] = annot['name'].replace('/', '\\')
    x_train.append(os.path.join(path, annot['name'] + '.mp4'))
    y_train.append(annot['text'])

x_train, y_train = np.array(x_train), np.array(y_train)
print(x_train.shape, y_train.shape)

(642,) (642,)


In [5]:
# Sanity check: show the first assembled video path.
print(x_train[0])

.\videos\test\25October_2010_Monday_tagesschau-17.mp4


In [6]:
# Short aliases for the MediaPipe modules used by the cells below.
mp_holistic = mp.solutions.holistic  # Holistic model class and its connection sets
mp_drawing = mp.solutions.drawing_utils  # draw_landmarks / DrawingSpec helpers

In [7]:
def mediapipe_detection(image,model):
    """Run a MediaPipe model on a single BGR frame.

    The frame is converted BGR -> RGB, handed to ``model.process`` and then
    converted back RGB -> BGR.

    Args:
        image: frame in BGR channel order (e.g. as read by ``cv2.VideoCapture``).
        model: object exposing ``process(rgb_image)``.

    Returns:
        Tuple ``(image, results)``: the frame converted back to BGR and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The buffer is marked read-only while the model runs -- presumably so
    # MediaPipe can use it without copying (TODO confirm).
    rgb_frame.flags.writeable = False
    detections = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), detections

In [8]:
def draw_landmarks(image,results):
    """Draw face, pose and both hand landmark sets onto ``image`` in place.

    Uses the default drawing style of ``mp_drawing.draw_landmarks``.

    Args:
        image: frame to draw on (modified in place by the drawing helper).
        results: object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    # (landmark list, connection set) pairs, drawn in this order.
    overlays = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    )
    for landmark_list, connections in overlays:
        mp_drawing.draw_landmarks(image, landmark_list, connections)

In [9]:
def draw_styled_landmarks(image,results):
    """Draw face, pose and hand landmarks onto ``image`` with custom colours.

    Each landmark group gets its own pair of drawing specs: one for the
    landmark points and one for the connections between them.

    Args:
        image: frame to draw on (modified in place by the drawing helper).
        results: object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    spec = mp_drawing.DrawingSpec

    # (landmark list, connection set, landmark spec, connection spec),
    # drawn in this order.
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
         spec(color=(80,110,10), thickness=1, circle_radius=1),
         # Channel value was 256, which is outside the 0-255 range of an
         # 8-bit colour channel; 255 is the largest valid value.
         spec(color=(80,255,121), thickness=1, circle_radius=1)),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80,22,10), thickness=2, circle_radius=4),
         spec(color=(80,44,121), thickness=2, circle_radius=2)),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121,22,76), thickness=2, circle_radius=4),
         spec(color=(121,44,250), thickness=2, circle_radius=2)),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245,117,66), thickness=2, circle_radius=4),
         spec(color=(245,66,230), thickness=2, circle_radius=2)),
    )
    for landmark_list, connections, landmark_spec, connection_spec in layers:
        mp_drawing.draw_landmarks(image, landmark_list, connections,
                                  landmark_spec, connection_spec)

In [10]:
def extract_keypoints(results):
    """Flatten one frame's landmark results into a single 1-D feature vector.

    The vector is the concatenation, in this order, of:
      * pose:       (x, y, z, visibility) per landmark
      * face:       (x, y, z) per landmark
      * left hand:  (x, y, z) per landmark
      * right hand: (x, y, z) per landmark

    A group that was not detected is replaced by zeros. The zero block must be
    as long as a detected group, otherwise frames with and without a detection
    produce vectors of different lengths and cannot be stacked. The pose
    fallback was previously 33*3 although each pose landmark contributes four
    values; it is now 33*4.

    Args:
        results: object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy (no detection) or has a ``.landmark`` sequence.

    Returns:
        1-D ``numpy.ndarray``. Its length is 33*4 + 468*3 + 21*3 + 21*3 = 1662
        when every detected group has the expected landmark count
        (33 / 468 / 21 / 21 -- assumed from the fallback sizes).
    """
    def _flatten(group, count, fields):
        # No detection: zeros with the same length a detection would have.
        if not group:
            return np.zeros(count * len(fields))
        return np.array(
            [[getattr(lm, name) for name in fields] for lm in group.landmark]
        ).flatten()

    xyz = ('x', 'y', 'z')
    pose = _flatten(results.pose_landmarks, 33, xyz + ('visibility',))
    face = _flatten(results.face_landmarks, 468, xyz)
    lh = _flatten(results.left_hand_landmarks, 21, xyz)
    rh = _flatten(results.right_hand_landmarks, 21, xyz)

    return np.concatenate([pose,face,lh,rh])

In [13]:
# Number of clips from x_train to process in this cell.
NUM_VIDEOS = 5

# One entry per processed video: an array with one keypoint vector per frame.
image_data = []
for i in range(NUM_VIDEOS):

    video_data = []

    cap = cv2.VideoCapture(x_train[i])
    try:
        # A fresh Holistic instance is created for each video.
        with mp_holistic.Holistic(min_detection_confidence=0.5,min_tracking_confidence=0.5) as holistic:
            while True:
                ret,frame = cap.read()
                if not ret:
                    # No more frames (or the video could not be read).
                    break

                image,results = mediapipe_detection(frame,holistic)

                frame_data = extract_keypoints(results)
                video_data.append(frame_data)

                # No preview window is shown here, so the former
                # cv2.waitKey / cv2.destroyAllWindows calls were dropped:
                # without a window there is no key press to receive.
    finally:
        # Release the capture even if detection raises part-way through.
        cap.release()

    video_data = np.array(video_data)
    # Number of frames extracted from this video.
    print(len(video_data))

    image_data.append(video_data)

181
150
198
130
111
