In [1]:
import cv2
import itertools
import numpy as np
import re
import glob
import os
import pickle
import json
from skimage import transform as transf
import concurrent.futures
from tqdm import tqdm

In [2]:
lips_folder = '/home/taylorpap/Bootcamp/lips_points'

In [3]:
def save2npz(filename, data=None):
    if not os.path.exists(os.path.dirname(filename)):
        os.makedirs(os.path.dirname(filename))
    np.savez_compressed(filename, data=data)

In [4]:
#Linear Interpolate on images/frames that are missing landmarks
def linear_interpolate(landmarks, start_idx, stop_idx):
    start_landmarks = landmarks[start_idx]
    stop_landmarks = landmarks[stop_idx]
    delta = stop_landmarks - start_landmarks
    for idx in range(1, stop_idx-start_idx):
        landmarks[start_idx+idx] = start_landmarks + idx/float(stop_idx-start_idx) * delta
    return landmarks

In [5]:
def interpolate_missing_landmarks(landmarks):
    good_frames = [indexes for indexes, _ in enumerate(landmarks) if _ is not None]
    if not good_frames:
        return None
    for idx in range(1, len(good_frames)):
        if good_frames[idx] - good_frames[idx-1] == 1:
            continue
        else:
            landmarks = linear_interpolate(landmarks, good_frames[idx-1], good_frames[idx])
    valid_frames_idx = [idx for idx, _ in enumerate(landmarks) if _ is not None]
    # -- Corner case: keep frames at the beginning or at the end failed to be detected.
    if valid_frames_idx:
        landmarks[:valid_frames_idx[0]] = [landmarks[valid_frames_idx[0]]] * valid_frames_idx[0]
        landmarks[valid_frames_idx[-1]:] = [landmarks[valid_frames_idx[-1]]] * (len(landmarks) - valid_frames_idx[-1])
    return landmarks

In [6]:
#Create Method for warping image and getting transform parameters
def warp_img(src, dst, img, std_size):
    tform = transf.estimate_transform('similarity', src, dst)  # find the transformation matrix
    warped = transf.warp(img, inverse_map=tform.inverse, output_shape=std_size)  # wrap the frame image
    warped = warped * 255  # note output from wrap is double image (value range [0,1])
    warped = warped.astype('uint8')
    return warped, tform
#Create Method to apply a previously calculated transform
def apply_transform(transform, img, std_size):
    warped = transf.warp(img, inverse_map=transform.inverse, output_shape=std_size)
    warped = warped * 255  # note output from wrap is double image (value range [0,1])
    warped = warped.astype('uint8')
    return warped

In [7]:
def crop_out_patch(img, landmarks, height, width):
    center_x, center_y = np.mean(landmarks, axis=0)

    cutted_img = np.copy(img[int(round(center_y) - round(height)): int(round(center_y) + round(height)),
                         int(round(center_x) - round(width)): int(round(center_x) + round(width))])
    return cutted_img

In [8]:
test_none = [{'testnone': np.array([[0.28191882, 0.53480649],
        [0.75067544, 0.33600596]])},{'testnone': np.array([None])},{'testnone': np.array([[0.28191882, 0.53480649],
        [0.75067544, 0.33600596]])}]

In [9]:
face_oval_avgs = np.load('/home/taylorpap/Bootcamp/face_oval_averages.npz', allow_pickle=True)['data']
face_oval_avgs = face_oval_avgs * 256
std_size = (256, 256)
landmark_indexes_for_cropping = [2, 3, 10, 11, 26, 30]

In [10]:
def crop_and_return_lip_landmarks(video_path, output_path, oval_landmarks, mouth_landmarks):
    vid_capture = cv2.VideoCapture(video_path)
    frame_idx = 0
    crop_width = 96
    crop_height = 96
    cropped_size = (crop_width, crop_height)
    lips_points = []
    if (vid_capture.isOpened() == False):
        print("Error opening the video file")
    else:
        fps = int(vid_capture.get(5))

        #output_video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, cropped_size)
        while(vid_capture.isOpened()):
            ret, frame = vid_capture.read()
            if ret:
                current_oval = oval_landmarks[frame_idx] *256
                current_lips = mouth_landmarks[frame_idx] *256
                if frame_idx == 0:
                    transformed_frame, trans_mat = warp_img(current_oval[landmark_indexes_for_cropping, :],
                                                            face_oval_avgs[landmark_indexes_for_cropping, :],
                                                            frame,
                                                            std_size)
                    sequence = []
                trans_lips = trans_mat(current_lips)
                trans_frame = apply_transform(trans_mat, frame, std_size)
                cut_frame = crop_out_patch(trans_frame, trans_lips, crop_height//2, crop_width//2)
                #output_video.write(cut_frame)
                sequence.append(cut_frame)
                lips_points.append(trans_lips)
                frame_idx+= 1
            else:
                break
    vid_capture.release()
    #output_video.release()
    cv2.destroyAllWindows()
    return np.array(lips_points), np.array(sequence)

In [11]:
landmarks = [None] * len(test_none)
for frame_idx in range(len(landmarks)):
    try:
        landmarks[frame_idx] = test_none[frame_idx]['testnone']
    except IndexError:
        continue
test_landmarks = interpolate_missing_landmarks(landmarks)
print(test_landmarks)

[array([[0.28191882, 0.53480649],
       [0.75067544, 0.33600596]]), array([None], dtype=object), array([[0.28191882, 0.53480649],
       [0.75067544, 0.33600596]])]


In [12]:
def interpolate_oval_and_face(landmarks_npz):
    landmarks_data = np.load(landmarks_npz, allow_pickle=True)['data']
    landmarks_ovals = [None] * len(landmarks_data)
    landmarks_lips = [None] * len(landmarks_data)
    for frame_idx in range(len(landmarks_ovals)):
        try:
            landmarks_ovals[frame_idx] = landmarks_data[frame_idx]['oval_landmarks']
        except IndexError:
            continue
    for frame_idx in range(len(landmarks_lips)):
        try:
            landmarks_lips[frame_idx] = landmarks_data[frame_idx]['lips_landmarks']
        except IndexError:
            continue
    preprocessed_ovals = interpolate_missing_landmarks(landmarks_ovals)
    preprocessed_lips = interpolate_missing_landmarks(landmarks_lips)
    return preprocessed_ovals, preprocessed_lips

In [13]:
def interpolate_and_crop(video, landmarks_folder, cropped_save_folder):
    assert os.path.isfile(video), "File does not exist. Path input: {}".format(video)
    dir, file = os.path.split(video)
    part, which_folder = os.path.split(dir)
    otherpart, word = os.path.split(part)
    npz_filename = file[:-4] + ".npz"

    #Get Landmarks path and check that file exists
    current_landmarks = os.path.join(landmarks_folder, word, which_folder, npz_filename)
    assert os.path.isfile(current_landmarks), "File does not exist. Path input: {}".format(current_landmarks)
    #Open Landmarks, interpolate missing
    oval_landmarks, lips_landmarks = interpolate_oval_and_face(current_landmarks)
    if oval_landmarks:
        #Create save location for cropped video and transformed lips landmarks
        output_save_video = os.path.join(cropped_save_folder, word, which_folder, file)
        output_save_video_array_path = os.path.join(cropped_save_folder, word, which_folder, npz_filename)
        output_save_lips_points = os.path.join(lips_folder, word, which_folder, npz_filename)

        transformed_lips, output_save_video_as_array = crop_and_return_lip_landmarks(video, output_save_video, oval_landmarks, lips_landmarks)
        save2npz(output_save_lips_points, data=transformed_lips)
        save2npz(output_save_video_array_path, data= output_save_video_as_array)
    else:
        print("No Landmarks in any frame for {}".format(video))

In [14]:
lrw_path = '/media/taylorpap/1TBM2/DatasetML/lipread_mp4'
landmarks_path = '/home/taylorpap/Bootcamp/LANDMARKS'
which_folder = 'test'
cropped_save_loc = '/home/taylorpap/Bootcamp/CroppedLRW'

In [15]:
def multiprocess_cropping(lrw_path, landmarks_path, cropped_save_loc, word='*'):
    videos = glob.glob(os.path.join(lrw_path, word, which_folder, '*.mp4'))
    l= len(videos)
    with tqdm(total = l) as pbar:
        with concurrent.futures.ThreadPoolExecutor(max_workers=12) as executor:
            futures = [executor.submit(interpolate_and_crop, video, landmarks_path, cropped_save_loc) for video in videos]
            for future in concurrent.futures.as_completed(futures):
                pbar.update(1)

In [16]:
#MULTIPROCESS
#multiprocess_cropping(lrw_path, landmarks_path, cropped_save_loc)

In [17]:
#SINGLE PROCESS

#videos = glob.glob(os.path.join(lrw_path, '*', which_folder, '*.mp4'))
#for video in tqdm(videos):
#    interpolate_and_crop(video, landmarks_path, cropped_save_loc)

In [18]:
#example = np.load('/home/taylorpap/Bootcamp/LANDMARKS/ABUSE/test/ABUSE_00027.npz', allow_pickle=True)['data']
#print(example)
#for each_frame in example[2:4]:
#    print(each_frame)

In [19]:
#interpolate_and_crop('/media/taylorpap/1TBM2/DatasetML/lipread_mp4/ABOUT/test/ABOUT_00001.mp4', landmarks_path, cropped_save_loc)

In [20]:
temp_words_list = ['ABSOLUTELY', 'BUDGET', 'EVERYONE', 'HOUSE', 'MILITARY', 'PUBLIC', 'RESULT', 'SIGNIFICANT', 'WEATHER']
which_folder = 'val'

In [21]:
#for testing_words in temp_words_list:
#   multiprocess_cropping(lrw_path, landmarks_path, cropped_save_loc, word=testing_words)

In [22]:
#['BUDGET', 'HOUSE', 'MILITARY', 'PUBLIC', 'RESULT']

In [23]:
#MULTIPROCESS
#which_folder = 'val'
#multiprocess_cropping(lrw_path, landmarks_path, cropped_save_loc)

In [24]:
def get_wordslist_from_txt_file(file_path):
    with open(file_path) as file:
        word_list = file.readlines()
        word_list = [item.rstrip() for item in word_list]
    return word_list

In [25]:
words_list_file = '/home/taylorpap/Bootcamp/wordlist.txt'
words_list = get_wordslist_from_txt_file(words_list_file)

In [32]:
which_folder = 'train'
chunk_size=10
chunked_words_list = [words_list[f:f+chunk_size] for f in range(0, len(words_list), chunk_size)]
temp_words_list_outer = chunked_words_list[40:45]
for temp_words_list in temp_words_list_outer:
    for temp_word in temp_words_list:
        print("Current Word: {}".format(temp_word))
        multiprocess_cropping(lrw_path, landmarks_path, cropped_save_loc, word=temp_word)

Current Word: SMALL


100%|██████████| 1000/1000 [00:25<00:00, 38.80it/s]


Current Word: SOCIAL


100%|██████████| 1000/1000 [00:25<00:00, 39.56it/s]


Current Word: SOMEONE


100%|██████████| 915/915 [00:23<00:00, 39.01it/s]


Current Word: SOMETHING


100%|██████████| 1000/1000 [00:25<00:00, 38.62it/s]


Current Word: SOUTH


100%|██████████| 1000/1000 [00:27<00:00, 36.94it/s]


Current Word: SOUTHERN


100%|██████████| 901/901 [00:24<00:00, 37.11it/s]


Current Word: SPEAKING


100%|██████████| 1000/1000 [00:27<00:00, 36.92it/s]
Exception ignored in: <function ZipFile.__del__ at 0x7f1b0bad93f0>
Traceback (most recent call last):
  File "/home/taylorpap/anaconda3/envs/projectenv/lib/python3.10/zipfile.py", line 1808, in __del__
    self.close()
  File "/home/taylorpap/anaconda3/envs/projectenv/lib/python3.10/zipfile.py", line 1830, in close
    self._fpclose(fp)
  File "/home/taylorpap/anaconda3/envs/projectenv/lib/python3.10/zipfile.py", line 1930, in _fpclose
    fp.close()
OSError: [Errno 28] No space left on device
Exception ignored in: <function ZipFile.__del__ at 0x7f1b0bad93f0>
Traceback (most recent call last):
  File "/home/taylorpap/anaconda3/envs/projectenv/lib/python3.10/zipfile.py", line 1808, in __del__
    self.close()
  File "/home/taylorpap/anaconda3/envs/projectenv/lib/python3.10/zipfile.py", line 1830, in close
    self._fpclose(fp)
  File "/home/taylorpap/anaconda3/envs/projectenv/lib/python3.10/zipfile.py", line 1930, in _fpclose
    fp.cl

Current Word: SPECIAL


100%|██████████| 1000/1000 [00:23<00:00, 41.91it/s]


Current Word: SPEECH


100%|██████████| 1000/1000 [00:23<00:00, 43.32it/s]


Current Word: SPEND


100%|██████████| 834/834 [00:18<00:00, 44.35it/s]


Current Word: SPENDING


100%|██████████| 1000/1000 [00:23<00:00, 42.69it/s]


Current Word: SPENT


 87%|████████▋ | 771/887 [00:16<00:02, 40.70it/s]

No Landmarks in any frame for /media/taylorpap/1TBM2/DatasetML/lipread_mp4/SPENT/train/SPENT_00359.mp4


100%|██████████| 887/887 [00:19<00:00, 45.80it/s]


Current Word: STAFF


100%|██████████| 1000/1000 [00:22<00:00, 43.79it/s]


Current Word: STAGE


100%|██████████| 948/948 [00:21<00:00, 43.98it/s]


Current Word: STAND


 10%|▉         | 96/1000 [00:22<03:33,  4.23it/s]


KeyboardInterrupt: 