In [11]:
import dlib
import cv2
import numpy as np
import os
import tensorflow as tf
import imageio
from imutils import face_utils
from tqdm import tqdm


In [None]:
# Paths
video_dir = "raw_data/videos/s1"
alignment_dir = "raw_data/alignments/s1"
output_dir = "lip_reading/processed_lips"
os.makedirs(output_dir, exist_ok=True)

# Load dlib models
detector = dlib.get_frontal_face_detector()
predictor =("raw_data/videos/s1.mpg_vcd/s1/bbaf2n.mpg")

# Lip landmark indices (mouth)
LIP_INDEXES = list(range(48, 68))

def extract_lip_region(frame, landmarks):
    lip_points = landmarks[LIP_INDEXES]
    x, y, w, h = cv2.boundingRect(np.array(lip_points))
    margin = 10
    x = max(x - margin, 0)
    y = max(y - margin, 0)
    return frame[y:y+h+margin, x:x+w+margin]

# Get all video files
video_files = "raw_data/videos/s1.mpg_vcd/s1/bbaf2n.mpg"

print(f"Processing {len(video_files)} video files...")

for video_file in tqdm(video_files):
    video_path = os.path.join(video_dir, video_file)
    cap = cv2.VideoCapture(video_path)

    frame_num = 0
    success = True
    while success:
        success, frame = cap.read()
        if not success:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector(gray)

        if len(faces) > 0:
            shape = predictor(gray, faces[0])
            shape = face_utils.shape_to_np(shape)

            lip = extract_lip_region(frame, shape)

            # Save the lip image
            lip_filename = f"{video_file[:-4]}_frame{frame_num}.png"
            lip_path = os.path.join(output_dir, lip_filename)
            cv2.imwrite(lip_path, lip)

        frame_num += 1

    cap.release()

print("✅ Lip extraction complete.")


Processing 40 video files...


100%|██████████| 40/40 [00:00<00:00, 4877.24it/s]

✅ Lip extraction complete.





In [None]:
#  Set your video and alignment directories
video_dir = "raw_data/videos/s1.mpg_vcd/s1"  # <-- Set to directory, not file
alignment_dir = "/home/diya871/code/G-Gress/lip_reading/raw_data/alignments /s1/align/bbaf2n.align"
output_dir = "lip_reading/processed_lips"
os.makedirs(output_dir, exist_ok=True)


#  Lip landmark indices from dlib's 68-point face model
LIP_INDEXES = list(range(48, 68))

def extract_lip_region(frame, landmarks):
    """Crop the lip region based on 68-point landmarks"""
    lip_points = landmarks[LIP_INDEXES]
    x, y, w, h = cv2.boundingRect(np.array(lip_points))
    margin = 10  # Add padding around lips
    x = max(x - margin, 0)
    y = max(y - margin, 0)
    return frame[y:y+h+margin, x:x+w+margin]


print(f"Processing {len(video_files)} video files...")

for video_file in tqdm(video_files):
    video_path = os.path.join(video_dir, video_file)
    cap = cv2.VideoCapture(video_path)

    frame_num = 0
    success = True
    while success:
        success, frame = cap.read()
        if not success:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector(gray)

        if len(faces) > 0:
            shape = predictor(gray, faces[0])
            shape = face_utils.shape_to_np(shape)

            lip = extract_lip_region(frame, shape)

            #  Save the cropped lip image
            lip_filename = f"{video_file[:-4]}_frame{frame_num}.png"
            lip_path = os.path.join(output_dir, lip_filename)
            cv2.imwrite(lip_path, lip)

        frame_num += 1

    cap.release()

print("✅ Lip extraction complete.")


Processing 40 video files...


100%|██████████| 40/40 [00:00<00:00, 1404.27it/s]

✅ Lip extraction complete.





In [None]:
from IPython.display import Image, display

print("Extracted lip images:")
for fname in os.listdir(output_dir):
    print(fname)
    display(Image(filename=os.path.join(output_dir, fname)))

In [4]:
# Ensure cv2 is imported
import cv2

# Get input shape (height, width, channels) of the first video file
first_video = ("/home/diya871/code/G-Gress/lip_reading/raw_data/videos/s1.mpg_vcd/s1/bbaf2n.mpg")
video_path = first_video  # Use the absolute path directly
cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()
if ret:
    print("Input shape:", frame.shape)  # (height, width, channels)
else:
    print("Failed to read video.")
cap.release()

Input shape: (288, 360, 3)
