In [1]:
from pytube import YouTube
from moviepy.editor import VideoFileClip
import deepface
import mediapipe as mp
import cv2
import os
import re
import yt_dlp


2024-11-07 20:08:57.911958: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-07 20:08:57.953992: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


This function checks whether the output directory for each JSON file (test, train, and validation sets) exists and creates any that are missing. This organization is essential for storing the processed video segments in separate folders based on their JSON source, facilitating data management and retrieval.
    

In [2]:
import os

# Output directory for the processed video segments of each dataset split,
# keyed by the split name ("test", "train", "val") used throughout the notebook.
# NOTE(review): the test folder is spelled "MSAL" while the others use "MSASL";
# the name is kept as-is because the directory already exists under that name.
BASE_PATHS = dict(
    test="./MSAL_test_output",     # segments produced from the test JSON
    train="./MSASL_train_output",  # segments produced from the train JSON
    val="./MSASL_val_output",      # segments produced from the validation JSON
)

def setup_output_folders():
    """
    Ensures that the output directory of every dataset split exists.

    Walks the values of BASE_PATHS and creates each directory (including any
    missing parent directories). For every path a line is printed saying
    whether the directory was created or was already present.

    Raises:
    - OSError: If a directory cannot be created, for example because a regular
               file (not a directory) already occupies the path.
    """
    for path in BASE_PATHS.values():
        try:
            # Create first and react to the failure instead of testing
            # os.path.exists() beforehand: this avoids the window in which
            # another process could create the directory between the check
            # and the creation.
            os.makedirs(path)
        except FileExistsError:
            # Something already lives at this path. Only a directory counts as
            # "already set up"; a regular file would make every later write
            # into the folder fail, so surface that problem right away.
            if not os.path.isdir(path):
                raise
            print(f"Directory already exists: {path}")
        else:
            print(f"Created directory: {path}")

# Create the output folders now so the directories listed in BASE_PATHS exist
# before anything is downloaded or written into them
setup_output_folders()


Directory already exists: ./MSAL_test_output
Directory already exists: ./MSASL_train_output
Directory already exists: ./MSASL_val_output


Downloads the YouTube video using yt-dlp
    and saves it to the corresponding folder based on dataset type (test, train, or val).
    
    Parameters:
    - url (str): The original URL of the YouTube video.
    - dataset_type (str): The dataset category, which determines the output folder 
                          (e.g., 'test', 'train', or 'val').

    Returns:
    - file_path (str): The path where the downloaded video is saved, or None if the download failed.

In [3]:
import yt_dlp as youtube_dl
import os
import logging

# Dataset split -> folder that receives the files belonging to that split
BASE_PATHS = dict([
    ("test", "./MSAL_test_output"),
    ("train", "./MSASL_train_output"),
    ("val", "./MSASL_val_output"),
])

def correct_and_download_video_yt_dlp(url, dataset_type):
    """
    Downloads the video from YouTube using yt-dlp into the output folder of the
    given dataset split. Skips private or unavailable videos and logs any
    errors encountered.

    Parameters:
    - url (str): URL of the YouTube video
                 (e.g. 'https://www.youtube.com/watch?v=<id>').
    - dataset_type (str): Key into BASE_PATHS ('test', 'train', or 'val') that
                          selects the output folder.

    Returns:
    - video_path (str | None): Path of the downloaded file, or None when the
                               download failed or no file was found afterwards.
    """
    # Imported locally so the function brings its own dependency with it.
    from urllib.parse import parse_qs, urlparse

    output_dir = BASE_PATHS[dataset_type]

    try:
        # yt-dlp options, configured to ignore minor errors and avoid retries on private videos
        ydl_opts = {
            'format': 'bestvideo+bestaudio/best',
            'outtmpl': os.path.join(output_dir, "%(id)s.%(ext)s"),
            'quiet': True,
            'noplaylist': True,
            'ignoreerrors': True,  # Ignores minor errors, like permissions issues
            'no_warnings': True,  # Suppresses most warnings from yt-dlp
        }

        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            # Attempt to download the video
            ydl.download([url])

    except youtube_dl.DownloadError as e:
        # Log the specific error and print it to the console
        error_message = f"Error downloading video from URL: {url}. Error: {e}"
        logging.error(error_message)
        print(error_message)
        return None  # Return None if the download fails

    # Take the id from the 'v' query parameter so that extra parameters such as
    # '&t=10s' do not leak into it; for URLs without a 'v' parameter (e.g. short
    # links) fall back to the last path segment.
    parsed_url = urlparse(url)
    v_values = parse_qs(parsed_url.query).get('v')
    if v_values:
        video_id = v_values[0]
    else:
        video_id = parsed_url.path.rstrip('/').split('/')[-1]

    # The output template names the file '<id>.<ext>'. The extension depends on
    # the streams that were selected, so '.mp4' is only the preferred guess.
    preferred_path = os.path.join(output_dir, f"{video_id}.mp4")
    if os.path.exists(preferred_path):
        return preferred_path

    # Otherwise accept any file whose name without its extension is exactly
    # the id. This skips partial downloads ('<id>.mp4.part') and per-stream
    # intermediates ('<id>.f137.mp4'), whose stems differ from the bare id.
    if os.path.isdir(output_dir):
        for file_name in sorted(os.listdir(output_dir)):
            if os.path.splitext(file_name)[0] == video_id:
                return os.path.join(output_dir, file_name)

    # With 'ignoreerrors' enabled a failed download may not raise, so the
    # absence of the file is what signals the failure here.
    logging.error(f"Download failed or video file not found for URL: {url}")
    return None


Step 3: Slicing the Video Based on Start and End Times
Now that we’ve successfully downloaded the videos, our next task is to slice these videos into segments based on the start and end times specified in the JSON files. Each segment will correspond to a specific American Sign Language (ASL) gesture.

Purpose of Video Slicing
The JSON file provides timing information for each ASL gesture within the YouTube video, allowing us to create individual video clips focusing on each gesture. This step is essential for isolating gestures for further processing, such as facial sentiment analysis and hand gesture detection.

Approach
Identify Start and End Times:
Extract the start_time and end_time values from the JSON file for each entry. These values denote the time range for each ASL gesture within the downloaded video.
Extract Video Segment:
Using the moviepy library, open the downloaded video file and extract the segment that matches the start_time and end_time values.
Save Each Segment:
Save the extracted video segment locally in the appropriate output folder (test, train, or val) based on the dataset type, with each file named by its class (clean_text).
Output Format
Each sliced video segment will be saved in the corresponding output folder (e.g., ./MSAL_test_output) with a filename corresponding to the class name of the gesture (e.g., forget.mp4 for a segment representing the "forget" gesture).

In [4]:
from moviepy.editor import VideoFileClip
import os

def slice_video(video_path, start_time, end_time, label, dataset_type):
    """
    Extracts a video segment based on the provided start and end times and saves it
    as a new file named after the gesture class.

    The output file name is derived from the label only, so a later segment with
    the same label in the same dataset overwrites the earlier one.

    Parameters:
    - video_path (str): Path to the downloaded video file.
    - start_time (float): Start time (in seconds) for the video segment.
    - end_time (float): End time (in seconds) for the video segment.
    - label (str): Class name for the gesture, used to name the output file.
    - dataset_type (str): The dataset category ('test', 'train', or 'val'), which determines the output folder.

    Returns:
    - output_path (str | None): The path where the sliced video segment is saved,
                                or None if slicing failed for any reason.
    """
    clip = None
    try:
        # Open the video file using moviepy
        clip = VideoFileClip(video_path)

        # Extract the subclip based on start and end times
        segment = clip.subclip(start_time, end_time)

        # Define the output path for the segment, using the gesture class name as the filename
        output_path = os.path.join(BASE_PATHS[dataset_type], f"{label}.mp4")

        # Save the video segment to the specified output path
        segment.write_videofile(output_path, codec="libx264")
        print(f"Saved video segment for '{label}' from {start_time} to {end_time} seconds at {output_path}")

        return output_path  # Return the path of the sliced segment

    except Exception as e:
        print(f"Error slicing video for '{label}' from {start_time} to {end_time}. Error: {e}")
        return None

    finally:
        # Close the source clip on every path, including failures while cutting
        # or encoding, so the underlying reader is not left open.
        if clip is not None:
            clip.close()


In this step, we’ll analyze the facial expressions in each sliced video segment to determine the sentiment (e.g., happy, sad, neutral). This sentiment information will be used in the next step to rename the video files with both the class and the detected sentiment, such as "beer-happy.mp4".

Purpose of Facial Sentiment Analysis
This analysis allows us to add contextual information to each ASL gesture by associating a sentiment, which could be valuable for understanding the emotional context of each gesture in training data.

Approach
Extract Frames: Load the video segment and extract frames. We don’t need every frame for sentiment analysis, so analyzing every nth frame (e.g., every 10th frame, as in the code below) is usually sufficient.

Analyze Each Frame: For each selected frame, we’ll use a facial sentiment analysis model to detect emotions. We’ll use the DeepFace library, which provides pre-trained models for this purpose.

Determine Dominant Sentiment: Analyze the detected emotions and choose the dominant one. This could be done by calculating the most frequently detected emotion across the sampled frames.

Return Sentiment Label: The dominant emotion is returned as the overall sentiment for that video segment.

In [5]:
from deepface import DeepFace
import cv2

def facial_sentiment_analysis(video_path):
    """
    Performs facial sentiment analysis on a video segment by sampling frames
    and determining the dominant emotion across frames.

    Every 10th frame is passed to DeepFace; the dominant emotion of each face
    found in a sampled frame counts as one vote, and the emotion with the most
    votes wins (ties go to the emotion that was seen first).

    Parameters:
    - video_path (str): Path to the video segment file.

    Returns:
    - sentiment (str): The dominant emotion detected in the video segment, or
                       "neutral" if no emotion was detected or the analysis failed.
    """
    # Imported locally so the function brings its own dependency with it.
    from collections import Counter

    # Frame sampling rate (analyzing every nth frame to reduce computation)
    sampling_rate = 10

    # Votes per emotion collected from the sampled frames
    emotion_votes = Counter()

    cap = None
    try:
        # Initialize OpenCV's video capture for the input video segment
        cap = cv2.VideoCapture(video_path)
        frame_count = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break  # No more frames to read

            if frame_count % sampling_rate == 0:
                # DeepFace documents numpy image input as BGR, which is the
                # channel order OpenCV already delivers, so the frame is passed
                # on unchanged (converting to RGB would swap red and blue).
                analysis = DeepFace.analyze(frame, actions=['emotion'], enforce_detection=False)

                # The result is either one dict or a list with one dict per face
                faces = analysis if isinstance(analysis, list) else [analysis]
                for face_data in faces:
                    if 'dominant_emotion' in face_data:
                        emotion_votes[face_data['dominant_emotion']] += 1

            frame_count += 1

    except Exception as e:
        print(f"Error performing facial sentiment analysis on {video_path}. Error: {e}")
        return "neutral"  # Default to neutral if analysis fails

    finally:
        # Release the capture on every path, including analysis failures
        if cap is not None:
            cap.release()

    if emotion_votes:
        sentiment = emotion_votes.most_common(1)[0][0]
        print(f"Dominant sentiment for {video_path}: {sentiment}")
        return sentiment

    print("No emotion detected in video segment.")
    return "neutral"


Next Step: Renaming the File with Class and Sentiment
Now that we have the dominant sentiment for each video segment, the next step is to rename the segment files to include both the class and sentiment, following the format class-sentiment.mp4 (e.g., beer-happy.mp4).

Approach
Define File Naming Convention:
Use the gesture class (clean_text) and the detected sentiment to create the filename in the format class-sentiment.mp4.
Rename the File:
Save the renamed file in the appropriate output directory (test, train, or val), replacing the original filename.

In [6]:
import os

def rename_with_sentiment(file_path, label, sentiment, dataset_type):
    """
    Moves a video segment to '<label>-<sentiment>.mp4' inside the output folder
    of the given dataset.

    Parameters:
    - file_path (str): Current path of the video segment.
    - label (str): Class name for the gesture.
    - sentiment (str): Detected sentiment for the video segment.
    - dataset_type (str): Key into BASE_PATHS (e.g., 'test', 'train', or 'val').

    Returns:
    - new_path (str): Path of the renamed file, or the unchanged file_path when
                      the rename could not be performed.
    """
    # Target location: split folder + "class-sentiment" file name
    new_path = os.path.join(BASE_PATHS[dataset_type], f"{label}-{sentiment}.mp4")

    try:
        os.rename(file_path, new_path)
        print(f"Renamed file to {new_path}")
    except Exception as e:
        # Report the problem and hand back the path the file still has
        print(f"Error renaming file {file_path} to {new_path}. Error: {e}")
        return file_path

    return new_path


Step 6: Extracting Hand Gestures with MediaPipe
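The final step is to extract hand gestures from each video segment. We’ll use MediaPipe to detect hand landmarks and draw them, together with their connections, as a simplified "stick-man" style representation of the hand gestures. Note that the function below draws this overlay on top of the original frames; to actually reduce the file size by keeping only the hand movements, the landmarks would have to be drawn on a blank canvas instead.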

Approach
Initialize MediaPipe Hand Detection:
Set up MediaPipe’s hand detection model with necessary configurations (e.g., detection confidence).
Process Each Frame:
For each frame in the video, detect hand landmarks.
Draw hand landmarks and connections as a "stick-man" representation.
Save the Processed Video:
Save the output with the same naming convention (class-sentiment.mp4) in the designated folder (e.g., MSAL_test_output).

In [7]:
import mediapipe as mp
import cv2

def extract_hand_gesture(video_path, label, sentiment, dataset_type):
    """
    Detects hand landmarks in every frame of a video segment, draws them
    (with their connections) onto the frame as a stick-man style overlay, and
    saves the result as '<label>-<sentiment>.mp4' in the dataset's output folder.

    When the input file already is that output file, the result is written to a
    temporary file first and swapped in afterwards, so the video is never
    overwritten while it is still being read.

    Parameters:
    - video_path (str): Path to the video segment file.
    - label (str): Class name for the gesture.
    - sentiment (str): Detected sentiment for the video segment.
    - dataset_type (str): The dataset category (e.g., 'test', 'train', or 'val').

    Returns:
    - output_path (str | None): Path where the processed hand gesture video is
                                saved, or None if the input video could not be opened.
    """
    # Define output file path with the naming convention
    output_dir = BASE_PATHS[dataset_type]
    output_path = os.path.join(output_dir, f"{label}-{sentiment}.mp4")

    # Opening a writer on the file that is being read would truncate it, so in
    # that case write next to it and replace the original once done.
    in_place = os.path.abspath(video_path) == os.path.abspath(output_path)
    if in_place:
        write_path = os.path.join(output_dir, f"{label}-{sentiment}.hands-tmp.mp4")
    else:
        write_path = output_path

    # Set up OpenCV video capture for reading frames from the original video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        print(f"Error opening video for hand gesture extraction: {video_path}")
        return None

    mp_hands = mp.solutions.hands
    hands = None
    out = None
    finished = False
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the frame rate as reported (e.g. 29.97) instead of truncating it
        # to an int, which would change the playback speed of the output.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0  # fallback when the container reports no usable frame rate

        # Define MediaPipe Hands solution
        hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)

        # Set up video writer for the stick-man output
        out = cv2.VideoWriter(write_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

        # Process each frame
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            # Convert the frame to RGB as required by MediaPipe
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(frame_rgb)

            # Draw landmarks if hands are detected
            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    # Draw hand landmarks and connections on the frame
                    mp.solutions.drawing_utils.draw_landmarks(
                        frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Write the processed frame to the output video
            out.write(frame)

        finished = True

    finally:
        # Release resources on every path, including failures mid-video
        cap.release()
        if out is not None:
            out.release()
        if hands is not None:
            hands.close()
        # Do not leave a half-written temporary file behind after a failure
        if in_place and not finished and os.path.exists(write_path):
            os.remove(write_path)

    if in_place:
        # Reader and writer are closed now, so the original can be replaced
        os.replace(write_path, output_path)

    print(f"Hand gesture video saved at: {output_path}")

    return output_path


Pipeline

In [8]:
import json
import logging
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import random
import yt_dlp

# Send records of level ERROR and above to error_log.txt, each prefixed with
# a timestamp and the level name
logging.basicConfig(
    filename="error_log.txt",
    level=logging.ERROR,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Output directory per dataset split; the keys are the dataset_type values
# passed to the pipeline functions
BASE_PATHS = dict(
    test="./MSAL_test_output",
    train="./MSASL_train_output",
    val="./MSASL_val_output",
)

# Setup output folders
def setup_output_folders():
    """
    Creates every directory listed in BASE_PATHS, leaving existing ones
    untouched, and prints one "Directory ready" line per directory.
    """
    for output_dir in BASE_PATHS.values():
        # exist_ok=True makes the call a no-op for directories that are already there
        os.makedirs(output_dir, exist_ok=True)
        print(f"Directory ready: {output_dir}")

def correct_and_download_video_yt_dlp(url, dataset_type):
    """
    Download a YouTube video with yt-dlp into the dataset's output folder.

    Parameters:
    - url (str): URL of the YouTube video
                 (e.g. 'https://www.youtube.com/watch?v=<id>').
    - dataset_type (str): Key into BASE_PATHS ('test', 'train', or 'val').

    Returns:
    - video_path (str | None): Path of the downloaded file, or None when yt-dlp
                               reported a download error or no downloaded file
                               could be found afterwards.
    """
    # Imported locally so the function brings its own dependency with it.
    from urllib.parse import parse_qs, urlparse

    output_dir = BASE_PATHS[dataset_type]

    try:
        output_path = os.path.join(output_dir, "%(id)s.%(ext)s")
        ydl_opts = {'format': 'bestvideo+bestaudio/best', 'outtmpl': output_path, 'quiet': True, 'noplaylist': True}

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except yt_dlp.DownloadError as e:
        logging.error(f"Download error for URL {url}: {e}")
        return None

    # Take the id from the 'v' query parameter so that extra parameters such as
    # '&t=10s' do not leak into it; for URLs without a 'v' parameter (e.g. short
    # links) fall back to the last path segment.
    parsed_url = urlparse(url)
    v_values = parse_qs(parsed_url.query).get('v')
    if v_values:
        video_id = v_values[0]
    else:
        video_id = parsed_url.path.rstrip('/').split('/')[-1]

    # The output template names the file '<id>.<ext>'. The extension depends on
    # the streams that were selected, so '.mp4' is only the preferred guess.
    preferred_path = os.path.join(output_dir, f"{video_id}.mp4")
    if os.path.exists(preferred_path):
        return preferred_path

    # Otherwise accept any file whose name without its extension is exactly
    # the id. This skips partial downloads ('<id>.mp4.part') and per-stream
    # intermediates ('<id>.f137.mp4'), whose stems differ from the bare id.
    if os.path.isdir(output_dir):
        for file_name in sorted(os.listdir(output_dir)):
            if os.path.splitext(file_name)[0] == video_id:
                return os.path.join(output_dir, file_name)

    logging.error(f"Downloaded file not found for URL {url}")
    return None

def process_single_entry(entry, dataset_type):
    """
    Process a single JSON entry: download its video and report the outcome.

    Failures are not raised; they are written to the error log and printed so
    that one bad entry does not stop the processing of the others.

    Parameters:
    - entry (dict): One dataset record; the keys read here are 'url',
                    'start_time', 'end_time' and 'clean_text'.
    - dataset_type (str): The dataset category ('test', 'train', or 'val').

    Returns:
    - success (bool): True if the entry was processed, False if anything failed.
    """
    label = entry.get("clean_text")
    try:
        url = entry.get("url")
        if not url:
            # Fail early with a clear message instead of handing None to the downloader
            raise ValueError("Entry has no 'url' value")

        video_path = correct_and_download_video_yt_dlp(url, dataset_type)

        if not video_path or not os.path.exists(video_path):
            raise FileNotFoundError(f"Downloaded video not found for URL: {url}")

        start_time = entry.get("start_time")
        end_time = entry.get("end_time")
        # Slicing, sentiment, renaming, etc., would be invoked here
        # Assume all other functions (e.g., slicing) are defined similarly
        # ...

        # Log successful processing
        print(f"Processed '{label}' from {start_time} to {end_time} in dataset '{dataset_type}'")
        return True

    except Exception as e:
        logging.error(f"Error processing entry '{label}' in dataset '{dataset_type}': {e}")
        print(f"Error processing entry '{label}': {e}")
        return False

def process_asl_dataset_parallel(json_file_path, dataset_type, sample_size=2, max_workers=None):
    """
    Process the entries of a dataset JSON file in parallel.

    Parameters:
    - json_file_path (str): Path to the JSON file holding a list of entries.
    - dataset_type (str): The dataset category ('test', 'train', or 'val'),
                          passed on to process_single_entry for every entry.
    - sample_size (int | None): Number of randomly chosen entries to process
                                (capped at the number of entries). The default
                                of 2 keeps test runs short; pass None to process
                                every entry. The sample is not seeded, so it
                                differs between runs.
    - max_workers (int | None): Thread count for the pool; None leaves the
                                choice to ThreadPoolExecutor.

    Returns:
    - None. If the JSON file does not exist, the problem is logged and printed
      and nothing is processed.
    """
    if not os.path.isfile(json_file_path):
        logging.error(f"JSON file not found: {json_file_path}")
        # Also report on the console: the log only goes to a file, so a wrong
        # path would otherwise look like a run that silently did nothing.
        print(f"JSON file not found: {json_file_path}")
        return

    # Load JSON data
    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Pick the entries to process: everything, or a random sample for testing
    if sample_size is None:
        selected_entries = list(data)
    else:
        selected_entries = random.sample(data, max(0, min(sample_size, len(data))))

    # Process each entry in parallel
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_single_entry, entry, dataset_type) for entry in selected_entries]
        for future in as_completed(futures):
            future.result()  # Re-raises any exception that escaped a worker

# Example usage: prepare the output folders, then run each dataset split in turn
if __name__ == "__main__":
    setup_output_folders()

    # (JSON file, dataset split) pairs, processed in this order.
    # Update paths with actual locations for WSL compatibility if needed
    dataset_files = (
        ("/mnt/d/ASL/Dataset/Json/MSASL_test.json", "test"),
        ("/mnt/d/ASL/Dataset/Json/MSASL_train.json", "train"),
        ("/mnt/d/ASL/Dataset/Json/MSASL_val.json", "val"),
    )
    for json_path, split in dataset_files:
        process_asl_dataset_parallel(json_path, split)


Deprecated Feature: Support for Python version 3.8 has been deprecated. Please update to Python 3.9 or above
Deprecated Feature: Support for Python version 3.8 has been deprecated. Please update to Python 3.9 or above


Directory ready: ./MSAL_test_output
Directory ready: ./MSASL_train_output
Directory ready: ./MSASL_val_output
[download]  17.0% of  117.54MiB at   29.29MiB/s ETA 00:03  



Processed 'milk' from 29.07 to 30.709 in dataset 'test'
                                                           

Deprecated Feature: Support for Python version 3.8 has been deprecated. Please update to Python 3.9 or above
Deprecated Feature: Support for Python version 3.8 has been deprecated. Please update to Python 3.9 or above


Processed 'bald' from 44.678 to 48.348 in dataset 'test'


ERROR: [youtube] cdl5N710d28: Video unavailable


Error processing entry 'nurse': Downloaded video not found for URL: https://www.youtube.com/watch?v=cdl5N710d28
                                                                         

Deprecated Feature: Support for Python version 3.8 has been deprecated. Please update to Python 3.9 or above
Deprecated Feature: Support for Python version 3.8 has been deprecated. Please update to Python 3.9 or above


Processed 'believe' from 21.321 to 25.526 in dataset 'train'
                                                           



Error processing entry 'help': Downloaded video not found for URL: https://www.youtube.com/watch?v=bX1eJjB3nyA




Error processing entry 'airplane': Downloaded video not found for URL: https://www.youtube.com/watch?v=gVUwBsPOUjk
