# Video Preprocessing

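This notebook runs motion and pose analysis on every video listed in the target dataset metadata using `VideoAnalyzer`, derives per-video preprocessing parameters (trim range, alignment offsets, scale factors) from the saved analysis info, and then applies them to each video and its landmarks using `Preprocessor`.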

In [2]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# reloaded before each cell executes; edits to the local project modules are
# then picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import os
import pandas as pd
import json

In [4]:
from video_analyzer import VideoAnalyzer
from preprocessor import Preprocessor

2025-03-30 02:26:32.372756: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743269192.544962  176545 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743269192.600484  176545 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-30 02:26:33.012095: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# Identifier of the analysis run. It is passed to VideoAnalyzer and is used
# further down as a directory name under data/interim/Analysis.
timestamp = "28032025"

# Project root that every data path in this notebook is joined onto.
# Set the BSL_PROJECT_ROOT environment variable to run the notebook on another
# machine; the original location is kept as the default so existing runs are
# unaffected.
path_to_root = os.environ.get(
    "BSL_PROJECT_ROOT",
    "/home/ben/projects/SaoPauloBrazilChapter_BrazilianSignLanguage/",
)

In [6]:
# Location of the combined raw-dataset metadata table, relative to the project root.
metadata_csv_path = os.path.join(
    path_to_root, "data", "raw", "combined", "target_dataset_video_metadata.csv"
)

# One row per video; the rows are iterated over by the processing loops below.
metadata = pd.read_csv(metadata_csv_path)

In [None]:
# Run the analysis stage once per video listed in the metadata table.
# NOTE(review): this cell has no execution count in the saved notebook, so the
# analysis JSON files read later were presumably produced in an earlier
# session — confirm before relying on Restart & Run All timing.
for i, metadata_row in metadata.iterrows():
    # One analyzer per metadata row, keyed to the run `timestamp` and the project root.
    analyzer = VideoAnalyzer(
        metadata_row,
        timestamp,
        path_to_root,
        verbose=False,
    )

    # Motion stage: detection is called before analysis.
    # The returned values are not used anywhere else in the visible notebook.
    motion_data = analyzer.motion_detect()
    motion_result = analyzer.motion_analyze()

    # Pose stage: same detect-then-analyze order as the motion stage.
    pose_data = analyzer.pose_detect()
    pose_result = analyzer.pose_analyze()

    # Presumably writes the per-video "<name>_analysis_info.json" that the
    # preprocessing cell loads — TODO confirm against VideoAnalyzer.
    analyzer.save_analysis_info()


In [7]:
def get_processing_params(analysis_info, target_duration=3):
    """Derive the preprocessing parameters for one video from its analysis info.

    The trim range is copied from the motion analysis. The alignment offsets are
    weighted averages of ``reference - measured`` positions, and the scale
    factors are weighted averages of ``reference / measured`` sizes, all taken
    from the pose analysis.

    Args:
        analysis_info: Nested dict that must provide
            ``["motion_analysis"]["start_frame"]`` and ``["end_frame"]``;
            ``["pose_analysis"]["horizontal_offsets"][part]["median"]`` and
            ``["pose_analysis"]["vertical_offsets"][part]["median"]`` for
            ``part`` in ``"shoulders"`` and ``"face"``;
            ``["pose_analysis"]["landmark_measurements"][name]["mean"]`` for
            ``"shoulder_width"``, ``"face_width"`` and ``"face_height"``; and
            ``["pose_analysis"]["landmark_measurements"]["chin_to_shoulders"]["median"]``.
        target_duration: Value stored under ``"target_duration"`` in the result.
            Defaults to 3, the value that was previously hard-coded
            (presumably seconds — TODO confirm against Preprocessor).

    Returns:
        dict with the keys ``"start_frame"``, ``"end_frame"``,
        ``"horizontal_offset"``, ``"vertical_offset"``, ``"x_scale_factor"``,
        ``"y_scale_factor"`` and ``"target_duration"``.

    Raises:
        KeyError: If any of the required keys is missing from ``analysis_info``.
        ZeroDivisionError: If one of the landmark measurements is 0 (for plain
            Python numbers), since the scale factors divide by them.
    """
    motion = analysis_info["motion_analysis"]
    pose = analysis_info["pose_analysis"]

    # --- Trim range -------------------------------------------------------
    start_frame = motion["start_frame"]
    end_frame = motion["end_frame"]

    # --- Horizontal offset ------------------------------------------------
    # Both body parts are compared against the same horizontal reference (0.5);
    # shoulders carry more weight than the face.
    horizontal = pose["horizontal_offsets"]
    reference_x_shoulders = 0.5
    reference_x_face = 0.5
    shoulders_x_offset = reference_x_shoulders - horizontal["shoulders"]["median"]
    face_x_offset = reference_x_face - horizontal["face"]["median"]

    shoulders_x_weight = 0.7
    face_x_weight = 0.3
    horizontal_offset = shoulders_x_weight * shoulders_x_offset + face_x_weight * face_x_offset

    # --- Scale factors ----------------------------------------------------
    # Measured sizes. Note that chin_to_shoulders uses the median while the
    # other three measurements use the mean (kept exactly as in the original).
    measurements = pose["landmark_measurements"]
    shoulder_width = measurements["shoulder_width"]["mean"]
    face_width = measurements["face_width"]["mean"]
    face_height = measurements["face_height"]["mean"]
    chin_to_shoulders = measurements["chin_to_shoulders"]["median"]

    # Reference sizes to scale to.
    reference_shoulder_width = 0.3
    reference_face_width = 0.15
    reference_face_height = 0.2
    reference_chin_to_shoulders = 0.15

    # Horizontal scale: weighted average of reference/measured width ratios.
    shoulder_width_weight = 0.7
    face_width_weight = 0.3
    x_scale_factor = (
        shoulder_width_weight * reference_shoulder_width / shoulder_width
        + face_width_weight * reference_face_width / face_width
    )

    # Vertical scale: weighted average of reference/measured height ratios.
    face_height_weight = 0.7
    chin_to_shoulders_weight = 0.3
    y_scale_factor = (
        face_height_weight * reference_face_height / face_height
        + chin_to_shoulders_weight * reference_chin_to_shoulders / chin_to_shoulders
    )

    # --- Vertical offset --------------------------------------------------
    # Unlike the horizontal case, the two parts have different references and
    # a different weighting.
    vertical = pose["vertical_offsets"]
    reference_y_shoulders = 0.5
    reference_y_face = 0.25
    shoulders_y_offset = reference_y_shoulders - vertical["shoulders"]["median"]
    face_y_offset = reference_y_face - vertical["face"]["median"]

    shoulders_y_weight = 0.6
    face_y_weight = 0.4
    vertical_offset = shoulders_y_weight * shoulders_y_offset + face_y_weight * face_y_offset

    return {
        "start_frame": start_frame,
        "end_frame": end_frame,
        "horizontal_offset": horizontal_offset,
        "vertical_offset": vertical_offset,
        "x_scale_factor": x_scale_factor,
        "y_scale_factor": y_scale_factor,
        "target_duration": target_duration,
    }

In [8]:
# Directory holding the per-video analysis JSON files for this run's timestamp.
analysis_json_dir = os.path.join(
    path_to_root, "data", "interim", "Analysis", timestamp, "individual_json"
)

for i, metadata_row in metadata.iterrows():
    # The file stem is everything before the FIRST dot of the video filename.
    video_stem = metadata_row["filename"].split(".")[0]
    analysis_json_path = os.path.join(analysis_json_dir, video_stem + "_analysis_info.json")

    # Load the analysis results saved for this video.
    with open(analysis_json_path) as f:
        analysis_info = json.load(f)

    # Turn the analysis statistics into trim / alignment / scaling parameters.
    preprocessing_params = get_processing_params(analysis_info)

    # Apply the parameters to the video first, then to its landmarks.
    preprocessor = Preprocessor(metadata_row, preprocessing_params, path_to_root)
    preprocessor.preprocess_video()
    preprocessor.preprocess_landmarks()


Initialized Preprocessor for ajudar_ne_1.mp4
Naming this preprocessing version: v1
Preprocessing video: /home/ben/projects/SaoPauloBrazilChapter_BrazilianSignLanguage/data/raw/combined/videos/ajudar_ne_1.mp4
Loaded 44 frames from video
Trimmed video from 44 frames to 32 frames
Frame range: 7 to 38
Horizontally aligned frames with offset 0.019842283427715303, shifted 5 pixels right
Filled empty space with edge colors from the original frame
Scaled frames with factors: x=0.9782713052799648, y=0.8576274074905624
Using edge colors to fill any empty space from scaling
Vertically aligned frames with offset -0.02808350026607513, shifted 5 pixels up
Filled empty space with edge colors from the original frame
Saved interim processed video to /home/ben/projects/SaoPauloBrazilChapter_BrazilianSignLanguage/data/interim/Videos/ajudar_ne_1_processed.mp4
Padded frames from 32 to 36 frames
Added 2 frames at the start and 2 at the end
Saved video metadata to /home/ben/projects/SaoPauloBrazilChapter_Bra

: 