In [1]:
import os
import sys
from tqdm import tqdm

# Resolve the project's `src` directory (relative to the current working
# directory) and register it on the import path exactly once, so that the
# project-local imports that follow can be found.
src_path = os.path.abspath(os.path.join('../../', 'src'))
if sys.path.count(src_path) == 0:
    sys.path.append(src_path)

from tools.data_processor import DataProcessor
from tools.frame_processors import SupervisionVertexProcessorWithLandmarkFrontalization
from tools.frame_preprocessors import TextureFrontalizationPreprocessor, FaceExtractionPreprocessor




In [2]:
# Absolute locations of the dataset folders. Everything hangs off the `data`
# directory two levels above the current working directory.
_DATA_ROOT = os.path.abspath(os.path.join('..', '..', 'data'))

# Raw input videos.
RAVDESS_DATA_PATH = os.path.join(_DATA_ROOT, 'raw', 'ravdess')
# Destination for the processed outputs.
PROCESSED_DATA_PATH = os.path.join(_DATA_ROOT, 'processed', 'ravdess')
# Reference key points file (presumably x/y/z coordinates, judging by the
# file name — confirm against the frame processor).
REFERENCE_POINTS_PATH = os.path.join(_DATA_ROOT, 'reference_points', 'key_points_xyz.npy')

# Directory holding the frontalization models, located inside the `src` tree.
_models_root = os.path.join(src_path, 'models')
MODELS_PATH = os.path.join(_models_root, 'frontalization_models')


### Data processor

In [3]:
# Frame preprocessors, built in the order in which they are handed to the
# DataProcessor below.
face_extractor = FaceExtractionPreprocessor(
    skip_bad_frames=False,
    output_size=(200, 200),
)
texture_frontalizer = TextureFrontalizationPreprocessor(
    models_path=MODELS_PATH,
    do_calculate_symmetry=True,
)

# Frame processor; the face mesh is switched off so it outputs pure landmarks.
landmark_processor = SupervisionVertexProcessorWithLandmarkFrontalization(
    reference_points_path=REFERENCE_POINTS_PATH,
    do_make_face_mesh=False,
)

# Pipeline object used by the processing loop to convert each video.
dp = DataProcessor(
    frame_preprocessors=[face_extractor, texture_frontalizer],
    frame_processor=landmark_processor,
)

# Process RAVDESS data

In [4]:
# os.makedirs(PROCESSED_DATA_PATH, exist_ok=True)

In [4]:
# Make sure the output directory exists before listing it: on a fresh checkout
# os.listdir would otherwise raise FileNotFoundError and stop the notebook.
os.makedirs(PROCESSED_DATA_PATH, exist_ok=True)

# Names of the outputs that are already present (the text before the first
# "."), so the processing loop can skip them and an interrupted run can be
# resumed without redoing finished files.
already_processed_data = {
    entry.split(".")[0] for entry in os.listdir(PROCESSED_DATA_PATH)
}

In [11]:
# Collect the top-level dataset folders once.
# - Only directories are kept: a stray file in the dataset root (e.g. OS
#   metadata) would make the nested os.listdir call below fail and abort the
#   whole run.
# - The list is sorted because os.listdir returns entries in arbitrary order;
#   sorting keeps the processing order and the "[i/total]" labels stable.
actor_dirs = sorted(
    entry
    for entry in os.listdir(RAVDESS_DATA_PATH)
    if os.path.isdir(os.path.join(RAVDESS_DATA_PATH, entry))
)
total = len(actor_dirs)

for i, subdir in enumerate(actor_dirs, start=1):
    # The videos sit one level deeper, in "Actor_<NN>", where <NN> is the part
    # of the outer directory name after its last underscore.
    subdir_n = subdir.split("_")[-1]
    actor_path = os.path.join(RAVDESS_DATA_PATH, subdir, f"Actor_{subdir_n}")

    for file in tqdm(sorted(os.listdir(actor_path)), desc=f"Processing files [{i}/{total}]", leave=False):
        file_path = os.path.join(actor_path, file)
        # Output name = file name up to its first "."; this must match the rule
        # used to build `already_processed_data`.
        out_file = file.split(".")[0]
        if out_file in already_processed_data:
            continue
        try:
            dp.process_data(file_path, os.path.join(PROCESSED_DATA_PATH, out_file))
        except Exception as e:
            # Deliberate best-effort: one bad video must not stop a run that
            # takes hours, so report the failure and move on to the next file.
            print(f"Could not process {file_path} due to {e}")

Processing files [13/24]:  28%|██▊       | 33/120 [00:05<00:13,  6.28it/s]

Could not process c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\ravdess\Video_Speech_Actor_13\Actor_13\01-01-05-02-01-01-13.mp4 due to Cannot cast array data from dtype('float32') to dtype('uint8') according to the rule 'safe'


Processing files [17/24]:  82%|████████▏ | 98/120 [16:12<03:28,  9.50s/it] 

Could not process c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\ravdess\Video_Speech_Actor_17\Actor_17\02-01-06-01-01-02-17.mp4 due to Cannot cast array data from dtype('float32') to dtype('uint8') according to the rule 'safe'


Processing files [23/24]:  10%|█         | 12/120 [02:00<16:14,  9.02s/it] 

Could not process c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\ravdess\Video_Speech_Actor_23\Actor_23\01-01-02-02-02-02-23.mp4 due to Cannot cast array data from dtype('float32') to dtype('uint8') according to the rule 'safe'


Processing files [23/24]:  68%|██████▊   | 82/120 [11:47<04:58,  7.85s/it]

Could not process c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\ravdess\Video_Speech_Actor_23\Actor_23\02-01-04-01-01-02-23.mp4 due to Cannot cast array data from dtype('float32') to dtype('uint8') according to the rule 'safe'


Processing files [24/24]:  21%|██        | 25/120 [03:32<13:03,  8.25s/it] 

Could not process c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\ravdess\Video_Speech_Actor_24\Actor_24\01-01-04-02-01-01-24.mp4 due to Cannot cast array data from dtype('float32') to dtype('uint8') according to the rule 'safe'


                                                                           