In [1]:
import os
import sys
from tqdm import tqdm
import xml.etree.ElementTree as ET

src_path = os.path.abspath(os.path.join('../../', 'src'))
if src_path not in sys.path:
    sys.path.append(src_path)

from tools.data_processor import DataProcessor
from tools.frame_processors import SupervisionVertexProcessorWithLandmarkFrontalization
from tools.frame_preprocessors import TextureFrontalizationPreprocessor, FaceExtractionPreprocessor




In [2]:
SILESIAN_DATA_PATH = os.path.abspath(os.path.join('..', '..', 'data', 'raw', 'silesian_deception'))
MIDWAY_DATA_PATH = os.path.abspath(os.path.join('..', '..', 'data', 'raw', 'silesian_deception_cut'))
PROCESSED_DATA_PATH = os.path.abspath(os.path.join('..', '..', 'data', 'processed', 'miami_deception'))
REFERENCE_POINTS_PATH = os.path.abspath(os.path.join('..', '..', 'data', 'reference_points', 'key_points_xyz.npy'))

MODELS_PATH = os.path.join(src_path, 'models', 'frontalization_models')

### For this dataset we first need to cut the videos into separate questions based on Truth / Deception

In [3]:
# according to https://www.researchgate.net/publication/301461665_Silesian_Deception_Database_Presentation_and_Analysis
QUESTIONS_DECEPTION = [0, 0, 1, 1, 1, 1, 1, 1, 0, 1]

In [37]:
mappings = {}

In [38]:
for subfolder in ["poli1Video", "poli2Video", "poli3Video"]:
    mappings[subfolder] = {}
    SUBFOLDER_DIR = os.path.join(SILESIAN_DATA_PATH, subfolder)
    for file in os.listdir(SUBFOLDER_DIR):
        if ".avi" not in file:
            continue
        person_id = file.split(".")[0]
        mappings[subfolder][person_id] = {}
        tree = ET.parse(os.path.join(SUBFOLDER_DIR, f"{person_id}.eaf"))
        time_slot_mapping = {
            slot.get("TIME_SLOT_ID"): slot.get("TIME_VALUE")
            for slot in tree.findall(".//TIME_SLOT")
        }
        for tier in tree.findall("TIER"):
            if (
                tier.get("DEFAULT_LOCALE") == "pl"
                and tier.get("LINGUISTIC_TYPE_REF") == "Question"
                and tier.get("TIER_ID") == "Question"
            ):
                for i, annotation in enumerate(tier.findall(".//ANNOTATION/ALIGNABLE_ANNOTATION"), start=1):
                    time_slot1 = annotation.get("TIME_SLOT_REF1")
                    time_slot2 = annotation.get("TIME_SLOT_REF2")
                    mappings[subfolder][person_id][i] = (int(time_slot_mapping[time_slot1]), int(time_slot_mapping[time_slot2]))
        

In [None]:
from moviepy import VideoFileClip


for subfolder in ["poli1Video", "poli2Video", "poli3Video"]:
    SUBFOLDER_DIR = os.path.join(SILESIAN_DATA_PATH, subfolder)
    OUTPUT_DIR = os.path.join(MIDWAY_DATA_PATH, subfolder)
    if not os.path.exists(OUTPUT_DIR):
        os.mkdir(OUTPUT_DIR)
    for file in os.listdir(SUBFOLDER_DIR):
        if ".avi" not in file:
            continue
        person_id = file.split(".")[0]
        if os.path.exists(os.path.join(OUTPUT_DIR, f"{person_id}_1.avi")):  # skip for existing
            continue

        for q_id in range(1, 11):
            with VideoFileClip(os.path.join(SUBFOLDER_DIR, file)) as video:
                new = video.subclipped(
                    mappings[subfolder][person_id][q_id][0] / 1000,
                    mappings[subfolder][person_id][q_id][1] / 1000,
                )
                new.write_videofile(os.path.join(OUTPUT_DIR, f"{person_id}_{q_id}.avi"), codec="libx264", preset="ultrafast", bitrate="5000k")

                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:40<00:01, 1632.85it/s, now=None]

{'video_found': True, 'audio_found': False, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 480], 'bitrate': 20496, 'fps': 100.0, 'codec_name': 'mjpeg', 'profile': '(Baseline)'}], 'input_number': 0}], 'duration': 119.67, 'bitrate': 20524, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'mjpeg', 'video_profile': '(Baseline)', 'video_size': [640, 480], 'video_bitrate': 20496, 'video_fps': 100.0, 'video_duration': 119.67, 'video_n_frames': 11967}
C:\Users\Bartosz\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\imageio_ffmpeg\binaries\ffmpeg-win64-v4.2.2.exe -i c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception\poli1Video\person1.avi -loglevel error -f image2pipe -vf scale=640:480 -sws_flags bicubic -pix_fmt rgb24 -vcodec rawvideo -
{


[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:41<00:01, 1632.85it/s, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_1.avi
{'video_found': True, 'audio_found': False, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 480], 'bitrate': 20496, 'fps': 100.0, 'codec_name': 'mjpeg', 'profile': '(Baseline)'}], 'input_number': 0}], 'duration': 119.67, 'bitrate': 20524, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'mjpeg', 'video_profile': '(Baseline)', 'video_size': [640, 480], 'video_bitrate': 20496, 'video_fps': 100.0, 'video_duration': 119.67, 'video_n_frames': 11967}
C:\Users\Bartosz\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\imageio_ffmpeg\binaries\ffmpeg-win64-v4.2.2.exe -i c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw

                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:41<00:01, 1632.85it/s, now=None]

MoviePy - Building video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_2.avi.
MoviePy - Writing video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_2.avi




[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:42<00:01, 1632.85it/s, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_2.avi
{'video_found': True, 'audio_found': False, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 480], 'bitrate': 20496, 'fps': 100.0, 'codec_name': 'mjpeg', 'profile': '(Baseline)'}], 'input_number': 0}], 'duration': 119.67, 'bitrate': 20524, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'mjpeg', 'video_profile': '(Baseline)', 'video_size': [640, 480], 'video_bitrate': 20496, 'video_fps': 100.0, 'video_duration': 119.67, 'video_n_frames': 11967}
C:\Users\Bartosz\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\imageio_ffmpeg\binaries\ffmpeg-win64-v4.2.2.exe -i c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw

                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:42<00:01, 1632.85it/s, now=None]

MoviePy - Building video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_3.avi.
MoviePy - Writing video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_3.avi




[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:44<00:01, 1632.85it/s, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_3.avi
{'video_found': True, 'audio_found': False, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 480], 'bitrate': 20496, 'fps': 100.0, 'codec_name': 'mjpeg', 'profile': '(Baseline)'}], 'input_number': 0}], 'duration': 119.67, 'bitrate': 20524, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'mjpeg', 'video_profile': '(Baseline)', 'video_size': [640, 480], 'video_bitrate': 20496, 'video_fps': 100.0, 'video_duration': 119.67, 'video_n_frames': 11967}
C:\Users\Bartosz\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\imageio_ffmpeg\binaries\ffmpeg-win64-v4.2.2.exe -i c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw

                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:44<00:01, 1632.85it/s, now=None]

MoviePy - Building video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_4.avi.
MoviePy - Writing video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_4.avi




[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:45<00:01, 1632.85it/s, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_4.avi
{'video_found': True, 'audio_found': False, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 480], 'bitrate': 20496, 'fps': 100.0, 'codec_name': 'mjpeg', 'profile': '(Baseline)'}], 'input_number': 0}], 'duration': 119.67, 'bitrate': 20524, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'mjpeg', 'video_profile': '(Baseline)', 'video_size': [640, 480], 'video_bitrate': 20496, 'video_fps': 100.0, 'video_duration': 119.67, 'video_n_frames': 11967}
C:\Users\Bartosz\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\imageio_ffmpeg\binaries\ffmpeg-win64-v4.2.2.exe -i c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw

                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:45<00:01, 1632.85it/s, now=None]

MoviePy - Building video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_5.avi.
MoviePy - Writing video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_5.avi




[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:46<00:01, 1632.85it/s, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_5.avi
{'video_found': True, 'audio_found': False, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 480], 'bitrate': 20496, 'fps': 100.0, 'codec_name': 'mjpeg', 'profile': '(Baseline)'}], 'input_number': 0}], 'duration': 119.67, 'bitrate': 20524, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'mjpeg', 'video_profile': '(Baseline)', 'video_size': [640, 480], 'video_bitrate': 20496, 'video_fps': 100.0, 'video_duration': 119.67, 'video_n_frames': 11967}
C:\Users\Bartosz\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\imageio_ffmpeg\binaries\ffmpeg-win64-v4.2.2.exe -i c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw

                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:47<00:01, 1632.85it/s, now=None]

MoviePy - Building video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_6.avi.
MoviePy - Writing video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_6.avi




[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:48<00:01, 1632.85it/s, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_6.avi
{'video_found': True, 'audio_found': False, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 480], 'bitrate': 20496, 'fps': 100.0, 'codec_name': 'mjpeg', 'profile': '(Baseline)'}], 'input_number': 0}], 'duration': 119.67, 'bitrate': 20524, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'mjpeg', 'video_profile': '(Baseline)', 'video_size': [640, 480], 'video_bitrate': 20496, 'video_fps': 100.0, 'video_duration': 119.67, 'video_n_frames': 11967}
C:\Users\Bartosz\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\imageio_ffmpeg\binaries\ffmpeg-win64-v4.2.2.exe -i c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw

                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:48<00:01, 1632.85it/s, now=None]

MoviePy - Building video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_7.avi.
MoviePy - Writing video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_7.avi




[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                                                            
                                                                            
                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:49<00:01, 1632.85it/s, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_7.avi
{'video_found': True, 'audio_found': False, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 480], 'bitrate': 20496, 'fps': 100.0, 'codec_name': 'mjpeg', 'profile': '(Baseline)'}], 'input_number': 0}], 'duration': 119.67, 'bitrate': 20524, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'mjpeg', 'video_profile': '(Baseline)', 'video_size': [640, 480], 'video_bitrate': 20496, 'video_fps': 100.0, 'video_duration': 119.67, 'video_n_frames': 11967}
C:\Users\Bartosz\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\imageio_ffmpeg\binaries\ffmpeg-win64-v4.2.2.exe -i c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw


[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                                                            
                                                                            
                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:51<00:01, 1632.85it/s, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_8.avi
{'video_found': True, 'audio_found': False, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 480], 'bitrate': 20496, 'fps': 100.0, 'codec_name': 'mjpeg', 'profile': '(Baseline)'}], 'input_number': 0}], 'duration': 119.67, 'bitrate': 20524, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'mjpeg', 'video_profile': '(Baseline)', 'video_size': [640, 480], 'video_bitrate': 20496, 'video_fps': 100.0, 'video_duration': 119.67, 'video_n_frames': 11967}
C:\Users\Bartosz\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\imageio_ffmpeg\binaries\ffmpeg-win64-v4.2.2.exe -i c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw


[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:52<00:01, 1632.85it/s, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_9.avi
{'video_found': True, 'audio_found': False, 'metadata': {}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [640, 480], 'bitrate': 20496, 'fps': 100.0, 'codec_name': 'mjpeg', 'profile': '(Baseline)'}], 'input_number': 0}], 'duration': 119.67, 'bitrate': 20524, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'mjpeg', 'video_profile': '(Baseline)', 'video_size': [640, 480], 'video_bitrate': 20496, 'video_fps': 100.0, 'video_duration': 119.67, 'video_n_frames': 11967}
C:\Users\Bartosz\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\imageio_ffmpeg\binaries\ffmpeg-win64-v4.2.2.exe -i c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw

                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:52<00:01, 1632.85it/s, now=None]

MoviePy - Building video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_10.avi.
MoviePy - Writing video c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_10.avi




[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                                                            
                                                                            
frame_index:  66%|██████▌   | 6092/9239 [02:53<00:01, 1632.85it/s, now=None]

MoviePy - Done !
MoviePy - video ready c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\silesian_deception_cut\poli1Video\person1_10.avi


### Data processor

In [5]:
dp = DataProcessor(
    frame_preprocessors=[
        FaceExtractionPreprocessor(
            skip_bad_frames=False,
            output_size=(200, 200)
        ),
        TextureFrontalizationPreprocessor(
            models_path=MODELS_PATH,
            do_calculate_symmetry=True
        ),
    ],
    frame_processor=SupervisionVertexProcessorWithLandmarkFrontalization(
        reference_points_path=REFERENCE_POINTS_PATH,
        do_make_face_mesh=False  # Output pure landmarks
    ),
)

# Process silesian data

In [6]:
# os.makedirs(PROCESSED_DATA_PATH, exist_ok=True)

In [7]:
already_processed_data = set(file.split(".")[0] for file in os.listdir(PROCESSED_DATA_PATH))

In [8]:
total = len(os.listdir(MIAMI_DATA_PATH))

for file in tqdm(os.listdir(MIAMI_DATA_PATH), desc=f"Processing files", total=total, leave=False):
    file_path = os.path.join(MIAMI_DATA_PATH, file)
    out_file = file.split(".")[0]
    if out_file in already_processed_data:
        continue
    try:
        dp.process_data(file_path, os.path.join(PROCESSED_DATA_PATH, out_file))
    except Exception as e:
        print(f"Could not process {file_path} due to {e}")
        continue

Processing files:   0%|          | 0/321 [00:00<?, ?it/s]

Processing files:  50%|█████     | 161/321 [1:49:33<4:25:14, 99.46s/it] 

Could not process c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\miami_deception\Videos\MU3D Codebook.xlsx due to Could not open video at c:\Users\Bartosz\Desktop\automatic-lie-detection\data\raw\miami_deception\Videos\MU3D Codebook.xlsx


                                                                        