In [11]:
from pathlib import Path
import pandas as pd
import pickle, os, json
import numpy as np
import cv2
import mediapipe as mp


In [12]:
test_path = './20241101_kontor_001_1'
data = Path(test_path + '/depth_data.bin').read_bytes()
data = np.frombuffer(data, dtype=np.float32)
depth_length = data.shape[0]
print(f"Depth data length: {depth_length}")

# path for frame_log.csv
frame_log_path = test_path + '/frame_log.csv'
frame_log_df = pd.read_csv(
        frame_log_path,
        header=None
)
frame_log_df.columns = ["data_type", "frame_number", "timestamp_iphone", "timescale", "timecode_tentacle", "unclear"]
depth_frames = frame_log_df[frame_log_df['data_type'] == 'D']['timecode_tentacle'].count()
print(f"Depth frames: {depth_frames}")

video_frames = frame_log_df[frame_log_df['data_type'] == 'V']['timecode_tentacle'].count()
print(f"Video frames: {video_frames}")

total_frames = frame_log_df.shape[0]

start_timecode = frame_log_df.loc[0, "timecode_tentacle"]
end_timecode = frame_log_df.loc[frame_log_df.shape[0] - 1, "timecode_tentacle"]
print(f"Start timecode: {start_timecode}, End timecode: {end_timecode}")

def timecode_to_seconds(timecode):
    # timecode is in the format of HH:MM:SS:FF.MSS
    timecode = timecode.split(':')
    hours = int(timecode[0])
    minutes = int(timecode[1])
    seconds = int(timecode[2])
    miliseconds = int(timecode[3].split('.')[1])
    total_seconds = hours * 3600 + minutes * 60 + seconds + miliseconds / 1000
    return total_seconds

total_seconds = timecode_to_seconds(end_timecode) - timecode_to_seconds(start_timecode)
print(f"Total seconds: {total_seconds}")

depth_fps = depth_frames / total_seconds
print(f"Depth FPS: {depth_fps}")

# Check if the depth data is divided by the depth_fps
print(depth_length / depth_fps, depth_length / depth_frames, depth_length / total_seconds, depth_length / video_frames, depth_length / total_frames)

Depth data length: 9597543
Depth frames: 534
Video frames: 1067
Start timecode: 15:14:01:42.063, End timecode: 15:14:19:28.052
Total seconds: 17.989000000001397
Depth FPS: 29.68480738228687
323314.98319665436 17972.926966292136 533522.8750902916 8994.88566073102 5990.975655430711


In [4]:
# 9528088 =  (x * 528) + x
# 7364896 = (x * 412) + x

x1 = 9528088 / 529
x2 = 7364896 / 413
x3 = 4905690 / 326
x4 = 9597543 / 535

print(x1, x2, x3, x4)

18011.508506616257 17832.677966101695 15048.128834355828 17939.332710280374


In [9]:
data[17937: 17940]

array([4.4162722e+05, 1.3235888e-17, 8.0760996e-25], dtype=float32)

In [87]:
data[:3]

array([6.4096650e-10, 6.6281114e-10, 1.6688216e-07], dtype=float32)

In [2]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.18-cp39-cp39-macosx_11_0_universal2.whl.metadata (9.7 kB)
Collecting absl-py (from mediapipe)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting flatbuffers>=2.0 (from mediapipe)
  Using cached flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting jax (from mediapipe)
  Downloading jax-0.4.30-py3-none-any.whl.metadata (22 kB)
Collecting jaxlib (from mediapipe)
  Downloading jaxlib-0.4.30-cp39-cp39-macosx_11_0_arm64.whl.metadata (1.0 kB)
Collecting opencv-contrib-python (from mediapipe)
  Using cached opencv_contrib_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.5-cp37-abi3-macosx_10_9_universal2.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Using cached sounddevice-0.5.1-py3-none-macosx_10_6_x86_64.macosx_10_6_universal2.whl.metadata (1.4 kB)
Collecting sentencepiece (from 

In [3]:
!pip install opencv-python

Collecting opencv-python
  Using cached opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Using cached opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl (54.8 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.10.0.84


In [None]:
# OLD
import cv2
import mediapipe as mp

mp_face_detection = mp.solutions.face_detection
mp_face_mesh = mp.solutions.face_mesh
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.5)
hands_detection = mp.solutions.hands.Hands(min_detection_confidence=0.2, min_tracking_confidence=0.2)

# Initialize video capture and background subtractor
video_path = "./20241101_varg_002_2/varg_002_2_pmil.mov"
cap = cv2.VideoCapture(video_path)
fgbg = cv2.createBackgroundSubtractorMOG2(detectShadows=True)

# Define output parameters
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
occlusions = []
is_occlusion = False
start_frame = None

output_path = "./occlusion_detected_video.mp4"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec
out = cv2.VideoWriter(output_path, fourcc, fps, (width, int(0.4 * height)))

prev_landmarks = None

# Loop through each frame
frame_count = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # crop the bottom and the top of the frame
    frame = frame[:int(0.6 * height), :, :]
    # crop the top of the frame
    frame = frame[int(0.2 * height):, :, :]
    
    # Convert frame to RGB for MediaPipe
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Face detection
    face_results = face_detection.process(rgb_frame)
    face_bbox = None
    if face_results.detections:
        for detection in face_results.detections:
            # Get bounding box for the face
            bboxC = detection.location_data.relative_bounding_box
            h, w, _ = frame.shape
            face_bbox = (
                int(bboxC.xmin * w),
                int(bboxC.ymin * h),
                int(bboxC.width * w),
                int(bboxC.height * h)
            )
            # Enlarge the face bounding box slightly to account for slight movements
            padding = 10  # adjust as needed
            face_bbox = (
                max(face_bbox[0] - padding, 0),
                max(face_bbox[1] - padding, 0),
                min(face_bbox[2] + 2 * padding, w - face_bbox[0]),
                min(face_bbox[3] + 2 * padding, h - face_bbox[1])
            )
            break  # Use only the first detected face

    # Hand detection
    hand_results = hands_detection.process(rgb_frame)
    hand_bboxes = []
    if hand_results.multi_hand_landmarks:
        for hand_landmarks in hand_results.multi_hand_landmarks:
            # Get the bounding box for each detected hand
            x_coords = [landmark.x for landmark in hand_landmarks.landmark]
            y_coords = [landmark.y for landmark in hand_landmarks.landmark]
            h, w, _ = frame.shape
            x_min = int(min(x_coords) * w)
            y_min = int(min(y_coords) * h)
            x_max = int(max(x_coords) * w)
            y_max = int(max(y_coords) * h)
            hand_bboxes.append((x_min, y_min, x_max - x_min, y_max - y_min))

    # Moving object detection using background subtraction
    fgmask = fgbg.apply(frame)
    contours, _ = cv2.findContours(fgmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    moving_objects = [cv2.boundingRect(contour) for contour in contours if cv2.contourArea(contour) > 6000]  # 500 is the minimum area threshold

    # Prune old objects from the tracker
    # moving_object_tracker = {k: v for k, v in moving_object_tracker.items() if v < frame_threshold or k in current_moving_objects}

    # Check for occlusion (intersection) between face and moving objects
    frame_occlusion = False
    if face_bbox:
        for hand_bbox in hand_bboxes:
            # Draw hand bounding boxes in yellow
            cv2.rectangle(frame, (hand_bbox[0], hand_bbox[1]), 
                          (hand_bbox[0] + hand_bbox[2], hand_bbox[1] + hand_bbox[3]), 
                          (0, 255, 255), 2)
            
        for obj_bbox in moving_objects:
            x, y, w, h = obj_bbox

            # Draw the moving object bounding box in blue
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

            # Check if the moving object intersects with the face bounding box
            if (face_bbox[0] < x + w and
                face_bbox[0] + face_bbox[2] > x and
                face_bbox[1] < y + h and
                face_bbox[1] + face_bbox[3] > y):

                # Calculate the intersection area percentage with respect to the face bounding box
                intersection_area = (min(face_bbox[0] + face_bbox[2], x + w) - max(face_bbox[0], x)) * (min(face_bbox[1] + face_bbox[3], y + h) - max(face_bbox[1], y))
                face_area = face_bbox[2] * face_bbox[3]
                intersection_area_percentage = intersection_area / face_area

                # plot percentage of intersection as a number
                cv2.putText(frame, f"{intersection_area_percentage:.2f}", (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
                
                # Occlusion detected
                if intersection_area_percentage > 0.25:
                    frame_occlusion = True
                    if not is_occlusion:
                        start_frame = frame_count
                        is_occlusion = True
                break
        
        # Draw face bounding box in green if no occlusion, red if occluded
        box_color = (0, 255, 0) if not frame_occlusion else (0, 0, 255)
        cv2.rectangle(frame, (face_bbox[0], face_bbox[1]), 
                      (face_bbox[0] + face_bbox[2], face_bbox[1] + face_bbox[3]), 
                      box_color, 2)
    else:
        # No face detected, reset occlusion state
        if is_occlusion:
            end_frame = frame_count
            occlusions.append((start_frame / fps, end_frame / fps))
            is_occlusion = False

    # End of occlusion when no intersection is found in the frame
    if not frame_occlusion and is_occlusion:
        end_frame = frame_count
        occlusions.append((start_frame / fps, end_frame / fps))
        is_occlusion = False

    out.write(frame)
    # Show the frame with annotations
    cv2.imshow("Object-Face Occlusion Detection", frame)

    # Break loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

    frame_count += 1

# Release resources
cap.release()
out.release()
face_detection.close()
hands_detection.close()
cv2.destroyAllWindows()

# Display occlusion intervals
print("Occlusions detected at the following intervals (in seconds):")
for start, end in occlusions:
    print(f"Start: {start:.2f}s, End: {end:.2f}s")

# Check for any open windows and close them explicitly
for i in range(10):
    cv2.waitKey(1)  # Let any remaining events process
cv2.destroyAllWindows()  # Ensure all windows are closed

I0000 00:00:1730995912.763963  341955 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M3 Pro
W0000 00:00:1730995912.765061  427129 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1730995912.767967  341955 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M3 Pro
W0000 00:00:1730995912.773429  427136 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1730995912.778953  427136 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Occlusions detected at the following intervals (in seconds):
Start: 0.00s, End: 0.02s
Start: 2.35s, End: 2.65s
Start: 5.20s, End: 5.22s
Start: 5.23s, End: 5.48s
Start: 8.55s, End: 9.45s
Start: 9.48s, End: 9.53s
Start: 9.58s, End: 9.92s
Start: 9.95s, End: 10.10s
Start: 10.12s, End: 10.13s
Start: 10.22s, End: 10.25s
Start: 10.82s, End: 10.85s
Start: 10.87s, End: 11.13s
Start: 13.57s, End: 13.65s
Start: 13.68s, End: 13.82s
Start: 13.83s, End: 14.00s
Start: 14.02s, End: 14.25s
Start: 14.32s, End: 14.33s
Start: 14.35s, End: 14.77s
Start: 14.78s, End: 14.80s
Start: 15.45s, End: 15.53s
Start: 15.55s, End: 15.78s


In [2]:
cap.release()
out.release()
face_detection.close()
cv2.destroyAllWindows()

# Display occlusion intervals
print("Occlusions detected at the following intervals (in seconds):")
for start, end in occlusions:
    print(f"Start: {start:.2f}s, End: {end:.2f}s")

# Check for any open windows and close them explicitly
for i in range(10):
    cv2.waitKey(1)  # Let any remaining events process
cv2.destroyAllWindows() 

NameError: name 'cap' is not defined

In [14]:
import mediapipe as mp
from mediapipe.framework.formats import landmark_pb2
from mediapipe import solutions
import numpy as np
import matplotlib.pyplot as plt


def draw_landmarks_on_image(rgb_image, detection_result):
  face_landmarks_list = detection_result.face_landmarks
  annotated_image = np.copy(rgb_image)

  # Loop through the detected faces to visualize.
  for idx in range(len(face_landmarks_list)):
    face_landmarks = face_landmarks_list[idx]

    # Draw the face landmarks.
    face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    face_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in face_landmarks
    ])

    solutions.drawing_utils.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks_proto,
        connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp.solutions.drawing_styles
        .get_default_face_mesh_tesselation_style())
    solutions.drawing_utils.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks_proto,
        connections=mp.solutions.face_mesh.FACEMESH_CONTOURS,
        landmark_drawing_spec=None,
        connection_drawing_spec=mp.solutions.drawing_styles
        .get_default_face_mesh_contours_style())
    solutions.drawing_utils.draw_landmarks(
        image=annotated_image,
        landmark_list=face_landmarks_proto,
        connections=mp.solutions.face_mesh.FACEMESH_IRISES,
          landmark_drawing_spec=None,
          connection_drawing_spec=mp.solutions.drawing_styles
          .get_default_face_mesh_iris_connections_style())

  return annotated_image

def plot_face_blendshapes_bar_graph(face_blendshapes):
  # Extract the face blendshapes category names and scores.
  face_blendshapes_names = [face_blendshapes_category.category_name for face_blendshapes_category in face_blendshapes]
  face_blendshapes_scores = [face_blendshapes_category.score for face_blendshapes_category in face_blendshapes]
  # The blendshapes are ordered in decreasing score value.
  face_blendshapes_ranks = range(len(face_blendshapes_names))

  fig, ax = plt.subplots(figsize=(12, 12))
  bar = ax.barh(face_blendshapes_ranks, face_blendshapes_scores, label=[str(x) for x in face_blendshapes_ranks])
  ax.set_yticks(face_blendshapes_ranks, face_blendshapes_names)
  ax.invert_yaxis()

  # Label each bar with values
  for score, patch in zip(face_blendshapes_scores, bar.patches):
    plt.text(patch.get_x() + patch.get_width(), patch.get_y(), f"{score:.4f}", va="top")

  ax.set_xlabel('Score')
  ax.set_title("Face Blendshapes")
  plt.tight_layout()
  plt.show()

def timecode_to_milliseconds(timecode, fps):
    # 16:08:04:11.047
    hh, mm, ss, frames_ms = timecode.split(':')
    frames, ms = frames_ms.split('.')
    return (
        int(hh) * 3600 * 1000 +
        int(mm) * 60 * 1000 +
        int(ss) * 1000 +
        int(frames) * (1000 / fps) +
        int(ms)
    )


def extract_moving_objects(frame, fgbg, min_contour_area=6000):
    fgmask = fgbg.apply(frame)
    contours, _ = cv2.findContours(fgmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    moving_objects = [cv2.boundingRect(contour) for contour in contours if cv2.contourArea(contour) > min_contour_area]  # 500 is the minimum area threshold
    return moving_objects

def plot_facial_landmarks(annotated_image, face_landmarks):
    mp_drawing = mp.solutions.drawing_utils 
    mp_drawing_styles = mp.solutions.drawing_styles
    mp_drawing.draw_landmarks(image=annotated_image,
                                        landmark_list=face_landmarks,
                                        connections=mp_face_mesh.FACEMESH_TESSELATION,
                                        landmark_drawing_spec=None,
                                        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
                                        )
    mp_drawing.draw_landmarks(image=annotated_image,
                                        landmark_list=face_landmarks,
                                        connections=mp_face_mesh.FACEMESH_CONTOURS,
                                        landmark_drawing_spec=None,
                                        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style()
                                        )
    mp_drawing.draw_landmarks(image=annotated_image,
                                        landmark_list=face_landmarks,
                                        connections=mp_face_mesh.FACEMESH_IRISES,
                                        landmark_drawing_spec=None,
                                        connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_iris_connections_style()
                                        )
    return annotated_image

# =================================================================================================
from datetime import datetime, timedelta


def timecode_to_timestamp(timecode: str, fps: int = 30) -> int:
    # Parse the timecode in format HH:MM:SS:FF.ms
    hh, mm, ss, ff_ms = timecode.split(':')
    ss, ff_ms = int(ss), ff_ms.split('.')
    ff, ms = int(ff_ms[0]), int(ff_ms[1])
    
    # Construct a timedelta object representing hours, minutes, and seconds
    base_time = timedelta(hours=int(hh), minutes=int(mm), seconds=ss)
    
    # Calculate the total time in milliseconds
    total_ms = base_time.total_seconds() * 1000 + (ff / fps) * 1000 + ms
    return int(total_ms)


def timestamp_to_timecode(timestamp: int, fps: int = 30) -> str:
    # Convert timestamp in milliseconds to a timedelta
    total_seconds = timestamp / 1000
    base_time = timedelta(seconds=total_seconds)
    
    # Extract hours, minutes, and seconds from timedelta
    hours, remainder = divmod(base_time.seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    
    # Extract milliseconds and frames
    ms = timestamp % 1000
    frames = int((ms / 1000) * fps)
    
    # Format the timecode
    return f"{hours:02}:{minutes:02}:{seconds:02}:{frames:02}.{ms:03}"



def milliseconds_to_timecode(frame_timestamp_ms, fps):
    total_seconds = int(frame_timestamp_ms // 1000)
    millis = int(frame_timestamp_ms % 1000)
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    frame_number = int((frame_timestamp_ms % 1000) * fps / 1000)
    return f"{hours:02}:{minutes:02}:{seconds:02}:{frame_number:02}.{millis:03}"

timecode = "16:08:04:11.047"
fps = 60

print(f"Timecode original: {timecode}")
# Convert timecode to timestamp
timestamp = timecode_to_timestamp(timecode, fps)
print(f"Timestamp: {timestamp} ms")

# Convert timestamp back to timecode
new_timecode = timestamp_to_timecode(timestamp, fps)
print(f"Timecode: {new_timecode}")

milliseconds_to_timecode(timecode_to_milliseconds("16:08:04:11.047", 60), 60)

Timecode original: 16:08:04:11.047
Timestamp: 58084230 ms
Timecode: 16:08:04:13.230


'16:08:04:13.230'

In [6]:
milliseconds_to_timecode(timecode_to_milliseconds("16:08:04:11.047", 60), 60)

'16:08:04:13.230'

In [None]:
# !wget -O face_landmarker_v2_with_blendshapes.task -q https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task

zsh:1: command not found: wget


### Method two, plot confidence of face detection, low confidence -> occlusion

In [15]:
# Method two, plot confidence of face detection, low confidence -> occlusion

import cv2
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import pandas as pd
import json
import numpy as np
from datetime import timedelta
from scipy.signal import butter, filtfilt
import numpy as np


# Initialize video capture and background subtractor
file_folder = "./20241101_varg_002_2/"
video_path = file_folder + "varg_002_2_pmil.mov"
# get video name
video_name = video_path.split('/')[-1].split('.')[0]
cap = cv2.VideoCapture(video_path)
fgbg = cv2.createBackgroundSubtractorMOG2(detectShadows=True)

# Define output parameters
# fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
occlusions = []
is_occlusion = False
start_frame = None


# load json with metadata
json_path = file_folder + f"take.json"
with open(json_path, 'r') as f:
    data = json.load(f)
    # this is a Video (!) fps, depth and blendshapes have different fps values 
    fps = data["videoTargetFrameRate"]
    frames_number = int(data["frames"])

frame_log_df = pd.read_csv(file_folder + f"frame_log.csv", sep=",", header=None)
# assign column names
frame_log_df.columns = ["data_source", "frame_relative_to_source", "timestamp", "timescale", "timecode_tentacle", "unclear"]
assert frame_log_df[frame_log_df["data_source"] == "V"]["frame_relative_to_source"].to_list() == list(range(frames_number))
timecodes = frame_log_df[frame_log_df["data_source"] == "V"]["timecode_tentacle"].to_list()
assert len(timecodes) == frames_number

def calculate_convex_hull_perimeter(landmarks):
    points = np.array(landmarks[:16], dtype=np.float32)
    hull = cv2.convexHull(points)
    perimeter = cv2.arcLength(hull, True)
    return perimeter


In [4]:
!pip install ultralytics

Collecting ultralytics
  Using cached ultralytics-8.3.29-py3-none-any.whl.metadata (35 kB)
Collecting py-cpuinfo (from ultralytics)
  Using cached py_cpuinfo-9.0.0-py3-none-any.whl.metadata (794 bytes)
Collecting seaborn>=0.11.0 (from ultralytics)
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Using cached ultralytics_thop-2.0.11-py3-none-any.whl.metadata (9.4 kB)
Using cached ultralytics-8.3.29-py3-none-any.whl (883 kB)
Using cached seaborn-0.13.2-py3-none-any.whl (294 kB)
Using cached ultralytics_thop-2.0.11-py3-none-any.whl (26 kB)
Using cached py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)
Installing collected packages: py-cpuinfo, ultralytics-thop, seaborn, ultralytics
Successfully installed py-cpuinfo-9.0.0 seaborn-0.13.2 ultralytics-8.3.29 ultralytics-thop-2.0.11


In [8]:
!pip install mediapipe==0.8.6.1

[31mERROR: Could not find a version that satisfies the requirement mediapipe==0.8.6.1 (from versions: 0.10.0, 0.10.1, 0.10.2, 0.10.3, 0.10.5, 0.10.7, 0.10.8, 0.10.9, 0.10.10, 0.10.11, 0.10.13, 0.10.14, 0.10.15, 0.10.18)[0m[31m
[0m[31mERROR: No matching distribution found for mediapipe==0.8.6.1[0m[31m
[0m

In [8]:
from ultralytics import FastSAM
import torch

# Use mps device
#device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
#print(f"Using device: {device}")

# Load a model
model = FastSAM("FastSAM-s.pt")

# Display model information (optional)
model.info()


Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/FastSAM-s.pt to 'FastSAM-s.pt'...


100%|██████████| 22.7M/22.7M [00:02<00:00, 11.3MB/s]


YOLOv8s-seg summary: 261 layers, 11,790,483 parameters, 0 gradients, 42.7 GFLOPs


(261, 11790483, 0, 42.690713599999995)

In [None]:
# MAIN STUFF

def main_stuff(cap, file_folder, video_path, fps, frames_number, timecodes, video_name):
    occlusions = []
    is_occlusion = False
    start_frame = None
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    prev_landmarks = None
    face_blendshapes_prev, face_blendshapes = None, None
    arkit_list = []
    faces = []

    # Loop through each frame
    frame_count = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        timecode = timecodes[frame_count]
        # print(f"Frame {frame_count} - Timecode: {timecode}")

        # crop the bottom and the top of the frame
        frame = frame[:int(0.6 * height), :, :]
        # crop the top of the frame
        frame = frame[int(0.2 * height):, :, :]

        # Save frame for sam testing
        # cv2.imwrite(f"./images/frame_{frame_count}.jpg", frame)

        # Convert frame to RGB for MediaPipe
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)

        # Face detection
        face_results = face_detection.process(rgb_frame)
        face_mesh_results = face_mesh_detection.process(rgb_frame)
        face_bbox = None

        landmarks_occluded = False
        frame_occlusion = False
        low_confidence = False
        
        if face_results.detections:
        
            for detection in face_results.detections:
                # Get bounding box for the face
                bboxC = detection.location_data.relative_bounding_box
                h, w, _ = frame.shape
                face_bbox = (
                    int(bboxC.xmin * w),
                    int(bboxC.ymin * h),
                    int(bboxC.width * w),
                    int(bboxC.height * h)
                )
                # Enlarge the face bounding box slightly to account for slight movements
                padding = 5 # adjust as needed
                face_bbox = (
                    max(face_bbox[0] - padding, 0),
                    max(face_bbox[1] - padding, 0),
                    min(face_bbox[2] + 2 * padding, w - face_bbox[0]),
                    min(face_bbox[3] + 2 * padding, h - face_bbox[1])
                )
                break  # Use only the first detected face

        # Moving object detection using background subtraction
        fgmask = fgbg.apply(frame)
        contours, _ = cv2.findContours(fgmask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        moving_objects = [cv2.boundingRect(contour) for contour in contours if cv2.contourArea(contour) > 6000]  # 500 is the minimum area threshold

        # Check for occlusion (intersection) between face and moving objects
        if face_bbox and face_mesh_results.multi_face_landmarks:
            
            for obj_bbox in moving_objects:
                x, y, w, h = obj_bbox
                x_max, y_max = x + w, y + h

                # Draw the moving object bounding box in blue
                cv2.rectangle(frame, (x, y), (x_max, y_max ), (255, 0, 0), 2)
                
                # Save the moving object cropped image
                # cv2.imwrite(f"./images/moving_object_{frame_count}.jpg", frame[x:x_max, y:y_max])

                # segmented_image = model(frame[x:x_max, y:y_max])[0].plot()

                # Create a semi-opaque overlay
                #alpha = 0.5  # Transparency level (0 = fully transparent, 1 = fully opaque)
                #overlay = frame.copy()
                # Apply the mask to the overlay image (set color to e.g., red [0, 0, 255])
                #overlay[segmentation_mask == 1] = [0, 0, 255]  # Red color for mask
                # Blend the overlay with the original image using the transparency level
                # segmented_image = cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0)

                # Save or display the result of SAM
                # cv2.imshow("Segmented Object", segmented_image)
                # break

                # ! don't use generic bounding box based occlusion
                # Check if the moving object intersects with the face bounding box
                #if (face_bbox[0] < x + w and
                #    face_bbox[0] + face_bbox[2] > x and
                #    face_bbox[1] < y + h and
                #    face_bbox[1] + face_bbox[3] > y):

                    # Calculate the intersection area percentage with respect to the face bounding box
                #    intersection_area = (min(face_bbox[0] + face_bbox[2], x + w) - max(face_bbox[0], x)) * (min(face_bbox[1] + face_bbox[3], y + h) - max(face_bbox[1], y))
                #    face_area = face_bbox[2] * face_bbox[3]
                #    intersection_area_percentage = intersection_area / face_area

                    # plot percentage of intersection as a number
                #    cv2.putText(frame, f"{intersection_area_percentage:.2f}", (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
                    
                    # Occlusion detected  - don't use generic bounding box based occlusion
                    #if intersection_area_percentage > 0.3:
                    #    frame_occlusion = True
                        #if not is_occlusion:
                        #    start_frame = frame_count
                        #    is_occlusion = True

                # Check if any face landmarks in the moving object bounding box
                face_landmarks = face_mesh_results.multi_face_landmarks[0]
                for landmark in face_landmarks.landmark:
                    if x < float(landmark.x * width) < x + w and y < float(landmark.y * height) < y + h:
                        # print(f"===Landmarks occluded=== in frame {frame_count}")
                        frame_occlusion = True
                        landmarks_occluded = True
                        cv2.putText(annotated_image, "Landmarks occluded", (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
                        if not is_occlusion:
                            start_frame = frame_count
                            is_occlusion = True
                        break # Break this inner loop if any landmark is occluded
                    
                # Calculate the contour are of the face landmarks in pixels
                face_contour = [(float(landmark.x), float(landmark.y)) for landmark in face_landmarks.landmark]
                face_contour_area = calculate_convex_hull_perimeter(face_contour)
                # print(f"Face contour area: {face_contour_area}")
                faces.append(face_contour_area)
                if len(faces) > 4:
                    current_average_face_area = np.mean(faces)
                    current_std_face_area = np.std(faces)
                    if face_contour_area < current_average_face_area - 3 * current_std_face_area:
                        # print("===Misdetection of landmarks===")
                        landmarks_occluded = True
                        frame_occlusion = True
                        cv2.putText(annotated_image, "Landmarks occluded", (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)
                        if not is_occlusion:
                            start_frame = frame_count
                            is_occlusion = True

            if face_results.detections[0].score[0] < 0.89:
                low_confidence = True
                if not is_occlusion:
                    start_frame = frame_count
                    is_occlusion = True

            # === comment this out for work with occlusions ===
            detection_result = face_landmarks_detector.detect(mp_image)
            if detection_result.face_blendshapes:
                face_blendshapes = detection_result.face_blendshapes[0]
                frame_coeffs = np.array([category.score for category in face_blendshapes])
                face_blendshapes_prev = face_blendshapes
            else:
                # If no face detected, use the previous frame's face blendshapes
                if face_blendshapes_prev:
                    face_blendshapes = face_blendshapes_prev
                else:
                    face_blendshapes = None
            if face_blendshapes is not None:
                # add face blendshapes and timestamp to the dict
                data_row = {category.category_name[0].upper()+category.category_name[1:] : category.score for category in face_blendshapes if category.category_name != '_neutral'}
            else:
                data_row = {}
            # print(data_row)
            data_row.update({'Timecode': timecode, 'BlendshapeCount': 51})
            arkit_list.append(data_row)
            
            # =================================================

            # === Plot facial landmarks above the face bounding box ===
            annotated_image = frame.copy()
            if face_mesh_results.multi_face_landmarks:
                for face_landmarks in face_mesh_results.multi_face_landmarks:
                    
                    mp_drawing.draw_landmarks(image=annotated_image,
                                            landmark_list=face_landmarks,
                                            connections=mp_face_mesh.FACEMESH_TESSELATION,
                                            landmark_drawing_spec=None,
                                            connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_tesselation_style()
                                            )
                    mp_drawing.draw_landmarks(image=annotated_image,
                                            landmark_list=face_landmarks,
                                            connections=mp_face_mesh.FACEMESH_CONTOURS,
                                            landmark_drawing_spec=None,
                                            connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_contours_style()
                                            )
                    mp_drawing.draw_landmarks(image=annotated_image,
                                            landmark_list=face_landmarks,
                                            connections=mp_face_mesh.FACEMESH_IRISES,
                                            landmark_drawing_spec=None,
                                            connection_drawing_spec=mp_drawing_styles.get_default_face_mesh_iris_connections_style()
                                            )

                # Draw face bounding box in green if no occlusion, red if occluded
                if landmarks_occluded or low_confidence:
                    box_color = (0, 0, 255)
                else:
                    box_color = (0, 255, 0)
                # Plot confidence of face detection
                cv2.putText(annotated_image, f"Conf: {face_results.detections[0].score[0]}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
                cv2.rectangle(annotated_image, (face_bbox[0], face_bbox[1]), 
                        (face_bbox[0] + face_bbox[2], face_bbox[1] + face_bbox[3]), 
                        box_color, 2)
                # Plot facial landmarks above the face bounding box
                cv2.imshow("Object-Face Occlusion Detection", annotated_image)
            
        else:
            # No face detected, count it as occlusion too, all frames should have a face 
            if not is_occlusion:
                #end_frame = frame_count
                #occlusions.append((start_frame / fps, end_frame / fps))
                #is_occlusion = False
                start_frame = frame_count
                is_occlusion = True

        # End of occlusion when no intersection is found in the frame and confidence is high
        if not landmarks_occluded and not low_confidence and is_occlusion:
            end_frame = frame_count
            occlusions.append((start_frame, end_frame))
            is_occlusion = False

        out.write(annotated_image)

        # Show the frame with annotations
        # cv2.imshow("Object-Face Occlusion Detection", frame)

        # Break loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        frame_count += 1

    # Release resources
    cap.release()
    out.release()
    face_detection.close()
    face_mesh_detection.close()
    cv2.destroyAllWindows()

    # Display occlusion intervals
    print("Occlusions detected at the following intervals (in frames):")
    for start, end in occlusions:
        print(f"Start frame: {start}, End frame: {end}")

    print(f"Total occluded {sum([end - start for start, end in occlusions])} frames out of {frame_count}")
    print(f"Average occlusion length: {np.mean([end - start for start, end in occlusions]):.2f} frames")

    # Check for any open windows and close them explicitly
    for i in range(10):
        cv2.waitKey(1)  # Let any remaining events process
    cv2.destroyAllWindows()  # Ensure all windows are closed
    return arkit_list, occlusions


output_path = "./occlusion_detected_video_confidence" + video_name + ".mp4"
fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec
out = cv2.VideoWriter(output_path, fourcc, fps, (width, int(0.4 * height)))

# reload video_occlusions from npz
if os.path.exists("occlusions_results.npz"):
    with np.load("occlusions_results.npz", allow_pickle=True) as data:
        video_occlusions = data["video_occlusions"].item()
        video_blendshapes = data["video_blendshapes"].item()
else:
    video_blendshapes = {}
    video_occlusions = {}

# os walk through all folders that start with 20241101
for root, dirs, files in os.walk("./"):
    if root.split('/')[-1].startswith("20241101"):
        for file in files:
            if file.endswith(".mov"):
                # ==== Inititialize MP face detection objects =====
                mp_drawing = solutions.drawing_utils 
                mp_drawing_styles = solutions.drawing_styles
                base_options = python.BaseOptions(model_asset_path='face_landmarker.task')
                options = vision.FaceLandmarkerOptions(base_options=base_options,
                                                    min_face_detection_confidence=0.01,
                                                    output_face_blendshapes=True,
                                                    output_facial_transformation_matrixes=True,
                                                    num_faces=1)
                face_landmarks_detector = vision.FaceLandmarker.create_from_options(options)
                mp_face_detection = solutions.face_detection
                mp_face_mesh = solutions.face_mesh
                face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.0001)
                face_mesh_detection = mp_face_mesh.FaceMesh(static_image_mode=False, refine_landmarks=True, max_num_faces=1, min_detection_confidence=0.2)
                # ======== Get metadata ========
                # Initialize video capture and background subtractor
                # get folder name
                file_folder = "./" + root.split('/')[-1] + "/"
                print("==== Processing folder: ", file_folder)
                video_path = os.path.join(file_folder, file)
                # get video name
                video_name = video_path.split('/')[-1].split('.')[0]

                if video_name in video_occlusions:
                    print("==== Video already processed: ", video_name)
                    continue
                print("==== Processing video: ", video_name)
                cap = cv2.VideoCapture(video_path)
                fgbg = cv2.createBackgroundSubtractorMOG2(detectShadows=True)

                # load json with metadata
                json_path = file_folder + f"take.json"
                with open(json_path, 'r') as f:
                    data = json.load(f)
                    # this is a Video (!) fps, depth and blendshapes have different fps values 
                    fps = data["videoTargetFrameRate"]
                    frames_number = int(data["frames"])

                frame_log_df = pd.read_csv(file_folder + f"frame_log.csv", sep=",", header=None)
                # assign column names
                frame_log_df.columns = ["data_source", "frame_relative_to_source", "timestamp", "timescale", "timecode_tentacle", "unclear"]
                assert frame_log_df[frame_log_df["data_source"] == "V"]["frame_relative_to_source"].to_list() == list(range(frames_number))
                timecodes = frame_log_df[frame_log_df["data_source"] == "V"]["timecode_tentacle"].to_list()
                assert len(timecodes) == frames_number

                arkit_list, occlusions = main_stuff(cap, file_folder, video_path, fps, frames_number, timecodes, video_name)
                video_blendshapes[video_name] = arkit_list
                video_occlusions[video_name] = occlusions

I0000 00:00:1733127233.311280  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127233.313976  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1733127233.323592  328319 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127233.332858  328326 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127233.338973  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127233.341645  328328 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127233.348477  17109

==== Processing folder:  ./20241101_static_ROOF_fingers_contact_at_45_degrees_neutral_location_001_1/
==== Video already processed:  static_ROOF_fingers_contact_at_45_degrees_neutral_location_001_1_pmil
==== Processing folder:  ./20241101_varg_001_1/
==== Video already processed:  varg_001_1_pmil
==== Processing folder:  ./20241101_bi_005_5/
==== Video already processed:  bi_005_5_pmil
==== Processing folder:  ./20241101_fran_001_1/
==== Video already processed:  fran_001_1_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-1-det_001_1/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-1-det_001_1_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-4-nu_002_2/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-4-nu_002_2_pmil
==== Processing folder:  ./20241101_katt_001_1/
==== Video already processed:  katt_001_1_pmil


W0000 00:00:1733127233.550084  328514 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127233.550325  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127233.550577  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127233.555168  328523 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127233.556615  328512 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127233.562401  328530 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127233.570

==== Processing folder:  ./20241101_bok_002_2/
==== Video already processed:  bok_002_2_pmil
==== Processing folder:  ./20241101_varg_003_3/
==== Video already processed:  varg_003_3_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-1-det_003_3/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-1-det_003_3_pmil
==== Processing folder:  ./20241101_djur_001_1/
==== Video already processed:  djur_001_1_pmil
==== Processing folder:  ./20241101_abborre_002_2/
==== Video already processed:  abborre_002_2_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-5-bosattarna_003_3/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-5-bosattarna_003_3_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-7-det_001_1/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-7-det_001_1_pmil


I0000 00:00:1733127233.768382  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127233.768640  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127233.774509  328726 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127233.776415  328715 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127233.782161  328725 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127233.790722  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127233.791962  328736 inference_feedback_manager.cc:114] Feedback manager re

==== Processing folder:  ./20241101_hundvalp_001_1/
==== Video already processed:  hundvalp_001_1_pmil
==== Processing folder:  ./20241101_left_hand_on_top_of_the_battery_neutral_location_001_1/
==== Video already processed:  left_hand_on_top_of_the_battery_neutral_location_001_1_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-5-bosattarna_001_1/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-5-bosattarna_001_1_pmil
==== Processing folder:  ./20241101_touch_chin_with_index_finger_side_of_the_chin_far_from_active_hand_active_hand_001_1/
==== Video already processed:  touch_chin_with_index_finger_side_of_the_chin_far_from_active_hand_active_hand_001_1_pmil
==== Processing folder:  ./20241101_ogon_cheeks_active_hand_in_each_place_001_1/
==== Video already processed:  ogon_cheeks_active_hand_in_each_place_001_1_pmil
==== Processing folder:  ./20241101_mussla_002_2/
==== Video already processed:  mussla_002_2_pmil
==== Processing folder

W0000 00:00:1733127233.991326  328921 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127233.991847  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127233.992134  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127233.997775  328932 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127233.998452  328919 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127234.003771  328932 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127234.011

==== Processing folder:  ./20241101_static_ROOF_fingers_contact_at_45_degrees_left_top_location_001_1/
==== Video already processed:  static_ROOF_fingers_contact_at_45_degrees_left_top_location_001_1_pmil
==== Processing folder:  ./20241101_skara_001_1/
==== Video already processed:  skara_001_1_pmil
==== Processing folder:  ./20241101_minister-flydde-fran-tomater-story_001_1/
==== Video already processed:  minister-flydde-fran-tomater-story_001_1_pmil
==== Processing folder:  ./20241101_hundvalp_003_3/
==== Video already processed:  hundvalp_003_3_pmil
==== Processing folder:  ./20241101_static_closed_book_left_bottom_location_001_1/
==== Video already processed:  static_closed_book_left_bottom_location_001_1_pmil
==== Processing folder:  ./20241101_stada_001_1/
==== Video already processed:  stada_001_1_pmil
==== Processing folder:  ./20241101_tro_001_1/
==== Video already processed:  tro_001_1_pmil


W0000 00:00:1733127234.208635  329128 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127234.208975  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127234.209261  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127234.214950  329139 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127234.216997  329130 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127234.222630  329139 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127234.231

==== Processing folder:  ./20241101_ensam_002_2/
==== Video already processed:  ensam_002_2_pmil
==== Processing folder:  ./20241101_baver_002_2/
==== Video already processed:  baver_002_2_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-2-men_002_2/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-2-men_002_2_pmil
==== Processing folder:  ./20241101_static_closed_book_right_top_location_001_1/
==== Video already processed:  static_closed_book_right_top_location_001_1_pmil
==== Processing folder:  ./20241101_till_001_1/
==== Video already processed:  till_001_1_pmil
==== Processing folder:  ./20241101_sko_001_1/
==== Video already processed:  sko_001_1_pmil
==== Processing folder:  ./20241101_right_hand_on_top_of_the_battery_neutral_location_001_1/
==== Video already processed:  right_hand_on_top_of_the_battery_neutral_location_001_1_pmil
==== Processing folder:  ./20241101_halsband_002_2/
==== Video already processed:  halsband_002_2

W0000 00:00:1733127234.442686  329360 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127234.447330  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127234.447684  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127234.448784  329362 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127234.452175  329373 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127234.458723  329374 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127234.466

==== Processing folder:  ./20241101_groda_002_2/
==== Video already processed:  groda_002_2_pmil
==== Processing folder:  ./20241101_kalkon_002_2/
==== Video already processed:  kalkon_002_2_pmil
==== Processing folder:  ./20241101_static_ROOF_fingers_contact_at_45_degrees_right_top_location_001_1/
==== Video already processed:  static_ROOF_fingers_contact_at_45_degrees_right_top_location_001_1_pmil
==== Processing folder:  ./20241101_tip_of_the_nose_touch_and_hold_active_hand_001_1/
==== Video already processed:  tip_of_the_nose_touch_and_hold_active_hand_001_1_pmil
==== Processing folder:  ./20241101_min_001_1/
==== Video already processed:  min_001_1_pmil
==== Processing folder:  ./20241101_touch_chin_with_index_finger_middle_of_the_chin_active_hand_001_1/
==== Video already processed:  touch_chin_with_index_finger_middle_of_the_chin_active_hand_001_1_pmil
==== Processing folder:  ./20241101_nytt-forslag-om-invandrare-1-regeringen_002_2/
==== Video already processed:  nytt-forslag-o

I0000 00:00:1733127234.648385  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127234.648627  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127234.653055  329576 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127234.653340  329569 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127234.658877  329574 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127234.667806  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127234.669196  329586 inference_feedback_manager.cc:114] Feedback manager re

==== Processing folder:  ./20241101_nytt-forslag-om-invandrare-3-nu_001_1/
==== Video already processed:  nytt-forslag-om-invandrare-3-nu_001_1_pmil
==== Processing folder:  ./20241101_forehead-to-chin_NOT_UNDERSTAND_R_chin_forehead_active_hand_in_each_place_001_1/
==== Video already processed:  forehead-to-chin_NOT_UNDERSTAND_R_chin_forehead_active_hand_in_each_place_001_1_pmil
==== Processing folder:  ./20241101_glad_002_2/
==== Video already processed:  glad_002_2_pmil
==== Processing folder:  ./20241101_kobra_002_2/
==== Video already processed:  kobra_002_2_pmil
==== Processing folder:  ./20241101_static_JOBBA_left_bottom_location_001_1/
==== Video already processed:  static_JOBBA_left_bottom_location_001_1_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-6-partiet_001_1/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-6-partiet_001_1_pmil
==== Processing folder:  ./20241101_fjaril_001_1/
==== Video already processed:  fjaril_00

I0000 00:00:1733127234.864865  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127234.865076  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127234.869198  329779 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127234.869525  329768 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127234.874805  329780 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127234.882238  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127234.883355  329789 inference_feedback_manager.cc:114] Feedback manager re

==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-3-en_001_1/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-3-en_001_1_pmil
==== Processing folder:  ./20241101_nytt-forslag-om-invandrare-4-politikerna_001_1/
==== Video already processed:  nytt-forslag-om-invandrare-4-politikerna_001_1_pmil
==== Processing folder:  ./20241101_matt_001_1/
==== Video already processed:  matt_001_1_pmil
==== Processing folder:  ./20241101_static_closed_book_left_top_location_001_1/
==== Video already processed:  static_closed_book_left_top_location_001_1_pmil
==== Processing folder:  ./20241101_fasting_001_1/
==== Video already processed:  fasting_001_1_pmil
==== Processing folder:  ./20241101_static_JOBBA_neutral_location_001_1/
==== Video already processed:  static_JOBBA_neutral_location_001_1_pmil
==== Processing folder:  ./20241101_face-calib_002_2/
==== Video already processed:  face-calib_002_2_pmil
==== Processing folder:  ./20241101_static_ROOF_finge

W0000 00:00:1733127235.088655  330004 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127235.089298  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127235.089567  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127235.094347  330011 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127235.094899  330004 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127235.100371  330012 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127235.108

==== Processing folder:  ./20241101_static_JOBBA_left_top_location_001_1/
==== Video already processed:  static_JOBBA_left_top_location_001_1_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-5-bosattarna_004_4/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-5-bosattarna_004_4_pmil
==== Processing folder:  ./20241101_t-pose_001_1/
==== Video already processed:  t-pose_001_1_pmil
==== Processing folder:  ./20241101_ensam_001_1/
==== Video already processed:  ensam_001_1_pmil
==== Processing folder:  ./20241101_right_hand_resting_on_the_chest_chest_001_1/
==== Video already processed:  right_hand_resting_on_the_chest_chest_001_1_pmil
==== Processing folder:  ./20241101_fagel_001_1/
==== Video already processed:  fagel_001_1_pmil
==== Processing folder:  ./20241101_hand_alphabet_001_1/
==== Video already processed:  hand_alphabet_001_1_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-2-men_001_1/
==== Vide

I0000 00:00:1733127235.313142  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127235.313493  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127235.314800  330234 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127235.318291  330244 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127235.323367  330244 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127235.331676  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127235.332753  330254 inference_feedback_manager.cc:114] Feedback manager re

==== Processing folder:  ./20241101_baver_001_1/
==== Video already processed:  baver_001_1_pmil
==== Processing folder:  ./20241101_groda_003_3/
==== Video already processed:  groda_003_3_pmil
==== Processing folder:  ./20241101_static_JOBBA_right_bottom_location_001_1/
==== Video already processed:  static_JOBBA_right_bottom_location_001_1_pmil
==== Processing folder:  ./20241101_left_hand_resting_on_the_chest_chest_001_1/
==== Video already processed:  left_hand_resting_on_the_chest_chest_001_1_pmil
==== Processing folder:  ./20241101_bi_003_3/
==== Video already processed:  bi_003_3_pmil
==== Processing folder:  ./20241101_apa_001_1/
==== Video already processed:  apa_001_1_pmil
==== Processing folder:  ./20241101_anka_001_1/
==== Video already processed:  anka_001_1_pmil


I0000 00:00:1733127235.539713  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127235.539962  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127235.544594  330477 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127235.546038  330469 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127235.549824  330475 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127235.563294  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127235.564457  330488 inference_feedback_manager.cc:114] Feedback manager re

==== Processing folder:  ./20241101_static_JOBBA_right_top_location_001_1/
==== Video already processed:  static_JOBBA_right_top_location_001_1_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-2-men_003_3/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-2-men_003_3_pmil
==== Processing folder:  ./20241101_baver_003_3/
==== Video already processed:  baver_003_3_pmil
==== Processing folder:  ./20241101_halsband_001_1/
==== Video already processed:  halsband_001_1_pmil
==== Processing folder:  ./20241101_groda_001_1/
==== Video already processed:  groda_001_1_pmil
==== Processing folder:  ./20241101_kalkon_001_1/
==== Video already processed:  kalkon_001_1_pmil
==== Processing folder:  ./20241101_bi_001_1/
==== Video already processed:  bi_001_1_pmil
==== Processing folder:  ./20241101_nytt-forslag-om-invandrare-1-regeringen_001_1/
==== Video already processed:  nytt-forslag-om-invandrare-1-regeringen_001_1_pmil


quires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127235.771670  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127235.773230  330703 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127235.774681  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127235.775041  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127235.778759  330701 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127235.780838  330714 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0

==== Processing folder:  ./20241101_static_ROOF_fingers_contact_at_45_degrees_right_bottom_location_001_1/
==== Video already processed:  static_ROOF_fingers_contact_at_45_degrees_right_bottom_location_001_1_pmil
==== Processing folder:  ./20241101_bock_001_1/
==== Video already processed:  bock_001_1_pmil
==== Processing folder:  ./20241101_kobra_001_1/
==== Video already processed:  kobra_001_1_pmil
==== Processing folder:  ./20241101_static_JOBBA_in_front_of_the_face_location_001_1/
==== Video already processed:  static_JOBBA_in_front_of_the_face_location_001_1_pmil
==== Processing folder:  ./20241101_antilop_001_1/
==== Video already processed:  antilop_001_1_pmil
==== Processing folder:  ./20241101_glad_001_1/
==== Video already processed:  glad_001_1_pmil
==== Processing folder:  ./20241101_hackspett_001_1/
==== Video already processed:  hackspett_001_1_pmil
==== Processing folder:  ./20241101_static_closed_book_left_top_location_002_2/
==== Video already processed:  static_close

W0000 00:00:1733127235.969005  330899 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127235.973937  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127235.975042  330908 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127235.979376  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127235.979651  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127235.979878  330907 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127235.984070  330918 inference_feedback_manager.cc:114] Feedback manager re

==== Processing folder:  ./20241101_touch_side_of_the_forehead_on_active_hand_side_active_hand_001_1/
==== Video already processed:  touch_side_of_the_forehead_on_active_hand_side_active_hand_001_1_pmil
==== Processing folder:  ./20241101_nytt-forslag-om-invandrare-2-de_001_1/
==== Video already processed:  nytt-forslag-om-invandrare-2-de_001_1_pmil
==== Processing folder:  ./20241101_tack_001_1/
==== Video already processed:  tack_001_1_pmil
==== Processing folder:  ./20241101_krama_001_1/
==== Video already processed:  krama_001_1_pmil
==== Processing folder:  ./20241101_face-calib_001_1/
==== Video already processed:  face-calib_001_1_pmil
==== Processing folder:  ./20241101_a-pose_001_1/
==== Video already processed:  a-pose_001_1_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-2-men_004_4/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-2-men_004_4_pmil


I0000 00:00:1733127236.167429  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127236.167748  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127236.170356  331084 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127236.173228  331092 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127236.179046  331092 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127236.189386  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127236.190651  331104 inference_feedback_manager.cc:114] Feedback manager re

==== Processing folder:  ./20241101_kontor_001_1/
==== Video already processed:  kontor_001_1_pmil
==== Processing folder:  ./20241101_varg_002_2/
==== Video already processed:  varg_002_2_pmil
==== Processing folder:  ./20241101_nytt-forslag-om-invandrare-5-en_001_1/
==== Video already processed:  nytt-forslag-om-invandrare-5-en_001_1_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-1-det_002_2/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-1-det_002_2_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-4-nu_001_1/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-4-nu_001_1_pmil
==== Processing folder:  ./20241101_static_closed_book_right_bottom_location_001_1/
==== Video already processed:  static_closed_book_right_bottom_location_001_1_pmil
==== Processing folder:  ./20241101_bok_001_1/
==== Video already processed:  bok_001_1_pmil
==== Processing folder:  ./20241101_static_ROOF_f

W0000 00:00:1733127236.400394  331319 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127236.401293  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127236.401584  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127236.405870  331332 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127236.408121  331322 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127236.411941  331334 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127236.420

==== Processing folder:  ./20241101_bi_004_4/
==== Video already processed:  bi_004_4_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-4-nu_003_3/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-4-nu_003_3_pmil
==== Processing folder:  ./20241101_abborre_001_1/
==== Video already processed:  abborre_001_1_pmil
==== Processing folder:  ./20241101_touch_chin_with_index_finger_side_of_the_chin_close_to_active_hand_active_hand_001_1/
==== Video already processed:  touch_chin_with_index_finger_side_of_the_chin_close_to_active_hand_active_hand_001_1_pmil
==== Processing folder:  ./20241101_en-1_001_1/
==== Video already processed:  en-1_001_1_pmil
==== Processing folder:  ./20241101_skola_001_1/
==== Video already processed:  skola_001_1_pmil
==== Processing folder:  ./20241101_chinchilla_001_1/
==== Video already processed:  chinchilla_001_1_pmil


W0000 00:00:1733127236.623973  331531 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127236.624537  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127236.624847  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127236.629350  331534 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127236.630370  331527 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127236.634660  331534 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127236.646

==== Processing folder:  ./20241101_static_closed_book_neutral_location_001_1/
==== Video already processed:  static_closed_book_neutral_location_001_1_pmil
==== Processing folder:  ./20241101_touch_side_of_the_cheek_with_a_palm_on_active_hand_side_active_hand_001_1/
==== Video already processed:  touch_side_of_the_cheek_with_a_palm_on_active_hand_side_active_hand_001_1_pmil
==== Processing folder:  ./20241101_hundvalp_002_2/
==== Video already processed:  hundvalp_002_2_pmil
==== Processing folder:  ./20241101_parti-vill-kalla-grupp-for-terrorister-5-bosattarna_002_2/
==== Video already processed:  parti-vill-kalla-grupp-for-terrorister-5-bosattarna_002_2_pmil
==== Processing folder:  ./20241101_left_hand_above_belly_button_001_1/
==== Video already processed:  left_hand_above_belly_button_001_1_pmil
==== Processing folder:  ./20241101_static_closed_book_in_front_of_the_face_location_001_1/
==== Video already processed:  static_closed_book_in_front_of_the_face_location_001_1_pmil
====

I0000 00:00:1733127236.839887  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127236.840166  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127236.844183  331728 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127236.844841  331739 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127236.852091  331739 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127236.864013  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127236.865384  331748 inference_feedback_manager.cc:114] Feedback manager re

==== Processing folder:  ./20241101_right_hand_above_belly_button_002_2/
==== Video already processed:  right_hand_above_belly_button_002_2_pmil
==== Processing folder:  ./20241101_mussla_001_1/
==== Video already processed:  mussla_001_1_pmil
==== Processing folder:  ./20241101_dalig_001_1/
==== Video already processed:  dalig_001_1_pmil
==== Processing folder:  ./20241101_BOOK_neutral_location_repetitive_001_1/
==== Video already processed:  BOOK_neutral_location_repetitive_001_1_pmil
==== Processing folder:  ./20241101_minister-flydde-fran-tomater-story_002_2/
==== Video already processed:  minister-flydde-fran-tomater-story_002_2_pmil


I0000 00:00:1733127237.055826  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127237.056122  171094 face_landmarker_graph.cc:174] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
W0000 00:00:1733127237.061394  331932 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127237.062190  331944 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733127237.068401  331944 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
I0000 00:00:1733127237.080688  171094 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M3 Pro
W0000 00:00:1733127237.081821  331951 inference_feedback_manager.cc:114] Feedback manager re

W0000 00:00:1733127237.102248  331963 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [10]:
total_occluded_frames_per_video = []
lengths_of_occlusions = []
percentages_per_video = []
total_frames_dataset = 0
for video_name in video_occlusions:
    print(f"Video name: {video_name}")
    frames_number = len(video_blendshapes[video_name])
    total_frames_dataset += frames_number
    if frames_number == 0:
        print("No frames detected")
        continue
    print(f"Total occluded {sum([end - start for start, end in video_occlusions[video_name]])} frames out of {frames_number}")
    percentage_occluded_frames = sum([end - start for start, end in video_occlusions[video_name]]) / frames_number * 100

    total_occluded_frames_per_video.append(sum([end - start for start, end in video_occlusions[video_name]]))
    percentages_per_video.append(percentage_occluded_frames)
    lengths_of_occlusions.extend([end - start for start, end in video_occlusions[video_name]])

print("==========\n")
print(f"Average length of occlusions: {np.mean(lengths_of_occlusions):.2f} frames")
print(f"Standard deviation of occlusion lengths: {np.std(lengths_of_occlusions):.2f} frames")
print(f"Average number of occluded frames per video: {np.mean(total_occluded_frames_per_video):.2f} frames")
print(f"Average percentage of occluded frames per video: {np.mean(percentages_per_video):.2f}%")
print(f"Total number of occluded frames: {sum(total_occluded_frames_per_video)} out of total for the dataset {total_frames_dataset} ")

Video name: static_ROOF_fingers_contact_at_45_degrees_neutral_location_001_1_pmil
Total occluded 0 frames out of 354
Video name: varg_001_1_pmil
Total occluded 81 frames out of 824
Video name: bi_005_5_pmil
Total occluded 0 frames out of 387
Video name: fran_001_1_pmil
Total occluded 8 frames out of 1200
Video name: parti-vill-kalla-grupp-for-terrorister-1-det_001_1_pmil
Total occluded 0 frames out of 583
Video name: parti-vill-kalla-grupp-for-terrorister-4-nu_002_2_pmil
Total occluded 0 frames out of 169
Video name: katt_001_1_pmil
Total occluded 0 frames out of 1046
Video name: bok_002_2_pmil
Total occluded 20 frames out of 1121
Video name: varg_003_3_pmil
Total occluded 124 frames out of 1065
Video name: parti-vill-kalla-grupp-for-terrorister-1-det_003_3_pmil
Total occluded 63 frames out of 778
Video name: djur_001_1_pmil
Total occluded 1 frames out of 1319
Video name: abborre_002_2_pmil
Total occluded 2 frames out of 1169
Video name: parti-vill-kalla-grupp-for-terrorister-5-bosatta

In [59]:
len(video_occlusions)

125

In [60]:
# save the results to npz file for 125 folders
np.savez("occlusions_results.npz", video_blendshapes=video_blendshapes, video_occlusions=video_occlusions)

In [61]:
# Release resources
cap.release()
# out.release()
face_detection.close()
face_mesh_detection.close()
cv2.destroyAllWindows()

ValueError: Closing SolutionBase._graph which is already None

In [None]:
# ====== Smooth blednshapes before ratargetting them onto the avatar ======
def smooth_array(arr, window=3):
    kernel = np.array([1] * window) / window
    smoothed_arr = np.ones_like(arr)
    for i in range(arr.shape[1]):
        smoothed_arr[:, i] = np.convolve(arr[:, i], kernel, mode='same')    
    return smoothed_arr

video_coeffs = []
for row in arkit_list:
    frame_coeffs = []
    for category in row:
        if category != 'Timecode' and category != 'BlendshapeCount':
            # prepare for smoothing
            frame_coeffs.append(row[category]) 
    frame_coeffs = np.array(frame_coeffs)
    video_coeffs.append(frame_coeffs)
video_coeffs = np.stack(video_coeffs, axis=0)
video_coeffs = smooth_array(video_coeffs)
print(video_coeffs)

# Put video_coeffs back to the arkit_list
new_arkit_list = []
for i, row in enumerate(arkit_list):
    new_row = {}
    for j, category in enumerate(row):
        if category != 'Timecode' and category != 'BlendshapeCount':
            new_row[category] = video_coeffs[i, j]
        else:
            new_row[category] = row[category]
    print(new_row)
    new_arkit_list.append(new_row)

[[1.33286849e-01 1.62527174e-01 1.65555777e-03 ... 7.45002641e-05
  2.09958406e-07 1.24919366e-06]
 [1.81154971e-01 2.11726939e-01 3.07348817e-03 ... 1.08276050e-04
  3.53000208e-07 1.95513132e-06]
 [1.35466439e-01 1.70099477e-01 4.42770923e-03 ... 9.75788547e-05
  3.88815797e-07 2.00193593e-06]
 ...
 [2.72263815e-01 2.83718576e-01 2.65433833e-03 ... 2.91879919e-04
  2.49194159e-07 2.78192768e-06]
 [3.19914361e-01 3.28791747e-01 1.97508260e-03 ... 3.08678903e-04
  2.46790700e-07 2.87524934e-06]
 [2.36387223e-01 2.29831159e-01 1.06865886e-03 ... 2.07978029e-04
  1.54894039e-07 1.85501434e-06]]
{'BrowDownLeft': 0.13328684866428375, 'BrowDownRight': 0.1625271737575531, 'BrowInnerUp': 0.001655557774938643, 'BrowOuterUpLeft': 0.004447245194266239, 'BrowOuterUpRight': 0.0026076349895447493, 'CheekPuff': 1.1991333546272168e-05, 'CheekSquintLeft': 1.0771843032368147e-07, 'CheekSquintRight': 2.8017645566554467e-07, 'EyeBlinkLeft': 0.07020085056622823, 'EyeBlinkRight': 0.03901257490118344, 'EyeL

In [142]:
len(new_arkit_list), len(arkit_list)

(1056, 1056)

In [143]:
new_arkit_list[10] == arkit_list[10]

False

In [144]:
# Save the ARKit dict to a .csv file with columns Timestamp, Blendshape, Blednshape, ...
# add column 'BlendshapeCount' with a constant value 51

# Before saving, transform the list of dictionaries to a pandas DataFrame
arkit_df = pd.DataFrame(new_arkit_list)
arkit_df.head()

Unnamed: 0,BrowDownLeft,BrowDownRight,BrowInnerUp,BrowOuterUpLeft,BrowOuterUpRight,CheekPuff,CheekSquintLeft,CheekSquintRight,EyeBlinkLeft,EyeBlinkRight,...,MouthSmileLeft,MouthSmileRight,MouthStretchLeft,MouthStretchRight,MouthUpperUpLeft,MouthUpperUpRight,NoseSneerLeft,NoseSneerRight,Timecode,BlendshapeCount
0,0.133287,0.162527,0.001656,0.004447,0.002608,1.2e-05,1.077184e-07,2.801765e-07,0.070201,0.039013,...,0.019095,0.019014,0.014207,0.012554,3.3e-05,7.5e-05,2.099584e-07,1e-06,16:08:04:11.047,51
1,0.181155,0.211727,0.003073,0.007648,0.005104,1.8e-05,1.635854e-07,4.530078e-07,0.106813,0.056071,...,0.028809,0.026911,0.02105,0.020791,5.3e-05,0.000108,3.530002e-07,2e-06,16:08:04:12.047,51
2,0.135466,0.170099,0.004428,0.00959,0.006136,1.8e-05,1.709668e-07,5.099558e-07,0.10519,0.049693,...,0.031397,0.027741,0.022808,0.025548,5.3e-05,9.8e-05,3.888158e-07,2e-06,16:08:04:13.047,51
3,0.139687,0.176914,0.00427,0.009416,0.006067,1.7e-05,1.830934e-07,5.733417e-07,0.094657,0.044779,...,0.04462,0.039839,0.037654,0.040898,7.2e-05,0.000138,4.070725e-07,2e-06,16:08:04:14.047,51
4,0.132061,0.177331,0.004307,0.009855,0.006114,1.7e-05,1.797024e-07,5.776042e-07,0.085976,0.040327,...,0.044058,0.039388,0.03964,0.039111,6.7e-05,0.000132,4.047783e-07,2e-06,16:08:04:15.047,51


In [145]:
arkit_df.to_csv(video_name + '_blendshapes_smooth.csv', index=False)

In [57]:
# calculate avergae occlusions duration
occlusions_durations = [end - start for start, end in occlusions]
occlusions_durations_frames = [int(duration * fps) for duration in occlusions_durations]
if len(occlusions) > 0:
    average_occlusion_duration = sum(occlusions_durations) / len(occlusions)
print(f"Average occlusion duration: {average_occlusion_duration:.2f} seconds")

# calculate average occlusions duration in frames
fps = 60
average_occlusion_duration_frames = average_occlusion_duration * fps
print(f"Average occlusion duration in frames: {average_occlusion_duration_frames:.2f} frames")

# maximum occlusion length in frames
max_occlusion_length = max(occlusions_durations) * fps
print(f"Maximum occlusion length in frames: {max_occlusion_length:.2f} frames")

print(f"Total occlusions detected: {len(occlusions)}")
print(f"Length of occluded frames out of total frames: {sum(occlusions_durations_frames)} out of {frame_count}, {sum(occlusions_durations_frames) / frame_count * 100:.2f}%")

Average occlusion duration: 4.71 seconds
Average occlusion duration in frames: 282.86 frames
Maximum occlusion length in frames: 1200.00 frames
Total occlusions detected: 7


NameError: name 'frame_count' is not defined

In [None]:
cap.release()
out.release()
face_detection.close()
face_mesh_detection.close()

ValueError: Closing SolutionBase._graph which is already None

In [None]:
# Save the ARKit dict to a .csv file with columns Timestamp, Blendshape, Blednshape, ...
# add column 'BlendshapeCount' with a constant value 51

# Before saving, transform the list of dictionaries to a pandas DataFrame
arkit_df = pd.DataFrame(arkit_list)
arkit_df.head()

Unnamed: 0,BrowDownLeft,BrowDownRight,BrowInnerUp,BrowOuterUpLeft,BrowOuterUpRight,CheekPuff,CheekSquintLeft,CheekSquintRight,EyeBlinkLeft,EyeBlinkRight,...,MouthSmileLeft,MouthSmileRight,MouthStretchLeft,MouthStretchRight,MouthUpperUpLeft,MouthUpperUpRight,NoseSneerLeft,NoseSneerRight,Timecode,BlendshapeCount
0,0.311359,0.18337,0.001457,0.003485,0.009018,2.7e-05,2.002681e-07,8.431355e-07,0.048827,0.037859,...,0.003727,0.00295,0.001333,0.032052,4.9e-05,3.4e-05,5.547313e-07,3e-06,16:08:49:28.881,51
1,0.330014,0.230359,0.001062,0.003973,0.008013,2.7e-05,1.925177e-07,7.565751e-07,0.057541,0.044671,...,0.00271,0.002029,0.002184,0.020858,3e-05,2.3e-05,5.838058e-07,3e-06,16:08:49:29.883,51
2,0.227259,0.129951,0.002482,0.007528,0.013172,2.3e-05,1.638128e-07,5.946032e-07,0.043044,0.038407,...,0.001147,0.000904,0.000865,0.010866,2.5e-05,1.7e-05,5.764126e-07,2e-06,16:08:49:30.885,51
3,0.240812,0.125621,0.00259,0.00623,0.015386,2.9e-05,1.789482e-07,8.818045e-07,0.03704,0.022781,...,0.005168,0.004039,0.002554,0.024045,3.9e-05,2.8e-05,4.482748e-07,3e-06,16:08:49:31.886,51
4,0.264517,0.154681,0.001116,0.005672,0.013568,2.8e-05,2.000474e-07,6.337019e-07,0.043737,0.028246,...,0.003686,0.003553,0.00218,0.021481,4.1e-05,3.8e-05,3.48814e-07,2e-06,16:08:49:32.886,51


In [None]:
arkit_list

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [None]:
arkit_df.to_csv(video_name + '_blendshapes.csv', index=False)

In [None]:
arkit_dict

{'61 days, 22:05:13.339299': [Category(index=0, score=2.678043529158458e-06, display_name='', category_name='_neutral'),
  Category(index=1, score=0.24165955185890198, display_name='', category_name='browDownLeft'),
  Category(index=2, score=0.2914310395717621, display_name='', category_name='browDownRight'),
  Category(index=3, score=0.0015924862818792462, display_name='', category_name='browInnerUp'),
  Category(index=4, score=0.005324058700352907, display_name='', category_name='browOuterUpLeft'),
  Category(index=5, score=0.0030414594803005457, display_name='', category_name='browOuterUpRight'),
  Category(index=6, score=1.6997868442558683e-05, display_name='', category_name='cheekPuff'),
  Category(index=7, score=1.550213823975355e-07, display_name='', category_name='cheekSquintLeft'),
  Category(index=8, score=3.92830457940363e-07, display_name='', category_name='cheekSquintRight'),
  Category(index=9, score=0.10249514877796173, display_name='', category_name='eyeBlinkLeft'),
  C

In [None]:
# Release resources
cap.release()
out.release()
face_detection.close()
face_mesh_detection.close()
cv2.destroyAllWindows()


ValueError: Closing SolutionBase._graph which is already None