## Evaluate detected occlusions 

* ✅ I hand annotated 11 videos in ELAN. 
* ✅ Read the annotation layer called `Gaps` as timecodes and frame numbers from `.eaf`.
* ✅ Create a binary vector per video, with one entry per frame saying whether that frame is occluded or not
* ✅ Compare the percentage of occluded frames
* ✅ Compare how close these binary vectors are, just pure accuracy
* ✅ Then precision, recall, and F1


In [1]:
import os, json, time
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [5]:
# Read the detected occlusions expressed as (start, end) timecode strings per video.
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load trusted .npz files.
with np.load("occlusions_results_timecodes.npz", allow_pickle=True) as data:
    # The dict is stored as a 0-d object array; .item() unwraps it.
    video_occlusions = data["video_occlusions"].item()
print(len(video_occlusions))

# Read the same detected occlusions expressed as (start, end) frame numbers per video.
with np.load("occlusions_results.npz", allow_pickle=True) as data:
    video_occlusions_frames = data["video_occlusions"].item()
print(len(video_occlusions_frames))

125
125


In [7]:
# Sanity check: show the timecode and frame-number occlusions of one video side by side.
video_occlusions['abborre_002_2_pmil'], video_occlusions_frames['abborre_002_2_pmil']

([('15:33:35:00.604', '15:33:35:02.604')], [(1083, 1085)])

In [13]:
# Read the per-video blendshape data from npz.
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load trusted .npz files.
with np.load("blendshapes_timecodes_velocities.npz", allow_pickle=True) as data:
    # The dict is stored as a 0-d object array; .item() unwraps it.
    video_blendshapes = data["video_blendshapes"].item()
print(len(video_blendshapes))

125


### Read .eaf files into frame numbers and their status

In [8]:
!pip install pympi-ling

Collecting pympi-ling
  Downloading pympi_ling-1.70.2-py2.py3-none-any.whl.metadata (3.4 kB)
Downloading pympi_ling-1.70.2-py2.py3-none-any.whl (24 kB)
Installing collected packages: pympi-ling
Successfully installed pympi-ling-1.70.2


In [20]:
import pympi
import numpy as np

def generate_annotation_frame_list(eaf_file, frame_rate, total_frames):
    """
    Generate a list of frame annotations from an ELAN .eaf file.

    Annotations from every tier of the file are merged into one mask. Each
    annotation covers the frames from floor(start) up to, but not including,
    ceil(end), after converting its millisecond times to frame numbers.
    Frames that fall outside ``[0, total_frames)`` are ignored.

    Args:
        eaf_file: Path to the .eaf file to read.
        frame_rate: Frames per second used to convert times to frame numbers.
        total_frames (int): Length of the returned list.
    Returns:
        list: A list where each index corresponds to a frame number.
              Values are 1 if annotated, otherwise 0.
    """
    eaf = pympi.Elan.Eaf(eaf_file)

    frame_list = [0] * total_frames

    for tier in eaf.tiers:
        # Only the first two fields (start, end) are needed; some tier types
        # may return extra fields per annotation, so the rest is discarded.
        for start, end, *_ in eaf.get_annotation_data_for_tier(tier):
            # Convert the millisecond time range to a frame range.
            start_frame = int(np.floor((start / 1000) * frame_rate))
            end_frame = int(np.ceil((end / 1000) * frame_rate))
            # Clamp to the valid index range: rounding the end up can point
            # one past the last frame when an annotation reaches the video end.
            first = max(start_frame, 0)
            last = min(end_frame, total_frames)
            for frame in range(first, last):
                frame_list[frame] = 1

    return frame_list


In [21]:
# Sanity check: number of blendshape entries for three videos (used below as their frame counts).
len(video_blendshapes['groda_003_3_pmil']), len(video_blendshapes['antilop_001_1_pmil']), len(video_blendshapes['nytt-forslag-om-invandrare-5-en_001_1_pmil'])

(366, 1289, 533)

In [30]:
# Walk the working directory, and for every .eaf file build the per-frame
# annotation list of the matching video.
frame_rate = 60
files_to_test = []
video_frame_annotations = {}
for root, dirs, files in os.walk("./"):
    for file in files:
        if not file.endswith(".eaf"):
            continue
        # Derive the video name once so files_to_test, the blendshape lookup
        # and the result key always agree, even if the name contains dots.
        video_name = os.path.splitext(file)[0]
        files_to_test.append(video_name)
        eaf_file_path = os.path.join(root, file)
        # The number of blendshape entries is used as the video's frame count.
        total_frames = len(video_blendshapes[video_name])
        print(total_frames)
        frame_annotations = generate_annotation_frame_list(eaf_file_path, frame_rate, total_frames)
        print(file, len(frame_annotations), frame_annotations)
        video_frame_annotations[video_name] = frame_annotations

366
groda_003_3_pmil.eaf 366 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0

In [None]:
def generate_occlusion_frame_list(gap_dict, total_frames):
    """
    Turn per-file occlusion gaps into per-frame 0/1 lists.

    Args:
        gap_dict (dict): Maps each file name to a list of (start, end) frame
            tuples; both ends of a gap are counted as occluded.
        total_frames (int): Length of every generated list.
    Returns:
        dict: Maps each file name to a list of length ``total_frames`` holding
            1 for occluded frames and 0 otherwise. Gap frames outside the list
            are ignored.
    """
    def _mask_for(gaps):
        flags = [0] * total_frames
        for gap_start, gap_end in gaps:
            # Clip the inclusive gap to the valid index range before marking.
            lo = max(gap_start, 0)
            hi = min(gap_end, total_frames - 1)
            for idx in range(lo, hi + 1):
                flags[idx] = 1
        return flags

    return {name: _mask_for(gaps) for name, gaps in gap_dict.items()}

In [29]:
# Keep the predicted frame gaps only for videos that were collected from .eaf files.
gap_dict = {
    name: gaps
    for name, gaps in video_occlusions_frames.items()
    if name in files_to_test
}
print(len(gap_dict), gap_dict.keys())

11 dict_keys(['tro_001_1_pmil', 'kalkon_002_2_pmil', 'glad_002_2_pmil', 'fjaril_001_1_pmil', 'fasting_001_1_pmil', 'varg_002_2_pmil', 'groda_003_3_pmil', 'kobra_001_1_pmil', 'antilop_001_1_pmil', 'nytt-forslag-om-invandrare-5-en_001_1_pmil', 'parti-vill-kalla-grupp-for-terrorister-4-nu_003_3_pmil'])


In [46]:
# Compare predicted occlusion frames against the hand annotations, per video,
# and report the averages over the videos that were actually scored.
average_accuracy = 0.0
average_f1 = 0.0
# Count scored videos explicitly: annotated videos without a prediction are
# skipped below and must not dilute the averages.
evaluated_videos = 0
for video_name in video_frame_annotations:
    if video_name not in gap_dict:
        continue
    real_frames = video_frame_annotations[video_name]
    total_frames = len(real_frames)
    print(video_name, total_frames)
    frame_annotations = generate_occlusion_frame_list({video_name: gap_dict[video_name]}, total_frames)
    predicted_frames = frame_annotations[video_name]
    print(predicted_frames)

    # Share of occluded frames: predicted vs. hand-annotated.
    predicted_occluded_frames = sum(predicted_frames)
    real_occluded_frames = sum(real_frames)
    print(f"Percentage of predicted occlussions for {video_name}: {(predicted_occluded_frames / total_frames):.3}")
    print(f"Percentage of real occlussions for {video_name}: {(real_occluded_frames / total_frames):.3}")
    print(f"Predicted total {predicted_occluded_frames} VS real {real_occluded_frames} for {video_name}")

    # Confusion counts, gathered in a single pass over the frame pairs.
    true_positives = 0
    true_negatives = 0
    false_positives = 0
    false_negatives = 0
    for predicted_flag, real_flag in zip(predicted_frames, real_frames):
        if predicted_flag == real_flag:
            if predicted_flag == 1:
                true_positives += 1
            elif predicted_flag == 0:
                true_negatives += 1
        elif predicted_flag == 1:
            # Predicted occluded, annotated as not occluded.
            false_positives += 1
        elif predicted_flag == 0:
            # Predicted not occluded, annotated as occluded.
            false_negatives += 1
    print(f"False positives for {video_name}: {false_positives}")
    print(f"False negatives for {video_name}: {false_negatives}")
    print(f"True positives for {video_name}: {true_positives}")
    print(f"True negatives for {video_name}: {true_negatives}")

    # The small epsilon avoids division by zero when a video has no positives.
    precision = true_positives / (true_positives + false_positives + 1e-10)
    print(f"Precision for {video_name}: {precision:.3}")
    recall = true_positives / (true_positives + false_negatives + 1e-10)
    print(f"Recall for {video_name}: {recall:.3}")
    f1 = 2 * (precision * recall) / (precision + recall + 1e-10)
    print(f"F1 for {video_name}: {f1:.3}")
    accuracy = (true_positives + true_negatives) / total_frames
    print(f"Accuracy for {video_name}: {accuracy:.3}")
    print("")

    average_accuracy += accuracy
    average_f1 += f1
    evaluated_videos += 1

# Leave the averages at 0.0 when nothing was scored instead of dividing by zero.
if evaluated_videos:
    average_accuracy /= evaluated_videos
    average_f1 /= evaluated_videos
print(f"Average accuracy: {average_accuracy:.3}")
print(f"Average F1: {average_f1:.3}")

groda_003_3_pmil 366
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 