## Import modules

In [None]:
import os
import logging
import random
import cv2
import numpy as np
from typing import List, Dict, Tuple, Any
from collections import OrderedDict, deque 
from matplotlib import pyplot as plt

from mdx.mtmc.core.calibration import Calibrator
from mdx.mtmc.core.data import Loader, Preprocessor
from mdx.mtmc.config import AppConfig
from mdx.mtmc.utils.io_utils import load_json_from_file

logging.basicConfig(format="%(asctime)s - %(message)s", datefmt="%y/%m/%d %H:%M:%S", level=logging.INFO)

## Load config and configure param ranges

In [None]:
# Sensor to inspect and the inclusive (min, max) ranges an object or behavior
# must fall within to be selected for inspection
sensor_id = "Building_K_Cam1"
confidence_range = (0.4, 0.5)
bbox_area_range = (2000, 50000)
bbox_aspect_ratio_range = (0., 2.)
# Behavior length is measured in seconds
behavior_length_range = (0., 60.)
# False: inspect individual detections; True: inspect whole behaviors
inspect_behavior_level = False
num_inspection_instances = 5

app_config_path = "resources/app_mtmc_config.json"
# Validate explicitly: `assert` is skipped when Python runs with -O and
# would not report which path is missing
if not os.path.exists(app_config_path):
    raise FileNotFoundError(f"App config not found: {app_config_path}")
app_config = AppConfig(**load_json_from_file(app_config_path))
app_config.io.selectedSensorIds = [sensor_id]
# Disable embedding-level and behavior-level filters to include all objects
app_config.preprocessing.embeddingConfidenceThresh = 0.
app_config.preprocessing.embeddingBboxAreaThresh = 0.
app_config.preprocessing.embeddingBboxAspectRatioThresh = float("inf")
app_config.preprocessing.behaviorConfidenceThresh = 0.
app_config.preprocessing.behaviorBboxAreaThresh = 0.
app_config.preprocessing.behaviorBboxAspectRatioThresh = float("inf")
app_config.preprocessing.behaviorLengthThreshSec = 0.

# Calibration is only needed when behaviors are built from the frames
if inspect_behavior_level:
    calibration_path = "resources/calibration_building_k.json"
    if not os.path.exists(calibration_path):
        raise FileNotFoundError(f"Calibration info not found: {calibration_path}")


## Load raw data and process

In [None]:
# Load input data from the perception pipeline
loader = Loader(app_config)
frames = None
json_data_path = app_config.io.jsonDataPath
protobuf_data_path = app_config.io.protobufDataPath
# JSON data takes precedence; protobuf data is used only when no JSON file exists
if os.path.isfile(json_data_path):
    frames = loader.load_json_data_to_frames(json_data_path)
elif os.path.isfile(protobuf_data_path):
    frames = loader.load_protobuf_data_to_frames(protobuf_data_path)
else:
    error_message = (f"ERROR: The JSON data path {json_data_path} and "
                     f"protobuf data path {protobuf_data_path} do NOT exist.")
    logging.error(error_message)
    # Raise instead of calling the interactive `exit()` helper, so the session
    # stays alive and the failure is reported as a regular exception
    raise FileNotFoundError(error_message)

if inspect_behavior_level:
    # Calibrate sensors
    calibrator = Calibrator(app_config)
    sensor_state_objects = calibrator.calibrate(calibration_path)

    # Preprocess frames into behaviors and filter outliers
    preprocessor = Preprocessor(app_config)
    preprocessor.set_sensor_state_objects(sensor_state_objects)
    behaviors = preprocessor.preprocess(frames)

## Parse frames for embedding-level inspection

In [None]:
# Maps "<frame ID> #-# <object ID>" to the confidence and bbox of every object
# of the inspected sensor that falls within all of the configured param ranges
frames_dict = OrderedDict()

for frame in frames:
    # Only the sensor under inspection is of interest
    if frame.sensorId != sensor_id:
        continue

    for detection in frame.objects:
        score = detection.confidence
        box = detection.bbox
        width = box.rightX - box.leftX + 1.
        height = box.bottomY - box.topY + 1.
        area = width * height
        aspect_ratio = width / height

        # Exclude objects outside the given param ranges (bounds are inclusive)
        range_checks = (
            (score, confidence_range),
            (area, bbox_area_range),
            (aspect_ratio, bbox_aspect_ratio_range),
        )
        if any(value < bounds[0] or value > bounds[1] for value, bounds in range_checks):
            continue

        frames_dict[frame.id + " #-# " + detection.id] = {"confidence": score, "bbox": box}

# Preview the first few selected objects
print(list(frames_dict.items())[:5])

## Parse behaviors for behavior-level inspection

In [None]:
if inspect_behavior_level:
    # Maps "<frame ID> #-# <object ID>" to the per-frame object info extended
    # with the aggregated attributes of the behavior the object belongs to
    behaviors_dict: Dict[str, Dict[str, Any]] = OrderedDict()

    for behavior in behaviors:
        if behavior.sensorId != sensor_id:
            continue

        object_id = behavior.objectId
        # Keys of the behavior's frames that were selected at the embedding level
        behavior_keys: List[str] = [
            object_key
            for object_key in (
                f"{frame_id} #-# {object_id}"
                for frame_id in range(int(behavior.startFrame), int(behavior.endFrame) + 1)
            )
            if object_key in frames_dict
        ]

        # Aggregate confidences (non-positive confidences are ignored)
        confidences: List[float] = [
            frames_dict[object_key]["confidence"]
            for object_key in behavior_keys
            if frames_dict[object_key]["confidence"] > 0
        ]
        if not confidences:
            continue
        behavior_confidence = np.mean(confidences)

        # Aggregate bbox areas and aspect ratios
        bbox_areas: List[float] = list()
        bbox_aspect_ratios: List[float] = list()
        for bbox in behavior.bboxes:
            bbox_width = bbox.rightX - bbox.leftX + 1.
            bbox_height = bbox.bottomY - bbox.topY + 1.
            bbox_areas.append(bbox_width * bbox_height)
            bbox_aspect_ratios.append(bbox_width / bbox_height)
        # Both lists grow together, so one emptiness check covers them
        # (the previous `bbox_aspect_ratios == 0` compared a list with an int)
        if not bbox_areas:
            continue
        behavior_bbox_area = np.mean(bbox_areas)
        behavior_bbox_aspect_ratio = np.mean(bbox_aspect_ratios)
        behavior_length = (behavior.end - behavior.timestamp).total_seconds()

        # Exclude behaviors outside the given param ranges (bounds are inclusive)
        range_checks = (
            (behavior_confidence, confidence_range),
            (behavior_bbox_area, bbox_area_range),
            (behavior_bbox_aspect_ratio, bbox_aspect_ratio_range),
            (behavior_length, behavior_length_range),
        )
        if any(value < bounds[0] or value > bounds[1] for value, bounds in range_checks):
            continue

        # Attach the behavior-level attributes to each selected frame of the behavior
        for object_key in behavior_keys:
            behaviors_dict[object_key] = {
                **frames_dict[object_key],
                "behavior_confidence": behavior_confidence,
                "behavior_bbox_area": behavior_bbox_area,
                "behavior_bbox_aspect_ratio": behavior_bbox_aspect_ratio,
                "behavior_length": behavior_length,
            }

    # Preview the first few selected objects
    print(list(behaviors_dict.items())[:5])

## Show random instances for inspection

In [None]:
# Plot on image frame and print anomaly information
def display_anomaly_info(objects_dict: Dict[str, Dict[str, Any]], object_key: str,
                         video: cv2.VideoCapture, bbox_color: Tuple[int, int, int] = (255, 0, 0)) -> None:
    """
    Displays anomaly information

    Seeks the video to the frame of the object, draws the sensor ID (read from the
    module-level ``sensor_id``), the frame ID, the bbox and the object ID on the frame,
    shows the frame with matplotlib and prints the attributes of the object.
    Behavior-level attributes are printed only when the object's entry contains them.
    When the frame cannot be reached or read, a warning is logged and nothing is displayed.

    :param Dict[str, Dict[str, Any]] objects_dict: dictionary of objects keyed by
        "<frame ID> #-# <object ID>"; each entry holds at least "confidence" and "bbox"
    :param str object_key: key for the object
    :param cv2.VideoCapture video: video from which the frame of the object is read
    :param Tuple[int, int, int] bbox_color: color of the bbox and object ID, applied after
        the frame has been converted to RGB
    :return: None
    ::

        display_anomaly_info(objects_dict, object_key, video, bbox_color)
    """
    object_tokens = object_key.split(" #-# ")
    frame_id = int(object_tokens[0])
    object_id = object_tokens[1]
    object_info = objects_dict[object_key]
    confidence = object_info["confidence"]
    bbox = object_info["bbox"]

    # Behavior-level attributes exist only for behavior-level inspection
    behavior_info = []
    for label, field in (("Behavior confidence", "behavior_confidence"),
                         ("Behavior bbox area", "behavior_bbox_area"),
                         ("Behavior bbox aspect ratio", "behavior_bbox_aspect_ratio"),
                         ("Behavior length", "behavior_length")):
        value = object_info.get(field, None)
        if value is not None:
            behavior_info.append((label, round(value, 2)))

    bbox_left_x = round(bbox.leftX, 2)
    bbox_top_y = round(bbox.topY, 2)
    bbox_right_x = round(bbox.rightX, 2)
    bbox_bottom_y = round(bbox.bottomY, 2)
    # Use the same inclusive (+1) width/height as the range filtering so that the
    # printed area and aspect ratio match the values the object was selected by;
    # derive them from the unrounded coordinates to avoid compounding rounding
    raw_bbox_width = bbox.rightX - bbox.leftX + 1.
    raw_bbox_height = bbox.bottomY - bbox.topY + 1.
    bbox_width = round(raw_bbox_width, 2)
    bbox_height = round(raw_bbox_height, 2)
    bbox_area = round(raw_bbox_width * raw_bbox_height, 2)
    bbox_aspect_ratio = round(raw_bbox_width / raw_bbox_height, 2)

    # Retry the seek a bounded number of times; an unbounded retry never returns
    # when the video is not opened or the backend does not support seeking
    max_seek_attempts = 10
    for _ in range(max_seek_attempts):
        if video.set(cv2.CAP_PROP_POS_FRAMES, frame_id):
            break
    else:
        logging.warning(f"Failed to seek to frame {frame_id} for object {object_id}.")
        return

    success, image_frame = video.read()
    if not success:
        logging.warning(f"Failed to read frame {frame_id} for object {object_id}.")
        return

    # Convert to RGB first so that matplotlib shows the frame and colors correctly
    image_frame = cv2.cvtColor(image_frame, cv2.COLOR_BGR2RGB)
    image_frame = cv2.putText(image_frame, f"{sensor_id}: {frame_id:06d}", (10, 30),
                              cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 0, 0), 2, cv2.LINE_AA)
    image_frame = cv2.rectangle(image_frame, (int(bbox_left_x), int(bbox_top_y)),
                                (int(bbox_right_x), int(bbox_bottom_y)), bbox_color, 2)
    image_frame = cv2.putText(image_frame, object_id,
                              (int(bbox_left_x), int(bbox_top_y) - 10),
                              cv2.FONT_HERSHEY_SIMPLEX, 0.8, bbox_color, 2, cv2.LINE_AA)

    plt.figure(figsize=(17, 13))
    plt.imshow(image_frame)
    plt.show()

    print(f"Frame ID: {frame_id}")
    print(f"Object ID: {object_id}")
    print(f"Confidence: {confidence}")
    print(f"Bbox left X: {bbox_left_x}")
    print(f"Bbox top Y: {bbox_top_y}")
    print(f"Bbox right X: {bbox_right_x}")
    print(f"Bbox bottom Y: {bbox_bottom_y}")
    print(f"Bbox width: {bbox_width}")
    print(f"Bbox height: {bbox_height}")
    print(f"Bbox area: {bbox_area}")
    print(f"Bbox aspect ratio: {bbox_aspect_ratio}")
    for label, value in behavior_info:
        print(f"{label}: {value}")


video_path = os.path.join(app_config.io.videoDirPath, sensor_id + ".mp4")
video = cv2.VideoCapture(video_path)

# Release the video on every path, including early termination and errors
try:
    # Objects to choose from: per-frame objects or objects grouped into behaviors
    objects_dict = behaviors_dict if inspect_behavior_level else frames_dict

    if len(objects_dict) == 0:
        print("There is no anomaly found.")
    elif not video.isOpened():
        logging.error(f"ERROR: The video {video_path} could NOT be opened.")
    else:
        # Embedding-level inspection
        if not inspect_behavior_level:
            # Cap the sample size: random.sample() raises ValueError when asked
            # for more instances than there are objects
            num_samples = min(num_inspection_instances, len(frames_dict))
            random_keys = set(random.sample(list(frames_dict), num_samples))

        # Behavior-level inspection
        else:
            random_key = random.choice(list(behaviors_dict.keys()))
            random_tokens = random_key.split(" #-# ")
            object_id = random_tokens[1]
            left_frame_id = right_frame_id = int(random_tokens[0])
            random_keys = deque([random_key])

            # Expand the behaviors based on consecutive frames, alternating between
            # the frame before and the frame after the current selection
            while len(random_keys) < num_inspection_instances:
                expanded = False

                last_key = f"{left_frame_id - 1} #-# {object_id}"
                if last_key in behaviors_dict:
                    random_keys.appendleft(last_key)
                    left_frame_id -= 1
                    expanded = True

                next_key = f"{right_frame_id + 1} #-# {object_id}"
                # Re-check the size so that the selection never exceeds the requested number
                if len(random_keys) < num_inspection_instances and next_key in behaviors_dict:
                    random_keys.append(next_key)
                    right_frame_id += 1
                    expanded = True

                # No neighboring frame on either side: the selection cannot grow further
                if not expanded:
                    break

            random_keys = set(random_keys)

        # Display in the order of the dictionary rather than in selection order
        for object_key in objects_dict:
            if object_key in random_keys:
                display_anomaly_info(objects_dict, object_key, video)
finally:
    video.release()