In [None]:
import json
import os
import sys
import cProfile
import pstats

import numpy as np
from PIL import Image
from sklearn.metrics import mean_squared_error

from detectron2 import model_zoo
# Fetches the Mask R-CNN (R50-FPN, 3x schedule) COCO instance-segmentation
# model from the detectron2 model zoo with trained=True.
# NOTE(review): `model` is not referenced by any other cell visible in this
# notebook — confirm whether this load is still needed here.
model = model_zoo.get("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml", trained=True)

# sys.path.append(r"../")
from ct_assist import transform
from ct_assist.utils import accuracy

# Performance
In this Notebook, I will be performance testing various parts of the ct_assist library. This includes both speed and accuracy testing.

First, get the test data ready:

In [None]:
# Four (x, y) pixel coordinates per test image, keyed by the last six
# characters of the image path (the lookup key used by setup_vars()).
# NOTE(review): presumably the outline of the table in each image — confirm.
image_coord_dict = {
    "00.jpg": np.array([[1470, 1430], [2333, 1355], [3247, 1781], [1935, 1952]]),
    "01.jpg": np.array([[1495, 1552], [2219, 1589], [1843, 1969], [805, 1875]]),
    "03.jpg": np.array([[1216, 1398], [2215, 1754], [3268, 1530], [2067, 1282]]),
}

def setup_vars():
    """Lazily yield one set of transform inputs per annotated test image.

    Reads the image-to-prefix mapping from ``anno.json`` and the matching
    ``<prefix>heads`` / ``<prefix>feet`` arrays from ``anno.npz`` (both in
    ``./data/table``). Images whose path ends in ``02.jpg`` are skipped.

    Yields:
        tuple: ``(img, reference, height, STD, image_coords)`` where ``img``
        is the opened PIL image, ``reference`` stacks the feet and heads
        annotations, ``height`` and ``STD`` are fixed values in metres, and
        ``image_coords`` comes from ``image_coord_dict``.
    """
    base = r"./data/table"
    with open(os.path.join(base, "anno.json"), "r") as handle:
        prefix_by_image = json.load(handle)

    annotations = {}
    with np.load(os.path.join(base, "anno.npz")) as archive:
        for img_path, prefix in prefix_by_image.items():
            annotations[img_path] = {
                "heads": archive[f"{prefix}heads"],
                "feet": archive[f"{prefix}feet"],
            }

    for img_path, marks in annotations.items():
        if img_path.endswith("02.jpg"):
            continue
        # The dictionary is keyed by the trailing six characters, e.g. "00.jpg".
        image_coords = image_coord_dict[img_path[-6:]]
        # feet and heads have been swapped in annotations
        reference = np.array([marks["feet"], marks["heads"]])
        height = 0.095  # m
        STD = 0.01  # m
        img = Image.open(img_path)
        yield (img, reference, height, STD, image_coords)

## Speed

In [None]:
# Build the inputs BEFORE profiling starts. setup_vars() is a lazy generator,
# so consuming it inside the profiled region would attribute its JSON/NPZ
# reads and Image.open calls to the library code being measured.
generator = setup_vars()
profile_inputs = list(generator)

pr = cProfile.Profile()
pr.enable()
try:
    # Keep only the first two columns of each transformed result, then
    # reduce every polygon to its area.
    coords_sq = (transform.fit_transform(*params)[:, :2] for params in profile_inputs)
    y_pred = np.array([accuracy.calc_area(poly) for poly in coords_sq])
finally:
    # Always stop the profiler, even if a transform raises, so it does not
    # keep tracing subsequent cells.
    pr.disable()
pr.print_stats("tottime")

## Accuracy
Because the "real" positions in the images are currently unknown, we cannot compare CameraTransform's output to them directly. Instead, we compare a quantity derived from those positions. For the purposes of this assignment, I've chosen the area of the polygon, as this is an important variable in the use case.

The three images used for testing all feature a table with an area of about `1.33` m$^{2}$ (roughly 13,346 cm$^{2}$).

In [None]:
# Ground truth: the same known area, once for every entry in y_pred.
y_true = np.full(y_pred.size, 1.33455)

In [None]:
# Display the predicted areas (one value per input yielded by setup_vars()).
y_pred

In [None]:
# Root-mean-squared error between the known and the predicted areas.
# The square root is taken explicitly because the `squared=False` keyword
# was deprecated in scikit-learn 1.4 and removed in 1.6.
np.sqrt(mean_squared_error(y_true, y_pred))

An RMSE of around 20 square centimeters is adequate. The latter two images in particular were taken at worse angles and are insufficiently labeled in comparison to `img_03.jpg`.