In [1]:
import json
import os
import sys
import cProfile
import pstats

import numpy as np
from PIL import Image
from sklearn.metrics import mean_squared_error

sys.path.append(r"../")
from AssistanceTransform import transform, estimator

# Performance
In this Notebook, I will be performance testing various parts of the AssistanceTransform library. This includes both speed and accuracy testing.

First getting the testing data ready:

In [2]:
image_coord_dict = dict()
image_coord_dict["00.jpg"] = np.array([[1470, 1430], [2333, 1355], [3247, 1781], [1935, 1952]])
image_coord_dict["01.jpg"] = np.array([[1495, 1552], [2219, 1589], [1843, 1969], [805, 1875]])
image_coord_dict["03.jpg"] = np.array([[1216, 1398], [2215, 1754], [3268, 1530], [2067, 1282]])   

def setup_vars():
    """Loads data for test_transform_image"""
    data_dir = r"./data/table"
    json_fp = os.path.join(data_dir, "anno.json")
    arr_fp = os.path.join(data_dir, "anno.npz")
    with open(json_fp, "r") as fp:
        mappings = json.load(fp)

    with np.load(arr_fp) as arrs:
        anno_dict = {img: {"heads": arrs[f"{prefix}heads"],
                           "feet": arrs[f"{prefix}feet"]}
                     for img, prefix in mappings.items()}
    
    for key, items in anno_dict.items():
        if key.endswith("02.jpg"):
            continue
        else:
            image_coords = image_coord_dict[key[-6:]]
        # feet and heads have been swapped in annotations
        reference = np.array([items["feet"], items["heads"]])
        height = 0.095  # m
        STD = 0.01  # m
        img = Image.open(key)
        yield (img, reference, height, STD, image_coords)

## Speed

In [3]:
generator = setup_vars()
pr = cProfile.Profile()
pr.enable()
coords_sq = (transform.transform_image(*params, iters=1e5)[:, :2] for params in generator)
y_pred = np.array([estimator.area(poly) for poly in coords_sq])
pr.disable()
pr.print_stats("tottime")

         114108377 function calls (111710065 primitive calls) in 173.042 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
 11098663   18.121    0.000   18.121    0.000 {built-in method numpy.array}
   900021   16.446    0.000   59.793    0.000 camera.py:894(spaceFromImage)
   300006   11.870    0.000   50.740    0.000 _distn_infrastructure.py:1774(logpdf)
   900021   11.171    0.000   24.268    0.000 projection.py:335(getRay)
   300012   10.857    0.000   28.524    0.000 spatial.py:107(_initCameraMatrix)
8996870/6599066   10.837    0.000   45.837    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
  1200052    5.918    0.000    5.918    0.000 {method 'reduce' of 'numpy.ufunc' objects}
        3    5.528    1.843  172.791   57.597 statistic.py:68(metropolis)
   600012    3.783    0.000    5.016    0.000 numerictypes.py:545(_can_coerce_all)
  1800042    3.503    0.000   11.570    0.000 spatial.py:1

      135    0.000    0.000    0.000    0.000 __init__.py:1356(debug)
       16    0.000    0.000    0.000    0.000 zipfile.py:1023(_read2)
       45    0.000    0.000    0.000    0.000 inspect.py:2843(replace)
       16    0.000    0.000    0.000    0.000 zipfile.py:1039(close)
      9/6    0.000    0.000    0.009    0.002 <frozen importlib._bootstrap>:663(_load_unlocked)
        3    0.000    0.000    0.001    0.000 series.py:2102(idxmax)
        6    0.000    0.000    0.000    0.000 blocks.py:2698(make_block)
     15/2    0.000    0.000    0.001    0.000 sre_parse.py:411(_parse_sub)
       10    0.000    0.000    0.000    0.000 zipfile.py:339(__init__)
        3    0.000    0.000    0.000    0.000 estimator.py:9(area)
       45    0.000    0.000    0.000    0.000 _util.py:313(<listcomp>)
        9    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:157(_get_module_lock)
       15    0.000    0.000    0.000    0.000 sre_compile.py:276(_optimize_charset)
        3    0.0

        3    0.000    0.000    0.000    0.000 base.py:5666(_maybe_cast_with_dtype)
        1    0.000    0.000    0.000    0.000 __init__.py:274(load)
        8    0.000    0.000    0.000    0.000 random.py:256(choice)
       36    0.000    0.000    0.000    0.000 {pandas._libs.lib.is_float}
        3    0.000    0.000    0.000    0.000 _weakrefset.py:26(__exit__)
        6    0.000    0.000    0.000    0.000 compilerop.py:138(extra_flags)
        2    0.000    0.000    0.000    0.000 {built-in method _sre.compile}
        9    0.000    0.000    0.000    0.000 <frozen importlib._bootstrap>:143(__init__)
        1    0.000    0.000    0.000    0.000 enum.py:830(__or__)
       18    0.000    0.000    0.000    0.000 sre_parse.py:81(groups)
        1    0.000    0.000    0.000    0.000 TiffImagePlugin.py:1616(AppendingTiffWriter)
       19    0.000    0.000    0.000    0.000 {built-in method _thread.get_ident}
        2    0.000    0.000    0.000    0.000 sre_parse.py:224(__init__)
       

## Accuracy
As the "real" positions in images currently aren't known, in order to figure out how close CameraTransform is to "real" positions, we have to take a derivative of these positions. For the purposes of this assignment, I've chosen the area of the polygon, as this is an important variable in the use case.

The three images used for testing all feature a table with a area of `133` cm$^{2}$. 

In [4]:
y_true = np.repeat(1.33455, y_pred.size)

In [5]:
y_pred

array([1.36138023, 1.7020628 , 1.71143048])

In [6]:
mean_squared_error(y_true, y_pred, squared=False)

0.3043158541370361

An RMSE of around 20 square centimeters is adequate. Especially the latter two images are taken and worse angles and are insufficiently labeled in comparison to `img_03.jpg`.