# Testing in Python

### Topics
- Static testing
- Assertions (Sanity Check)
- Unit testing


### Example task
- Load a CSV of bounding boxes ([Data from luderick-seagrass dataset](https://github.com/globalwetlands/luderick-seagrass))
- Find the center x point of each bounding box
- Calculate the mean of the center x points

![example bbox](example_bbox.png)

# Read the CSV

In [1]:
from IPython.display import display
import pandas as pd

# Load the bounding-box annotations and preview the first rows.
df = pd.read_csv("luderick_seagrass_jack_evans_a.csv")
display(df.head())

# Sanity check data

# check number of rows (6702 is the row count expected for this CSV)
assert len(df) == 6702, "Wrong number of rows"

# ensure all bbox values are not na
# (done before the range checks: Series.min() skips NaN by default, so a
# missing value would otherwise go unnoticed by the checks below)
assert not df['bbox_x'].isna().any(), "bbox_x should not be na"
assert not df['bbox_w'].isna().any(), "bbox_w should not be na"

# ensure the x origin and the width of every bbox are non-negative
assert df['bbox_x'].min() >= 0, "bbox_x should be >= 0"
assert df['bbox_w'].min() >= 0, "bbox_w should be >= 0"

Unnamed: 0,id,category,category_id,image,image_id,bbox_x,bbox_y,bbox_w,bbox_h,area,segmentation
0,0,luderick,1,04C1_Luderick_1.mov_5fps_000001.jpg,0,1778,425,141,185,26085,"[[1778, 515, 1789, 498, 1806, 479, 1835, 457, ..."
1,1,luderick,1,04C1_Luderick_1.mov_5fps_000002.jpg,1,1659,406,260,239,62140,"[[1659, 509, 1675, 488, 1696, 465, 1720, 437, ..."
2,2,luderick,1,04C1_Luderick_1.mov_5fps_000003.jpg,2,1578,400,337,220,74140,"[[1578, 530, 1585, 512, 1594, 496, 1592, 477, ..."
3,3,luderick,1,04C1_Luderick_1.mov_5fps_000004.jpg,3,1508,398,304,228,69312,"[[1812, 624, 1795, 578, 1795, 542, 1797, 507, ..."
4,4,luderick,1,04C1_Luderick_1.mov_5fps_000005.jpg,4,1465,390,234,236,55224,"[[1691, 591, 1687, 544, 1688, 503, 1691, 473, ..."


### Define a function to get the center x point of a bounding box

In [2]:
from utils import get_bbox_center_x

# the function is now imported from utils; its tests were moved to utils_test.py

# before:
# assert get_bbox_center_x(0, 50) == 25
# assert get_bbox_center_x(100, 200) == 200
# assert get_bbox_center_x(500, 1) == 500.5

### Calculate the center x points

In [3]:
# A vectorised one-liner would also work here:
#     df['bbox_center_x'] = df['bbox_x'] + df['bbox_w'] / 2
# but this example deliberately routes every row through the helper function.


def _row_center_x(row):
    """Return the center x of one row via get_bbox_center_x(bbox_x, bbox_w)."""
    return get_bbox_center_x(row["bbox_x"], row["bbox_w"])


# axis=1 hands each row to the helper, one row at a time
df["bbox_center_x"] = df.apply(_row_center_x, axis=1)

df.head()

Unnamed: 0,id,category,category_id,image,image_id,bbox_x,bbox_y,bbox_w,bbox_h,area,segmentation,bbox_center_x
0,0,luderick,1,04C1_Luderick_1.mov_5fps_000001.jpg,0,1778,425,141,185,26085,"[[1778, 515, 1789, 498, 1806, 479, 1835, 457, ...",1848.5
1,1,luderick,1,04C1_Luderick_1.mov_5fps_000002.jpg,1,1659,406,260,239,62140,"[[1659, 509, 1675, 488, 1696, 465, 1720, 437, ...",1789.0
2,2,luderick,1,04C1_Luderick_1.mov_5fps_000003.jpg,2,1578,400,337,220,74140,"[[1578, 530, 1585, 512, 1594, 496, 1592, 477, ...",1746.5
3,3,luderick,1,04C1_Luderick_1.mov_5fps_000004.jpg,3,1508,398,304,228,69312,"[[1812, 624, 1795, 578, 1795, 542, 1797, 507, ...",1660.0
4,4,luderick,1,04C1_Luderick_1.mov_5fps_000005.jpg,4,1465,390,234,236,55224,"[[1691, 591, 1687, 544, 1688, 503, 1691, 473, ...",1582.0


In [4]:
# check a single row (positional row 12, picked as a spot check)
sample_bbox = df.iloc[12]
display(sample_bbox)

# Manually calculate the bbox_center_x for this row
correct_center_x = sample_bbox['bbox_x'] + (sample_bbox['bbox_w'] / 2)

# check that the calculated center_x matches our manual calculation;
# the message reports both values so a failure is self-explanatory
assert sample_bbox['bbox_center_x'] == correct_center_x, (
    f"bbox_center_x should be {correct_center_x}, "
    f"got {sample_bbox['bbox_center_x']}"
)

id                                                              12
category                                                  luderick
category_id                                                      1
image                          04C1_Luderick_1.mov_5fps_000010.jpg
image_id                                                         9
bbox_x                                                        1227
bbox_y                                                         459
bbox_w                                                         158
bbox_h                                                         184
area                                                         29072
segmentation     [[1227, 595, 1243, 583, 1255, 575, 1266, 571, ...
bbox_center_x                                                 1306
Name: 12, dtype: object

### Calculate the mean of center x points

In [5]:
# Sanity check: the largest center x must not exceed 1920
# (presumably the frame width in pixels — TODO confirm).
center_x_values = df['bbox_center_x']
assert center_x_values.max() <= 1920, "All x vals should be less or equal to 1920"

# Mean of the center x points over every bounding box
average_bbox_center_x = center_x_values.mean()
print(average_bbox_center_x)

1026.7417934944792
