11 changes: 9 additions & 2 deletions CHANGELOG.md
@@ -1,5 +1,12 @@
# Changelog

# In progress
### Changed
* Metrics no longer include subclasses in the calculation by default.

### Fixed
* Polygon extraction from masks creating invalid polygons, which caused issues in the COCO converter.

# Version 3.28.0 (2022-10-14)

### Added
@@ -45,7 +52,7 @@
* Increase scalar metric value limit to 100m
* Added deprecation warnings when updating project `queue_mode`
### Fixed
* Fix bug in `feature_confusion_matrix` and `confusion_matrix` causing FPs and FNs to be capped at 1 when there were no matching annotations

# Version 3.26.2 (2022-09-06)
### Added
@@ -65,7 +72,7 @@
* Resets model run training metadata
* `ModelRun.get_config()`
* Fetches model run training metadata

### Changed
* `Model.create_model_run()`
* Add training metadata config as a model run creation param
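For context on the "subclasses" mentioned in the changelog: they are classifications nested under an object annotation. A minimal sketch, assuming the standard annotation types from `labelbox.data.annotation_types` (the names and geometry here are illustrative, mirroring the `is_animal` radio used in the test fixtures below):

```python
from labelbox.data.annotation_types import (ClassificationAnnotation,
                                            ClassificationAnswer,
                                            ObjectAnnotation, Point, Radio,
                                            Rectangle)

# A "cat" bounding box whose nested classification (subclass) records whether
# the box contains an animal. With the new default, the metrics compare only
# the "cat" boxes themselves; passing include_subclasses=True additionally
# requires the "is_animal" answers to match.
cat_with_subclass = ObjectAnnotation(
    name="cat",
    value=Rectangle(start=Point(x=0, y=0), end=Point(x=10, y=10)),
    classifications=[
        ClassificationAnnotation(
            name="is_animal",
            value=Radio(answer=ClassificationAnswer(name="yes")))
    ])
```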
27 changes: 4 additions & 23 deletions labelbox/data/metrics/confusion_matrix/calculation.py
@@ -268,29 +268,10 @@ def mask_confusion_matrix(ground_truths: List[ObjectAnnotation],
elif has_no_annotations(ground_truths, predictions):
return None

if include_subclasses:
# This results in a fairly drastically different value than without subclasses.
# If we have subclasses set to True, then this is object detection with masks
# Otherwise this will compute metrics on each pixel.
pairs = _get_mask_pairs(ground_truths, predictions)
return object_pair_confusion_matrix(
pairs, include_subclasses=include_subclasses, iou=iou)

prediction_np = np.max([pred.value.draw(color=1) for pred in predictions],
axis=0)
ground_truth_np = np.max(
[ground_truth.value.draw(color=1) for ground_truth in ground_truths],
axis=0)
if prediction_np.shape != ground_truth_np.shape:
raise ValueError(
"Prediction and mask must have the same shape."
f" Found {prediction_np.shape}/{ground_truth_np.shape}.")

tp_mask = prediction_np == ground_truth_np == 1
fp_mask = (prediction_np == 1) & (ground_truth_np == 0)
fn_mask = (prediction_np == 0) & (ground_truth_np == 1)
tn_mask = prediction_np == ground_truth_np == 0
return [np.sum(tp_mask), np.sum(fp_mask), np.sum(fn_mask), np.sum(tn_mask)]
pairs = _get_mask_pairs(ground_truths, predictions)
return object_pair_confusion_matrix(pairs,
include_subclasses=include_subclasses,
iou=iou)


def ner_confusion_matrix(ground_truths: List[ObjectAnnotation],
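With the branch above removed, `mask_confusion_matrix` always pairs mask annotations as whole objects via `_get_mask_pairs` and scores them with `object_pair_confusion_matrix`, regardless of `include_subclasses`. For reference, a minimal numpy sketch of the per-pixel counting the removed branch performed, with the comparisons written as explicit elementwise operations (a sketch of the old behavior, not part of the library):

```python
import numpy as np


def pixel_confusion_matrix(prediction_np: np.ndarray,
                           ground_truth_np: np.ndarray):
    """Count per-pixel TP/FP/FN/TN for two binary masks of the same shape."""
    if prediction_np.shape != ground_truth_np.shape:
        raise ValueError(
            "Prediction and mask must have the same shape."
            f" Found {prediction_np.shape}/{ground_truth_np.shape}.")
    tp_mask = (prediction_np == 1) & (ground_truth_np == 1)
    fp_mask = (prediction_np == 1) & (ground_truth_np == 0)
    fn_mask = (prediction_np == 0) & (ground_truth_np == 1)
    tn_mask = (prediction_np == 0) & (ground_truth_np == 0)
    return [int(np.sum(tp_mask)), int(np.sum(fp_mask)),
            int(np.sum(fn_mask)), int(np.sum(tn_mask))]
```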
4 changes: 2 additions & 2 deletions labelbox/data/metrics/confusion_matrix/confusion_matrix.py
@@ -16,7 +16,7 @@ def confusion_matrix_metric(ground_truths: List[Union[
ObjectAnnotation, ClassificationAnnotation]],
predictions: List[Union[ObjectAnnotation,
ClassificationAnnotation]],
include_subclasses=True,
include_subclasses=False,
iou=0.5) -> List[ConfusionMatrixMetric]:
"""
Computes confusion matrix metrics between two sets of annotations.
@@ -47,7 +47,7 @@ def feature_confusion_matrix_metric(
def feature_confusion_matrix_metric(
ground_truths: List[Union[ObjectAnnotation, ClassificationAnnotation]],
predictions: List[Union[ObjectAnnotation, ClassificationAnnotation]],
include_subclasses=True,
include_subclasses=False,
iou: float = 0.5,
) -> List[ConfusionMatrixMetric]:
"""
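A short sketch of how the new default behaves for a caller, assuming `ground_truths` and `predictions` are lists of `ObjectAnnotation` for the same data row built elsewhere (imports use the module path of the file above):

```python
from labelbox.data.metrics.confusion_matrix.confusion_matrix import (
    confusion_matrix_metric, feature_confusion_matrix_metric)

# New default (include_subclasses=False): a prediction counts as a TP if it
# matches the ground truth's feature name and clears the IoU threshold.
metrics = feature_confusion_matrix_metric(ground_truths, predictions, iou=0.5)
for metric in metrics:
    # Each value is a (TP, FP, TN, FN) tuple for one feature name.
    print(metric.feature_name, metric.value)

# Previous behavior on request: mismatched subclass answers turn a would-be
# TP into an FP plus an FN, as exercised by the test fixtures in this diff.
metrics_with_subclasses = confusion_matrix_metric(ground_truths,
                                                  predictions,
                                                  include_subclasses=True)
```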
4 changes: 2 additions & 2 deletions labelbox/data/metrics/iou/iou.py
@@ -13,7 +13,7 @@ def miou_metric(ground_truths: List[Union[ObjectAnnotation,
ClassificationAnnotation]],
predictions: List[Union[ObjectAnnotation,
ClassificationAnnotation]],
include_subclasses=True) -> List[ScalarMetric]:
include_subclasses=False) -> List[ScalarMetric]:
"""
Computes miou between two sets of annotations.
These annotations should relate to the same data (image/video).
@@ -68,7 +68,7 @@ def feature_miou_metric(ground_truths: List[Union[ObjectAnnotation,

def data_row_miou(ground_truth: Label,
prediction: Label,
include_subclasses=True) -> Optional[float]:
include_subclasses=False) -> Optional[float]:
"""

This function is no longer supported. Use miou() for raw values or miou_metric() for the metric
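Similarly for the IoU metrics above, subclasses are now ignored unless requested. A sketch assuming `label` and `prediction` are `Label` objects for the same data row, as in the tests later in this diff:

```python
from labelbox.data.metrics.iou.iou import (data_row_miou, feature_miou_metric,
                                           miou_metric)

# New default: subclass answers are not compared when matching annotations.
overall = miou_metric(label.annotations, prediction.annotations)
per_feature = feature_miou_metric(label.annotations, prediction.annotations)
# Each call returns a list of ScalarMetric (empty when there is nothing to
# score); the raw IoU is available as e.g. overall[0].value.

# Opt back into the previous behavior, where mismatched subclasses lower the
# score (e.g. the wrong-subclass box fixture drops from 1.0 to 0.5).
overall_with_subclasses = miou_metric(label.annotations,
                                      prediction.annotations,
                                      include_subclasses=True)

# data_row_miou is documented above as no longer supported, but the tests
# below still call it with the same flag.
iou_value = data_row_miou(label, prediction, include_subclasses=True)
```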
34 changes: 25 additions & 9 deletions tests/data/metrics/confusion_matrix/conftest.py
@@ -11,10 +11,16 @@

class NameSpace(SimpleNamespace):

def __init__(self, predictions, ground_truths, expected):
super(NameSpace, self).__init__(predictions=predictions,
ground_truths=ground_truths,
expected=expected)
def __init__(self,
predictions,
ground_truths,
expected,
expected_without_subclasses=None):
super(NameSpace, self).__init__(
predictions=predictions,
ground_truths=ground_truths,
expected=expected,
expected_without_subclasses=expected_without_subclasses or expected)


def get_radio(name, answer_name):
@@ -109,7 +115,8 @@ def get_object_pairs(tool_fn, **kwargs):
**kwargs,
subclasses=[get_radio("is_animal", answer_name="yes")])
],
expected={'cat': [1, 0, 0, 0]}),
expected={'cat': [1, 0, 0, 0]},
expected_without_subclasses={'cat': [1, 0, 0, 0]}),
NameSpace(predictions=[
tool_fn("cat",
**kwargs,
@@ -121,7 +128,8 @@ def get_object_pairs(tool_fn, **kwargs):
**kwargs,
subclasses=[get_radio("is_animal", answer_name="no")])
],
expected={'cat': [0, 1, 0, 1]}),
expected={'cat': [0, 1, 0, 1]},
expected_without_subclasses={'cat': [1, 0, 0, 0]}),
NameSpace(predictions=[
tool_fn("cat",
**kwargs,
@@ -136,7 +144,8 @@ def get_object_pairs(tool_fn, **kwargs):
**kwargs,
subclasses=[get_radio("is_animal", answer_name="no")])
],
expected={'cat': [1, 1, 0, 0]}),
expected={'cat': [1, 1, 0, 0]},
expected_without_subclasses={'cat': [1, 1, 0, 0]}),
NameSpace(predictions=[
tool_fn("cat",
**kwargs,
@@ -154,6 +163,10 @@ def get_object_pairs(tool_fn, **kwargs):
expected={
'cat': [0, 1, 0, 1],
'dog': [0, 1, 0, 0]
},
expected_without_subclasses={
'cat': [1, 0, 0, 0],
'dog': [0, 1, 0, 0]
}),
NameSpace(
predictions=[tool_fn("cat", **kwargs),
@@ -171,7 +184,10 @@ def get_object_pairs(tool_fn, **kwargs):
ground_truths=[tool_fn("cat", **kwargs),
tool_fn("cat", **kwargs)],
expected={'cat': [1, 0, 0, 1]}),
NameSpace(predictions=[], ground_truths=[], expected=[]),
NameSpace(predictions=[],
ground_truths=[],
expected=[],
expected_without_subclasses=[]),
NameSpace(predictions=[],
ground_truths=[tool_fn("cat", **kwargs)],
expected={'cat': [0, 0, 0, 1]}),
@@ -183,7 +199,7 @@ def get_object_pairs(tool_fn, **kwargs):
expected={
'cat': [0, 1, 0, 0],
'dog': [0, 0, 0, 1]
}),
})
]


@@ -14,18 +14,25 @@
])
def test_overlapping_objects(tool_examples):
for example in tool_examples:
score = confusion_matrix_metric(example.ground_truths,
example.predictions)

if len(example.expected) == 0:
assert len(score) == 0
else:
expected = [0, 0, 0, 0]
for expected_values in example.expected.values():
for idx in range(4):
expected[idx] += expected_values[idx]
assert score[0].value == tuple(
expected), f"{example.predictions},{example.ground_truths}"
for include_subclasses, expected_attr_name in [[
True, 'expected'
], [False, 'expected_without_subclasses']]:
score = confusion_matrix_metric(
example.ground_truths,
example.predictions,
include_subclasses=include_subclasses)

if len(getattr(example, expected_attr_name)) == 0:
assert len(score) == 0
else:
expected = [0, 0, 0, 0]
for expected_values in getattr(example,
expected_attr_name).values():
for idx in range(4):
expected[idx] += expected_values[idx]
assert score[0].value == tuple(
expected), f"{example.predictions},{example.ground_truths}"


@parametrize("tool_examples",
@@ -14,14 +14,21 @@
])
def test_overlapping_objects(tool_examples):
for example in tool_examples:
metrics = feature_confusion_matrix_metric(example.ground_truths,
example.predictions)

metrics = {r.feature_name: list(r.value) for r in metrics}
if len(example.expected) == 0:
assert len(metrics) == 0
else:
assert metrics == example.expected, f"{example.predictions},{example.ground_truths}"
for include_subclasses, expected_attr_name in [[
True, 'expected'
], [False, 'expected_without_subclasses']]:
metrics = feature_confusion_matrix_metric(
example.ground_truths,
example.predictions,
include_subclasses=include_subclasses)

metrics = {r.feature_name: list(r.value) for r in metrics}
if len(getattr(example, expected_attr_name)) == 0:
assert len(metrics) == 0
else:
assert metrics == getattr(
example, expected_attr_name
), f"{example.predictions},{example.ground_truths}"


@parametrize("tool_examples",
35 changes: 19 additions & 16 deletions tests/data/metrics/iou/data_row/conftest.py
@@ -12,24 +12,26 @@ def __init__(self,
predictions,
labels,
expected,
expected_without_subclasses=None,
data_row_expected=None,
media_attributes=None,
metadata=None,
classifications=None):
super(NameSpace,
self).__init__(predictions=predictions,
labels={
'DataRow ID': 'ckppihxc10005aeyjen11h7jh',
'Labeled Data': "https://.jpg",
'Media Attributes': media_attributes or {},
'DataRow Metadata': metadata or [],
'Label': {
'objects': labels,
'classifications': classifications or []
}
},
expected=expected,
data_row_expected=data_row_expected)
super(NameSpace, self).__init__(
predictions=predictions,
labels={
'DataRow ID': 'ckppihxc10005aeyjen11h7jh',
'Labeled Data': "https://.jpg",
'Media Attributes': media_attributes or {},
'DataRow Metadata': metadata or [],
'Label': {
'objects': labels,
'classifications': classifications or []
}
},
expected=expected,
expected_without_subclasses=expected_without_subclasses or expected,
data_row_expected=data_row_expected)


@pytest.fixture
@@ -645,7 +647,8 @@ def test_box_with_wrong_subclass():
'answer': 'not_test'
}]
}],
expected=0.5)
expected=0.5,
expected_without_subclasses=1.0)


@pytest.fixture
@@ -780,4 +783,4 @@ def partial_matching_ner():
"end": 5
}
}],
expected=0.2857142857142857)
34 changes: 24 additions & 10 deletions tests/data/metrics/iou/data_row/test_data_row_iou.py
@@ -21,15 +21,29 @@ def check_iou(pair, mask=None):
annotation.value.mask.arr = np.frombuffer(
base64.b64decode(annotation.value.mask.url.encode('utf-8')),
dtype=np.uint8).reshape((32, 32, 3))
assert math.isclose(data_row_miou(label, prediction), pair.expected)
assert math.isclose(
miou_metric(label.annotations, prediction.annotations)[0].value,
pair.expected)
feature_ious = feature_miou_metric(label.annotations,
prediction.annotations)
assert len(feature_ious
) == 1 # The tests run here should only have one class present.
assert math.isclose(feature_ious[0].value, pair.expected)

for include_subclasses, expected_attr_name in [[
True, 'expected'
], [False, 'expected_without_subclasses']]:
assert math.isclose(
data_row_miou(label,
prediction,
include_subclasses=include_subclasses),
getattr(pair, expected_attr_name))
assert math.isclose(
miou_metric(label.annotations,
prediction.annotations,
include_subclasses=include_subclasses)[0].value,
getattr(pair, expected_attr_name))
feature_ious = feature_miou_metric(
label.annotations,
prediction.annotations,
include_subclasses=include_subclasses)
assert len(
feature_ious
) == 1 # The tests run here should only have one class present.
assert math.isclose(feature_ious[0].value,
getattr(pair, expected_attr_name))


def check_iou_checklist(pair, mask=None):
@@ -122,4 +136,4 @@ def test_others(pair):
strings_to_fixtures(
["matching_ner", "no_matching_ner", "partial_matching_ner"]))
def test_ner(pair):
check_iou(pair)