Skip to content

Commit 4bfa6be

Browse files
authored
Merge pull request #166 from Labelbox/ms/mea-metrics
mea metrics
2 parents c242a1e + 0ec63fa commit 4bfa6be

File tree

14 files changed

+1212
-413
lines changed

14 files changed

+1212
-413
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
* Update client to support creating and querying for `Model`s
77
* Implement new prediction import pipeline to support both MAL and MEA
88
* Added notebook to demonstrate how to use MEA
9+
* Added `datarow_miou` for calculating datarow level iou scores
910

1011
# Version 2.5.6 (2021-05-19)
1112
## Fix

examples/model_assisted_labeling/image_mea.ipynb

Lines changed: 82 additions & 411 deletions
Large diffs are not rendered by default.

labelbox/client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ def convert_value(value):
154154
request['headers'] = {
155155
'Authorization': self.headers['Authorization']
156156
}
157+
157158
response = requests.post(**request)
158159
logger.debug("Response: %s", response.text)
159160
except requests.exceptions.Timeout as e:

labelbox/data/__init__.py

Whitespace-only changes.

labelbox/data/metrics/__init__.py

Whitespace-only changes.

labelbox/data/metrics/iou.py

Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
# type: ignore
2+
from typing import Dict, Any, List, Optional, Tuple, Union
3+
from shapely.geometry import Polygon
4+
from itertools import product
5+
import numpy as np
6+
7+
from labelbox.data.metrics.preprocess import label_to_ndannotation
8+
from labelbox.schema.bulk_import_request import (NDAnnotation, NDChecklist,
9+
NDClassification, NDTool,
10+
NDMask, NDPoint, NDPolygon,
11+
NDPolyline, NDRadio, NDText,
12+
NDRectangle)
13+
from labelbox.data.metrics.preprocess import (create_schema_lookup,
14+
url_to_numpy)
15+
16+
# Annotation types that `vector_miou` is annotated to accept.
VectorTool = Union[NDPoint, NDRectangle, NDPolyline, NDPolygon]
# Annotation types that `classification_miou` is annotated to accept.
ClassificationTool = Union[NDText, NDRadio, NDChecklist]
18+
19+
20+
def mask_miou(predictions: List[NDMask], labels: List[NDMask]) -> float:
    """
    Computes the iou between the combined prediction mask and the combined label mask.

    All masks on each side are merged into a single binary mask before the
    comparison, so both lists are expected to hold features with the same
    feature schema id.

    Args:
        predictions: List of mask objects
        labels: List of mask objects
    Returns:
        float indicating iou score
    """
    prediction_urls = [pred.mask['instanceURI'] for pred in predictions]
    pred_mask = _instance_urls_to_binary_mask(prediction_urls)

    label_urls = [label.mask['instanceURI'] for label in labels]
    label_mask = _instance_urls_to_binary_mask(label_urls)

    # Both binary masks must have identical shapes to be compared element-wise.
    assert label_mask.shape == pred_mask.shape
    return _mask_iou(label_mask, pred_mask)
37+
38+
39+
def classification_miou(predictions: List[ClassificationTool],
                        labels: List[ClassificationTool]) -> float:
    """
    Computes iou for classification features.

    Args:
        predictions: list of predictions for a particular feature schema ( should have a max of one ).
        labels: list of labels for a particular feature schema ( should have a max of one ).
    Returns:
        float indicating iou score. 0. is returned unless there is exactly one
        prediction and exactly one label.
    Raises:
        TypeError: if the prediction and the label are not the same classification type.
        ValueError: if the classification is not a text, radio or checklist.
    """

    # A chained `len(predictions) != len(labels) != 1` would only test that the
    # lengths differ and that len(labels) != 1, letting ([], []) and equally
    # sized lists longer than one through. Each side is checked explicitly.
    if len(predictions) != 1 or len(labels) != 1:
        return 0.

    prediction, label = predictions[0], labels[0]

    if type(prediction) != type(label):
        raise TypeError(
            "Classification features must be the same type to compute agreement. "
            f"Found `{type(prediction)}` and `{type(label)}`")

    if isinstance(prediction, NDText):
        return float(prediction.answer == label.answer)
    elif isinstance(prediction, NDRadio):
        return float(prediction.answer.schemaId == label.answer.schemaId)
    elif isinstance(prediction, NDChecklist):
        schema_ids_pred = {answer.schemaId for answer in prediction.answers}
        schema_ids_label = {answer.schemaId for answer in label.answers}
        all_schema_ids = schema_ids_label | schema_ids_pred
        if not all_schema_ids:
            # Neither checklist has a selected option: the answers are
            # identical, and dividing by the empty union would raise.
            return 1.
        return float(
            len(schema_ids_label & schema_ids_pred) / len(all_schema_ids))
    else:
        raise ValueError(f"Unexpected subclass. {prediction}")
74+
75+
76+
def subclassification_miou(
        subclass_predictions: List[ClassificationTool],
        subclass_labels: List[ClassificationTool]) -> Optional[float]:
    """
    Computes subclass iou score between two vector tools that were matched.

    Args:
        subclass_predictions: All subclasses for a particular vector feature inference
        subclass_labels : All subclass labels for a label that matched with the vector feature inference.

    Returns:
        miou across all subclasses, or None when no subclass produced a score.
    """
    predictions_by_schema = create_schema_lookup(subclass_predictions)
    labels_by_schema = create_schema_lookup(subclass_labels)
    # There should only be one feature schema per subclass.
    schema_ids = set(predictions_by_schema.keys()).union(
        set(labels_by_schema.keys()))

    scores = []
    for schema_id in schema_ids:
        score = feature_miou(predictions_by_schema[schema_id],
                             labels_by_schema[schema_id])
        # feature_miou returns None when a score cannot be computed; skip those.
        if score is not None:
            scores.append(score)

    if len(scores) == 0:
        return None
    return np.mean(scores)
104+
105+
106+
def vector_miou(predictions: List[VectorTool], labels: List[VectorTool],
                include_subclasses) -> float:
    """
    Computes an iou score for vector tools.

    Predictions and labels are matched greedily: candidate pairs are visited
    from highest to lowest iou and a pair is accepted only when neither of its
    features has been matched already. Every feature left unmatched
    contributes a score of 0.

    Args:
        predictions: List of predictions that correspond to the same feature schema
        labels: List of labels that correspond to the same feature schema
        include_subclasses: Whether or not to include the subclasses in the calculation.
    Returns:
        miou score for the feature schema
    """
    pairs = _get_vector_pairs(predictions, labels)
    pairs.sort(key=lambda triplet: triplet[2], reverse=True)
    solution_agreements = []
    solution_features = set()
    # Collect the features from the inputs rather than from `pairs`: when one
    # side is empty there are no pairs at all, yet the features on the other
    # side are unmatched and must still be penalised with a zero.
    all_features = {pred.uuid for pred in predictions
                   } | {label.uuid for label in labels}
    for pred, label, agreement in pairs:
        if pred.uuid in solution_features or label.uuid in solution_features:
            # One of the two features already belongs to a better match.
            continue
        solution_features.update({pred.uuid, label.uuid})
        if include_subclasses:
            classification_iou = subclassification_miou(
                pred.classifications, label.classifications)
            # Without any subclass score the geometric agreement stands in,
            # which leaves the average below equal to `agreement`.
            if classification_iou is None:
                classification_iou = agreement
            solution_agreements.append((agreement + classification_iou) / 2.)
        else:
            solution_agreements.append(agreement)

    # Add zeros for unmatched Features
    solution_agreements.extend([0.0] *
                               (len(all_features) - len(solution_features)))
    return np.mean(solution_agreements)
141+
142+
143+
def feature_miou(predictions: List[NDAnnotation],
                 labels: List[NDAnnotation],
                 include_subclasses=True) -> Optional[float]:
    """
    Computes iou score for all features with the same feature schema id.

    Args:
        predictions: List of annotations with the same feature schema.
        labels: List of labels with the same feature schema.
        include_subclasses: Whether or not subclasses are factored into the
            score of vector tools.
    Returns:
        float representing the iou score for the feature type if score can be computed otherwise None.
    Raises:
        ValueError: if the annotations are not all of one type, or the type is
            not a mask, vector tool or classification.
    """
    if not predictions and not labels:
        # Ignore examples that do not have any labels or predictions
        return None
    if not predictions or not labels:
        # Features on only one side (labels without predictions, or
        # predictions without labels) means no matches.
        return 0.0

    tool_types = {type(annot) for annot in predictions
                 }.union({type(annot) for annot in labels})

    if len(tool_types) > 1:
        raise ValueError(
            "feature_miou predictions and annotations should all be of the same type"
        )

    tool_type = tool_types.pop()
    if tool_type == NDMask:
        return mask_miou(predictions, labels)
    elif tool_type in NDTool.get_union_types():
        return vector_miou(predictions,
                           labels,
                           include_subclasses=include_subclasses)
    elif tool_type in NDClassification.get_union_types():
        return classification_miou(predictions, labels)
    else:
        raise ValueError(f"Unexpected annotation found. Found {tool_type}")
184+
185+
186+
def datarow_miou(label_content: Dict[str, Any],
                 ndjsons: List[Dict[str, Any]],
                 include_classifications=True,
                 include_subclasses=True) -> Optional[float]:
    """
    Computes the mean iou over every feature schema found on a data row.

    Args:
        label_content : one row from the bulk label export - `project.export_labels()`
        ndjsons: Model predictions in the ndjson format specified here (https://docs.labelbox.com/data-model/en/index-en#annotations)
        include_classifications: Whether or not to factor top level classifications into the iou score.
        include_subclasses: Whether or not to factor in subclassifications into the iou score
    Returns:
        float indicating the iou score for this data row, or None when no
        feature schema produced a score.
    """

    predictions, labels, feature_schemas = _preprocess_args(
        label_content, ndjsons, include_classifications)

    ious = [
        feature_miou(predictions[feature_schema],
                     labels[feature_schema],
                     include_subclasses=include_subclasses)
        for feature_schema in feature_schemas
    ]
    # feature_miou returns None for schemas it cannot score; leave them out of the mean.
    ious = [iou for iou in ious if iou is not None]
    if not ious:
        return None
    return np.mean(ious)
215+
216+
217+
def _preprocess_args(
    label_content: Dict[str, Any],
    ndjsons: List[Dict[str, Any]],
    include_classifications=True
) -> Tuple[Dict[str, List[NDAnnotation]], Dict[str, List[NDAnnotation]], set]:
    """
    This function takes in the raw json payloads, validates, and converts to python objects.
    In the future datarow_miou will directly take the objects as args.

    Args:
        label_content : one row from the bulk label export - `project.export_labels()`
        ndjsons: Model predictions in the ndjson format specified here (https://docs.labelbox.com/data-model/en/index-en#annotations)
        include_classifications: Whether or not top level classifications are
            added to the labels.
    Returns a tuple containing:
        - a dict for looking up a list of predictions by feature schema id
        - a dict for looking up a list of labels by feature schema id
        - a set of all feature schema ids
    Raises:
        ValueError: if the predictions reference any data row other than the
            one in `label_content`.
    """
    label_payload = label_content['Label']
    # Build a fresh list: extending the exported 'objects' list in place would
    # append the classifications to the caller's payload on every call.
    # `or []` tolerates keys that are missing or set to None.
    labels = list(label_payload.get('objects') or [])
    if include_classifications:
        labels.extend(label_payload.get('classifications') or [])

    predictions = [NDAnnotation(**pred.copy()) for pred in ndjsons]

    unique_datarows = {pred.dataRow.id for pred in predictions}
    if len(unique_datarows):
        # Empty set of annotations is valid (if labels exist but no inferences then iou will be 0.)
        if unique_datarows != {label_content['DataRow ID']}:
            raise ValueError(
                f"There should only be one datarow passed to the datarow_miou function. Found {unique_datarows}"
            )

    labels = [
        label_to_ndannotation(label, label_content['DataRow ID'])
        for label in labels
    ]

    labels = create_schema_lookup(labels)
    predictions = create_schema_lookup(predictions)

    feature_schemas = set(predictions.keys()).union(set(labels.keys()))
    return predictions, labels, feature_schemas
261+
262+
263+
def _get_vector_pairs(predictions: List[Dict[str, Any]], labels):
264+
"""
265+
# Get iou score for all pairs of labels and predictions
266+
"""
267+
return [(prediction, label,
268+
_polygon_iou(prediction.to_shapely_poly(),
269+
label.to_shapely_poly()))
270+
for prediction, label in product(predictions, labels)]
271+
272+
273+
def _polygon_iou(poly1: Polygon, poly2: Polygon) -> float:
    """Computes iou between two shapely polygons (0. when they do not intersect)."""
    if not poly1.intersects(poly2):
        return 0.
    intersection_area = poly1.intersection(poly2).area
    union_area = poly1.union(poly2).area
    return intersection_area / union_area
278+
279+
280+
def _mask_iou(mask1: np.ndarray, mask2: np.ndarray) -> float:
281+
"""Computes iou between two binary segmentation masks."""
282+
return np.sum(mask1 & mask2) / np.sum(mask1 | mask2)
283+
284+
285+
def _instance_urls_to_binary_mask(urls: List[str]) -> np.ndarray:
    """
    Downloads segmentation masks and turns the image into a binary mask.

    The downloaded arrays are summed over axis 0 (the list of masks) and
    axis 3 (the last axis of each mask), so every mask must be 3-dimensional
    (presumably height x width x channels - TODO confirm). An element of the
    result is True where that sum is positive.
    """
    masks = []
    for url in urls:
        masks.append(url_to_numpy(url))
    summed = np.sum(masks, axis=(0, 3))
    return summed > 0
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
from typing import List, Dict, Any, Union
2+
from collections import defaultdict
3+
import numpy as np # type: ignore
4+
from PIL import Image # type: ignore
5+
import requests
6+
from io import BytesIO
7+
from google.api_core import retry
8+
import uuid
9+
10+
from labelbox.data.metrics.tool_types import ALL_TOOL_TYPES, SEGMENTATION_TOOLS, CLASSIFICATION_TOOLS
11+
from labelbox.schema.bulk_import_request import NDAnnotation, NDBase
12+
13+
14+
def get_tool(label: Dict[str, Any]):
    """
    Uses the keys in the label to determine the tool type.

    Returns one of the label's keys that is a known tool type. When the label
    has no such key, an element of SEGMENTATION_TOOLS is returned instead.
    """
    candidates = set(label) & ALL_TOOL_TYPES
    if not candidates:
        candidates = SEGMENTATION_TOOLS
    return next(iter(candidates))
17+
18+
19+
def update_base(label: Dict[str, Any], datarow_id: str):
    """
    Adds required fields to the label json payload.

    The payload is modified in place: it receives a freshly generated `uuid`
    and a `dataRow` entry that points at `datarow_id`.
    """
    generated_uuid = str(uuid.uuid4())
    label.update(uuid=generated_uuid, dataRow={'id': datarow_id})
23+
24+
25+
def label_to_ndannotation(label: Dict[str, Any],
                          datarow_id: str) -> NDAnnotation:
    """
    Converts a label to an ndannotation.

    Works on a shallow copy of `label`, so the top level keys of the caller's
    dict are left untouched.
    """
    tool = get_tool(label)

    payload = label.copy()
    update_base(payload, datarow_id)

    if tool in SEGMENTATION_TOOLS:
        # Move the instance url into the `mask` field before it is dropped below.
        payload['mask'] = {
            'instanceURI': payload['instanceURI'],
            'colorRGB': (0, 0, 0)
        }

    # remove unnecessary keys
    for export_only_key in ('title', 'value', 'color', 'featureId',
                            'instanceURI'):
        payload.pop(export_only_key, None)

    if tool not in CLASSIFICATION_TOOLS:
        # Classification labels carry no nested classifications of their own.
        nested = payload.get('classifications', [])
        payload['classifications'] = clean_classifications(nested, datarow_id)
    return NDAnnotation(**payload)
47+
48+
49+
def clean_classifications(classifications: List[Dict[str, Any]],
                          datarow_id: str) -> List[Dict[str, Any]]:
    """
    Converts classifications to a format compatible with NDAnnotations.

    Args:
        classifications: classification payloads nested under a label.
        datarow_id: id of the data row the classifications belong to.
    Returns:
        A new list of new dicts with the required base fields added and the
        unused keys removed. The input list and its dicts are not modified.
    """
    unused_keys = ['title', 'value', 'color', 'featureId', 'instanceURI']
    cleaned = []
    for classification in classifications:
        # Copy each dict: copying only the list would still share the dicts
        # with the caller, and the edits below would leak into its data.
        classification = classification.copy()
        update_base(classification, datarow_id)
        for unused_key in unused_keys:
            classification.pop(unused_key, None)
        cleaned.append(classification)
    return cleaned
59+
60+
61+
def create_schema_lookup(rows: List[NDBase]) -> Dict[str, List[Any]]:
    """
    Takes a list of annotations and groups them by their `schemaId`.

    Returns a defaultdict, so looking up a schema id that has no annotations
    gives an empty list instead of raising.
    """
    lookup = defaultdict(list)
    for annotation in rows:
        schema_id = annotation.schemaId
        lookup[schema_id].append(annotation)
    return lookup
67+
68+
69+
@retry.Retry(deadline=10.)
def url_to_numpy(mask_url: str) -> np.ndarray:
    """
    Downloads an image and converts to a numpy array.

    The call is wrapped in `retry.Retry` with a deadline of 10 (presumably
    seconds, and presumably only for errors the retry helper treats as
    transient - TODO confirm against google.api_core).
    """
    response = requests.get(mask_url)
    image = Image.open(BytesIO(response.content))
    return np.array(image)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Label keys that identify each family of tools.
VECTOR_TOOLS = {'bbox', 'polygon', 'line', 'point'}
SEGMENTATION_TOOLS = {'segmentation'}
CLASSIFICATION_TOOLS = {'answer', 'answers'}
# Every key that identifies a tool, regardless of family.
ALL_TOOL_TYPES = VECTOR_TOOLS | SEGMENTATION_TOOLS | CLASSIFICATION_TOOLS

labelbox/schema/annotation_import.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def statuses(self) -> List[Dict[str, Any]]:
8989
self.wait_until_done()
9090
return self._fetch_remote_ndjson(self.status_file_url)
9191

92-
def wait_until_done(self, sleep_time_seconds: int = 5) -> None:
92+
def wait_until_done(self, sleep_time_seconds: int = 10) -> None:
9393
"""Blocks import job until certain conditions are met.
9494
9595
Blocks until the AnnotationImport.state changes either to

0 commit comments

Comments
 (0)