CodeAnt AI: Made Antipattern Fixes #4

Merged

examples/maskrcnn-example/train.py (2 additions, 2 deletions)

@@ -20,7 +20,7 @@ def train_one_epoch(model, device, data_loader, optimizer, log_freq=None):
     model.train()

     for batch_id, (images, targets, _) in enumerate(data_loader):
-        images = list(image.to(device) for image in images)
+        images = [image.to(device) for image in images]
         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

         loss_dict = model(images, targets)

@@ -39,7 +39,7 @@ def evaluate(model, device, data_loader, map_metric):
     model.eval()

     for images, targets, _ in data_loader:
-        images = list(image.to(device) for image in images)
+        images = [image.to(device) for image in images]
         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

         predictions = model(images)
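
The two hunks above make the same fix: `list(generator)` first builds a generator object and then drains it through `list()`, while a comprehension builds the list directly, with one less frame of overhead. A minimal sketch of the equivalence, with a hypothetical no-op `to_device` standing in for `image.to(device)`:

    def to_device(x):
        # Hypothetical stand-in for `image.to(device)`.
        return x

    images = [1, 2, 3]

    via_generator = list(to_device(img) for img in images)  # removed form
    via_comprehension = [to_device(img) for img in images]  # added form
    assert via_generator == via_comprehension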

examples/maskrcnn-example/utils/encord_dataset.py (1 addition, 7 deletions)

@@ -49,13 +49,7 @@ def __getitem__(self, idx):
         segmentations = [obj["segmentation"] for obj in target]
         masks = convert_coco_poly_to_mask(segmentations, img_height, img_width)

-        processed_target = {}
-        processed_target["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
-        processed_target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
-        processed_target["masks"] = masks
-        processed_target["image_id"] = torch.tensor([image_id])
-        processed_target["area"] = torch.tensor(area)
-        processed_target["iscrowd"] = torch.as_tensor(iscrowd, dtype=torch.int64)
+        processed_target = {"boxes": torch.as_tensor(boxes, dtype=torch.float32), "labels": torch.as_tensor(labels, dtype=torch.int64), "masks": masks, "image_id": torch.tensor([image_id]), "area": torch.tensor(area), "iscrowd": torch.as_tensor(iscrowd, dtype=torch.int64)}

         if self._transforms is not None:
             img, processed_target = self._transforms(img, processed_target)
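
The replacement collapses seven statements into a single dict literal, so the mapping reads as one unit. The tool emitted it on one line; wrapped over several lines it is equivalent and easier to scan. A trimmed sketch of the two forms, assuming only `torch` and placeholder values:

    import torch

    boxes, labels = [[0.0, 0.0, 1.0, 1.0]], [1]

    # Removed form: build the dict up key by key.
    a = {}
    a["boxes"] = torch.as_tensor(boxes, dtype=torch.float32)
    a["labels"] = torch.as_tensor(labels, dtype=torch.int64)

    # Added form (wrapped for readability): one literal.
    b = {
        "boxes": torch.as_tensor(boxes, dtype=torch.float32),
        "labels": torch.as_tensor(labels, dtype=torch.int64),
    }
    assert a.keys() == b.keys()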

src/encord_active/cli/utils/encord.py (2 additions, 2 deletions)

@@ -70,7 +70,7 @@ def import_encord_project(
         )
         exit()

-    choices = list(map(lambda p: Choice(p.project_hash, name=p.title), projects))
+    choices = [Choice(p.project_hash, name=p.title) for p in projects]
     project_hash = i.fuzzy(
         message="What project would you like to import?",
         choices=choices,

@@ -119,7 +119,7 @@ def import_encord_project(
     NOTE: this will affect the results of 'encord.Project.list_label_rows()' as every label row will now have a label_hash.
     """
     ):
-        untoched_data = list(filter(lambda x: x.label_hash is None, project.list_label_rows_v2()))
+        untoched_data = [x for x in project.list_label_rows_v2() if x.label_hash is None]
         collect_async(lambda x: x.initialise_labels(), untoched_data, desc="Preparing uninitialized label rows")
         project.refetch_data()
         rich.print()
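
Both hunks swap `list(map(lambda ...))` and `list(filter(lambda ...))` for comprehensions, which avoid a Python-level lambda call per element and state the transformation or condition inline. A self-contained sketch of the filter case, with a hypothetical `Row` type standing in for an Encord label row:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Row:
        # Hypothetical stand-in for a label row.
        label_hash: Optional[str]

    rows = [Row("abc123"), Row(None)]

    # Removed form vs. added form - identical results.
    filtered_old = list(filter(lambda x: x.label_hash is None, rows))
    filtered_new = [x for x in rows if x.label_hash is None]
    assert filtered_old == filtered_new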

src/encord_active/cli/utils/server.py (1 addition, 2 deletions)

@@ -46,8 +46,7 @@ def launch_server_app(target: Path, port: int):
             f"[orange1]Port [blue]{port}[/blue] already in use. Try changing the `[blue]--port[/blue]` option.[/orange1]"
         )
         raise typer.Exit()
-    else:
-        rich.print("[yellow]Bear with us, this might take a short while...")
+    rich.print("[yellow]Bear with us, this might take a short while...")

     if did_schema_change():
         generate_prisma_client()
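
Because the `if` branch always raises, the `else` wrapper was dead structure: the only way to reach the next statement is for the condition to be false, so the code after the `if` is the implicit else branch. The same no-else-after-return family shows up again below in performance_by_metric.py and encoder.py. A minimal sketch of the shape, with a hypothetical `port_in_use` flag:

    def launch(port_in_use: bool) -> str:
        if port_in_use:
            raise RuntimeError("port already in use")
        # Implicit else: this line is reachable only when port_in_use is False.
        return "starting server"

    assert launch(False) == "starting server"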

src/encord_active/db/scripts/migrate_disk_to_db.py (7 additions, 14 deletions)

@@ -130,7 +130,7 @@ def _assign_metrics(
     if metric_column_name not in metrics_dict:
         raw_score = score
         if metric_column_name in WELL_KNOWN_PERCENTAGE_METRICS:
-            score = score / 100.0
+            score /= 100.0
         metric_def = metric_types[metric_column_name]
         # Run set of patches to generate correctly scaled results.
         if metric_column_name == "metric_sharpness":

@@ -145,7 +145,7 @@ def _assign_metrics(
             score = math.sqrt(score) / (8.0 * 255.0)
         elif metric_column_name == "metric_label_shape_outlier":
             # NOTE: guesswork, not based on any analysis of hu moments
-            score = score / 10000.0
+            score /= 10000.0
             pass
         # Update the score
         if metric_def.type == MetricType.NORMAL:

@@ -580,14 +580,7 @@ def __migrate_predictions(
         # Non-annotated metrics (new version - harder to process)
         # attempt to guess what the metric should be associated with.
        metric_target = " (P)"
-        if metric_name in [
-            "Random Values on Images",
-            "Aspect Ratio",
-            "Area",
-            "Image Difficulty",
-            "Image Diversity",
-            "Image Singularity",
-        ]:
+        if metric_name in {"Random Values on Images", "Aspect Ratio", "Area", "Image Difficulty", "Image Diversity", "Image Singularity"}:
             metric_target = " (F)"
         metric_key = WELL_KNOWN_METRICS[metric_name]
         if metric_key == "$SKIP":

@@ -604,7 +597,7 @@ def __migrate_predictions(
             description_dict=None,
             description=None,
         )
-    elif metric_key == "metric_object_density" or metric_key == "metric_object_count":
+    elif metric_key in {'metric_object_density', 'metric_object_count'}:
         pass # FIXME: this p-metric should be stored somewhere.
     else:
         raise ValueError(f"Unknown prediction metric: {metric_key}")

@@ -862,7 +855,7 @@ def migrate_disk_to_db(pfs: ProjectFileStructure, delete_existing_project: bool
             du_json = data_units_json[data_unit.data_hash]
             if data_type != "video":
                 expected_data_units.remove(data_unit.data_hash)
-            if data_type == "image" or data_type == "img_group":
+            if data_type in {'image', 'img_group'}:
                 labels_json = du_json["labels"]
             elif data_type == "video":
                 labels_json = du_json["labels"].get(str(data_unit.frame), {})

@@ -1146,7 +1139,7 @@ def migrate_disk_to_db(pfs: ProjectFileStructure, delete_existing_project: bool
         names = {child_dir.name for child_dir in predictions_child_dirs}
         if "predictions.csv" not in names:
             # 1 or 2 predictions for objects / classifications in the child structure
-            predictions_dir = predictions_dir / prediction_type.value
+            predictions_dir /= prediction_type.value
         elif prediction_type == MainPredictionType.CLASSIFICATION:
             # 1 prediction for objects - hence classification predictions should be skipped.
             continue

@@ -1248,4 +1241,4 @@ def migrate_disk_to_db(pfs: ProjectFileStructure, delete_existing_project: bool

     # Now correctly assign duplicates
     for prediction in predictions_run_db:
-        prediction_hash = prediction.prediction_hash
+        prediction_hash = prediction.prediction_hash
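
The recurring fixes in this file are augmented assignment (`score /= 100.0`, and `predictions_dir /= prediction_type.value`, which relies on pathlib's `/` operator) and set literals for membership tests; CPython compiles an `in` test against a constant set literal to a frozenset constant, so it is a single containment check rather than chained comparisons. The final hunk, whose removed and added lines are identical, is most likely a trailing-newline-only change. A sketch of the membership idiom, with a hypothetical `labels_for` helper:

    def labels_for(data_type: str) -> str:
        # Removed form: if data_type == "image" or data_type == "img_group": ...
        # Added form: one containment check against a constant set.
        if data_type in {"image", "img_group"}:
            return "per-image labels"
        if data_type == "video":
            return "per-frame labels"
        raise ValueError(f"unknown data type: {data_type}")

    assert labels_for("img_group") == "per-image labels"
    assert labels_for("video") == "per-frame labels"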

src/encord_active/lib/charts/data_quality_summary.py (4 additions, 4 deletions)

@@ -69,14 +69,14 @@ def create_labels_distribution_chart(
     fig = go.Figure(
         data=[
             go.Bar(
-                x=labels_df.loc[labels_df[LabelStatisticsSchema.status] == False][LabelStatisticsSchema.name],
-                y=labels_df[labels_df[LabelStatisticsSchema.status] == False][LabelStatisticsSchema.count],
+                x=labels_df.loc[~labels_df[LabelStatisticsSchema.status]][LabelStatisticsSchema.name],
+                y=labels_df[~labels_df[LabelStatisticsSchema.status]][LabelStatisticsSchema.count],
                 name="representative",
                 marker_color="#3380FF",
             ),
             go.Bar(
-                x=labels_df.loc[labels_df[LabelStatisticsSchema.status] == True][LabelStatisticsSchema.name],
-                y=labels_df[labels_df[LabelStatisticsSchema.status] == True][LabelStatisticsSchema.count],
+                x=labels_df.loc[labels_df[LabelStatisticsSchema.status]][LabelStatisticsSchema.name],
+                y=labels_df[labels_df[LabelStatisticsSchema.status]][LabelStatisticsSchema.count],
                 name="undersampled",
                 marker_color="tomato",
            ),
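
One caution on this hunk: mechanically rewriting `== False` to `is False` would be wrong here, because the left-hand side is a pandas Series, and `is` performs an identity test against the `False` singleton that always yields the scalar `False` rather than an element-wise mask. The diff above therefore uses the boolean column itself, and `~` for its negation, assuming `LabelStatisticsSchema.status` selects a boolean-dtype column. A minimal sketch:

    import pandas as pd

    df = pd.DataFrame({"status": [True, False, True], "count": [3, 5, 2]})

    # Identity test: compares the Series object itself to False - never a mask.
    assert (df["status"] is False) is False

    # Element-wise masks select the intended rows.
    undersampled = df[df["status"]]
    representative = df[~df["status"]]
    assert list(undersampled["count"]) == [3, 2]
    assert list(representative["count"]) == [5]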

src/encord_active/lib/charts/performance_by_metric.py (9 additions, 11 deletions)

@@ -99,17 +99,15 @@ def bin_bar_chart(
                 alt.Tooltip(f"{BinSchema.class_name}:N", title="Class name"),
             ],
         )
-    else:
-        # Only use aggregate over all classes
-        return chart.encode(
-            alt.X(f"{BinSchema.bin}:Q"),
-            alt.Y("sum(pctf):Q", stack="zero"),
-            tooltip=[
-                alt.Tooltip(BinSchema.bin, title=metric_name, format=FLOAT_FMT),
-                alt.Tooltip("count():Q", title=f"Num. {str_type}", format=COUNT_FMT),
-                alt.Tooltip("sum(pct):Q", title=f"% of total {str_type}", format=PCT_FMT),
-            ],
-        )
+    return chart.encode(
+        alt.X(f"{BinSchema.bin}:Q"),
+        alt.Y("sum(pctf):Q", stack="zero"),
+        tooltip=[
+            alt.Tooltip(BinSchema.bin, title=metric_name, format=FLOAT_FMT),
+            alt.Tooltip("count():Q", title=f"Num. {str_type}", format=COUNT_FMT),
+            alt.Tooltip("sum(pct):Q", title=f"% of total {str_type}", format=PCT_FMT),
+        ],
+    )


 def performance_rate_line_chart(

src/encord_active/lib/coco/encoder.py (15 additions, 19 deletions)

@@ -101,12 +101,12 @@ def __init__(self, labels_list: List[dict], metrics: dict, ontology: OntologyStructure):
         self._labels_list = labels_list
         self._metrics = metrics
         self._ontology = ontology
-        self._coco_json: dict = dict()
+        self._coco_json: dict = {}
         self._current_annotation_id: int = 0
-        self._object_hash_to_track_id_map: dict = dict()
-        self._coco_categories_id_to_ontology_object_map: dict = dict() # DENIS: do we need this?
-        self._feature_hash_to_coco_category_id_map: dict = dict()
-        self._data_hash_to_image_id_map: dict = dict()
+        self._object_hash_to_track_id_map: dict = {}
+        self._coco_categories_id_to_ontology_object_map: dict = {} # DENIS: do we need this?
+        self._feature_hash_to_coco_category_id_map: dict = {}
+        self._data_hash_to_image_id_map: dict = {}
         """Map of (data_hash, frame_offset) to the image id"""

         # self._data_location_to_image_id_map = dict()

@@ -172,14 +172,11 @@ def get_info(self) -> dict:
     def get_description(self) -> Optional[str]:
         if len(self._labels_list) == 0:
             return None
-        else:
-            return self._download_file_path.as_posix().split("/")[-1]
+        return self._download_file_path.as_posix().split("/")[-1]

     def get_categories(self) -> List[dict]:
         """This does not translate classifications as they are not part of the Coco spec."""
-        categories = []
-        for object_ in self._ontology.objects:
-            categories.append(self.get_category(object_))
+        categories = [self.get_category(object_) for object_ in self._ontology.objects]

         return categories

@@ -379,7 +376,7 @@ def get_annotations(self):
                 continue
             data_unit_metrics = self._metrics[label_hash][data_hash]

-            if data_unit["data_type"] in ["video", "application/dicom"]:
+            if data_unit["data_type"] in {"video", "application/dicom"}:
                 if not self._include_videos:
                     continue
                 for frame_num, frame_item in data_unit["labels"].items():

@@ -502,7 +499,7 @@ def get_rotatable_bounding_box(
         )

     def get_polygon(self, object_: dict, image_id: int, size: Size) -> Union[CocoAnnotation, SuperClass, None]:
-        if not len(object_["polygon"]) >= 3:
+        if len(object_["polygon"]) < 3:
             return None

         polygon = get_polygon_from_dict(object_["polygon"], size.width, size.height)

def get_polyline(self, object_: dict, image_id: int, size: Size) -> Union[CocoAnnotation, SuperClass, None]:
"""Polylines are technically not supported in COCO, but here we use a trick to allow a representation."""
if not len(object_["polyline"]) >= 2:
if len(object_["polyline"]) < 2:
return None

polygon = get_polygon_from_dict(object_["polyline"], size.width, size.height)

@@ -622,7 +619,7 @@ def get_point(self, object_: dict, image_id: int, size: Size) -> Union[CocoAnnotation, SuperClass, None]:

     def get_skeleton(self, object_: dict, image_id: int, size: Size) -> Union[CocoAnnotation, SuperClass, None]:
         # DENIS: next up: check how this is visualised.
-        if not len(object_["skeleton"]) >= 1:
+        if len(object_["skeleton"]) < 1:
             return None

         area = 0

@@ -686,10 +683,9 @@ def next_annotation_id(self) -> int:
     def get_and_set_track_id(self, object_hash: str) -> int:
         if object_hash in self._object_hash_to_track_id_map:
             return self._object_hash_to_track_id_map[object_hash]
-        else:
-            next_track_id = len(self._object_hash_to_track_id_map)
-            self._object_hash_to_track_id_map[object_hash] = next_track_id
-            return next_track_id
+        next_track_id = len(self._object_hash_to_track_id_map)
+        self._object_hash_to_track_id_map[object_hash] = next_track_id
+        return next_track_id

     def download_image(self, url: str, path: Path):
         """Check if directory exists, create the directory if needed, download the file, store it into the path."""

@@ -802,4 +798,4 @@ def df_to_nested_dict(df: pd.DataFrame) -> dict:
             {k: v for k, v in row.items() if k not in ["identifier", "url", "tags"] and not pd.isnull(v)}
         )
         frame_dict[object_hash].setdefault("tags", []).extend(tags)
-    return metrics
+    return metrics
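
The encoder.py changes collect several instances of the same idioms: `{}` literals instead of `dict()` calls, comprehensions instead of append loops, early returns instead of `else` blocks, and guard clauses such as `if len(object_["polygon"]) < 3: return None` replacing the double negative `if not len(...) >= 3`. A sketch of the guard-clause shape, with a hypothetical `polygon_area` helper (shoelace formula):

    from typing import List, Optional, Tuple

    def polygon_area(points: List[Tuple[float, float]]) -> Optional[float]:
        # Guard clause: a valid polygon needs at least three vertices.
        if len(points) < 3:
            return None
        # Shoelace formula over the validated input.
        area = 0.0
        for i, (x1, y1) in enumerate(points):
            x2, y2 = points[(i + 1) % len(points)]
            area += x1 * y2 - x2 * y1
        return abs(area) / 2.0

    assert polygon_area([(0.0, 0.0), (1.0, 0.0)]) is None
    assert polygon_area([(0.0, 0.0), (1.0, 0.0), (0.0, 1.0)]) == 0.5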