Merge branch 'main' into fix_stress_tests
ntlind committed Dec 22, 2023
2 parents ac62c20 + 39cd20a commit 9e2c6ae
Showing 64 changed files with 4,128 additions and 3,822 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -28,3 +28,6 @@ runs
 
 *.jsonl
 *.jpeg
+*.pt
+*.png
+*.jpg
4 changes: 2 additions & 2 deletions README.md
@@ -11,9 +11,9 @@ Velour is maintained by Striveworks, a cutting-edge MLOps company based out of A
 For more information, please see our user docs:
 
 - **[Overview](https://striveworks.github.io/velour/)**: Provides an overview of what Velour is, why it's important, and how it works.
-- **[Sample Notebook](#TODO update this link when notebook is complete)**: Illustrates how to evaluate model performance using Velour in a descriptive Jupyter notebook.
+- **[Sample Notebook](https://github.com/Striveworks/velour/blob/main/examples/getting_started.ipynb)**: Illustrates how to evaluate model performance using Velour in a descriptive Jupyter notebook.
 - **[Getting Started](https://striveworks.github.io/velour/getting_started)**: Details everything you need to get up-and-running with Velour.
 - **[Endpoints](https://striveworks.github.io/velour/endpoints/)**: Documents Velour's various API endpoints.
 - **[Technical Concepts](https://striveworks.github.io/velour/technical_concepts)**: Describes the technical concepts that underpin Velour.
 - **[Contributing & Development](https://striveworks.github.io/velour/contributing)**: Explains how you can contribute to Velour.
-- **[Python Client API](https://striveworks.github.io/velour/client_api/client/)**: Shares reference documentation for our Python client.
+- **[Python Client API](https://striveworks.github.io/velour/client_api/Client/)**: Shares reference documentation for our Python client.
74 changes: 27 additions & 47 deletions api/velour_api/backend/metrics/classification.py
@@ -212,6 +212,17 @@ def _compute_confusion_matrix_at_label_key(
     pFilter.models_names = [job_request.model]
     pFilter.label_keys = [label_key]
 
+    # 0. Get groundtruths that conform to gFilter
+    groundtruths = (
+        Query(
+            models.GroundTruth,
+            models.Annotation.datum_id.label("datum_id"),
+        )
+        .filter(gFilter)
+        .groundtruths(as_subquery=False)
+        .alias()
+    )
+
     # 1. Get predictions that conform to pFilter
     predictions = (
         Query(models.Prediction)
@@ -220,70 +231,52 @@ def _compute_confusion_matrix_at_label_key(
         .alias()
     )
 
-    # 2. Get the max prediction scores by datum that conform to pFilter
+    # 2. Get the max prediction scores by datum
     max_scores_by_datum_id = (
-        Query(
-            func.max(models.Prediction.score).label("max_score"),
-            models.Datum.id.label("datum_id"),
+        select(
+            func.max(predictions.c.score).label("max_score"),
+            models.Annotation.datum_id.label("datum_id"),
         )
-        .filter(pFilter)
-        .predictions(as_subquery=False)
-        .group_by(models.Datum.id)
+        .join(models.Annotation, models.Annotation.id == predictions.c.annotation_id)
+        .group_by(models.Annotation.datum_id)
         .alias()
     )
 
     # 3. Remove duplicate scores per datum
     # used for the edge case where the max confidence appears twice
     # the result of this query is all of the hard predictions
     min_id_query = (
-        select(func.min(predictions.c.id).label("min_id"))
+        select(
+            func.min(predictions.c.id).label("min_id"),
+            models.Annotation.datum_id.label("datum_id"),
+        )
         .select_from(predictions)
         .join(
             models.Annotation,
             models.Annotation.id == predictions.c.annotation_id,
         )
-        .join(
-            models.Datum,
-            models.Annotation.datum_id == models.Datum.id,
-        )
         .join(
             max_scores_by_datum_id,
             and_(
-                models.Datum.id == max_scores_by_datum_id.c.datum_id,
+                models.Annotation.datum_id == max_scores_by_datum_id.c.datum_id,
                 predictions.c.score == max_scores_by_datum_id.c.max_score,
             ),
         )
-        .join(
-            models.Label,
-            models.Label.id == predictions.c.label_id,
-        )
-        .group_by(models.Datum.id)
+        .group_by(models.Annotation.datum_id)
        .alias()
    )
 
     # 4. Get labels for hard predictions, organize per datum
     hard_preds_query = (
         select(
             models.Label.value.label("pred_label_value"),
-            models.Datum.id.label("datum_id"),
+            min_id_query.c.datum_id.label("datum_id"),
         )
         .select_from(min_id_query)
         .join(
             models.Prediction,
             models.Prediction.id == min_id_query.c.min_id,
         )
-        .join(
-            models.Annotation,
-            models.Annotation.id == models.Prediction.annotation_id,
-        )
-        .join(models.Datum, models.Datum.id == models.Annotation.datum_id)
-        .join(
-            max_scores_by_datum_id,
-            and_(
-                models.Prediction.score == max_scores_by_datum_id.c.max_score,
-                models.Datum.id == max_scores_by_datum_id.c.datum_id,
-            ),
-        )
         .join(
             models.Label,
             models.Label.id == models.Prediction.label_id,
@@ -294,31 +287,18 @@ def _compute_confusion_matrix_at_label_key(
     # 5. Link value to the Label.value object
     b = Bundle("cols", hard_preds_query.c.pred_label_value, models.Label.value)
 
-    # 6. Get groundtruths that conform to gFilter
-    groundtruths = (
-        Query(models.GroundTruth)
-        .filter(gFilter)
-        .groundtruths(as_subquery=False)
-        .alias()
-    )
-
     # 6. Generate confusion matrix
     total_query = (
         select(b, func.count())
-        .select_from(groundtruths)
-        .join(
-            models.Annotation,
-            models.Annotation.id == groundtruths.c.annotation_id,
-        )
+        .select_from(hard_preds_query)
         .join(
-            hard_preds_query,
-            hard_preds_query.c.datum_id == models.Annotation.datum_id,
+            groundtruths,
+            groundtruths.c.datum_id == hard_preds_query.c.datum_id,
         )
         .join(
             models.Label,
             models.Label.id == groundtruths.c.label_id,
         )
         .where(models.Label.key == label_key)
         .group_by(b)
     )
 
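Note: the net effect of this refactor is that the confusion-matrix queries resolve datums through `models.Annotation.datum_id` instead of joining through `models.Datum`, and the ground-truth subquery (now step 0) carries its own `datum_id` so step 6 can join it directly to the hard predictions. The tie-break in step 3, where a duplicated max confidence is resolved by keeping the prediction with the smallest id, can be sketched in plain Python. The names and tuple layout below are illustrative, not velour's API:

```python
def hard_predictions(predictions):
    """Per datum, keep the max-score prediction; break exact score ties
    by the smallest prediction id, mirroring func.min(predictions.c.id)
    in step 3 above."""
    best = {}
    for pred_id, datum_id, label, score in predictions:
        kept = best.get(datum_id)
        if kept is None or (score, -pred_id) > (kept[3], -kept[0]):
            best[datum_id] = (pred_id, datum_id, label, score)
    return best

preds = [
    (1, "img_0", "cat", 0.9),
    (2, "img_0", "dog", 0.9),  # tied max score, so id 1 wins
    (3, "img_1", "dog", 0.7),
]
assert {p[0] for p in hard_predictions(preds).values()} == {1, 3}
```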
23 changes: 12 additions & 11 deletions api/velour_api/backend/metrics/detection.py
@@ -26,7 +26,7 @@ class RankedPair:
 
 def _ap(
     sorted_ranked_pairs: Dict[int, List[RankedPair]],
-    number_of_ground_truths: int,
+    number_of_ground_truths: Dict[int, int],
     labels: Dict[int, schemas.Label],
     iou_thresholds: list[float],
 ) -> list[schemas.APMetric]:
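Note: changing `number_of_ground_truths` from `int` to `Dict[int, int]` means `_ap` now receives a per-label ground-truth count, which is the denominator of each label's recall. A minimal sketch of why the denominator must be keyed by label, with hypothetical counts:

```python
# label_id -> ground-truth count (hypothetical values)
number_of_ground_truths = {0: 12, 1: 3}
true_positives = {0: 9, 1: 3}

# recall for a label = TPs for that label / GTs for that label,
# so a single global int denominator would mix the labels together
recall = {
    label_id: true_positives[label_id] / n_gt
    for label_id, n_gt in number_of_ground_truths.items()
}
assert recall == {0: 0.75, 1: 1.0}
```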
@@ -202,7 +202,7 @@ def compute_detection_metrics(
         gunion_gt = gfunc.ST_Count(joint.c.gt_geom)
         gunion_pd = gfunc.ST_Count(joint.c.pd_geom)
         gunion = gunion_gt + gunion_pd - gintersection
-        iou_computation = gfunc.ST_Area(gintersection) / gfunc.ST_Area(gunion)
+        iou_computation = gintersection / gunion
     else:
         gintersection = gfunc.ST_Intersection(joint.c.gt_geom, joint.c.pd_geom)
         gunion = gfunc.ST_Union(joint.c.gt_geom, joint.c.pd_geom)
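Note: in the raster branch, `gintersection` and `gunion` are pixel counts built from `gfunc.ST_Count`, not geometries, so IoU is the plain ratio of the two scalars; applying `ST_Area` to them was the bug. The same computation on boolean masks, as a numpy sketch (the masks stand in for the rasters):

```python
import numpy as np

gt = np.zeros((4, 4), dtype=bool)
gt[:2, :2] = True   # 4 ground-truth pixels
pd_ = np.zeros((4, 4), dtype=bool)
pd_[:2, :3] = True  # 6 predicted pixels

# intersection and union are pixel counts, so divide them directly;
# no area function is involved
intersection = np.logical_and(gt, pd_).sum()   # 4
union = gt.sum() + pd_.sum() - intersection    # 4 + 6 - 4 = 6
iou = intersection / union
assert abs(iou - 4 / 6) < 1e-9
```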
@@ -265,9 +265,10 @@ def compute_detection_metrics(
     # Get groundtruth labels
     labels = {
         label.id: schemas.Label(key=label.key, value=label.value)
-        for label in db.query(
-            Query(models.Label).filter(gt_filter).groundtruths()
-        ).all()
+        for label in db.scalars(
+            select(models.Label)
+            .where(models.Label.id.in_(ranking.keys()))
+        )
     }
 
     # Get the number of ground truths per label id
@@ -409,23 +410,23 @@ def _get_annotation_types_for_computation(
     # get dominant type
     groundtruth_type = core.get_annotation_type(db, dataset, None)
     prediction_type = core.get_annotation_type(db, dataset, model)
-    gct = (
+    greatest_common_type = (
         groundtruth_type
         if groundtruth_type < prediction_type
         else prediction_type
     )
     if job_filter.annotation_types:
-        if gct not in job_filter.annotation_types:
+        if greatest_common_type not in job_filter.annotation_types:
             sorted_types = sorted(
                 job_filter.annotation_types,
                 key=lambda x: x,
                 reverse=True,
             )
             for annotation_type in sorted_types:
-                if gct <= annotation_type:
-                    return annotation_type, groundtruth_type, prediction_type
+                if greatest_common_type >= annotation_type:
+                    return annotation_type, annotation_type
             raise RuntimeError(
-                f"Annotation type filter is too restrictive. Attempted filter `{gct}` over `{groundtruth_type, prediction_type}`."
+                f"Annotation type filter is too restrictive. Attempted filter `{greatest_common_type}` over `{groundtruth_type, prediction_type}`."
             )
     return groundtruth_type, prediction_type
 
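Note: `greatest_common_type` is the most detailed annotation type that both the ground truths and the predictions support (the minimum under the type ordering), and the corrected `>=` comparison returns the first allowed type that the common type can be converted down to. A self-contained sketch of that selection under an assumed ordering; the enum below is illustrative, not velour's:

```python
from enum import IntEnum

class AnnType(IntEnum):
    # assumed ordering: a greater type carries more geometric detail
    # and can be converted down to a lesser one
    BOX = 1
    POLYGON = 2
    RASTER = 3

def pick_type(gt_type, pd_type, allowed):
    common = min(gt_type, pd_type)           # most detail both sides share
    for t in sorted(allowed, reverse=True):  # prefer the most detailed
        if common >= t:                      # common type converts down to t
            return t
    raise RuntimeError("Annotation type filter is too restrictive.")

# RASTER groundtruths with POLYGON predictions share POLYGON, and a filter
# allowing only {BOX} still works because POLYGON reduces to BOX
assert pick_type(AnnType.RASTER, AnnType.POLYGON, {AnnType.BOX}) == AnnType.BOX
```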
@@ -618,4 +619,4 @@ def create_detection_metrics(
         get_or_create_row(
             db, models.Metric, mapping, columns_to_ignore=["value"]
         )
-    db.commit()
\ No newline at end of file
+    db.commit()
3 changes: 2 additions & 1 deletion api/velour_api/backend/metrics/metric_utils.py
@@ -3,6 +3,7 @@
 from velour_api.enums import JobStatus
 
 from velour_api import schemas
+from velour_api.enums import JobStatus
 from velour_api.backend import core, models
 
 
@@ -203,7 +204,7 @@ def get_evaluations(
             ),
             settings=evaluation.settings,
             job_id=evaluation.id,
-            status=JobStatus.PENDING,  # unknown to backend
+            status=JobStatus.PENDING,
             metrics=[
                 _db_metric_to_pydantic_metric(metric)
                 for metric in evaluation.metrics
8 changes: 7 additions & 1 deletion api/velour_api/backend/metrics/segmentation.py
@@ -163,7 +163,13 @@ def _compute_segmentation_metrics(
         )
 
     ret.append(
-        mIOUMetric(value=sum([metric.value for metric in ret]) / len(ret))
+        mIOUMetric(
+            value=(
+                sum([metric.value for metric in ret]) / len(ret)
+                if len(ret) != 0
+                else -1
+            )
+        )
     )
 
     return ret
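Note: the guard above returns the sentinel `-1` instead of raising `ZeroDivisionError` when no per-label IoU metrics were computed, the same convention the ROCAUC change below adopts for NaN. A minimal sketch:

```python
def mean_iou(values: list[float]) -> float:
    # an empty metric list yields the -1 sentinel rather than dividing by zero
    return sum(values) / len(values) if values else -1

assert mean_iou([0.25, 0.75]) == 0.5
assert mean_iou([]) == -1
```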
11 changes: 9 additions & 2 deletions api/velour_api/backend/ops.py
@@ -45,6 +45,7 @@ def __init__(self, *args):
                 if (
                     isinstance(argument, DeclarativeMeta)
                     or isinstance(argument, InstrumentedAttribute)
+                    or hasattr(argument, "__visit_name__")
                 )
             ]
         )
@@ -76,11 +77,17 @@ def _expression(self, table_set: set[DeclarativeMeta]) -> BinaryExpression:
     def _map_attribute_to_table(
         self, attr: InstrumentedAttribute | DeclarativeMeta
     ) -> DeclarativeMeta | None:
+
         if isinstance(attr, DeclarativeMeta):
             return attr
-        if not isinstance(attr, InstrumentedAttribute):
+        elif isinstance(attr, InstrumentedAttribute):
+            table_name = attr.table.name
+        elif hasattr(attr, "__visit_name__"):
+            table_name = attr.__visit_name__
+        else:
             return None
-        match attr.table.name:
+
+        match table_name:
             case models.Dataset.__tablename__:
                 return models.Dataset
             case models.Model.__tablename__:
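Note: both `ops.py` hunks widen what `Query` accepts: any SQLAlchemy clause element that exposes `__visit_name__` (for example, a labeled column such as the `max_score` used in the classification metrics above) now passes the argument check and is mapped through `table_name`. A quick illustration, assuming SQLAlchemy is installed:

```python
from sqlalchemy import func

# SQLAlchemy clause elements are "visitable" and expose __visit_name__,
# which the widened isinstance/hasattr checks above rely on
expr = func.max(1).label("max_score")
assert hasattr(expr, "__visit_name__")  # a Label element reports "label"
```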
4 changes: 2 additions & 2 deletions api/velour_api/crud/stateflow.py
@@ -460,8 +460,8 @@ def _finalize_success(state: StateflowJob, msg: str = ""):
     """
     if state.node == StateflowNode.PREDICTION:
         if get_status_from_uuid(state.model_uuid) == JobStatus.CREATING:
-            Job.get(state.model_uuid).set_status(state.success)
-    state.set_status(state.success)
+            Job.get(state.model_uuid).set_status(state.success, msg)
+    state.set_status(state.success, msg)
 
 
 # stateflow decorator definitions
2 changes: 1 addition & 1 deletion api/velour_api/schemas/metrics.py
@@ -578,7 +578,7 @@ def db_mapping(self, evaluation_id: int) -> dict:
         A mapping dictionary.
         """
         return {
-            "value": self.value,
+            "value": self.value if not np.isnan(self.value) else -1,
             "type": "ROCAUC",
             "parameters": {"label_key": self.label_key},
             "evaluation_id": evaluation_id,
2 changes: 1 addition & 1 deletion client/unit-tests/schemas/test_core.py
@@ -239,7 +239,7 @@ def test_prediction():
         annotations=pds,
         model=1234,
     )
-    assert "`model` should be of type" in str(e)
+    assert "`model_name` should be of type" in str(e)
 
     with pytest.raises(ValueError) as e:
         Prediction(