Skip to content

Commit

Permalink
[Justice Counts] Update _check_expected_columns to expect system co…
Browse files Browse the repository at this point in the history
…lumns for supervision subsystem metrics. (Recidiviz/recidiviz-data#29369)

When supervision systems upload metrics with a system column, they see a
misleading `Unexpected System Column` warnings for all of their
subsystem metrics.

<img width="946" alt="Screenshot 2024-04-23 at 6 03 40 PM"
src="https://github.com/Recidiviz/recidiviz-data/assets/19961693/49dc1976-f707-42a0-a8e0-be7a23482541">

This PR makes sure that warnings about systems columns are only given
for agencies that 1) report for supervision subsystems 2) the metric
being ingested is a supervision / supervision subsystem metric.

Below is a screen recording of me uploading a sheet from Carroll County
Community Corrections, where Brandon noticed this behavior initially.

https://github.com/Recidiviz/recidiviz-data/assets/19961693/b4e246bc-26c1-4504-bd29-c06f103de15d

## Related issues

Closes Recidiviz/recidiviz-data#27683

## Checklists

### Development

**This box MUST be checked by the submitter prior to merging**:
- [x] **Double- and triple-checked that there is no Personally
Identifiable Information (PII) being mistakenly added in this pull
request**

These boxes should be checked by the submitter prior to merging:
- [ ] Tests have been written to cover the code changed/added as part of
this pull request

### Code review

These boxes should be checked by reviewers prior to merging:

- [ ] This pull request has a descriptive title and information useful
to a reviewer
- [ ] Potential security implications or infrastructural changes have
been considered, if relevant

GitOrigin-RevId: 6d0d613bead6d430fcc49f93cac26588a4f52f5e
  • Loading branch information
nichelle-hall authored and Helper Bot committed May 14, 2024
1 parent f5ad3c7 commit a132767
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 2 deletions.
12 changes: 12 additions & 0 deletions recidiviz/justice_counts/agency.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,3 +290,15 @@ def get_child_agencies_by_agency_ids(
.filter(schema.Agency.super_agency_id.in_(agency_ids))
.all()
)

@staticmethod
def does_supervision_agency_report_for_subsystems(agency: schema.Agency) -> bool:
"""This method is used to differentiate between Supervision agencies that do not
report for any subsystems and supervision agencies that report for subsystems.
"""
systems_enums = {schema.System[s] for s in agency.systems}

return (
len(schema.System.supervision_subsystems().intersection(systems_enums)) > 0
and schema.System.SUPERVISION in agency.systems
)
21 changes: 19 additions & 2 deletions recidiviz/justice_counts/bulk_upload/spreadsheet_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from sqlalchemy.orm import Session

from recidiviz.common.text_analysis import TextAnalyzer
from recidiviz.justice_counts.agency import AgencyInterface
from recidiviz.justice_counts.bulk_upload.bulk_upload_helpers import get_column_value
from recidiviz.justice_counts.bulk_upload.time_range_uploader import TimeRangeUploader
from recidiviz.justice_counts.datapoint import DatapointUniqueKey
Expand Down Expand Up @@ -513,6 +514,7 @@ def _check_expected_columns(
Additionally, this function adds Unexpected Columns warnings to the metric_key_to_errors
dictionary if there are unexpected column names for a given metric in a given sheet.
"""

# First, handle missing columns
if "value" not in actual_columns:
description = (
Expand All @@ -536,10 +538,16 @@ def _check_expected_columns(
description=description,
message_type=BulkUploadMessageType.ERROR,
)
# The "system" column is expected for supervision agencies that have supervision subsystems,
# but not for a supervision agency that does not have supervision subsystems.
if (
metric_definition.system.value == "SUPERVISION"
AgencyInterface.does_supervision_agency_report_for_subsystems(
agency=self.agency
)
and metric_definition.is_metric_for_supervision_or_subsystem
and "system" not in actual_columns
):

description = (
f'We expected to see a column named "system". '
f"Only the following columns were found in the sheet: "
Expand Down Expand Up @@ -586,7 +594,16 @@ def _check_expected_columns(
expected_columns.add(
metricfile.disaggregation_column_name # type: ignore[arg-type]
)
if metric_definition.system.value == "SUPERVISION":

# Expect a system column if the agency has supervision subsystems AND
# we are uploading to a supervision system or subsystem. The "system" column
# is not expected for an agency that is ONLY a supervision agency and has no subsystems?
if (
AgencyInterface.does_supervision_agency_report_for_subsystems(
agency=self.agency
)
and metric_definition.is_metric_for_supervision_or_subsystem
):
expected_columns.add("system")
if (
len(self.child_agency_name_to_agency) > 0
Expand Down
7 changes: 7 additions & 0 deletions recidiviz/justice_counts/metrics/metric_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

from recidiviz.common.constants.justice_counts import ContextKey, ValueType
from recidiviz.justice_counts.dimensions.base import DimensionBase
from recidiviz.persistence.database.schema.justice_counts import schema
from recidiviz.persistence.database.schema.justice_counts.schema import (
MeasurementType,
MetricType,
Expand Down Expand Up @@ -255,3 +256,9 @@ def reporting_frequency(self) -> ReportingFrequency:
if len(self.reporting_frequencies) > 1:
raise ValueError("Multiple reporting frequencies are not yet supported.")
return self.reporting_frequencies[0]

@property
def is_metric_for_supervision_or_subsystem(self) -> bool:
return self.system.value in (
{"SUPERVISION"} | {s.value for s in schema.System.supervision_subsystems()}
)

0 comments on commit a132767

Please sign in to comment.