From a132767a5a677371eba3119dd34ba907138f012e Mon Sep 17 00:00:00 2001 From: Nichelle Date: Mon, 29 Apr 2024 12:18:06 -0400 Subject: [PATCH] [Justice Counts] Update `_check_expected_columns` to expect system columns for supervision subsystem metrics. (Recidiviz/recidiviz-data#29369) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When supervision systems upload metrics with a system column, they see a misleading `Unexpected System Column` warnings for all of their subsystem metrics. Screenshot 2024-04-23 at 6 03 40 PM This PR makes sure that warnings about systems columns are only given for agencies that 1) report for supervision subsystems 2) the metric being ingested is a supervision / supervision subsystem metric. Below is a screen recording of me uploading a sheet from Carroll County Community Corrections, where Brandon noticed this behavior initially. https://github.com/Recidiviz/recidiviz-data/assets/19961693/b4e246bc-26c1-4504-bd29-c06f103de15d ## Related issues Closes Recidiviz/recidiviz-data#27683 ## Checklists ### Development **This box MUST be checked by the submitter prior to merging**: - [x] **Double- and triple-checked that there is no Personally Identifiable Information (PII) being mistakenly added in this pull request** These boxes should be checked by the submitter prior to merging: - [ ] Tests have been written to cover the code changed/added as part of this pull request ### Code review These boxes should be checked by reviewers prior to merging: - [ ] This pull request has a descriptive title and information useful to a reviewer - [ ] Potential security implications or infrastructural changes have been considered, if relevant GitOrigin-RevId: 6d0d613bead6d430fcc49f93cac26588a4f52f5e --- recidiviz/justice_counts/agency.py | 12 +++++++++++ .../bulk_upload/spreadsheet_uploader.py | 21 +++++++++++++++++-- .../metrics/metric_definition.py | 7 +++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/recidiviz/justice_counts/agency.py b/recidiviz/justice_counts/agency.py index 5ae855a624..88556a7421 100644 --- a/recidiviz/justice_counts/agency.py +++ b/recidiviz/justice_counts/agency.py @@ -290,3 +290,15 @@ def get_child_agencies_by_agency_ids( .filter(schema.Agency.super_agency_id.in_(agency_ids)) .all() ) + + @staticmethod + def does_supervision_agency_report_for_subsystems(agency: schema.Agency) -> bool: + """This method is used to differentiate between Supervision agencies that do not + report for any subsystems and supervision agencies that report for subsystems. + """ + systems_enums = {schema.System[s] for s in agency.systems} + + return ( + len(schema.System.supervision_subsystems().intersection(systems_enums)) > 0 + and schema.System.SUPERVISION in agency.systems + ) diff --git a/recidiviz/justice_counts/bulk_upload/spreadsheet_uploader.py b/recidiviz/justice_counts/bulk_upload/spreadsheet_uploader.py index b1b1c7c85c..0da61d2792 100644 --- a/recidiviz/justice_counts/bulk_upload/spreadsheet_uploader.py +++ b/recidiviz/justice_counts/bulk_upload/spreadsheet_uploader.py @@ -26,6 +26,7 @@ from sqlalchemy.orm import Session from recidiviz.common.text_analysis import TextAnalyzer +from recidiviz.justice_counts.agency import AgencyInterface from recidiviz.justice_counts.bulk_upload.bulk_upload_helpers import get_column_value from recidiviz.justice_counts.bulk_upload.time_range_uploader import TimeRangeUploader from recidiviz.justice_counts.datapoint import DatapointUniqueKey @@ -513,6 +514,7 @@ def _check_expected_columns( Additionally, this function adds Unexpected Columns warnings to the metric_key_to_errors dictionary if there are unexpected column names for a given metric in a given sheet. """ + # First, handle missing columns if "value" not in actual_columns: description = ( @@ -536,10 +538,16 @@ def _check_expected_columns( description=description, message_type=BulkUploadMessageType.ERROR, ) + # The "system" column is expected for supervision agencies that have supervision subsystems, + # but not for a supervision agency that does not have supervision subsystems. if ( - metric_definition.system.value == "SUPERVISION" + AgencyInterface.does_supervision_agency_report_for_subsystems( + agency=self.agency + ) + and metric_definition.is_metric_for_supervision_or_subsystem and "system" not in actual_columns ): + description = ( f'We expected to see a column named "system". ' f"Only the following columns were found in the sheet: " @@ -586,7 +594,16 @@ def _check_expected_columns( expected_columns.add( metricfile.disaggregation_column_name # type: ignore[arg-type] ) - if metric_definition.system.value == "SUPERVISION": + + # Expect a system column if the agency has supervision subsystems AND + # we are uploading to a supervision system or subsystem. The "system" column + # is not expected for an agency that is ONLY a supervision agency and has no subsystems? + if ( + AgencyInterface.does_supervision_agency_report_for_subsystems( + agency=self.agency + ) + and metric_definition.is_metric_for_supervision_or_subsystem + ): expected_columns.add("system") if ( len(self.child_agency_name_to_agency) > 0 diff --git a/recidiviz/justice_counts/metrics/metric_definition.py b/recidiviz/justice_counts/metrics/metric_definition.py index 2346944141..fa41f254e8 100644 --- a/recidiviz/justice_counts/metrics/metric_definition.py +++ b/recidiviz/justice_counts/metrics/metric_definition.py @@ -23,6 +23,7 @@ from recidiviz.common.constants.justice_counts import ContextKey, ValueType from recidiviz.justice_counts.dimensions.base import DimensionBase +from recidiviz.persistence.database.schema.justice_counts import schema from recidiviz.persistence.database.schema.justice_counts.schema import ( MeasurementType, MetricType, @@ -255,3 +256,9 @@ def reporting_frequency(self) -> ReportingFrequency: if len(self.reporting_frequencies) > 1: raise ValueError("Multiple reporting frequencies are not yet supported.") return self.reporting_frequencies[0] + + @property + def is_metric_for_supervision_or_subsystem(self) -> bool: + return self.system.value in ( + {"SUPERVISION"} | {s.value for s in schema.System.supervision_subsystems()} + )