From 029353125003138ffda9f5b4d90650404c234908 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Thu, 21 Nov 2024 17:04:01 -0500 Subject: [PATCH 1/3] Add principale recolte to regexes - see HEA-214 --- pipelines/assets/livelihood_activity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/assets/livelihood_activity.py b/pipelines/assets/livelihood_activity.py index d2c4168..93b7e19 100644 --- a/pipelines/assets/livelihood_activity.py +++ b/pipelines/assets/livelihood_activity.py @@ -180,7 +180,7 @@ def get_livelihood_activity_regexes() -> list: placeholder_patterns = { "label_pattern": r"[a-zà-ÿ][a-zà-ÿ',/ \.\>\-\(\)]+?", "product_pattern": r"(?P[a-zà-ÿ][a-zà-ÿ',/ \.\>\-\(\)]+?)", - "season_pattern": r"(?Pseason [12]|saison [12]|[12][a-z] season||[12][a-zà-ÿ] saison|r[eé]colte principale|gu|deyr+?)", # NOQA: E501 + "season_pattern": r"(?Pseason [12]|saison [12]|[12][a-z] season||[12][a-zà-ÿ] saison|r[eé]colte principale|principale r[eé]colte|gu|deyr+?)", # NOQA: E501 "additional_identifier_pattern": r"\(?(?Prainfed|irrigated|pluviale?|irriguée|submersion libre|submersion contrôlée|flottant)\)?", "unit_of_measure_pattern": r"(?P[a-z]+)", "nbr_pattern": r"(?:n[b|o]r?)\.?", From a5d30a314fc568a589e15894a246481cff7d743d Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Thu, 21 Nov 2024 22:44:46 -0500 Subject: [PATCH 2/3] Apply field checks in xxx_valid_instances - see HEA-57 --- pipelines/assets/fixtures.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/pipelines/assets/fixtures.py b/pipelines/assets/fixtures.py index 5902132..88b7e8e 100644 --- a/pipelines/assets/fixtures.py +++ b/pipelines/assets/fixtures.py @@ -6,6 +6,7 @@ from io import StringIO import django +import numpy as np import pandas as pd from dagster import AssetExecutionContext, MetadataValue, Output, asset from django.core.files import File @@ -174,6 +175,31 @@ def validate_instances( ) errors.append(error) + # Use the Django model to validate the fields, so we can apply already defined model validations and + # return informative error messages. + fields = [ + field + for field in model._meta.concrete_fields + if not isinstance(field, models.ForeignKey) and field.name in df + ] + instance = model() + for record in df.replace(np.nan, None).itertuples(): + for field in fields: + value = getattr(record, field.name) + if not value and field.null: + # Replace empty strings with None for optional fields + value = None + try: + field.clean(value, instance) + except Exception as e: + error = ( + f'Invalid {field.name} value {value}: "{", ".join(e.error_list[0].messages)}"\nRecord ' + f"{record.Index} from cell '{record.bss_sheet}'!{record.bss_column}{record.bss_row} " + f"for {model_name} in record " + f'{str({k: v for k,v in record._asdict().items() if k != "Index"})}.' + ) + errors.append(error) + # Check that the kcals/kg matches the values in the ClassifiedProduct model, if it's present in the BSS if model_name == "LivelihoodActivity" and "product__kcals_per_unit" in df: df["product"] = df["livelihood_strategy"].apply(lambda x: x[4]) @@ -182,7 +208,7 @@ def validate_instances( df["reference_unit_of_measure"] = df["product"].apply(lambda x: x.unit_of_measure) for record in df[df["product__kcals_per_unit"] != df["reference_kcals_per_unit"]].itertuples(): error = ( - f"Non-standard value {record.product__kcals_per_unit} in '{record.column}" + f"Non-standard value {record.product__kcals_per_unit} in '{record.column}' " f"for {model_name} in record " f'{str({k: v for k,v in record._asdict().items() if k != "Index"})}. ' f"Expected {record.reference_kcals_per_unit}/{record.reference_unit_of_measure} for {record.product}" From fe6aa6da280d0703ed7e5be0e66193c6b71c8714 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Thu, 21 Nov 2024 22:54:32 -0500 Subject: [PATCH 3/3] Ensure percentage_kcals is not negative - see HEA-572 --- ...ter_livelihoodactivity_percentage_kcals.py | 25 +++++++++++++++++++ apps/baseline/models.py | 1 + 2 files changed, 26 insertions(+) create mode 100644 apps/baseline/migrations/0017_alter_livelihoodactivity_percentage_kcals.py diff --git a/apps/baseline/migrations/0017_alter_livelihoodactivity_percentage_kcals.py b/apps/baseline/migrations/0017_alter_livelihoodactivity_percentage_kcals.py new file mode 100644 index 0000000..69ca324 --- /dev/null +++ b/apps/baseline/migrations/0017_alter_livelihoodactivity_percentage_kcals.py @@ -0,0 +1,25 @@ +# Generated by Django 5.1.1 on 2024-11-22 03:51 + +import django.core.validators +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("baseline", "0016_alter_livelihoodstrategy_additional_identifier_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="livelihoodactivity", + name="percentage_kcals", + field=models.FloatField( + blank=True, + help_text="Percentage of annual household kcal requirement provided by this livelihood strategy", + null=True, + validators=[django.core.validators.MinValueValidator(0)], + verbose_name="Percentage of required kcals", + ), + ), + ] diff --git a/apps/baseline/models.py b/apps/baseline/models.py index 9dabfd3..03ef2ca 100644 --- a/apps/baseline/models.py +++ b/apps/baseline/models.py @@ -1125,6 +1125,7 @@ class LivelihoodActivity(common_models.Model): percentage_kcals = models.FloatField( blank=True, null=True, + validators=[MinValueValidator(0)], verbose_name=_("Percentage of required kcals"), help_text=_("Percentage of annual household kcal requirement provided by this livelihood strategy"), )