diff --git a/pipelines/assets/livelihood_activity.py b/pipelines/assets/livelihood_activity.py index f539a5f..de9e951 100644 --- a/pipelines/assets/livelihood_activity.py +++ b/pipelines/assets/livelihood_activity.py @@ -179,7 +179,7 @@ def get_livelihood_activity_regexes() -> list: # Create regex patterns for metadata attributes to replace the placeholders in the regexes placeholder_patterns = { "label_pattern": r"[a-zà-ÿ][a-zà-ÿ',/ \.\>\-\(\)]+?", - "product_pattern": r"(?P[a-zà-ÿ][a-zà-ÿ',/ \.\>\-\(\)]+?)", + "product_pattern": r"(?P[a-zà-ÿ][a-zà-ÿ1-9',/ \.\>\-\(\)]+?)", "season_pattern": r"(?Pseason [12]|saison [12]|[12][a-z] season||[12][a-zà-ÿ] saison|r[eé]colte principale|principale r[eé]colte|gu|deyr+?)", # NOQA: E501 "additional_identifier_pattern": r"\(?(?Prainfed|irrigated|pluviale?|irriguée|submersion libre|submersion contrôlée|flottant)\)?", "unit_of_measure_pattern": r"(?P[a-z]+)", diff --git a/pipelines/assets/livelihood_activity_regexes.json b/pipelines/assets/livelihood_activity_regexes.json index b33ddc0..b84a6e5 100644 --- a/pipelines/assets/livelihood_activity_regexes.json +++ b/pipelines/assets/livelihood_activity_regexes.json @@ -703,6 +703,24 @@ true, "quantity_produced" ], + [ + "(?:wild foods?{separator_pattern} )?{product_pattern}{separator_pattern} \\(?{unit_of_measure_pattern} gathered\\)?", + null, + true, + "quantity_produced" + ], + [ + "(?:fish|fish \\(?dry\\)?|fish \\(?fresh\\)?){separator_pattern} {product_pattern}{separator_pattern} \\(?{unit_of_measure_pattern} gathered\\)?", + null, + true, + "quantity_produced" + ], + [ + "{product_pattern}{separator_pattern}\\(?{unit_of_measure_pattern} gathered\\)?", + null, + true, + "quantity_produced" + ], [ "{product_pattern} (?P[1|2]è[m|r]e récolte){separator_pattern} {nbr_pattern} mois", null, diff --git a/pipelines_tests/test_assets/test_livelihood_activity_regexes.json b/pipelines_tests/test_assets/test_livelihood_activity_regexes.json index fdfa490..26c992e 100644 --- a/pipelines_tests/test_assets/test_livelihood_activity_regexes.json +++ b/pipelines_tests/test_assets/test_livelihood_activity_regexes.json @@ -829,5 +829,35 @@ "attribute": "payment_per_time", "product_id": "grain", "unit_of_measure_id": "kg" + }, + "wild food: avocado (kg gathered)": { + "is_start": true, + "product_id": "avocado", + "unit_of_measure_id": "kg", + "attribute": "quantity_produced" + }, + "mangoes (kg gathered)": { + "is_start": true, + "product_id": "mangoes", + "unit_of_measure_id": "kg", + "attribute": "quantity_produced" + }, + "okra - kg gathered": { + "is_start": true, + "product_id": "okra", + "unit_of_measure_id": "kg", + "attribute": "quantity_produced" + }, + "Fish (dry) : Tilapia (dry/smoked) (kg gathered)": { + "is_start": true, + "product_id": "tilapia (dry/smoked)", + "unit_of_measure_id": "kg", + "attribute": "quantity_produced" + }, + "Fish type 2 (dried) - kg gathered": { + "is_start": true, + "product_id": "fish type 2 (dried)", + "unit_of_measure_id": "kg", + "attribute": "quantity_produced" } } \ No newline at end of file