From eaa1002736b47a058cdb5c6dbe435c5531d2d272 Mon Sep 17 00:00:00 2001 From: Girum Bizuayehu Date: Fri, 22 Nov 2024 08:52:15 +0300 Subject: [PATCH 1/2] Add a wealth_group_category missing error report --- pipelines/assets/baseline.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pipelines/assets/baseline.py b/pipelines/assets/baseline.py index 875a14d..bc25962 100644 --- a/pipelines/assets/baseline.py +++ b/pipelines/assets/baseline.py @@ -88,6 +88,16 @@ def get_wealth_group_dataframe( wealth_group_df = wealth_group_df.loc[:, ~wealth_group_df.columns.duplicated()] except ValueError: pass + # Check if there are unrecognized wealth group category at this point and report + wealth_group_missing_category_df = wealth_group_df[ + wealth_group_df["wealth_group_category"].isnull() & df["wealth_group_category_original"].notnull() + ] + if not wealth_group_missing_category_df.empty: + unique_values = set(wealth_group_missing_category_df["wealth_group_category_original"].unique()) + raise ValueError( + "%s has unrecognized wealth group category in %s:\n%s" + % (partition_key, worksheet_name, "\n ".join(unique_values)), + ) # Lookup the Community instances community_lookup = CommunityLookup() From b168b1045905fc0cecbb102463427a14b5561e16 Mon Sep 17 00:00:00 2001 From: Girum Bizuayehu Date: Fri, 22 Nov 2024 14:09:41 +0300 Subject: [PATCH 2/2] Add a wealth_group_category missing error report --- pipelines/assets/baseline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/assets/baseline.py b/pipelines/assets/baseline.py index bc25962..5e01efb 100644 --- a/pipelines/assets/baseline.py +++ b/pipelines/assets/baseline.py @@ -90,7 +90,8 @@ def get_wealth_group_dataframe( pass # Check if there are unrecognized wealth group category at this point and report wealth_group_missing_category_df = wealth_group_df[ - wealth_group_df["wealth_group_category"].isnull() & df["wealth_group_category_original"].notnull() + wealth_group_df["wealth_group_category"].isnull() + & wealth_group_df["wealth_group_category_original"].notnull() ] if not wealth_group_missing_category_df.empty: unique_values = set(wealth_group_missing_category_df["wealth_group_category_original"].unique()) @@ -98,7 +99,6 @@ def get_wealth_group_dataframe( "%s has unrecognized wealth group category in %s:\n%s" % (partition_key, worksheet_name, "\n ".join(unique_values)), ) - # Lookup the Community instances community_lookup = CommunityLookup() wealth_group_df["livelihood_zone_baseline"] = livelihood_zone_baseline.id # required parent for lookup