From 3325abff37992d4bc21bd1d287e34f043cf45dca Mon Sep 17 00:00:00 2001 From: Girum Bizuayehu Date: Fri, 17 Oct 2025 15:36:35 +0300 Subject: [PATCH 1/2] Add conversion to numeric values before the aggregates have been applied - see HEA-780 --- pipelines/assets/livelihood_activity.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pipelines/assets/livelihood_activity.py b/pipelines/assets/livelihood_activity.py index d931c9a..f3288e1 100644 --- a/pipelines/assets/livelihood_activity.py +++ b/pipelines/assets/livelihood_activity.py @@ -1337,6 +1337,9 @@ def get_annotated_instances_from_dataframe( # Annotate the output metadata with completeness information # Get the summary dataframe, grouped by strategy_type summary_df = pd.DataFrame(reported_summary_output.value["LivelihoodActivity"]) + for col in ["income", "expenditure", "kcals_consumed"]: + summary_df[col] = pd.to_numeric(summary_df[col], errors="coerce") + summary_df = ( summary_df[["strategy_type", "income", "expenditure", "kcals_consumed"]].groupby("strategy_type").sum() ) From 3de34666dd6370ad4311320a9c8cb22b51117e88 Mon Sep 17 00:00:00 2001 From: Roger Hunwicks Date: Fri, 17 Oct 2025 14:00:49 -0400 Subject: [PATCH 2/2] Update numeric conversion to fill NaN with 0 - see HEA-780 Ensure numeric conversion fills NaN values with 0. 
--- pipelines/assets/livelihood_activity.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pipelines/assets/livelihood_activity.py b/pipelines/assets/livelihood_activity.py index f3288e1..ca45eff 100644 --- a/pipelines/assets/livelihood_activity.py +++ b/pipelines/assets/livelihood_activity.py @@ -1338,8 +1338,7 @@ def get_annotated_instances_from_dataframe( # Get the summary dataframe, grouped by strategy_type summary_df = pd.DataFrame(reported_summary_output.value["LivelihoodActivity"]) for col in ["income", "expenditure", "kcals_consumed"]: - summary_df[col] = pd.to_numeric(summary_df[col], errors="coerce") - + summary_df[col] = pd.to_numeric(summary_df[col], errors="coerce").fillna(0) summary_df = ( summary_df[["strategy_type", "income", "expenditure", "kcals_consumed"]].groupby("strategy_type").sum() )