From 2905ff003990db3423ebd9683241def474b9ca57 Mon Sep 17 00:00:00 2001 From: erinspace Date: Thu, 4 Feb 2016 15:44:43 -0500 Subject: [PATCH] Check for empty string emails that somehow made it through --- scrapi/harvesters/push_api.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scrapi/harvesters/push_api.py b/scrapi/harvesters/push_api.py index 51818431..4d83a6c2 100644 --- a/scrapi/harvesters/push_api.py +++ b/scrapi/harvesters/push_api.py @@ -100,7 +100,12 @@ def get_records(self, start_date, end_date): yield record def normalize(self, raw): - return NormalizedDocument(json.loads(raw['doc'])['jsonData']) + document = json.loads(raw['doc'])['jsonData'] + # This is a workaround because the push API did not have proper email validation + for contributor in document['contributors']: + if contributor['email'] == '': + del contributor['email'] + return NormalizedDocument(document) @property def run_at(self):