def normalize(self, raw):
    """Build a NormalizedDocument from a raw push-API record.

    ``raw['doc']`` is parsed as JSON and its ``jsonData`` member is used as
    the document. Any contributor whose ``email`` is the empty string has
    that key removed before the document is passed to
    ``NormalizedDocument``.

    :param raw: mapping whose ``'doc'`` entry is a JSON string containing
        a ``jsonData`` member.
    :returns: ``NormalizedDocument`` built from the cleaned document.
    """
    document = json.loads(raw['doc'])['jsonData']
    # Workaround: the push API did not have proper email validation, so a
    # contributor may carry an empty-string email. Remove such entries.
    # Lookups use .get() so that a contributor with no 'email' key, or a
    # document with no 'contributors' list, passes through untouched
    # instead of raising KeyError inside this workaround.
    for contributor in document.get('contributors') or []:
        if contributor.get('email') == '':
            del contributor['email']
    return NormalizedDocument(document)