From 23bbbc9ee99c78a8e5872ce696bb769804148db0 Mon Sep 17 00:00:00 2001 From: Ocre42 Date: Mon, 27 Jan 2020 16:08:53 +0000 Subject: [PATCH] Slicing query results to deal with OoM errors --- iati_datastore/iatilib/crawler.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/iati_datastore/iatilib/crawler.py b/iati_datastore/iatilib/crawler.py index bce6946..e45849d 100644 --- a/iati_datastore/iatilib/crawler.py +++ b/iati_datastore/iatilib/crawler.py @@ -104,6 +104,7 @@ def fetch_dataset_list(modified_since=None): def delete_datasets(datasets): + deleted_datasets = db.session.query(Dataset).filter(Dataset.name.in_(datasets)) activities_to_delete = db.session.query(Activity). \ @@ -111,11 +112,11 @@ def delete_datasets(datasets): filter(Resource.dataset_id.in_(datasets)) now = datetime.datetime.now() - deleted_activities = [DeletedActivity( - iati_identifier=a.iati_identifier, - deletion_date=now - ) - for a in activities_to_delete] + deleted_activities = [] + # Slice the query to make sure it doesn't use up all the memory + for i in range(0, activities_to_delete.count(), 100): + for a in activities_to_delete.slice(i, i+100): + deleted_activities.append(DeletedActivity(iati_identifier=a.iati_identifier, deletion_date=now)) db.session.add_all(deleted_activities) db.session.commit() deleted = deleted_datasets.delete(synchronize_session='fetch')