Skip to content

Commit

Permalink
Merge 314b546 into 0afdb98
Browse files Browse the repository at this point in the history
  • Loading branch information
R2ZER0 committed Oct 20, 2023
2 parents 0afdb98 + 314b546 commit 380b366
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 5 deletions.
34 changes: 33 additions & 1 deletion datastore/db/management/commands/delete_datagetter_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import datetime, timedelta
from django.core.management.base import BaseCommand, CommandError

from db.models import GetterRun, Latest
Expand Down Expand Up @@ -27,16 +28,41 @@ def add_arguments(self, parser):
help="Delete the oldest datagetter data",
)

parser.add_argument(
"--older-than-days",
type=int,
help="Delete datagetter data that's more than N days old.",
)

parser.add_argument(
"--force-delete-in-use-data",
action="store_true",
help="Delete datagetter data even if it's in use by a latest best.",
)

parser.add_argument(
"--update-latest-best",
action="store_true",
help="Update the latest best data set after deleting data",
)

def handle(self, *args, **options):
if options.get("older_than_days"):
now_dt = datetime.now()
older_than_dt = now_dt - timedelta(days=options["older_than_days"])
older_than_objs = (
getter_run.pk
for getter_run in GetterRun.objects.filter(datetime__lt=older_than_dt)
)
options["getter_run_ids"] = set(options["getter_run_ids"]).union(
older_than_objs
)

if options.get("oldest"):
to_delete = GetterRun.objects.order_by("datetime").first()
options["getter_run_ids"] = [to_delete.pk]
options["getter_run_ids"] = set(options["getter_run_ids"]).union(
[to_delete.pk]
)

if len(options["getter_run_ids"]) == 0:
raise CommandError("No datagetter data specified")
Expand All @@ -46,6 +72,12 @@ def handle(self, *args, **options):
confirm = "n"
getter_run = GetterRun.objects.get(pk=run)

if getter_run.is_in_use():
print("In use %s" % run)
if not options["force_delete_in_use_data"]:
print("Skipped %s" % run)
continue

if not options["no_prompt"]:
confirm = input("Confirm delete '%s' y/n: " % run)

Expand Down
7 changes: 4 additions & 3 deletions datastore/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def update():
for failed_source in latest_getter.sourcefile_set.filter(
models.Q(downloads=False) | models.Q(data_valid=False)
):

failed_id = failed_source.data["identifier"]
print(
"Processing the failed source %s\n%s" % (failed_id, failed_source.data)
Expand All @@ -58,7 +57,6 @@ def update():
acceptable_license=True,
downloads=True,
).order_by("-getter_run"):

# Extra check to make sure the source actually has grants.
# It isn't much good if not.
source_grant_count = candidate_replacement_source.grant_set.count()
Expand Down Expand Up @@ -133,6 +131,10 @@ def archive_run(self):
def __str__(self):
    """Return a label made of the primary key and the datetime, joined by " - "."""
    label_fields = (self.pk, self.datetime)
    return "%s - %s" % label_fields

def is_in_use(self):
    """Check if this GetterRun is included in any of the Latest best grantsets.

    Returns:
        bool: True when at least one Latest row matches the
        ``grant__getter_run__pk`` lookup for this run's primary key,
        False otherwise.
    """
    # exists() lets the database stop at the first matching row instead of
    # counting every match only to compare the total against zero.
    return Latest.objects.filter(grant__getter_run__pk=self.pk).exists()


class SourceFile(models.Model):
data = JSONField()
Expand Down Expand Up @@ -292,7 +294,6 @@ def update_aggregate(self, grant):


class Publisher(Entity):

data = JSONField()
quality = JSONField(null=True)

Expand Down
17 changes: 16 additions & 1 deletion datastore/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,26 @@ def test_create_and_load_data_package(self):
def test_delete_datagetter_data(self):
err_out = StringIO()
call_command(
"delete_datagetter_data", "--oldest", "--no-prompt", stderr=err_out
"delete_datagetter_data",
"--oldest",
"--no-prompt",
"--force-delete-in-use-data",
stderr=err_out,
)
self.assertEqual(len(err_out.getvalue()), 0, "Errors output by command")
self.assertEqual(db.GetterRun.objects.count(), 0)

def test_doesnt_delete_in_use_datagetter_data(self):
    """Run the delete command for the oldest run without
    --force-delete-in-use-data and check that GetterRun rows remain."""
    stderr_capture = StringIO()
    command_args = ("delete_datagetter_data", "--oldest", "--no-prompt")
    call_command(*command_args, stderr=stderr_capture)

    # The command must not have written anything to stderr.
    reported_errors = stderr_capture.getvalue()
    self.assertEqual(len(reported_errors), 0, "Errors output by command")

    # At least one run must still be present after the command.
    remaining_runs = db.GetterRun.objects.count()
    self.assertGreater(remaining_runs, 0)

def test_list_datagetter_runs(self):
out = StringIO()
err_out = StringIO()
Expand Down

0 comments on commit 380b366

Please sign in to comment.