Skip to content

Commit

Permalink
Merge Magenta (#72)
Browse files Browse the repository at this point in the history
* Fix mkgroup command

* Roll back DNASequencingProcessRecord created on new dgroup only

* Add optional unique_name to MajoraArtifactProcessRecord

* Attempts to prevent Foel race conditions when adding multiple DNASequencingProcess
  records concurrently by enforcing an optional uniqueness constraint
* DNASequencingProcessRecord uses a compound run_name and library_name key
* Apply Migration 150 to the affected area
  • Loading branch information
SamStudio8 authored Feb 26, 2022
1 parent b95a6b8 commit 0507c0d
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 23 deletions.
36 changes: 15 additions & 21 deletions majora2/form_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,28 +118,22 @@ def handle_testsequencing(form, user=None, api_o=None, request=None):
physical=False
)

if dgroup_created:
# Try and protect DNASequencingProcessRecord from multi-create
#
# If you're here because of another race condition incident, the background
# is I made the mistake of abusing get_or_create to create objects without
# a uniqueness constraint... Unfortunately you can't just add a constraint
# on (process, in_artifact, out_group) because the model was designed to
# be super flexible and add any combination of these that you like.
# Adding that constraint in models.py is likely going to cause a load of
# constaint violations when it comes to running the database migration.
#
# My suggestion would be to add a new unique field to the base
# MajoraArtifactProcessRecord and use that here as the only argument for
# get_or_create and populate the process and in/outputs after fetching.
# Good luck!
rec, rec_created = models.DNASequencingProcessRecord.objects.get_or_create(
process=p,
in_artifact=form.cleaned_data.get("library_name"),
out_group=dgroup,
)
rec.save()
# Create a DNASequencingProcessRecord to link library to run
# One time I put this under the dgroup_created if statement and put thousands
# of genomes down the back of the proverbial sofa so don't do that
# See https://github.com/COG-UK/dipi-group/issues/193
# Assign a unique_name to try and prevent race conditions adding the same
# process record multiple times
in_library = form.cleaned_data.get("library_name")
rec, rec_created = models.DNASequencingProcessRecord.objects.get_or_create(
process=p,
in_artifact=in_library,
out_group=dgroup,
unique_name="%s-%s" % (run_name, in_library.dice_name),
)
rec.save()

if dgroup_created:
# Placeholder basecalling
bio = models.AbstractBioinformaticsProcess(
who = user,
Expand Down
18 changes: 18 additions & 0 deletions majora2/migrations/0150_majoraartifactprocessrecord_unique_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 2.2.27 on 2022-02-26 13:19

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('majora2', '0149_institute_credit_code_only'),
]

operations = [
migrations.AddField(
model_name='majoraartifactprocessrecord',
name='unique_name',
field=models.CharField(max_length=128, null=True, unique=True),
),
]
8 changes: 8 additions & 0 deletions majora2/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1251,6 +1251,14 @@ class MajoraArtifactProcessRecord(PolymorphicModel):
out_artifact= models.ForeignKey('MajoraArtifact', blank=True, null=True, related_name='after_process', on_delete=models.CASCADE)
out_group = models.ForeignKey('MajoraArtifactGroup', blank=True, null=True, related_name='after_process', on_delete=models.CASCADE)

# we cannot bind uniqueness on any combination of processes and artifacts as it would restrict the utility of the process record tree
# instead we can just toss some garbage in a unique_name field to reduce the risk of get_or_create creating multiple process records
# with the same process and in/out artifacts
# * this was added as a result of DNASequencingProcessRecord race conditions
# * note we use null and unique which allows us to only worry about integritychecks for cases where unique_name is set
# so we dont have to roll this out throughout majora unless needed (eg. future race conditions)
unique_name = models.CharField(max_length=128, null=True, unique=True)

class MajoraArtifactProcess(PolymorphicModel):
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) #
when = models.DateTimeField(blank=True, null=True)
Expand Down
72 changes: 72 additions & 0 deletions majora2/test/test_sequencingprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@ def test_add_basic_sequencing_ok(self):
pipe_version=payload["runs"][0]["bioinfo_pipe_version"],
).count() == 1

# Check the sequencing process record
assert models.DNASequencingProcessRecord.objects.filter(
unique_name="%s-%s" % (run_name, self.library_name)
).count() == 1


def test_m70_add_sequencing_with_future_start_rejected(self):
run_name = "YYMMDD_AB000000_1234_ABCDEFGHI0"
Expand Down Expand Up @@ -105,3 +110,70 @@ def test_m70_add_sequencing_with_future_start_rejected(self):

self.assertIn("Sequencing run cannot start in the future", message_strs)
self.assertIn("Sequencing run cannot end in the future", message_strs)


def test_193_can_link_multiple_libraries_to_run_ok(self):

library_name_1 = "HOOT-LIBRARY-ONE"
library_o = models.LibraryArtifact(dice_name=library_name_1)
library_o.save()

library_name_2 = "HOOT-LIBRARY-TWO"
library_o = models.LibraryArtifact(dice_name=library_name_2)
library_o.save()

run_name = "YYMMDD_AB000000_1234_ABCDEFGHI0"
payload = {
"runs": [
{
"bioinfo_pipe_name": "ARTIC Pipeline (iVar)",
"bioinfo_pipe_version": "1.3.0",
"end_time": "2022-02-26 12:00",
"flowcell_id": "ABCDEF",
"flowcell_type": "v3",
"instrument_make": "ILLUMINA",
"instrument_model": "MiSeq",
"run_name": run_name,
"start_time": "2022-02-26 05:00"
}
],
"token": "oauth",
"username": "oauth"
}

# Create sequencing process and link first lib
payload["library_name"] = library_name_1
response = self.c.post(self.endpoint, payload, secure=True, content_type="application/json", HTTP_AUTHORIZATION="Bearer %s" % self.token)

self.assertEqual(200, response.status_code)
j = response.json()
self.assertEqual(j["errors"], 0)

# Assert run exists and process has a dnasequencingprocessrecord
assert models.DNASequencingProcess.objects.filter(run_name=run_name).count() == 1
process = models.DNASequencingProcess.objects.get(run_name=run_name)
assert process.records.count() == 1

# Check the sequencing process record
assert models.DNASequencingProcessRecord.objects.filter(
process=process,
unique_name="%s-%s" % (run_name, library_name_1)
).count() == 1

# Link second lib
payload["library_name"] = library_name_2
response = self.c.post(self.endpoint, payload, secure=True, content_type="application/json", HTTP_AUTHORIZATION="Bearer %s" % self.token)

self.assertEqual(200, response.status_code)
j = response.json()
self.assertEqual(j["errors"], 0)

# Assert process is linked with second dnasequencingprocessrecord
process = models.DNASequencingProcess.objects.get(run_name=run_name)
assert process.records.count() == 2

# Check the sequencing process record
assert models.DNASequencingProcessRecord.objects.filter(
process=process,
unique_name="%s-%s" % (run_name, library_name_2)
).count() == 1
4 changes: 2 additions & 2 deletions tatl/management/commands/mkgroup.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ def handle(self, *args, **options):
if group:
sys.stderr.write("[NOTE] %s group %s\n" % (group.name, "CREATED" if created else "RETRIEVED"))

if '&' in permissions:
if '&' in options["permissions"]:
sep = '&'
elif ' ' in permissions:
elif ' ' in options["permissions"]:
sep = ' '
else:
sys.stderr.write("[FAIL] Cannot determine permission separator, use space or ampersand!\n")
Expand Down

0 comments on commit 0507c0d

Please sign in to comment.