-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
EVA-3543 check if rs with same hash already exist before merging #441
Changes from 2 commits
022e088
2f29133
dab4e0f
8488d97
7c95cec
2422f15
a70301a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,6 +35,7 @@ | |
import uk.ac.ebi.eva.accession.clustering.metric.ClusteringMetric; | ||
import uk.ac.ebi.eva.accession.core.model.IClusteredVariant; | ||
import uk.ac.ebi.eva.accession.core.model.ISubmittedVariant; | ||
import uk.ac.ebi.eva.accession.core.model.dbsnp.DbsnpClusteredVariantEntity; | ||
import uk.ac.ebi.eva.accession.core.model.dbsnp.DbsnpSubmittedVariantEntity; | ||
import uk.ac.ebi.eva.accession.core.model.dbsnp.DbsnpSubmittedVariantOperationEntity; | ||
import uk.ac.ebi.eva.accession.core.model.eva.ClusteredVariantEntity; | ||
|
@@ -192,6 +193,18 @@ public void writeRSMerge(SubmittedVariantOperationEntity currentOperation) | |
getMergeDestinationAndMergees(mergeCandidates); | ||
ClusteredVariantEntity mergeDestination = mergeDestinationAndMergees.getLeft(); | ||
|
||
// check if any variant with same hash as mergeDestination already exist in DB | ||
ClusteredVariantEntity existingClusteredVariantEntity = getClusteredVariantEntityWithHash(mergeDestination); | ||
if (existingClusteredVariantEntity != null && existingClusteredVariantEntity.getAccession()!=mergeDestination.getAccession()) { | ||
if (mergeDestination.getAccession() == ClusteredVariantMergingPolicy.prioritise(mergeDestination.getAccession(), | ||
existingClusteredVariantEntity.getAccession()).accessionToKeep) { | ||
merge(mergeDestination, existingClusteredVariantEntity, currentOperation); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
} else { | ||
merge(existingClusteredVariantEntity, mergeDestination, currentOperation); | ||
mergeDestination = existingClusteredVariantEntity; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think this part is necessary. If the existing clustered variant entity document is the merge destination, then the normal code below would work. |
||
} | ||
} | ||
|
||
List<ClusteredVariantEntity> mergees = mergeDestinationAndMergees.getRight(); | ||
for (ClusteredVariantEntity mergee: mergees) { | ||
logger.info("RS merge operation: Merging rs{} to rs{} due to hash collision...", | ||
|
@@ -200,6 +213,19 @@ public void writeRSMerge(SubmittedVariantOperationEntity currentOperation) | |
} | ||
} | ||
|
||
// Looks up a stored clustered variant whose ID_ATTRIBUTE equals the hashed message of the given entity. | ||
// Queries ClusteredVariantEntity first and falls back to DbsnpClusteredVariantEntity only when that finds nothing. | ||
// Returns the first match found, or null when neither query returns a document. | ||
private ClusteredVariantEntity getClusteredVariantEntityWithHash(ClusteredVariantEntity cve) { | ||
ClusteredVariantEntity existingClusteredVariantEntity = null; | ||
existingClusteredVariantEntity = mongoTemplate.findOne(query(where(ID_ATTRIBUTE).is(cve.getHashedMessage())), | ||
ClusteredVariantEntity.class); | ||
if (existingClusteredVariantEntity != null) { | ||
return existingClusteredVariantEntity; | ||
} else { | ||
// NOTE(review): same query, different entity class; presumably this targets the dbSNP collection - confirm. | ||
existingClusteredVariantEntity = mongoTemplate.findOne(query(where(ID_ATTRIBUTE).is(cve.getHashedMessage())), | ||
DbsnpClusteredVariantEntity.class); | ||
} | ||
return existingClusteredVariantEntity; | ||
} | ||
|
||
// Pairs the hashed message of the given inactive submitted variant entity (left) with its accession (right). | ||
private ImmutablePair<String, Long> getHashedMessageAndAccessionForSVIE(SubmittedVariantInactiveEntity svie) { | ||
return new ImmutablePair<>(svie.getHashedMessage(), svie.getAccession()); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This section is only relevant if the list of
mergees
has more than 2 elements. Can we test this before going through this? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it also work (and maybe make more sense) to actually just extract this removal into a method that's called before the for-loop, and modify it so it checks for all mergee accessions instead of just one?
I'm sure this implementation works, but I'm wary of essentially redo-ing merge detection and prioritisation logic rather than just trusting the candidates we have.