Skip to content

Commit

Permalink
#354 #397 #403 #406 #430 Added support for batched assembly
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Cameron committed Dec 10, 2020
1 parent 77019da commit f529c82
Show file tree
Hide file tree
Showing 16 changed files with 258 additions and 48 deletions.
17 changes: 11 additions & 6 deletions scripts/gridss.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ while true; do
shift 2
;;
-a|--assembly)
assembly="$2"
assembly="$assembly $2"
# TODO: support multiple assembly files
shift 2
;;
-b|--blacklist)
Expand Down Expand Up @@ -208,7 +209,7 @@ write_status() {
echo "$(date): $1" | tee -a $logfile 1>&2
}
write_status "Full log file is: $logfile"
trap 'echo "\"${last_command}\" command completed with exit code $?.\nThe underlying error message can be found in $logfile."' EXIT
trap 'echo "\"${last_command}\" command completed with exit code $?." 1>&2; echo 1>&2; echo "The actual error message can be found in $logfile." 1>&2' EXIT
# Timing instrumentation
timinglogfile=$workingdir/gridss.timing.$timestamp.$HOSTNAME.$$.log
if which /usr/bin/time >/dev/null ; then
Expand Down Expand Up @@ -836,12 +837,12 @@ if [[ $do_assemble == true ]] ; then
WORKING_DIR=$workingdir \
REFERENCE_SEQUENCE=$reference \
WORKER_THREADS=$threads \
O=$assembly \
$input_args \
$blacklist_arg \
$config_args \
$picardoptions \
$readpairing_args \
O=$assembly \
; } 1>&2 2>> $logfile
else
write_status "Skipping assembly as $assembly already exists. $assembly"
Expand Down Expand Up @@ -945,6 +946,10 @@ if [[ $do_assemble == true ]] ; then
else
write_status "Skipping assembly $assembly"
fi
assembly_args=""
for ass in $assembly ; do
assembly_args="$assembly_args ASSEMBLY=$ass"
done
if [[ $sanityCheck == "true" ]] ; then
write_status "Running sanity checks"
java -Xmx$jvmheap $jvm_args \
Expand All @@ -956,7 +961,7 @@ if [[ $sanityCheck == "true" ]] ; then
$input_args \
$blacklist_arg \
$config_args \
ASSEMBLY=$assembly \
$assembly_args \
$readpairing_args \
OUTPUT_ERROR_READ_NAMES=reads_failing_sanity_check.txt
fi
Expand Down Expand Up @@ -985,7 +990,7 @@ if [[ $do_call == true ]] ; then
$input_args \
$blacklist_arg \
$config_args \
ASSEMBLY=$assembly \
$assembly_args \
OUTPUT_VCF=$prefix.unallocated.vcf \
$readpairing_args \
; } 1>&2 2>> $logfile
Expand All @@ -1001,7 +1006,7 @@ if [[ $do_call == true ]] ; then
$input_args \
$blacklist_arg \
$config_args \
ASSEMBLY=$assembly \
$assembly_args \
INPUT_VCF=$prefix.unallocated.vcf \
OUTPUT_VCF=$prefix.allocated.vcf \
$picardoptions \
Expand Down
16 changes: 12 additions & 4 deletions src/main/java/au/edu/wehi/idsv/AssemblyAttributes.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,11 @@ public static void adjustAssemblyAnnotationDueToContigChange(SAMRecord record, i
* @return true if the record is likely part of the breakend, false if definitely not
*/
public boolean isPartOfAssembly(DirectedEvidence e) {
return getSupport().stream().anyMatch(ee -> ee.getEvidenceID().equals(e.getEvidenceID()));
return getSupport(e.getEvidenceSource() instanceof AssemblyEvidenceSource ? (AssemblyEvidenceSource)e.getEvidenceSource() : null)
.stream()
.anyMatch(ee -> ee.getEvidenceID().equals(e.getEvidenceID()));
}
private Collection<AssemblyEvidenceSupport> getSupport() {
private Collection<AssemblyEvidenceSupport> getSupport(AssemblyEvidenceSource aes) {
if (support == null) {
support = new ArrayList<>();
if (!record.hasAttribute(SamTags.ASSEMBLY_EVIDENCE_CATEGORY)) {
Expand All @@ -85,6 +87,12 @@ private Collection<AssemblyEvidenceSupport> getSupport() {
log.error(msg);
throw new IllegalStateException(msg);
}
if (aes != null) {
final int[] lookup = aes.getAssemblyCategoryToProcessingContextCategoryLookup();
for (int i = 0; i < category.length; i++) {
category[i] = lookup[category[i]];
}
}
if (type.length != category.length
|| type.length != intervalStart.length
|| type.length != intervalEnd.length
Expand Down Expand Up @@ -212,12 +220,12 @@ private static boolean ensureUniqueEvidenceID(String assemblyName, Collection<Di
return isUnique;
}
private Stream<AssemblyEvidenceSupport> filterSupport(Range<Integer> assemblyContigOffset, Set<Integer> supportingCategories, Set<AssemblyEvidenceSupport.SupportType> supportTypes, AssemblyEvidenceSource aes) {
Stream<AssemblyEvidenceSupport> stream = getSupport().stream();
Stream<AssemblyEvidenceSupport> stream = getSupport(aes).stream();
if (assemblyContigOffset != null) {
stream = stream.filter(s -> s.getAssemblyContigOffset().isConnected(assemblyContigOffset));
}
if (supportingCategories != null) {
stream = stream.filter(s -> supportingCategories.contains(aes.getAssemblyCategoryToProcessingContextCategoryLookup()[s.getCategory()]));
stream = stream.filter(s -> supportingCategories.contains(s.getCategory()));
}
if (supportTypes != null) {
stream = stream.filter(s -> supportTypes.contains(s.getSupportType()));
Expand Down
8 changes: 4 additions & 4 deletions src/main/java/au/edu/wehi/idsv/AssemblyEvidenceSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public class AssemblyEvidenceSource extends SAMEvidenceSource {
private int cachedMaxReadMappedLength = -1;
private AssemblyTelemetry telemetry;
private SAMFileHeader header;
private List<String> assembledCategories;
protected List<String> assembledCategories;
private int[] assemblyOrdinalToProcessingCategoryLookup;
/**
* Generates assembly evidence based on the given evidence
Expand Down Expand Up @@ -489,7 +489,7 @@ public static void validateAllCategoriesAssembled(ProcessingContext pc, List<Ass
" If labels are used, they must match for both assembly and variant calling.",
extra.get(0),
aes1.getFile());
throw new RuntimeException(msg);
throw new IllegalArgumentException(msg);
}
// Assembly duplicate
for (int j = i + 1; j < aesList.size(); j++) {
Expand All @@ -504,7 +504,7 @@ public static void validateAllCategoriesAssembled(ProcessingContext pc, List<Ass
common.get(0),
aes1.getFile(),
aes2.getFile());
throw new RuntimeException(msg);
throw new IllegalArgumentException(msg);
}
}
}
Expand All @@ -518,7 +518,7 @@ public static void validateAllCategoriesAssembled(ProcessingContext pc, List<Ass
String msg = String.format(
"Fatal error: Missing assembly for %s. All input files must have a corresponding assembly.",
label);
throw new RuntimeException(msg);
throw new IllegalArgumentException(msg);
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/au/edu/wehi/idsv/GenomicProcessingContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,10 @@ private CloseableIterator<SAMRecord> getSamReaderIterator(SamReader reader, Sort
return applyCommonSAMRecordFilters(safeIterator);
}

public SAMFileWriterFactory getSamFileWriterFactory(boolean sorted) {
public SAMFileWriterFactory getSamFileWriterFactory() {
return new SAMFileWriterFactory()
.setTempDirectory(fsContext.getTemporaryDirectory())
.setCreateIndex(sorted); // also covered by -Dcreate_index=true
.setCreateIndex(true);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public void mergeSupplementaryAlignment(File input, List<File> aligned, File out
SAMFileUtil.merge(ImmutableList.of(tmpoutput, suppMerged),
output,
pc.getSamReaderFactory(),
pc.getSamFileWriterFactory(header.getSortOrder() == SortOrder.coordinate));
pc.getSamFileWriterFactory());
} else {
FileHelper.move(tmpoutput, output, true);
FileHelper.move(suppMerged, unorderedOutput, false);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/au/edu/wehi/idsv/VcfBreakendToReadPair.java
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public void writeVisualisationBam(GenomicProcessingContext pc, File vcf, File ba
SAMFileWriter writer = null;
SAMFileWriter writerFiltered = null;
try {
SAMFileWriterFactory factory = pc.getSamFileWriterFactory(true);
SAMFileWriterFactory factory = pc.getSamFileWriterFactory();
SAMFileHeader header = pc.getBasicSamHeader();
writer = factory.makeSAMOrBAMWriter(header, false, working);
writerFiltered = factory.makeSAMOrBAMWriter(header, false, workingFiltered);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ private File packageMinimalAssemblyErrorReproductionData(Set<DirectedEvidence> e
header = reader.getFileHeader();
}
// TODO: preserve original BAM ordering?
try (SAMFileWriter writer = context.getSamFileWriterFactory(true).setCreateIndex(true).makeBAMWriter(header, false, outSam)) {
try (SAMFileWriter writer = context.getSamFileWriterFactory().setCreateIndex(true).makeBAMWriter(header, false, outSam)) {
evidenceInCurrentAssembler.stream()
.filter(de -> de.getEvidenceSource() == source)
.forEach(a -> writer.addAlignment(a.getUnderlyingSAMRecord()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ private SAMEvidenceSource constructSamEvidenceSource(File file, File nameSortedF
}
}
private <T> T getOffset(List<T> list, int offset, T defaultValue) {
if (offset >= list.size()) return defaultValue;
if (list == null || offset >= list.size()) return defaultValue;
T obj = list.get(offset);
if (obj == null) return defaultValue;
return obj;
Expand Down
24 changes: 24 additions & 0 deletions src/test/java/au/edu/wehi/idsv/AssemblyAttributesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@

import au.edu.wehi.idsv.sam.SamTags;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Range;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.metrics.Header;
import org.junit.Assert;
import org.junit.Test;

import java.util.ArrayList;

import static org.junit.Assert.*;

public class AssemblyAttributesTest extends TestHelper {
Expand Down Expand Up @@ -73,4 +78,23 @@ public void getMaxQualPosition_should_return_min_position() {
AssemblyAttributes attr = new AssemblyAttributes(r);
assertEquals(3, attr.getMaxQualPosition(Range.closed(0, 15), null, null, null));
}
@Test
public void categories_should_map_to_processing_context() {
DirectedEvidence e1 = SCE(FWD, withMapq(10, Read(0, 1, "1M2S")));
DirectedEvidence e2 = SCE(FWD, withMapq(20, Read(0, 1, "1M1S")));
SAMRecord ass1 = AssemblyFactory.createUnanchoredBreakend(getContext(), AES(), new SequentialIdGenerator("asm"), new BreakendSummary(0, FWD, 1, 1, 2), ImmutableList.of(e1, e2), null, B("GTAC"), new byte[] {1,2,3,4});
ProcessingContext pc = new ProcessingContext(getFSContext(), SMALL_FA_FILE, SMALL_FA, new ArrayList<Header>(), getConfig());
pc.registerCategory("One");
pc.registerCategory("Two");
pc.registerCategory("Three");
pc.registerCategory("Four");
pc.registerCategory("Normal");
pc.registerCategory("Tumour");
StubAssemblyEvidenceSource aes = new StubAssemblyEvidenceSource(pc);
aes.setAssembledCategories(ImmutableList.of("Normal", "Tumour"));
NonReferenceReadPair asse = NonReferenceReadPair.create(aes, ass1);
AssemblyAttributes aa = new AssemblyAttributes(ass1);
Assert.assertEquals(0, aa.getEvidenceIDs(null, ImmutableSet.of(0, 1, 2, 3), null, aes).size());
Assert.assertEquals(2, aa.getEvidenceIDs(null, ImmutableSet.of(4, 5), null, aes).size());
}
}
18 changes: 17 additions & 1 deletion src/test/java/au/edu/wehi/idsv/AssemblyEvidenceSourceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ public void should_realign_very_poor_matches_to_the_reference() {
assertEquals("131M", ra.getCigarString());
}
@Test
public void chunck_spanning_assemblies_should_not_be_repeated() throws IOException {
public void chunk_spanning_assemblies_should_not_be_repeated() throws IOException {
List<SAMRecord> in = new ArrayList<>();
for (int i = 50; i < 150; i++) {
in.add(withSequence("AATTAATCGCAAGAGCGGGTTGTATTCGACGCCAAGTCAGCTGAAGCACCATTACCCGATCAAAACATATCAGAAATGATTGACGTATCACAAGCCGGA", Read(0, i, "41M58S"))[0]);
Expand Down Expand Up @@ -357,4 +357,20 @@ public void should_not_filter_assembly_contigs_that_overlap_source_assembly_posi
ArrayList<DirectedEvidence> result = Lists.newArrayList(aes.iterator(SAMEvidenceSource.EvidenceSortOrder.EvidenceStartPosition));
Assert.assertEquals(1, result.size());
}
@Test
public void should_remap_assembly_categories() {
SAMFileHeader header = AES().getHeader().clone();
header.setComments(ImmutableList.of("gridss_input_category=Tumour", "gridss_input_category=Normal"));
createBAM(input, header);
AssemblyEvidenceSource aes = new AssemblyEvidenceSource(getCommandlineContext(), ImmutableList.of(SES(10, 10)), input);
Assert.assertEquals(ImmutableList.of("Tumour", "Normal"), aes.getAssemblyCategories());
Assert.assertArrayEquals(new int[] { 1, 0 }, aes.getAssemblyCategoryToProcessingContextCategoryLookup());

header = AES().getHeader().clone();
header.setComments(ImmutableList.of("gridss_input_category=Tumour"));
createBAM(input, header);
aes = new AssemblyEvidenceSource(getCommandlineContext(), ImmutableList.of(SES(10, 10)), input);
Assert.assertEquals(ImmutableList.of("Tumour"), aes.getAssemblyCategories());
Assert.assertArrayEquals(new int[] { 1 }, aes.getAssemblyCategoryToProcessingContextCategoryLookup());
}
}
23 changes: 17 additions & 6 deletions src/test/java/au/edu/wehi/idsv/IntermediateFilesTest.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package au.edu.wehi.idsv;

import au.edu.wehi.idsv.sam.SAMFileUtil;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
import htsjdk.samtools.*;
Expand All @@ -12,6 +14,7 @@
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
Expand Down Expand Up @@ -53,20 +56,24 @@ public void setReference(File ref) {
reference = ref;
}
public ProcessingContext getCommandlineContext() {
return getCommandlineContext(ImmutableList.of("Normal", "Tumour"));
}
public ProcessingContext getCommandlineContext(List<String> categories) {
List<Header> headers = Lists.newArrayList();
headers.add(new StringHeader("TestHeader"));
ProcessingContext pc;
if (reference.equals(SMALL_FA_FILE)) {
pc = new ProcessingContext(
new FileSystemContext(testFolder.getRoot(), 500000), reference, SMALL_FA, headers,
getConfig(testFolder.getRoot()));
new FileSystemContext(testFolder.getRoot(), 500000), reference, SMALL_FA, headers,
getConfig(testFolder.getRoot()));
} else {
pc = new ProcessingContext(
new FileSystemContext(testFolder.getRoot(), 500000), reference, null, headers,
new FileSystemContext(testFolder.getRoot(), 500000), reference, null, headers,
getConfig(testFolder.getRoot()));
}
pc.registerCategory("Normal");
pc.registerCategory("Tumour");
for (String c : categories) {
pc.registerCategory(c);
}
return pc;
}
@After
Expand Down Expand Up @@ -171,8 +178,10 @@ public List<VariantContextDirectedEvidence> breakpoints(final List<IdsvVariantCo
public List<IdsvVariantContext> getVcf(final File file, final EvidenceSource source) {
assertTrue(file.exists());
VCFFileReader reader = new VCFFileReader(file, false);
VCFHeader header = reader.getHeader();
List<IdsvVariantContext> list = Lists.newArrayList();
for (VariantContext r : reader) {
r.fullyDecode(header, false);
list.add(IdsvVariantContext.create(getCommandlineContext().getDictionary(), source == null ? AES() : source, r));
}
reader.close();
Expand All @@ -181,7 +190,9 @@ public List<IdsvVariantContext> getVcf(final File file, final EvidenceSource sou
public List<VariantContext> getRawVcf(final File file) {
assertTrue(file.exists());
try (VCFFileReader reader = new VCFFileReader(file, false)) {
return Lists.newArrayList(reader.iterator());
VCFHeader header = reader.getHeader();
ArrayList<VariantContext> list = Lists.newArrayList(reader.iterator());
return list;
}
}
public List<IdsvVariantContext> getVcf(final File file, final ProcessingContext pc, final EvidenceSource source) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@ protected SplitReadRealigner createAligner() {
}
@Override
protected SplitReadRealigner createAligner(ProcessingContext pc) {
return new IterativeSplitReadRealigner(pc, new ExternalProcessFastqAligner(pc.getSamReaderFactory(), pc.getSamFileWriterFactory(false), ExternalAlignerTests.COMMAND_LINE));
return new IterativeSplitReadRealigner(pc, new ExternalProcessFastqAligner(pc.getSamReaderFactory(), pc.getSamFileWriterFactory(), ExternalAlignerTests.COMMAND_LINE));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,6 @@ public void should_track_read_name() {
assertFalse(e.getOriginatingFragmentID(Range.closed(0,0), ImmutableSet.of(0), null, aes).contains("r4"));
}
@Test
public void should_convert_from_assembly_categories_to_processing_context_categories() {
Assert.fail();
}
@Test
public void getEvidenceIDs_should_return_underlying_evidence() {
DirectedEvidence e1 = SCE(BWD, Read(0, 1, "5S5M"));
DirectedEvidence e2 = SCE(BWD, Read(0, 1, "6S5M"));
Expand Down
3 changes: 3 additions & 0 deletions src/test/java/au/edu/wehi/idsv/TestHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -981,6 +981,9 @@ public CloseableIterator<DirectedEvidence> iterator(QueryInterval[] qi, SAMEvide
return new AutoClosingIterator<>(assemblies.stream().filter(
e -> QueryIntervalUtil.overlaps(qi, e.getBreakendSummary())).iterator());
}
public void setAssembledCategories(ImmutableList<String> categories) {
this.assembledCategories = categories;
}
}
public static AssemblyEvidenceSource AES() {
return new AssemblyEvidenceSource(getContext(),
Expand Down
Loading

0 comments on commit f529c82

Please sign in to comment.