Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions bin/physlr-make
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ t=16
bloom_filter_size=10000000000 #10GB

# Molecule separation strategy
mol_strategy=distributed
mol_strategy=distributed+sqcosbin

# Path to the Physlr project.
physlr_path=$(shell dirname $$(dirname $(realpath $(MAKEFILE_LIST))))
Expand Down Expand Up @@ -103,13 +103,13 @@ arcs=false
.PHONY: f1chr4 f1chr2R f1 fishchr25 fish physical-map scaffolds
all: f1chr4 f1chr2R f1 fishchr25 fish

$(lr).physlr.physical-map.path: $(lr).k$k-w$w.n$(minimum_barcode_multiplicity)-$(maximum_barcode_multiplicity).c2-x.physlr.overlap.m$m.mol.mol2-bcs.backbone.path
$(lr).physlr.physical-map.path: $(lr).k$k-w$w.n$(minimum_barcode_multiplicity)-$(maximum_barcode_multiplicity).c2-x.physlr.overlap.m$m.mol.backbone.path
ln -sf $< $@

$(lr).physlr.physical-map.$(ref).n10.paf.gz: $(lr).k$k-w$w.n$(minimum_barcode_multiplicity)-$(maximum_barcode_multiplicity).c2-x.physlr.overlap.m$m.mol.mol2-bcs.backbone.map-split.$(ref).n10.paf.gz
$(lr).physlr.physical-map.$(ref).n10.paf.gz: $(lr).k$k-w$w.n$(minimum_barcode_multiplicity)-$(maximum_barcode_multiplicity).c2-x.physlr.overlap.m$m.mol.backbone.map-split.$(ref).n10.paf.gz
ln -sf $< $@

$(draft).physlr.fa: $(lr).k$k-w$w.n$(minimum_barcode_multiplicity)-$(maximum_barcode_multiplicity).c2-x.physlr.overlap.m$m.mol.mol2-bcs.backbone.map-split.$(draft).n10.sort.best.bed.path.fa
$(draft).physlr.fa: $(lr).k$k-w$w.n$(minimum_barcode_multiplicity)-$(maximum_barcode_multiplicity).c2-x.physlr.overlap.m$m.mol.backbone.map-split.$(draft).n10.sort.best.bed.path.fa
ln -sf $< $@

scaffolds:
Expand Down Expand Up @@ -151,6 +151,7 @@ physical-map: \
$(lr).physlr.physical-map.$(ref).n10.qpos.chain.metrics.tsv
endif
endif

# Help
help:
@echo "Usage: ./physlr-make [COMMAND] [OPTION=VALUE]..."
Expand Down Expand Up @@ -184,11 +185,11 @@ help:
@echo " min_component_size minimum number of barcodes in a backbone [50]."
@echo " minimum_barcode_multiplicity minimum number of minimizers per barcode [10]."
@echo " maximum_barcode_multiplicity maximum number of minimizers per barcode [5000]."
@echo " mol_strategy molecule separation strategy [distributed]. Available options are bc, bc+k3, distributed, ext."
@echo " mol_strategy molecule separation strategy [distributed+sqcosbin]. Available options are bc, bc+k3, distributed, distributed+sqcosbin."
@echo " bc (biconnected componenets) is the least conservative and is only suitable for datasets with low barcode multiplicity."
@echo " bc+k3 (biconnected componenets + k-3 cliques) is more conservative than bc and requires more time."
@echo " distributed is a modified version of bc+k3 that is faster than bc+k3 but may be more (or even less) conservative."
@echo " ext (extensive) mixes distributed with a modified version of sqcos (cosine similarity of squared adjacency matrix) which makes it more conservative."
@echo " distributed+sqcosbin mixes distributed with a modified version of sqcos (cosine similarity of squared adjacency matrix) which makes it more conservative."
@echo " bloom_filter_size size of bloom filter [10000000000] (10G)."
@echo " arcs Use ARCS to augment scaffolds (only compatible with ARCS v1.1.1) [false]."
@echo ""
Expand Down Expand Up @@ -898,7 +899,7 @@ endif

# Determine overlaps and output the graph in TSV.
%.physlr.overlap.tsv: %.physlr.tsv
$(time) $(physlr_path)/src/physlr-overlap -t1 -n10 $< >$@
$(time) $(physlr_path)/src/physlr-overlap -t$t -m10 $< >$@

# Determine the degree of each vertex.
%.deg.tsv: %.tsv
Expand Down Expand Up @@ -958,7 +959,7 @@ min_path_size=200
$(python) $(bin)/physlr flesh-backbone --min-component-size=$(min_component_size) -V$V $< $*.backbone.path >$@

# Split the minimizers to molecules
%.overlap.m$m.mol.mol2-bcs.split.tsv: %.overlap.m$m.mol.mol2-bcs.tsv %.tsv
%.overlap.m$m.mol.split.tsv: %.overlap.m$m.mol.tsv %.tsv
$(time) $(physlr_path)/src/physlr-split-minimizers -t$t $< $*.tsv >$@

# Split the reads into molecules
Expand All @@ -979,7 +980,7 @@ min_path_size=200
$(time) $(python) $(bin)/physlr map -V$V -n10 $^ >$@

# Map the draft assembly to the backbone graph and output BED.
%.backbone.map-split.$(draft).n10.bed: %.backbone.path $(lr).k$k-w$w.n$(minimum_barcode_multiplicity)-$(maximum_barcode_multiplicity).c2-x.physlr.overlap.m$m.mol.mol2-bcs.split.tsv $(draft).k$k-w$w.physlr.tsv
%.backbone.map-split.$(draft).n10.bed: %.backbone.path %.split.tsv $(draft).k$k-w$w.physlr.tsv
$(time) $(python) $(bin)/physlr map --mx-type split --map-pos 10 -V$V -n10 $^ >$@

# Map the draft assembly to the backbone graph and output BED.
Expand Down Expand Up @@ -1034,6 +1035,10 @@ min_path_size=200
%.map.$(ref).n10.paf.gz: %.path $(lr).k$k-w$w.n$(minimum_barcode_multiplicity)-$(maximum_barcode_multiplicity).c2-x.physlr.tsv $(name)/$(ref).k$k-w$w.physlr.tsv
$(time) $(python) $(bin)/physlr map-paf -V$V -n10 $^ | $(gzip) >$@

# Map the reference to the backbone graph with split minimizers and output PAF.
# Prerequisites, in the order they are passed to the command via $^:
#   1. %.backbone.path  - the backbone path file
#   2. %.split.tsv      - the split-minimizer TSV sharing the same stem
#   3. $(name)/$(ref).k$k-w$w.physlr.tsv - the reference's physlr TSV
# Runs `physlr map-paf --mx-type split` with -n10 and pipes the result through $(gzip) into the target.
%.backbone.map-split.$(ref).n10.paf.gz: %.backbone.path %.split.tsv $(name)/$(ref).k$k-w$w.physlr.tsv
$(time) $(python) $(bin)/physlr map-paf --mx-type split -V$V -n10 $^ | $(gzip) >$@

# Lift over query coordinates of a PAF file from minimizer index to nucleotide coordinates.
%.qpos.paf.gz: $(name)/$(ref).k$k-w$w.physlr.tsv %.paf.gz
$(zcat) $*.paf.gz | $(time) $(python) $(bin)/physlr liftover-paf -V$V $< - | $(gzip) >$@
Expand Down
2 changes: 1 addition & 1 deletion src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ check-physlr-molecules: all
./physlr-molecules -s bc data/tiny.mol.input.tsv | diff -q - data/tiny.mol.tsv.good

check-physlr-split-minimizers: all
./physlr-split-minimizers -t4 data/tiny.split-minimizers.mol.mol2-bcs.tsv data/tiny.split-minimizers.physlr.tsv | sort |diff -q - data/tiny.split-minimizers.mol.mol2-bcs.split.tsv.good
./physlr-split-minimizers -t4 data/tiny.split-minimizers.ext.mol.tsv data/tiny.split-minimizers.physlr.tsv | sort |diff -q - data/tiny.split-minimizers.ext.mol.split.tsv.good

install: physlr-indexlr physlr-filter-barcodes physlr-overlap physlr-filter-bxmx physlr-makebf physlr-molecules physlr-split-minimizers
install -d $(DESTDIR)$(PREFIX)/bin
Expand Down
14 changes: 14 additions & 0 deletions src/data/tiny.split-minimizers.ext.mol.split.tsv.good
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
100_55_25_0
100_55_25_90 1 3 5 7
100_55_25_91 2 6 8 15
22_1_9_0 2 6 8
543_288_92_0 1 3 5 7
75_288_50_0 1 3 7
92_300_57_0 2 6 15
AAACACCAGAAACCTA-1_0
AAACACCAGAAACCTA-1_90 1 3 5 7
AAACACCAGAAACCTA-1_91 2 6 8 15
AAACACCAGAAAGCTT-1_0 1 3 5 7
AAACACCAGAACGACC-1_0 1 3 7
AAACACCAGAACGACT-1_0 2 6 15
AAACACCAGAACGCCA-1_0 2 6 8
25 changes: 25 additions & 0 deletions src/data/tiny.split-minimizers.ext.mol.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
U m
AAACACCAGAAACCTA-1_0 1254
AAACACCAGAAACCTA-1_91 1254
AAACACCAGAAACCTA-1_90 1254
AAACACCAGAAAGCTT-1_0 1313
AAACACCAGAACGACC-1_0 1819
AAACACCAGAACGACT-1_0 1819
AAACACCAGAACGCCA-1_0 4173
100_55_25_0 1254
100_55_25_91 1254
100_55_25_90 1254
543_288_92_0 1313
75_288_50_0 1819
92_300_57_0 1819
22_1_9_0 4173

U V m
100_55_25_90 543_288_92_0 1
100_55_25_90 75_288_50_0 1
100_55_25_91 92_300_57_0 1
100_55_25_91 22_1_9_0 1
AAACACCAGAAACCTA-1_90 AAACACCAGAAAGCTT-1_0 1
AAACACCAGAAACCTA-1_90 AAACACCAGAACGACC-1_0 1
AAACACCAGAAACCTA-1_91 AAACACCAGAACGACT-1_0 1
AAACACCAGAAACCTA-1_91 AAACACCAGAACGCCA-1_0 1
16 changes: 0 additions & 16 deletions src/data/tiny.split-minimizers.mol.mol2-bcs.split.tsv.good

This file was deleted.

27 changes: 0 additions & 27 deletions src/data/tiny.split-minimizers.mol.mol2-bcs.tsv

This file was deleted.

4 changes: 2 additions & 2 deletions src/physlr-split-minimizers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ findMoleculesPerBarcode(bxToMolIdx_t& bxToMolIdx, const graph_t& g)
{
auto vertexItRange = boost::vertices(g);
for (auto vertexIt = vertexItRange.first; vertexIt != vertexItRange.second; ++vertexIt) {
std::string pattern = R"((\S+)_\d+_\d+$)";
std::string pattern = R"((\S+)_\d+$)";
std::regex rgx(pattern);
std::smatch matches;

Expand Down Expand Up @@ -228,7 +228,7 @@ splitMinimizers(
tsl::robin_set<Minimizer> neighbourMxsUnion;
for (auto neighbourItr = neighbours.first; neighbourItr != neighbours.second;
++neighbourItr) {
std::string pattern = R"((\S+)_\d+_\d+$)";
std::string pattern = R"((\S+)_\d+$)";
std::regex rgx(pattern);
std::smatch matches;
if (std::regex_search(g[*neighbourItr].name, matches, rgx)) {
Expand Down