Skip to content

Commit

Permalink
Fixed a bug in IO where the first record in each contig was skipped.
Browse files Browse the repository at this point in the history
  • Loading branch information
mchaisso committed Oct 2, 2023
1 parent c82279f commit 782ad1d
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 96 deletions.
12 changes: 7 additions & 5 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ PROG = vamos
LIBS = -lhts -lz -lpthread
#ABPOA_LIBS = -lm -lz -lpthread

ifeq ($(CC),"cc")
CC=gcc
ifeq ($(CC),cc)
CC = gcc
endif

CC ?= gcc
CXX ?= g++ -std=c++11


Expand All @@ -18,7 +18,7 @@ asan ?= ""
tsan ?= ""

ifeq ("$(DEBUG)", "TRUE")
CFLAGS = -g3 -std=c++14 #-fsanitize=address -static-libasan -gdwarf-4 #-Wall -Wextra
CFLAGS = -g -std=c++14 #-fsanitize=address -static-libasan -gdwarf-4 #-Wall -Wextra
else
CFLAGS = -O3 -DNDEBUG #-W -Wall -pedantic -fopenmp -lpthread -lrt
endif
Expand Down Expand Up @@ -48,7 +48,9 @@ ifneq ($(ZLIB_ROOT), "")
endif

$(ABPOA_DIR)/lib/libabpoa.a:
cd $(ABPOA_DIR) && make sse2=TRUE PREFIX=. CC="$(CC)" $(ZLPATH) lib/libabpoa.a
echo $(CC)
echo "^^CC"
cd $(ABPOA_DIR) && make sse2=true PREFIX=. CC="$(CC)" $(ZLPATH) lib/libabpoa.a

testmsa: TestMSA.cpp
$(CXX) $(CFLAGS) -o $@ $^ -L $(CONDA_PREFIX)/lib $(LIBS) -I $(ABPOA_DIR)/include -L $(ABPOA_DIR)/lib -labpoa -lrt -L$(mchaisso)/software/lib
Expand Down
106 changes: 18 additions & 88 deletions src/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,89 +391,6 @@ int QueryAndSetPhase(bam1_t* aln, int &setHap) {
return false;
}

/*
void IO::StoreVNTRLoci(vector<VNTR*> &vntrs, vector<int> &vntrMap, Pileup &pileup, int &refAlnPos, int &curVNTR) {
//
// Find snps that overlap with the current read.
//
int offset=0;
vector<int> snpOffset;
for (auto & consIt : pileup.consensus) {
if (consIt.second.Calc(3)) {
snpOffset.push_back(offset);
}
offset++;
}
vector<bool> cluster(snpOffset.size());
for (int i =0; i + 1 < snpOffset.size(); i++ ) {
if (snpOffset[i+1] - snpOffset[i] < 20) {
cluster[i] = true;
cluster[i+1] = true;
}
}
int c,i;
for (c=0,i=0; i < snpOffset.size(); i++ ) {
if (cluster[i] == false) {
snpOffset[c] = snpOffset[i];
c++;
}
}
snpOffset.resize(c);
while (curVNTR < vntrMap.size() and refAlnPos > vntrs[vntrMap[curVNTR]]->ref_end) {
VNTR* vntr=vntrs[vntrMap[curVNTR]];
pileup.PurgeBefore(vntr->ref_start);
// cerr << "Adding " << snpOffset.size() << " snps for locus " << curVNTR << endl;
for (auto pread : pileup.reads) {
//
// Add this read to the vntr locus
//
string readLocusSeq;
if (pread.GetSubstr(vntr->ref_start, vntr->ref_end, readLocusSeq)) {
READ *read = new READ;
read->seq = readLocusSeq; // new char[readLocusSeq.size() + 1];
// memcpy(read->seq, readLocusSeq.c_str(), readLocusSeq.size());
// read->seq[readLocusSeq.size()] = '\0';
read->len =readLocusSeq.size();
read->flag = pread.flag;
vntr->reads.push_back(read);
read->phased = pread.phased;
read->haplotype = pread.hap;
for (auto offset : snpOffset ) {
if (offset + pileup.consensusStart >= pread.refStartPos and
offset + pileup.consensusStart < pread.refEndPos) {
SNV snv;
snv.nuc = '-';
Consensus &consRef=pileup.consensus[offset + pileup.consensusStart];
assert(offset + pileup.consensusStart - pread.refStartPos >= 0);
if (consRef.a == pread.readSeq[offset + pileup.consensusStart - pread.refStartPos]) {
snv.nuc = consRef.a;
snv.pos = offset + pileup.consensusStart - pread.refStartPos;
}
if (consRef.a == pread.readSeq[offset + pileup.consensusStart - pread.refStartPos]) {
snv.nuc = consRef.b;
snv.pos = offset + pileup.consensusStart - pread.refStartPos;
}
if (snv.nuc != '-') {
read->snvs.push_back(snv);
}
}
}
}
}
if (readsArePhased == false ) {
MaxCutPhase(vntr);
}
curVNTR++;
}
}
*/

void IO::StoreSeq(bam1_t *aln, string &readSeq) {
uint8_t *read = bam_get_seq(aln);
Expand All @@ -493,9 +410,10 @@ void SetVNTRBounds(vector<VNTR*> &vntrs,
LowerBoundSearchVNTRPos lbComp;
UpperBoundSearchVNTRPos ubComp;
lbComp.vntrs = &vntrs;
ubComp.vntrs = &vntrs;
startIt = std::upper_bound(vntrIndex.begin(), vntrIndex.end(), regionStart, ubComp);
endIt = std::lower_bound(vntrIndex.begin(), vntrIndex.end(), regionEnd, lbComp);
ubComp.vntrs = &vntrs;
startIt = std::lower_bound(vntrIndex.begin(), vntrIndex.end(), regionStart, lbComp);
endIt = std::upper_bound(vntrIndex.begin(), vntrIndex.end(), regionEnd, ubComp);
cerr << "Set VNTR Bounds for " << regionStart << "-" << regionEnd << " " << *startIt << " " << *endIt << endl;

}

Expand Down Expand Up @@ -585,13 +503,25 @@ void IO::StoreReadsOnChrom(string &chrom, int regionStart, int regionEnd, vector
vector<int>::iterator it, endIt;
SetVNTRBounds(vntrs, vntrMap, chrom, refAlnStart, refAlnEnd, it, endIt);

while(it != vntrIndex.end() and vntrs[*it]->ref_end <= refAlnEnd) {
while ( it != vntrIndex.end() and vntrs[*it]->ref_end <= refAlnEnd ) {
//
// Need to skip this vntr since it is outside the region.
// Need to skip this vntr since it is outside the region. Here the loop exits because
// all additional VNTRs will not overlap this read.
//
if (vntrs[*it]->ref_end < regionStart or vntrs[*it]->ref_start > regionEnd ) {
break;
}

//
// Handle the corner case where a read starts at the same position as the tandem repeat.
// Since this is not informative for the full length of the TR, do not process it.
//
if (vntrs[*it]->ref_start == refAlnStart) {
++it;
continue;
}


int readStart = MappedStartPosInRead(readMap, refAlnStart, vntrs[*it]->ref_start-1);
int readEnd = MappedEndPosInRead(readMap, refAlnStart, vntrs[*it]->ref_end-1);
string vntrSeq;
Expand Down
2 changes: 1 addition & 1 deletion src/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ class IO
int phaseFlank;
IO()
{
version = "1.3.5.2";
version = "1.3.6";
region_and_motifs = "";
input_bam = "";
vntr_bed = "";
Expand Down
5 changes: 3 additions & 2 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,11 @@ void *CallSNVs (void *procInfoValue) {
*(procInfo->vntrMap),
pileup, procInfo->thread, readsArePhased);
vector<int>::iterator it,end;
int regionStart = (*(procInfo->bucketEndPos))[curChrom][curRegion];
int regionEnd = (*(procInfo->bucketEndPos))[curChrom][curRegion+1];
SetVNTRBounds(*(procInfo->vntrs),
(*(procInfo->vntrMap)), curChrom,
(*(procInfo->bucketEndPos))[curChrom][curRegion],
(*(procInfo->bucketEndPos))[curChrom][curRegion+1], it, end);
regionStart, regionEnd, it, end);

SDTables sdTables;
while (it != end) {
Expand Down

0 comments on commit 782ad1d

Please sign in to comment.