diff --git a/Makefile b/Makefile index b58113f..9354ca8 100644 --- a/Makefile +++ b/Makefile @@ -1,24 +1,26 @@ # include SeqAn libraries, don't warn about MD5 deprecation CXXFLAGS+=-I. -Wno-deprecated-declarations +#include htslib by setting -I and link to the htslib library +HTSDIR= +CXXFLAGS+=-I$(HTSDIR)/include -# RELEASE build -CXXFLAGS+= -O3 -DSEQAN_ENABLE_TESTING=0 -DSEQAN_ENABLE_DEBUG=0 -DSEQAN_HAS_ZLIB=1 -LDLIBS=-lz -lssl -lcrypto - +# RELEASE build +CXX=g++ -std=c++11 -pthread +CXXFLAGS+= -O3 -DSEQAN_ENABLE_TESTING=0 -DSEQAN_ENABLE_DEBUG=0 -DSEQAN_HAS_ZLIB=1 +LDFLAGS=-L$(HTSDIR)/lib -lz -lssl -lcrypto -Wl,-rpath,$(HTSDIR)/lib -lhts TARGET = bamhash_checksum_bam bamhash_checksum_fastq bamhash_checksum_fasta all: $(TARGET) bamhash_checksum_bam: bamhash_checksum_common.o bamhash_checksum_bam.o - $(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS) + $(CXX) $(LDFLAGS) -o $@ $^ bamhash_checksum_fastq: bamhash_checksum_common.o bamhash_checksum_fastq.o - $(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS) + $(CXX) $(LDFLAGS) -o $@ $^ bamhash_checksum_fasta: bamhash_checksum_common.o bamhash_checksum_fasta.o - $(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS) - + $(CXX) $(LDFLAGS) -o $@ $^ clean: - $(RM) *.o *~ $(TARGET) + $(RM) *.o *~ $(TARGET) diff --git a/README.md b/README.md index df36656..e23e79e 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ composed of the readname, whether it is first or last in pair, sequence and qual All the hash values are summed up so the result is independent of the ordering within the files. The result can be compared to verify that the pair of FASTQ files contain the same read information as the aligned BAM file. +The program is written in C++ and uses SeqAnHTS v1.0 for parsing FASTQ, gzip compressed FASTQ and BAM files. +SeqAnHTS is a fork of SeqAn library ( Döring etal. , 2008 ) that uses htslib to read SAM/BAM/CRAM files. 
## Manuscript @@ -35,6 +37,7 @@ Both multiline FASTA and FASTQ are supported and gzipped input for FASTA and FAS ~~~ bamhash_checksum_bam [OPTIONS] ... +bamhash_checksum_bam [OPTIONS] -r ~~~ processes a number of BAM files. BAM files are assumed to contain paired end reads. If you run with `--no-paired` it treats all reads as single end and displays a warning if any read is marked as "second in pair" in the BAM file. @@ -57,4 +60,8 @@ processes a number of FASTA files. All FASTA files are assumed to be single end ## Compiling -The only external dependency is on OpenSSL for the MD5 implementation. +External dependencies are on: + OpenSSL for the MD5 implementation + htslib library (version 1.9) + + diff --git a/bamhash_checksum_bam.cpp b/bamhash_checksum_bam.cpp index f5fda99..e6edee0 100644 --- a/bamhash_checksum_bam.cpp +++ b/bamhash_checksum_bam.cpp @@ -1,6 +1,7 @@ +#include "htslib/hts.h" +#include #include #include -#include #include #include #include @@ -9,26 +10,29 @@ #include #include #include +#include -#include "bamhash_checksum_common.h" +#include "bamhash_checksum_common.h" -/** only needed for seqan 1.4.1 and lower -inline bool -hasFlagSupplementary(seqan::BamAlignmentRecord const & record) -{ - return (record.flag & 0x0800) == 0x0800; -} -*/ struct Baminfo { std::vector bamfiles; bool debug; bool noReadNames; bool noQuality; bool paired; + seqan::CharString reference; - Baminfo() : debug(false), noReadNames(false), noQuality(false), paired(true) {} + Baminfo() : debug(false), noReadNames(false), noQuality(false), paired(true), reference("") {} + +}; + +struct Counts { + uint64_t sum; + uint64_t count; + + Counts() : sum(0), count(0) {} }; @@ -38,16 +42,15 @@ parseCommandLine(Baminfo& options, int argc, char const **argv) { seqan::ArgumentParser parser("bamhash_checksum_bam"); //readlink("/proc/self/exe", options.bindir, sizeof(options.bindir)-1); - setShortDescription(parser, "Checksum of a bam file"); //TODO change description + 
setShortDescription(parser, "Checksum of a sam, bam or cram file"); //TODO change description setVersion(parser, BAMHASH_VERSION); - setDate(parser, "May 2015"); + setDate(parser, "Oct 2018"); addUsageLine(parser, "[\\fIOPTIONS\\fP] \\fI ...\\fP"); addDescription(parser, "Program for checksum of sequence reads. "); - addArgument(parser, seqan::ArgParseArgument(seqan::ArgParseArgument::INPUTFILE,"bamfile", "False", 1)); - - setValidValues(parser, 0,"bam sam"); + addArgument(parser, seqan::ArgParseArgument(seqan::ArgParseArgument::INPUT_FILE,"bamfile", "False", 1)); + setValidValues(parser, 0,"sam bam cram"); addSection(parser, "Options"); @@ -55,7 +58,11 @@ parseCommandLine(Baminfo& options, int argc, char const **argv) { addOption(parser, seqan::ArgParseOption("d", "debug", "Debug mode. Prints full hex for each read to stdout")); addOption(parser, seqan::ArgParseOption("R", "no-readnames", "Do not use read names as part of checksum")); addOption(parser, seqan::ArgParseOption("Q", "no-quality", "Do not use read quality as part of checksum")); - addOption(parser, seqan::ArgParseOption("P", "no-paired", "Bam files were not generated with paired-end reads")); + addOption(parser, seqan::ArgParseOption("P", "no-paired", "Cram files were not generated with paired-end reads")); + addOption(parser, seqan::ArgParseOption("r", "reference-file", "Path to reference-file if reference not given in header", + seqan::ArgParseArgument::INPUT_FILE)); + + setValidValues(parser, "reference-file", "fa"); // Parse command line. 
seqan::ArgumentParser::ParseResult res = seqan::parse(parser, argc, argv); @@ -67,6 +74,7 @@ parseCommandLine(Baminfo& options, int argc, char const **argv) { options.noReadNames = isSet(parser, "no-readnames"); options.noQuality = isSet(parser, "no-quality"); options.paired = !isSet(parser, "no-paired"); + getOptionValue(options.reference, parser, "reference-file"); options.bamfiles = getArgumentValues(parser, 0); @@ -74,6 +82,68 @@ parseCommandLine(Baminfo& options, int argc, char const **argv) { } +// ----------------------------------------------------------------------------- +// FUNCTION getSampleIdAndLaneNames() +// ----------------------------------------------------------------------------- + +void getLaneNames(std::map & laneNames, std::string const & header) +{ + for (int i = 0; i < header.size(); /*empty on purpose*/) + { + auto hdr_find_it = std::find(header.begin() + i, header.end(), '\n'); + std::string line = header.substr(i, hdr_find_it - header.begin() - i); + + if (line.size() > 7 && line[0] == '@' && line[1] == 'R' && line[2] == 'G' && line[3] == '\t') + { + for (int j = 0; j < static_cast(line.size()); /*empty on purpose*/) + { + auto line_find_it = std::find(line.begin() + j, line.end(), '\t'); + std::string field = line.substr(j, line_find_it - line.begin() - j); + + if (field.size() > 3 && field[0] == 'I' && field[1] == 'D' && field[2] == ':') + { + seqan::CharString read_group_name = field.substr(3); + int read_group_index = laneNames.size(); + laneNames[read_group_name] = read_group_index; + } + + j = std::distance(line.begin(), line_find_it) + 1; + } + } + + i = std::distance(header.begin(), hdr_find_it) + 1; + } +} + +// ----------------------------------------------------------------------------- +// FUNCTION getLane() +// ----------------------------------------------------------------------------- + +int getLane(seqan::BamAlignmentRecord & record, + seqan::BamTagsDict & tagsDict, + std::map & laneNames) +{ + unsigned tagIdx = 0; + + 
if (!seqan::findTagKey(tagIdx, tagsDict, "RG")) + { + std::cerr << "ERROR: Found a read with a missing read group (RG) tag\n"; + return -1; + } + + seqan::CharString read_group; + + if(!seqan::extractTagValue(read_group, tagsDict, tagIdx)) + { + std::cerr << "ERROR: Failed to extract read group (RG) tag value\n"; + return -1; + } + + return laneNames[read_group]; +} + + + int main(int argc, char const **argv) { Baminfo info; // Define structure variable @@ -83,58 +153,48 @@ int main(int argc, char const **argv) { return res == seqan::ArgumentParser::PARSE_ERROR; } - uint64_t sum = 0; - uint64_t count = 0; +//Moving below to struct +// uint64_t sum = 0; +// uint64_t count = 0; bool pairedWarning = false; - + //adding new stuff + std::map laneNames; + // Initialize all counts for each lane. + seqan::String counts; for (int i = 0; i < info.bamfiles.size(); i++) { - // Open BGZF Stream for reading. - seqan::Stream inStream; const char* bamfile = info.bamfiles[i].c_str(); - - if (!open(inStream, bamfile, "r")) { - std::cerr << "ERROR: Could not open " << bamfile << " for reading.\n"; - return 1; - } + const char* reference = toCString(info.reference); - // Setup name store, cache, and BAM I/O context. - typedef seqan::StringSet TNameStore; - typedef seqan::NameStoreCache TNameStoreCache; - typedef seqan::BamIOContext TBamIOContext; - TNameStore nameStore; - TNameStoreCache nameStoreCache(nameStore); - TBamIOContext context(nameStore, nameStoreCache); - - // Read header. - seqan::BamHeader header; - if (readRecord(header, context, inStream, seqan::Bam()) != 0) { - std::cerr << "ERROR: Could not read header from BAM file " << bamfile << "\n"; - return 1; - } - seqan::clear(header); + // Open stream for reading + seqan::HtsFile inStream(bamfile, "r", reference); + + // Initialize lane names (read groups). 
+ std::string header(inStream.hdr->text, inStream.hdr->l_text); + getLaneNames(laneNames, header); + unsigned lanecount = laneNames.size(); + resize(counts, lanecount); // Define: seqan::BamAlignmentRecord record; seqan::CharString string2hash; - //char hexCstr[33]; // Read record - while (!atEnd(inStream)) { - if (readRecord(record, context, inStream, seqan::Bam()) != 0) { - std::cerr << "ERROR: Could not read record from BAM File " << bamfile << "\n"; - return 1; - } + while (seqan::readRecord(record, inStream)){ + seqan::BamTagsDict tagsDict(record.tags); + int l = getLane(record, tagsDict, laneNames); + if (l == -1) return 1; + // Check if flag: reverse complement and change record accordingly if (hasFlagRC(record)) { - reverseComplement(record.seq); - reverse(record.qual); + seqan::reverseComplement(record.seq); + seqan::reverse(record.qual); } // Check if flag: supplementary and exclude those if (!hasFlagSupplementary(record) && !hasFlagSecondary(record)) { - count +=1; + counts[l].count +=1; // Construct one string from record if (!info.noReadNames) { seqan::append(string2hash, record.qName); @@ -143,7 +203,7 @@ int main(int argc, char const **argv) { seqan::append(string2hash, "/2"); } else { if (!pairedWarning) { - std::cerr << "WARNING: BamHash was run with --no-paired mode, but BAM file has reads marked as second pair" << std::endl; + std::cerr << "WARNING: seqread was run with --no-paired mode, but BAM file has reads marked as second pair" << std::endl; pairedWarning = true; } seqan::append(string2hash, "/1"); @@ -162,26 +222,25 @@ int main(int argc, char const **argv) { // Get MD5 hash hash_t hex = str2md5(toCString(string2hash), length(string2hash)); - if (info.debug) { std::cout << string2hash << " " << std::hex << hex.p.low << "\n"; } else { - hexSum(hex, sum); + hexSum(hex, counts[l].sum); } seqan::clear(string2hash); } } - - // print result - } if (!info.debug) { - std::cout << std::hex << sum << "\t"; - std::cout << std::dec << count << "\n"; + 
for (std::map::iterator it = laneNames.begin(); it != laneNames.end(); ++it) { + std::cout << it->first << "\t"; + int lid = it->second; + std::cout << std::hex << counts[lid].sum << "\t"; + std::cout << std::dec << counts[lid].count << "\n"; + } } - + return 0; } - diff --git a/bamhash_checksum_common.h b/bamhash_checksum_common.h index b3732d9..e8c6b30 100644 --- a/bamhash_checksum_common.h +++ b/bamhash_checksum_common.h @@ -1,7 +1,7 @@ #ifndef BAMHASH_CHECKSUM_COMMON_H #define BAMHASH_CHECKSUM_COMMON_H -#define BAMHASH_VERSION "1.1" +#define BAMHASH_VERSION "1.3" #include #include diff --git a/bamhash_checksum_fasta.cpp b/bamhash_checksum_fasta.cpp index f6279e6..f8d1adb 100644 --- a/bamhash_checksum_fasta.cpp +++ b/bamhash_checksum_fasta.cpp @@ -28,12 +28,12 @@ parseCommandLine(Fastainfo& options, int argc, char const **argv) { setShortDescription(parser, "Checksum of a set of fasta files"); setVersion(parser, BAMHASH_VERSION); - setDate(parser, "May 2015"); + setDate(parser, "Okt 2018"); addUsageLine(parser, "[\\fIOPTIONS\\fP] \\fI\\fP [\\fIin2.fasta ... \\fP]"); addDescription(parser, "Program for checksum of sequence reads. "); - addArgument(parser, seqan::ArgParseArgument(seqan::ArgParseArgument::INPUTFILE,"fastafiles", true)); + addArgument(parser, seqan::ArgParseArgument(seqan::ArgParseArgument::INPUT_FILE,"fastafiles", true)); setValidValues(parser, 0,"fa fa.gz fasta fasta.gz"); @@ -58,7 +58,6 @@ parseCommandLine(Fastainfo& options, int argc, char const **argv) { return seqan::ArgumentParser::PARSE_OK; } - int main(int argc, char const **argv) { Fastainfo info; // Define structure variable seqan::ArgumentParser::ParseResult res = parseCommandLine(info, argc, argv); // Parse the command line. 
@@ -77,29 +76,26 @@ int main(int argc, char const **argv) { seqan::CharString seq; hash_t hex; - - // Open GZStream - seqan::Stream gzStream; - - + // Open stream + seqan::SeqFileIn seqFileIn; for (int i = 0; i < info.fastafiles.size(); i++) { const char* fasta = info.fastafiles[i].c_str(); - if (!open(gzStream, fasta, "r")) { + if (!open(seqFileIn, fasta)) { std::cerr << "ERROR: Could not open the file: " << fasta << " for reading.\n"; return 1; } - //Setup RecordReader for reading FASTA file from gzip-compressed file - seqan::RecordReader, seqan::SinglePass<> > reader(gzStream); - - - // Read record - while (!atEnd(reader)) { - if (readRecord(id, seq, reader, seqan::Fasta()) != 0) { - if (atEnd(reader)) { + while (!seqan::atEnd(seqFileIn)) { + try + { + readRecord(id, seq, seqFileIn); + } + catch (seqan::Exception const & e) + { + if (seqan::atEnd(seqFileIn)) { std::cerr << "WARNING: Could not continue reading " << fasta << " at line: " << count+1 << ".\n"; return 1; } @@ -109,10 +105,8 @@ int main(int argc, char const **argv) { count +=1; - // cut away after first space - seqan::strSplit(idSub, id, ' ', false, 1); - + seqan::strSplit(idSub, id, seqan::EqualsChar<' '>(), false, 1); if (!info.noReadNames) { seqan::append(string2hash, idSub[0]); @@ -131,8 +125,6 @@ int main(int argc, char const **argv) { seqan::clear(string2hash); seqan::clear(idSub); - - } } @@ -141,7 +133,6 @@ int main(int argc, char const **argv) { std::cout << std::hex << sum << "\t"; std::cout << std::dec << count << "\n"; } - return 0; } diff --git a/bamhash_checksum_fastq.cpp b/bamhash_checksum_fastq.cpp index 653fadf..f4d4ed2 100644 --- a/bamhash_checksum_fastq.cpp +++ b/bamhash_checksum_fastq.cpp @@ -31,12 +31,12 @@ parseCommandLine(Fastqinfo& options, int argc, char const **argv) { setShortDescription(parser, "Checksum of a set of fastq files"); setVersion(parser, BAMHASH_VERSION); - setDate(parser, "May 2015"); + setDate(parser, "Okt 2018"); addUsageLine(parser, "[\\fIOPTIONS\\fP] 
\\fI\\fP [\\fIin2.fastq.gz ... \\fP]"); addDescription(parser, "Program for checksum of sequence reads. "); - addArgument(parser, seqan::ArgParseArgument(seqan::ArgParseArgument::INPUTFILE,"fastqfiles", true)); + addArgument(parser, seqan::ArgParseArgument(seqan::ArgParseArgument::INPUT_FILE,"fastqfiles", true)); setValidValues(parser, 0,"fq fq.gz fastq fastq.gz"); @@ -89,10 +89,10 @@ int main(int argc, char const **argv) { hash_t hex1; hash_t hex2; - // Open GZStream - seqan::Stream gzStream1; - seqan::Stream gzStream2; + // Open Files + seqan::SeqFileIn seqFileIn1; + seqan::SeqFileIn seqFileIn2; if (info.paired && (info.fastqfiles.size() % 2 != 0)) { std::cerr << "ERROR: Running with paired end mode, but supplied an odd number of input files "; @@ -109,38 +109,56 @@ int main(int argc, char const **argv) { if (info.paired) { fastq2 = info.fastqfiles[i+1].c_str(); } - - if (!open(gzStream1, fastq1, "r")) { - std::cerr << "ERROR: Could not open the file: " << fastq1 << " for reading.\n"; - return 1; - } - if (info.paired && !open(gzStream2, fastq2, "r")) { - std::cerr << "ERROR: Could not open the file: " << fastq2 << " for reading.\n"; - return 1; + + if (!open(seqFileIn1, fastq1)) + { + std::cerr << "ERROR: Could not open the file: " << fastq1 << " for reading.\n"; + return 1; } - //Setup RecordReader for reading FASTQ file from gzip-compressed file - seqan::RecordReader, seqan::SinglePass<> > reader1(gzStream1); - seqan::RecordReader, seqan::SinglePass<> > reader2(gzStream2); + if (!open(seqFileIn2, fastq2)) + { + std::cerr << "ERROR: Could not open the file: " << fastq1 << " for reading.\n"; + return 1; + } +/* + StringSet seqs; +*/ // Read record - while (!atEnd(reader1)) { - if(info.paired) { - if(atEnd(reader2)) { break; } + while (!atEnd(seqFileIn1)) { + if(info.paired) + { + if(atEnd(seqFileIn2)) { break; } } - if (readRecord(id1, seq1, qual1, reader1, seqan::Fastq()) != 0) { - if (atEnd(reader1)) { - std::cerr << "WARNING: Could not continue reading " << 
fastq1 << " at line: " << count+1 << ".\n"; + try + { + seqan::readRecord(id1, seq1, qual1, seqFileIn1); + } + catch (seqan::Exception const & e) + { + if (atEnd(seqFileIn1)) + { + std::cerr << "WARNING: Could not continue reading " << fastq1 << " at line: " << count+1 << ". Check if files have the same number of reads.\n"; return 1; } std::cerr << "ERROR: Could not read from " << fastq1 << "\n"; return 1; } - if (info.paired && readRecord(id2, seq2, qual2, reader2, seqan::Fastq()) != 0) { - if (atEnd(reader2)) { + try + { + if (info.paired) + { + seqan::readRecord(id2, seq2, qual2, seqFileIn2); + } + } + catch (seqan::Exception const & e) + { + if (atEnd(seqFileIn2)) + { std::cerr << "WARNING: Could not continue reading " << fastq2 << " at line: " << count+1 << ". Check if files have the same number of reads.\n"; return 1; } @@ -153,16 +171,16 @@ int main(int argc, char const **argv) { // If include id, then cut id on first whitespace if (seqan::endsWith(id1,"/1") || seqan::endsWith(id1,"/2")) { - seqan::strSplit(idSub1, id1, '/', false, 1); + seqan::strSplit(idSub1, id1, seqan::EqualsChar<'/'>(), false, 1); } else { - seqan::strSplit(idSub1, id1, ' ', false, 1); + seqan::strSplit(idSub1, id1, seqan::EqualsChar<' '>(), false, 1); } if (info.paired) { if (seqan::endsWith(id2,"/1") || seqan::endsWith(id2,"/2")) { - seqan::strSplit(idSub2, id2, '/', false, 1); + seqan::strSplit(idSub2, id2, seqan::EqualsChar<'/'>(), false, 1); } else { - seqan::strSplit(idSub2, id2, ' ', false, 1); + seqan::strSplit(idSub2, id2, seqan::EqualsChar<' '>(), false, 1); } } @@ -181,7 +199,6 @@ int main(int argc, char const **argv) { seqan::append(string2hash1, qual1); } - if (info.paired) { if (!info.noReadNames) { seqan::append(string2hash2, idSub2[0]); @@ -213,6 +230,11 @@ int main(int argc, char const **argv) { } } + if (!info.debug && count == 0) + { + std::cerr << "WARNING: Read count is : " << count << "\n"; + return 1; + } if (!info.debug) { std::cout << std::hex << sum << "\t"; 
diff --git a/seqan/LICENSE b/seqan/LICENSE index 0b7f5a9..7362ed0 100644 --- a/seqan/LICENSE +++ b/seqan/LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2006-2011, Knut Reinert, FU Berlin +Copyright (c) 2006-2015, Knut Reinert, FU Berlin All rights reserved. Redistribution and use in source and binary forms, with or without @@ -9,18 +9,19 @@ modification, are permitted provided that the following conditions are met: * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - * Neither the name of the nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. + * Neither the name of Knut Reinert or the FU Berlin nor the names of + its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +DAMAGE. diff --git a/seqan/align.h b/seqan/align.h index 55f9c10..41a9399 100644 --- a/seqan/align.h +++ b/seqan/align.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Manuel Holtgrewe // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_H_ // TODO(holtgrew): Usage of gapped value in align module is not consistent, need proxies in many places, reference not cleanly implemented everywhere yet. // TODO(holtgrew): The Author: tag at the top has to be corrected in the headers of this module. @@ -51,6 +51,7 @@ #include #include // ModifiedAlphabet<>. +#include #include // TODO(holtgrew): We should not have to depend on this. // TODO(holtgrew): Why not use priority queue from STL? 
@@ -70,6 +71,8 @@ // Gaps & Gaps Iterator Data Structures // ============================================================================ +#include + #include #include @@ -84,7 +87,6 @@ // Alignment Data Structures and Columns // ============================================================================ -#include #include #include @@ -114,6 +116,7 @@ #include #include #include +#include // Stores the actual trace segment that was detected during traceback. // The trace segments can be adapted into any alignment representation @@ -121,15 +124,11 @@ #include #include -// Ensures the backwards compatibility for the global interfaces of the -// alignment algorithms. Based on the called function this selects the -// correct parameters for the new alignment module. -#include - // Implements the different recursion formula of the alignment algorithms. #include #include #include +#include // Defines meta informations which determine how to compute a column and a // certain cell for different profiles. @@ -147,6 +146,12 @@ #include #include +// Ensures the backwards compatibility for the global interfaces of the +// alignment algorithms. Based on the called function this selects the +// correct parameters for the new alignment module. +#include +#include + // The actual implementations of the traceback and the dynamic programming that // is used by all different alignment algorithms. #include @@ -206,4 +211,4 @@ #include #include -#endif // SEQAN_CORE_INCLUDE_SEQAN_ALIGN_H_ +#endif // SEQAN_INCLUDE_SEQAN_ALIGN_H_ diff --git a/seqan/align/INFO b/seqan/align/INFO deleted file mode 100644 index a5a30e3..0000000 --- a/seqan/align/INFO +++ /dev/null @@ -1,13 +0,0 @@ -Name: seqan-align -Author: Andreas Gogol-Doering -Author: Birte Kehr -Author: Anne-Katrin Emde -Author: Manuel Holtgrewe -Author: Rene Rahn -Maintainer: Rene Rahn -License: BSD 3-clause -Copyright: 2006-2013, FU Berlin -Status: mature -Description: Alignment data structures. 
- Definition of alignment data structures and pairwise DP alignment - algorithms. diff --git a/seqan/align/align_base.h b/seqan/align/align_base.h index c26ca1c..4aa31d9 100644 --- a/seqan/align/align_base.h +++ b/seqan/align/align_base.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ // Gap objects, one for each row of the alignment. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_BASE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_BASE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_BASE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_BASE_H_ namespace seqan { @@ -44,9 +44,6 @@ namespace seqan { // Forwards // ============================================================================ -struct TagRaw_; -typedef Tag Raw; - // ============================================================================ // Tags, Classes, Enums // ============================================================================ @@ -75,41 +72,17 @@ typedef Tag Raw; * * Here is an example of using an Align object with @link globalAlignment @endlink. * - * @include demos/align/align.cpp + * @include demos/dox/align/align.cpp * * The output is as follows: * - * @include demos/align/align.cpp.stdout + * @include demos/dox/align/align.cpp.stdout * * @see globalAlignment * @see localAlignment * @see Gaps */ -/** -.Class.Align: -..cat:Alignments -..summary:An alignment of sequences. -..signature:Align -..param.TSource:Type of the ungapped sequences. -...metafunction:Metafunction.Source -..param.TSpec:The specializing type. 
-...metafunction:Metafunction.Spec -...default:@Spec.ArrayGaps@ -..remarks:The default implementation of $Align$ stores the alignment in a set of @Class.Gaps|Gaps@ objects. -Hence, the default implementation is row-based, so it will be faster to access the alignment row-wise than column-wise. -..example.file:demos/align/align.cpp -..example.text:The output is as follows: -..example.output: -score = -4 -align - 0 . : - AGTTTAATCA - ||| | | | - AGTAT-ACGA -..include:seqan/align.h -*/ - template class Align { @@ -154,8 +127,6 @@ class Align * @return Type The resulting column type. */ -///.Metafunction.Cols.param.T.type:Class.Align - template struct Cols > { @@ -183,8 +154,6 @@ struct Cols const> * @return Type The resulting value type. */ -///.Metafunction.Value.param.T.type:Class.Align - template struct Value >: Value > @@ -209,8 +178,6 @@ struct Value const>: * @return Type The resulting get-value type. */ -///.Metafunction.GetValue.param.T.type:Class.Align - template struct GetValue >: GetValue > @@ -235,8 +202,6 @@ struct GetValue const>: * @return Type The resulting reference type. */ -///.Metafunction.Reference.param.T.type:Class.Align - template struct Reference >: Reference > @@ -272,8 +237,6 @@ struct Reference const>: * @return Type The resulting rows type. */ -///.Metafunction.Rows.param.T.type:Class.Align - template struct Rows > { @@ -293,15 +256,13 @@ struct Rows const> * @mfn Align#Source * @brief Return the type of the underlying sequence. * - * @signature Rows::Type + * @signature Rows::Type; * * @tparam TAlign The Align object to get the underlying sequence type for. * * @return Type The resulting sequence type. */ -///.Metafunction.Source.param.T.type:Class.Align - template struct Source > { @@ -365,23 +326,9 @@ void move(Align & target, Align & source) * * @signature TRows rows(align); * - * @param align The Align object to get the rows for. + * @param[in] align The Align object to get the rows for. 
* - * @signature TRows A container with the Gaps of the Align object. - */ - -/** -.Function.rows: -..cat:Alignments -..summary:The container of rows in an alignment. -..signature:Rows rows(align) -..param.align:An alignment. -...type:Class.Align -..returns:The container of rows in $align$. -...metafunction:Metafunction.Rows -..see:Function.cols -..see:Metafunction.Rows -..include:seqan/align.h + * @return TRows A container with the Gaps of the Align object. */ template @@ -408,27 +355,10 @@ rows(Align const & me) * * @signature TRow row(align, pos); * - * @param align The Align object to get the row of. - * @param pos The number of the row to get. + * @param[in] align The Align object to get the row of. + * @param[in] pos The number of the row to get. * - * @signature TRow Reference to the given row of align (Type: @link Gap#Row @endlink). - */ - -/** -.Function.row: -..cat:Alignments -..summary:A row in an alignment. -..signature:Row & row(align, position) -..param.align:An alignment. -...type:Class.Align -..param.position:A position in the @Function.rows@ container of $align$. -..returns:The row in @Function.rows@ container of $align$ at the given $position$. -...metafunction:Metafunction.Row -..remarks:This function is equivalent to $value(rows(align), position)$. -..see:Function.rows -..see:Function.col -..see:Metafunction.Row -..include:seqan/align.h + * @return TRow Reference to the given row of align (Metafunction: @link Align#Row @endlink). */ template @@ -457,24 +387,11 @@ row(Align const & me, * * @signature TCols cols(align); * - * @param align The Align object to get the cols of. + * @param[in] align The Align object to get the cols of. * - * @signature TCols The columns of the Align object (type @link Align#Cols @endlink). + * @return TCols The columns of the Align object (Metafunction: @link Align#Cols @endlink). */ -/** -.Function.cols: -..cat:Alignments -..summary:The container of columns in an alignment. 
-..signature:Cols cols(align) -..param.align:An alignment. -...type:Class.Align -..returns:The container of columns in $align$. -...metafunction:Metafunction.Cols -..see:Metafunction.Cols -..include:seqan/align.h -*/ - template inline typename Cols >::Type cols(Align & me) @@ -499,27 +416,11 @@ cols(Align const & me) * * @signature TCol col(align); * - * @param align The Align object to get the cols of. + * @param[in] align The Align object to get the cols of. * - * @signature TCol The column of the Align object (type @link Align#Col @endlink). + * @return TCol The column of the Align object (Metafunction: @link Align#Col @endlink). */ -/** -.Function.col: -..cat:Alignments -..summary:A column in an alignment. -..signature:Col & col(align, position) -..param.align:An alignment. -...type:Class.Align -..param.position:A position in the @Function.cols@ container of $align$. -..returns:The column in @Function.cols@ container of $align$ at the given $position$. -...metafunction:Metafunction.Col -..remarks:This function is equivalent to $value(cols(align), position)$. -..see:Function.cols -..see:Metafunction.Col -..include:seqan/align.h -*/ - template inline typename Col >::Type col(Align & me, @@ -540,8 +441,6 @@ col(Align const & me, // Function detach() // ---------------------------------------------------------------------------- -///.Function.detach.param.object.type:Class.Align - template inline void detach(Align & me) @@ -569,21 +468,16 @@ detach(Align & me) * @deprecated Old-style I/O. * @brief Writing of Gaps to Streams in human-readable format. * - * @signature void write(stream, align, id, Raw()); + * @signature void write(stream, align); * - * @param stream The Stream to write to. - * @param align The Align object to write out. - * @param id ID string (ignored). + * @param[in,out] stream The Stream to write to. + * @param[in] align The Align object to write out. */ -// TODO(holtgrew): Part of the old I/O system. Undocumented. 
Rename Raw() to HumanReadable() or OnScreen()? - -template +template inline void write(TFile & target, - Align const & source, - TIDString const &, - Raw) + Align const & source) { typedef Align const TAlign; typedef typename Row::Type TRow; @@ -603,28 +497,28 @@ write(TFile & target, windowSize_ = end_ - begin_; // Print header line - char buffer[100]; - sprintf(buffer, "%7u", (unsigned)baseCount); - streamPut(target, buffer); + char buffer[20]; + int len = sprintf(buffer, "%7u", (unsigned)baseCount); + write(target, buffer, len); baseCount += windowSize_; - streamPut(target, ' '); + writeValue(target, ' '); for (TPosition i = 1; i <= windowSize_; ++i) { if ((i % 10) == 0) - streamPut(target, ':'); + writeValue(target, ':'); else if ((i % 5) == 0) - streamPut(target, '.'); + writeValue(target, '.'); else - streamPut(target, ' '); + writeValue(target, ' '); } - streamPut(target, ' '); - streamPut(target, '\n'); + writeValue(target, ' '); + writeValue(target, '\n'); // Print sequences for (TRowsPosition i = 0; i < 2 * row_count - 1; ++i) { for (unsigned int j = 0; j < leftSpace + 2; ++j) - streamPut(target, ' '); + writeValue(target, ' '); if ((i % 2) == 0) { TRow & row_ = row(source, i / 2); @@ -634,9 +528,9 @@ write(TFile & target, for (; begin1_ != end1_; ++begin1_) { if (isGap(begin1_)) - streamPut(target, gapValue()); + writeValue(target, gapValue()); else - streamPut(target, *begin1_); + writeValue(target, getValue(begin1_)); } } else @@ -647,20 +541,20 @@ write(TFile & target, (!isGap(row(source, (i + 1) / 2), begin_ + j)) && (row(source, (i - 1) / 2)[begin_ + j] == row(source, (i + 1) / 2)[begin_ + j])) { - streamPut(target, '|'); + writeValue(target, '|'); } else { - streamPut(target, ' '); + writeValue(target, ' '); } } } - streamPut(target, '\n'); + writeValue(target, '\n'); } - streamPut(target, '\n'); + writeValue(target, '\n'); begin_ += 50; } - streamPut(target, '\n'); + writeValue(target, '\n'); } // 
---------------------------------------------------------------------------- @@ -673,7 +567,7 @@ write(TFile & target, * * @signature void clearClipping(align); * - * @param align Align object to clear clippings of. + * @param[in,out] align Align object to clear clippings of. */ // TODO(holtgrew): Undocumented. @@ -699,8 +593,8 @@ clearClipping(Align & align_) * * @signature TStream operator<<(stream, align); * - * @param stream std::ostream to write to. - * @param align Align object to write out. + * @param[in,out] stream std::ostream to write to. + * @param[in] align Align object to write out. * * @return TStream Reference to stream after output of align. */ @@ -712,7 +606,8 @@ inline TStream & operator<<(TStream & target, Align const & source) { - write(target, source); + typename DirectionIterator::Type it = directionIterator(target, Output()); + write(it, source); return target; } @@ -726,22 +621,8 @@ operator<<(TStream & target, * * @signature void setStrings(align, stringSet); * - * @param align Align object to set underlying sequence of. - * @param stringSet The source of the data. - */ - -/** -.Function.setStrings: -..cat:Alignments -..summary:Loads the sequences of a stringset into an alignment. -..signature:setStrings(align, stringset) -..param.align:An alignment. -...type:Class.Align -..param.stringset:A string set. -...type:Class.StringSet -..remarks:The function clears $align$ and creates an new global alignment between strings in $stringset$ that contains only trainling gaps. -The alignment will be dependent from the strings in the stringset; use @Function.detach@ to make $align$ the owner of its strings. -..include:seqan/align.h + * @param[in,out] align Align object to set underlying sequence of. + * @param[in] stringSet The @link StringSet @endlink with the data. */ template @@ -777,7 +658,7 @@ setStrings(Align & me, * * @signature void clearGaps(align); * - * @param align The Align object to clear all all gaps from. 
+ * @param[in] align The Align object to clear all all gaps from. */ template @@ -802,17 +683,11 @@ clearGaps(Align & me) * * @signature TStringSet stringSet(align); * - * @param align Align object to get sequences of. + * @param[in] align Align object to get sequences of. * - * @return TStringSet The set of ungapped sequences. Get type with Gaps#StringSetType. + * @return TStringSet The set of ungapped sequences (Metafunction: @link Align#StringSetType @endlink). */ -/** -.Function.stringSet: -..param.g.type:Class.Align -..include:seqan/align.h -*/ - template inline typename StringSetType >::Type stringSet(Align & me) @@ -862,4 +737,4 @@ inline bool operator!=(Align const & lhs, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_BASE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_BASE_H_ diff --git a/seqan/align/align_cols.h b/seqan/align/align_cols.h index d140c2d..e5aafde 100644 --- a/seqan/align/align_cols.h +++ b/seqan/align/align_cols.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ // rows of an alignment. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_COLS_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_COLS_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_COLS_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_COLS_H_ namespace seqan { @@ -65,20 +65,6 @@ namespace seqan { * @tparam TAlign The Align type. */ -/** -.Class.AlignCols: -..cat:Alignments -..summary:Pseudo columns container for row-based alignment classes. -..signature:AlignCols -..param.TAlign:Alignment type. 
-...metafunction:Metafunction.Host -..remarks: -This class emulates a container of columns on alignment classes that store the alignment in a container of rows. -Note that accessing a row-based alignment column-wise can be significantly slower than accessing the alignment row-wise. -..see:Class.Align -..include:seqan/align.h - */ - template struct AlignCols { @@ -112,17 +98,6 @@ struct AlignCols // Specialization AlignCols // ---------------------------------------------------------------------------- -/** -.Spec.AlignColIterator: -..cat:Iterators -..summary:Iterator for @Class.AlignCols@ pseudo container. -..signature:Iter< TAlign, AlignColIterator > -..param.TSpec:Specialization tag. -..general:Class.Iter -..see:Class.AlignCols -..include:seqan/align.h -*/ - template struct AlignColIterator; @@ -136,8 +111,6 @@ struct AlignColIterator; // TODO(holtgrew): Add HostedTypeConcept and make AlignCols object implement the concept. -///.Metafunction.Host.param.T.type:Class.AlignCols - template struct Host > { @@ -153,8 +126,6 @@ struct Host const> // Metafunction AlignColIterator // ---------------------------------------------------------------------------- -///.Metafunction.Iterator.param.T.type:Class.AlignCols - template struct Iterator, TIteratorSpec> { @@ -172,8 +143,6 @@ struct Iterator const, TIteratorSpec> // Iterator is also used as Value -///.Metafunction.Value.param.T.type:Class.AlignCols - template struct Value >: Iterator, Standard> @@ -188,8 +157,6 @@ struct Value const>: // Metafunction Size // ---------------------------------------------------------------------------- -///.Metafunction.Size.param.T.type:Class.AlignCols - template struct Size >: Size::Type> @@ -204,8 +171,6 @@ struct Size const>: // Metafunction Position // ---------------------------------------------------------------------------- -///.Metafunction.Position.param.T.type:Class.AlignCols - template struct Position >: Position::Type> @@ -224,8 +189,6 @@ struct Position const>: // Function 
host() // ---------------------------------------------------------------------------- -///.Function.host.param.object.type:Class.AlignCols - template inline typename Host >::Type & host(AlignCols & me) @@ -246,7 +209,25 @@ host(AlignCols const & me) // Function iter() // ---------------------------------------------------------------------------- -///.Function.iter.param.object.type:Class.AlignCols +/*! + * @fn AlignCols#iter + * @headerfile + * @brief Iterator to the item at the given position in the alignment columns. + * + * @signature TIterator iter(cols, pos[, tag]); + * + * @param[in] cols The AlignCols object to get the iterator for. + * @param[in] pos The position to get the iterator for. + * @param[in] tag The tag to pick the type of the iterator. + * + * @return TIterator The resulting iterator. If TTag is the type of tag and TAlignCols the + * type of cols then TIterator is of the type Iterator<TAlignCols, + * TTag>::Type. + * + * @section Remarks + * + * If pos is out of range then the iterator is invalid. + */ template inline typename Iterator, Tag const>::Type @@ -270,7 +251,15 @@ iter(AlignCols const & me, // Function value() // ---------------------------------------------------------------------------- -///.Function.value.param.container.type:Class.AlignCols +/*! + * @fn AlignCols#value + * @headerfile + * @brief Returns AlignCols value at a position. + * + * @signature TColumn value(alignCols, pos); + * + * @return TColumn The column, as determined by Value metafunction. 
+ */ template inline typename Value >::Type @@ -292,8 +281,6 @@ value(AlignCols const & me, // Function beginPosition() // ---------------------------------------------------------------------------- -///.Function.beginPosition.param.object.type:Class.AlignCols - template inline typename Position::Type _beginPositionAlignCols(TAlignCols const & me) @@ -339,8 +326,6 @@ beginPosition(AlignCols & me) // Function begin() // ---------------------------------------------------------------------------- -///.Function.begin.param.object.type:Class.AlignCols - template inline typename Iterator, Tag const>::Type begin(AlignCols & me, @@ -361,8 +346,6 @@ begin(AlignCols const & me, // Function endPosition() // ---------------------------------------------------------------------------- -///.Function.endPosition.param.object.type:Class.AlignCols - template inline typename Position::Type _endPositionAlignCols(TAlignCols const & me) @@ -402,8 +385,6 @@ endPosition(AlignCols const & me) // Function end() // ---------------------------------------------------------------------------- -///.Function.end.param.object.type:Class.AlignCols - template inline typename Iterator, Tag const>::Type end(AlignCols & me, @@ -445,4 +426,4 @@ operator==(AlignCols const & left, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_COLS_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_COLS_H_ diff --git a/seqan/align/align_config.h b/seqan/align/align_config.h index 5adbade..c90c066 100644 --- a/seqan/align/align_config.h +++ b/seqan/align/align_config.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Tobias Rausch // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_CONFIG_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_CONFIG_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_CONFIG_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_CONFIG_H_ namespace seqan { @@ -55,7 +55,7 @@ namespace seqan { * @brief Indication of whether begin/end gaps are free for DP alignment algorithms. * * @signature template - * AlignConfig; + * struct AlignConfig; * * @tparam TOP Whether or not the begin gaps in the vertical sequence are free. * @tparam LEFT Whether or not the begin gaps in the horizontal sequence are free. @@ -65,6 +65,8 @@ namespace seqan { * * Used in the DP alignment algorithms to configure the begin/end gap free-nes. * + * @see globalAlignment + * * @section Specialization List * * The following gives an (incomplete) list of useful AlignConfig specializations. @@ -87,26 +89,6 @@ namespace seqan { * */ -/** -.Class.AlignConfig: -..cat:Alignments -..summary:The AlignConfig class encapsulates how DP is carried out. -It indicates at what ends gaps are free, the so-called free ends-space alignments. -..signature:AlignConfig -..param.TOP:If true then 0's in top row. -...default:$false$ -..param.LEFT:If true then 0's in the left column. -...default:$false$ -..param.RIGHT:If true then maximum is also searched in the last column. -...default:$false$ -..param.BOTTOM:If true then maximum is also searched in the last row. -...default:$false$ -..param.TSpec:The specializing type. -...metafunction:Metafunction.Spec -...default:$Default$, see @Tag.Default@. 
-..include:seqan/align.h -*/ - template class AlignConfig {}; @@ -124,11 +106,11 @@ class AlignConfig template inline void _initFirstColumn(AlignConfig const, - TElement& element, - TCost const cost) + TElement& element, + TCost const cost) { - SEQAN_CHECKPOINT - element = cost; + SEQAN_CHECKPOINT + element = cost; } ////////////////////////////////////////////////////////////////////////////// @@ -136,11 +118,11 @@ _initFirstColumn(AlignConfig const, template inline void _initFirstColumn(AlignConfig const, - TElement& element, - TCost const) + TElement& element, + TCost const) { - SEQAN_CHECKPOINT - element = 0; + SEQAN_CHECKPOINT + element = 0; } ////////////////////////////////////////////////////////////////////////////// @@ -148,11 +130,11 @@ _initFirstColumn(AlignConfig const, template inline void _initFirstRow(AlignConfig const, - TElement& element, - TCost const cost) + TElement& element, + TCost const cost) { - SEQAN_CHECKPOINT - element = cost; + SEQAN_CHECKPOINT + element = cost; } ////////////////////////////////////////////////////////////////////////////// @@ -160,11 +142,11 @@ _initFirstRow(AlignConfig const, template inline void _initFirstRow(AlignConfig const, - TElement& element, - TCost const) + TElement& element, + TCost const) { - SEQAN_CHECKPOINT - element = 0; + SEQAN_CHECKPOINT + element = 0; } ////////////////////////////////////////////////////////////////////////////// @@ -175,13 +157,13 @@ _initFirstRow(AlignConfig const, template inline void _lastRow(AlignConfig const, - TValue1&, - TIndex1&, - TValue2 const, - TIndex2 const) + TValue1&, + TIndex1&, + TValue2 const, + TIndex2 const) { - SEQAN_CHECKPOINT - // Nop + SEQAN_CHECKPOINT + // Nop } ////////////////////////////////////////////////////////////////////////////// @@ -189,16 +171,16 @@ _lastRow(AlignConfig const, template inline void _lastRow(AlignConfig const, - TValue1& maxValue, - TIndex1& maxIndex, - TValue2 const val, - TIndex2 const index) + TValue1& maxValue, + TIndex1& 
maxIndex, + TValue2 const val, + TIndex2 const index) { - SEQAN_CHECKPOINT - if (val > maxValue[0]) { - maxValue[0] = val; - maxIndex[0] = index; - } + SEQAN_CHECKPOINT + if (val > maxValue[0]) { + maxValue[0] = val; + maxIndex[0] = index; + } } ////////////////////////////////////////////////////////////////////////////// @@ -206,12 +188,12 @@ _lastRow(AlignConfig const, template inline void _lastColumn(AlignConfig const, - TValue1& maxValue, - TIndex1&, - TColumn const& column) + TValue1& maxValue, + TIndex1&, + TColumn const& column) { - SEQAN_CHECKPOINT - maxValue[1] = column[length(column) - 1]; + SEQAN_CHECKPOINT + maxValue[1] = column[length(column) - 1]; } ////////////////////////////////////////////////////////////////////////////// @@ -219,23 +201,23 @@ _lastColumn(AlignConfig const, template inline void _lastColumn(AlignConfig const, - TValue1& maxValue, - TIndex1& maxIndex, - TColumn const& column) + TValue1& maxValue, + TIndex1& maxIndex, + TColumn const& column) { - SEQAN_CHECKPOINT; - typedef typename Size::Type TSize; - typedef typename Iterator::Type TColIter; - TSize limit = length(column) - 1; - maxValue[1] = column[limit]; - TColIter itCol = begin(column, Standard()); - TColIter itColEnd = end(column, Standard()); - for(TSize i = 0;itCol != itColEnd; ++i, ++itCol) { - if (*itCol > maxValue[1]) { - maxValue[1] = *itCol; - maxIndex[1] = i; - } - } + SEQAN_CHECKPOINT; + typedef typename Size::Type TSize; + typedef typename Iterator::Type TColIter; + TSize limit = length(column) - 1; + maxValue[1] = column[limit]; + TColIter itCol = begin(column, Standard()); + TColIter itColEnd = end(column, Standard()); + for(TSize i = 0;itCol != itColEnd; ++i, ++itCol) { + if (*itCol > maxValue[1]) { + maxValue[1] = *itCol; + maxIndex[1] = i; + } + } } ////////////////////////////////////////////////////////////////////////////// @@ -243,13 +225,13 @@ _lastColumn(AlignConfig const, template inline TScoreValue _maxOfAlignment(AlignConfig const, - TValue& maxValue, 
- TIndex&, - TSize const, - TSize const) + TValue& maxValue, + TIndex&, + TSize const, + TSize const) { - SEQAN_CHECKPOINT - return maxValue[1]; + SEQAN_CHECKPOINT + return maxValue[1]; } ////////////////////////////////////////////////////////////////////////////// @@ -257,14 +239,14 @@ _maxOfAlignment(AlignConfig const, template inline TScoreValue _maxOfAlignment(AlignConfig const, - TValue& maxValue, - TIndex& maxIndex, - TSize const len1, - TSize const) + TValue& maxValue, + TIndex& maxIndex, + TSize const len1, + TSize const) { - SEQAN_CHECKPOINT - maxIndex[0] = len1; - return maxValue[1]; + SEQAN_CHECKPOINT + maxIndex[0] = len1; + return maxValue[1]; } ////////////////////////////////////////////////////////////////////////////// @@ -272,14 +254,14 @@ _maxOfAlignment(AlignConfig const, template inline TScoreValue _maxOfAlignment(AlignConfig const, - TValue& maxValue, - TIndex& maxIndex, - TSize const, - TSize const len2) + TValue& maxValue, + TIndex& maxIndex, + TSize const, + TSize const len2) { - SEQAN_CHECKPOINT - maxIndex[1] = len2; - return maxValue[0]; + SEQAN_CHECKPOINT + maxIndex[1] = len2; + return maxValue[0]; } ////////////////////////////////////////////////////////////////////////////// @@ -287,16 +269,16 @@ _maxOfAlignment(AlignConfig const, template inline TScoreValue _maxOfAlignment(AlignConfig const, - TValue& maxValue, - TIndex& maxIndex, - TSize const len1, - TSize const len2) + TValue& maxValue, + TIndex& maxIndex, + TSize const len1, + TSize const len2) { - SEQAN_CHECKPOINT - // Find the maximum - if (maxValue[1] > maxValue[0]) maxIndex[0] = len1; - else maxIndex[1] = len2; - return (maxValue[0] > maxValue[1]) ? maxValue[0] : maxValue[1]; + SEQAN_CHECKPOINT + // Find the maximum + if (maxValue[1] > maxValue[0]) maxIndex[0] = len1; + else maxIndex[1] = len2; + return (maxValue[0] > maxValue[1]) ? 
maxValue[0] : maxValue[1]; } ////////////////////////////////////////////////////////////////////////////// @@ -304,14 +286,14 @@ _maxOfAlignment(AlignConfig const, template inline void _lastColumn(AlignConfig const, - TValue1& maxValue, - TIndex1& maxIndex, - TValue2 const val, - TIndex2 const row, - TIndex2 const col) + TValue1& maxValue, + TIndex1& maxIndex, + TValue2 const val, + TIndex2 const row, + TIndex2 const col) { - SEQAN_CHECKPOINT - maxValue[1] = val; maxIndex[2] = row; maxIndex[3] = col; + SEQAN_CHECKPOINT + maxValue[1] = val; maxIndex[2] = row; maxIndex[3] = col; } ////////////////////////////////////////////////////////////////////////////// @@ -319,29 +301,29 @@ _lastColumn(AlignConfig const, template inline void _lastColumn(AlignConfig const, - TValue1& maxValue, - TIndex1& maxIndex, - TValue2 const val, - TIndex2 const row, - TIndex2 const col) + TValue1& maxValue, + TIndex1& maxIndex, + TValue2 const val, + TIndex2 const row, + TIndex2 const col) { - SEQAN_CHECKPOINT - if (val > maxValue[1]) {maxValue[1] = val; maxIndex[2] = row; maxIndex[3] = col; } + SEQAN_CHECKPOINT + if (val > maxValue[1]) {maxValue[1] = val; maxIndex[2] = row; maxIndex[3] = col; } } ////////////////////////////////////////////////////////////////////////////// template inline void -_lastRow(AlignConfig const, - TValue1& maxValue, - TIndex1& maxIndex, - TValue2 const val, - TIndex2 const row, - TIndex2 const col) +_lastRow(AlignConfig const, + TValue1& maxValue, + TIndex1& maxIndex, + TValue2 const val, + TIndex2 const row, + TIndex2 const col) { - SEQAN_CHECKPOINT - maxValue[0] = val; maxIndex[0] = row; maxIndex[1] = col; + SEQAN_CHECKPOINT + maxValue[0] = val; maxIndex[0] = row; maxIndex[1] = col; } ////////////////////////////////////////////////////////////////////////////// @@ -349,14 +331,14 @@ _lastRow(AlignConfig const, template inline void _lastRow(AlignConfig const, - TValue1& maxValue, - TIndex1& maxIndex, - TValue2 const val, - TIndex2 const row, - TIndex2 const 
col) + TValue1& maxValue, + TIndex1& maxIndex, + TValue2 const val, + TIndex2 const row, + TIndex2 const col) { - SEQAN_CHECKPOINT - if (val > maxValue[0]) {maxValue[0] = val; maxIndex[0] = row; maxIndex[1] = col; } + SEQAN_CHECKPOINT + if (val > maxValue[0]) {maxValue[0] = val; maxIndex[0] = row; maxIndex[1] = col; } } @@ -366,14 +348,14 @@ template inline bool _configValueTop(AlignConfig const) { - return true; + return true; } template inline bool _configValueTop(AlignConfig const) { - return false; + return false; } ////////////////////////////////////////////////////////////////////////////// @@ -382,14 +364,14 @@ template inline bool _configValueLeft(AlignConfig const) { - return true; + return true; } template inline bool _configValueLeft(AlignConfig const) { - return false; + return false; } ////////////////////////////////////////////////////////////////////////////// @@ -398,14 +380,14 @@ template inline bool _configValueRight(AlignConfig const) { - return true; + return true; } template inline bool _configValueRight(AlignConfig const) { - return false; + return false; } ////////////////////////////////////////////////////////////////////////////// @@ -414,16 +396,16 @@ template inline bool _configValueBottom(AlignConfig const) { - return true; + return true; } template inline bool _configValueBottom(AlignConfig const) { - return false; + return false; } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_CONFIG_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_CONFIG_H_ diff --git a/seqan/align/align_iterator_base.h b/seqan/align/align_iterator_base.h index 26364d9..8a4f455 100644 --- a/seqan/align/align_iterator_base.h +++ b/seqan/align/align_iterator_base.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin 
+// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -45,6 +45,8 @@ namespace SEQAN_NAMESPACE_MAIN // TODO(holtgrew): Extend class Iter? /*! * @class AlignColIterator + * @extends Iter + * @headerfile * @brief Iterator for alignment columns. * * @signature template @@ -58,58 +60,58 @@ template class Iter > { public: - typedef typename Rows::Type TRows; - typedef typename Row::Type TRow; - typedef typename Iterator::Type TRowIterator; - typedef typename Position::Type TRowPosition; - typedef String TIterators; + typedef typename Rows::Type TRows; + typedef typename Row::Type TRow; + typedef typename Iterator::Type TRowIterator; + typedef typename Position::Type TRowPosition; + typedef String TIterators; - TAlign * data_host; - TIterators data_iterators; + TAlign * data_host; + TIterators data_iterators; public: - Iter() - { -SEQAN_CHECKPOINT - } - Iter(TAlign & _align): - data_host(& _align) - { -SEQAN_CHECKPOINT - typename Position::Type _i = length(rows(_align)); - resize(data_iterators, _i, Exact()); - } - Iter(TAlign & _align, TRowPosition _pos): - data_host(& _align) - { -SEQAN_CHECKPOINT - typename Position::Type _i = length(rows(_align)); - resize(data_iterators, _i, Exact()); - - while (_i > 0) - { - --_i; - data_iterators[_i] = iter(row(_align, _i), _pos); - } - } - Iter(Iter const & _other): - data_host(_other.data_host), - data_iterators(_other.data_iterators) - { - } - ~Iter() - { -SEQAN_CHECKPOINT - } - - Iter const & - operator = (Iter const & _other) - { -SEQAN_CHECKPOINT - data_host = _other.data_host; - data_iterators = _other.data_iterators; - return *this; - } + Iter() + { +SEQAN_CHECKPOINT + } + Iter(TAlign & _align): + data_host(& _align) + { +SEQAN_CHECKPOINT + typename Position::Type _i = length(rows(_align)); + resize(data_iterators, _i, Exact()); + } + Iter(TAlign & _align, TRowPosition _pos): + data_host(& _align) + { +SEQAN_CHECKPOINT + 
typename Position::Type _i = length(rows(_align)); + resize(data_iterators, _i, Exact()); + + while (_i > 0) + { + --_i; + data_iterators[_i] = iter(row(_align, _i), _pos); + } + } + Iter(Iter const & _other): + data_host(_other.data_host), + data_iterators(_other.data_iterators) + { + } + ~Iter() + { +SEQAN_CHECKPOINT + } + + Iter const & + operator = (Iter const & _other) + { +SEQAN_CHECKPOINT + data_host = _other.data_host; + data_iterators = _other.data_iterators; + return *this; + } //____________________________________________________________________________ }; @@ -122,14 +124,14 @@ inline TAlign & host(Iter > & me) { SEQAN_CHECKPOINT - return *me.data_host; + return *me.data_host; } template inline TAlign & host(Iter > const & me) { SEQAN_CHECKPOINT - return *me.data_host; + return *me.data_host; } ////////////////////////////////////////////////////////////////////////////// @@ -139,7 +141,7 @@ inline void setHost(Iter > & me, TAlign & _host) { SEQAN_CHECKPOINT - me.data_host = & _host; + me.data_host = & _host; } ////////////////////////////////////////////////////////////////////////////// @@ -149,14 +151,14 @@ inline typename Cols::Type container(Iter > & me) { SEQAN_CHECKPOINT - return cols(*me.data_host); + return cols(*me.data_host); } template inline typename Cols::Type container(Iter > const & me) { SEQAN_CHECKPOINT - return cols(*me.data_host); + return cols(*me.data_host); } ////////////////////////////////////////////////////////////////////////////// @@ -166,40 +168,40 @@ inline void goNext(Iter > & me) { SEQAN_CHECKPOINT - typedef typename Row::Type TRow; - typedef typename Iterator::Type TRowIterator; - typedef String TIterators; - typedef typename Iterator::Type TIteratorsIterator; + typedef typename Row::Type TRow; + typedef typename Iterator::Type TRowIterator; + typedef String TIterators; + typedef typename Iterator::Type TIteratorsIterator; - TIteratorsIterator _it = begin(me.data_iterators); - TIteratorsIterator _it_end = 
end(me.data_iterators); + TIteratorsIterator _it = begin(me.data_iterators); + TIteratorsIterator _it_end = end(me.data_iterators); - while (_it != _it_end) - { - goNext(*_it); - ++_it; - } + while (_it != _it_end) + { + goNext(*_it); + ++_it; + } } //____________________________________________________________________________ template -inline Iter > & +inline Iter > & operator ++(Iter > & me) { SEQAN_CHECKPOINT - goNext(me); - return me; + goNext(me); + return me; } //____________________________________________________________________________ template -inline Iter > +inline Iter > operator ++(Iter > & me, int) { SEQAN_CHECKPOINT - Iter > ret = me; - goNext(me); - return ret; + Iter > ret = me; + goNext(me); + return ret; } ////////////////////////////////////////////////////////////////////////////// @@ -209,40 +211,40 @@ inline void goPrevious(Iter > & me) { SEQAN_CHECKPOINT - typedef typename Row::Type TRow; - typedef typename Iterator::Type TRowIterator; - typedef String TIterators; - typedef typename Iterator::Type TIteratorsIterator; + typedef typename Row::Type TRow; + typedef typename Iterator::Type TRowIterator; + typedef String TIterators; + typedef typename Iterator::Type TIteratorsIterator; - TIteratorsIterator _it = begin(me.data_iterators); - TIteratorsIterator _it_end = end(me.data_iterators); + TIteratorsIterator _it = begin(me.data_iterators); + TIteratorsIterator _it_end = end(me.data_iterators); - while (_it != _it_end) - { - goPrevious(*_it); - ++_it; - } + while (_it != _it_end) + { + goPrevious(*_it); + ++_it; + } } //____________________________________________________________________________ template -inline Iter > & +inline Iter > & operator --(Iter > & me) { SEQAN_CHECKPOINT - goPrevious(me); - return me; + goPrevious(me); + return me; } //____________________________________________________________________________ template -inline Iter > +inline Iter > operator --(Iter > & me, int) { SEQAN_CHECKPOINT - Iter > ret = me; - goPrevious(me); 
- return ret; + Iter > ret = me; + goPrevious(me); + return ret; } ////////////////////////////////////////////////////////////////////////////// @@ -250,34 +252,34 @@ SEQAN_CHECKPOINT template inline bool operator ==(Iter > & _left, - Iter > & _right) + Iter > & _right) { SEQAN_CHECKPOINT - return getValue(_left.data_iterators, 0) == getValue(_right.data_iterators, 0); + return getValue(_left.data_iterators, 0) == getValue(_right.data_iterators, 0); } template inline bool operator ==(Iter > const & _left, - Iter > & _right) + Iter > & _right) { SEQAN_CHECKPOINT - return value(_left.data_iterators, 0) == value(_right.data_iterators, 0); + return value(_left.data_iterators, 0) == value(_right.data_iterators, 0); } template inline bool operator ==(Iter > & _left, - Iter > const & _right) + Iter > const & _right) { SEQAN_CHECKPOINT - return value(_left.data_iterators, 0) == value(_right.data_iterators, 0); + return value(_left.data_iterators, 0) == value(_right.data_iterators, 0); } template inline bool operator ==(Iter > const & _left, - Iter > const & _right) + Iter > const & _right) { SEQAN_CHECKPOINT - return value(_left.data_iterators, 0) == value(_right.data_iterators, 0); + return value(_left.data_iterators, 0) == value(_right.data_iterators, 0); } ////////////////////////////////////////////////////////////////////////////// @@ -285,34 +287,34 @@ SEQAN_CHECKPOINT template inline bool operator !=(Iter > & _left, - Iter > & _right) + Iter > & _right) { SEQAN_CHECKPOINT - return value(_left.data_iterators, 0) != value(_right.data_iterators, 0); + return value(_left.data_iterators, 0) != value(_right.data_iterators, 0); } template inline bool operator !=(Iter > const & _left, - Iter > & _right) + Iter > & _right) { SEQAN_CHECKPOINT - return value(_left.data_iterators, 0) != value(_right.data_iterators, 0); + return value(_left.data_iterators, 0) != value(_right.data_iterators, 0); } template inline bool operator !=(Iter > & _left, - Iter > const & _right) + Iter > 
const & _right) { SEQAN_CHECKPOINT - return value(_left.data_iterators, 0) != value(_right.data_iterators, 0); + return value(_left.data_iterators, 0) != value(_right.data_iterators, 0); } template inline bool operator !=(Iter > const & _left, - Iter > const & _right) + Iter > const & _right) { SEQAN_CHECKPOINT - return value(_left.data_iterators, 0) != value(_right.data_iterators, 0); + return value(_left.data_iterators, 0) != value(_right.data_iterators, 0); } ////////////////////////////////////////////////////////////////////////////// @@ -320,36 +322,36 @@ SEQAN_CHECKPOINT template inline typename Reference::Type value(Iter > & me, - TPosition pos_) + TPosition pos_) { SEQAN_CHECKPOINT - return value(me.data_iterators[pos_]); + return value(me.data_iterators[pos_]); } template inline typename Reference::Type value(Iter > const & me, - TPosition pos_) + TPosition pos_) { SEQAN_CHECKPOINT - return value(me.data_iterators[pos_]); + return value(me.data_iterators[pos_]); } ////////////////////////////////////////////////////////////////////////////// template inline typename GetValue::Type getValue(Iter > & me, - TPosition pos_) + TPosition pos_) { SEQAN_CHECKPOINT - return getValue(me.data_iterators[pos_]); + return getValue(me.data_iterators[pos_]); } template inline typename GetValue::Type getValue(Iter > const & me, - TPosition pos_) + TPosition pos_) { SEQAN_CHECKPOINT - return getValue(me.data_iterators[pos_]); + return getValue(me.data_iterators[pos_]); } ////////////////////////////////////////////////////////////////////////////// @@ -357,38 +359,38 @@ SEQAN_CHECKPOINT template inline void assignValue(Iter > & me, - TPosition pos_, - TValue & val) + TPosition pos_, + TValue & val) { SEQAN_CHECKPOINT - return assignValue(me.data_iterators[pos_], val); + return assignValue(me.data_iterators[pos_], val); } template inline void assignValue(Iter > & me, - TPosition pos_, - TValue const & val) + TPosition pos_, + TValue const & val) { SEQAN_CHECKPOINT - return 
assignValue(me.data_iterators[pos_], val); + return assignValue(me.data_iterators[pos_], val); } template inline void assignValue(Iter > const & me, - TPosition pos_, - TValue & val) + TPosition pos_, + TValue & val) { SEQAN_CHECKPOINT - return assignValue(me.data_iterators[pos_], val); + return assignValue(me.data_iterators[pos_], val); } template inline void assignValue(Iter > const & me, - TPosition pos_, - TValue const & val) + TPosition pos_, + TValue const & val) { SEQAN_CHECKPOINT - return assignValue(me.data_iterators[pos_], val); + return assignValue(me.data_iterators[pos_], val); } ////////////////////////////////////////////////////////////////////////////// @@ -396,38 +398,38 @@ SEQAN_CHECKPOINT template inline void moveValue(Iter > & me, - TPosition pos_, - TValue & val) + TPosition pos_, + TValue & val) { SEQAN_CHECKPOINT - return moveValue(me.data_iterators[pos_], val); + return moveValue(me.data_iterators[pos_], val); } template inline void moveValue(Iter > & me, - TPosition pos_, - TValue const & val) + TPosition pos_, + TValue const & val) { SEQAN_CHECKPOINT - return moveValue(me.data_iterators[pos_], val); + return moveValue(me.data_iterators[pos_], val); } template inline void moveValue(Iter > const & me, - TPosition pos_, - TValue & val) + TPosition pos_, + TValue & val) { SEQAN_CHECKPOINT - return moveValue(me.data_iterators[pos_], val); + return moveValue(me.data_iterators[pos_], val); } template inline void moveValue(Iter > const & me, - TPosition pos_, - TValue const & val) + TPosition pos_, + TValue const & val) { SEQAN_CHECKPOINT - return moveValue(me.data_iterators[pos_], val); + return moveValue(me.data_iterators[pos_], val); } ////////////////////////////////////////////////////////////////////////////// @@ -436,96 +438,96 @@ SEQAN_CHECKPOINT //disabled since GapsIterator has no operator - and + /* template -inline Iter > & +inline Iter > & operator +=(Iter > & me, - TSize size) + TSize size) { SEQAN_CHECKPOINT - typedef typename 
Row::Type TRow; - typedef typename Iterator::Type TRowIterator; - typedef String TIterators; - typedef typename Iterator::Type TIteratorsIterator; + typedef typename Row::Type TRow; + typedef typename Iterator::Type TRowIterator; + typedef String TIterators; + typedef typename Iterator::Type TIteratorsIterator; - TIteratorsIterator _it = begin(me.data_iterators); - TIteratorsIterator _it_end = end(me.data_iterators); + TIteratorsIterator _it = begin(me.data_iterators); + TIteratorsIterator _it_end = end(me.data_iterators); - while (_it != _it_end) - { - *_it += size; - ++_it; - } - return me; + while (_it != _it_end) + { + *_it += size; + ++_it; + } + return me; } ////////////////////////////////////////////////////////////////////////////// template -inline Iter > +inline Iter > operator +(Iter > & me, - TSize size) + TSize size) { SEQAN_CHECKPOINT - Iter > ret = me; - me += size; - return me; + Iter > ret = me; + me += size; + return me; } template -inline Iter > +inline Iter > operator +(Iter > const & me, - TSize size) + TSize size) { SEQAN_CHECKPOINT - Iter > ret = me; - me += size; - return me; + Iter > ret = me; + me += size; + return me; } ////////////////////////////////////////////////////////////////////////////// template -inline Iter > & +inline Iter > & operator -=(Iter > & me, - TSize size) + TSize size) { SEQAN_CHECKPOINT - typedef typename Row::Type TRow; - typedef typename Iterator::Type TRowIterator; - typedef String TIterators; - typedef typename Iterator::Type TIteratorsIterator; + typedef typename Row::Type TRow; + typedef typename Iterator::Type TRowIterator; + typedef String TIterators; + typedef typename Iterator::Type TIteratorsIterator; - TIteratorsIterator _it = begin(me.data_iterators); - TIteratorsIterator _it_end = end(me.data_iterators); + TIteratorsIterator _it = begin(me.data_iterators); + TIteratorsIterator _it_end = end(me.data_iterators); - while (_it != _it_end) - { - *_it -= size; - ++_it; - } - return me; + while (_it != 
_it_end) + { + *_it -= size; + ++_it; + } + return me; } ////////////////////////////////////////////////////////////////////////////// template -inline Iter > +inline Iter > operator -(Iter > & me, - TSize size) + TSize size) { SEQAN_CHECKPOINT - Iter > ret = me; - me -= size; - return me; + Iter > ret = me; + me -= size; + return me; } template -inline Iter > +inline Iter > operator -(Iter > const & me, - TSize size) + TSize size) { SEQAN_CHECKPOINT - Iter > ret = me; - me -= size; - return me; + Iter > ret = me; + me -= size; + return me; } //____________________________________________________________________________ @@ -533,13 +535,13 @@ SEQAN_CHECKPOINT template inline typename Difference::Type operator -(Iter > const & left, - Iter > const & right) + Iter > const & right) { SEQAN_CHECKPOINT - SEQAN_ASSERT_GT(length(left.data_iterators), 0u); - SEQAN_ASSERT_GT(length(right.data_iterators), 0u); + SEQAN_ASSERT_GT(length(left.data_iterators), 0u); + SEQAN_ASSERT_GT(length(right.data_iterators), 0u); - return (left.data_iterators[0] - right.data_iterators[0]); + return (left.data_iterators[0] - right.data_iterators[0]); } ////////////////////////////////////////////////////////////////////////////// @@ -549,14 +551,14 @@ inline typename Position::Type position(Iter > & me) { SEQAN_CHECKPOINT - return position(me.data_iterators[0], row(host(me), 0)); + return position(me.data_iterators[0], row(host(me), 0)); } template inline typename Position::Type position(Iter > const & me) { SEQAN_CHECKPOINT - return position(me.data_iterators[0], row(host(me), 0)); + return position(me.data_iterators[0], row(host(me), 0)); } */ ////////////////////////////////////////////////////////////////////////////// diff --git a/seqan/align/align_metafunctions.h b/seqan/align/align_metafunctions.h index 7cd7f67..33e76e1 100644 --- a/seqan/align/align_metafunctions.h +++ b/seqan/align/align_metafunctions.h @@ -1,7 +1,7 @@ // 
========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Align-specific metafunctions. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_METAFUNCTIONS_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_METAFUNCTIONS_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_METAFUNCTIONS_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_METAFUNCTIONS_H_ namespace seqan { @@ -55,17 +55,6 @@ namespace seqan { // Metafunction Cols // ---------------------------------------------------------------------------- -/** -.Metafunction.Cols: -..cat:Alignments -..summary:Type of column container of an alignment. -..signature:Cols::Type -..param.T:An alignment type. -...type:Class.Align -..returns.param.Type:The type of the container that allows access to the columns of $T$. -..include:seqan/align.h -*/ - template struct Cols; @@ -85,20 +74,6 @@ struct Cols; * @return Type The resulting type. */ -/** -.Metafunction.Col: -..cat:Alignments -..summary:Type of a column in an alignment. -..signature:Col::Type -..param.T:An alignment type. -...type:Class.Align -..returns.param.Type:The column type of $T$. -..remarks:The returned type is equivalent to $Value::Type>::Type$. -..see:Metafunction.Cols -..see:Metafunction.Value -..include:seqan/align.h -*/ - template struct Col : Value::Type> {}; @@ -107,18 +82,6 @@ struct Col : Value::Type> // Metafunction Rows // ---------------------------------------------------------------------------- -/** -.Metafunction.Rows: -..cat:Alignments -..summary:Type of row container of an alignment. 
-..signature:Rows::Type -..param.T:An alignment type. -...type:Class.Align -..returns.param.Type:The type of the container that allows access to the rows of $T$. -..see:Metafunction.Cols -..include:seqan/align.h -*/ - template struct Rows; @@ -126,20 +89,6 @@ struct Rows; // Metafunction Row // ---------------------------------------------------------------------------- -/** -.Metafunction.Row: -..cat:Alignments -..summary:Type of a row in an alignment. -..signature:Row::Type -..param.T:An alignment type. -...type:Class.Align -..returns.param.Type:The row type of $T$. -..remarks:The returned type is equivalent to $Value::Type>::Type$. -..see:Metafunction.Rows -..see:Metafunction.Value -..include:seqan/align.h -*/ - template struct Row : Value::Type> {}; @@ -154,18 +103,6 @@ struct Row // Metafunction StringSetType // ---------------------------------------------------------------------------- -/** -.Metafunction.StringSetType: -..cat:Alignments -..summary:Return type of @Function.stringSet@ function. -..signature:StringSetType::Type -..param.T:Alignment data structure. -..param.T.type:Spec.Alignment Graph -..param.T.type:Class.Align -..returns.param.Type:A @Class.StringSet.string set@ type of a reference to a string set type. 
-..see:Function.stringSet -..include:seqan/align.h -*/ template struct StringSetType; @@ -175,4 +112,4 @@ struct StringSetType; } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_METAFUNCTIONS_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_METAFUNCTIONS_H_ diff --git a/seqan/align/align_traceback.h b/seqan/align/align_traceback.h index b6de5bf..1a5f269 100644 --- a/seqan/align/align_traceback.h +++ b/seqan/align/align_traceback.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,8 @@ // defined. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_TRACEBACK_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_TRACEBACK_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_TRACEBACK_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_TRACEBACK_H_ namespace seqan { @@ -59,15 +59,13 @@ namespace seqan { // TODO(holtgrew): Mark as internal with underscore? /*! - * @tag TraceBack + * @typedef TraceBack * @headerfile * @brief Traceback value. * * @signature struct TraceBack_; * @signature typedef SimpleType TraceBack. * - * @section Remarks - * * The ValueSize of TraceBack is 3. The values are defined in the following way: * *
    @@ -77,19 +75,6 @@ namespace seqan { *
*/ -/** -.Spec.TraceBack: -..cat:Alphabets -..summary: Trace back values. -..general:Class.SimpleType -..signature:TraceBack -..remarks: -...text:The @Metafunction.ValueSize@ of $TraceBack$ is 3. -The values are defined in the following way: 0=Diagonal Move, 1=Horizontal Move, 2=Vertical Move -..see:Metafunction.ValueSize -..include:seqan/align.h -*/ - struct TraceBack_ {}; typedef SimpleType TraceBack; @@ -123,12 +108,12 @@ template <> struct BitsPerValue */ /*! - * @var TSizes AlignTraceback#sizes + * @var TSizes AlignTraceback::sizes * @brief The traceback lengths. */ /*! - * @var TLengths AlignTraceback#tsv + * @var TLengths AlignTraceback::tsv * @brief The traceback lengths. */ @@ -136,9 +121,9 @@ template struct AlignTraceback { // Run lengths in the align matrix. - String sizes; + String sizes; // Trace values: 0 = diagonal, 1 = horizontal, 2 = vertical. - String tvs; + String tvs; }; // ============================================================================ @@ -167,10 +152,10 @@ _alignTracePrint(AlignTraceback & tb, TPos const segLen, TTraceValue const tv) { - appendValue(tb.sizes, segLen); - appendValue(tb.tvs, tv); + appendValue(tb.sizes, segLen); + appendValue(tb.tvs, tv); } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGN_TRACEBACK_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_TRACEBACK_H_ diff --git a/seqan/align/alignment_algorithm_tags.h b/seqan/align/alignment_algorithm_tags.h index 41008f4..321cbcc 100644 --- a/seqan/align/alignment_algorithm_tags.h +++ b/seqan/align/alignment_algorithm_tags.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ // independently from the algorithms. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGNMENT_ALGORITHM_TAGS_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGNMENT_ALGORITHM_TAGS_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGNMENT_ALGORITHM_TAGS_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_ALIGNMENT_ALGORITHM_TAGS_H_ namespace seqan { @@ -68,21 +68,6 @@ namespace seqan { * @signature typedef Tag Gotoh; */ -/** -.Tag.Pairwise Global Alignment Algorithms -..cat:Alignments -..summary:Tags used for selecting pairwise global alignment algorithms. -..tag -...Gotoh:Gotoh's for affine gap costs. -...NeedlemanWunsch:The Needleman-Wunsch algorithm for linear gap costs. -...Hirschberg:Hirschberg's algorithm using linear space. -...MyersBitVector:Myer's bit-vector algorithm. -...MyersHirschberg:Combination of Myer's and Hirschberg's algorithm. -..see:Function.globalAlignment -..see:Function.globalAlignmentScore -..include:seqan/align.h -*/ - struct Gotoh_; typedef Tag Gotoh; @@ -143,18 +128,6 @@ typedef Tag MyersHirschberg; * @brief Tags for selecting algorithms. */ -/** -.Tag.Pairwise Local Alignment Algorithms -..cat:Alignments -..summary:Tags used for selecting pairwise global alignment algorithms. -..tag -...SmithWaterman:Smith-Waterman algorithm for local alignments. -...WatermanEggert:Smith-Waterman algorithm with declumping to identify suboptimal local alignments. -..see:Function.localAlignment -..see:Class.LocalAlignmentEnumerator -..include:seqan/align.h -*/ - /*! 
* @tag PairwiseLocalAlignmentAlgorithms#SmithWaterman * @headerfile @@ -189,4 +162,4 @@ typedef Tag WatermanEggert; } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGNMENT_ALGORITHM_TAGS_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGNMENT_ALGORITHM_TAGS_H_ diff --git a/seqan/align/alignment_operations.h b/seqan/align/alignment_operations.h index e2c705c..949bf33 100644 --- a/seqan/align/alignment_operations.h +++ b/seqan/align/alignment_operations.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Operations on alignments such as integration // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGNMENT_OPERATIONS_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGNMENT_OPERATIONS_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGNMENT_OPERATIONS_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_ALIGNMENT_OPERATIONS_H_ namespace seqan { @@ -55,6 +55,48 @@ namespace seqan { // Functions // ============================================================================ +template +inline void +integrateGaps(Gaps & targetRow, + Gaps const & sourceRow, + TPos const viewPos) +{ + typedef typename Iterator, Standard>::Type TTargetIt; + typedef typename Iterator const, Standard>::Type TSourceIt; + + // This assertion ensures that the number of sequence characters after viewPos is greater than or equal to + // the number of source characters in the clipped infix row. 
+ SEQAN_ASSERT_GEQ(endPosition(targetRow) - toSourcePosition(targetRow, viewPos), + endPosition(sourceRow) - beginPosition(sourceRow)); + + // init iterators + TTargetIt it = iter(targetRow, viewPos); + + // walk through Gaps containers and copy gaps + for (TSourceIt sIt = begin(sourceRow, Standard()), sItEnd = end(sourceRow, Standard()); sIt != sItEnd;) + { + TPos gapSize = countGaps(sIt); + insertGaps(it, gapSize); + goFurther(it, gapSize+1); + goFurther(sIt, gapSize+1); + } +} + +template +inline void +integrateGaps(Gaps & targetRow, + Gaps const & sourceRow) +{ + typename Position::Type viewPos = beginPosition(source(sourceRow)) // correct for infixes + - beginPosition(source(targetRow)) // ... + + beginPosition(sourceRow); // respect source clipping + + integrateGaps(targetRow, sourceRow, toViewPosition(targetRow, viewPos)); +} + // ---------------------------------------------------------------------------- // Function integrateAlign() // ---------------------------------------------------------------------------- @@ -66,94 +108,42 @@ namespace seqan { * * @signature void integrateAlign(align1, align2[, positions]); * - * @param align1 Target Alignment object into which align2 is to be integrated. - * @param align2 Alignment object that is to be integrated into align1. - * @param positions The integration positions in align1 for all rows (view positions), String of positions. + * @param[in,out] align1 Target Alignment object into which align2 is to be integrated. + * @param[in] align2 Alignment object that is to be integrated into align1. + * @param[in] positions The integration positions in align1 for all rows (view positions), String of positions. * * @section Examples * - * @include demos/align/integrate_align.cpp + * @include demos/dox/align/integrate_align.cpp * * The output is as follows: * - * @include demos/align/integrate_align.cpp.stdout - */ - -/** -.Function.integrateAlign -..summary:Integrates an alignment into another by copying the gaps. 
-..cat:Alignments -...type:Class.Align -..signature:integrateAlign(align1, align2[, positions]) -..param.align1:Alignment object into which align2 is to be integrated. -...type:Class.Align -..param.align2:Alignment object that is to be integrated into align1. -...type:Class.Align -..param.positions:The integration positions in align1 for all rows (view positions). -...type:Class.String -..remarks:If the integration positions are not specified, the sources of align2 have to be @Metafunction.Infix@es of the sources of align1. -..include:seqan/align.h + * @include demos/dox/align/integrate_align.cpp.stdout */ -template +template void integrateAlign(Align & align, Align const & infixAlign, String const & viewPos) { - typedef Align TAlign; - typedef Align TInfixAlign; - typedef typename Size::Type TSize; - - typedef typename Row::Type TRow; - typedef typename Row::Type TInfixRow; - - // Iterators on align and infixAlign. - typename Iterator::Type it; - typedef typename Iterator::Type TInfixRowIt; - - + SEQAN_ASSERT_EQ_MSG(length(rows(infixAlign)), length(rows(align)), "Both align objects need same number of rows."); + typedef typename Size >::Type TSize; + //NOTE(h-2): could be parallelized for (TSize i = 0; i < length(rows(align)); ++i) - { - TInfixRow const & infixRow = row(infixAlign, i); - // This assertion ensures that the number of sequence characters after viewPos[i] is greater than or equal to - // the number of source characters in the clipped infix row. 
- SEQAN_ASSERT_GEQ(endPosition(row(align, i)) - toSourcePosition(row(align, i), viewPos[i]), - endPosition(infixRow) - beginPosition(infixRow)); - - // init iterators - it = iter(row(align, i), value(viewPos, i)); - - // walk through Gaps containers and copy gaps - for (TInfixRowIt infixIt = begin(infixRow, Standard()); !atEnd(infixIt);) - { - TSize gapSize = countGaps(infixIt); - insertGaps(it, gapSize); - goFurther(it, gapSize+1); - goFurther(infixIt, gapSize+1); - } - } + integrateGaps(row(align, i), row(infixAlign, i), viewPos[i]); } -template -void integrateAlign(Align & align, - Align::Type, TSpec2> const & infixAlign) +template +void integrateAlign(Align & align, + Align const & infixAlign) { - typedef typename Size::Type TSize; - typedef typename Position >::Type>::Type TPos; - - String viewPos; - TPos pos; - for (TSize i = 0; i < length(rows(infixAlign)); ++i) - { - pos = beginPosition(source(row(infixAlign, i))) - - beginPosition(source(row(align, i))) - + beginPosition(row(infixAlign, i)); - appendValue(viewPos, toViewPosition(row(align, i), pos)); - } - - integrateAlign(align, infixAlign, viewPos); + SEQAN_ASSERT_EQ_MSG(length(rows(infixAlign)), length(rows(align)), "Both align objects need same number of rows."); + typedef typename Size >::Type TSize; + //NOTE(h-2): could be parallelized + for (TSize i = 0; i < length(rows(align)); ++i) + integrateGaps(row(align, i), row(infixAlign, i)); } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_ALIGNMENT_OPERATIONS_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGNMENT_OPERATIONS_H_ diff --git a/seqan/align/dp_algorithm_impl.h b/seqan/align/dp_algorithm_impl.h index 7b31030..32a7ba8 100644 --- a/seqan/align/dp_algorithm_impl.h +++ b/seqan/align/dp_algorithm_impl.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// 
Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -130,8 +130,8 @@ // sequence to determine there orientation within the matrix. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_ALGORITHM_IMPL_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_ALGORITHM_IMPL_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_ALGORITHM_IMPL_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_ALGORITHM_IMPL_H_ namespace seqan { @@ -159,7 +159,7 @@ namespace seqan { template inline bool _checkBandProperties(TSequenceH const & /*seqH*/, TSequenceV const & /*seqV*/, - DPBand_ const & /*band*/, + DPBandConfig const & /*band*/, TAlignmentProfile const & /*alignProfile*/) { return true; @@ -168,7 +168,7 @@ inline bool _checkBandProperties(TSequenceH const & /*seqH*/, template inline bool _checkBandProperties(TSequenceH const & seqH, TSequenceV const & seqV, - DPBand_ const & band, + DPBandConfig const & band, TAlignmentProfile const & /*alignProfile*/) { typedef typename MakeSigned::Type>::Type TSignedSize; @@ -238,7 +238,7 @@ inline bool _isValidDPSettings(TSequenceH const & seqH, // Returns true if a band is selected, otherwise false. 
template inline bool -_isBandEnabled(DPBand_ const & /*band*/) +_isBandEnabled(DPBandConfig const & /*band*/) { return IsSameType::VALUE; } @@ -270,14 +270,18 @@ _computeCell(TDPScout & scout, _computeScore(activeCell, previousDiagonal, previousHorizontal, previousVertical, seqHVal, seqVVal, scoringScheme, typename RecursionDirection_::Type(), TDPProfile())); -// std::cout << "("<< activeCell._score << "," << previousDiagonal._score << "," << previousHorizontal._score << "," << previousVertical._score << ") "; + if (TrackingEnabled_::VALUE) { - bool isLastColumn = IsSameType::VALUE; - bool isLastRow = And, - Or, - IsSameType > >::VALUE; - _scoutBestScore(scout, activeCell, traceMatrixNavigator, isLastColumn, isLastRow); + typedef typename IsSameType::Type TIsLastColumn; + typedef typename And, Or< + IsSameType,IsSameType< + typename TColumnDescriptor::TLocation, FullColumn> > >::Type + TIsLastRow; + _scoutBestScore(scout, activeCell, traceMatrixNavigator, + TIsLastColumn(), TIsLastRow()); } } @@ -311,7 +315,7 @@ _computeTrack(TDPScout & scout, previousCellVertical(dpScoreMatrixNavigator), seqHValue, seqVValue, scoringScheme, TColumnDescriptor(), FirstCell(), TDPProfile()); // std::cerr << _scoreOfCell(value(dpScoreMatrixNavigator)) << " \t"; -// std::cerr << _scoreOfCell(value(dpScoreMatrixNavigator)) << "(" << _horizontalScoreOfCell(value(dpScoreMatrixNavigator)) << "," << _verticalScoreOfCell(value(dpScoreMatrixNavigator)) << ")" << " \t"; +// std::cerr << _scoreOfCell(value(dpScoreMatrixNavigator)) << "(" << _horizontalScoreOfCell(value(dpScoreMatrixNavigator)) << "," << _verticalScoreOfCell(value(dpScoreMatrixNavigator)) << ")" << " \t"; TSeqVIterator iter = seqBegin; TSeqVIterator itEnd = (seqEnd - 1); @@ -346,8 +350,8 @@ _computeTrack(TDPScout & scout, // TODO(rmaerker): Debug code! 
//template +// typename TSeqVValue, typename TSeqVIterator, typename TScoringScheme, typename TDPProfile, +// typename TColumnDescriptor> //inline void //_computeTrack(TDPScout & scout, // TDPScoreMatrixNavigator & dpScoreMatrixNavigator, @@ -368,52 +372,52 @@ _computeTrack(TDPScout & scout, // _goNextCell(dpTraceMatrixNavigator, TColumnDescriptor(), FirstCell()); // // // Compute the first cell. -// _computeCell(scout, dpTraceMatrixNavigator, value(dpScoreMatrixNavigator), -// previousCellDiagonal(dpScoreMatrixNavigator), previousCellHorizontal(dpScoreMatrixNavigator), -// previousCellVertical(dpScoreMatrixNavigator), seqHValue, seqVValue, scoringScheme, -// TColumnDescriptor(), FirstCell(), TDPProfile()); -// // TODO(rmaerker): remove debug code -//// std::cout << _scoreOfCell(value(dpScoreMatrixNavigator)) << "\t"; -// std::stringstream stream; -// stream << _scoreOfCell(value(dpScoreMatrixNavigator)); -//// stream << col + row; -// testMatrix[col + row] = stream.str(); -// ++row; +// _computeCell(scout, dpTraceMatrixNavigator, value(dpScoreMatrixNavigator), +// previousCellDiagonal(dpScoreMatrixNavigator), previousCellHorizontal(dpScoreMatrixNavigator), +// previousCellVertical(dpScoreMatrixNavigator), seqHValue, seqVValue, scoringScheme, +// TColumnDescriptor(), FirstCell(), TDPProfile()); +// // TODO(rmaerker): remove debug code +//// std::cout << _scoreOfCell(value(dpScoreMatrixNavigator)) << "\t"; +// std::stringstream stream; +// stream << _scoreOfCell(value(dpScoreMatrixNavigator)); +//// stream << col + row; +// testMatrix[col + row] = stream.str(); +// ++row; // -// TSeqVIterator iter = seqBegin; -// TSeqVIterator itEnd = (seqEnd - 1); -// // Compute the inner cells of the current track. +// TSeqVIterator iter = seqBegin; +// TSeqVIterator itEnd = (seqEnd - 1); +// // Compute the inner cells of the current track. // for (; iter != itEnd; ++iter, ++row) // He will out of scope.... // { // // Set the iterator to the next cell within the track. 
// _goNextCell(dpScoreMatrixNavigator, TColumnDescriptor(), InnerCell()); // _goNextCell(dpTraceMatrixNavigator, TColumnDescriptor(), InnerCell()); // // Compute the inner cell. -// _computeCell(scout, dpTraceMatrixNavigator, value(dpScoreMatrixNavigator), -// previousCellDiagonal(dpScoreMatrixNavigator), previousCellHorizontal(dpScoreMatrixNavigator), -// previousCellVertical(dpScoreMatrixNavigator), seqHValue, value(iter), scoringScheme, -// TColumnDescriptor(), InnerCell(), TDPProfile()); -// // TODO(rmaerker): remove debug code -//// std::cout << _scoreOfCell(value(dpScoreMatrixNavigator)) << "\t"; -// stream.str(""); -// stream << _scoreOfCell(value(dpScoreMatrixNavigator)); -//// stream << col + row; -// testMatrix[col + row] = stream.str(); +// _computeCell(scout, dpTraceMatrixNavigator, value(dpScoreMatrixNavigator), +// previousCellDiagonal(dpScoreMatrixNavigator), previousCellHorizontal(dpScoreMatrixNavigator), +// previousCellVertical(dpScoreMatrixNavigator), seqHValue, value(iter), scoringScheme, +// TColumnDescriptor(), InnerCell(), TDPProfile()); +// // TODO(rmaerker): remove debug code +//// std::cout << _scoreOfCell(value(dpScoreMatrixNavigator)) << "\t"; +// stream.str(""); +// stream << _scoreOfCell(value(dpScoreMatrixNavigator)); +//// stream << col + row; +// testMatrix[col + row] = stream.str(); // } // // Set the iterator to the last cell of the track. -// _goNextCell(dpScoreMatrixNavigator, TColumnDescriptor(), LastCell()); -// _goNextCell(dpTraceMatrixNavigator, TColumnDescriptor(), LastCell()); -// // Compute the last cell. 
-// _computeCell(scout, dpTraceMatrixNavigator, value(dpScoreMatrixNavigator), -// previousCellDiagonal(dpScoreMatrixNavigator), previousCellHorizontal(dpScoreMatrixNavigator), -// previousCellVertical(dpScoreMatrixNavigator), seqHValue, value(iter), scoringScheme, -// TColumnDescriptor(), LastCell(), TDPProfile()); -// // TODO(rmaerker): remove debug code -//// std::cout << _scoreOfCell(value(dpScoreMatrixNavigator)) << "\n"; -// stream.str(""); -// stream << _scoreOfCell(value(dpScoreMatrixNavigator)); -//// stream << col + row; -// testMatrix[col + row] = stream.str(); +// _goNextCell(dpScoreMatrixNavigator, TColumnDescriptor(), LastCell()); +// _goNextCell(dpTraceMatrixNavigator, TColumnDescriptor(), LastCell()); +// // Compute the last cell. +// _computeCell(scout, dpTraceMatrixNavigator, value(dpScoreMatrixNavigator), +// previousCellDiagonal(dpScoreMatrixNavigator), previousCellHorizontal(dpScoreMatrixNavigator), +// previousCellVertical(dpScoreMatrixNavigator), seqHValue, value(iter), scoringScheme, +// TColumnDescriptor(), LastCell(), TDPProfile()); +// // TODO(rmaerker): remove debug code +//// std::cout << _scoreOfCell(value(dpScoreMatrixNavigator)) << "\n"; +// stream.str(""); +// stream << _scoreOfCell(value(dpScoreMatrixNavigator)); +//// stream << col + row; +// testMatrix[col + row] = stream.str(); //} // ---------------------------------------------------------------------------- @@ -583,7 +587,7 @@ _computeBandedAlignment(TDPScout & scout, MetaColumnDescriptor(), FirstCell(), TDPProfile()); // we might need to additionally track this point. 
if (TrackingEnabled_ >, FirstCell>::VALUE) - _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, true, false); + _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, True(), False()); return; } if (seqHIterEndColumnBottom == begin(seqH, Rooted())) @@ -600,7 +604,7 @@ _computeBandedAlignment(TDPScout & scout, MetaColumnDescriptor(), FirstCell(), TDPProfile()); // We might need to additionally track this point. if (TrackingEnabled_ >, LastCell>::VALUE) - _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, false, true); + _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, False(), True()); return; } @@ -635,7 +639,7 @@ _computeBandedAlignment(TDPScout & scout, MetaColumnDescriptor(), FirstCell(), TDPProfile()); // we might need to additionally track this point. if (TrackingEnabled_ >, FirstCell>::VALUE) - _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, false, false); + _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, False(), False()); } else // Upper diagonal >= 0 and lower Diagonal < 0 if (lowerDiagonal(band) <= -seqVlength) // The band is bounded by the top and bottom of the matrix. @@ -684,7 +688,7 @@ _computeBandedAlignment(TDPScout & scout, // We might want to track the current cell here, since this is the first cell that crosses the bottom but is // not part of the FullColumn tracks. 
if (TrackingEnabled_ >, LastCell>::VALUE) - _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, false, true); + _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, False(), True()); for (; seqHIter != seqHIterEndColumnMiddle; ++seqHIter) { _computeTrack(scout, dpScoreMatrixNavigator, dpTraceMatrixNavigator, @@ -719,7 +723,7 @@ _computeBandedAlignment(TDPScout & scout, { if (lowerDiagonal(band) + seqVlength < seqHlength) { - _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, false, true); + _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, False(), True()); } } @@ -759,7 +763,7 @@ _computeBandedAlignment(TDPScout & scout, MetaColumnDescriptor(), FirstCell(), TDPProfile()); // We might need to additionally track this point. if (TrackingEnabled_ >, LastCell>::VALUE) - _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, false, true); + _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, False(), True()); } else if (seqHIter == end(seqH, Rooted()) - 1) // Case 2: The band ends somewhere in the final column of the matrix. { @@ -779,7 +783,7 @@ _computeBandedAlignment(TDPScout & scout, MetaColumnDescriptor(), FirstCell(), TDPProfile()); // we might need to additionally track this point. if (TrackingEnabled_ >, LastCell>::VALUE) - _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, true, true); + _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, True(), True()); } else // Case2b: At least two cells intersect between the band and the matrix in the final column of the matrix. 
{ @@ -805,7 +809,7 @@ _computeBandedAlignment(TDPScout & scout, seqVBegin, seqVEnd, scoringScheme, MetaColumnDescriptor(), dpProfile); if (TrackingEnabled_ >, LastCell>::VALUE) - _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, true, true); + _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, True(), True()); } else _computeTrack(scout, dpScoreMatrixNavigator, dpTraceMatrixNavigator, @@ -830,7 +834,7 @@ _computeBandedAlignment(TDPScout & scout, seqVBegin, seqVEnd, scoringScheme, MetaColumnDescriptor(), dpProfile); if (TrackingEnabled_ >, LastCell>::VALUE) - _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, true, true); + _scoutBestScore(scout, value(dpScoreMatrixNavigator), dpTraceMatrixNavigator, True(), True()); } else { @@ -1346,28 +1350,49 @@ _correctTraceValue(TTraceNavigator & traceNavigator, value(traceNavigator) &= ~TraceBitMap_::DIAGONAL; value(traceNavigator) |= TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX; } +} +template +inline void +_correctTraceValue(TTraceNavigator & traceNavigator, + DPScout_, TDPScoutSpec> const & dpScout) +{ + _setToPosition(traceNavigator, maxHostPosition(dpScout)); + if (isGapExtension(dpScout._maxScore, DynamicGapExtensionVertical())) + { + value(traceNavigator) &= ~TraceBitMap_::DIAGONAL; + value(traceNavigator) |= TraceBitMap_::MAX_FROM_VERTICAL_MATRIX; + } + else if (isGapExtension(dpScout._maxScore, DynamicGapExtensionHorizontal())) + { + value(traceNavigator) &= ~TraceBitMap_::DIAGONAL; + value(traceNavigator) |= TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX; + } } // ---------------------------------------------------------------------------- // Function _computeAligmnment() // ---------------------------------------------------------------------------- -template +template inline typename Value::Type -_computeAlignment(TTraceTarget & traceSegments, +_computeAlignment(DPContext & dpContext, + TTraceTarget & traceSegments, TScoutState & scoutState, 
TSequenceH const & seqH, TSequenceV const & seqV, TScoreScheme const & scoreScheme, - DPBand_ const & band, - DPProfile_ const & dpProfile) + DPBandConfig const & band, + DPProfile_ const & dpProfile) { - typedef typename Value::Type TScoreValue; - typedef DPCell_ TDPScoreValue; + typedef typename GetDPScoreMatrix >::Type TDPScoreMatrixHost; + typedef typename Value::Type TDPScoreValue; + + typedef typename GetDPTraceMatrix >::Type TDPTraceMatrixHost; + typedef typename Value::Type TTraceValue; + typedef typename DefaultScoreMatrixSpec_::Type TScoreMatrixSpec; - typedef typename TraceBitMap_::TTraceValue TTraceValue; typedef DPMatrix_ TDPScoreMatrix; typedef DPMatrix_ TDPTraceMatrix; @@ -1401,6 +1426,10 @@ _computeAlignment(TTraceTarget & traceSegments, setLength(dpTraceMatrix, +DPMatrixDimension_::VERTICAL, _min(static_cast(length(seqV)) + 1, bandSize)); } + // We set the host to the score matrix and the dp matrix. + setHost(dpScoreMatrix, getDpScoreMatrix(dpContext)); + setHost(dpTraceMatrix, getDpTraceMatrix(dpContext)); + resize(dpScoreMatrix); // We do not need to allocate the memory for the trace matrix if the traceback is disabled. 
if (IsTracebackEnabled_::VALUE) @@ -1442,20 +1471,6 @@ _computeAlignment(TTraceTarget & traceSegments, return maxScore(dpScout); } -template -inline typename Value::Type -_computeAlignment(TTraceTarget & traceSegments, - TSequenceH const & seqH, - TSequenceV const & seqV, - TScoreScheme const & scoreScheme, - DPBand_ const & band, - DPProfile_ const & dpProfile) -{ - DPScoutState_ noState; - return _computeAlignment(traceSegments, noState, seqH, seqV, scoreScheme, band, dpProfile); -} - } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_ALGORITHM_IMPL_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_ALGORITHM_IMPL_H_ diff --git a/seqan/align/dp_band.h b/seqan/align/dp_band.h index 49cc73f..545a10d 100644 --- a/seqan/align/dp_band.h +++ b/seqan/align/dp_band.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,12 +35,11 @@ // whether a band was selected or not. // ========================================================================== -// TODO(holtgrew): Documentation in this header necessary or internal only? +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_BAND_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_BAND_H_ -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_BAND_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_BAND_H_ - -namespace seqan { +namespace seqan +{ // ============================================================================ // Forwards @@ -50,6 +49,23 @@ namespace seqan { // Tags, Classes, Enums // ============================================================================ +/*! + * @defgroup DPBandSwitch + * @brief Tags used to switch between banded and unbanded alignment. 
+ * + * @tag DPBandSwitch#BandOn + * @brief Switches banded alignment on. + * @headerfile + * @signature struct BandOn_; + * @signature typedef Tag BandOn; + * + * @tag DPBandSwitch#BandOff + * @brief Switches banded alignment off. + * @headerfile + * @signature struct BandOff_; + * @signature typedef Tag BandOff; + */ + // ---------------------------------------------------------------------------- // Tag BandOff // ---------------------------------------------------------------------------- @@ -67,76 +83,149 @@ struct BandOn_; typedef Tag BandOn; // ---------------------------------------------------------------------------- -// Class DPBand_ +// Class DPBandConfig // ---------------------------------------------------------------------------- +/*! + * @class DPBandConfig + * @headerfile + * @brief Simple class to configure banded alignments. + * + * @signature template + * class DPBandConfig; + * + * @tparam TSwitch Tag to switch between banded and unbanded alignments. + * One of @link DPBandSwitch @endlink. Defaults to @link DPBandSwitch#BandOff @endlink. + * + * To compute banded alignments use @link DPBand @endlink as a shortcut for the DPBandConfig with + * band switched on. + */ + // Simple band class. -template -struct DPBand_ {}; +template +struct DPBandConfig {}; // ---------------------------------------------------------------------------- -// Class DPBand_ [BandOff] +// Class DPBandConfig [BandOff] // ---------------------------------------------------------------------------- // The specialization when using no band. // Per default the member variables _lowerDiagonal and _upperDiagonal are // always 0. template <> -struct DPBand_ +struct DPBandConfig { typedef int TPosition; }; // ---------------------------------------------------------------------------- -// Class DPBand_ [BandOn] +// Class DPBandConfig [BandOn] // ---------------------------------------------------------------------------- // The specialization when using a band. 
// On construction the diagonals are set to 0. template <> -struct DPBand_ +struct DPBandConfig { typedef int TPosition; int _lowerDiagonal; int _upperDiagonal; - DPBand_() : +/*! + * @fn DPBandConfig::DPBandConfig + * @brief Constructor. + * + * @signature DPBandConfig(); + * @signature DPBandConfig(lowerDiag, upperDiag); + * + * @tparam TSwitch Tag to switch between banded and unbanded alignments. One of @link DPBandSwitch @endlink. + * The second constructor is only supported when @link DPBandConfig @endlink is specialized with + * @link DPBandSwitch#BandOn @endlink. + * + * @param lowerDiag The value for the lower diagonal of the band. + * @param upperDiag The value for the upper diagonal of the band. + * + * A negative value for the diagonals indicates an intersection of the diagonal with the vertical sequence (y-axis) + * and a positive value indicates an intersection with the horizontal sequence (x-axis). + * The value of the lower diagonal has to compare less or equal to the value of the upper diagonal. + */ + + DPBandConfig() : _lowerDiagonal(0), _upperDiagonal(0) {} - DPBand_(int lowerDiagonal, int upperDiagonal) : - _lowerDiagonal(lowerDiagonal), _upperDiagonal(upperDiagonal) {} + DPBandConfig(int lowerDiagonal, int upperDiagonal) : + _lowerDiagonal(lowerDiagonal), _upperDiagonal(upperDiagonal) + { + SEQAN_ASSERT_LEQ(lowerDiagonal, upperDiagonal); + } }; +/*! + * @typedef DPBand + * @headerfile + * @brief Global typedef used for @link DPBandConfig @endlink specialized with @link DPBandSwitch#BandOn @endlink. + * + * @signature typedef DPBandConfig DPBand; + */ + +// Typedef for Band. +typedef DPBandConfig DPBand; + // ============================================================================ // Metafunctions // ============================================================================ +/*! + * @mfn DPBandConfig#Position + * @headerfile + * @brief Metafunction returning the position type. 
+ * + * @signature typename Position::Type; + * + * @tparam T The type @link DPBandConfig @endlink to query the position type for. + * @return TPosition The position type. + */ + // ---------------------------------------------------------------------------- // Metafunction Position // ---------------------------------------------------------------------------- template -struct Position > +struct Position > { - typedef typename DPBand_::TPosition Type; + typedef typename DPBandConfig::TPosition Type; }; template -struct Position const>: - Position >{}; +struct Position const>: + Position >{}; // ---------------------------------------------------------------------------- // Metafunction Size // ---------------------------------------------------------------------------- +/*! + * @mfn DPBandConfig#Size + * @headerfile + * @brief Metafunction returning the size type. + * + * @signature typename Size::Type; + * + * @tparam T The type @link DPBandConfig @endlink to query the size type for. + * @return TSize The size type. + */ + template -struct Size >: - Position >{}; +struct Size > +{ + typedef unsigned Type; +}; + template -struct Size const>: - Size >{}; +struct Size const>: + Size >{}; // ============================================================================ // Functions @@ -146,14 +235,27 @@ struct Size const>: // Function setLowerDiagonal // ---------------------------------------------------------------------------- +/*! + * @fn DPBandConfig#setLowerDiagonal + * @headerfile + * @brief Sets the value of the lower diagonal. + * + * @signature setLowerDiagonal(obj, val); + * + * @param obj The object of type @link DPBandConfig @endlink to set the lower diagonal for. + * @param val The new value for the lower diagonal. + * + * @note If the band is switched off, this function defaults to no-op. 
+ */ + inline void -setLowerDiagonal(DPBand_ & /*dpBand*/, int /*newLowerDiagonal*/) +setLowerDiagonal(DPBandConfig & /*dpBand*/, int /*newLowerDiagonal*/) { //no-op } inline void -setLowerDiagonal(DPBand_ & dpBand, int newLowerDiagonal) +setLowerDiagonal(DPBandConfig & dpBand, int newLowerDiagonal) { dpBand._lowerDiagonal = newLowerDiagonal; } @@ -162,15 +264,28 @@ setLowerDiagonal(DPBand_ & dpBand, int newLowerDiagonal) // Function lowerDiagonal // ---------------------------------------------------------------------------- -inline int -lowerDiagonal(DPBand_ const & /*dpBand*/) +/*! + * @fn DPBandConfig#lowerDiagonal + * @headerfile + * @brief Returns the value of the lower diagonal. + * + * @signature TPosition lowerDiagonal(obj); + * + * @param obj The object of type @link DPBandConfig @endlink to query the lower diagonal for. + * + * @note If the band is switched off this function always returns 0. + * @return TPosition The value of the lower diagonal. + */ + +inline Position >::Type +lowerDiagonal(DPBandConfig const & /*dpBand*/) { return 0; } -template -inline int -lowerDiagonal(DPBand_ const & dpBand) +template +inline typename Position >::Type +lowerDiagonal(DPBandConfig const & dpBand) { return dpBand._lowerDiagonal; } @@ -179,14 +294,27 @@ lowerDiagonal(DPBand_ const & dpBand) // Function setUpperDiagonal // ---------------------------------------------------------------------------- +/*! + * @fn DPBandConfig#setUpperDiagonal + * @headerfile + * @brief Sets the value of the upper diagonal. + * + * @signature setUpperDiagonal(obj, val); + * + * @param obj The object of type @link DPBandConfig @endlink to set the upper diagonal for. + * @param val The new value for the upper diagonal. + * + * @note If the band is switched off, this function defaults to no-op. 
+ */ + inline void -setUpperDiagonal(DPBand_ & /*dpBand*/, int /*newUpperDiagonal*/) +setUpperDiagonal(DPBandConfig & /*dpBand*/, int /*newUpperDiagonal*/) { //no-op } inline void -setUpperDiagonal(DPBand_ & dpBand, int newUpperDiagonal) +setUpperDiagonal(DPBandConfig & dpBand, int newUpperDiagonal) { dpBand._upperDiagonal = newUpperDiagonal; } @@ -195,14 +323,28 @@ setUpperDiagonal(DPBand_ & dpBand, int newUpperDiagonal) // Function upperDiagonal // ---------------------------------------------------------------------------- -inline int -upperDiagonal(DPBand_ const & /*dpBand*/) +/*! + * @fn DPBandConfig#upperDiagonal + * @headerfile + * @brief Returns the value of the upper diagonal. + * + * @signature TPosition upperDiagonal(obj); + * + * @param obj The object of type @link DPBandConfig @endlink to query the upper diagonal for. + * + * @note If the band is switched off this function always returns 0. + * @return TPosition The value of the upper diagonal. + */ + +inline Position >::Type +upperDiagonal(DPBandConfig const & /*dpBand*/) { return 0; } -inline int -upperDiagonal(DPBand_ const & dpBand) +template +inline typename Position >::Type +upperDiagonal(DPBandConfig const & dpBand) { return dpBand._upperDiagonal; } @@ -211,18 +353,32 @@ upperDiagonal(DPBand_ const & dpBand) // Function bandSize() // ---------------------------------------------------------------------------- -inline unsigned int -bandSize(DPBand_ const &) +/*! + * @fn DPBandConfig#bandSize + * @headerfile + * @brief Returns the size of the band. + * + * @signature TSize bandSize(obj); + * + * @param obj The object of type @link DPBandConfig @endlink to query the band size for. + * + * @note If the band is switched off this function always returns 0. + * @return TSize The number of diagonals covered by the band. 
+ */ + +inline Size >::Type +bandSize(DPBandConfig const &) { return 0; } -inline unsigned int -bandSize(DPBand_ const & band) +template +inline typename Size >::Type +bandSize(DPBandConfig const & band) { return upperDiagonal(band) - lowerDiagonal(band) + 1; } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_BAND_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_BAND_H_ diff --git a/seqan/align/dp_cell.h b/seqan/align/dp_cell.h index 41d5c7f..99e2413 100644 --- a/seqan/align/dp_cell.h +++ b/seqan/align/dp_cell.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,8 @@ // scores necessary for the affine gap function. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_CELL_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_CELL_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_CELL_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_CELL_H_ namespace seqan { @@ -123,8 +123,8 @@ template const TScoreValue DPCellDefaultInfinity >::VALUE = MinValue::VALUE / 2; template -struct DPCellDefaultInfinity const>: - DPCellDefaultInfinity >{}; +struct DPCellDefaultInfinity const> : + public DPCellDefaultInfinity >{}; // ============================================================================ // Functions @@ -207,7 +207,7 @@ template inline typename Reference const>::Type _horizontalScoreOfCell(DPCell_ const & dpCell) { - return dpCell._score; + return dpCell._score; } // ---------------------------------------------------------------------------- @@ -222,6 +222,17 @@ _setHorizontalScoreOfCell(DPCell_ & /*dpCell*/, TScoreVal // no-op } +// ---------------------------------------------------------------------------- +// Function setGapExtension() +// ---------------------------------------------------------------------------- + +template +inline void +setGapExtension(DPCell_ & /*dpCell*/, TF1 , TF2) +{ + // no-op +} + } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_CELL_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_CELL_H_ diff --git a/seqan/align/dp_cell_affine.h b/seqan/align/dp_cell_affine.h index 47a6c58..2165b8a 100644 --- a/seqan/align/dp_cell_affine.h +++ b/seqan/align/dp_cell_affine.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ // values for the three matrices: diagonal, vertical and horizontal. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_CELL_AFFINE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_CELL_AFFINE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_CELL_AFFINE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_CELL_AFFINE_H_ namespace seqan { @@ -183,4 +183,4 @@ _setHorizontalScoreOfCell(DPCell_ & dpCell, TScoreValue } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_CELL_AFFINE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_CELL_AFFINE_H_ diff --git a/seqan/align/dp_cell_dynamic.h b/seqan/align/dp_cell_dynamic.h new file mode 100644 index 0000000..fd9ad3c --- /dev/null +++ b/seqan/align/dp_cell_dynamic.h @@ -0,0 +1,199 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== +// Implements the dynamic gap model published in "Dynamic Gaps Selector: +// A Smith Waterman Sequence Alignment Algorithm with Affine Gap Model +// Optimization" by Gianvito Urgese et al. 
+// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_DP_CELL_DYNAMIC_H_ +#define INCLUDE_SEQAN_ALIGN_DP_CELL_DYNAMIC_H_ + +namespace seqan { + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +struct DynamicGapExtensionHorizontal_; +typedef Tag DynamicGapExtensionHorizontal; + +struct DynamicGapExtensionVertical_; +typedef Tag DynamicGapExtensionVertical; + +enum DynamicGapsMask +{ + MASK_VERTICAL_GAP = 1, + MASK_HORIZONTAL_GAP = 2 +}; + +// ---------------------------------------------------------------------------- +// Class DPCell [DynamicGaps] +// ---------------------------------------------------------------------------- + +// The specialization for linear gap cost function. +// It solely stores the maximal score. +template +class DPCell_ +{ +public: + TScoreValue _score; + char _flagMask; + + // The default c'tor. + DPCell_() : _score(DPCellDefaultInfinity::VALUE), _flagMask(0) + {} + + // The copy c'tor. + DPCell_(DPCell_ const & other) : _score(other._score), _flagMask(other._flagMask) + {} + + // Implicit c'tor. + DPCell_(TScoreValue const & score) : _score(score), _flagMask(0) + {} + + // The assignment operator. 
+ DPCell_ & + operator=(DPCell_ const & other) + { + if (this != &other) + { + _score = other._score; + _flagMask = other._flagMask; + } + return *this; + } + + DPCell_ & + operator=(TScoreValue const & score) + { + _score = score; + return *this; + } +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +template +struct SetGapExtension; + +template +struct SetGapExtension, False, False> +{ + static const char VALUE = 0; +}; + +template +struct SetGapExtension, False, True> +{ + static const char VALUE = MASK_HORIZONTAL_GAP; +}; + +template +struct SetGapExtension, True, False> +{ + static const char VALUE = MASK_VERTICAL_GAP; +}; + +template +struct SetGapExtension, True, True> +{ + static const char VALUE = MASK_HORIZONTAL_GAP | MASK_VERTICAL_GAP; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +template +inline void _setBit(DPCell_ & cell, + TFlag const & /*flag*/, + DynamicGapExtensionVertical const & /*tag*/) +{ + if (IsSameType::VALUE) + cell._flagMask |= MASK_VERTICAL_GAP; + else + cell._flagMask &= ~MASK_VERTICAL_GAP; +} + +template +inline void _setBit(DPCell_ & cell, + TFlag const & /*flag*/, + DynamicGapExtensionHorizontal const & /*tag*/) +{ + if (IsSameType::VALUE) + cell._flagMask |= MASK_HORIZONTAL_GAP; + else + cell._flagMask &= ~MASK_HORIZONTAL_GAP; +} + +template +inline bool isGapExtension(DPCell_ const & cell, + TSpec const & /*spec*/) +{ + if (IsSameType::VALUE) + return cell._flagMask & MASK_HORIZONTAL_GAP; + else + return cell._flagMask & MASK_VERTICAL_GAP; +} + +template +inline void +setGapExtension(DPCell_ & cell, + TFlagV const & /*vert*/, + TFlagH const & /*hori*/) +{ + cell._flagMask = SetGapExtension, TFlagV, TFlagH>::VALUE; +} + +// 
---------------------------------------------------------------------------- +// Function operator<() +// ---------------------------------------------------------------------------- + +// Needed for banded chain alignment for the std::set. +template +inline bool operator<(DPCell_ const & left, + DPCell_ const & right) +{ + return left._score < right._score; +} + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_DP_CELL_DYNAMIC_H_ diff --git a/seqan/align/dp_cell_linear.h b/seqan/align/dp_cell_linear.h index 56f1572..dd43172 100644 --- a/seqan/align/dp_cell_linear.h +++ b/seqan/align/dp_cell_linear.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -36,8 +36,8 @@ // entry. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_CELL_LINEAR_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_CELL_LINEAR_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_CELL_LINEAR_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_CELL_LINEAR_H_ namespace seqan { @@ -112,4 +112,4 @@ inline bool operator<(DPCell_ const & left, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_CELL_LINEAR_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_CELL_LINEAR_H_ diff --git a/seqan/align/dp_context.h b/seqan/align/dp_context.h new file mode 100644 index 0000000..717acec --- /dev/null +++ b/seqan/align/dp_context.h @@ -0,0 +1,187 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== +// Implements the context that can be passed to the dp functions in order +// to reuse memory blocks in multiple calls of the same function. +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_DP_CONTEXT_H_ +#define INCLUDE_SEQAN_ALIGN_DP_CONTEXT_H_ + +namespace seqan +{ + +// ============================================================================ +// Forwards +// ============================================================================ + +template +struct GetDPScoreMatrix +{}; + +template +struct GetDPTraceMatrix +{}; + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +template +struct DPContext +{ + typedef typename GetDPScoreMatrix::Type TScoreMatrixHost; + typedef typename GetDPTraceMatrix::Type TTraceMatrixHost; + + TScoreMatrixHost _scoreMatrix; + TTraceMatrixHost _traceMatrix; + + DPContext() : _scoreMatrix(), _traceMatrix() + {} +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +//
---------------------------------------------------------------------------- +// Metafunction GetDPScoreMatrix +// ---------------------------------------------------------------------------- + +template +struct GetDPScoreMatrix > +{ + typedef DPCell_ TDPScoreValue_; + typedef DPMatrix_ TDPScoreMatrix_; + + typedef typename Host::Type Type; +}; + +template +struct GetDPScoreMatrix const > +{ + typedef DPCell_ TDPScoreValue_; + typedef DPMatrix_ TDPScoreMatrix_; + + typedef typename Host::Type const Type; +}; + +// ---------------------------------------------------------------------------- +// Metafunction GetDPTraceMatrix +// ---------------------------------------------------------------------------- + +template +struct GetDPTraceMatrix > +{ + typedef typename TraceBitMap_::TTraceValue TTraceValue_; + typedef DPMatrix_ TDPScoreMatrix_; + + typedef typename Host::Type Type; +}; + +template +struct GetDPTraceMatrix const > +{ + typedef typename TraceBitMap_::TTraceValue TTraceValue_; + typedef DPMatrix_ TDPScoreMatrix_; + + typedef typename Host::Type const Type; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function getDpScoreMatrix(): returns the context's _scoreMatrix member (non-const and const overloads) +// ---------------------------------------------------------------------------- + +template +inline typename GetDPScoreMatrix >::Type & +getDpScoreMatrix(DPContext & dpContext) +{ + return dpContext._scoreMatrix; +} + +template +inline typename GetDPScoreMatrix const >::Type & +getDpScoreMatrix(DPContext const & dpContext) +{ + return dpContext._scoreMatrix; +} + +// ---------------------------------------------------------------------------- +// Function getDpTraceMatrix(): returns the context's _traceMatrix member (non-const and const overloads) +// ---------------------------------------------------------------------------- + +template +inline typename GetDPTraceMatrix >::Type &
+getDpTraceMatrix(DPContext & dpContext) +{ + return dpContext._traceMatrix; +} + +template +inline typename GetDPTraceMatrix const >::Type & +getDpTraceMatrix(DPContext const & dpContext) +{ + return dpContext._traceMatrix; +} + +// ---------------------------------------------------------------------------- +// Function setDpTraceMatrix() [score-matrix overload: assigns _scoreMatrix]; NOTE(review): presumably meant to be named setDpScoreMatrix() - name kept so existing callers still compile +// ---------------------------------------------------------------------------- + +template +inline void +setDpTraceMatrix(DPContext & dpContext, + typename GetDPScoreMatrix >::Type const & scoreMatrix) +{ + dpContext._scoreMatrix = scoreMatrix; +} + +// ---------------------------------------------------------------------------- +// Function setDpTraceMatrix() [trace-matrix overload: assigns _traceMatrix] +// ---------------------------------------------------------------------------- + +template +inline void +setDpTraceMatrix(DPContext & dpContext, + typename GetDPTraceMatrix >::Type const & traceMatrix) +{ + dpContext._traceMatrix = traceMatrix; +} + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_DP_CONTEXT_H_ diff --git a/seqan/align/dp_formula.h b/seqan/align/dp_formula.h index a3a9571..47a1d47 100644 --- a/seqan/align/dp_formula.h +++ b/seqan/align/dp_formula.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -36,8 +36,8 @@ // TODO(holtgrew): Documentation in this header necessary or internal only?
-#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_FORMULA_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_FORMULA_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_FORMULA_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_FORMULA_H_ namespace seqan { @@ -173,8 +173,6 @@ _computeScore(DPCell_ & activeCell, return traceDir; } - - // ---------------------------------------------------------------------------- // Function _doComputeScore [RecursionDirectionDiagonal] // ---------------------------------------------------------------------------- @@ -193,7 +191,7 @@ _doComputeScore(DPCell_ & activeCell, TDPProfile const &) { activeCell._score = _scoreOfCell(previousDiagonal) + score(scoringScheme, seqHVal, seqVVal); - + setGapExtension(activeCell, False(), False()); if (!IsTracebackEnabled_::VALUE) return TraceBitMap_::NONE; @@ -223,4 +221,4 @@ _doComputeScore(DPCell_ & activeCell, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_FORMULA_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_FORMULA_H_ diff --git a/seqan/align/dp_formula_affine.h b/seqan/align/dp_formula_affine.h index e5c5e09..c4e32e0 100644 --- a/seqan/align/dp_formula_affine.h +++ b/seqan/align/dp_formula_affine.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Implements the affine gap cost functions. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_FORMULA_AFFINE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_FORMULA_AFFINE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_FORMULA_AFFINE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_FORMULA_AFFINE_H_ namespace seqan { @@ -164,9 +164,9 @@ _retrieveTraceAffine(TScoreValue const & globalMax, // Function _internalComputeScore [RecursionDirectionDiagonal, AffineGaps] // ---------------------------------------------------------------------------- -template +template inline typename TraceBitMap_::TTraceValue -_internalComputeScore(DPCell_ & activeCell, +_internalComputeScore(DPCell_ & activeCell, TScoreValue const & rightCompare, TTraceValueL, TTraceValueGap, @@ -178,9 +178,9 @@ _internalComputeScore(DPCell_ & activeCell, return TraceBitMap_::NONE; } -template +template inline typename TraceBitMap_::TTraceValue -_internalComputeScore(DPCell_ & activeCell, +_internalComputeScore(DPCell_ & activeCell, TScoreValue const & rightCompare, TTraceValueL leftTrace, TTraceValueGap gapTrace, @@ -195,9 +195,9 @@ _internalComputeScore(DPCell_ & activeCell, return leftTrace | gapTrace; } -template +template inline typename TraceBitMap_::TTraceValue -_internalComputeScore(DPCell_ & activeCell, +_internalComputeScore(DPCell_ & activeCell, TScoreValue const & rightCompare, TTraceValueL leftTrace, TTraceValueGap gapTrace, @@ -353,10 +353,8 @@ _internalComputeScore(DPCell_ & activeCell, { activeCell._score = activeCell._horizontalScore; return TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX; -// return traceRight; } return TraceBitMap_::MAX_FROM_VERTICAL_MATRIX; -// returntraceRight; } template @@ -368,16 +366,10 @@ _internalComputeScore(DPCell_ & activeCell, { activeCell._score = activeCell._horizontalScore; return TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX; -// return traceRight | (traceLeft & TraceBitMap_::VERTICAL_OPEN); } -// traceGap = TraceBitMap_::MAX_FROM_VERTICAL_MATRIX; if 
(activeCell._score == activeCell._horizontalScore) - { return TraceBitMap_::MAX_FROM_VERTICAL_MATRIX | TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX; -// return traceRight | traceLeft; - } return TraceBitMap_::MAX_FROM_VERTICAL_MATRIX; -// return traceLeft | (traceRight & TraceBitMap_::HORIZONTAL_OPEN); } // ---------------------------------------------------------------------------- @@ -406,7 +398,6 @@ _doComputeScore(DPCell_ & activeCell, TScoreValue tmpScore = _scoreOfCell(previousHorizontal) + scoreGapOpenHorizontal(scoringScheme, seqHVal, seqVVal); TTraceValue tvGap = _internalComputeScore(activeCell, tmpScore, TraceBitMap_::HORIZONTAL, TraceBitMap_::HORIZONTAL_OPEN, TTracebackConfig(), RecursionDirectionHorizontal()); -// activeCell._score = activeCell._horizontalScore; _max(activeCell._horizontal, tmpScore) // Now we can decide for the optimal score in horizontal score or not? activeCell._verticalScore = _verticalScoreOfCell(previousVertical) + scoreGapExtendVertical(scoringScheme, seqHVal, seqVVal); @@ -417,32 +408,6 @@ _doComputeScore(DPCell_ & activeCell, TTraceValue tvMax = _internalComputeScore(activeCell, TTracebackConfig()); // Stores from where the maximal score comes. tmpScore = _scoreOfCell(previousDiagonal) + score(scoringScheme, seqHVal, seqVVal); return _internalComputeScore(activeCell, tmpScore, tvGap, tvMax, TTracebackConfig(), RecursionDirectionDiagonal()); - - // This should be the fastest version. 
- -// TScoreValue tmpScoreDiagonal = _scoreOfCell(previousDiagonal) + score(scoringScheme, seqHVal, seqVVal); -// TScoreValue tmpGapExtendHorizontal = _horizontalScoreOfCell(previousHorizontal) + -// scoreGapExtendHorizontal(scoringScheme, seqHVal, seqVVal); -// TScoreValue tmpGapOpenHorizontal = _scoreOfCell(previousHorizontal) -// + scoreGapOpenHorizontal(scoringScheme, seqHVal, seqVVal); -// TScoreValue tmpGapExtendVertical = _verticalScoreOfCell(previousVertical) -// + scoreGapExtendVertical(scoringScheme, seqHVal, seqVVal); -// TScoreValue tmpGapOpenVertical = _scoreOfCell(previousVertical) -// + scoreGapOpenVertical(scoringScheme, seqHVal, seqVVal); -// -// activeCell._horizontalScore = _max(tmpGapExtendHorizontal, tmpGapOpenHorizontal); -// activeCell._verticalScore = _max(tmpGapExtendVertical, tmpGapOpenVertical); -// activeCell._score = _max(tmpScoreDiagonal, _max(activeCell._horizontalScore, activeCell._verticalScore)); -// -// if (!IsTracebackEnabled_::VALUE) -// return TraceBitMap_::NONE; -// -// TTraceValue traceGapOpen = TraceBitMap_::NONE; -// _conditionalOrOnInequality(traceGapOpen, _horizontalScoreOfCell(activeCell), tmpGapExtendHorizontal, TraceBitMap_::HORIZONTAL_OPEN); -// _conditionalOrOnInequality(traceGapOpen, _verticalScoreOfCell(activeCell), tmpGapExtendVertical, TraceBitMap_::VERTICAL_OPEN); -// return traceGapOpen | -// _retrieveTraceAffine(activeCell._score, tmpScoreDiagonal, tmpGapExtendHorizontal, tmpGapOpenHorizontal, -// tmpGapExtendVertical, tmpGapOpenVertical, RecursionDirectionAll()); } // ---------------------------------------------------------------------------- @@ -471,24 +436,6 @@ _doComputeScore(DPCell_ & activeCell, TTraceValue tv = _internalComputeScore(activeCell, tmpScore, TraceBitMap_::HORIZONTAL, TraceBitMap_::HORIZONTAL_OPEN, TTracebackConfig(), RecursionDirectionHorizontal()); tmpScore = _scoreOfCell(previousDiagonal) + score(scoringScheme, seqHVal, seqVVal); return _internalComputeScore(activeCell, tmpScore, tv, 
TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX, TTracebackConfig(), RecursionDirectionDiagonal()); - -// TScoreValue tmpScoreDiagonal = _scoreOfCell(previousDiagonal) + score(scoringScheme, seqHVal, seqVVal); -// TScoreValue tmpGapExtendHorizontal = _horizontalScoreOfCell(previousHorizontal) -// + scoreGapExtendHorizontal(scoringScheme, seqHVal, seqVVal); -// TScoreValue tmpGapOpenHorizontal = _scoreOfCell(previousHorizontal) -// + scoreGapOpenHorizontal(scoringScheme, seqHVal, seqVVal); -// -// activeCell._horizontalScore = _max(tmpGapExtendHorizontal, tmpGapOpenHorizontal); -// activeCell._score = _max(tmpScoreDiagonal, activeCell._horizontalScore); -// -// if (!IsTracebackEnabled_::VALUE) -// return TraceBitMap_::NONE; -// -// TTraceValue traceGapOpen = TraceBitMap_::NONE; -// _conditionalOrOnInequality(traceGapOpen, _horizontalScoreOfCell(activeCell), tmpGapExtendHorizontal, TraceBitMap_::HORIZONTAL_OPEN); -// return traceGapOpen | -// _retrieveTraceAffine(activeCell._score, tmpScoreDiagonal, tmpGapExtendHorizontal, tmpGapOpenHorizontal, -// TScoreValue(), TScoreValue(), RecursionDirectionUpperDiagonal()); } // ---------------------------------------------------------------------------- @@ -521,18 +468,6 @@ _doComputeScore(DPCell_ & activeCell, // Up to here, activeCell stores the highest value of vertical or vertical open. tmpScore = _scoreOfCell(previousDiagonal) + score(scoringScheme, seqHVal, seqVVal); return _internalComputeScore(activeCell, tmpScore, tv, TraceBitMap_::MAX_FROM_VERTICAL_MATRIX, TTracebackConfig(), RecursionDirectionDiagonal()); // Now we have this problem. How do we determine if the max comes from the vertical distance. 
- -// activeCell._verticalScore = _max(tmpGapExtendVertical, tmpGapOpenVertical); -// activeCell._score = _max(tmpScoreDiagonal, activeCell._verticalScore); -// -// if (!IsTracebackEnabled_::VALUE) -// return TraceBitMap_::NONE; -// -// TTraceValue traceGapOpen = TraceBitMap_::NONE; -// _conditionalOrOnInequality(traceGapOpen, _verticalScoreOfCell(activeCell), tmpGapExtendVertical, TraceBitMap_::VERTICAL_OPEN); -// return traceGapOpen | -// _retrieveTraceAffine(activeCell._score, tmpScoreDiagonal, TScoreValue(), TScoreValue(), tmpGapExtendVertical, -// tmpGapOpenVertical, RecursionDirectionLowerDiagonal()); } // ---------------------------------------------------------------------------- @@ -558,23 +493,6 @@ _doComputeScore(DPCell_ & activeCell, activeCell._verticalScore = DPCellDefaultInfinity >::VALUE; return _internalComputeScore(activeCell, tmpGapOpenHorizontal, TraceBitMap_::HORIZONTAL, TraceBitMap_::HORIZONTAL_OPEN, TTracebackConfig(), RecursionDirectionHorizontal()) | TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX; - -// typedef typename TraceBitMap_::TTraceValue TTraceValue; -// TScoreValue tmpGapOpenHorizontal = _scoreOfCell(previousHorizontal) + -// scoreGapOpenHorizontal(scoringScheme, seqHVal, seqVVal); -// TScoreValue tmpGapExtendHorizontal = _horizontalScoreOfCell(previousHorizontal) + -// scoreGapExtendHorizontal(scoringScheme, seqHVal, seqVVal); -// activeCell._horizontalScore = _max(tmpGapOpenHorizontal, tmpGapExtendHorizontal); -// activeCell._score = activeCell._horizontalScore; -// -// if (!IsTracebackEnabled_::VALUE) -// return TraceBitMap_::NONE; -// -// TTraceValue traceGapOpen = TraceBitMap_::NONE; -// _conditionalOrOnInequality(traceGapOpen, _horizontalScoreOfCell(activeCell), tmpGapExtendHorizontal, TraceBitMap_::HORIZONTAL_OPEN); -// return traceGapOpen | -// _retrieveTraceAffine(activeCell._score, TScoreValue(), tmpGapExtendHorizontal, tmpGapOpenHorizontal, -// TScoreValue(), TScoreValue(), RecursionDirectionHorizontal()); } // 
---------------------------------------------------------------------------- @@ -601,24 +519,8 @@ _doComputeScore(DPCell_ & activeCell, // Here we distinguish between vertical and vertical open. activeCell._horizontalScore = DPCellDefaultInfinity >::VALUE; return _internalComputeScore(activeCell, tmpGapOpenVertical, TraceBitMap_::VERTICAL, TraceBitMap_::VERTICAL_OPEN, TTracebackConfig(), RecursionDirectionVertical()) | TraceBitMap_::MAX_FROM_VERTICAL_MATRIX; -// typedef typename TraceBitMap_::TTraceValue TTraceValue; -// TScoreValue tmpGapOpenVertical = _scoreOfCell(previousVertical) + -// scoreGapOpenVertical(scoringScheme, seqHVal, seqVVal); -// TScoreValue tmpGapExtendVertical = _verticalScoreOfCell(previousVertical) + -// scoreGapExtendVertical(scoringScheme, seqHVal, seqVVal); -// activeCell._verticalScore = _max(tmpGapExtendVertical, tmpGapOpenVertical); -// activeCell._score = activeCell._verticalScore; -// -// if (!IsTracebackEnabled_::VALUE) -// return TraceBitMap_::NONE; -// -// TTraceValue traceGapOpen = TraceBitMap_::NONE; -// _conditionalOrOnInequality(traceGapOpen, _verticalScoreOfCell(activeCell), tmpGapExtendVertical, TraceBitMap_::VERTICAL_OPEN); -// return traceGapOpen | -// _retrieveTraceAffine(activeCell._score, TScoreValue(), TScoreValue(), TScoreValue(), tmpGapExtendVertical, -// tmpGapOpenVertical, RecursionDirectionVertical()); } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_FORMULA_AFFINE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_FORMULA_AFFINE_H_ diff --git a/seqan/align/dp_formula_dynamic.h b/seqan/align/dp_formula_dynamic.h new file mode 100644 index 0000000..3cce01d --- /dev/null +++ b/seqan/align/dp_formula_dynamic.h @@ -0,0 +1,394 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All 
rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Rene Rahn +// ========================================================================== +// Implements the score function for dynamic gap costs published in +// "Dynamic Gaps Selector: A Smith Waterman Sequence Alignment Algorithm with +// Affine Gap Model Optimization" by Gianvito Urgese et al. 
+// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_DP_FORMULA_DYNAMIC_H_ +#define INCLUDE_SEQAN_ALIGN_DP_FORMULA_DYNAMIC_H_ + +namespace seqan { + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function _internalComputeScore [RecursionDirectionDiagonal, DynamicGaps] +// ---------------------------------------------------------------------------- + +template +inline typename TraceBitMap_::TTraceValue +_internalComputeScore(DPCell_ & activeCell, + TScoreValue const & diagCompare, + TTraceValueL, + TTraceValueGap, + TracebackOff const &, + RecursionDirectionDiagonal const &) +{ + if(_scoreOfCell(activeCell) < diagCompare) + { + activeCell._score = diagCompare; + setGapExtension(activeCell, False(), False()); + return TraceBitMap_::NONE; + } + return TraceBitMap_::NONE; +} + +template +inline typename TraceBitMap_::TTraceValue +_internalComputeScore(DPCell_ & activeCell, + TScoreValue const & diagCompare, + TTraceValueL leftTrace, + TTraceValueGap gapTrace, + TracebackOn > const &, + RecursionDirectionDiagonal const &) +{ + if(_scoreOfCell(activeCell) <= diagCompare) + { + activeCell._score = diagCompare; + setGapExtension(activeCell, False(), False()); + return 
TraceBitMap_::DIAGONAL | leftTrace; + } + return leftTrace | gapTrace; +} + +template +inline typename TraceBitMap_::TTraceValue +_internalComputeScore(DPCell_ & activeCell, + TScoreValue const & diagCompare, + TTraceValueL leftTrace, + TTraceValueGap gapTrace, + TracebackOn > const &, + RecursionDirectionDiagonal const &) +{ + if(_scoreOfCell(activeCell) < diagCompare) + { + activeCell._score = diagCompare; // Maximal score comes from diagonal. + setGapExtension(activeCell, False(), False()); + return TraceBitMap_::DIAGONAL | leftTrace; // Return trace for Diagonal. + } + if (_scoreOfCell(activeCell) == diagCompare) // Maximal score comes from previous computed directions and diagonal. + return leftTrace | TraceBitMap_::DIAGONAL | gapTrace; // Return all directions inclusively the flag indicating max from gap. + + return leftTrace | gapTrace; // Maximum comes from gap. Return gap value inclusively the flag indicating max from gap. +} + +// ---------------------------------------------------------------------------- +// Function _internalComputeScore [RecursionDirectionHorizontal, DynamicGaps] +// ---------------------------------------------------------------------------- + +template +inline typename TraceBitMap_::TTraceValue +_internalComputeScore(DPCell_ & activeCell, + DPCell_ const & prevCell, + TValueH const & valH, + TValueV const & valV, + TScore const & score, + TracebackOff const &, + RecursionDirectionHorizontal const &) +{ + if(!isGapExtension(prevCell, DynamicGapExtensionHorizontal())) + activeCell._score = _scoreOfCell(prevCell) + scoreGapOpenHorizontal(score, valH, valV); + return TraceBitMap_::NONE; +} + +template +inline typename TraceBitMap_::TTraceValue +_internalComputeScore(DPCell_ & activeCell, + DPCell_ const & prevCell, + TValueH const & valH, + TValueV const & valV, + TScore const & score, + TracebackOn const &, + RecursionDirectionHorizontal const &) +{ + if (!isGapExtension(prevCell, DynamicGapExtensionHorizontal())) + { + 
activeCell._score = _scoreOfCell(prevCell) + scoreGapOpenHorizontal(score, valH, valV); + return TraceBitMap_::HORIZONTAL_OPEN; + } + return TraceBitMap_::HORIZONTAL; +} + +// ---------------------------------------------------------------------------- +// Function _internalComputeScore [RecursionDirectionVertical, DynamicGaps] +// ---------------------------------------------------------------------------- + +template +inline typename TraceBitMap_::TTraceValue +_internalComputeScore(DPCell_ & activeCell, + DPCell_ const & prevCell, + TValueH const & valH, + TValueV const & valV, + TScore const & score, + TracebackOff const &, + RecursionDirectionVertical const &) +{ + if(!isGapExtension(prevCell, DynamicGapExtensionVertical())) + activeCell._score = _scoreOfCell(prevCell) + scoreGapOpenVertical(score, valH, valV); + return TraceBitMap_::NONE; +} + +template +inline typename TraceBitMap_::TTraceValue +_internalComputeScore(DPCell_ & activeCell, + DPCell_ const & prevCell, + TValueH const & valH, + TValueV const & valV, + TScore const & score, + TracebackOn const &, + RecursionDirectionVertical const &) +{ + if (!isGapExtension(prevCell, DynamicGapExtensionVertical())) + { + activeCell._score = _scoreOfCell(prevCell) + scoreGapOpenVertical(score, valH, valV); + return TraceBitMap_::VERTICAL_OPEN; + } + return TraceBitMap_::VERTICAL; +} + +// ---------------------------------------------------------------------------- +// Function _internalComputeScore [Vertical vs Horizontal, DynamicGaps] +// ---------------------------------------------------------------------------- + +template +inline typename TraceBitMap_::TTraceValue +_internalComputeScore(DPCell_ & activeCell, + TScoreValue const & horizontalComp, + TracebackOff const &) +{ + if(_scoreOfCell(activeCell) < horizontalComp) + { + activeCell._score = horizontalComp; + setGapExtension(activeCell, False(), True()); + return TraceBitMap_::NONE; + } + setGapExtension(activeCell, True(), False()); + return 
TraceBitMap_::NONE; +} + +template +inline typename TraceBitMap_::TTraceValue +_internalComputeScore(DPCell_ & activeCell, + TScoreValue const & horizontalComp, + TracebackOn > const &) +{ + if(_scoreOfCell(activeCell) < horizontalComp) + { + activeCell._score = horizontalComp; + setGapExtension(activeCell, False(), True()); + return TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX; + } + setGapExtension(activeCell, True(), False()); + return TraceBitMap_::MAX_FROM_VERTICAL_MATRIX; +} + +template +inline typename TraceBitMap_::TTraceValue +_internalComputeScore(DPCell_ & activeCell, + TScoreValue const & horizontalComp, + TracebackOn > const &) +{ + if(_scoreOfCell(activeCell) < horizontalComp) + { + setGapExtension(activeCell, False(), True()); + activeCell._score = horizontalComp; + return TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX; + } + if (_scoreOfCell(activeCell) == horizontalComp) + { + setGapExtension(activeCell, True(), True()); + return TraceBitMap_::MAX_FROM_VERTICAL_MATRIX | TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX; + } + setGapExtension(activeCell, True(), False()); + return TraceBitMap_::MAX_FROM_VERTICAL_MATRIX; +} + +// ---------------------------------------------------------------------------- +// Function _doComputeScore [RecursionAllDirection, DynamicGaps] +// ---------------------------------------------------------------------------- + +template +inline typename TraceBitMap_::TTraceValue +_doComputeScore(DPCell_ & activeCell, + DPCell_ const & previousDiagonal, + DPCell_ const & previousHorizontal, + DPCell_ const & previousVertical, + TSequenceHValue const & seqHVal, + TSequenceVValue const & seqVVal, + TScoringScheme const & scoringScheme, + RecursionDirectionAll const &, + DPProfile_ const &) +{ + typedef typename TraceBitMap_::TTraceValue TTraceValue; + + // Compute best alignment from either horizontal open or extension. 
+ DPCell_ tmpScore = _scoreOfCell(previousHorizontal) + scoreGapExtendHorizontal(scoringScheme, seqHVal, seqVVal); + TTraceValue tvGap = _internalComputeScore(tmpScore, previousHorizontal, seqHVal, seqVVal, scoringScheme, + TTracebackConfig(), RecursionDirectionHorizontal()); + + // Compute best alignment between vertical and vertical open gap. + activeCell._score = _scoreOfCell(previousVertical) + scoreGapExtendVertical(scoringScheme, seqHVal, seqVVal); + tvGap |= _internalComputeScore(activeCell, previousVertical, seqHVal, seqVVal, scoringScheme, + TTracebackConfig(), RecursionDirectionVertical()); + + // Finds the maximum between the vertical and the horizontal matrix. Stores the flag for coming from a potential direction. + TTraceValue tvMax = _internalComputeScore(activeCell, tmpScore._score, TTracebackConfig()); // Stores from where the maximal score comes. + tmpScore._score = _scoreOfCell(previousDiagonal) + score(scoringScheme, seqHVal, seqVVal); + return _internalComputeScore(activeCell, tmpScore._score, tvGap, tvMax, TTracebackConfig(), RecursionDirectionDiagonal()); +} + +// ---------------------------------------------------------------------------- +// Function _doComputeScore [RecursionUpperDiagonalDirection, DynamicGaps] +// ---------------------------------------------------------------------------- + +template +inline typename TraceBitMap_::TTraceValue +_doComputeScore(DPCell_ & activeCell, + DPCell_ const & previousDiagonal, + DPCell_ const & previousHorizontal, + DPCell_ const & /*previousVertical*/, + TSequenceHValue const & seqHVal, + TSequenceVValue const & seqVVal, + TScoringScheme const & scoringScheme, + RecursionDirectionUpperDiagonal const &, + DPProfile_ const &) +{ + typedef typename TraceBitMap_::TTraceValue TTraceValue; + + // This computes the difference between the horizontal extend and horizontal open. 
+ activeCell._score = _scoreOfCell(previousHorizontal) + scoreGapExtendHorizontal(scoringScheme, seqHVal, seqVVal); + TTraceValue tv = _internalComputeScore(activeCell, previousHorizontal, seqHVal, seqVVal, scoringScheme, + TTracebackConfig(), RecursionDirectionHorizontal()); + + setGapExtension(activeCell, False(), True()); + TScoreValue tmpScore = _scoreOfCell(previousDiagonal) + score(scoringScheme, seqHVal, seqVVal); + return _internalComputeScore(activeCell, tmpScore, tv, TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX, + TTracebackConfig(), RecursionDirectionDiagonal()); +} + +// ---------------------------------------------------------------------------- +// Function _doComputeScore [RecursionDirectionLowerDiagonal, DynamicGaps] +// ---------------------------------------------------------------------------- + +template +inline typename TraceBitMap_::TTraceValue +_doComputeScore(DPCell_ & activeCell, + DPCell_ const & previousDiagonal, + DPCell_ const & /*previousHorizontal*/, + DPCell_ const & previousVertical, + TSequenceHValue const & seqHVal, + TSequenceVValue const & seqVVal, + TScoringScheme const & scoringScheme, + RecursionDirectionLowerDiagonal const &, + DPProfile_ const &) +{ + typedef typename TraceBitMap_::TTraceValue TTraceValue; + + // This computes the difference between the vertical extend and vertical open. 
+ activeCell._score = _scoreOfCell(previousVertical) + scoreGapExtendVertical(scoringScheme, seqHVal, seqVVal); + TTraceValue tv = _internalComputeScore(activeCell, previousVertical, seqHVal, seqVVal, scoringScheme, + TTracebackConfig(), RecursionDirectionVertical()); + setGapExtension(activeCell, True(), False()); + TScoreValue tmpScore = _scoreOfCell(previousDiagonal) + score(scoringScheme, seqHVal, seqVVal); + return _internalComputeScore(activeCell, tmpScore, tv, TraceBitMap_::MAX_FROM_VERTICAL_MATRIX, + TTracebackConfig(), RecursionDirectionDiagonal()); +} + +// ---------------------------------------------------------------------------- +// Function _doComputeScore [RecursionHorizontalDirection] +// ---------------------------------------------------------------------------- + +template +inline typename TraceBitMap_::TTraceValue +_doComputeScore(DPCell_ & activeCell, + DPCell_ const & /*previousDiagonal*/, + DPCell_ const & previousHorizontal, + DPCell_ const & /*previousVertical*/, + TSequenceHValue const & seqHVal, + TSequenceVValue const & seqVVal, + TScoringScheme const & scoringScheme, + RecursionDirectionHorizontal const & tag, + DPProfile_ const &) +{ + activeCell._score = _scoreOfCell(previousHorizontal) + scoreGapExtendHorizontal(scoringScheme, seqHVal, seqVVal); + setGapExtension(activeCell, False(), True()); + return _internalComputeScore(activeCell, previousHorizontal, seqHVal, seqVVal, scoringScheme, + TTracebackConfig(), tag) | TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX; +} + +// ---------------------------------------------------------------------------- +// Function _doComputeScore [RecursionVerticalDirection] +// ---------------------------------------------------------------------------- + +template +inline typename TraceBitMap_::TTraceValue +_doComputeScore(DPCell_ & activeCell, + DPCell_ const & /*previousDiagonal*/, + DPCell_ const & /*previousHorizontal*/, + DPCell_ const & previousVertical, + TSequenceHValue const & seqHVal, + 
TSequenceVValue const & seqVVal, + TScoringScheme const & scoringScheme, + RecursionDirectionVertical const & tag, + DPProfile_ const &) +{ + activeCell._score = _scoreOfCell(previousVertical) + scoreGapExtendVertical(scoringScheme, seqHVal, seqVVal); + setGapExtension(activeCell, True(), False()); + return _internalComputeScore(activeCell, previousVertical, seqHVal, seqVVal, scoringScheme, + TTracebackConfig(), tag) | TraceBitMap_::MAX_FROM_VERTICAL_MATRIX; +} + +} // namespace seqan + +#endif // INCLUDE_SEQAN_ALIGN_DP_FORMULA_DYNAMIC_H_ diff --git a/seqan/align/dp_formula_linear.h b/seqan/align/dp_formula_linear.h index 7b18d58..e7a2531 100644 --- a/seqan/align/dp_formula_linear.h +++ b/seqan/align/dp_formula_linear.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Defines the methods to compute the score when using linear gap costs. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_FORMULA_LINEAR_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_FORMULA_LINEAR_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_FORMULA_LINEAR_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_FORMULA_LINEAR_H_ namespace seqan { @@ -292,4 +292,4 @@ _doComputeScore(DPCell_ & activeCell, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_FORMULA_LINEAR_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_FORMULA_LINEAR_H_ diff --git a/seqan/align/dp_matrix.h b/seqan/align/dp_matrix.h index e973648..38a7350 100644 --- a/seqan/align/dp_matrix.h +++ b/seqan/align/dp_matrix.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -43,8 +43,8 @@ // TODO(holtgrew): Documentation in this header necessary or internal only? 
-#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_H_ namespace seqan { @@ -59,6 +59,13 @@ struct DefaultScoreMatrixSpec_; // Tags, Classes, Enums // ============================================================================ +// ---------------------------------------------------------------------------- +// Tag MatrixMember +// ---------------------------------------------------------------------------- + +struct DPMatrixMember_; +typedef Tag DPMatrixMember; + // ---------------------------------------------------------------------------- // Tag SparseDPMatrix // ---------------------------------------------------------------------------- @@ -73,7 +80,6 @@ typedef Tag SparseDPMatrix; struct FullDPMatrix_; typedef Tag FullDPMatrix; - // ---------------------------------------------------------------------------- // Enum DPMatrixDimension // ---------------------------------------------------------------------------- @@ -106,30 +112,15 @@ class DPMatrix_ { public: - typedef Matrix THost; + typedef typename Member::Type THost; - Holder _dataHost; // The host containing the actual matrix. + Holder data_host; // The host containing the actual matrix. 
DPMatrix_() : - _dataHost() + data_host() { - create(_dataHost); + create(data_host); } - - DPMatrix_(DPMatrix_ const & other) : - _dataHost(other._dataHost) {} - - ~DPMatrix_() {} - - DPMatrix_ & operator=(DPMatrix_ const & other) - { - if (this != &other) - { - _dataHost = other._dataHost; - } - return *this; - } - }; // ============================================================================ @@ -156,49 +147,44 @@ struct DefaultScoreMatrixSpec_ > }; // ---------------------------------------------------------------------------- -// Metafunction _DataHost +// Metafunction DataHost_ // ---------------------------------------------------------------------------- // Returns the type of the underlying matrix. -template -struct _DataHost {}; - template -struct _DataHost > +struct Member, DPMatrixMember> { - typedef DPMatrix_ TDPMatrix_; - typedef typename TDPMatrix_::THost Type; + typedef Matrix Type; }; template -struct _DataHost const> +struct Member const, DPMatrixMember> { - typedef DPMatrix_ TDPMatrix_; - typedef typename TDPMatrix_::THost const Type; + typedef Matrix const Type; }; // ---------------------------------------------------------------------------- -// Metafunction _SizeArr +// Metafunction SizeArr_ // ---------------------------------------------------------------------------- // Returns the type of the containers to store the dimensions and the factors // in order to move properly in the matrix. 
template -struct _SizeArr {}; +struct SizeArr_ {}; template -struct _SizeArr > +struct SizeArr_ > { typedef DPMatrix_ TDPMatrix_; - typedef typename _DataHost::Type TDataHost_; + typedef typename Member::Type TDataHost_; typedef typename SizeArr_::Type Type; }; template -struct _SizeArr const> +struct SizeArr_ const> { typedef DPMatrix_ TDPMatrix_; - typedef typename _DataHost::Type TDataHost_; + typedef typename Member::Type TDataHost_; typedef typename SizeArr_::Type const Type; }; @@ -299,7 +285,7 @@ template struct Host > { typedef DPMatrix_ TDPMatrix_; - typedef typename _DataHost::Type TDataMatrix_; + typedef typename Member::Type TDataMatrix_; typedef typename Host::Type Type; }; @@ -307,7 +293,7 @@ template struct Host const> { typedef DPMatrix_ TDPMatrix_; - typedef typename _DataHost::Type TDataMatrix_; + typedef typename Member::Type TDataMatrix_; typedef typename Host::Type const Type; }; @@ -339,7 +325,7 @@ template struct Iterator, Rooted const> { typedef DPMatrix_ TDPMatrix_; - typedef typename _DataHost::Type TDataMatrix_; + typedef typename Member::Type TDataMatrix_; typedef typename Iterator::Type Type; }; @@ -347,7 +333,7 @@ template struct Iterator const, Rooted const> { typedef DPMatrix_ TDPMatrix_; - typedef typename _DataHost::Type TDataMatrix_; + typedef typename Member::Type TDataMatrix_; typedef typename Iterator::Type Type; }; @@ -371,17 +357,17 @@ inline bool _checkCorrectDimension(DPMatrixDimension_::TValue dim) // Returns a reference to the hosted matrix. 
template -inline typename _DataHost >::Type & -_dataHost(DPMatrix_&dpMatrix) +inline Holder >::Type> & +_dataHost(DPMatrix_& dpMatrix) { - return value(dpMatrix._dataHost); + return _dataHost(value(dpMatrix.data_host)); } template -inline typename _DataHost const>::Type & +inline Holder >::Type> const & _dataHost(DPMatrix_ const & dpMatrix) { - return value(dpMatrix._dataHost); + return _dataHost(value(dpMatrix.data_host)); } // ---------------------------------------------------------------------------- @@ -390,17 +376,17 @@ _dataHost(DPMatrix_ const & dpMatrix) // Returns a reference to the _dataLengths container of the hosted matrix. template -inline typename _SizeArr >::Type & +inline typename SizeArr_ >::Type & _dataLengths(DPMatrix_&dpMatrix) { - return _dataLengths(_dataHost(dpMatrix)); + return _dataLengths(value(dpMatrix.data_host)); } template -inline typename _SizeArr const>::Type & +inline typename SizeArr_ const>::Type & _dataLengths(DPMatrix_ const & dpMatrix) { - return _dataLengths(_dataHost(dpMatrix)); + return _dataLengths(value(dpMatrix.data_host)); } // ---------------------------------------------------------------------------- @@ -409,57 +395,17 @@ _dataLengths(DPMatrix_ const & dpMatrix) // Returns a reference to the _dataFactors container of the hosted matrix. template -inline typename _SizeArr >::Type & +inline typename SizeArr_ >::Type & _dataFactors(DPMatrix_&dpMatrix) { - return _dataFactors(_dataHost(dpMatrix)); + return _dataFactors(value(dpMatrix.data_host)); } template -inline typename _SizeArr const>::Type & +inline typename SizeArr_ const>::Type & _dataFactors(DPMatrix_ const & dpMatrix) { - return _dataFactors(_dataHost(dpMatrix)); -} - -// ---------------------------------------------------------------------------- -// Function host() -// ---------------------------------------------------------------------------- - -// Returns a reference to the underlying vector of the hosted matrix. 
-template -inline typename Host >::Type & -host(DPMatrix_&dpMatrix) -{ - return host(_dataHost(dpMatrix)); -} - -template -inline typename Host const>::Type & -host(DPMatrix_ const & dpMatrix) -{ - return host(_dataHost(dpMatrix)); -} - -// ---------------------------------------------------------------------------- -// Function setHost() -// ---------------------------------------------------------------------------- - -// Sets a new value to the underlying vector of the hosted matrix. -template -inline void -setHost(DPMatrix_ & dpMatrix, - THost & newHost) -{ - setHost(_dataHost(dpMatrix), newHost); -} - -template -inline void -setHost(DPMatrix_ & dpMatrix, - THost const & newHost) -{ - setHost(_dataHost(dpMatrix), newHost); + return _dataFactors(value(dpMatrix.data_host)); } // ---------------------------------------------------------------------------- @@ -472,7 +418,7 @@ inline typename Reference >::Type value(DPMatrix_ & dpMatrix, TPosition const & pos) { - return value(_dataHost(dpMatrix), pos); + return value(value(dpMatrix.data_host), pos); } template @@ -480,7 +426,7 @@ inline typename Reference const>::Type value(DPMatrix_ const & dpMatrix, TPosition const & pos) { - return value(_dataHost(dpMatrix), pos); + return value(value(dpMatrix.data_host), pos); } // Returns the value of the matrix at the two given coordinates. 
@@ -490,7 +436,7 @@ value(DPMatrix_ & dpMatrix, TPositionV const & posDimV, TPositionH const & posDimH) { - return value(_dataHost(dpMatrix), posDimV, posDimH); + return value(value(dpMatrix.data_host), posDimV, posDimH); } template @@ -499,7 +445,7 @@ value(DPMatrix_ const & dpMatrix, TPositionV const & posDimV, TPositionH const & posDimH) { - return value(_dataHost(dpMatrix), posDimV, posDimH); + return value(value(dpMatrix.data_host), posDimV, posDimH); } // ---------------------------------------------------------------------------- @@ -514,7 +460,7 @@ length(DPMatrix_ const & dpMatrix, { SEQAN_ASSERT(_checkCorrectDimension(dimension)); - return length(_dataHost(dpMatrix), dimension); + return length(value(dpMatrix.data_host), dimension); } // Returns the overall length of the underlying vector of the hosted matrix. @@ -522,7 +468,7 @@ template inline typename Size const>::Type length(DPMatrix_ const & dpMatrix) { - return length(_dataHost(dpMatrix)); // Note that even if the dimensional lengths are set but the matrix was not resized + return length(value(dpMatrix.data_host)); // Note that even if the dimensional lengths are set but the matrix was not resized // this function returns 0 or the previous length of the host before the resize. 
} @@ -565,7 +511,26 @@ setLength(DPMatrix_ & dpMatrix, TSize const & newLength) { SEQAN_ASSERT(_checkCorrectDimension(dimension)); - setLength(_dataHost(dpMatrix), dimension, newLength); + setLength(value(dpMatrix.data_host), dimension, newLength); +} + +// ---------------------------------------------------------------------------- +// Function updateFactors() +// ---------------------------------------------------------------------------- + +template +inline typename Size >::Type +updateFactors(DPMatrix_ & dpMatrix) +{ + typedef typename Size >::Type TSize; + + TSize factor_ = _dataFactors(dpMatrix)[0] * length(dpMatrix, 0); + for (unsigned int i = 1; (factor_ > 0) && (i < dimension(value(dpMatrix.data_host))); ++i) + { + _dataFactors(dpMatrix)[i] = factor_; + factor_ *= length(dpMatrix, i); + } + return factor_; } // ---------------------------------------------------------------------------- @@ -577,7 +542,11 @@ template inline void resize(DPMatrix_ & dpMatrix) { - resize(_dataHost(dpMatrix)); + typedef typename Size >::Type TSize; + + TSize reqSize = updateFactors(dpMatrix); + if (reqSize >= length(dpMatrix)) + resize(host(dpMatrix), reqSize, Exact()); } template @@ -585,7 +554,11 @@ inline void resize(DPMatrix_ & dpMatrix, TValue const & fillValue) { - resize(_dataHost(dpMatrix), fillValue); + typedef typename Size >::Type TSize; + + TSize reqSize = updateFactors(dpMatrix); + if (reqSize >= length(dpMatrix)) + resize(host(dpMatrix), reqSize, fillValue, Exact()); } // ---------------------------------------------------------------------------- @@ -610,14 +583,14 @@ template inline typename Iterator, Rooted const>::Type begin(DPMatrix_ & dpMatrix, Rooted const) { - return begin(_dataHost(dpMatrix)); + return begin(value(dpMatrix.data_host)); } template inline typename Iterator const, Rooted const>::Type begin(DPMatrix_ const & dpMatrix, Rooted const) { - return begin(_dataHost(dpMatrix)); + return begin(value(dpMatrix.data_host)); } // 
---------------------------------------------------------------------------- @@ -642,14 +615,14 @@ template inline typename Iterator, Rooted const>::Type end(DPMatrix_ & dpMatrix, Rooted const) { - return end(_dataHost(dpMatrix)); + return end(value(dpMatrix.data_host)); } template inline typename Iterator const, Rooted const>::Type end(DPMatrix_ const & dpMatrix, Rooted const) { - return end(_dataHost(dpMatrix)); + return end(value(dpMatrix.data_host)); } // ---------------------------------------------------------------------------- @@ -663,9 +636,9 @@ coordinate(DPMatrix_ const & dpMatrix, TPosition hostPos, typename DPMatrixDimension_::TValue dimension) { - return coordinate(_dataHost(dpMatrix), hostPos, dimension); + return coordinate(value(dpMatrix.data_host), hostPos, dimension); } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_H_ diff --git a/seqan/align/dp_matrix_navigator.h b/seqan/align/dp_matrix_navigator.h index 56c6725..eb45a64 100644 --- a/seqan/align/dp_matrix_navigator.h +++ b/seqan/align/dp_matrix_navigator.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -45,8 +45,8 @@ // TODO(holtgrew): Documentation in this header necessary or internal only? 
-#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_H_ namespace seqan { @@ -227,4 +227,4 @@ container(DPMatrixNavigator_ const & } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_H_ diff --git a/seqan/align/dp_matrix_navigator_score_matrix.h b/seqan/align/dp_matrix_navigator_score_matrix.h index 574377b..6ea4f00 100644 --- a/seqan/align/dp_matrix_navigator_score_matrix.h +++ b/seqan/align/dp_matrix_navigator_score_matrix.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -36,8 +36,8 @@ // cells needed for the recursion formula. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_SCORE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_SCORE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_SCORE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_SCORE_H_ namespace seqan { @@ -102,12 +102,13 @@ template inline void _init(DPMatrixNavigator_, DPScoreMatrix, NavigateColumnWise> & navigator, DPMatrix_ & dpMatrix, - DPBand_ const &) + DPBandConfig const &) { navigator._ptrDataContainer = &dpMatrix; navigator._activeColIterator = begin(dpMatrix, Standard()); navigator._prevColIterator = navigator._activeColIterator - _dataFactors(dpMatrix)[DPMatrixDimension_::HORIZONTAL]; navigator._laneLeap = 1; + assignValue(navigator._activeColIterator, TValue()); } // Initializes the navigator for a banded alignment. @@ -115,7 +116,7 @@ template inline void _init(DPMatrixNavigator_, DPScoreMatrix, NavigateColumnWise> & navigator, DPMatrix_ & dpMatrix, - DPBand_ const & band) + DPBandConfig const & band) { typedef typename Size >::Type TMatrixSize; typedef typename MakeSigned::Type TSignedSize; @@ -142,6 +143,7 @@ _init(DPMatrixNavigator_, DPScoreMatrix, Navigat } // Set previous iterator to same position, one column left. 
navigator._prevColIterator = navigator._activeColIterator - _dataFactors(dpMatrix)[DPMatrixDimension_::HORIZONTAL]; + assignValue(navigator._activeColIterator, TValue()); } // ---------------------------------------------------------------------------- @@ -414,4 +416,4 @@ previousCellVertical(DPMatrixNavigator_ inline void _init(DPMatrixNavigator_, DPScoreMatrix, NavigateColumnWise> & navigator, DPMatrix_ & dpMatrix, - DPBand_ const &) + DPBandConfig const &) { navigator._ptrDataContainer = &dpMatrix; navigator._activeColIterator = begin(dpMatrix, Standard()); navigator._prevColIterator = navigator._activeColIterator; navigator._laneLeap = 1 - _dataLengths(dpMatrix)[DPMatrixDimension_::VERTICAL]; + assignValue(navigator._activeColIterator, TValue()); } // Initializes the navigator for banded alignments @@ -114,7 +115,7 @@ template inline void _init(DPMatrixNavigator_, DPScoreMatrix, NavigateColumnWise> & navigator, DPMatrix_ & dpMatrix, - DPBand_ const & band) + DPBandConfig const & band) { typedef DPMatrix_ TSparseDPMatrix; typedef typename Size::Type TSize; @@ -138,6 +139,7 @@ _init(DPMatrixNavigator_, DPScoreMatrix, Navig navigator._activeColIterator = begin(dpMatrix, Standard()) + length(dpMatrix, DPMatrixDimension_::VERTICAL) + navigator._laneLeap - 1; } navigator._prevColIterator = navigator._activeColIterator; + assignValue(navigator._activeColIterator, TValue()); } // ---------------------------------------------------------------------------- @@ -376,4 +378,4 @@ _goNextCell(DPMatrixNavigator_, DPScoreMatrix, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_SCORE_SPARSE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_SCORE_SPARSE_H_ diff --git a/seqan/align/dp_matrix_navigator_trace_matrix.h b/seqan/align/dp_matrix_navigator_trace_matrix.h index 8b48c82..a32b7d2 100644 --- a/seqan/align/dp_matrix_navigator_trace_matrix.h +++ b/seqan/align/dp_matrix_navigator_trace_matrix.h @@ -1,7 +1,7 @@ // 
========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -40,8 +40,8 @@ // is thrown. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_TRACE_MATRIX_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_TRACE_MATRIX_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_TRACE_MATRIX_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_TRACE_MATRIX_H_ namespace seqan { @@ -102,7 +102,7 @@ template inline void _init(DPMatrixNavigator_, DPTraceMatrix, NavigateColumnWise> & navigator, DPMatrix_ & dpMatrix, - DPBand_ const &) + DPBandConfig const &) { if (IsSameType::VALUE) return; // Leave navigator uninitialized because it is never used. 
@@ -110,6 +110,7 @@ _init(DPMatrixNavigator_, DPTraceMatrix inline void _init(DPMatrixNavigator_, DPTraceMatrix, NavigateColumnWise> & navigator, DPMatrix_ & dpMatrix, - DPBand_ const & band) + DPBandConfig const & band) { typedef typename Size >::Type TMatrixSize; typedef typename MakeSigned::Type TSignedSize; @@ -149,6 +150,7 @@ _init(DPMatrixNavigator_, DPTraceMatrix, TNavigationSpe } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_TRACE_MATRIX_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_NAVIGATOR_TRACE_MATRIX_H_ diff --git a/seqan/align/dp_matrix_sparse.h b/seqan/align/dp_matrix_sparse.h index 7fdf290..526376e 100644 --- a/seqan/align/dp_matrix_sparse.h +++ b/seqan/align/dp_matrix_sparse.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Rene Rahn // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_SPARSE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_SPARSE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_SPARSE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_SPARSE_H_ namespace seqan { @@ -54,30 +54,15 @@ class DPMatrix_ { public: - typedef Matrix THost; + typedef typename Member::Type THost; - Holder _dataHost; // The host containing the actual matrix. + Holder data_host; // The host containing the actual matrix. 
DPMatrix_() : - _dataHost() + data_host() { - create(_dataHost); + create(data_host); } - - DPMatrix_(DPMatrix_ const & other) : - _dataHost(other._dataHost) {} - - ~DPMatrix_() {} - - DPMatrix_ & operator=(DPMatrix_ const & other) - { - if (this != &other) - { - _dataHost = other._dataHost; - } - return *this; - } - }; // ============================================================================ @@ -101,7 +86,7 @@ resize(DPMatrix_ & dpMatrix) TSize _dimVertical = length(dpMatrix, DPMatrixDimension_::VERTICAL); - if (_dimVertical > 0) + if (_dimVertical >= length(dpMatrix)) resize(host(dpMatrix), _dimVertical, Exact()); } @@ -115,7 +100,7 @@ resize(DPMatrix_ & dpMatrix, TSize _dimVertical = length(dpMatrix, DPMatrixDimension_::VERTICAL); - if (_dimVertical > 0) + if (_dimVertical > length(dpMatrix)) resize(host(dpMatrix), _dimVertical, fillValue, Exact()); } @@ -162,4 +147,4 @@ coordinate(DPMatrix_ const & /*dpMatrix*/, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_MATRIX_SPARSE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_MATRIX_SPARSE_H_ diff --git a/seqan/align/dp_meta_info.h b/seqan/align/dp_meta_info.h index e89b423..06641a1 100644 --- a/seqan/align/dp_meta_info.h +++ b/seqan/align/dp_meta_info.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -40,8 +40,8 @@ // TODO(holtgrew): Documentation in this header necessary or internal only? 
-#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_META_INFO_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_META_INFO_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_META_INFO_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_META_INFO_H_ namespace seqan { @@ -172,7 +172,7 @@ struct DPMetaColumn_ {}; // ---------------------------------------------------------------------------- -// Class DPMetaColumn_ [FullColumn] +// Class DPMetaColumn_ [FullColumn] // ---------------------------------------------------------------------------- template @@ -221,7 +221,7 @@ struct DPMetaColumn_ > }; // ---------------------------------------------------------------------------- -// Class DPMetaColumn_ [PartialColumnTop] +// Class DPMetaColumn_ [PartialColumnTop] // ---------------------------------------------------------------------------- template @@ -273,7 +273,7 @@ struct DPMetaColumn_ @@ -322,7 +322,7 @@ struct DPMetaColumn_ @@ -450,4 +450,4 @@ struct TrackingEnabled_: } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_META_INFO_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_META_INFO_H_ diff --git a/seqan/align/dp_profile.h b/seqan/align/dp_profile.h index e3b0fb6..b925620 100644 --- a/seqan/align/dp_profile.h +++ b/seqan/align/dp_profile.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -41,8 +41,8 @@ // TODO(holtgrew): Documentation in this header necessary or internal only? 
-#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_PROFILE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_PROFILE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_PROFILE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_PROFILE_H_ namespace seqan { @@ -82,6 +82,8 @@ typedef Tag SplitBreakpointAlignment; template > struct GlobalAlignment_; +typedef GlobalAlignment_<> DPGlobal; + // ---------------------------------------------------------------------------- // Class SuboptimalAlignment // ---------------------------------------------------------------------------- @@ -103,6 +105,8 @@ typedef Tag SuboptimalAlignment; template struct LocalAlignment_; +typedef LocalAlignment_<> DPLocal; +typedef LocalAlignment_ DPLocalEnumerate; // ---------------------------------------------------------------------------- // Class TraceBitMap_ @@ -189,6 +193,22 @@ typedef Tag LinearGaps; struct AffineGaps_; typedef Tag AffineGaps; +// ---------------------------------------------------------------------------- +// Tag DynamicGaps +// ---------------------------------------------------------------------------- + +/*! + * @tag AlignmentAlgorithmTags#DynamicGaps + * @headerfile + * @brief Tag for selecting dynamic gap cost model. This tag can be used for all standard DP algorithms. 
+ * + * @signature struct DynamicGaps_; + * @signature typedef Tag DynamicGaps; + */ + +struct DynamicGaps_; +typedef Tag DynamicGaps; + // ---------------------------------------------------------------------------- // Class DPProfile // ---------------------------------------------------------------------------- @@ -232,6 +252,21 @@ typedef Tag DPLastRow; struct DPLastColumn_; typedef Tag DPLastColumn; +template , + typename TTraceConfig = TracebackOn > > +class AlignConfig2 +{ +public: + TBand _band; + + AlignConfig2() : _band() + {} + + template + AlignConfig2(TPosition const & lDiag, TPosition const & uDiag) : _band(lDiag, uDiag) + {} +}; + // ============================================================================ // Metafunctions // ============================================================================ @@ -453,4 +488,4 @@ struct IsFreeEndGap_ const } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_PROFILE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_PROFILE_H_ diff --git a/seqan/align/dp_scout.h b/seqan/align/dp_scout.h index 1a6283c..d488df8 100644 --- a/seqan/align/dp_scout.h +++ b/seqan/align/dp_scout.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -47,8 +47,8 @@ // see dp_scout_xdrop.h for an example. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_TEST_ALIGNMENT_DP_SCOUT_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_TEST_ALIGNMENT_DP_SCOUT_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_TEST_ALIGNMENT_DP_SCOUT_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_TEST_ALIGNMENT_DP_SCOUT_H_ namespace seqan { @@ -198,16 +198,15 @@ struct ScoutStateSpecForScout_ // ---------------------------------------------------------------------------- // Tracks the new score, if it is the new maximum. -template +template inline void _scoutBestScore(DPScout_ & dpScout, TDPCell const & activeCell, TTraceMatrixNavigator const & navigator, - bool isLastColumn = false, - bool isLastRow = false) + TIsLastColumn const & /**/, + TIsLastRow const & /**/) { - (void)isLastColumn; - (void)isLastRow; if (_scoreOfCell(activeCell) > _scoreOfCell(dpScout._maxScore)) { @@ -216,6 +215,28 @@ _scoutBestScore(DPScout_ & dpScout, } } +// TODO(rmaerker): Why is this needed? +template +inline void +_scoutBestScore(DPScout_ & dpScout, + TDPCell const & activeCell, + TTraceMatrixNavigator const & navigator, + TIsLastColumn const & /**/) +{ + return _scoutBestScore(dpScout, activeCell, navigator, TIsLastColumn(), + False()); +} + +// TODO(rmaerker): Why is this needed? 
+template +inline void +_scoutBestScore(DPScout_ & dpScout, + TDPCell const & activeCell, + TTraceMatrixNavigator const & navigator) +{ + return _scoutBestScore(dpScout, activeCell, navigator, False(), False()); +} + // ---------------------------------------------------------------------------- // Function maxScore() // ---------------------------------------------------------------------------- @@ -266,4 +287,4 @@ terminateScout(DPScout_ > & scout) } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_TEST_ALIGNMENT_DP_SCOUT_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_TEST_ALIGNMENT_DP_SCOUT_H_ diff --git a/seqan/align/dp_setup.h b/seqan/align/dp_setup.h index 63f802c..8e1ab28 100644 --- a/seqan/align/dp_setup.h +++ b/seqan/align/dp_setup.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Rene Rahn // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_SETUP_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_SETUP_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_SETUP_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_SETUP_H_ namespace seqan { @@ -45,6 +45,7 @@ namespace seqan { // Tags, Classes, Enums // ============================================================================ + // ============================================================================ // Metafunctions // ============================================================================ @@ -181,38 +182,52 @@ struct SubstituteAlignConfig_ > }; // ---------------------------------------------------------------------------- -// SetUpAlignmentProfile +// Metafunction SubstituteAlgoTag_ // ---------------------------------------------------------------------------- -template -struct SetupAlignmentProfile_; +// NOTE(rmaerker): Needed to substitute the global alingment algo tags to the correct gap model. +template +struct SubstituteAlgoTag_ +{ + typedef TTag Type; +}; -// Profile for Needleman-Wunsch algorithm. -template -struct SetupAlignmentProfile_ +template <> +struct SubstituteAlgoTag_ { - typedef typename SubstituteAlignConfig_::Type TFreeEndGaps_; - typedef DPProfile_, LinearGaps, TTraceSwitch> Type; + typedef LinearGaps Type; }; -// Profile for Gotoh algorithm. -template -struct SetupAlignmentProfile_ +template <> +struct SubstituteAlgoTag_ +{ + typedef AffineGaps Type; +}; + +// ---------------------------------------------------------------------------- +// SetUpAlignmentProfile +// ---------------------------------------------------------------------------- + +template +struct SetupAlignmentProfile_; + +// Profile for Needleman-Wunsch algorithm. 
+template +struct SetupAlignmentProfile_ { - typedef typename SubstituteAlignConfig_::Type TFreeEndGaps_; - typedef DPProfile_, AffineGaps, TTraceSwitch> Type; + typedef DPProfile_, TGapCosts, TTraceSwitch> Type; }; // Profile for Smith-Waterman algorithm. -template -struct SetupAlignmentProfile_ +template +struct SetupAlignmentProfile_ { typedef DPProfile_, TGapCosts, TTraceSwitch> Type; }; // Profile for Waterman-Eggert algorithm -template -struct SetupAlignmentProfile_ +template +struct SetupAlignmentProfile_ { typedef DPProfile_, TGapCosts, TracebackOn > > Type; }; @@ -223,682 +238,137 @@ struct SetupAlignmentProfile_ -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, - DPScoutState_ & dpScoutState, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - TAlgoTag const &, - TGapsTag const &) +template +inline bool +_usesAffineGaps(TScoringScheme const & scoringScheme, + TSeqH const & seqH, + TSeqV const & seqV) { - typedef Score TScoringScheme; - typedef typename SequenceEntryForScore::Type TSequenceHEntry; - typedef typename SequenceEntryForScore::Type TSequenceVEntry; - - SEQAN_ASSERT_GEQ(length(seqH), 1u); - SEQAN_ASSERT_GEQ(length(seqV), 1u); + typedef typename SequenceEntryForScore::Type TSequenceHEntry; + typedef typename SequenceEntryForScore::Type TSequenceVEntry; TSequenceHEntry seqHEntry = sequenceEntryForScore(scoringScheme, seqH, 0); TSequenceVEntry seqVEntry = sequenceEntryForScore(scoringScheme, seqV, 0); - if (scoreGapExtendHorizontal(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenHorizontal(scoringScheme, seqHEntry, seqVEntry) || - scoreGapExtendVertical(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenVertical(scoringScheme, seqHEntry, seqVEntry)) - { - typedef typename SetupAlignmentProfile_, AffineGaps, TracebackOn >::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(), TDPProfile()); - } - else - { - typedef typename 
SetupAlignmentProfile_, LinearGaps, TracebackOn >::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(), TDPProfile()); - } -} - -template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - TAlgoTag const & algoTag, - TGapsTag const & gapsTag) -{ - DPScoutState_ noState; - return _setUpAndRunAlignment(traceSegments, noState, seqH, seqV, scoringScheme, algoTag, gapsTag); + return (scoreGapExtendHorizontal(scoringScheme, seqHEntry, seqVEntry) != + scoreGapOpenHorizontal(scoringScheme, seqHEntry, seqVEntry)) || + (scoreGapExtendVertical(scoringScheme, seqHEntry, seqVEntry) != + scoreGapOpenVertical(scoringScheme, seqHEntry, seqVEntry)); } -template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, - DPScoutState_ & dpScoutState, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - TAlgoTag const & algoTag) -{ - return _setUpAndRunAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, algoTag, TracebackConfig_()); -} - -template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - TAlgoTag const & algoTag) -{ - DPScoutState_ noState; - return _setUpAndRunAlignment(traceSegments, noState, seqH, seqV, scoringScheme, algoTag, TracebackConfig_()); -} +// ---------------------------------------------------------------------------- +// Function _setUpAndRunAlignment() +// ---------------------------------------------------------------------------- -// Interface with AlignConfig. 
-template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, +template +typename Value >::Type +_setUpAndRunAlignment(DPContext & dpContext, + String & traceSegments, DPScoutState_ & dpScoutState, TSequenceH const & seqH, TSequenceV const & seqV, - Score const & scoringScheme, - AlignConfig const &, - TAlgoTag const &, - TGapsTag const &) + Score const & scoringScheme, + AlignConfig2 const & alignConfig) { - typedef Score TScoringScheme; - typedef typename SequenceEntryForScore::Type TSequenceHEntry; - typedef typename SequenceEntryForScore::Type TSequenceVEntry; - typedef AlignConfig TAlignConfig; - SEQAN_ASSERT_GEQ(length(seqH), 1u); SEQAN_ASSERT_GEQ(length(seqV), 1u); - TSequenceHEntry seqHEntry = sequenceEntryForScore(scoringScheme, seqH, 0); - TSequenceVEntry seqVEntry = sequenceEntryForScore(scoringScheme, seqV, 0); - - if (scoreGapExtendHorizontal(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenHorizontal(scoringScheme, seqHEntry, seqVEntry) || - scoreGapExtendVertical(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenVertical(scoringScheme, seqHEntry, seqVEntry)) - { - typedef typename SetupAlignmentProfile_ >::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(), TDPProfile()); - } - else - { - typedef typename SetupAlignmentProfile_ >::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(), TDPProfile()); - } + typedef typename SetupAlignmentProfile_::Type TDPProfile; + return _computeAlignment(dpContext, traceSegments, dpScoutState, seqH, seqV, scoringScheme, alignConfig._band, + TDPProfile()); } -template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, +template +typename Value >::Type +_setUpAndRunAlignment(DPContext & dpContext, + String & traceSegments, DPScoutState_ & dpScoutState, TSequenceH const & seqH, TSequenceV const & seqV, - Score const & scoringScheme, - AlignConfig const & 
alignConfig, - TAlgoTag const & algoTag) + Score const & scoringScheme, + AlignConfig2 const & alignConfig) { - return _setUpAndRunAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, alignConfig, algoTag, - TracebackConfig_()); -} - -template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - AlignConfig const & alignConfig, - TAlgoTag const & algoTag, - TGapsTag const & gapsTag) -{ - DPScoutState_ noState; - return _setUpAndRunAlignment(traceSegments, noState, seqH, seqV, scoringScheme, alignConfig, algoTag, gapsTag); -} - -template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - AlignConfig const & alignConfig, - TAlgoTag const & algoTag) -{ - DPScoutState_ noState; - return _setUpAndRunAlignment(traceSegments, noState, seqH, seqV, scoringScheme, alignConfig, algoTag, TracebackConfig_()); -} - -// Interface without AlignConfig and with traceback disabled. 
-template -typename Value >::Type -_setUpAndRunAlignment(DPScoutState_ & dpScoutState, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - TAlgoTag const &, - TGapsTag const & /*unused*/) -{ - typedef Score TScoringScheme; - typedef typename SequenceEntryForScore::Type TSequenceHEntry; - typedef typename SequenceEntryForScore::Type TSequenceVEntry; - SEQAN_ASSERT_GEQ(length(seqH), 1u); SEQAN_ASSERT_GEQ(length(seqV), 1u); - String > traceSegments; - - TSequenceHEntry seqHEntry = sequenceEntryForScore(scoringScheme, seqH, 0); - TSequenceVEntry seqVEntry = sequenceEntryForScore(scoringScheme, seqV, 0); - - if (scoreGapExtendHorizontal(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenHorizontal(scoringScheme, seqHEntry, seqVEntry) || - scoreGapExtendVertical(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenVertical(scoringScheme, seqHEntry, seqVEntry)) - { - typedef typename SetupAlignmentProfile_, AffineGaps, TracebackOff>::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(), TDPProfile()); - } - else - { - typedef typename SetupAlignmentProfile_, LinearGaps, TracebackOff>::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(), TDPProfile()); - } + typedef typename SetupAlignmentProfile_::Type TDPProfile; + return _computeAlignment(dpContext, traceSegments, dpScoutState, seqH, seqV, scoringScheme, alignConfig._band, + TDPProfile()); } -template -typename Value >::Type -_setUpAndRunAlignment(DPScoutState_ & dpScoutState, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - TAlgoTag const & algoTag) -{ - return _setUpAndRunAlignment(dpScoutState, seqH, seqV, scoringScheme, algoTag, TracebackConfig_()); -} - -template -typename Value >::Type -_setUpAndRunAlignment(TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - TAlgoTag const &, - TGapsTag const 
& gapsTag) -{ - DPScoutState_ noState; - return _setUpAndRunAlignment(noState, seqH, seqV, scoringScheme, gapsTag); -} - -template -typename Value >::Type -_setUpAndRunAlignment(TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - TAlgoTag const &) -{ - // Note that GapsLeft could be nothing, is unused in callee without traceback. - DPScoutState_ noState; - return _setUpAndRunAlignment(noState, seqH, seqV, scoringScheme, TracebackConfig_()); -} - -// Interface with AlignConfig and with traceback disabled. -template -typename Value >::Type -_setUpAndRunAlignment(DPScoutState_ & dpScoutState, +template +typename Value >::Type +_setUpAndRunAlignment(DPContext & dpContext, + String & traceSegments, + DPScoutState_ & dpScoutState, TSequenceH const & seqH, TSequenceV const & seqV, - Score const & scoringScheme, - AlignConfig const &, - TAlgoTag const &, - TGapsTag const & /*unused*/) + Score const & scoringScheme, + AlignConfig2 const & alignConfig) { - typedef AlignConfig TAlignConfig; - typedef Score TScoringScheme; - typedef typename SequenceEntryForScore::Type TSequenceHEntry; - typedef typename SequenceEntryForScore::Type TSequenceVEntry; - SEQAN_ASSERT_GEQ(length(seqH), 1u); SEQAN_ASSERT_GEQ(length(seqV), 1u); - String > traceSegments; - - TSequenceHEntry seqHEntry = sequenceEntryForScore(scoringScheme, seqH, 0); - TSequenceVEntry seqVEntry = sequenceEntryForScore(scoringScheme, seqV, 0); - - if (scoreGapExtendHorizontal(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenHorizontal(scoringScheme, seqHEntry, seqVEntry) || - scoreGapExtendVertical(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenVertical(scoringScheme, seqHEntry, seqVEntry)) - { - typedef typename SetupAlignmentProfile_::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(), TDPProfile()); - } - else - { - typedef typename SetupAlignmentProfile_::Type TDPProfile; - return _computeAlignment(traceSegments, 
dpScoutState, seqH, seqV, scoringScheme, DPBand_(), TDPProfile()); - } -} - -template -typename Value >::Type -_setUpAndRunAlignment(DPScoutState_ & dpScoutState, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - AlignConfig const & alignConfig, - TAlgoTag const &) -{ - // Note that GapsLeft could be nothing, is unused in callee without traceback. - return _setUpAndRunAlignment(dpScoutState, seqH, seqV, scoringScheme, alignConfig, TracebackConfig_()); -} - -template -typename Value >::Type -_setUpAndRunAlignment(TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - AlignConfig const & alignConfig, - TAlgoTag const & algoTag, - TGapsTag const & gapsTag) -{ - DPScoutState_ noState; - return _setUpAndRunAlignment(noState, seqH, seqV, scoringScheme, alignConfig, algoTag, gapsTag); -} - -template -typename Value >::Type -_setUpAndRunAlignment(TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - AlignConfig const & alignConfig, - TAlgoTag const & algoTag) -{ - DPScoutState_ noState; - // Note that GapsLeft could be nothing, is unused in callee without traceback. - return _setUpAndRunAlignment(noState, seqH, seqV, scoringScheme, alignConfig, algoTag, TracebackConfig_()); + typedef typename SetupAlignmentProfile_::Type TDPProfile; + return _computeAlignment(dpContext, traceSegments, dpScoutState, seqH, seqV, scoringScheme, alignConfig._band, + TDPProfile()); } -// ---------------------------------------------------------------------------- -// Function _setUpAndRunAlignment() [Banded] -// ---------------------------------------------------------------------------- - -// Interface without AlignConfig. 
-template -typename Value >::Type +template +typename Value >::Type _setUpAndRunAlignment(String & traceSegments, DPScoutState_ & dpScoutState, TSequenceH const & seqH, TSequenceV const & seqV, - Score const & scoringScheme, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const &, - TGapsTag const &) + Score const & scoringScheme, + AlignConfig2 const & alignConfig, + TGapModel const & /**/) { - typedef Score TScoringScheme; - typedef typename SequenceEntryForScore::Type TSequenceHEntry; - typedef typename SequenceEntryForScore::Type TSequenceVEntry; - - SEQAN_ASSERT_GEQ(length(seqH), 1u); - SEQAN_ASSERT_GEQ(length(seqV), 1u); - - TSequenceHEntry seqHEntry = sequenceEntryForScore(scoringScheme, seqH, 0); - TSequenceVEntry seqVEntry = sequenceEntryForScore(scoringScheme, seqV, 0); - - if (scoreGapExtendHorizontal(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenHorizontal(scoringScheme, seqHEntry, seqVEntry) || - scoreGapExtendVertical(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenVertical(scoringScheme, seqHEntry, seqVEntry)) - { - typedef typename SetupAlignmentProfile_, AffineGaps, TracebackOn >::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(lowerDiagonal, upperDiagonal), TDPProfile()); - } - else + if (IsSameType::VALUE) { - typedef typename SetupAlignmentProfile_, LinearGaps, TracebackOn >::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(lowerDiagonal, upperDiagonal), TDPProfile()); + DPContext dpContext; + return _setUpAndRunAlignment(dpContext, traceSegments, dpScoutState, seqH, seqV, scoringScheme, alignConfig); } -} - -template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, - DPScoutState_ & dpScoutState, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const & algoTag) -{ - // Note that GapsLeft could be 
nothing, is unused in callee without traceback. - return _setUpAndRunAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, lowerDiagonal, upperDiagonal, - algoTag, TracebackConfig_()); -} - -template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const & algoTag, - TGapsTag const & /*ignored*/) -{ - DPScoutState_ noState; - return _setUpAndRunAlignment(traceSegments, noState, seqH, seqV, scoringScheme, lowerDiagonal, upperDiagonal, algoTag); -} - -template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const & algoTag) -{ - // Note that GapsLeft could be nothing, is unused in callee without traceback. - return _setUpAndRunAlignment(traceSegments, seqH, seqV, scoringScheme, lowerDiagonal, upperDiagonal, algoTag, TracebackConfig_()); -} - -// Interface with AlignConfig. 
-template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, - DPScoutState_ & dpScoutState, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - TAlignConfig const &, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const &, - TGapsTag const &) -{ - typedef Score TScoringScheme; - typedef typename SequenceEntryForScore::Type TSequenceHEntry; - typedef typename SequenceEntryForScore::Type TSequenceVEntry; - - SEQAN_ASSERT_GEQ(length(seqH), 1u); - SEQAN_ASSERT_GEQ(length(seqV), 1u); - - TSequenceHEntry seqHEntry = sequenceEntryForScore(scoringScheme, seqH, 0); - TSequenceVEntry seqVEntry = sequenceEntryForScore(scoringScheme, seqV, 0); - - if (scoreGapExtendHorizontal(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenHorizontal(scoringScheme, seqHEntry, seqVEntry) || - scoreGapExtendVertical(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenVertical(scoringScheme, seqHEntry, seqVEntry)) + else if (IsSameType::VALUE) { - typedef typename SetupAlignmentProfile_ >::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(lowerDiagonal, upperDiagonal), TDPProfile()); + DPContext dpContext; + return _setUpAndRunAlignment(dpContext, traceSegments, dpScoutState, seqH, seqV, scoringScheme, alignConfig); } else { - typedef typename SetupAlignmentProfile_ >::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(lowerDiagonal, upperDiagonal), TDPProfile()); + DPContext dpContext; + return _setUpAndRunAlignment(dpContext, traceSegments, dpScoutState, seqH, seqV, scoringScheme, alignConfig); } } -template -typename Value >::Type +template +typename Value >::Type _setUpAndRunAlignment(String & traceSegments, DPScoutState_ & dpScoutState, TSequenceH const & seqH, TSequenceV const & seqV, - Score const & scoringScheme, - TAlignConfig const & alignConfig, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const & 
algoTag) + Score const & scoringScheme, + AlignConfig2 const & alignConfig) { - return _setUpAndRunAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, alignConfig, - lowerDiagonal, upperDiagonal, algoTag, TracebackConfig_()); -} - - -template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - TAlignConfig const & alignConfig, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const & algoTag, - TGapsTag const & gapsTag) -{ - DPScoutState_ noState; - return _setUpAndRunAlignment(traceSegments, noState, seqH, seqV, scoringScheme, alignConfig, lowerDiagonal, - upperDiagonal, algoTag, gapsTag); -} - -template -typename Value >::Type -_setUpAndRunAlignment(String & traceSegments, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - TAlignConfig const & alignConfig, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const & algoTag) -{ - return _setUpAndRunAlignment(traceSegments, seqH, seqV, scoringScheme, alignConfig, lowerDiagonal, upperDiagonal, - algoTag, TracebackConfig_()); -} - -// Interface without AlignConfig and with traceback disabled. 
-template -typename Value >::Type -_setUpAndRunAlignment(DPScoutState_ & dpScoutState, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const &, - TGapsTag const &) -{ -// typedef String const TSequenceH; -// typedef String const TSequenceV; - typedef Score TScoringScheme; - typedef typename SequenceEntryForScore::Type TSequenceHEntry; - typedef typename SequenceEntryForScore::Type TSequenceVEntry; - - SEQAN_ASSERT_GEQ(length(seqH), 1u); - SEQAN_ASSERT_GEQ(length(seqV), 1u); - - String > traceSegments; - - TSequenceHEntry seqHEntry = sequenceEntryForScore(scoringScheme, seqH, 0); - TSequenceVEntry seqVEntry = sequenceEntryForScore(scoringScheme, seqV, 0); - - if (scoreGapExtendHorizontal(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenHorizontal(scoringScheme, seqHEntry, seqVEntry) || - scoreGapExtendVertical(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenVertical(scoringScheme, seqHEntry, seqVEntry)) - { - typedef typename SetupAlignmentProfile_, AffineGaps, TracebackOff>::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(lowerDiagonal, upperDiagonal), TDPProfile()); - } + if (_usesAffineGaps(scoringScheme, seqH, seqV)) + return _setUpAndRunAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, alignConfig, AffineGaps()); else - { - typedef typename SetupAlignmentProfile_, LinearGaps, TracebackOff>::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(lowerDiagonal, upperDiagonal), TDPProfile()); - } -} - -template -typename Value >::Type -_setUpAndRunAlignment(DPScoutState_ & dpScoutState, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const & algoTag) -{ - // Note that GapsLeft could be nothing, is unused in callee without traceback. 
- return _setUpAndRunAlignment(dpScoutState, seqH, seqV, scoringScheme, lowerDiagonal, upperDiagonal, algoTag, - TracebackConfig_()); -} - -template -typename Value >::Type -_setUpAndRunAlignment(TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const & algoTag) -{ - DPScoutState_ noState; - return _setUpAndRunAlignment(noState, seqH, seqV, scoringScheme, lowerDiagonal, upperDiagonal, algoTag, TracebackConfig_()); -} - -// Interface with AlignConfig and with traceback disabled. -template -typename Value >::Type -_setUpAndRunAlignment(DPScoutState_ & dpScoutState, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - AlignConfig const &, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const &, - TGapsTag const & /*ignored*/) -{ - typedef AlignConfig TAlignConfig; -// typedef String const TSequenceH; -// typedef String const TSequenceV; - typedef Score TScoringScheme; - typedef typename SequenceEntryForScore::Type TSequenceHEntry; - typedef typename SequenceEntryForScore::Type TSequenceVEntry; - - SEQAN_ASSERT_GEQ(length(seqH), 1u); - SEQAN_ASSERT_GEQ(length(seqV), 1u); - - String > traceSegments; - - TSequenceHEntry seqHEntry = sequenceEntryForScore(scoringScheme, seqH, 0); - TSequenceVEntry seqVEntry = sequenceEntryForScore(scoringScheme, seqV, 0); - - if (scoreGapExtendHorizontal(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenHorizontal(scoringScheme, seqHEntry, seqVEntry) || - scoreGapExtendVertical(scoringScheme, seqHEntry, seqVEntry) != - scoreGapOpenVertical(scoringScheme, seqHEntry, seqVEntry)) - { - typedef typename SetupAlignmentProfile_::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, DPBand_(lowerDiagonal, upperDiagonal), TDPProfile()); - } - else - { - typedef typename SetupAlignmentProfile_::Type TDPProfile; - return _computeAlignment(traceSegments, dpScoutState, seqH, 
seqV, scoringScheme, DPBand_(lowerDiagonal, upperDiagonal), TDPProfile()); - } -} - -template -typename Value >::Type -_setUpAndRunAlignment(DPScoutState_ & dpScoutState, - TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - AlignConfig const & alignConfig, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const & algoTag) -{ - // Note that GapsLeft could be nothing, is unused in callee without traceback. - return _setUpAndRunAlignment(dpScoutState, seqH, seqV, scoringScheme, alignConfig, lowerDiagonal, upperDiagonal, - algoTag, TracebackConfig_()); -} - -template -typename Value >::Type -_setUpAndRunAlignment(TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - AlignConfig const & alignConfig, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const & algoTag, - TGapsTag const & /*ignored*/) -{ - DPScoutState_ noState; - return _setUpAndRunAlignment(noState, seqH, seqV, scoringScheme, alignConfig, lowerDiagonal, upperDiagonal, - algoTag); -} - -template -typename Value >::Type -_setUpAndRunAlignment(TSequenceH const & seqH, - TSequenceV const & seqV, - Score const & scoringScheme, - AlignConfig const & alignConfig, - int lowerDiagonal, - int upperDiagonal, - TAlgoTag const & algoTag) -{ - // Note that GapsLeft could be nothing, is unused in callee without traceback. 
- return _setUpAndRunAlignment(seqH, seqV, scoringScheme, alignConfig, lowerDiagonal, upperDiagonal, algoTag, TracebackConfig_()); + return _setUpAndRunAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, alignConfig, LinearGaps()); } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_SETUP_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_SETUP_H_ diff --git a/seqan/align/dp_trace_segment.h b/seqan/align/dp_trace_segment.h index c2fb366..a469ee5 100644 --- a/seqan/align/dp_trace_segment.h +++ b/seqan/align/dp_trace_segment.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,8 @@ // user-defined structure, such as Align or AlignmentGraph objects. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_TRACE_SEGMENT_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_TRACE_SEGMENT_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_TRACE_SEGMENT_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_TRACE_SEGMENT_H_ namespace seqan { @@ -338,4 +338,4 @@ inline void _recordSegment(TTraceSegments & traceSegments, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_TRACE_SEGMENT_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_TRACE_SEGMENT_H_ diff --git a/seqan/align/dp_traceback_adaptor.h b/seqan/align/dp_traceback_adaptor.h index e06674f..040daac 100644 --- a/seqan/align/dp_traceback_adaptor.h +++ b/seqan/align/dp_traceback_adaptor.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ // alignment representing structure. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_TRACEBACK_ADAPTOR_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_TRACEBACK_ADAPTOR_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_TRACEBACK_ADAPTOR_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_TRACEBACK_ADAPTOR_H_ namespace seqan { @@ -47,12 +47,7 @@ namespace seqan { template inline void _writeTraceSegmentToFile(TFile & file, TSeq0Value const & seq0Val, TSeq1Value const & seq1Val) { - streamPut(file, '('); - streamPut(file, seq0Val); - streamPut(file, ','); - streamPut(file, seq1Val); - streamPut(file, ')'); - streamPut(file, '\n'); + file << '(' << seq0Val << ',' << seq1Val << ")\n"; } // ---------------------------------------------------------------------------- @@ -144,13 +139,9 @@ _adaptTraceSegmentsTo(Graph > & g, // insert leading gaps TTraceSegment traceBegin = traceSegments[length(traceSegments) - 1]; if (_getBeginVertical(traceBegin) != 0) - { addVertex(g, seqVId, 0, _getBeginVertical(traceBegin)); - } if (_getBeginHorizontal(traceBegin) != 0) - { addVertex(g, seqHId, 0, _getBeginHorizontal(traceBegin)); - } for (TSize i = 0; i < length(traceSegments); ++i) @@ -169,22 +160,19 @@ _adaptTraceSegmentsTo(Graph > & g, case TraceBitMap_::HORIZONTAL: addVertex(g, seqHId, traceSegments[i]._horizontalBeginPos, traceSegments[i]._length); - break; } } // insert trailing gaps TTraceSegment traceEnd = traceSegments[0]; + if (_getEndVertical(traceEnd) != length(value(stringSet(g), idToPosition(stringSet(g), seqVId)))) - { addVertex(g, seqVId, _getEndVertical(traceEnd), length(value(stringSet(g), idToPosition(stringSet(g), seqVId))) - _getEndVertical(traceEnd)); - } + if (_getEndHorizontal(traceEnd) != length(value(stringSet(g), idToPosition(stringSet(g), seqHId)))) - { addVertex(g, seqHId, _getEndHorizontal(traceEnd), length(value(stringSet(g), idToPosition(stringSet(g), seqHId))) - _getEndHorizontal(traceEnd)); - } } // 
---------------------------------------------------------------------------- @@ -203,34 +191,29 @@ _adaptTraceSegmentsTo(TFile & file, { switch (traceSegments[k - 1]._traceValue) { - case TraceBitMap_::DIAGONAL: - { - int j = traceSegments[k - 1]._verticalBeginPos; - for (int i = traceSegments[k - 1]._horizontalBeginPos; i < (int) (traceSegments[k - 1]._horizontalBeginPos + traceSegments[k - 1]._length); ++i) + case TraceBitMap_::DIAGONAL: { - _writeTraceSegmentToFile(file, seqH[i], seqV[j]); - ++j; + int j = traceSegments[k - 1]._verticalBeginPos; + for (int i = traceSegments[k - 1]._horizontalBeginPos; i < (int) (traceSegments[k - 1]._horizontalBeginPos + traceSegments[k - 1]._length); ++i) + { + _writeTraceSegmentToFile(file, seqH[i], seqV[j]); + ++j; + } + break; } - break; - } - case TraceBitMap_::VERTICAL: - { - for (int i = traceSegments[k - 1]._verticalBeginPos; i < (int) (traceSegments[k - 1]._verticalBeginPos + traceSegments[k - 1]._length); ++i) + case TraceBitMap_::VERTICAL: { - _writeTraceSegmentToFile(file, gapValue(), seqV[i]); + for (int i = traceSegments[k - 1]._verticalBeginPos; i < (int) (traceSegments[k - 1]._verticalBeginPos + traceSegments[k - 1]._length); ++i) + _writeTraceSegmentToFile(file, gapValue(), seqV[i]); + break; } - break; - } - case TraceBitMap_::HORIZONTAL: - { - for (int i = traceSegments[k - 1]._horizontalBeginPos; i < (int) (traceSegments[k - 1]._horizontalBeginPos + traceSegments[k - 1]._length); ++i) + case TraceBitMap_::HORIZONTAL: { - _writeTraceSegmentToFile(file, seqH[i], gapValue()); + for (int i = traceSegments[k - 1]._horizontalBeginPos; i < (int) (traceSegments[k - 1]._horizontalBeginPos + traceSegments[k - 1]._length); ++i) + _writeTraceSegmentToFile(file, seqH[i], gapValue()); } - break; - } } } } @@ -250,20 +233,16 @@ _adaptTraceSegmentsTo(String, TStringSpec> & matc typedef Fragment TFragment; for (TSize2 i = 0; i < length(traceSegments); ++i) - { if (traceSegments[i]._traceValue == TraceBitMap_::DIAGONAL) - { 
appendValue( matches, TFragment(seqHId, traceSegments[i]._horizontalBeginPos, seqVId, traceSegments[i]._verticalBeginPos, traceSegments[i]._length), Generous()); - } - } } // ---------------------------------------------------------------------------- -// Function _adaptTraceSegmentsTo() [VertexDescriptor] +// Function _adaptTraceSegmentsTo() [VertexDescriptor] // ---------------------------------------------------------------------------- //// TODO (rmaerker): Check if we really need this! @@ -339,4 +318,4 @@ _adaptTraceSegmentsTo(String, TStringSpec> & matc } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_TRACEBACK_ADAPTOR_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_TRACEBACK_ADAPTOR_H_ diff --git a/seqan/align/dp_traceback_impl.h b/seqan/align/dp_traceback_impl.h index 5ae7587..b10e221 100644 --- a/seqan/align/dp_traceback_impl.h +++ b/seqan/align/dp_traceback_impl.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Implements the traceback algorithm. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_TRACEBACK_IMPL_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_TRACEBACK_IMPL_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_TRACEBACK_IMPL_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_DP_TRACEBACK_IMPL_H_ // TODO(holtgrew): GapsRight traceback is currently untested. 
// TODO(rmaerker): Change Tracback to TraceConfig | TraceBackOff @@ -69,7 +69,7 @@ class TracebackCoordinator_ template TracebackCoordinator_(TPosition currColumn, TPosition currRow, - DPBand_ const & band, + DPBandConfig const & band, TSizeH seqHSize, TSizeV seqVSize) : _currColumn(currColumn), @@ -88,7 +88,7 @@ class TracebackCoordinator_ TPosition currRow, TPosition endColumn, TPosition endRow, - DPBand_ const & band, + DPBandConfig const & band, TSizeH seqHSize, TSizeV seqVSize) : _currColumn(currColumn), @@ -144,11 +144,11 @@ _hasReachedEnd(TracebackCoordinator_ const & coordinator) template inline void _initTracebackCoordinator(TracebackCoordinator_ & coordinator, - DPBand_ const & band, + DPBandConfig const & band, TSizeH seqHSize, TSizeV seqVSize) { - typedef typename Position >::Type TBandPosition; + typedef typename Position >::Type TBandPosition; if (IsSameType::VALUE) { // Adapt the current column value when the lower diagonal is positive (shift right in horizontal direction). 
@@ -204,7 +204,7 @@ _doTracebackGoDiagonal(TTarget & target, { _recordSegment(target, tracebackCoordinator._currColumn, tracebackCoordinator._currRow, fragmentLength, lastTraceValue); - + lastTraceValue = TraceBitMap_::DIAGONAL; fragmentLength = 0; } @@ -469,7 +469,7 @@ _retrieveInitialTraceDirection(TTraceValue & traceValue, TDPProfile const & /*dp traceValue &= (TraceBitMap_::HORIZONTAL | TraceBitMap_::HORIZONTAL_OPEN | TraceBitMap_::MAX_FROM_HORIZONTAL_MATRIX); return TraceBitMap_::HORIZONTAL; } - return TraceBitMap_::DIAGONAL; // We set the last value to the + return TraceBitMap_::DIAGONAL; // We set the last value to the } if (traceValue & TraceBitMap_::DIAGONAL) @@ -493,7 +493,7 @@ void _computeTraceback(TTarget & target, unsigned maxHostPosition, TSequenceH const & seqH, TSequenceV const & seqV, - DPBand_ const & band, + DPBandConfig const & band, DPProfile_ const & dpProfile) { typedef typename Container::Type TContainer; @@ -561,7 +561,7 @@ void _computeTraceback(TTarget & target, DPScout_ const & dpScout, TSequenceH const & seqH, TSequenceV const & seqV, - DPBand_ const & band, + DPBandConfig const & band, DPProfile_ const & dpProfile) { _computeTraceback(target, matrixNavigator, maxHostPosition(dpScout), seqH, seqV, band, dpProfile); @@ -569,4 +569,4 @@ void _computeTraceback(TTarget & target, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_DP_TRACEBACK_IMPL_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_DP_TRACEBACK_IMPL_H_ diff --git a/seqan/align/evaluate_alignment.h b/seqan/align/evaluate_alignment.h index 076a446..b01b89b 100644 --- a/seqan/align/evaluate_alignment.h +++ b/seqan/align/evaluate_alignment.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // 
All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Compute alignment score given a pairwise alignment. // ========================================================================== -#ifndef CORE_INCLUDE_SEQAN_ALIGN_EVALUATE_ALIGNMENT_H_ -#define CORE_INCLUDE_SEQAN_ALIGN_EVALUATE_ALIGNMENT_H_ +#ifndef INCLUDE_SEQAN_ALIGN_EVALUATE_ALIGNMENT_H_ +#define INCLUDE_SEQAN_ALIGN_EVALUATE_ALIGNMENT_H_ namespace seqan { @@ -56,7 +56,19 @@ namespace seqan { * @headerfile * @brief Statistics about a tabular alignment. * - * The default constructor initializes all members to 0. + * @signature struct AlignmentStats; + * + * @see computeAlignmentStats + * + * @fn AlignmentStats::AlignmentStats + * @brief Constructor + * + * @signature AlignmentStats::AlignmentStats(); + * + * All members are initialized to 0. + * + * @var unsigned AlignmentStats::numGaps; + * @brief Number of gap characters (sum of numGapOpens and numGapExtensions) * * @var unsigned AlignmentStats::numGapOpens; * @brief Number of gap open events. @@ -64,6 +76,12 @@ namespace seqan { * @var unsigned AlignmentStats::numGapExtensions; * @brief Number of gap extension events. * + * @var unsigned AlignmentStats::numInsertions; + * @brief Number of gaps in reference relative to query. + * + * @var unsigned AlignmentStats::numDeletions; + * @brief Number of gaps in query relative to reference. + * * @var unsigned AlignmentStats::numMatches; * @brief Number of match (identity) events. * @@ -76,15 +94,28 @@ namespace seqan { * @var unsigned AlignmentStats::numNegativeScores; * @brief Number of residues aligned with negative score. * + * @var unsigned AlignmentStats::alignmentLength; + * @brief Length of the aligned region + * + * @var float AlignmentStats::alignmentSimilarity; + * @brief The resulting alignment percent similarity (positive). + * + * @var float AlignmentStats::alignmentIdentity; + * @brief The resulting alignment percent identity (match). 
+ * * @var int AlignmentStats::alignmentScore; * @brief The resulting alignment score. */ struct AlignmentStats { - // Number of gap opens/gap extensions. + // Number of gap characters/opens/gap extensions. + unsigned numGaps; unsigned numGapOpens; unsigned numGapExtensions; + // Number of insertions and deletions. + unsigned numInsertions; + unsigned numDeletions; // Number of matches, mismatches. unsigned numMatches; unsigned numMismatches; @@ -92,11 +123,19 @@ struct AlignmentStats unsigned numPositiveScores; unsigned numNegativeScores; + // length of the alignment + unsigned alignmentLength; + + // the alignment identity and similarity scores + float alignmentSimilarity; + float alignmentIdentity; + // The alignment score. int alignmentScore; - AlignmentStats() : numGapOpens(0), numGapExtensions(0), numMatches(0), numMismatches(0), - numPositiveScores(0), numNegativeScores(0), alignmentScore(0) + AlignmentStats() : numGaps(0), numGapOpens(0), numGapExtensions(0), numInsertions(0), numDeletions(0), + numMatches(0), numMismatches(0), numPositiveScores(0), numNegativeScores(0), + alignmentLength(0), alignmentSimilarity(0.0), alignmentIdentity(0.0), alignmentScore(0) {} }; @@ -114,7 +153,7 @@ struct AlignmentStats /*! * @fn AlignmentStats#clear - * @brief Clear AlignmentStats object. + * @brief Resets all members to 0. * * @signature void clear(stats); * @@ -124,12 +163,18 @@ struct AlignmentStats inline void clear(AlignmentStats & stats) { + stats.numGaps = 0; stats.numGapOpens = 0; stats.numGapExtensions = 0; + stats.numInsertions = 0; + stats.numDeletions = 0; stats.numMatches = 0; stats.numMismatches = 0; stats.numPositiveScores = 0; stats.numNegativeScores = 0; + stats.alignmentLength = 0; + stats.alignmentSimilarity = 0.0; + stats.alignmentIdentity = 0.0; stats.alignmentScore = 0; } @@ -142,42 +187,47 @@ void clear(AlignmentStats & stats) * @headerfile * @brief Compute alignment statistics. 
* - * @signature TScoreVal computeAlignmentStats([stats, ]align, scoringScheme); + * @signature TScoreVal computeAlignmentStats(stats, align, scoringScheme); + * @signature TScoreVal computeAlignmentStats(stats, row0, row1, scoringScheme); * * @param[out] stats The @link AlignmentStats @endlink object to store alignment statistics in. * @param[in] align The @link Align @endlink object to score. + * @param[in] row0 The first row (@link Gaps @endlink object). + * @param[in] row1 The second row (@link Gaps @endlink object). * @param[in] score The @link Score @endlink object to use for the scoring scheme. * + * @return TScoreVal The score value of the alignment, of the same type as the value type of scoringScheme + * * @see AlignmentStats * * @section Examples * - * @include demos/align/compute_alignment_stats.cpp + * @include demos/dox/align/compute_alignment_stats.cpp * * The output is as follows: * - * @include demos/align/compute_alignment_stats.cpp.stdout + * @include demos/dox/align/compute_alignment_stats.cpp.stdout */ -template +template TScoreVal computeAlignmentStats(AlignmentStats & stats, - Align const & align, + Gaps const & row0, + Gaps const & row1, Score const & scoringScheme) { - SEQAN_ASSERT_EQ_MSG(length(rows(align)), 2u, "Only works with pairwise alignments."); - SEQAN_ASSERT_EQ_MSG(length(row(align, 0)), length(row(align, 1)), "Invalid alignment!"); clear(stats); - typedef Align const TAlign; - typedef typename Row::Type TGaps; - typedef typename Iterator::Type TGapsIter; - typedef typename Value::Type>::Type TAlphabet; + typedef typename Iterator const, Standard>::Type TGapsIter0; + typedef typename Iterator const, Standard>::Type TGapsIter1; + typedef typename Value::Type TAlphabet; // Get iterators. 
- TGapsIter it0 = begin(row(align, 0)); - TGapsIter itEnd0 = end(row(align, 0)); - TGapsIter it1 = begin(row(align, 1)); - TGapsIter itEnd1 = end(row(align, 1)); + TGapsIter0 it0 = begin(row0); + TGapsIter0 itEnd0 = end(row0); + TGapsIter1 it1 = begin(row1); + TGapsIter1 itEnd1 = end(row1); // State whether we have already opened a gap. bool isGapOpen0 = false, isGapOpen1 = false; @@ -186,7 +236,7 @@ TScoreVal computeAlignmentStats(AlignmentStats & stats, { if (isGap(it0)) { - if (isGapOpen0) + if (!isGapOpen0) { stats.numGapOpens += 1; stats.alignmentScore += scoreGapOpen(scoringScheme); @@ -196,6 +246,7 @@ TScoreVal computeAlignmentStats(AlignmentStats & stats, stats.numGapExtensions += 1; stats.alignmentScore += scoreGapExtend(scoringScheme); } + stats.numDeletions += 1; isGapOpen0 = true; } else @@ -215,6 +266,7 @@ TScoreVal computeAlignmentStats(AlignmentStats & stats, stats.numGapExtensions += 1; stats.alignmentScore += scoreGapExtend(scoringScheme); } + stats.numInsertions += 1; isGapOpen1 = true; } else @@ -225,12 +277,13 @@ TScoreVal computeAlignmentStats(AlignmentStats & stats, if (!isGap(it0) && !isGap(it1)) { // Compute the alignment score and register in stats. - TAlphabet c0 = *it0, c1 = *it1; + TAlphabet c0 = *it0; + TAlphabet c1 = static_cast(*it1); TScoreVal scoreVal = score(scoringScheme, c0, c1); stats.alignmentScore += scoreVal; // Register other statistics. 
bool isMatch = (c0 == c1); - bool isPositive = (scoreVal >= 0); + bool isPositive = (scoreVal > 0); stats.numMatches += isMatch; stats.numMismatches += !isMatch; stats.numPositiveScores += isPositive; @@ -240,9 +293,30 @@ TScoreVal computeAlignmentStats(AlignmentStats & stats, SEQAN_ASSERT(it0 == itEnd0); SEQAN_ASSERT(it1 == itEnd1); + stats.numGaps = stats.numGapOpens + stats.numGapExtensions; + + // Finally, compute the alignment similarity from the various counts + stats.alignmentLength = length(row0); + stats.alignmentSimilarity = 100.0 * static_cast(stats.numPositiveScores) + / static_cast(stats.alignmentLength); + stats.alignmentIdentity = 100.0 * static_cast(stats.numMatches) + / static_cast(stats.alignmentLength); + return stats.alignmentScore; } +template +TScoreVal computeAlignmentStats(AlignmentStats & stats, + Align const & align, + Score const & scoringScheme) +{ + SEQAN_ASSERT_EQ_MSG(length(rows(align)), 2u, "Only works with pairwise alignments."); + SEQAN_ASSERT_EQ_MSG(length(row(align, 0)), length(row(align, 1)), "Invalid alignment!"); + + return computeAlignmentStats(stats, row(align, 0), row(align, 1), scoringScheme); +} + +// NOTE(h-2): this interface is deprecated. Don't use it. 
template TScoreVal computeAlignmentStats(Align const & align, Score const & scoringScheme) @@ -254,4 +328,4 @@ TScoreVal computeAlignmentStats(Align const & align, } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_ALIGN_EVALUATE_ALIGNMENT_H_ +#endif // #ifndef INCLUDE_SEQAN_ALIGN_EVALUATE_ALIGNMENT_H_ diff --git a/seqan/graph_types/graph_impl_fragment.h b/seqan/align/fragment.h similarity index 62% rename from seqan/graph_types/graph_impl_fragment.h rename to seqan/align/fragment.h index fa3a65c..dcdb6ac 100644 --- a/seqan/graph_types/graph_impl_fragment.h +++ b/seqan/align/fragment.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -29,14 +29,15 @@ // DAMAGE. // // ========================================================================== +// Author: Tobias Rausch +// ========================================================================== -#ifndef SEQAN_HEADER_GRAPH_IMPL_FRAGMENT_H -#define SEQAN_HEADER_GRAPH_IMPL_FRAGMENT_H +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_FRAGMENT_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_FRAGMENT_H_ -namespace SEQAN_NAMESPACE_MAIN +namespace seqan { - ////////////////////////////////////////////////////////////////////////////// // Fragment Specs ////////////////////////////////////////////////////////////////////////////// @@ -44,7 +45,7 @@ namespace SEQAN_NAMESPACE_MAIN /*! * @class ExactFragment * @extends Fragment - * @headerfile + * @headerfile * @brief A type for ungapped, pairwise segment matches. * * @signature template <[typename TSize[, typename TSpec]]> @@ -54,28 +55,14 @@ namespace SEQAN_NAMESPACE_MAIN * @tparam TSpec Specializing type. 
Default: ExactFragment<>. */ -/** -.Spec.ExactFragment -..cat:Alignments -..general:Class.Fragment -..summary:A type for ungapped, pairwise segment matches. -..signature:Fragment > -..param.TSize: The Size type of the underlying sequences. -...metafunction:Metafunction.Size -..param.TSpec:The specializing type for the graph. -...metafunction:Metafunction.Spec -..include:seqan/graph_types.h -..see:Spec.ExactReversableFragment -*/ - template -struct ExactFragment; +struct ExactFragment; /*! * @class ExactReversableFragment * @extends Fragment - * @headerfile + * @headerfile * @brief A type for ungapped, pairwise segment matches that maybe in reverse orientation. * * Compared to the @link ExactFragment @endlink specialzing type of @link Fragment @endlink, a @link @@ -89,23 +76,8 @@ struct ExactFragment; * @tparam TSpec Specializing type. Default: ExactFragment<>. */ -/** -.Spec.ExactReversableFragment -..cat:Alignments -..general:Class.Fragment -..summary:A type for ungapped, pairwise segment matches that may be in reverse orientation. -..signature:Fragment > -..param.TSize: The Size type of the underlying sequences. -...metafunction:Metafunction.Size -..param.TSpec:The specializing type for the graph. -...metafunction:Metafunction.Spec -..remarks:Compared to the @Spec.ExactFragment@ specialzing type of @Class.Fragment@, a @Spec.ExactReversableFragment@ stores an additional bool value to indicate whether a match is in reverse orientation or not. -..include:seqan/graph_types.h -..see:Spec.ExactFragment -*/ - template -struct ExactReversableFragment; +struct ExactReversableFragment; ////////////////////////////////////////////////////////////////////////////// @@ -114,7 +86,7 @@ struct ExactReversableFragment; /*! * @class Fragment - * @headerfile + * @headerfile * @brief A type for pairwise segment matches. * * @signature template <[typename TSize[, typename TSpec]]> @@ -129,7 +101,7 @@ struct ExactReversableFragment; * // Construct fragment. 
* unsigned seqId1 = 0, beg1 = 0, seqId2 = 32, beg2 = 42, len = 33; * Fragment<> fragment(seqId1, beg1, seqId2, beg2, len); - * + * * // Update fragment's properties. * fragmentBegin(fragment, 0) = 10; * fragmentBegin(fragment, 1) = 10; @@ -139,31 +111,6 @@ struct ExactReversableFragment; * @endcode */ -/** -.Class.Fragment: -..cat:Alignments -..summary:A type for ungapped, pairwise segment matches. -..signature:Fragment -..param.TSize:The size type of the underlying sequences. -...metafunction:Metafunction.Size -..param.TSpec:The specializing type. -...metafunction:Metafunction.Spec -...default:@Spec.ExactFragment@ -..include:seqan/graph_types.h -..example:A small example using fragments. -..example.code: -// Construct fragment. -unsigned seqId1 = 0, beg1 = 0, seqId2 = 32, beg2 = 42, len = 33; -Fragment<> fragment(seqId1, beg1, seqId2, beg2, len); - -// Update fragment's properties. -fragmentBegin(fragment, 0) = 10; -fragmentBegin(fragment, 1) = 10; -sequenceId(fragment, 0) = 33; -sequenceId(fragment, 1) = 44; -fragmentLength(fragment) += 42; -*/ - template >::Type, typename TSpec = ExactFragment<> > class Fragment; @@ -175,20 +122,20 @@ class Fragment; template struct Size > { - typedef TSize Type; + typedef TSize Type; }; template struct Size const> { - typedef TSize Type; + typedef TSize Type; }; ////////////////////////////////////////////////////////////////////////////// // Exact Fragment ////////////////////////////////////////////////////////////////////////////// - + template class Fragment > { @@ -214,26 +161,11 @@ class Fragment > { * @param[in] beg2 Begin position of segment match in second sequence. Type: TSize. * @param[in] l The length of the segment match. Type: TSize. */ - -/** -.Memfunc.ExactFragment#Fragment: -..class:Spec.ExactFragment -..summary:Constructor. -..signature:Fragment() -..signature:Fragment(seqId1, beg1, seqId2, beg2, len) -..param.seqId1:The id of the first sequence. 
-...type:Metafunction.Id -..param.beg1:The TSize begin position on the first sequence. -..param.seqId2:The id of the second sequence. -...type:Metafunction.Id -..param.beg2:The TSize begin position on the second sequence. -..param.len:The TSize length of the segment match. -*/ Fragment() : seqId1(0), begin1(0), seqId2(0), begin2(0), len(0) {} Fragment(TId sqId1, TSize beg1, TId sqId2, TSize beg2, TSize l) : - seqId1(sqId1), begin1(beg1), seqId2(sqId2), begin2(beg2), len(l) + seqId1(sqId1), begin1(beg1), seqId2(sqId2), begin2(beg2), len(l) {} }; @@ -281,7 +213,7 @@ operator<(Fragment > const & left, ////////////////////////////////////////////////////////////////////////////// // Exact Fragment that is a forward or reverse match ////////////////////////////////////////////////////////////////////////////// - + template class Fragment > { @@ -310,32 +242,15 @@ class Fragment > { * @param[in] reversed A bool; true if the segments match in reverse orientation, false otherwise. */ -/** -.Memfunc.ExactReversableFragment#Fragment: -..class:Spec.ExactReversableFragment -..summary:Constructor. -..signature:Fragment() -..signature:Fragment(seqId1, beg1, seqId2, beg2, len[, reversed]) -..param.seqId1:The id of the first sequence. -...type:Metafunction.Id -..param.beg1:The TSize begin position on the first sequence. -..param.seqId2:The id of the second sequence. -...type:Metafunction.Id -..param.beg2:The TSize begin position on the second sequence. -..param.len:The TSize length of the segment match. -..param.reversed:$true$ if the segments match in reverse orientation, $false$ otherwise. 
-...default:$false$ -...type:nolink:$bool$ -*/ - + Fragment() : seqId1(0), begin1(0), seqId2(0), begin2(0), len(0), reversed(false) {} - + Fragment(TId_ sqId1, TSize beg1, TId_ sqId2, TSize beg2, TSize l) : - seqId1(sqId1), begin1(beg1), seqId2(sqId2), begin2(beg2), len(l), reversed(false) + seqId1(sqId1), begin1(beg1), seqId2(sqId2), begin2(beg2), len(l), reversed(false) {} - + Fragment(TId_ sqId1, TSize beg1, TId_ sqId2, TSize beg2, TSize l, bool rev) : - seqId1(sqId1), begin1(beg1), seqId2(sqId2), begin2(beg2), len(l), reversed(rev) + seqId1(sqId1), begin1(beg1), seqId2(sqId2), begin2(beg2), len(l), reversed(rev) {} }; @@ -397,25 +312,15 @@ operator<(Fragment > const & left, * @param[in] seqID The id of the sequence for which the label should be retrieved. */ -/** -.Function.label -..class:Class.Fragment -..signature:label(f,str,seqId) -..param.f:A fragment. -...type:Class.Fragment -..param.str:The string set underlying the fragment. -..param.seqId:The id of the sequence for which the label should be retrieved. -...remarks: -*/ template inline typename Infix::Type>::Type label(Fragment const& f, TStringSet& str, TVal const seqId) { - SEQAN_CHECKPOINT - typedef typename Id >::Type TId; - return ((TId) seqId == (f.seqId1)) ? infix(getValueById(str, (TId) seqId), f.begin1, f.begin1 + f.len) : infix(getValueById(str, (TId) seqId), f.begin2, f.begin2 + f.len); + SEQAN_CHECKPOINT + typedef typename Id >::Type TId; + return ((TId) seqId == (f.seqId1)) ? infix(getValueById(str, (TId) seqId), f.begin1, f.begin1 + f.len) : infix(getValueById(str, (TId) seqId), f.begin2, f.begin2 + f.len); } ////////////////////////////////////////////////////////////////////////////// @@ -434,30 +339,21 @@ label(Fragment const& f, * @return TId Reference to the sequence fragment id member. */ -/** -.Function.sequenceId -..class:Class.Fragment -..signature:sequenceId(f,seqNum) -..param.f:A fragment. 
-...type:Class.Fragment -..param.seqNum:The sequence number for which the id should be retrieved. -...remarks:Note that @Class.Fragment@ stores information about exactly two sequences which can be accessed with seqNum 0 or 1, but whose ids may differ from their seqNum. -*/ template inline typename Id >::Type & sequenceId(Fragment const& f, - TVal const seqId) + TVal const seqId) { - SEQAN_CHECKPOINT - typedef typename Id >::Type TId; - return ((TId) seqId == 0) ? const_cast(f.seqId1) : const_cast(f.seqId2); + SEQAN_CHECKPOINT + typedef typename Id >::Type TId; + return ((TId) seqId == 0) ? const_cast(f.seqId1) : const_cast(f.seqId2); } ////////////////////////////////////////////////////////////////////////////// /*! * @fn Fragment#fragmentBegin - * @label Return fragment begin. + * @brief Return fragment begin. * * @signature TSize fragmentBegin(frag, seqId); * @@ -467,23 +363,14 @@ sequenceId(Fragment const& f, * @return TSize Reference to the fragment begin position member. */ -/** -.Function.fragmentBegin -..class:Class.Fragment -..signature:fragmentBegin(f, seqId) -..param.f:A fragment. -...type:Class.Fragment -..param.seqId:The sequence id for which the begin position should be retrieved. -...remarks:Retrieve with @Function.sequenceId@. -*/ template inline TSize& fragmentBegin(Fragment const& f, - TVal const seqId) + TVal const seqId) { - SEQAN_CHECKPOINT - typedef typename Id >::Type TId; - return ((TId) seqId == f.seqId1) ? const_cast(f.begin1) : const_cast(f.begin2); + SEQAN_CHECKPOINT + typedef typename Id >::Type TId; + return ((TId) seqId == f.seqId1) ? 
const_cast(f.begin1) : const_cast(f.begin2); } ////////////////////////////////////////////////////////////////////////////// @@ -491,10 +378,10 @@ fragmentBegin(Fragment const& f, template inline TSize& fragmentLength(Fragment const& f, - TVal const) + TVal const) { - SEQAN_CHECKPOINT - return const_cast(f.len); + SEQAN_CHECKPOINT + return const_cast(f.len); } ////////////////////////////////////////////////////////////////////////////// @@ -510,65 +397,40 @@ fragmentLength(Fragment const& f, * @return TSize Reference to the Fragment's length. */ -/** -.Function.fragmentLength -..class:Class.Fragment -..signature:fragmentBegin(f) -..param.f:A fragment. -...type:Class.Fragment -*/ template inline TSize& fragmentLength(Fragment const& f) { - SEQAN_CHECKPOINT - return const_cast(f.len); + SEQAN_CHECKPOINT + return const_cast(f.len); } ////////////////////////////////////////////////////////////////////////////// -/** -.Function.getProjectedPosition -..cat:Alignments -..class:Class.Fragment -..signature:getProjectedPosition(f,seqId,pos,seqId2,pos2) -..summary:Projects a position of one sequence taking part in a pairwise match onto the other sequence. -..signature:getProjectedPosition(f,seqId1,pos1,seqId2,pos2) -..param.f:A fragment. -...type:Class.Fragment -..param.seqId:The id of the sequence to project from. -...type:Metafunction.Id -..param.pos:The position to project. -...type:Metafunction.Size -..param.seqId2:The resulting id of the sequence that pos was projected onto. -...type:Metafunction.Id -..param.pos2:The resulting projected position. 
-...type:Metafunction.Size -*/ template inline void getProjectedPosition(Fragment > const& f, - TId1 const seqId, - TPosition1 const pos, - TId2& seqId2, - TPosition2& pos2) + TId1 const seqId, + TPosition1 const pos, + TId2& seqId2, + TPosition2& pos2) { - SEQAN_CHECKPOINT - typedef typename Id >::Type TId; - - if ((TId) seqId == f.seqId1) { - SEQAN_ASSERT((TPosition1)f.begin1<=pos); - SEQAN_ASSERT(pos - f.begin1 < f.len) ; - pos2 = f.begin2 + (pos - f.begin1); - seqId2 = f.seqId2; - return; - } else { - SEQAN_ASSERT((TPosition1)f.begin2<=pos); - SEQAN_ASSERT(pos - f.begin2 < f.len); - pos2 = f.begin1 + (pos - f.begin2); - seqId2 = f.seqId1; - return; - } + SEQAN_CHECKPOINT + typedef typename Id >::Type TId; + + if ((TId) seqId == f.seqId1) { + SEQAN_ASSERT((TPosition1)f.begin1<=pos); + SEQAN_ASSERT(pos - f.begin1 < f.len) ; + pos2 = f.begin2 + (pos - f.begin1); + seqId2 = f.seqId2; + return; + } else { + SEQAN_ASSERT((TPosition1)f.begin2<=pos); + SEQAN_ASSERT(pos - f.begin2 < f.len); + pos2 = f.begin1 + (pos - f.begin2); + seqId2 = f.seqId1; + return; + } } @@ -577,28 +439,28 @@ getProjectedPosition(Fragment > const& f, template inline void getProjectedPosition(Fragment > const& f, - TValue seg_num, - TId1 const seqId, - TPosition1 const pos, - TId2& seqId2, - TPosition2& pos2) + TValue seg_num, + TId1 const seqId, + TPosition1 const pos, + TId2& seqId2, + TPosition2& pos2) { - (void) seqId; // When compiled without assertions. - SEQAN_ASSERT((seg_num == 0 && seqId == f.seqId1) || (seg_num == 1 && seqId == f.seqId2)); - - if (seg_num == 0) { - SEQAN_ASSERT((TPosition1)f.begin1<=pos); - SEQAN_ASSERT(pos - f.begin1 < f.len) ; - pos2 = f.begin2 + (pos - f.begin1); - seqId2 = f.seqId2; - return; - } else { - SEQAN_ASSERT((TPosition1)f.begin2<=pos); - SEQAN_ASSERT(pos - f.begin2 < f.len); - pos2 = f.begin1 + (pos - f.begin2); - seqId2 = f.seqId1; - return; - } + (void) seqId; // When compiled without assertions. 
+ SEQAN_ASSERT((seg_num == 0 && seqId == f.seqId1) || (seg_num == 1 && seqId == f.seqId2)); + + if (seg_num == 0) { + SEQAN_ASSERT((TPosition1)f.begin1<=pos); + SEQAN_ASSERT(pos - f.begin1 < f.len) ; + pos2 = f.begin2 + (pos - f.begin1); + seqId2 = f.seqId2; + return; + } else { + SEQAN_ASSERT((TPosition1)f.begin2<=pos); + SEQAN_ASSERT(pos - f.begin2 < f.len); + pos2 = f.begin1 + (pos - f.begin2); + seqId2 = f.seqId1; + return; + } } @@ -608,29 +470,29 @@ getProjectedPosition(Fragment > const& f, template inline void getProjectedPosition(Fragment > const& f, - TId1 const seqId, - TPosition1 const pos, - TId2& seqId2, - TPosition2& pos2) + TId1 const seqId, + TPosition1 const pos, + TId2& seqId2, + TPosition2& pos2) { - SEQAN_CHECKPOINT - typedef typename Id >::Type TId; - - if ((TId) seqId == f.seqId1) { - SEQAN_ASSERT((TPosition1)f.begin1<=pos); - SEQAN_ASSERT(pos - f.begin1 < f.len) ; - if (f.reversed) pos2 = (f.begin2 + f.len - 1) - (pos - f.begin1); - else pos2 = f.begin2 + (pos - f.begin1); - seqId2 = f.seqId2; - return; - } else { - SEQAN_ASSERT((TPosition1)f.begin2<=pos); - SEQAN_ASSERT(pos - f.begin2 < f.len); - if (f.reversed) pos2 = (f.begin1 + f.len - 1) - (pos - f.begin2); - else pos2 = f.begin1 + (pos - f.begin2); - seqId2 = f.seqId1; - return; - } + SEQAN_CHECKPOINT + typedef typename Id >::Type TId; + + if ((TId) seqId == f.seqId1) { + SEQAN_ASSERT((TPosition1)f.begin1<=pos); + SEQAN_ASSERT(pos - f.begin1 < f.len) ; + if (f.reversed) pos2 = (f.begin2 + f.len - 1) - (pos - f.begin1); + else pos2 = f.begin2 + (pos - f.begin1); + seqId2 = f.seqId2; + return; + } else { + SEQAN_ASSERT((TPosition1)f.begin2<=pos); + SEQAN_ASSERT(pos - f.begin2 < f.len); + if (f.reversed) pos2 = (f.begin1 + f.len - 1) - (pos - f.begin2); + else pos2 = f.begin1 + (pos - f.begin2); + seqId2 = f.seqId1; + return; + } } @@ -639,31 +501,31 @@ getProjectedPosition(Fragment > const& f, template inline void getProjectedPosition(Fragment > const& f, - TValue seg_num, - TId1 const 
seqId, - TPosition1 const pos, - TId2& seqId2, - TPosition2& pos2) + TValue seg_num, + TId1 const seqId, + TPosition1 const pos, + TId2& seqId2, + TPosition2& pos2) { - SEQAN_CHECKPOINT - (void) seqId; // When compiled without assertions. - SEQAN_ASSERT((seg_num == 0 && seqId==f.seqId1) || (seg_num == 1 && seqId==f.seqId2)); - - if (seg_num == 0) { - SEQAN_ASSERT((TPosition1)f.begin1<=pos); - SEQAN_ASSERT(pos - f.begin1 < f.len) ; - if (f.reversed) pos2 = (f.begin2 + f.len - 1) - (pos - f.begin1); - else pos2 = f.begin2 + (pos - f.begin1); - seqId2 = f.seqId2; - return; - } else { - SEQAN_ASSERT((TPosition1)f.begin2<=pos); - SEQAN_ASSERT(pos - f.begin2 < f.len); - if (f.reversed) pos2 = (f.begin1 + f.len - 1) - (pos - f.begin2); - else pos2 = f.begin1 + (pos - f.begin2); - seqId2 = f.seqId1; - return; - } + SEQAN_CHECKPOINT + (void) seqId; // When compiled without assertions. + SEQAN_ASSERT((seg_num == 0 && seqId==f.seqId1) || (seg_num == 1 && seqId==f.seqId2)); + + if (seg_num == 0) { + SEQAN_ASSERT((TPosition1)f.begin1<=pos); + SEQAN_ASSERT(pos - f.begin1 < f.len) ; + if (f.reversed) pos2 = (f.begin2 + f.len - 1) - (pos - f.begin1); + else pos2 = f.begin2 + (pos - f.begin1); + seqId2 = f.seqId2; + return; + } else { + SEQAN_ASSERT((TPosition1)f.begin2<=pos); + SEQAN_ASSERT(pos - f.begin2 < f.len); + if (f.reversed) pos2 = (f.begin1 + f.len - 1) - (pos - f.begin2); + else pos2 = f.begin1 + (pos - f.begin2); + seqId2 = f.seqId1; + return; + } } ////////////////////////////////////////////////////////////////////////////// @@ -679,22 +541,12 @@ getProjectedPosition(Fragment > const& f, * @return bool true if the fragment is reversed and false otherwise. */ -/** -.Function.isReversed -..cat:Alignments -..class:Class.Fragment -..signature:isReversed >(f) -..summary:Returns true if the segment match is in reverse orientation. -..signature:isReversed(f) -..param.f:A fragment. 
-...type:Class.Fragment -*/ template inline bool isReversed(Fragment > const& f) { - SEQAN_CHECKPOINT - return f.reversed; + SEQAN_CHECKPOINT + return f.reversed; } // Compare lexicographically as tuple. @@ -716,6 +568,6 @@ inline bool operator>(Fragment > const & lhs, return false; } -}// namespace SEQAN_NAMESPACE_MAIN +} // namespace seqan -#endif //#ifndef SEQAN_HEADER_... +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_FRAGMENT_H_ diff --git a/seqan/align/gap_anchor.h b/seqan/align/gap_anchor.h index 7e82f38..f2c9a14 100644 --- a/seqan/align/gap_anchor.h +++ b/seqan/align/gap_anchor.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: David Weese // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAP_ANCHOR_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAP_ANCHOR_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAP_ANCHOR_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GAP_ANCHOR_H_ namespace seqan { @@ -62,138 +62,116 @@ typedef Tag const SortGapPos; /*! * @class GapAnchor + * @implements ComparableConcept * @headerfile - * @brief Stores the position of an alignment character in sequence-space and in gap-space. - * + * @brief Stores the position of an alignment character in sequence-space and gap-space in @link AnchorGaps @endlink. + * * @signature template * struct GapAnchor; - * + * * @tparam TPos Type to store gapped/ungapped positions. - * + * * @section Remarks - * - * Value types of the gaps strings in @link ReadStoreElement @endlink and @link ContigStoreElement @endlink. 
+ * + * Used as value type for the gaps strings in @link ReadStoreElement @endlink and @link ContigStoreElement + * @endlink. + * + * @see AnchorGaps */ -/** -.Class.GapAnchor -..summary:Stores the position of an alignment character in sequence-space and in gap-space. -..cat:Alignments -..signature:GapAnchor -..param.TPos:Type to store gapped/ungapped positions. -..remarks:Value types of the $gaps$ strings in @Class.ReadStoreElement@ and @Class.ContigStoreElement@. - -.Memfunc.GapAnchor#GapAnchor -..summary:Constructor -..signature:GapAnchor () -..signature:GapAnchor (TPos seqPos, TPos gapPos) -..param.seqPos:Sequence character position in the ungapped sequence. -..param.gapPos:Sequence character position in the gapped sequence. -..remarks:Default constructor sets both positions to $0$. -..class:Class.GapAnchor -.Memvar.GapAnchor#seqPos -..summary:Sequence character position in the ungapped sequence. -..class:Class.GapAnchor -.Memvar.GapAnchor#gapPos -..summary:Sequence character position in the gapped sequence. -..class:Class.GapAnchor -..include:seqan/store.h -*/ - // We store gap anchors only for the first text character behind a gap or a clipped sequence character template struct GapAnchor { /*! - * @var VariableType GapAnchor::seqPos + * @var TPos GapAnchor::seqPos * @brief Sequence character position in the ungapped sequence. */ - TPos seqPos; // sequence character position in the ungapped sequence + TPos seqPos; // sequence character position in the ungapped sequence /*! - * @var VariableType GapAnchor::gapPos + * @var TPos GapAnchor::gapPos * @brief Sequence character position in the gapped sequence. */ - TPos gapPos; // sequence character position in the gapped sequence + TPos gapPos; // sequence character position in the gapped sequence /*! * @fn GapAnchor::GapAnchor - * - * @brief Constructor - * - * @signature GapAnchor::GapAnchor([other]) - * @signature GapAnchor::GapAnchor(seqPos, gapPos) + * @brief Constructor. 
+ * + * @signature GapAnchor::GapAnchor([other]); + * @signature GapAnchor::GapAnchor(seqPos, gapPos); + * + * @param[in] other GapAnchor object to copy from. + * @param[in] seqPos Sequence character position in the ungapped sequence (of type TPos). + * @param[in] gapPos Sequence character position in the gapped sequence (of type TPos). * - * @param other GapAnchor object to copy from. - * @param seqPos Sequence character position in the ungapped sequence. - * @param gapPos Sequence character position in the gapped sequence. - * * @section Remarks - * - * Default constructor sets both positions to 0. + * + * The default constructor sets both positions to 0. */ - GapAnchor() : seqPos(0), gapPos(0) {} - GapAnchor(TPos sP, TPos gP) : seqPos(sP), gapPos(gP) {} - - template - GapAnchor(GapAnchor const &other) - { - seqPos = other.seqPos; - gapPos = other.gapPos; - } - - template - inline GapAnchor const & - operator = (GapAnchor const &other) - { - seqPos = other.seqPos; - gapPos = other.gapPos; - return *this; - } - - template - inline bool - operator == (TOther const &other) const - { - return seqPos == other.seqPos && gapPos == other.gapPos; - } - - template - inline bool - operator != (TOther const &other) const - { - return !(*this == other); - } - - template - inline bool - operator < (TOther const &other) const - { - return seqPos < other.seqPos || gapPos < other.gapPos; - } - - template - inline bool - operator > (TOther const &other) const - { - return seqPos > other.seqPos || gapPos > other.gapPos; - } - - template - inline bool - operator <= (TOther const &other) const - { - return seqPos < other.seqPos || gapPos <= other.gapPos; - } - - template - inline bool - operator >= (TOther const &other) const - { - return seqPos > other.seqPos || gapPos >= other.gapPos; - } + GapAnchor() : seqPos(0), gapPos(0) {} + GapAnchor(TPos sP, TPos gP) : seqPos(sP), gapPos(gP) {} + + template + GapAnchor(GapAnchor const &other) + { + seqPos = other.seqPos; + gapPos = 
other.gapPos; + } + + template + inline GapAnchor const & + operator = (GapAnchor const &other) + { + seqPos = other.seqPos; + gapPos = other.gapPos; + return *this; + } + + template + inline bool + operator == (TOther const &other) const + { + return seqPos == other.seqPos && gapPos == other.gapPos; + } + + template + inline bool + operator != (TOther const &other) const + { + return !(*this == other); + } + + template + inline bool + operator < (TOther const &other) const + { + return seqPos < other.seqPos || gapPos < other.gapPos; + } + + template + inline bool + operator > (TOther const &other) const + { + return seqPos > other.seqPos || gapPos > other.gapPos; + } + + template + inline bool + operator <= (TOther const &other) const + { + return seqPos < other.seqPos || gapPos <= other.gapPos; + } + + template + inline bool + operator >= (TOther const &other) const + { + return seqPos > other.seqPos || gapPos >= other.gapPos; + } }; // ============================================================================ @@ -243,22 +221,22 @@ struct _LessGapAnchor; template struct _LessGapAnchor : - public ::std::binary_function + public std::binary_function { - inline bool - operator() (TGapAnchor const& a1, TGapAnchor const& a2) const { - return (a1.seqPos) < (a2.seqPos); - } + inline bool + operator() (TGapAnchor const& a1, TGapAnchor const& a2) const { + return (a1.seqPos) < (a2.seqPos); + } }; template struct _LessGapAnchor : - public ::std::binary_function + public std::binary_function { - inline bool - operator() (TGapAnchor const& a1, TGapAnchor const& a2) const { - return (a1.gapPos) < (a2.gapPos); - } + inline bool + operator() (TGapAnchor const& a1, TGapAnchor const& a2) const { + return (a1.gapPos) < (a2.gapPos); + } }; // ---------------------------------------------------------------------------- @@ -267,66 +245,66 @@ struct _LessGapAnchor : template inline typename Iterator::Type -lowerBoundGapAnchor(TGapAnchor const & gaps, - TSearchValue const val, - 
SortSeqPos) +lowerBoundGapAnchor(TGapAnchor const & gaps, + TSearchValue const val, + SortSeqPos) { - typedef typename Value::Type TGapAnchorElement; - TGapAnchorElement el; - el.seqPos = val; - return ::std::lower_bound( - begin(gaps, Standard()), - end(gaps, Standard()), - el, - _LessGapAnchor::Type, SortSeqPos const>() ); + typedef typename Value::Type TGapAnchorElement; + TGapAnchorElement el; + el.seqPos = val; + return std::lower_bound( + begin(gaps, Standard()), + end(gaps, Standard()), + el, + _LessGapAnchor::Type, SortSeqPos const>() ); } template inline typename Iterator::Type -lowerBoundGapAnchor(TGapAnchor & gaps, - TSearchValue const val, - SortSeqPos) +lowerBoundGapAnchor(TGapAnchor & gaps, + TSearchValue const val, + SortSeqPos) { - typedef typename Value::Type TGapAnchorElement; - TGapAnchorElement el; - el.seqPos = val; - return ::std::lower_bound( - begin(gaps, Standard()), - end(gaps, Standard()), - el, - _LessGapAnchor::Type, SortSeqPos const>() ); + typedef typename Value::Type TGapAnchorElement; + TGapAnchorElement el; + el.seqPos = val; + return std::lower_bound( + begin(gaps, Standard()), + end(gaps, Standard()), + el, + _LessGapAnchor::Type, SortSeqPos const>() ); } template inline typename Iterator::Type -lowerBoundGapAnchor(TGapAnchor const & gaps, - TSearchValue const val, - SortGapPos) +lowerBoundGapAnchor(TGapAnchor const & gaps, + TSearchValue const val, + SortGapPos) { - typedef typename Value::Type TGapAnchorElement; - TGapAnchorElement el; - el.gapPos = val; - return ::std::lower_bound( - begin(gaps, Standard()), - end(gaps, Standard()), - el, - _LessGapAnchor::Type, SortGapPos const>() ); + typedef typename Value::Type TGapAnchorElement; + TGapAnchorElement el; + el.gapPos = val; + return std::lower_bound( + begin(gaps, Standard()), + end(gaps, Standard()), + el, + _LessGapAnchor::Type, SortGapPos const>() ); } template inline typename Iterator::Type -lowerBoundGapAnchor(TGapAnchor & gaps, - TSearchValue const val, - SortGapPos) 
+lowerBoundGapAnchor(TGapAnchor & gaps, + TSearchValue const val, + SortGapPos) { - typedef typename Value::Type TGapAnchorElement; - TGapAnchorElement el; - el.gapPos = val; - return ::std::lower_bound( - begin(gaps, Standard()), - end(gaps, Standard()), - el, - _LessGapAnchor::Type, SortGapPos const>() ); + typedef typename Value::Type TGapAnchorElement; + TGapAnchorElement el; + el.gapPos = val; + return std::lower_bound( + begin(gaps, Standard()), + end(gaps, Standard()), + el, + _LessGapAnchor::Type, SortGapPos const>() ); } // ---------------------------------------------------------------------------- @@ -336,67 +314,67 @@ lowerBoundGapAnchor(TGapAnchor & gaps, template inline typename Iterator::Type upperBoundGapAnchor(TGapAnchors const & gaps, - TSearchValue const val, - SortSeqPos) + TSearchValue const val, + SortSeqPos) { - typedef typename Value::Type TGapAnchorElement; - TGapAnchorElement el; - el.seqPos = val; - return ::std::upper_bound( - begin(gaps, Standard()), - end(gaps, Standard()), - el, - _LessGapAnchor::Type, SortSeqPos const>() ); + typedef typename Value::Type TGapAnchorElement; + TGapAnchorElement el; + el.seqPos = val; + return std::upper_bound( + begin(gaps, Standard()), + end(gaps, Standard()), + el, + _LessGapAnchor::Type, SortSeqPos const>() ); } template inline typename Iterator::Type upperBoundGapAnchor(TGapAnchors & gaps, - TSearchValue const val, - SortSeqPos) + TSearchValue const val, + SortSeqPos) { - typedef typename Value::Type TGapAnchorElement; - TGapAnchorElement el; - el.seqPos = val; - return ::std::upper_bound( - begin(gaps, Standard()), - end(gaps, Standard()), - el, - _LessGapAnchor::Type, SortSeqPos const>() ); + typedef typename Value::Type TGapAnchorElement; + TGapAnchorElement el; + el.seqPos = val; + return std::upper_bound( + begin(gaps, Standard()), + end(gaps, Standard()), + el, + _LessGapAnchor::Type, SortSeqPos const>() ); } template inline typename Iterator::Type -upperBoundGapAnchor(TGapAnchors const & 
gaps, - TSearchValue const val, - SortGapPos) +upperBoundGapAnchor(TGapAnchors const & gaps, + TSearchValue const val, + SortGapPos) { - typedef typename Value::Type TGapAnchorElement; - TGapAnchorElement el; - el.gapPos = val; - return ::std::upper_bound( - begin(gaps, Standard()), - end(gaps, Standard()), - el, - _LessGapAnchor::Type, SortGapPos const>() ); + typedef typename Value::Type TGapAnchorElement; + TGapAnchorElement el; + el.gapPos = val; + return std::upper_bound( + begin(gaps, Standard()), + end(gaps, Standard()), + el, + _LessGapAnchor::Type, SortGapPos const>() ); } template inline typename Iterator::Type -upperBoundGapAnchor(TGapAnchors & gaps, - TSearchValue const val, - SortGapPos) +upperBoundGapAnchor(TGapAnchors & gaps, + TSearchValue const val, + SortGapPos) { - typedef typename Value::Type TGapAnchorElement; - TGapAnchorElement el; - el.gapPos = val; - return ::std::upper_bound( - begin(gaps, Standard()), - end(gaps, Standard()), - el, - _LessGapAnchor::Type, SortGapPos const>() ); + typedef typename Value::Type TGapAnchorElement; + TGapAnchorElement el; + el.gapPos = val; + return std::upper_bound( + begin(gaps, Standard()), + end(gaps, Standard()), + el, + _LessGapAnchor::Type, SortGapPos const>() ); } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAP_ANCHOR_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAP_ANCHOR_H_ diff --git a/seqan/align/gapped_value_type.h b/seqan/align/gapped_value_type.h index 9e98bbc..30e62b6 100644 --- a/seqan/align/gapped_value_type.h +++ b/seqan/align/gapped_value_type.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -33,8 +33,8 @@ // Author: Manuel Holtgrewe // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPPED_VALUE_TYPE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPPED_VALUE_TYPE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAPPED_VALUE_TYPE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GAPPED_VALUE_TYPE_H_ namespace seqan { @@ -62,17 +62,6 @@ namespace seqan { * @return Type A type that can store the values of T and the value '-'. */ -/** -.Metafunction.GappedValueType: -..cat:Alignments -..summary:Returns a value type that contains a blank value '-'. -..signature:GappedValueType::Type -..param.T:The value type that should be expanded (if needed) by '-'. -..returns.param.Type:A value type that can be used to store store values in $T$ and the value '-'. -..remarks:The default implementation returns $T$. -..include:seqan/align.h -*/ - // TODO(holtgrew): Would it be enough to define it for Nothing instead of having a default implementation. // We need a default implementation since we need it for Nothing. 
@@ -85,8 +74,8 @@ struct GappedValueType template struct GappedValueType > { - typedef SimpleType THost_; - typedef ModifiedAlphabet > Type; + typedef SimpleType THost_; + typedef ModifiedAlphabet > Type; }; // ============================================================================ @@ -95,4 +84,4 @@ struct GappedValueType > } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPPED_VALUE_TYPE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAPPED_VALUE_TYPE_H_ diff --git a/seqan/align/gaps_anchor.h b/seqan/align/gaps_anchor.h index ca25d50..ef00050 100644 --- a/seqan/align/gaps_anchor.h +++ b/seqan/align/gaps_anchor.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -38,8 +38,8 @@ // TODO(holtgrew): Clipping in leading and trailing gaps is not possible right now. Dave and I have to discuss this further. // TODO(holtgrew): Also, inserting gaps in the front changes the clipped begin position which is unexpected. -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ANCHOR_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ANCHOR_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ANCHOR_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ANCHOR_H_ namespace seqan { @@ -91,34 +91,14 @@ inline void _reinitAnchorGaps(Gaps > & gaps); * @signature Gaps::Gaps(source[, anchors]); * @signature Gaps::Gaps(anchors); * - * @param other Another @link AnchorGaps @endlink object to copy from. - * @param source The underling sequence to construct the Gaps object from. - * @param anchors The string of anchors to construct with. + * @param[in] other Another @link AnchorGaps @endlink object to copy from. 
+ * @param[in] source The underling sequence to construct the Gaps object from. + * @param[in] anchors The string of anchors to construct with. * * An AnchorGaps object has a default constructor, can be constructed from the underlying source, and/or a string of * gap anchors. */ -/** -.Spec.AnchorGaps: -..cat:Alignments -..general:Class.Gaps -..summary:Stores gaps anchors of the first characters behind gaps. -..signature:Gaps > -..param.TSource:Type of the ungapped sequence. -...metafunction:Metafunction.Source -..param.TGapAnchors:Type of the sequence of gap anchors, e.g. a string of $Class.GapAnchor$. -..include:seqan/store.h - -.Memfunc.Gaps#Gaps -..class:Class.Gaps -..summary:Constructor -..signature:Gaps > () -..signature:Gaps > (source[, anchors]) -..signature:Gaps > (anchors) -..param.source:The underlying ungapped sequence. -..param.anchors:The sequence of gap anchors, e.g. the $gaps$ members in $Class.ReadStoreElement$ or $Class.ContigStoreElement$. -*/ template > > struct AnchorGaps @@ -137,6 +117,9 @@ class Gaps > typedef typename Position::Type TPosition_; typedef typename Value::Type TValue_; + typedef typename RemoveReference::Type>::Type TSourceNoConstNoRef; + typedef TSourceNoConstNoRef const & TSourceConstRef; + // ----------------------------------------------------------------------- // Member Variables // ----------------------------------------------------------------------- @@ -174,7 +157,7 @@ class Gaps > data_cutBegin(0), data_cutEnd(0), data_viewCutBegin(0), - data_viewCutEnd(0) + data_viewCutEnd(0) { } @@ -187,21 +170,9 @@ class Gaps > { } - // Note: We need the variants with the first parameter "TSource const &" here because TSource can be a Segment which - // is often given as a temporary. 
- - Gaps(TSource & source, TGapAnchors & anchors) : - data_source(source), - data_gaps(anchors), - data_cutBegin(0), - data_cutEnd(0), - data_viewCutBegin(0), - data_viewCutEnd(0) - { - } - - Gaps(TSource & source, TGapAnchors const & anchors) : - data_source(source), + // everybody has const & constructors + Gaps(TSourceNoConstNoRef const & source, TGapAnchors & anchors) : + data_source(source), data_gaps(anchors), data_cutBegin(0), data_cutEnd(0), @@ -210,22 +181,8 @@ class Gaps > { } - // TODO(holtgrew): These constructors are only here because of const-Holder issues. - - template - Gaps(TSource2 & source, TGapAnchors & anchors) : - data_source(source), - data_gaps(anchors), - data_cutBegin(0), - data_cutEnd(0), - data_viewCutBegin(0), - data_viewCutEnd(0) - { - } - - template - Gaps(TSource2 & source, TGapAnchors const & anchors) : - data_source(source), + Gaps(TSourceNoConstNoRef const & source, TGapAnchors const & anchors) : + data_source(source), data_gaps(anchors), data_cutBegin(0), data_cutEnd(0), @@ -234,19 +191,21 @@ class Gaps > { } - template - Gaps(TSource2 const & source, TGapAnchors & anchors) : + // if source is not const & (but possibly const) there are also regular & constructors + Gaps(TSourceNoConstNoRef & source, TGapAnchors & anchors, + SEQAN_CTOR_DISABLE_IF(IsSameType)) : data_source(source), - data_gaps(anchors), + data_gaps(anchors), data_cutBegin(0), data_cutEnd(0), data_viewCutBegin(0), data_viewCutEnd(0) { + ignoreUnusedVariableWarning(dummy); } - template - Gaps(TSource2 const & source, TGapAnchors const & anchors) : + Gaps(TSourceNoConstNoRef & source, TGapAnchors const & anchors, + SEQAN_CTOR_DISABLE_IF(IsSameType)) : data_source(source), data_gaps(anchors), data_cutBegin(0), @@ -254,6 +213,7 @@ class Gaps > data_viewCutBegin(0), data_viewCutEnd(0) { + ignoreUnusedVariableWarning(dummy); } // ----------------------------------------------------------------------- @@ -276,6 +236,23 @@ class Gaps > // Functions // 
============================================================================ +// ---------------------------------------------------------------------------- +// Function swap() +// ---------------------------------------------------------------------------- + +template +void swap(Gaps > & lhs, + Gaps > & rhs) +{ + swap(lhs.data_source, rhs.data_source); + swap(lhs.data_gaps, rhs.data_gaps); + + std::swap(lhs.data_cutBegin, rhs.data_cutBegin); + std::swap(lhs.data_cutEnd, rhs.data_cutEnd); + std::swap(lhs.data_viewCutBegin, rhs.data_viewCutBegin); + std::swap(lhs.data_viewCutEnd, rhs.data_viewCutEnd); +} + // ---------------------------------------------------------------------------- // Helper Function _reinitAnchorGaps() // ---------------------------------------------------------------------------- @@ -438,6 +415,24 @@ clearGaps(Gaps > & gaps) _reinitAnchorGaps(gaps); } +// ---------------------------------------------------------------------------- +// Function clear() +// ---------------------------------------------------------------------------- + +template +inline void +clear(Gaps > & gaps) +{ + clear(gaps.data_source); // clear source holder + + // clear gaps, but on holder level + clear(gaps.data_gaps); + gaps.data_cutBegin = 0; + gaps.data_cutEnd = 0; + gaps.data_viewCutBegin = 0; + gaps.data_viewCutEnd = 0; +} + // ---------------------------------------------------------------------------- // Function isGap() // ---------------------------------------------------------------------------- @@ -699,25 +694,6 @@ assignSource(Gaps > & gaps, TSequence2 const & value(gaps.data_source) = source; } -// ---------------------------------------------------------------------------- -// Helper Function _helperIsNegative() -// ---------------------------------------------------------------------------- - -// to remove '... 
< 0 is always false' warning -template -inline bool -_helperIsNegative(T, False) -{ - return false; -} - -template -inline bool -_helperIsNegative(T t, True) -{ - return t < 0; -} - // ---------------------------------------------------------------------------- // Function positionGapToSeq() // ---------------------------------------------------------------------------- @@ -730,45 +706,48 @@ _helperIsNegative(T t, True) * * @signature TPos positionGapToSeq(gaps, pos); * - * @param gaps Contig AnchorGaps (e.g. from FragmentStore). - * @param pos Position in gap space. + * @param[in] gaps Contig AnchorGaps (e.g. from FragmentStore). + * @param[in] pos Position in gap space. * - * @return Position in sequence space. + * @return TPos Position in sequence space (Metafunction: @link ContainerConcept#Position @endlink). * * See the example below to construct the Gaps ojbect. Note that this construction is fast since it ionly a thing wrapper * around underlying objects. * - * @section Example + * @section Examples * * Convert from gap space to positions pace when the contig required to be loaded. * Converts position aligned read with * index idx in the aligned read store. * - * @code + * @code{.cpp} * typedef typename TFragmentStore::TContigStore TContigStore; * typedef typename Value::Type TContig; * typedef typename TFragmentStore::TContigSeq TContigSeq; * typedef Gaps > TContigGaps; - * + * * typedef typename TFragmentStore::TAlignedReadStore TAlignedReadStore; * typedef typename Value::Type TAlignedRead; * typedef typename TAlignedRead::TPos TAlignedReadPos; - * + * * unsigned contigId = alignedReadStore[idx].contigId; * TContigGaps contigGaps(contigStore[contigId].seq, contigStore[contigId].gaps); * TAlignedRead const & alignedRead = alignedReadStore[idx]; * // Translate end position from aligned read record to sequence space in reference. 
* TAlignedReadPos endPos = positionGapToSeq(contigGaps, alignedRead.endPos); - * ..example.text:Convert from gap space to position space when the contigs are not required. - * ..example.text:Converts position aligned read with index $idx$ in the aligned read store. - * ..example.code: + * @endcode + * + * Convert from gap space to position space when the contigs are not required. + * Converts position aligned read with index $idx$ in the aligned read store. + * + * @code{.cpp} * typedef typename TFragmentStore::TContigStore TContigStore; * typedef typename Value::Type TContig; * typedef Gaps > TContigGaps; - * + * * typedef typename TFragmentStore::TAlignedReadStore TAlignedReadStore; * typedef typename Value::Type TAlignedRead; * typedef typename TAlignedRead::TPos TAlignedReadPos; - * + * * unsigned contigId = alignedReadStore[idx].contigId; * TContigGaps contigGaps(Nothing(), contigStore[contigId].gaps); * TAlignedRead const & alignedRead = alignedReadStore[idx]; @@ -777,53 +756,6 @@ _helperIsNegative(T t, True) * @endcode */ -/** -.Function.positionGapToSeq -..cat:Fragment Store -..signature:positionGapToSeq(gaps, pos) -..summary:Convert from gap-space in the global alignment to the sequence-space on the reference. -..param.gaps:Contig gaps from fragment store. -...type:Spec.AnchorGaps -..param.pos:Position in gap space. -..returns:Position in sequence space. -..remarks:See the example below to construct the gaps object. Note that this construction is fast since it is only a thin wrapper around underlying objects. -..include:seqan/store.h -..example.text:Convert from gap space to position space when the contigs required to be loaded. -..example.text:Converts position aligned read with index $idx$ in the aligned read store. 
-..example.code: -typedef typename TFragmentStore::TContigStore TContigStore; -typedef typename Value::Type TContig; -typedef typename TFragmentStore::TContigSeq TContigSeq; -typedef Gaps > TContigGaps; - -typedef typename TFragmentStore::TAlignedReadStore TAlignedReadStore; -typedef typename Value::Type TAlignedRead; -typedef typename TAlignedRead::TPos TAlignedReadPos; - -unsigned contigId = alignedReadStore[idx].contigId; -TContigGaps contigGaps(contigStore[contigId].seq, contigStore[contigId].gaps); -TAlignedRead const & alignedRead = alignedReadStore[idx]; -// Translate end position from aligned read record to sequence space in reference. -TAlignedReadPos endPos = positionGapToSeq(contigGaps, alignedRead.endPos); -..example.text:Convert from gap space to position space when the contigs are not required. -..example.text:Converts position aligned read with index $idx$ in the aligned read store. -..example.code: -typedef typename TFragmentStore::TContigStore TContigStore; -typedef typename Value::Type TContig; -typedef Gaps > TContigGaps; - -typedef typename TFragmentStore::TAlignedReadStore TAlignedReadStore; -typedef typename Value::Type TAlignedRead; -typedef typename TAlignedRead::TPos TAlignedReadPos; - -unsigned contigId = alignedReadStore[idx].contigId; -TContigGaps contigGaps(Nothing(), contigStore[contigId].gaps); -TAlignedRead const & alignedRead = alignedReadStore[idx]; -// Translate end position from aligned read record to sequence space in reference. 
-TAlignedReadPos endPos = positionGapToSeq(contigGaps, alignedRead.endPos); -..see:Function.positionSeqToGap - */ - template inline TPosition positionGapToSeq(Gaps > const & me, TPosition pos) @@ -834,7 +766,7 @@ positionGapToSeq(Gaps > const & me, TPosition p TPosition seqPos; int anchorIdx; - if (_helperIsNegative(pos, typename IsSameType::Type>::Type())) + if (isNegative(pos)) anchorIdx = -1; else { @@ -866,15 +798,15 @@ positionGapToSeq(Gaps > const & me, TPosition p // ---------------------------------------------------------------------------- /*! - * @fn Gaps#positionSeqToGap + * @fn AnchorGaps#positionSeqToGap * @brief Convert from sequence space on the reference to gap space in the global alignment. * - * @signature TPosition positionSeqToGap(gaps, pos); + * @signature TPos positionSeqToGap(gaps, pos); * - * @param gaps The AnchorGaps object to use for the translation. - * @param pos The gap space position to conver to sequence space. + * @param[in] gaps The AnchorGaps object to use for the translation. + * @param[in] pos The gap space position to conver to sequence space. * - * @return TPosition The resulting position in sequence space. + * @return TPos The resulting position in sequence space (Metafunction: @link ContainerConcept#Position @endlink). * * See the example below to construct the gaps object. Note that this construction is fast since it is only a thin * wrapper around underlying objects. @@ -883,59 +815,28 @@ positionGapToSeq(Gaps > const & me, TPosition p * * Convert from gap space to position space on contig $contigId$ when the contigs required to be loaded. 
* - * @code + * @code{.cpp} * typedef typename TFragmentStore::TContigStore TContigStore; * typedef typename Value::Type TContig; * typedef typename TFragmentStore::TContigSeq TContigSeq; * typedef Gaps > TContigGaps; - * + * * TContigGaps contigGaps(contigStore[contigId].seq, contigStore[contigId].gaps); * TAlignedReadPos pos = positionGapToSeq(contigGaps, 33); * @endcode * * Convert from gap space to position space on contig $contigId$ when the contigs are not required. * - * @code + * @code{.cpp} * typedef typename TFragmentStore::TContigStore TContigStore; * typedef typename Value::Type TContig; * typedef Gaps > TContigGaps; - * + * * TContigGaps contigGaps(Nothing(), contigStore[contigId].gaps); * TAlignedReadPos endPos = positionGapToSeq(contigGaps, 33); * @endcode */ -/** -.Function.positionSeqToGap -..cat:Fragment Store -..signature:positionGapToSeq(gaps, pos) -..summary:Convert from sequence space on the reference to gap space in the global alignment. -..param.gaps:Contig gaps from fragment store. -...type:Spec.AnchorGaps -..param.pos:Position in the reference. -..returns:Position in the gap space of the global alignment space. -..remarks:See the example below to construct the gaps object. Note that this construction is fast since it is only a thin wrapper around underlying objects. -..include:seqan/store.h -..example.text:Convert from gap space to position space on contig $contigId$ when the contigs required to be loaded. -..example.code: -typedef typename TFragmentStore::TContigStore TContigStore; -typedef typename Value::Type TContig; -typedef typename TFragmentStore::TContigSeq TContigSeq; -typedef Gaps > TContigGaps; - -TContigGaps contigGaps(contigStore[contigId].seq, contigStore[contigId].gaps); -TAlignedReadPos pos = positionGapToSeq(contigGaps, 33); -..example.text:Convert from gap space to position space on contig $contigId$ when the contigs are not required. 
-..example.code: -typedef typename TFragmentStore::TContigStore TContigStore; -typedef typename Value::Type TContig; -typedef Gaps > TContigGaps; - -TContigGaps contigGaps(Nothing(), contigStore[contigId].gaps); -TAlignedReadPos endPos = positionGapToSeq(contigGaps, 33); -..see:Function.positionSeqToGap - */ - template inline TPosition positionSeqToGap(Gaps > const & me, TPosition pos) @@ -946,7 +847,7 @@ positionSeqToGap(Gaps > const & me, TPosition p TPosition gapPos; int anchorIdx; - if (_helperIsNegative(pos, typename IsSameType::Type>::Type())) + if (isNegative(pos)) anchorIdx = -1; else { @@ -1043,4 +944,4 @@ clippedEndPosition(Gaps > const & gaps) } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ANCHOR_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ANCHOR_H_ diff --git a/seqan/align/gaps_array.h b/seqan/align/gaps_array.h index af128b7..8416eea 100644 --- a/seqan/align/gaps_array.h +++ b/seqan/align/gaps_array.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -38,8 +38,8 @@ // TODO(holtgrew): Currently, operations are a function of the whole gap count, could be of clipped region only. // TODO(holtgrew): Problem with the gap value, getValue(), value(). -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ARRAY_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ARRAY_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ARRAY_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ARRAY_H_ namespace seqan { @@ -77,6 +77,7 @@ typedef Tag ArrayGaps; /*! * @class ArrayGaps * @headerfile + * @extends Gaps * @brief Stores length of gap- and non-gap runs in an array. 
* * @signature template @@ -85,16 +86,17 @@ typedef Tag ArrayGaps; * @tparam TSequence The type of the underling sequence. */ -/** -.Spec.ArrayGaps -..cat:Alignments -..general:Class.Gaps -..summary:Stores length of gap- and non-gapped runs in an array. -..signature:Gaps -..param.TSequence:Type of the ungapped sequence. -...metafunction:Metafunction.Source -..include:seqan/align.h -*/ +/*! + * @fn ArrayGaps::Gaps + * @headerfile + * @brief Constructor. + * + * @signature Gaps::Gaps([other]); + * @signature Gaps::Gaps(seq); + * + * @param[in] other Other Gaps object to copy from. + * @param[in] seq Sequence concept to construct the gaps for. + */ template class Gaps @@ -179,13 +181,29 @@ class Gaps // Array Subscript Operator // ----------------------------------------------------------------------- - inline TValue_ - operator[](TPosition_ clippedViewPos) const - { + inline TValue_ + operator[](TPosition_ clippedViewPos) const + { return value(*this, clippedViewPos); - } + } }; +// ---------------------------------------------------------------------------- +// Function swap() +// ---------------------------------------------------------------------------- + +template +void swap(Gaps & lhs, Gaps & rhs) +{ + swap(lhs._source, rhs._source); + swap(lhs._array, rhs._array); + + std::swap(lhs._sourceBeginPos, rhs._sourceBeginPos); + std::swap(lhs._sourceEndPos, rhs._sourceEndPos); + std::swap(lhs._clippingBeginPos, rhs._clippingBeginPos); + std::swap(lhs._clippingEndPos, rhs._clippingEndPos); +} + // ============================================================================ // Metafunctions // ============================================================================ @@ -670,6 +688,24 @@ clearGaps(Gaps & gaps) _reinitArrayGaps(gaps); } +// ---------------------------------------------------------------------------- +// Function clear() +// ---------------------------------------------------------------------------- + +template +inline void +clear(Gaps & gaps) +{ + 
clear(gaps._source); + clear(gaps._array); + gaps._sourceBeginPos = 0; + gaps._sourceEndPos = 0; + gaps._clippingBeginPos = 0; + gaps._clippingEndPos = 0; + // cannot use clearGaps() here, since that calls value() on _source + // which instates the Holder to Owner; we want it to be empty +} + // ---------------------------------------------------------------------------- // Function isGap() // ---------------------------------------------------------------------------- @@ -844,4 +880,4 @@ endPosition(Gaps & gaps) } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ARRAY_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ARRAY_H_ diff --git a/seqan/align/gaps_base.h b/seqan/align/gaps_base.h index e667600..ea9af69 100644 --- a/seqan/align/gaps_base.h +++ b/seqan/align/gaps_base.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // TODO(holtgrew): Switch to Host interface. -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_BASE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_BASE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_BASE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_BASE_H_ namespace seqan { @@ -73,7 +73,7 @@ typedef Tag ArrayGaps; /*! * @class Gaps - * @implements SequenceConcept + * @implements ContainerConcept * @headerfile * @brief Store the gapped version of a sequence. * @@ -83,12 +83,12 @@ typedef Tag ArrayGaps; * @tparam TSequence The type of the underlying sequence. * @tparam TSpec Tag for specialization. 
* - * Gaps wrap a @link SequenceConcept Sequence @endlink and allows to (1) insert gaps into the sequence and (2) select + * Gaps wrap a @link ContainerConcept Sequence @endlink and allows to (1) insert gaps into the sequence and (2) select * an infix of the gapped sequence (clipping). The gaps are not inserted into the underlying sequence (source) but * stored separately. Using the clipping is optional and meant for selecting parts of the alignment as a part of the * result of a local alignment algorithm. * - * * * In the figure above, the source sequence has seven characters, the gapped sequence has four gaps and thus consists * of eleven characters. The gapped sequence is clipped to start at position 0 in the gapped sequence and to end at @@ -104,47 +104,11 @@ typedef Tag ArrayGaps; * The following example shows the construction of the gaps object from the image above together with some calls to * toViewPosition and toSourcePosition. * - * @include demos/align/gaps_example.cpp + * @include demos/dox/align/gaps_example.cpp * * The output is as follows: * - * @include demos/align/gaps_example.cpp.stdout - */ - -/** -.Class.Gaps -..cat:Alignments -..implements:Concept.SequenceConcept -..summary:Efficient storage of gaps for a sequence. -..signature:Gaps -..description.text: -Gaps wrap a @Concept.SequenceConcept@ and allows to (1) insert gaps into the sequence and (2) select an infix of the gapped sequence (clipping). -The gaps are not inserted into the underlying sequence (source) but stored separately. -Using the clipping is optional and meant for selecting parts of the alignment as a part of the result of a local alignment algorithm. -..description.image:gaps_illustration|Illustration of Gaps object and positions with clipping. -..description: -In the figure above, the source sequence has seven characters, the gapped sequence has four gaps and thus consists of eleven characters. 
-The gapped sequence is clipped to start at position 0 in the gapped sequence and to end at position 8 in the gapped sequence (the positions given as half-open intervals $[begin, end)$). -..description.text: -The figure shows the three coordinate systems that are used with Gaps objects. -The source position is the position in the underlying sequence. -The unclipped view position is the position in the gapped sequence without gaps. -The view position is the position in the gapped sequence but including the clipping: -All (clipped) view positions have the clipping begin position subtracted from them. -..example.text: -The following example shows the construction of the gaps object from the image above together with some calls to $toViewPosition$ and $toSourcePosition$. -These functions allow the transformation between the source position and the clipped view position. -..example.file:demos/align/gaps_example.cpp -..example.text:This yields the following output: -..example.output:Resulting gaps: GG-T-A- -toSourcePosition(gaps, 0) == 1 -toSourcePosition(gaps, 4) == 4 -toViewPosition(gaps, 0) == -1 -toViewPosition(gaps, 5) == 9 -..param.TSequence:The type of the underlying sequence. -...type:Concept.SequenceConcept -..param.TSpec:Specialization tag. 
-..include:seqan/align.h + * @include demos/dox/align/gaps_example.cpp.stdout */ template @@ -158,9 +122,6 @@ class Gaps; // Metafunction Value // ---------------------------------------------------------------------------- -///.Metafunction.Value.param.T.type:Class.Gaps -///.Metafunction.Value.class:Class.Gaps - template struct Value > { @@ -176,28 +137,22 @@ struct Value const> : Value > // Metafunction Iterator // ---------------------------------------------------------------------------- -///.Metafunction.Iterator.param.T.type:Class.Gaps -///.Metafunction.Iterator.class:Class.Gaps - template struct Iterator, TIteratorSpec> { - typedef Iter, GapsIterator > Type; + typedef Iter, GapsIterator > Type; }; template struct Iterator const, TIteratorSpec> { - typedef Iter const, GapsIterator > Type; + typedef Iter const, GapsIterator > Type; }; // ---------------------------------------------------------------------------- // Metafunction GetValue // ---------------------------------------------------------------------------- -///.Metafunction.GetValue.param.T.type:Class.Gaps -///.Metafunction.GetValue.class:Class.Gaps - template struct GetValue > : Value > {}; @@ -210,9 +165,6 @@ struct GetValue const> : GetValue // Metafunction Position // ---------------------------------------------------------------------------- -///.Metafunction.Position.param.T.type:Class.Gaps -///.Metafunction.Position.class:Class.Gaps - template struct Position > { @@ -228,30 +180,24 @@ struct Position const> : Position // Metafunction Reference // ---------------------------------------------------------------------------- -///.Metafunction.Reference.param.T.type:Class.Gaps -///.Metafunction.Reference.class:Class.Gaps - template struct Reference > { - typedef typename Iterator, Standard>::Type TIterator_; - typedef Proxy > Type; + typedef typename Iterator, Standard>::Type TIterator_; + typedef Proxy > Type; }; template struct Reference const> { - typedef typename Iterator const, 
Standard>::Type TIterator_; - typedef Proxy > Type; + typedef typename Iterator const, Standard>::Type TIterator_; + typedef Proxy > Type; }; // ---------------------------------------------------------------------------- // Metafunction Size // ---------------------------------------------------------------------------- -///.Metafunction.Size.param.T.type:Class.Gaps -///.Metafunction.Size.class:Class.Gaps - template struct Size > { @@ -268,18 +214,6 @@ struct Size const> : Size > // TODO(holtgrew): Switch to Hosted Type interface -/** -.Metafunction.Source -..cat:Alignments -..summary:Return underlying sequence of Gaps/Alignments. -..signature:Source::Type -..param.T:The type to query for underlying sequence. -..include:seqan/align.h -*/ - -///.Metafunction.Source.param.T.type:Class.Gaps -///.Metafunction.Source.class:Class.Gaps - template struct Source > { @@ -296,9 +230,6 @@ struct Source const> : Source > // Metafunction IsSequence // ---------------------------------------------------------------------------- -///.Metafunction.IsSequence.param.T.type:Class.Gaps -///.Metafunction.IsSequence.class:Class.Gaps - template struct IsSequence > { @@ -319,27 +250,23 @@ struct IsSequence const> : IsSequenceIterator::Type where TTag is * the type of tag. */ // TODO(holtgrew): Adding links to implemented sequence. This should be cleaned up once we have better documentation with concepts. 
-///.Function.begin.class:Class.Gaps -///.Function.end.class:Class.Gaps -///.Function.iter.class:Class.Gaps - // ---------------------------------------------------------------------------- // Function setSource() // ---------------------------------------------------------------------------- @@ -358,22 +285,9 @@ struct IsSequence const> : IsSequence const> : IsSequence const> : IsSequence const> : IsSequence const> : IsSequence const> : IsSequence const> : IsSequence +bool isGap(Gaps const & gaps, TPos clippedViewPos) +{ + return isGap(iter(gaps, clippedViewPos, Standard())); +} + +// ---------------------------------------------------------------------------- +// Function isCharacter() +// ---------------------------------------------------------------------------- + +/*! + * @fn Gaps#isCharacer + * @brief Query positions in a Gaps object for being a character. + * + * @signature bool isGap(gaps, viewPos); + * + * @param[in] gaps The Gaps object to query. + * @param[in] viewPos The view position (including clipping and gaps). * * @return bool The query result. */ -/** -.Function.Gaps#isGap -..class:Class.Gaps -..summary:Query whether a given clipped view position is a gap. -..cat:Alignments -..signature:bool isGap(gaps, clippedViewPos) -..param.gaps:The @Class.Gaps@ object to query. -...type:Class.Gaps -..param.clippedViewPos:The position in the view to query. -...type:Metafunction.Position -..returns:Whether or not there is a gap at the given clipped view position. 
-...type:nolink:$bool$ -..see:Function.insertGap -..see:Function.removeGap -..see:Function.removeGaps -..include:seqan/align.h -*/ +template +bool isCharacter(Gaps const & gaps, TPos clippedViewPos) +{ + return isCharacter(iter(gaps, clippedViewPos, Standard())); +} // ---------------------------------------------------------------------------- // Function insertGaps() @@ -564,30 +421,11 @@ struct IsSequence const> : IsSequence const> : IsSequence @@ -638,33 +459,13 @@ insertGap(Gaps & gaps, TPosition clippedViewPos) * * @signature TSize removeGaps(gaps, viewPos, count); * - * @param gaps The gaps object to remove gap characters from. - * @param viewPos The view positions to remove gap characters from. - * @param count The number of gap characters to remove. + * @param[in,out] gaps The gaps object to remove gap characters from. + * @param[in] viewPos The view positions to remove gap characters from. + * @param[in] count The number of gap characters to remove. * - * @return TSize The number of gap characters removed. + * @return TSize The number of gap characters removed (Metafunction: @link ContainerConcept#Size @endlink). */ -/** -.Function.removeGaps -..class:Class.Gaps -..summary:Remove multiple gaps from a gapped sequence. -..cat:Alignments -..signature:TSize removeGaps(gaps, clippedViewPos, count) -..param.gaps:The @Class.Gaps@ object to remove gaps into. -...type:Class.Gaps -..param.clippedViewPos:The position in the view to remove gaps from. -...type:Metafunction.Position -..param.count:The number of gaps to remove -...type:nolink:$unsigned$ -..returns:The number of removed gaps. 
-...type:Metafunction.Size -..see:Function.insertGap -..see:Function.insertGaps -..see:Function.removeGap -..include:seqan/align.h -*/ - // ---------------------------------------------------------------------------- // Function removeGap() // ---------------------------------------------------------------------------- @@ -675,30 +476,12 @@ insertGap(Gaps & gaps, TPosition clippedViewPos) * * @signature TSize removeGap(gaps, viewPos); * - * @param gaps The gaps object to remove one gap character from. - * @param viewPos The view positions to remove one gap character from. + * @param[in,out] gaps The gaps object to remove one gap character from. + * @param[in] viewPos The view positions to remove one gap character from. * - * @return TSize The number of gap characters removed. + * @return TSize The number of gap characters removed (Metafunction: @link ContainerConcept#Size @endlink). */ -/** -.Function.removeGap -..class:Class.Gaps -..summary:Remove one gap from a gapped sequence. -..cat:Alignments -..signature:TSize removeGap(gaps, clippedViewPos) -..param.gaps:The @Class.Gaps@ object to remove gap into. -...type:Class.Gaps -..param.clippedViewPos:The position in the view to remove gap from. -...type:Metafunction.Position -..returns:The number of removed gaps. -...type:Metafunction.Size -..see:Function.insertGap -..see:Function.insertGaps -..see:Function.removeGaps -..include:seqan/align.h -*/ - // Forward to removeGaps() which has to be implemented in each subclass. template @@ -707,7 +490,7 @@ removeGap(Gaps & gaps, TPosition clippedViewPos) { return removeGaps(gaps, clippedViewPos, 1u); } - + // ---------------------------------------------------------------------------- // Function countGaps() // ---------------------------------------------------------------------------- @@ -718,24 +501,63 @@ removeGap(Gaps & gaps, TPosition clippedViewPos) * * @signature TSize countGaps(gaps, viewPos); * - * @param gaps The Gaps object to query. 
- * @param viewPos View position (including clipping and gaps) to query at. + * @param[in] gaps The Gaps object to query. + * @param[in] viewPos View position (including clipping and gaps) to query at. + * + * @return TSize The number of gap characters at viewPos (Metafunction: @link ContainerConcept#Size + * @endlink). + */ + +template +typename Size >::Type +countGaps(Gaps const & gaps, TPos clippedViewPos) +{ + return countGaps(iter(gaps, clippedViewPos, Standard())); +} + +// ---------------------------------------------------------------------------- +// Function countLeadingGaps() +// ---------------------------------------------------------------------------- + +/*! + * @fn Gaps#countLeadingGaps + * @brief The number of leading gaps. + * + * @signature TSize countLeadingGaps(gaps); + * + * @param[in] gaps The Gaps object to query. + * + * @return TSize The number of leading gap characters (Metafunction: @link ContainerConcept#Size @endlink). + */ + +template +inline typename Size::Type +countLeadingGaps(TGaps const & gaps) +{ + return toViewPosition(gaps, 0); +} + +// ---------------------------------------------------------------------------- +// Function countTrailingGaps() +// ---------------------------------------------------------------------------- + +/*! + * @fn Gaps#countTrailingGaps + * @brief The number of trailing gaps. + * + * @signature TSize countTrailingGaps(gaps); * - * @return TSize The number of gap characters at viewPos. + * @param[in] gaps The Gaps object to query. + * + * @return TSize The number of trailing gap characters (Metafunction: @link ContainerConcept#Size @endlink). */ -/** -.Function.Gaps#countGaps -..class:Class.Gaps -..summary:Reports number of continues gaps right of current iterator position. -..cat:Alignments -..signature:TSize countGaps(iter) -..param.iter:Iterator of the @Class.Gaps@ object to count gaps for. 
-...type:Metafunction.Iterator -..returns:The number of gaps right of the current iterator position, including the current position, or $0$ if there is no gap. -...type:Metafunction.Size -..include:seqan/align.h -*/ +template +inline typename Size::Type +countTrailingGaps(TGaps const & gaps) +{ + return length(gaps) - toViewPosition(gaps, length(source(gaps)) - 1) - 1; +} // ---------------------------------------------------------------------------- // Function countCharacters() @@ -747,12 +569,20 @@ removeGap(Gaps & gaps, TPosition clippedViewPos) * * @signature TSize countCharacters(gaps, viewPos); * - * @param gaps The Gaps object to query. - * @param viewPos View position (including clipping and gaps) to query at. + * @param[in] gaps The Gaps object to query. + * @param[in] viewPos View position (including clipping and gaps) to query at. * - * @return TSize The number of non-gaps characters characters at viewPos. + * @return TSize The number of non-gaps characters characters at viewPos (Metafunction: @link + * ContainerConcept#Size @endlink). */ +template +typename Size >::Type +countCharacters(Gaps const & gaps, TPos clippedViewPos) +{ + return countCharacters(iter(gaps, clippedViewPos, Standard())); +} + // ---------------------------------------------------------------------------- // Function setClippedBeginPosition() // ---------------------------------------------------------------------------- @@ -763,31 +593,10 @@ removeGap(Gaps & gaps, TPosition clippedViewPos) * * @signature void setClippedBeginPosition(gaps, unclippedViewPos); * - * @param gaps The Gaps object to set the clipping begin position of. - * @param unclippedViewPos View position (including gaps but excluding clipping) to set the clipping begin to. + * @param[in,out] gaps The Gaps object to set the clipping begin position of. + * @param[in] unclippedViewPos View position (including gaps but excluding clipping) to set the clipping begin to. 
*/ -/** -.Function.Gaps#setClippedBeginPosition -..class:Class.Gaps -..summary:Sets the begin position of the clipping. -..signature:void setClippedBeginPosition(gaps, unclippedViewPosition) -..param.gaps:The @Class.Gaps@ object to query. -...type:Class.Gaps -..param.unclippedViewPosition:The position in the unclipped view to set as the clipping begin position. -...type:Metafunction.Position -..returns:$void$ -..remarks:Note that the position is *not* a clipped view position but an uncliped one! -..see:Function.Gaps#beginPosition -..see:Function.Gaps#endPosition -..see:Function.Gaps#setBeginPosition -..see:Function.Gaps#setEndPosition -..see:Function.Gaps#clippedBeginPosition -..see:Function.Gaps#clippedEndPosition -..see:Function.Gaps#setClippedEndPosition -..include:seqan/align.h -*/ - // ---------------------------------------------------------------------------- // Function setClippedEndPosition() // ---------------------------------------------------------------------------- @@ -798,31 +607,10 @@ removeGap(Gaps & gaps, TPosition clippedViewPos) * * @signature void setClippedEndPosition(gaps, unclippedViewPos); * - * @param gaps The Gaps object to set the clipping end position of. - * @param unclippedViewPos View position (including gaps but excluding clipping) to set the clipping end to. + * @param[in,out] gaps The Gaps object to set the clipping end position of. + * @param[in] unclippedViewPos View position (including gaps but excluding clipping) to set the clipping end to. */ -/** -.Function.Gaps#setClippedEndPosition -..class:Class.Gaps -..summary:Sets the end position of the clipping. -..signature:void setClippedEndPosition(gaps, unclippedViewPosition) -..param.gaps:The @Class.Gaps@ object to query. -...type:Class.Gaps -..param.unclippedViewPosition:The position in the unclipped view to set as the clipping end position. -...type:Metafunction.Position -..returns:$void$ -..remarks:Note that the position is *not* a clipped view position but an uncliped one! 
-..see:Function.Gaps#beginPosition -..see:Function.Gaps#endPosition -..see:Function.Gaps#setBeginPosition -..see:Function.Gaps#setEndPosition -..see:Function.Gaps#clippedBeginPosition -..see:Function.Gaps#clippedEndPosition -..see:Function.Gaps#setClippedBeginPosition -..include:seqan/align.h -*/ - // ---------------------------------------------------------------------------- // Function clippedBeginPosition() // ---------------------------------------------------------------------------- @@ -833,56 +621,26 @@ removeGap(Gaps & gaps, TPosition clippedViewPos) * * @signature TPos clippedBeginPosition(gaps); * - * @param gaps The Gaps object to query. + * @param[in] gaps The Gaps object to query. * - * @return TPos The begin position of the unclipped view. + * @return TPos The begin position of the unclipped view (Metafunction: @link ContainerConcept#Position @endlink). * * @section Example * * In the following gaps configuration, the result of clippedBeginPosition(gaps) is 1. * - * @code + * @code{.txt} * clipping [ ) - * (half-open interval) - * + * (half-open interval) + * * gapped sequence: X--XXX-XX- - * + * * source position: 0111234456 * unclipped view position: 0123456789 * clipped view position: 0123456 * @endcode */ -/** -.Function.Gaps#clippedBeginPosition -..class:Class.Gaps -..summary:Return the begin position of the clipping in the unclipped gapped sequence. -..signature:TPosition clippedBeginPosition(gaps) -..param.gaps:The @Class.Gaps@ object to query. -...type:Class.Gaps -..returns:The begin position of the current clipped view in the unclipped gapped sequence. 
-...type:Metafunction.Position -..see:Function.Gaps#beginPosition -..see:Function.Gaps#endPosition -..see:Function.Gaps#setBeginPosition -..see:Function.Gaps#setEndPosition -..see:Function.Gaps#clippedEndPosition -..see:Function.Gaps#setClippedBeginPosition -..see:Function.Gaps#setClippedEndPosition -..example: -In the following gaps configuration, the result of $clippedBeginPosition(gaps)$ is $1$. -..example.code: -clipping [ ) - (half-open interval) - -gapped sequence: X--XXX-XX- - -source position: 0111234456 -unclipped view position: 0123456789 -clipped view position: 0123456 -..include:seqan/align.h -*/ - // ---------------------------------------------------------------------------- // Function clippedEndPosition() // ---------------------------------------------------------------------------- @@ -893,56 +651,26 @@ clipped view position: 0123456 * * @signature TPos clippedEndPosition(gaps); * - * @param gaps The Gaps object to query. + * @param[in] gaps The Gaps object to query. * - * @return TPos The end position of the unclipped view. + * @return TPos The end position of the unclipped view (Metafunction: @link ContainerConcept#Position @endlink). * * @section Example * * In the following gaps configuration, the result of clippedEndPosition(gaps) is 7. * - * @code + * @code{.txt} * clipping [ ) - * (half-open interval) - * + * (half-open interval) + * * gapped sequence: X--XXX-XX- - * + * * source position: 0111234456 * unclipped view position: 0123456789 * clipped view position: 0123456 * @endcode */ -/** -.Function.Gaps#clippedEndPosition -..class:Class.Gaps -..summary:Return the end position of the clipping in the unclipped gapped sequence. -..signature:TPosition clippedEndPosition(gaps) -..param.gaps:The @Class.Gaps@ object to query. -...type:Class.Gaps -..returns:The end position of the current clipped view in the unclipped gapped sequence. 
-...type:Metafunction.Position -..see:Function.Gaps#beginPosition -..see:Function.Gaps#endPosition -..see:Function.Gaps#setBeginPosition -..see:Function.Gaps#setEndPosition -..see:Function.Gaps#clippedBeginPosition -..see:Function.Gaps#setClippedBeginPosition -..see:Function.Gaps#setClippedEndPosition -..example: -In the following gaps configuration, the result of $clippedEndPosition(gaps)$ is $7$. -..example.code: -clipping [ ) - (half-open interval) - -gapped sequence: X--XXX-XX- - -source position: 0111234456 -unclipped view position: 0123456789 -clipped view position: 0123456 -..include:seqan/align.h -*/ - // ---------------------------------------------------------------------------- // Function setBeginPosition() // ---------------------------------------------------------------------------- @@ -953,30 +681,10 @@ clipped view position: 0123456 * * @signature void setBeginPosition(gaps, sourcePos); * - * @param gaps The Gaps object to set the begin position in. - * @param sourcePos Position in the underlying sequence to set clipping to. + * @param[in,out] gaps The Gaps object to set the begin position in. + * @param[in] sourcePos Position in the underlying sequence to set clipping to. */ -/** -.Function.Gaps#setBeginPosition -..class:Class.Gaps -..summary:Set the begin position of the clipped gapped sequence, given a source position. -..signature:void setBeginPosition(gaps, sourcePosition) -..param.gaps:The @Class.Gaps@ object to query. -...type:Class.Gaps -..param.sourcePosition:The source position to set the clipping begin to. 
-...type:Metafunction.Position -..returns:$void$ -..see:Function.Gaps#beginPosition -..see:Function.Gaps#endPosition -..see:Function.Gaps#setEndPosition -..see:Function.Gaps#clippedBeginPosition -..see:Function.Gaps#clippedEndPosition -..see:Function.Gaps#setClippedBeginPosition -..see:Function.Gaps#setClippedEndPosition -..include:seqan/align.h -*/ - // ---------------------------------------------------------------------------- // Function setEndPosition() // ---------------------------------------------------------------------------- @@ -987,30 +695,10 @@ clipped view position: 0123456 * * @signature void setEndPosition(gaps, sourcePos); * - * @param gaps The Gaps object to set the end position in. - * @param sourcePos Position in the underlying sequence to set clipping to. + * @param[in,out] gaps The Gaps object to set the end position in. + * @param[in] sourcePos Position in the underlying sequence to set clipping to. */ -/** -.Function.Gaps#setEndPosition -..class:Class.Gaps -..summary:Set the end position of the clipped gapped sequence, given a source position. -..signature:void setEndPosition(gaps, sourcePosition) -..param.gaps:The @Class.Gaps@ object to query. -...type:Class.Gaps -..param.sourcePosition:The source position to set the clipping end to. -...type:Metafunction.Position -..returns:$void$ -..see:Function.Gaps#beginPosition -..see:Function.Gaps#endPosition -..see:Function.Gaps#setBeginPosition -..see:Function.Gaps#clippedBeginPosition -..see:Function.Gaps#clippedEndPosition -..see:Function.Gaps#setClippedBeginPosition -..see:Function.Gaps#setClippedEndPosition -..include:seqan/align.h -*/ - // ---------------------------------------------------------------------------- // Function beginPosition() // ---------------------------------------------------------------------------- @@ -1019,60 +707,29 @@ clipped view position: 0123456 * @fn Gaps#beginPosition * @brief Return the clipping begin position as a source position. 
* - * @signature TPosition beginPosition(gaps); + * @signature TPos beginPosition(gaps); * - * @param gaps The Gaps object to query. + * @param[in] gaps The Gaps object to query. * - * @return TPosition The clipping begin position in the source. + * @return TPos The clipping begin position in the source (Metafunction: @link ContainerConcept#Position @endlink). * * @section Example * * In the following gaps configuration, the result of beginPosition(gaps) is $1$. The clipping starts in a * gap and the source position of the first non-gap character right of the clipping begin has source position 1. * - * @code + * @code{.txt} * clipping [ ) - * (half-open interval) - * + * (half-open interval) + * * gapped sequence: X--XXX-XX- - * + * * source position: 0111234456 * unclipped view position: 0123456789 * clipped view position: 0123456 * @endcode */ -/** -.Function.Gaps#beginPosition -..class:Class.Gaps -..summary:Return the clipping begin position as a source position. -..signature:TPosition beginPosition(gaps) -..param.gaps:The @Class.Gaps@ object to query. -...type:Class.Gaps -..returns:The begin position of the current clipped view in the source. -...type:Metafunction.Position -..see:Function.Gaps#endPosition -..see:Function.Gaps#setBeginPosition -..see:Function.Gaps#setEndPosition -..see:Function.Gaps#clippedBeginPosition -..see:Function.Gaps#clippedEndPosition -..see:Function.Gaps#setClippedBeginPosition -..see:Function.Gaps#setClippedEndPosition -..example: -In the following gaps configuration, the result of $beginPosition(gaps)$ is $1$. -The clipping starts in a gap and the source position of the first non-gap character right of the clipping begin has source position $1$. 
-..example.code: -clipping [ ) - (half-open interval) - -gapped sequence: X--XXX-XX- - -source position: 0111234456 -unclipped view position: 0123456789 -clipped view position: 0123456 -..include:seqan/align.h -*/ - // ---------------------------------------------------------------------------- // Function endPosition() // ---------------------------------------------------------------------------- @@ -1081,81 +738,47 @@ clipped view position: 0123456 * @fn Gaps#endPosition * @brief Return the clipping end position as a source position. * - * @signature TPosition endPosition(gaps); + * @signature TPos endPosition(gaps); * - * @param gaps The Gaps object to query for the end position as a source position. + * @param[in] gaps The Gaps object to query for the end position as a source position. * - * @return TPosition The end position as a source position. + * @return TPos The end position as a source position (Metafunction: @link ContainerConcept#Position @endlink). * * @section Example * * In the following gaps configuration, the result of endPositioN(gaps) is 4. * - * @code + * @code{.txt} * clipping [ ) - * (half-open interval) - * + * (half-open interval) + * * gapped sequence: X--XXX-XX- - * + * * source position: 0111234456 * unclipped view position: 0123456789 * clipped view position: 0123456 * @endcode */ -/** -.Function.Gaps#endPosition -..class:Class.Gaps -..summary:Return the clipping end position as a source position. -..signature:TPosition endPosition(gaps) -..param.gaps:The @Class.Gaps@ object to query. -...type:Class.Gaps -..returns:The end position of the current clipped view in the source. 
-...type:Metafunction.Position -..see:Function.Gaps#beginPosition -..see:Function.Gaps#setBeginPosition -..see:Function.Gaps#setEndPosition -..see:Function.Gaps#clippedBeginPosition -..see:Function.Gaps#clippedEndPosition -..see:Function.Gaps#setClippedBeginPosition -..see:Function.Gaps#setClippedEndPosition -..example: -In the following gaps configuration, the result of $endPosition(gaps)$ is $4$. -..example.code: -clipping [ ) - (half-open interval) - -gapped sequence: X--XXX-XX- - -source position: 0111234456 -unclipped view position: 0123456789 -clipped view position: 0123456 -..include:seqan/align.h -*/ - // ---------------------------------------------------------------------------- // Function write() // ---------------------------------------------------------------------------- -template +template inline void -write(TFile & target, - Gaps const & source, - TIDString const &, - Raw) +write(TTarget & target, + Gaps const & source) { -//IOREV _nodoc_ specialization not documented - - // Print gaps row - typedef typename Iterator const>::Type TIter; - TIter begin_ = begin(source); - TIter end_ = end(source); - for (; begin_ != end_; ++begin_) { - if (isGap(begin_)) - streamPut(target, gapValue()); - else - streamPut(target, convert(*begin_)); - } + // Print gaps row + typedef typename Iterator const>::Type TIter; + TIter begin_ = begin(source); + TIter end_ = end(source); + for (; begin_ != end_; ++begin_) { + if (isGap(begin_)) + writeValue(target, gapValue()); + else + writeValue(target, convert(getValue(begin_))); + } } // ---------------------------------------------------------------------------- @@ -1164,23 +787,13 @@ write(TFile & target, // TODO(holtgrew): Document appropriately. 
-template -inline TStream & -operator<<(TStream & stream, Gaps const & gaps) +template +inline TTarget & +operator<<(TTarget & target, Gaps const & gaps) { - typedef Gaps const TGaps; - typedef typename Iterator::Type TIter; - - for (TIter it = begin(gaps, Rooted()); !atEnd(it); goNext(it)) - { - // TODO(holtgrew): Ideally, we could simply print the expanded alphabet char but that is broken. - if (isGap(it)) - stream << gapValue(); - else - stream << convert(*it); - } - - return stream; + typename DirectionIterator::Type it = directionIterator(target, Output()); + write(it, gaps); + return target; } // ---------------------------------------------------------------------------- @@ -1204,51 +817,39 @@ void _pumpTraceToGaps(Gaps & gapsH, typedef typename Iterator::Type TGapsVIter; // TODO(holtgrew): I don't understand the following. Originally, this function used Align objects, but I did not understand it there either. - // TODO(rausch): Pump trace into align_ (note: this is relatively slow code here. it could be improved if specialized to the Align Specs). + // TODO(rausch): Pump trace into align_ (note: this is relatively slow code here. it could be improved if specialized to the Align Specs). clearGaps(gapsH); clearClipping(gapsH); clearGaps(gapsV); clearClipping(gapsV); - TSize i = length(trace.sizes); // Scan trace backwards. - TGapsHIter itH = begin(gapsH); - TGapsVIter itV = begin(gapsV); - while (i > 0) - { - --i; - TSize size = trace.sizes[i]; - switch ((int) trace.tvs[i]) - { - case 1: // Go horizontal. - insertGaps(itV, size); - break; - - case 2: // Go vertical. - insertGaps(itH, size); - break; - } - goFurther(itH, size); - goFurther(itV, size); - } + TSize i = length(trace.sizes); // Scan trace backwards. + TGapsHIter itH = begin(gapsH); + TGapsVIter itV = begin(gapsV); + while (i > 0) + { + --i; + TSize size = trace.sizes[i]; + switch ((int) trace.tvs[i]) + { + case 1: // Go horizontal. + insertGaps(itV, size); + break; + + case 2: // Go vertical. 
+ insertGaps(itH, size); + break; + } + goFurther(itH, size); + goFurther(itV, size); + } } // ---------------------------------------------------------------------------- // Function source() // ---------------------------------------------------------------------------- -// TODO(holtgrew): source concept in dox? - -/** -.Function.source -..summary:Return underlying object. -..cat:Basic -..signature:source(obj) -..param.obj:The object to get underlying sequence of. -...type:Class.Gaps -..returns:The underlying object. -...type:Metafunction.Source -..include:seqan/align.h -*/ +// TODO(holtgrew): Document TSource via metafunctio. /*! * @fn Gaps#source @@ -1256,23 +857,11 @@ void _pumpTraceToGaps(Gaps & gapsH, * * @signature TSource source(gaps); * - * @param gaps The Gaps object to return the underling sequence for. - * + * @param[in] gaps The Gaps object to return the underling sequence for. + * * @return TSource Reference to the source of the Gaps. */ -/* -.Function.Gaps#source -..summary:Return underlying sequence. -..cat:Alignments -..signature:TSource source(gaps) -..param.obj:The object to get underlying sequence of. -...type:Class.Gaps -..returns:Reference to the underlying sequence. -...type:Metafunction.Source -..include:seqan/align.h -*/ - // ---------------------------------------------------------------------------- // Function sourceSegment() // ---------------------------------------------------------------------------- @@ -1313,27 +902,10 @@ sourceSegment(Gaps & gaps) * * @signature void assignSource(gaps, seq); * - * @param gaps The Gaps object to assign the source of. - * @param seq The @link SequenceConcept sequence @endlink to assign to the underlying string.:w + * @param[in,out] gaps The Gaps object to assign the source of. + * @param[in] seq The @link ContainerConcept sequence @endlink to assign to the underlying string. */ -/** -.Function.Gaps#assignSource -..class:Class.Gaps -..summary:Assign the source of a Gaps object, copying data. 
-..cat:Alignments -..signature:void assignSource(gaps, sequence) -..param.gaps:The @Class.Gaps@ object to assign the source of. -...type:Class.Gaps -..param.sequence:The @Concept.SequenceConcept@ to assign as the source. -...type:Metafunction.Source -..remarks:This will copy $sequence$ into the source of $gaps$. -..returns:$void$ -..see:Function.Gaps#setSource -..see:Function.source -..include:seqan/align.h -*/ - // TOOD(holtgrew): Switch to Hosted Type? template @@ -1347,23 +919,6 @@ assignSource(Gaps & gaps, TValue const & value) // Function setSource() // ---------------------------------------------------------------------------- -/** -.Function.Gaps#setSource -..class:Class.Gaps -..summary:Set the source of a Gaps object, do not copy if possible. -..cat:Alignments -..signature:void setSource(gaps, sequence) -..param.gaps:The @Class.Gaps@ object to set the source of. -...type:Class.Gaps -..param.sequence:The @Concept.SequenceConcept@ to set as the source. -...type:Metafunction.Source -..remarks:This will avoid copying if possible. -..returns:$void$ -..see:Function.Gaps#assignSource -..see:Function.source -..include:seqan/align.h -*/ - // ---------------------------------------------------------------------------- // Function copyGaps() // ---------------------------------------------------------------------------- @@ -1376,8 +931,8 @@ assignSource(Gaps & gaps, TValue const & value) * * @signature void copyGaps(dest, source); * - * @param[out] dest The destination Gaps object (appropriate clipping, no gaps). - * @param[in] source The source Gaps object. + * @param[in,out] dest The destination Gaps object (appropriate clipping, no gaps). + * @param[in] source The source Gaps object. 
*/ template @@ -1419,8 +974,8 @@ void copyGaps(Gaps & dest, Gaps @@ -1431,10 +986,40 @@ void copyClipping(Gaps & dest, Gaps +inline void clipSemiGlobal(TGlobalGaps & global, TLocalGaps & local) +{ + typedef typename Size::Type TGapsSize; + + TGapsSize leadingGaps = countLeadingGaps(local); + TGapsSize trailingGaps = countTrailingGaps(local); + TGapsSize globalLenght = length(global); + TGapsSize localLength = length(local); + + setClippedBeginPosition(global, leadingGaps); + setClippedBeginPosition(local, leadingGaps); + setClippedEndPosition(global, globalLenght - trailingGaps); + setClippedEndPosition(local, localLength - trailingGaps); +} + +// ---------------------------------------------------------------------------- +// Function clear() +// ---------------------------------------------------------------------------- template inline void clearGaps(Gaps & gaps); @@ -1482,4 +1067,4 @@ inline bool operator!=(Gaps const & lhs, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_BASE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_BASE_H_ diff --git a/seqan/align/gaps_iterator_anchor.h b/seqan/align/gaps_iterator_anchor.h index 7692a8a..fb8c87a 100644 --- a/seqan/align/gaps_iterator_anchor.h +++ b/seqan/align/gaps_iterator_anchor.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Manuel Holtgrewe // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_ANCHOR_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_ANCHOR_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_ANCHOR_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_ANCHOR_H_ namespace seqan { @@ -51,82 +51,82 @@ template //Gaps > > { public: - typedef TGaps_ TGaps; - typedef typename Source::Type TSource; - typedef TGapAnchors_ TGapAnchors; + typedef TGaps_ TGaps; + typedef typename Source::Type TSource; + typedef TGapAnchors_ TGapAnchors; // TODO(holtgrew): Why is the following commented out? -// typedef typename Value::Type TGapAnchor; +// typedef typename Value::Type TGapAnchor; typedef typename Size::Type>::Type TGapAnchorSize_; - typedef GapAnchor::Type> TGapAnchor; - typedef typename MakeSigned::Type>::Type TGapPos; - typedef typename Iterator::Type TAnchorIter; - - TGaps * data_container; //the gaps object - TGapPos seqLength; - mutable TGapAnchor current; - mutable TGapAnchor prevAnchor; - mutable TGapAnchor nextAnchor; - mutable TGapAnchor viewBegin; - mutable TGapAnchor viewEnd; - mutable int anchorIdx; + typedef GapAnchor::Type> TGapAnchor; + typedef typename MakeSigned::Type>::Type TGapPos; + typedef typename Iterator::Type TAnchorIter; + + TGaps * data_container; //the gaps object + TGapPos seqLength; + mutable TGapAnchor current; + mutable TGapAnchor prevAnchor; + mutable TGapAnchor nextAnchor; + mutable TGapAnchor viewBegin; + mutable TGapAnchor viewEnd; + mutable int anchorIdx; public: - Iter() - { + Iter() + { SEQAN_CHECKPOINT - data_container = NULL; - seqLength = 0; - } -/* Iter(Iter const & other_): - data_container(other_.data_container), - seqLength(other_.seqLength), - current(other_.current), - prevAnchor(other_.prevAnchor), - nextAnchor(other_.nextAnchor), - 
anchorIdx(other_.anchorIdx) - { + data_container = NULL; + seqLength = 0; + } +/* Iter(Iter const & other_): + data_container(other_.data_container), + seqLength(other_.seqLength), + current(other_.current), + prevAnchor(other_.prevAnchor), + nextAnchor(other_.nextAnchor), + anchorIdx(other_.anchorIdx) + { SEQAN_CHECKPOINT - } -*/ Iter(TGaps & container_): - data_container(&container_) - { + } +*/ Iter(TGaps & container_): + data_container(&container_) + { SEQAN_CHECKPOINT - _assignSourceLength(seqLength, container_); - _goToGapAnchorIterator(*this, data_container->data_viewCutBegin + data_container->data_cutBegin); - viewBegin = current; - viewEnd.gapPos = _unclippedLength(*data_container) + data_container->data_cutBegin - data_container->data_viewCutEnd; - viewEnd.seqPos = positionGapToSeq(*data_container, viewEnd.gapPos); - } - Iter(TGaps & container_, TGapPos clippedViewPosition): - data_container(&container_) - { + _assignSourceLength(seqLength, container_); + _goToGapAnchorIterator(*this, data_container->data_viewCutBegin + data_container->data_cutBegin); + viewBegin = current; + viewEnd.gapPos = _unclippedLength(*data_container) + data_container->data_cutBegin - data_container->data_viewCutEnd; + viewEnd.seqPos = positionGapToSeq(*data_container, viewEnd.gapPos); + } + Iter(TGaps & container_, TGapPos clippedViewPosition): + data_container(&container_) + { SEQAN_CHECKPOINT - _assignSourceLength(seqLength, container_); - _goToGapAnchorIterator(*this, clippedViewPosition + data_container->data_viewCutBegin + data_container->data_cutBegin); - viewBegin.gapPos = data_container->data_viewCutBegin + data_container->data_cutBegin; - viewEnd.gapPos = _unclippedLength(*data_container) + data_container->data_cutBegin - data_container->data_viewCutEnd; - viewBegin.seqPos = positionGapToSeq(*data_container, viewBegin.gapPos); - viewEnd.seqPos = positionGapToSeq(*data_container, viewEnd.gapPos); - } - ~Iter() - { + _assignSourceLength(seqLength, container_); + 
_goToGapAnchorIterator(*this, clippedViewPosition + data_container->data_viewCutBegin + data_container->data_cutBegin); + viewBegin.gapPos = data_container->data_viewCutBegin + data_container->data_cutBegin; + viewEnd.gapPos = _unclippedLength(*data_container) + data_container->data_cutBegin - data_container->data_viewCutEnd; + viewBegin.seqPos = positionGapToSeq(*data_container, viewBegin.gapPos); + viewEnd.seqPos = positionGapToSeq(*data_container, viewEnd.gapPos); + } + ~Iter() + { SEQAN_CHECKPOINT - } + } - Iter const & operator = (Iter const & other_) - { + Iter const & operator = (Iter const & other_) + { SEQAN_CHECKPOINT - data_container = other_.data_container; - seqLength = other_.seqLength; - current = other_.current; - prevAnchor = other_.prevAnchor; - nextAnchor = other_.nextAnchor; - anchorIdx = other_.anchorIdx; - viewBegin = other_.viewBegin; - viewEnd = other_.viewEnd; - return *this; - } + data_container = other_.data_container; + seqLength = other_.seqLength; + current = other_.current; + prevAnchor = other_.prevAnchor; + nextAnchor = other_.nextAnchor; + anchorIdx = other_.anchorIdx; + viewBegin = other_.viewBegin; + viewEnd = other_.viewEnd; + return *this; + } }; // ============================================================================ @@ -147,14 +147,14 @@ template inline TGaps & container(Iter > > & me) { - return *me.data_container; + return *me.data_container; } template inline TGaps & container(Iter > > const & me) { - return *me.data_container; + return *me.data_container; } // ---------------------------------------------------------------------------- @@ -165,14 +165,14 @@ template inline typename Source > > const>::Type source(Iter > > & me) { - return begin(source(*me.data_container), Rooted()) + me.current.seqPos; + return begin(source(*me.data_container), Rooted()) + me.current.seqPos; } template inline typename Source > > >::Type source(Iter > > const & me) { - return begin(source(*me.data_container), Rooted()) + 
me.current.seqPos; + return begin(source(*me.data_container), Rooted()) + me.current.seqPos; } // ---------------------------------------------------------------------------- @@ -183,20 +183,20 @@ template inline typename GetValue< Iter > > >::Type getValue(Iter > > & me) { - typedef typename Value > >::Type TValue; - if (isGap(me)) return gapValue(); - else if (isUnknown(me)) return unknownValue(); - else return getValue(source(me)); + typedef typename Value > >::Type TValue; + if (isGap(me)) return gapValue(); + else if (isUnknown(me)) return unknownValue(); + else return getValue(source(me)); } template inline typename GetValue< Iter > > const>::Type getValue(Iter > > const & me) { - typedef typename Value > const>::Type TValue; - if (isGap(me)) return gapValue(); - else if (isUnknown(me)) return unknownValue(); - else return getValue(source(me)); + typedef typename Value > const>::Type TValue; + if (isGap(me)) return gapValue(); + else if (isUnknown(me)) return unknownValue(); + else return getValue(source(me)); } // ---------------------------------------------------------------------------- @@ -227,7 +227,7 @@ template inline bool isGap(Iter > > const & me) { - return me.current.seqPos == me.nextAnchor.seqPos; + return me.current.seqPos == me.nextAnchor.seqPos; } // ---------------------------------------------------------------------------- @@ -238,9 +238,9 @@ template inline bool isUnknown(Iter > > const & me) { - int len; - _assignSourceLength(len, *me.data_container); - return me.current.seqPos < 0 || me.current.seqPos >= len; + int len; + _assignSourceLength(len, *me.data_container); + return me.current.seqPos < 0 || me.current.seqPos >= len; } // ---------------------------------------------------------------------------- @@ -251,7 +251,7 @@ template inline bool isClipped(Iter > > const & me) { - return me.current.gapPos == me.nextAnchor.gapPos; + return me.current.gapPos == me.nextAnchor.gapPos; } // 
---------------------------------------------------------------------------- @@ -262,8 +262,8 @@ template inline typename Size::Type countGaps(Iter > > const & me) { - if (!isGap(me)) - return 0; + if (!isGap(me)) + return 0; if (me.nextAnchor.gapPos > me.viewEnd.gapPos) return me.viewEnd.gapPos - me.current.gapPos; return me.nextAnchor.gapPos - me.current.gapPos; @@ -303,19 +303,19 @@ blockLength(Iter > > & me) // ---------------------------------------------------------------------------- template -inline bool +inline bool atBegin(Iter > > & me) { -// return me.current.seqPos == 0 && me.current.gapPos == 0; - return me.current <= me.viewBegin; +// return me.current.seqPos == 0 && me.current.gapPos == 0; + return me.current <= me.viewBegin; } template -inline bool +inline bool atBegin(Iter > > const & me) { -// return me.current.seqPos == 0 && me.current.gapPos == 0; - return me.current <= me.viewBegin; +// return me.current.seqPos == 0 && me.current.gapPos == 0; + return me.current <= me.viewBegin; } // ---------------------------------------------------------------------------- @@ -323,19 +323,19 @@ atBegin(Iter > > const & me) // ---------------------------------------------------------------------------- template -inline bool +inline bool atEnd(Iter > > & me) { -// return me.current == me.nextAnchor; - return me.current >= me.viewEnd; +// return me.current == me.nextAnchor; + return me.current >= me.viewEnd; } template -inline bool +inline bool atEnd(Iter > > const & me) { -// return me.current == me.nextAnchor; - return me.current >= me.viewEnd; +// return me.current == me.nextAnchor; + return me.current >= me.viewEnd; } // ---------------------------------------------------------------------------- @@ -343,12 +343,12 @@ atEnd(Iter > > const & me) // ---------------------------------------------------------------------------- template -inline bool +inline bool operator == ( - Iter > > const & left, - Iter > > const & right) + Iter > > const & left, + Iter > > 
const & right) { - return left.current == right.current; + return left.current == right.current; } // ---------------------------------------------------------------------------- @@ -356,12 +356,12 @@ operator == ( // ---------------------------------------------------------------------------- template -inline bool +inline bool operator != ( - Iter > > const & left, - Iter > > const & right) + Iter > > const & left, + Iter > > const & right) { - return left.current != right.current; + return left.current != right.current; } // ---------------------------------------------------------------------------- @@ -369,12 +369,12 @@ operator != ( // ---------------------------------------------------------------------------- template -inline bool +inline bool operator < ( - Iter > > const & left, - Iter > > const & right) + Iter > > const & left, + Iter > > const & right) { - return left.current < right.current; + return left.current < right.current; } // ---------------------------------------------------------------------------- @@ -382,12 +382,12 @@ operator < ( // ---------------------------------------------------------------------------- template -inline bool +inline bool operator<=( - Iter > > const & left, - Iter > > const & right) + Iter > > const & left, + Iter > > const & right) { - return !(left.current > right.current); + return !(left.current > right.current); } // ---------------------------------------------------------------------------- @@ -395,12 +395,12 @@ operator<=( // ---------------------------------------------------------------------------- template -inline bool +inline bool operator > ( - Iter > > const & left, - Iter > > const & right) + Iter > > const & left, + Iter > > const & right) { - return left.current > right.current; + return left.current > right.current; } // ---------------------------------------------------------------------------- @@ -422,47 +422,47 @@ operator>=(Iter > > const & lhs, template inline void insertGaps(Iter > > const & 
me, - TCount size) -{ - TGapAnchors & anchors = _dataAnchors(*me.data_container); - typedef typename Iterator::Type TIter; - - if (size <= 0) return; - - // insert a new anchor - if (!isGap(me)) - { - if (me.prevAnchor.gapPos == me.current.gapPos) - { - me.nextAnchor = me.prevAnchor; - _getAnchor(me.prevAnchor, *me.data_container, --me.anchorIdx); - } - else - { - me.nextAnchor = me.current; - insertValue(anchors, me.anchorIdx, me.nextAnchor, Generous()); - } - } - else - { - if (me.anchorIdx >= (int)length(anchors)) - { - // add gap after the sequence and in (or at the right boundary of) the view - if (me.current.gapPos <= me.viewEnd.gapPos) - { - container(me).data_cutEnd -= size; - me.viewEnd.gapPos += size; - } - return; - } - if (empty(anchors)) - appendValue(anchors, me.nextAnchor, Generous()); - } - if (me.anchorIdx < (int)length(anchors)) - { - if (me.anchorIdx >= 0) + TCount size) +{ + TGapAnchors & anchors = _dataAnchors(*me.data_container); + typedef typename Iterator::Type TIter; + + if (size <= 0) return; + + // insert a new anchor + if (!isGap(me)) + { + if (me.prevAnchor.gapPos == me.current.gapPos) { - me.nextAnchor.gapPos += size; + me.nextAnchor = me.prevAnchor; + _getAnchor(me.prevAnchor, *me.data_container, --me.anchorIdx); + } + else + { + me.nextAnchor = me.current; + insertValue(anchors, me.anchorIdx, me.nextAnchor, Generous()); + } + } + else + { + if (me.anchorIdx >= (int)length(anchors)) + { + // add gap after the sequence and in (or at the right boundary of) the view + if (me.current.gapPos <= me.viewEnd.gapPos) + { + container(me).data_cutEnd -= size; + me.viewEnd.gapPos += size; + } + return; + } + if (empty(anchors)) + appendValue(anchors, me.nextAnchor, Generous()); + } + if (me.anchorIdx < (int)length(anchors)) + { + if (me.anchorIdx >= 0) + { + me.nextAnchor.gapPos += size; TIter it = begin(anchors, Standard()); TIter itEnd = end(anchors, Standard()); if (me.anchorIdx >= 0) @@ -471,22 +471,22 @@ insertGaps(Iter > > const & me, 
(*it).gapPos += size; } else - // add gap before the sequence and in (or at the left boundary of) the view - if (me.current.gapPos >= me.viewBegin.gapPos) - { - container(me).data_cutBegin -= size; - me.viewBegin.gapPos -= size; - me.current.gapPos -= size; - return; - } - } - if (me.current.gapPos <= me.viewEnd.gapPos) - me.viewEnd.gapPos += size; + // add gap before the sequence and in (or at the left boundary of) the view + if (me.current.gapPos >= me.viewBegin.gapPos) + { + container(me).data_cutBegin -= size; + me.viewBegin.gapPos -= size; + me.current.gapPos -= size; + return; + } + } + if (me.current.gapPos <= me.viewEnd.gapPos) + me.viewEnd.gapPos += size; /* - Iter > > it2 = begin(*me.data_container) + me.current.gapPos; - if (me.current != it2.current || me.prevAnchor != it2.prevAnchor || me.nextAnchor != it2.nextAnchor || me.anchorIdx != it2.anchorIdx) - std::cout<<"*"; + Iter > > it2 = begin(*me.data_container) + me.current.gapPos; + if (me.current != it2.current || me.prevAnchor != it2.prevAnchor || me.nextAnchor != it2.nextAnchor || me.anchorIdx != it2.anchorIdx) + std::cout<<"*"; */ } @@ -497,40 +497,40 @@ insertGaps(Iter > > const & me, template inline typename Size::Type removeGaps(Iter > > const & it, - TCount size_) + TCount size_) { - TGapAnchors & anchors = _dataAnchors(*it.data_container); - typedef typename Iterator::Type TAnchorsIter; + TGapAnchors & anchors = _dataAnchors(*it.data_container); + typedef typename Iterator::Type TAnchorsIter; typedef Iter > > TIter; typedef typename TIter::TGapAnchor TGapAnchor; - // typedef typename Value::Type TGapAnchor; + // typedef typename Value::Type TGapAnchor; typedef typename Position::Type TPos; - if (size_ <= 0 || !isGap(it)) + if (size_ <= 0 || !isGap(it)) return 0; TPos size = size_; // static_cast(Nothing()); // static_cast(Nothing()); - if (it.current.gapPos + size > it.nextAnchor.gapPos) - size = it.nextAnchor.gapPos - it.current.gapPos; - - if (it.prevAnchor.gapPos + it.current.seqPos == 
it.current.gapPos + it.prevAnchor.seqPos && - it.current.gapPos + size == it.nextAnchor.gapPos) - { - // remove the gap - if (it.anchorIdx < (int)length(anchors)) - erase(anchors, it.anchorIdx); - _getAnchor(it.nextAnchor, *it.data_container, it.anchorIdx); - } - - // shift anchors - if (it.anchorIdx < (int)length(anchors)) - { + if (it.current.gapPos + size > it.nextAnchor.gapPos) + size = it.nextAnchor.gapPos - it.current.gapPos; + + if (it.prevAnchor.gapPos + it.current.seqPos == it.current.gapPos + it.prevAnchor.seqPos && + it.current.gapPos + size == it.nextAnchor.gapPos) + { + // remove the gap + if (it.anchorIdx < (int)length(anchors)) + erase(anchors, it.anchorIdx); + _getAnchor(it.nextAnchor, *it.data_container, it.anchorIdx); + } + + // shift anchors + if (it.anchorIdx < (int)length(anchors)) + { if (it.anchorIdx >= 0) { - it.nextAnchor.gapPos -= size; + it.nextAnchor.gapPos -= size; TAnchorsIter itA = begin(anchors, Standard()); TAnchorsIter itAEnd = end(anchors, Standard()); if (it.anchorIdx >= 0) @@ -539,31 +539,31 @@ removeGaps(Iter > > const & it, (*itA).gapPos -= size; } else - // remove gap before the sequence and in (or at the left boundary of) the view - if (it.current.gapPos >= it.viewBegin.gapPos) - { - // assure that we don't remove more gaps than available - if (size > it.nextAnchor.gapPos - it.current.gapPos) - size = it.nextAnchor.gapPos - it.current.gapPos; - container(it).data_cutBegin += size; - it.viewBegin.gapPos += size; - it.current.gapPos += size; - return size; - } - } - else - { - if (it.current.gapPos <= it.viewEnd.gapPos) - container(it).data_cutEnd += size; - } - if (it.current.gapPos <= it.viewEnd.gapPos) - it.viewEnd.gapPos -= size; + // remove gap before the sequence and in (or at the left boundary of) the view + if (it.current.gapPos >= it.viewBegin.gapPos) + { + // assure that we don't remove more gaps than available + if (size > it.nextAnchor.gapPos - it.current.gapPos) + size = it.nextAnchor.gapPos - it.current.gapPos; + 
container(it).data_cutBegin += size; + it.viewBegin.gapPos += size; + it.current.gapPos += size; + return size; + } + } + else + { + if (it.current.gapPos <= it.viewEnd.gapPos) + container(it).data_cutEnd += size; + } + if (it.current.gapPos <= it.viewEnd.gapPos) + it.viewEnd.gapPos -= size; return size; /* - Iter > > it2 = begin(*me.data_container) + me.current.gapPos; - if (me.current != it2.current || me.prevAnchor != it2.prevAnchor || me.nextAnchor != it2.nextAnchor || me.anchorIdx != it2.anchorIdx) - std::cout<<"*"; + Iter > > it2 = begin(*me.data_container) + me.current.gapPos; + if (me.current != it2.current || me.prevAnchor != it2.prevAnchor || me.nextAnchor != it2.nextAnchor || me.anchorIdx != it2.anchorIdx) + std::cout<<"*"; */ } @@ -572,20 +572,20 @@ removeGaps(Iter > > const & it, // ---------------------------------------------------------------------------- template -inline void +inline void _goNextGapAnchorIterator(T & me) { - if (me.current.gapPos < me.nextAnchor.gapPos) - { - ++me.current.gapPos; - if (me.current.seqPos < me.nextAnchor.seqPos) - ++me.current.seqPos; - } - while (me.current.gapPos == me.nextAnchor.gapPos) - { - me.current = me.prevAnchor = me.nextAnchor; - _getAnchor(me.nextAnchor, *me.data_container, ++me.anchorIdx + 1); - } + if (me.current.gapPos < me.nextAnchor.gapPos) + { + ++me.current.gapPos; + if (me.current.seqPos < me.nextAnchor.seqPos) + ++me.current.seqPos; + } + while (me.current.gapPos == me.nextAnchor.gapPos) + { + me.current = me.prevAnchor = me.nextAnchor; + _getAnchor(me.nextAnchor, *me.data_container, ++me.anchorIdx + 1); + } } // ---------------------------------------------------------------------------- @@ -593,19 +593,19 @@ _goNextGapAnchorIterator(T & me) // ---------------------------------------------------------------------------- template -inline void +inline void _goPreviousGapAnchorIterator(T & me) -{ - while (me.current.gapPos == me.prevAnchor.gapPos) - { - me.current = me.nextAnchor = me.prevAnchor; - 
_getAnchor(me.prevAnchor, *me.data_container, --me.anchorIdx); - } - --me.current.gapPos; - if (me.nextAnchor.seqPos - me.prevAnchor.seqPos > me.current.gapPos - me.prevAnchor.gapPos) - me.current.seqPos = me.prevAnchor.seqPos + (me.current.gapPos - me.prevAnchor.gapPos); - else - me.current.seqPos = me.nextAnchor.seqPos; +{ + while (me.current.gapPos == me.prevAnchor.gapPos) + { + me.current = me.nextAnchor = me.prevAnchor; + _getAnchor(me.prevAnchor, *me.data_container, --me.anchorIdx); + } + --me.current.gapPos; + if (me.nextAnchor.seqPos - me.prevAnchor.seqPos > me.current.gapPos - me.prevAnchor.gapPos) + me.current.seqPos = me.prevAnchor.seqPos + (me.current.gapPos - me.prevAnchor.gapPos); + else + me.current.seqPos = me.nextAnchor.seqPos; } // ---------------------------------------------------------------------------- @@ -613,39 +613,39 @@ _goPreviousGapAnchorIterator(T & me) // ---------------------------------------------------------------------------- template -inline void +inline void _goToGapAnchorIterator(T & me, TPos pos) { - typedef typename T::TGapAnchors TGapAnchors; - typedef typename Value::Type TGapAnchor; - typedef typename Position::Type TAnchorPos; - typedef typename MakeSigned::Type TAnchorSPos; - - if (_helperIsNegative(pos, typename IsSameType::Type>::Type())) - me.anchorIdx = -1; - else - { - TGapAnchors const & anchors = _dataAnchors(*me.data_container); - if (!empty(anchors)) - { - me.anchorIdx = upperBoundGapAnchor(anchors, pos, SortGapPos()) - begin(anchors, Standard()); - if (me.anchorIdx < (int)length(anchors)) - if (anchors[me.anchorIdx].gapPos == (TAnchorPos)pos && anchors[me.anchorIdx].seqPos != (TAnchorPos)me.seqLength) - ++me.anchorIdx; - } - else + typedef typename T::TGapAnchors TGapAnchors; + typedef typename Value::Type TGapAnchor; + typedef typename Position::Type TAnchorPos; + typedef typename MakeSigned::Type TAnchorSPos; + + if (isNegative(pos)) + me.anchorIdx = -1; + else + { + TGapAnchors const & anchors = 
_dataAnchors(*me.data_container); + if (!empty(anchors)) { - me.anchorIdx = ((TAnchorSPos)pos < me.seqLength)? 0: 1; + me.anchorIdx = upperBoundGapAnchor(anchors, pos, SortGapPos()) - begin(anchors, Standard()); + if (me.anchorIdx < (int)length(anchors)) + if (anchors[me.anchorIdx].gapPos == (TAnchorPos)pos && anchors[me.anchorIdx].seqPos != (TAnchorPos)me.seqLength) + ++me.anchorIdx; } - } - _getAnchor(me.prevAnchor, *me.data_container, me.anchorIdx); - _getAnchor(me.nextAnchor, *me.data_container, me.anchorIdx + 1); + else + { + me.anchorIdx = ((TAnchorSPos)pos < me.seqLength)? 0: 1; + } + } + _getAnchor(me.prevAnchor, *me.data_container, me.anchorIdx); + _getAnchor(me.nextAnchor, *me.data_container, me.anchorIdx + 1); - me.current.gapPos = pos; - if (me.nextAnchor.seqPos - me.prevAnchor.seqPos > (int)pos - me.prevAnchor.gapPos) - me.current.seqPos = me.prevAnchor.seqPos + ((int)pos - me.prevAnchor.gapPos); - else - me.current.seqPos = me.nextAnchor.seqPos; + me.current.gapPos = pos; + if (me.nextAnchor.seqPos - me.prevAnchor.seqPos > (int)pos - me.prevAnchor.gapPos) + me.current.seqPos = me.prevAnchor.seqPos + ((int)pos - me.prevAnchor.gapPos); + else + me.current.seqPos = me.nextAnchor.seqPos; } // ---------------------------------------------------------------------------- @@ -656,7 +656,7 @@ template inline void goNext(Iter > > & me) { - _goNextGapAnchorIterator(me); + _goNextGapAnchorIterator(me); } // ---------------------------------------------------------------------------- @@ -667,7 +667,7 @@ template inline void goPrevious(Iter > > & me) { - _goPreviousGapAnchorIterator(me); + _goPreviousGapAnchorIterator(me); } // ---------------------------------------------------------------------------- @@ -678,7 +678,7 @@ template inline void goFurther(Iter > > & me, TSize steps) { - _goToGapAnchorIterator(me, me.current.gapPos + steps); + _goToGapAnchorIterator(me, me.current.gapPos + steps); } // 
---------------------------------------------------------------------------- @@ -747,4 +747,4 @@ operator+(Iter > > const & lhs, TDif } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_ANCHOR_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_ANCHOR_H_ diff --git a/seqan/align/gaps_iterator_array.h b/seqan/align/gaps_iterator_array.h index 6d8da0c..5361d93 100644 --- a/seqan/align/gaps_iterator_array.h +++ b/seqan/align/gaps_iterator_array.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -33,8 +33,8 @@ // Author: Manuel Holtgrewe // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_ARRAY_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_ARRAY_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_ARRAY_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_ARRAY_H_ namespace seqan { @@ -281,7 +281,7 @@ inline bool goPrevious(Iter > & it) { typedef typename Position::Type TGapsPos; - + if (atBegin(it)) // Handle case of being at the beginning of the gaps. return false; @@ -303,7 +303,7 @@ goPrevious(Iter > & it) it._sourcePosition -= 1; // Adjust clipped view position. it._unclippedViewPosition -= 1; - + return true; } @@ -366,7 +366,7 @@ goFurther(Iter > & it, // TODO(holtgrew): Handle going backwards more efficiently. 
if (delta == TDifference(0)) return; - if ((int)delta < 0) + if (isNegative(delta)) { typedef typename MakeSigned::Type TSignedDifference; for (; -static_cast(delta); ++delta) @@ -487,7 +487,7 @@ insertGaps(Iter > const & it, { if (count == TCount(0)) return; // Do nothing! - + typedef typename TGaps::TArray_ TArray; typedef typename Position::Type TArrayPos; @@ -738,4 +738,4 @@ operator+(Iter > const & lhs, TDifference d) } // namespace seqan -#endif // SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_ARRAY_H_ +#endif // SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_ARRAY_H_ diff --git a/seqan/align/gaps_iterator_base.h b/seqan/align/gaps_iterator_base.h index 1501e17..a1802b7 100644 --- a/seqan/align/gaps_iterator_base.h +++ b/seqan/align/gaps_iterator_base.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ // TODO(holtgrew): Switch to Host interface. -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_BASE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_BASE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_BASE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_BASE_H_ namespace seqan { @@ -60,6 +60,19 @@ typedef Tag Position_; // Tags, Classes, Enums // ============================================================================ +/*! + * @class GapsIterator + * @implements RandomAccessIteratorConcept + * + * @brief Iterator class for @link Gaps @endlink. + * + * @signature template + * class Iter >; + * + * @tparam TGaps The @link Gaps @endlink object for the iterator. + * @tparam TSpec The specializing tag. 
+ */ + template struct GapsIterator; @@ -104,15 +117,15 @@ struct Difference > const> : template struct Source > > { - typedef typename Source::Type TSource_; - typedef typename Iterator::Type Type; + typedef typename Source::Type TSource_; + typedef typename Iterator::Type Type; }; template struct Source > const> { - typedef typename Source::Type TSource_; - typedef typename Iterator::Type Type; + typedef typename Source::Type TSource_; + typedef typename Iterator::Type Type; }; // ---------------------------------------------------------------------------- @@ -122,11 +135,11 @@ struct Source > const> template struct Value > > { - typedef typename Source > >::Type TSource_; - typedef typename Value::Type TSourceValue_; + typedef typename Source > >::Type TSource_; + typedef typename Value::Type TSourceValue_; //typedef TSourceValue_ Type; // TODO(holtgrew): We really want gapped values here but there are issues... - typedef typename GappedValueType::Type Type; + typedef typename GappedValueType::Type Type; }; template @@ -139,13 +152,13 @@ struct Value > const> : template struct GetValue > > : - Value > > + Value > > { }; template struct GetValue > const> : - Value > const> + Value > const> { }; @@ -156,15 +169,15 @@ struct GetValue > const> : template struct Reference > > { - typedef Iter > TIterator_; - typedef Proxy > Type; + typedef Iter > TIterator_; + typedef Proxy > Type; }; template struct Reference > const> { - typedef Iter const > TIterator_; - typedef Proxy > Type; + typedef Iter const > TIterator_; + typedef Proxy > Type; }; // ============================================================================ @@ -178,20 +191,20 @@ struct Reference > const> // TODO(holtgrew): Could be general forward template -inline Iter > & +inline Iter > & operator++(Iter > & it) { - goNext(it); - return it; + goNext(it); + return it; } template inline Iter > operator++(Iter > & it, int) { - Iter > ret = it; - goNext(it); - return ret; + Iter > ret = it; + goNext(it); + 
return ret; } // ---------------------------------------------------------------------------- @@ -199,26 +212,35 @@ operator++(Iter > & it, int) // ---------------------------------------------------------------------------- template -inline Iter > & +inline Iter > & operator--(Iter > & it) { - goPrevious(it); - return it; + goPrevious(it); + return it; } template inline Iter > operator--(Iter > & it, int) { - Iter > ret = it; - goPrevious(it); - return ret; + Iter > ret = it; + goPrevious(it); + return ret; } // ---------------------------------------------------------------------------- // Function insertGap() // ---------------------------------------------------------------------------- +/*! + * @fn GapsIterator#insertGap + * @brief Insert gap at the current position. + * + * @signature void insertGap(it); + * + * @param[in,out] it The iterator to insert gaps at. + */ + // Forward to insertGaps() which has to be implemented by the specific gap // iterator. @@ -226,13 +248,104 @@ template inline void insertGap(Iter > & it) { - insertGaps(it, 1); + insertGaps(it, 1); +} + +// ---------------------------------------------------------------------------- +// Function isCharacter() +// ---------------------------------------------------------------------------- + +/*! + * @fn GapsIterator#isCharacter + * @brief Query an iterator for being at a character + * + * @signature bool isCharacter(it); + * + * @param[in] it Iterator to query for pointing at a character. + * + * @return bool true if it is at a character and false otherwise. + */ + +template +bool isCharacter(Iter > const & it) +{ + return !isGap(it); } +// ---------------------------------------------------------------------------- +// Function countCharacters() +// ---------------------------------------------------------------------------- + +/*! + * @fn GapsIterator#countCharacters + * @brief Count characters at iterator. 
+ * + * @signature TSize countCharacters(it); + * + * @param[in] it Iterator for counting characters at. + * + * @return TSize Number of characters. + */ + +// ---------------------------------------------------------------------------- +// Function isGap() +// ---------------------------------------------------------------------------- + +/*! + * @fn GapsIterator#isGap + * @brief Query an iterator for being at a gap + * + * @signature bool isGap(it); + * + * @param[in] it Iterator to query for pointing at a gap. + * + * @return bool true if it is at a gap and false otherwise. + */ + +// ---------------------------------------------------------------------------- +// Function countGaps() +// ---------------------------------------------------------------------------- + +/*! + * @fn GapsIterator#countGaps + * @brief Count gaps at iterator. + * + * @signature TSize countGaps(it); + * + * @param[in] it Iterator for counting gaps at. + * + * @return TSize Number of gaps. + */ + +// ---------------------------------------------------------------------------- +// Function insertGaps() +// ---------------------------------------------------------------------------- + +/*! + * @fn GapsIterator#insertGaps + * @brief Insert gaps at the current position. + * + * @signature void insertGaps(it, num); + * + * @param[in,out] it Remove gap at the given position (if any). + * @param[in] num Number of gaps to insert. + */ + // ---------------------------------------------------------------------------- // Function removeGap() // ---------------------------------------------------------------------------- +/*! + * @fn GapsIterator#removeGap + * @brief Insert gap at the current position. + * + * @signature TSize removeGap(it); + * + * @param[in,out] it Remove gap at the given position (if any). + * + * @return TSize Number of removed gaps. + */ + // Forward to removeGaps() which has to be implemented by the specific gap // iterator. 
@@ -240,9 +353,25 @@ template inline typename Size::Type removeGap(Iter > & it) { - return removeGaps(it, 1); + return removeGaps(it, 1); } +// ---------------------------------------------------------------------------- +// Function removeGaps() +// ---------------------------------------------------------------------------- + +/*! + * @fn GapsIterator#removeGaps + * @brief Remove gaps from the current position. + * + * @signature TSize removeGaps(it, num); + * + * @param[in,out] it Remove gap at the given position (if any). + * @param[in] num Number of gaps to remove. + * + * @return TSize Number of removed gaps. + */ + // ---------------------------------------------------------------------------- // Function assignValue() // ---------------------------------------------------------------------------- @@ -252,24 +381,24 @@ removeGap(Iter > & it) template inline void assignValue(Iter > & me, - TValue const & val) + TValue const & val) { - if (!isGap(me)) - { - assignValue(source(me), val); - } + if (!isGap(me)) + { + assignValue(source(me), val); + } // TODO(holtgrew): Else, inserting gaps is problematic... 
} template inline void assignValue(Iter > const & me, - TValue const & val) + TValue const & val) { - if (!isGap(me)) - { - assignValue(source(me), val); - } + if (!isGap(me)) + { + assignValue(source(me), val); + } } // ---------------------------------------------------------------------------- @@ -280,14 +409,14 @@ template inline TGaps & container(Iter > & me) { - return *me._container; + return *me._container; } template inline TGaps & container(Iter > const & me) { - return *me._container; + return *me._container; } // ---------------------------------------------------------------------------- @@ -345,9 +474,9 @@ operator-=(Iter > & it, TDiff diff) template inline void goFurther(Iter > & it, - TDifference steps) + TDifference steps) { - typedef typename MakeSigned::Type TSignedDifference; + typedef typename MakeSigned::Type TSignedDifference; if (steps > TDifference(0)) for (; steps; --steps) goNext(it); @@ -369,4 +498,4 @@ isClipped(Iter > const &) } // namespace seqan -#endif // SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_BASE_H_ +#endif // SEQAN_INCLUDE_SEQAN_ALIGN_GAPS_ITERATOR_BASE_H_ diff --git a/seqan/align/global_alignment_banded.h b/seqan/align/global_alignment_banded.h index dabfcec..e705e74 100644 --- a/seqan/align/global_alignment_banded.h +++ b/seqan/align/global_alignment_banded.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,8 @@ // the globalFunction() fails is actually meaningful. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_BANDED_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_BANDED_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_BANDED_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_BANDED_H_ namespace seqan { @@ -79,28 +79,31 @@ template TScoreValue globalAlignment(Align & align, Score const & scoringScheme, - AlignConfig const & alignConfig, + AlignConfig const & /*alignConfig*/, int lowerDiag, int upperDiag, - TAlgoTag const & algoTag) + TAlgoTag const & /*algoTag*/) { typedef Align TAlign; typedef typename Size::Type TSize; typedef typename Position::Type TPosition; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig TAlignConfig; + typedef typename SubstituteAlignConfig_::Type TFreeEndGaps; + typedef AlignConfig2, TFreeEndGaps> TAlignConfig2; + typedef typename SubstituteAlgoTag_::Type TGapModel; String trace; - // We do not need string ids for this variant and set them to 0u. They are - // only required for the Fragment String and the Alignment Graph variant. - TScoreValue res = _setUpAndRunAlignment(trace, source(row(align, 0)), source(row(align, 1)), scoringScheme, - alignConfig, lowerDiag, upperDiag, algoTag); + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(trace, dpScoutState, source(row(align, 0)), source(row(align, 1)), + scoringScheme, TAlignConfig2(lowerDiag, upperDiag), TGapModel()); + _adaptTraceSegmentsTo(row(align, 0), row(align, 1), trace); return res; } // Interface without AlignConfig<>. - template @@ -115,7 +118,6 @@ TScoreValue globalAlignment(Align & align, } // Interface without algorithm tag. 
- template @@ -126,13 +128,12 @@ TScoreValue globalAlignment(Align & align, int upperDiag) { if (scoreGapOpen(scoringScheme) == scoreGapExtend(scoringScheme)) - return globalAlignment(align, scoringScheme, alignConfig, lowerDiag, upperDiag, NeedlemanWunsch()); + return globalAlignment(align, scoringScheme, alignConfig, lowerDiag, upperDiag, LinearGaps()); else - return globalAlignment(align, scoringScheme, alignConfig, lowerDiag, upperDiag, Gotoh()); + return globalAlignment(align, scoringScheme, alignConfig, lowerDiag, upperDiag, AffineGaps()); } // Interface without AlignConfig<> and algorithm tag. - template TScoreValue globalAlignment(Align & align, @@ -156,27 +157,29 @@ template & gapsH, Gaps & gapsV, Score const & scoringScheme, - AlignConfig const & alignConfig, + AlignConfig const & /*alignConfig*/, int lowerDiag, int upperDiag, - TAlgoTag const & algoTag) + TAlgoTag const & /*algoTag*/) { typedef typename Size::Type TSize; typedef typename Position::Type TPosition; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig TAlignConfig; + typedef typename SubstituteAlignConfig_::Type TFreeEndGaps; + typedef AlignConfig2, TFreeEndGaps> TAlignConfig2; + typedef typename SubstituteAlgoTag_::Type TGapModel; - String traceSegments; + String trace; - // We do not need string ids for this variant and set them to 0u. They are - // only required for the Fragment String and the Alignment Graph variant. - TScoreValue res = _setUpAndRunAlignment(traceSegments, source(gapsH), source(gapsV), scoringScheme, alignConfig, - lowerDiag, upperDiag, algoTag); - _adaptTraceSegmentsTo(gapsH, gapsV, traceSegments); + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(trace, dpScoutState, source(gapsH), source(gapsV), scoringScheme, + TAlignConfig2(lowerDiag, upperDiag), TGapModel()); + _adaptTraceSegmentsTo(gapsH, gapsV, trace); return res; } // Interface without AlignConfig<>. - template & gapsH, } // Interface without algorithm tag. 
- template & gapsH, } // Interface without AlignConfig<> and algorithm tag. - template @@ -231,35 +232,39 @@ TScoreValue globalAlignment(Gaps & gapsH, // ---------------------------------------------------------------------------- // Full interface. - template TScoreValue globalAlignment(Graph > & alignmentGraph, Score const & scoringScheme, - AlignConfig const & alignConfig, + AlignConfig const & /*alignConfig*/, int lowerDiag, int upperDiag, - TAlgoTag const & algoTag) + TAlgoTag const & /*algoTag*/) { typedef Graph > TGraph; typedef typename Position::Type TPosition; typedef typename Size::Type TSize; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig TAlignConfig; + typedef typename SubstituteAlignConfig_::Type TFreeEndGaps; + typedef AlignConfig2, TFreeEndGaps> TAlignConfig2; + typedef typename SubstituteAlgoTag_::Type TGapModel; - String traceSegments; + String trace; + + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(trace, dpScoutState, value(stringSet(alignmentGraph), 0), + value(stringSet(alignmentGraph), 1), scoringScheme, + TAlignConfig2(lowerDiag, upperDiag), TGapModel()); - TScoreValue res = _setUpAndRunAlignment(traceSegments, value(stringSet(alignmentGraph), 0), - value(stringSet(alignmentGraph), 1), scoringScheme, alignConfig, lowerDiag, - upperDiag, algoTag); _adaptTraceSegmentsTo(alignmentGraph, positionToId(stringSet(alignmentGraph), 0), - positionToId(stringSet(alignmentGraph), 1), traceSegments); + positionToId(stringSet(alignmentGraph), 1), trace); return res; } // Interface without AlignConfig<>. - template @@ -274,7 +279,6 @@ TScoreValue globalAlignment(Graph > & } // Interface without algorithm tag. - template @@ -291,7 +295,6 @@ TScoreValue globalAlignment(Graph > & } // Interface without AlignConfig<> and algorithm tag. 
- template TScoreValue globalAlignment(Graph > & alignmentGraph, @@ -308,7 +311,6 @@ TScoreValue globalAlignment(Graph > & // ---------------------------------------------------------------------------- // Full interface. - template , TStringSpec> & fragmentString, StringSet const & strings, Score const & scoringScheme, - AlignConfig const & alignConfig, + AlignConfig const & /*alignConfig*/, int lowerDiag, int upperDiag, - TAlgoTag const & algoTag) + TAlgoTag const & /*algoTag*/) { typedef String, TStringSpec> TFragments; typedef typename Position::Type TPosition; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig TAlignConfig; + typedef typename SubstituteAlignConfig_::Type TFreeEndGaps; + typedef AlignConfig2, TFreeEndGaps> TAlignConfig2; + typedef typename SubstituteAlgoTag_::Type TGapModel; + + String trace; - String traceSegments; + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(trace, dpScoutState, value(strings, 0), value(strings, 1), scoringScheme, + TAlignConfig2(lowerDiag, upperDiag), TGapModel()); - TScoreValue res = _setUpAndRunAlignment(traceSegments, value(strings, 0), value(strings, 1), scoringScheme, - alignConfig, lowerDiag, upperDiag, algoTag); - _adaptTraceSegmentsTo(fragmentString, positionToId(strings, 0), positionToId(strings, 1), traceSegments); + _adaptTraceSegmentsTo(fragmentString, positionToId(strings, 0), positionToId(strings, 1), trace); return res; } // Interface without AlignConfig<>. - template , TStringSpec> } // Interface without algorithm tag. - template , TStringSpec> } // Interface without AlignConfig<> and algorithm tag. 
- template @@ -397,16 +402,23 @@ template const & scoringScheme, - AlignConfig const & alignConfig, + AlignConfig const & /*alignConfig*/, int lowerDiag, int upperDiag, - TAlgoTag const & algoTag) + TAlgoTag const & /*algoTag*/) { - return _setUpAndRunAlignment(seqH, seqV, scoringScheme, alignConfig, lowerDiag, upperDiag, algoTag); + typedef AlignConfig TAlignConfig; + typedef typename SubstituteAlignConfig_::Type TFreeEndGaps; + typedef AlignConfig2, TFreeEndGaps, TracebackOff> TAlignConfig2; + typedef typename SubstituteAlgoTag_::Type TGapModel; + + DPScoutState_ dpScoutState; + String > traceSegments; // Dummy segments. + return _setUpAndRunAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, + TAlignConfig2(lowerDiag, upperDiag), TGapModel()); } // Interface without AlignConfig<>. - template and algorithm tag. - template @@ -466,17 +476,25 @@ template TScoreValue globalAlignmentScore(StringSet const & strings, Score const & scoringScheme, - AlignConfig const & alignConfig, + AlignConfig const & /*alignConfig*/, int lowerDiag, int upperDiag, - TAlgoTag const & algoTag) + TAlgoTag const & /*algoTag*/) { + typedef AlignConfig TAlignConfig; + typedef typename SubstituteAlignConfig_::Type TFreeEndGaps; + typedef AlignConfig2, TFreeEndGaps, TracebackOff> TAlignConfig2; + typedef typename SubstituteAlgoTag_::Type TGapModel; + SEQAN_ASSERT_EQ(length(strings), 2u); - return _setUpAndRunAlignment(strings[0], strings[1], scoringScheme, alignConfig, lowerDiag, upperDiag, algoTag); + + DPScoutState_ dpScoutState; + String > traceSegments; // Dummy segments. + return _setUpAndRunAlignment(traceSegments, dpScoutState, strings[0], strings[1], scoringScheme, + TAlignConfig2(lowerDiag, upperDiag), TGapModel()); } // Interface without AlignConfig<>. - template @@ -493,7 +511,6 @@ TScoreValue globalAlignmentScore(StringSet const & strings, } // Interface without algorithm tag. 
- template @@ -512,7 +529,6 @@ TScoreValue globalAlignmentScore(StringSet const & strings, } // Interface without AlignConfig<> and algorithm tag. - template TScoreValue globalAlignmentScore(StringSet const & strings, @@ -528,4 +544,4 @@ TScoreValue globalAlignmentScore(StringSet const & strings, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_BANDED_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_BANDED_H_ diff --git a/seqan/align/global_alignment_hirschberg_impl.h b/seqan/align/global_alignment_hirschberg_impl.h index 7e05496..640c18c 100644 --- a/seqan/align/global_alignment_hirschberg_impl.h +++ b/seqan/align/global_alignment_hirschberg_impl.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,8 @@ // TODO(holtgrew): Get rid of this? 
//#define SEQAN_HIRSCHBERG_DEBUG_CUT -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_HIRSCHBERG_IMPL_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_HIRSCHBERG_IMPL_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_HIRSCHBERG_IMPL_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_HIRSCHBERG_IMPL_H_ namespace seqan { @@ -62,31 +62,30 @@ typedef Tag Hirschberg; class HirschbergSet_ { public: - int x1,x2,y1,y2; - int score; - - HirschbergSet_() - : x1(0),x2(0),y1(0),y2(0) - { - } - - HirschbergSet_(int a1,int a2,int b1,int b2,int sc) - : x1(a1),x2(a2),y1(b1),y2(b2),score(sc) - { - SEQAN_ASSERT_LEQ(a1, a2); - SEQAN_ASSERT_LEQ(b1, b2); - } - - HirschbergSet_ & - operator=(HirschbergSet_ const & other_) - { - x1 = other_.x1; - x2 = other_.x2; - y1 = other_.y1; - y2 = other_.y2; - score = other_.score; - return *this; - } + int x1,x2,y1,y2; + int score; + + HirschbergSet_() + : x1(0), x2(0), y1(0), y2(0), score(0) + {} + + HirschbergSet_(int a1,int a2,int b1,int b2,int sc) + : x1(a1), x2(a2), y1(b1), y2(b2), score(sc) + { + SEQAN_ASSERT_LEQ(a1, a2); + SEQAN_ASSERT_LEQ(b1, b2); + } + + HirschbergSet_ & + operator=(HirschbergSet_ const & other_) + { + x1 = other_.x1; + x2 = other_.x2; + y1 = other_.y1; + y2 = other_.y2; + score = other_.score; + return *this; + } }; @@ -102,15 +101,15 @@ class HirschbergSet_ // Function _begin1() // ---------------------------------------------------------------------------- -inline int& +inline int& _begin1(HirschbergSet_ & me) { - return me.x1; + return me.x1; } -inline int const& +inline int const& _begin1(HirschbergSet_ const & me) { - return me.x1; + return me.x1; } // ---------------------------------------------------------------------------- @@ -119,7 +118,7 @@ _begin1(HirschbergSet_ const & me) { inline void _setBegin1(HirschbergSet_ & me, int const & new_begin) { - me.x1 = new_begin; + me.x1 = new_begin; } // ---------------------------------------------------------------------------- @@ -128,12 
+127,12 @@ _setBegin1(HirschbergSet_ & me, int const & new_begin) { inline int& _end1(HirschbergSet_ & me) { - return me.x2; + return me.x2; } -inline int const& +inline int const& _end1(HirschbergSet_ const & me) { - return me.x2; + return me.x2; } // ---------------------------------------------------------------------------- @@ -142,7 +141,7 @@ _end1(HirschbergSet_ const & me) { inline void _setEnd1(HirschbergSet_ & me, int const & new_end) { - me.x2 = new_end; + me.x2 = new_end; } // ---------------------------------------------------------------------------- @@ -151,12 +150,12 @@ _setEnd1(HirschbergSet_ & me, int const & new_end) { inline int& _begin2(HirschbergSet_ & me) { - return me.y1; + return me.y1; } inline int const& _begin2(HirschbergSet_ const & me) { - return me.y1; + return me.y1; } // ---------------------------------------------------------------------------- @@ -165,7 +164,7 @@ _begin2(HirschbergSet_ const & me) { inline void _setBegin2(HirschbergSet_ & me, int const & new_begin) { - me.y1 = new_begin; + me.y1 = new_begin; } // ---------------------------------------------------------------------------- @@ -174,12 +173,12 @@ _setBegin2(HirschbergSet_ & me, int const & new_begin) { inline int& _end2(HirschbergSet_ & me) { - return me.y2; + return me.y2; } inline int const& _end2(HirschbergSet_ const & me) { - return me.y2; + return me.y2; } // ---------------------------------------------------------------------------- @@ -188,7 +187,7 @@ _end2(HirschbergSet_ const & me) { inline void _setEnd2(HirschbergSet_ & me, int const & new_end) { - me.y2 = new_end; + me.y2 = new_end; } // ---------------------------------------------------------------------------- @@ -196,8 +195,8 @@ _setEnd2(HirschbergSet_ & me, int const & new_end) { // ---------------------------------------------------------------------------- inline int& -_score(HirschbergSet_ & me) { - return me.score; +_score(HirschbergSet_ & me) { + return me.score; } // 
---------------------------------------------------------------------------- @@ -207,7 +206,7 @@ _score(HirschbergSet_ & me) { inline int const& _score(HirschbergSet_ const & me) { - return me.score; + return me.score; } // ---------------------------------------------------------------------------- @@ -216,7 +215,7 @@ _score(HirschbergSet_ const & me) inline void _setScore(HirschbergSet_ & me,int new_score) { - me.score = new_score; + me.score = new_score; } // ---------------------------------------------------------------------------- @@ -225,16 +224,16 @@ _setScore(HirschbergSet_ & me,int new_score) { // ////////////////////////////////////////////////////////////////////////////////////////////// // Debug Methods -// functions are only used for debugging or verbose output, therefore they +// functions are only used for debugging or verbose output, therefore they // are only active in SEQAN_DEBUG // ////////////////////////////////////////////////////////////////////////////////////////////// #ifdef SEQAN_DEBUG - -inline + +inline void print(HirschbergSet_ const & me) { - std::cout << me.x1 << " " << me.x2 << "\t" << me.y1 << " " << me.y2 << std::endl; + std::cout << me.x1 << " " << me.x2 << "\t" << me.y1 << " " << me.y2 << std::endl; } #endif @@ -243,10 +242,10 @@ print(HirschbergSet_ const & me) // ---------------------------------------------------------------------------- inline bool -operator==(HirschbergSet_ const & lhs, +operator==(HirschbergSet_ const & lhs, HirschbergSet_ const & rhs) { - return ((_begin1(lhs) == _begin1(rhs)) && (_end1(lhs) == _end1(rhs)) && + return ((_begin1(lhs) == _begin1(rhs)) && (_end1(lhs) == _end1(rhs)) && (_begin2(lhs) == _begin2(rhs)) && (_end2(lhs) == _end2(rhs))); } @@ -255,128 +254,114 @@ operator==(HirschbergSet_ const & lhs, // ---------------------------------------------------------------------------- #ifdef SEQAN_HIRSCHBERG_DEBUG_CUT - template - void _writeDebugMatrix(TSource s1,TSource s2) - { - int l1 = 
length(s1); - int l2 = length(s2); - - int i,j,sg,sd; - - String > fMatrix,rMatrix,tMatrix; - - resize(fMatrix,l1 + 1); - resize(rMatrix,l1 + 1); - resize(tMatrix,l1 + 1); - - for(i = 0;i <= l1;++i) - { - resize(fMatrix[i],l2 + 1); - resize(rMatrix[i],l2 + 1); - resize(tMatrix[i],l2 + 1); - } - - for(i = 0;i <= l1;++i) - fMatrix[i][0] = i * (-1); - - for(i = l1;i >= 0;--i) - rMatrix[i][l2] = (l1 - i) * (-1); - - // calculate forward matrix - for(j = 1;j <= l2;++j) - { - fMatrix[0][j] = j*(-1); - for(i = 1;i <= l1;++i) - { - sg = -1 + ((fMatrix[i-1][j] > fMatrix[i][j-1]) ? fMatrix[i-1][j] : fMatrix[i][j-1]); - sd = fMatrix[i-1][j-1] + ((s1[i - 1] == s2[j-1]) ? 0 : -1 ); - - fMatrix[i][j] = ((sg > sd) ? sg : sd); - } - } - - // calculate reverse matrix - for(j = l2 - 1;j >= 0;--j) - { - rMatrix[l1][j] = (l2 - j)*(-1); - for(i = l1 - 1;i >= 0;--i) - { - sg = -1 + ((rMatrix[i+1][j] > rMatrix[i][j+1]) ? rMatrix[i+1][j] : rMatrix[i][j+1]); - sd = rMatrix[i+1][j+1] + ((s1[i] == s2[j]) ? 0 : -1 ); - - rMatrix[i][j] = ((sg > sd) ? 
sg : sd); - } - } - - // print fMatrix - std::cout << ";-;"; - for(i = 0;i < l1;++i) - std::cout << s1[i] << ";"; - - std::cout << std::endl << "-;"; - for(j = 0;j <= l2;++j) - { - if(j != 0) std::cout << s2[j-1] << ";"; - for(i = 0;i <= l1;++i) - { - std::cout << fMatrix[i][j] << ";"; - } - std::cout << std::endl; - } - // print rMatrix - std::cout << ";"; - for(i = 0;i < l1;++i) - std::cout << s1[i] << ";"; - std::cout << "-;" << std::endl; - - for(j = 0;j <= l2;++j) - { - if(j != l2) std::cout << s2[j] << ";"; - else std::cout << "-;"; - for(i = 0;i <= l1;++i) - { - std::cout << rMatrix[i][j] << ";"; - } - std::cout << std::endl; - } - - // fill and print target matrix - std::cout << ";-;"; - for(i = 0;i < l1;++i) - std::cout << s1[i] << ";"; - - std::cout << std::endl << "-;"; - for(j = 0;j <= l2;++j) - { - if(j != 0) std::cout << s2[j-1] << ";"; - for(i = 0;i <= l1;++i) - { - tMatrix[i][j] = fMatrix[i][j] + rMatrix[i][j]; - std::cout << tMatrix[i][j] << ";"; - } - std::cout << std::endl; - } - } + template + void _writeDebugMatrix(TSource s1,TSource s2) + { + int l1 = length(s1); + int l2 = length(s2); + + int i,j,sg,sd; + + String > fMatrix,rMatrix,tMatrix; + + resize(fMatrix,l1 + 1); + resize(rMatrix,l1 + 1); + resize(tMatrix,l1 + 1); + + for(i = 0;i <= l1;++i) + { + resize(fMatrix[i],l2 + 1); + resize(rMatrix[i],l2 + 1); + resize(tMatrix[i],l2 + 1); + } + + for(i = 0;i <= l1;++i) + fMatrix[i][0] = i * (-1); + + for(i = l1;i >= 0;--i) + rMatrix[i][l2] = (l1 - i) * (-1); + + // calculate forward matrix + for(j = 1;j <= l2;++j) + { + fMatrix[0][j] = j*(-1); + for(i = 1;i <= l1;++i) + { + sg = -1 + ((fMatrix[i-1][j] > fMatrix[i][j-1]) ? fMatrix[i-1][j] : fMatrix[i][j-1]); + sd = fMatrix[i-1][j-1] + ((s1[i - 1] == s2[j-1]) ? 0 : -1 ); + + fMatrix[i][j] = ((sg > sd) ? 
sg : sd); + } + } + + // calculate reverse matrix + for(j = l2 - 1;j >= 0;--j) + { + rMatrix[l1][j] = (l2 - j)*(-1); + for(i = l1 - 1;i >= 0;--i) + { + sg = -1 + ((rMatrix[i+1][j] > rMatrix[i][j+1]) ? rMatrix[i+1][j] : rMatrix[i][j+1]); + sd = rMatrix[i+1][j+1] + ((s1[i] == s2[j]) ? 0 : -1 ); + + rMatrix[i][j] = ((sg > sd) ? sg : sd); + } + } + + // print fMatrix + std::cout << ";-;"; + for(i = 0;i < l1;++i) + std::cout << s1[i] << ";"; + + std::cout << std::endl << "-;"; + for(j = 0;j <= l2;++j) + { + if(j != 0) std::cout << s2[j-1] << ";"; + for(i = 0;i <= l1;++i) + { + std::cout << fMatrix[i][j] << ";"; + } + std::cout << std::endl; + } + // print rMatrix + std::cout << ";"; + for(i = 0;i < l1;++i) + std::cout << s1[i] << ";"; + std::cout << "-;" << std::endl; + + for(j = 0;j <= l2;++j) + { + if(j != l2) std::cout << s2[j] << ";"; + else std::cout << "-;"; + for(i = 0;i <= l1;++i) + { + std::cout << rMatrix[i][j] << ";"; + } + std::cout << std::endl; + } + + // fill and print target matrix + std::cout << ";-;"; + for(i = 0;i < l1;++i) + std::cout << s1[i] << ";"; + + std::cout << std::endl << "-;"; + for(j = 0;j <= l2;++j) + { + if(j != 0) std::cout << s2[j-1] << ";"; + for(i = 0;i <= l1;++i) + { + tMatrix[i][j] = fMatrix[i][j] + rMatrix[i][j]; + std::cout << tMatrix[i][j] << ";"; + } + std::cout << std::endl; + } + } #endif -// debug flag .. define to see where Hirschberg cuts the sequences +// debug flag .. define to see where Hirschberg cuts the sequences //#define SEQAN_HIRSCHBERG_DEBUG_CUT -/*DISABLED -.Function.hirschberg: -..cat:Alignment -..summary:Computes a global Alignment for the passed Alignment-Container with the specified scoring scheme -..signature:hirschberg(Align & align,Score const & score) -..param.align: Reference to the Alignment-Object -..param.score: Const Reference to the Scoring Scheme -..remarks: The alignment is based on the algorithm proposed by Hirschberg. 
The general idea is to divide the DP (dynamic programming) matrix, -to compute a global alignment in linear space. Instead of computing half of the -DP matrix in forward direction and the other half in reverse, a pointer to the cell of the DP matrix, were the actual, optimal alignment -passes the mid column ist saved, during the computation of the second part of the Matrix. -..include:seqan/align.h -*/ - // ---------------------------------------------------------------------------- // Function globalAlignment() // ---------------------------------------------------------------------------- @@ -391,339 +376,339 @@ _globalAlignment(Gaps & gapsH, { TSequenceH const & s1 = source(gapsH); TSequenceV const & s2 = source(gapsV); - - TScoreValue total_score = 0; + + TScoreValue total_score = 0; typedef typename Value::Type TValueV; - typedef typename Size::Type TStringSize; - - typedef typename Iterator::Type TSequenceHIter; - typedef typename Iterator::Type TSequenceVIter; - - typedef typename Iterator >::Type TGapsHIter; - typedef typename Iterator >::Type TGapsVIter; - - TGapsHIter target_0 = begin(gapsH); - TGapsVIter target_1 = begin(gapsV); - - typedef typename Iterator >::Type TMatrixIterator; - - TValueV v; - - TStringSize len1 = length(s1); - TStringSize len2 = length(s2); - - // string to store the score values for the currently active cell - String c_score; - resize(c_score,len2 + 1); - // string to strore the backpointers - String pointer; - resize(pointer,len2 + 1); - - // scoring-scheme specific score values - TScoreValue score_match = scoreMatch(score_); - TScoreValue score_mismatch = scoreMismatch(score_); - TScoreValue score_gap = scoreGapExtend(score_); - - TScoreValue border,s,sg,sd,sg1,sg2; - int dp; - - std::stack to_process; - HirschbergSet_ target; - - int i,j; - - HirschbergSet_ hs_complete(0,len1,0,len2,0); - to_process.push(hs_complete); - - while(!to_process.empty()) - { - target = to_process.top(); - to_process.pop(); - - if(_begin2(target) == 
_end2(target)) - { - for(i = 0;i < (_end1(target) - _begin1(target));++i) - { - insertGap(target_1); - ++target_0; - ++target_1; - } - } + typedef typename Size::Type TStringSize; + + typedef typename Iterator::Type TSequenceHIter; + typedef typename Iterator::Type TSequenceVIter; + + typedef typename Iterator >::Type TGapsHIter; + typedef typename Iterator >::Type TGapsVIter; + + TGapsHIter target_0 = begin(gapsH); + TGapsVIter target_1 = begin(gapsV); + + typedef typename Iterator >::Type TMatrixIterator; + + TValueV v; + + TStringSize len1 = length(s1); + TStringSize len2 = length(s2); + + // string to store the score values for the currently active cell + String c_score; + resize(c_score,len2 + 1); + // string to strore the backpointers + String pointer; + resize(pointer,len2 + 1); + + // scoring-scheme specific score values + TScoreValue score_match = scoreMatch(score_); + TScoreValue score_mismatch = scoreMismatch(score_); + TScoreValue score_gap = scoreGapExtend(score_); + + TScoreValue border,s,sg,sd,sg1,sg2; + int dp; + + std::stack to_process; + HirschbergSet_ target; + + int i,j; + + HirschbergSet_ hs_complete(0,len1,0,len2,0); + to_process.push(hs_complete); + + while(!to_process.empty()) + { + target = to_process.top(); + to_process.pop(); + + if(_begin2(target) == _end2(target)) + { + for(i = 0;i < (_end1(target) - _begin1(target));++i) + { + insertGap(target_1); + ++target_0; + ++target_1; + } + } if(_begin1(target) == _end1(target)) - { - for(i = 0;i < (_end2(target) - _begin2(target));++i) - { - insertGap(target_0); - ++target_0; - ++target_1; - } - } - else if(_begin1(target) + 1 == _end1(target) || _begin2(target) + 1 == _end2(target)) - { - /* ALIGN */ + { + for(i = 0;i < (_end2(target) - _begin2(target));++i) + { + insertGap(target_0); + ++target_0; + ++target_1; + } + } + else if(_begin1(target) + 1 == _end1(target) || _begin2(target) + 1 == _end2(target)) + { + /* ALIGN */ #ifdef SEQAN_HIRSCHBERG_DEBUG_CUT - std::cout << "align s1 " << 
_begin1(target) << " to " << _end1(target) << " and s2 " << _begin2(target) << " to " << _end2(target) << std::endl; - std::cout << "align " << infix(s1,_begin1(target),_end1(target)) << " and " << infix(s2,_begin2(target),_end2(target)) << std::endl << std::endl; + std::cout << "align s1 " << _begin1(target) << " to " << _end1(target) << " and s2 " << _begin2(target) << " to " << _end2(target) << std::endl; + std::cout << "align " << infix(s1,_begin1(target),_end1(target)) << " and " << infix(s2,_begin2(target),_end2(target)) << std::endl << std::endl; #endif - TStringSize len_1 = _end1(target) - _begin1(target); - TStringSize len_2 = _end2(target) - _begin2(target); - - Matrix matrix_; - - setDimension(matrix_, 2); - setLength(matrix_, 0, len_1 + 1); - setLength(matrix_, 1, len_2 + 1); - resize(matrix_); - - /* init matrix */ - TSequenceHIter x_begin = iter(s1, _begin1(target), Standard()) - 1; - TSequenceHIter x_end = iter(s1, _end1(target), Standard()) - 1; - TSequenceVIter y_begin = iter(s2, _begin2(target), Standard()) - 1; - TSequenceVIter y_end = iter(s2, _end2(target), Standard()) - 1; - - TSequenceHIter x = x_end; - TSequenceVIter y; - - TMatrixIterator col_ = end(matrix_) - 1; - TMatrixIterator finger1; - TMatrixIterator finger2; - - - TScoreValue h = 0; - TScoreValue border_ = score_gap; - TScoreValue v = border_; - - - //------------------------------------------------------------------------- - // init - - finger1 = col_; - *finger1 = 0; - for (x = x_end; x != x_begin; --x) - { - goPrevious(finger1, 0); - *finger1 = border_; - border_ += score_gap; - } - - //------------------------------------------------------------------------- - //fill matrix - border_ = 0; - for (y = y_end; y != y_begin; --y) - { - TValueV cy = *y; - h = border_; - border_ += score_gap; - v = border_; - - finger2 = col_; - goPrevious(col_, 1); - finger1 = col_; - - *finger1 = v; - - for (x = x_end; x != x_begin; --x) - { - goPrevious(finger1, 0); - goPrevious(finger2, 0); - if 
(*x == cy) - { - v = h + score_match; - h = *finger2; - } - else - { - TScoreValue s1 = h + score_mismatch; - h = *finger2; - TScoreValue s2 = score_gap + ((h > v) ? h : v); - v = (s1 > s2) ? s1 : s2; - } - *finger1 = v; - } - } + TStringSize len_1 = _end1(target) - _begin1(target); + TStringSize len_2 = _end2(target) - _begin2(target); + + Matrix matrix_; + + setDimension(matrix_, 2); + setLength(matrix_, 0, len_1 + 1); + setLength(matrix_, 1, len_2 + 1); + resize(matrix_); + + /* init matrix */ + TSequenceHIter x_begin = iter(s1, _begin1(target), Standard()) - 1; + TSequenceHIter x_end = iter(s1, _end1(target), Standard()) - 1; + TSequenceVIter y_begin = iter(s2, _begin2(target), Standard()) - 1; + TSequenceVIter y_end = iter(s2, _end2(target), Standard()) - 1; + + TSequenceHIter x = x_end; + TSequenceVIter y; + + TMatrixIterator col_ = end(matrix_) - 1; + TMatrixIterator finger1; + TMatrixIterator finger2; + + + TScoreValue h = 0; + TScoreValue border_ = score_gap; + TScoreValue v = border_; + + + //------------------------------------------------------------------------- + // init + + finger1 = col_; + *finger1 = 0; + for (x = x_end; x != x_begin; --x) + { + goPrevious(finger1, 0); + *finger1 = border_; + border_ += score_gap; + } + + //------------------------------------------------------------------------- + //fill matrix + border_ = 0; + for (y = y_end; y != y_begin; --y) + { + TValueV cy = *y; + h = border_; + border_ += score_gap; + v = border_; + + finger2 = col_; + goPrevious(col_, 1); + finger1 = col_; + + *finger1 = v; + + for (x = x_end; x != x_begin; --x) + { + goPrevious(finger1, 0); + goPrevious(finger2, 0); + if (*x == cy) + { + v = h + score_match; + h = *finger2; + } + else + { + TScoreValue s1 = h + score_mismatch; + h = *finger2; + TScoreValue s2 = score_gap + ((h > v) ? h : v); + v = (s1 > s2) ? 
s1 : s2; + } + *finger1 = v; + } + } total_score += value(matrix_, 0,0); #ifdef SEQAN_HIRSCHBERG_DEBUG_CUT std::cout << "alignment score is " << total_score << std::endl << std::endl; #endif - /* TRACE BACK */ - finger1 = begin(matrix_); - x = iter(s1,_begin1(target)); - y = iter(s2,_begin2(target)); - x_end = iter(s1,_end1(target)); - y_end = iter(s2,_end2(target)); - - while ((x != x_end) && (y != y_end)) - { - bool gv; - bool gh; - - if (*x == *y) - { - gv = gh = true; - } - else - { - TMatrixIterator it_ = finger1; - - goNext(it_, 0); - TScoreValue v = *it_; - - goNext(it_, 1); - TScoreValue d = *it_; - - it_ = finger1; - goNext(it_, 1); - TScoreValue h = *it_; - - gv = (v >= h) | (d >= h); - gh = (h >= v) | (d >= v); - } - - if (gv) - { - ++x; - goNext(finger1, 0); - } - else - { - insertGap(target_0); - } - - if (gh) - { - ++y; - goNext(finger1, 1); - } - else - { - insertGap(target_1); - } - - ++target_0; - ++target_1; - } - - // if x or y did not reached there end position, fill the rest with gaps - while(x != x_end) - { - insertGap(target_1); - ++target_0; - ++target_1; - ++x; - } - - while(y != y_end) - { - insertGap(target_0); - ++target_0; - ++target_1; - ++y; - } - /* END ALIGN */ - } - else - { - /* - Calculate cut using the algorithm as proposed in the lecture of Clemens Gröpl - using a backpointer to remember the position where the optimal alignment passes - the mid column - */ - int mid = static_cast(floor( static_cast((_begin1(target) + _end1(target))/2) )); + /* TRACE BACK */ + finger1 = begin(matrix_); + x = iter(s1,_begin1(target)); + y = iter(s2,_begin2(target)); + x_end = iter(s1,_end1(target)); + y_end = iter(s2,_end2(target)); + + while ((x != x_end) && (y != y_end)) + { + bool gv; + bool gh; + + if (*x == *y) + { + gv = gh = true; + } + else + { + TMatrixIterator it_ = finger1; + + goNext(it_, 0); + TScoreValue v = *it_; + + goNext(it_, 1); + TScoreValue d = *it_; + + it_ = finger1; + goNext(it_, 1); + TScoreValue h = *it_; + + gv = (v >= 
h) | (d >= h); + gh = (h >= v) | (d >= v); + } + + if (gv) + { + ++x; + goNext(finger1, 0); + } + else + { + insertGap(target_0); + } + + if (gh) + { + ++y; + goNext(finger1, 1); + } + else + { + insertGap(target_1); + } + + ++target_0; + ++target_1; + } + + // if x or y did not reached there end position, fill the rest with gaps + while(x != x_end) + { + insertGap(target_1); + ++target_0; + ++target_1; + ++x; + } + + while(y != y_end) + { + insertGap(target_0); + ++target_0; + ++target_1; + ++y; + } + /* END ALIGN */ + } + else + { + /* + Calculate cut using the algorithm as proposed in the lecture of Clemens Gröpl + using a backpointer to remember the position where the optimal alignment passes + the mid column + */ + int mid = static_cast(floor( static_cast((_begin1(target) + _end1(target))/2) )); #ifdef SEQAN_HIRSCHBERG_DEBUG_CUT - std::cout << "calculate cut for s1 " << _begin1(target) << " to " << _end1(target) << " and s2 " << _begin2(target) << " to " << _end2(target) << std::endl; - std::cout << "calculate cut for " << infix(s1,_begin1(target),_end1(target)) << " and " << infix(s2,_begin2(target),_end2(target)) << std::endl; - std::cout << "cut is in row " << mid << " symbol is " << getValue(s1,mid-1) << std::endl << std::endl; + std::cout << "calculate cut for s1 " << _begin1(target) << " to " << _end1(target) << " and s2 " << _begin2(target) << " to " << _end2(target) << std::endl; + std::cout << "calculate cut for " << infix(s1,_begin1(target),_end1(target)) << " and " << infix(s2,_begin2(target),_end2(target)) << std::endl; + std::cout << "cut is in row " << mid << " symbol is " << getValue(s1,mid-1) << std::endl << std::endl; - _writeDebugMatrix(infix(s1,_begin1(target),_end1(target)),infix(s2,_begin2(target),_end2(target))); + _writeDebugMatrix(infix(s1,_begin1(target),_end1(target)),infix(s2,_begin2(target),_end2(target))); #endif - border = 0; - for(i = _begin2(target);i <= _end2(target);++i) - { - c_score[i] = border; - border += score_gap; - 
pointer[i] = i; - } - - // iterate over s1 until the mid column is reached - border = score_gap; - for(i = _begin1(target) + 1;i <= mid;++i) - { - s = c_score[_begin2(target)]; - c_score[_begin2(target)] = border; - border += score_gap; - v = getValue(s1,i-1); - for(j = _begin2(target) + 1;j <= _end2(target);++j) - { - sg = score_gap + ((c_score[j] > c_score[j - 1]) ? c_score[j] : c_score[j - 1]); - sd = s + ((v == getValue(s2,j-1)) ? score_match : score_mismatch); - - s = c_score[j]; - c_score[j] = (sg > sd) ? sg : sd; - } - } - - // from here, rememeber the cell of mid-column, where optimal alignment passed - for(i = mid + 1;i <= _end1(target);++i) - { - s = c_score[_begin2(target)]; - c_score[_begin2(target)] = border; - border += score_gap; - v = getValue(s1,i-1); - - dp = _begin2(target); - - for(j = _begin2(target) + 1;j <= _end2(target);++j) - { - sg1 = score_gap + c_score[j]; - sg2 = score_gap + c_score[j - 1]; - - sd = s + ((v == getValue(s2,j-1)) ? score_match : score_mismatch); - - s = c_score[j]; - sg = pointer[j]; - if(sd >= _max(sg1,sg2)) - { - c_score[j] = sd; - pointer[j] = dp; - } - else - { - if(sg2 > sg1) - { - c_score[j] = sg2; - pointer[j] = pointer[j-1]; - } - else - { - // gap introduced from left - // no update for the pointer - c_score[j] = sg1; - } - } - dp = sg; - } - } + border = 0; + for(i = _begin2(target);i <= _end2(target);++i) + { + c_score[i] = border; + border += score_gap; + pointer[i] = i; + } + + // iterate over s1 until the mid column is reached + border = score_gap; + for(i = _begin1(target) + 1;i <= mid;++i) + { + s = c_score[_begin2(target)]; + c_score[_begin2(target)] = border; + border += score_gap; + v = getValue(s1,i-1); + for(j = _begin2(target) + 1;j <= _end2(target);++j) + { + sg = score_gap + ((c_score[j] > c_score[j - 1]) ? c_score[j] : c_score[j - 1]); + sd = s + ((v == getValue(s2,j-1)) ? score_match : score_mismatch); + + s = c_score[j]; + c_score[j] = (sg > sd) ? 
sg : sd; + } + } + + // from here, rememeber the cell of mid-column, where optimal alignment passed + for(i = mid + 1;i <= _end1(target);++i) + { + s = c_score[_begin2(target)]; + c_score[_begin2(target)] = border; + border += score_gap; + v = getValue(s1,i-1); + + dp = _begin2(target); + + for(j = _begin2(target) + 1;j <= _end2(target);++j) + { + sg1 = score_gap + c_score[j]; + sg2 = score_gap + c_score[j - 1]; + + sd = s + ((v == getValue(s2,j-1)) ? score_match : score_mismatch); + + s = c_score[j]; + sg = pointer[j]; + if(sd >= _max(sg1,sg2)) + { + c_score[j] = sd; + pointer[j] = dp; + } + else + { + if(sg2 > sg1) + { + c_score[j] = sg2; + pointer[j] = pointer[j-1]; + } + else + { + // gap introduced from left + // no update for the pointer + c_score[j] = sg1; + } + } + dp = sg; + } + } #ifdef SEQAN_HIRSCHBERG_DEBUG_CUT - std::cout << "hirschberg calculates cut in column " << mid << " and row " << pointer[_end2(target)] << std::endl; - std::cout << "requested position in c_score and pointer is " << _end2(target) << std::endl; - std::cout << "alignment score is " << c_score[_end2(target)] << std::endl << std::endl; + std::cout << "hirschberg calculates cut in column " << mid << " and row " << pointer[_end2(target)] << std::endl; + std::cout << "requested position in c_score and pointer is " << _end2(target) << std::endl; + std::cout << "alignment score is " << c_score[_end2(target)] << std::endl << std::endl; #endif - to_process.push(HirschbergSet_(mid,_end1(target),pointer[_end2(target)],_end2(target),0)); - to_process.push(HirschbergSet_(_begin1(target),mid,_begin2(target),pointer[_end2(target)],0)); - } - /* END CUT */ - } - return total_score; + to_process.push(HirschbergSet_(mid,_end1(target),pointer[_end2(target)],_end2(target),0)); + to_process.push(HirschbergSet_(_begin1(target),mid,_begin2(target),pointer[_end2(target)],0)); + } + /* END CUT */ + } + return total_score; } } // namespace seqan -#endif // #ifndef 
SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_HIRSCHBERG_IMPL_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_HIRSCHBERG_IMPL_H_ diff --git a/seqan/align/global_alignment_myers_hirschberg_impl.h b/seqan/align/global_alignment_myers_hirschberg_impl.h index c56fce7..7bc4816 100644 --- a/seqan/align/global_alignment_myers_hirschberg_impl.h +++ b/seqan/align/global_alignment_myers_hirschberg_impl.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Stephan Aiche // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_MYERS_HIRSCHBERG_IMPL_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_MYERS_HIRSCHBERG_IMPL_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_MYERS_HIRSCHBERG_IMPL_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_MYERS_HIRSCHBERG_IMPL_H_ namespace seqan { @@ -58,109 +58,109 @@ namespace seqan { // ---------------------------------------------------------------------------- #ifdef MYERS_HIRSCHBERG_VERBOSE - template - void _writeDebugMatrix(TSource s1,TSource s2) - { + template + void _writeDebugMatrix(TSource s1,TSource s2) + { //IOREV _notio_ not relevant for iorev - int l1 = length(s1); - int l2 = length(s2); - - int i,j,sg,sd; - - String > fMatrix,rMatrix,tMatrix; - - resize(fMatrix,l1 + 1); - resize(rMatrix,l1 + 1); - resize(tMatrix,l1 + 1); - - for(i = 0;i <= l1;++i) - { - resize(fMatrix[i],l2 + 1); - resize(rMatrix[i],l2 + 1); - resize(tMatrix[i],l2 + 1); - } - - for(i = 0;i <= l1;++i) - fMatrix[i][0] = i * (-1); - - for(i = l1;i >= 0;--i) - 
rMatrix[i][l2] = (l1 - i) * (-1); - - // calculate forward matrix - for(j = 1;j <= l2;++j) - { - fMatrix[0][j] = j*(-1); - for(i = 1;i <= l1;++i) - { - sg = -1 + ((fMatrix[i-1][j] > fMatrix[i][j-1]) ? fMatrix[i-1][j] : fMatrix[i][j-1]); - sd = fMatrix[i-1][j-1] + ((s1[i - 1] == s2[j-1]) ? 0 : -1 ); - - fMatrix[i][j] = ((sg > sd) ? sg : sd); - } - } - - // calculate reverse matrix - for(j = l2 - 1;j >= 0;--j) - { - rMatrix[l1][j] = (l2 - j)*(-1); - for(i = l1 - 1;i >= 0;--i) - { - sg = -1 + ((rMatrix[i+1][j] > rMatrix[i][j+1]) ? rMatrix[i+1][j] : rMatrix[i][j+1]); - sd = rMatrix[i+1][j+1] + ((s1[i] == s2[j]) ? 0 : -1 ); - - rMatrix[i][j] = ((sg > sd) ? sg : sd); - } - } - - // print fMatrix - std::cout << ";-;"; - for(i = 0;i < l1;++i) - std::cout << s1[i] << ";"; - - std::cout << std::endl << "-;"; - for(j = 0;j <= l2;++j) - { - if(j != 0) std::cout << s2[j-1] << ";"; - for(i = 0;i <= l1;++i) - { - std::cout << fMatrix[i][j] << ";"; - } - std::cout << std::endl; - } - // print rMatrix - std::cout << ";"; - for(i = 0;i < l1;++i) - std::cout << s1[i] << ";"; - std::cout << "-;" << std::endl; - - for(j = 0;j <= l2;++j) - { - if(j != l2) std::cout << s2[j] << ";"; - else std::cout << "-;"; - for(i = 0;i <= l1;++i) - { - std::cout << rMatrix[i][j] << ";"; - } - std::cout << std::endl; - } - - // fill and print target matrix - std::cout << ";-;"; - for(i = 0;i < l1;++i) - std::cout << s1[i] << ";"; - - std::cout << std::endl << "-;"; - for(j = 0;j <= l2;++j) - { - if(j != 0) std::cout << s2[j-1] << ";"; - for(i = 0;i <= l1;++i) - { - tMatrix[i][j] = fMatrix[i][j] + rMatrix[i][j]; - std::cout << tMatrix[i][j] << ";"; - } - std::cout << std::endl; - } - } + int l1 = length(s1); + int l2 = length(s2); + + int i,j,sg,sd; + + String > fMatrix,rMatrix,tMatrix; + + resize(fMatrix,l1 + 1); + resize(rMatrix,l1 + 1); + resize(tMatrix,l1 + 1); + + for(i = 0;i <= l1;++i) + { + resize(fMatrix[i],l2 + 1); + resize(rMatrix[i],l2 + 1); + resize(tMatrix[i],l2 + 1); + } + + for(i = 0;i <= 
l1;++i) + fMatrix[i][0] = i * (-1); + + for(i = l1;i >= 0;--i) + rMatrix[i][l2] = (l1 - i) * (-1); + + // calculate forward matrix + for(j = 1;j <= l2;++j) + { + fMatrix[0][j] = j*(-1); + for(i = 1;i <= l1;++i) + { + sg = -1 + ((fMatrix[i-1][j] > fMatrix[i][j-1]) ? fMatrix[i-1][j] : fMatrix[i][j-1]); + sd = fMatrix[i-1][j-1] + ((s1[i - 1] == s2[j-1]) ? 0 : -1 ); + + fMatrix[i][j] = ((sg > sd) ? sg : sd); + } + } + + // calculate reverse matrix + for(j = l2 - 1;j >= 0;--j) + { + rMatrix[l1][j] = (l2 - j)*(-1); + for(i = l1 - 1;i >= 0;--i) + { + sg = -1 + ((rMatrix[i+1][j] > rMatrix[i][j+1]) ? rMatrix[i+1][j] : rMatrix[i][j+1]); + sd = rMatrix[i+1][j+1] + ((s1[i] == s2[j]) ? 0 : -1 ); + + rMatrix[i][j] = ((sg > sd) ? sg : sd); + } + } + + // print fMatrix + std::cout << ";-;"; + for(i = 0;i < l1;++i) + std::cout << s1[i] << ";"; + + std::cout << std::endl << "-;"; + for(j = 0;j <= l2;++j) + { + if(j != 0) std::cout << s2[j-1] << ";"; + for(i = 0;i <= l1;++i) + { + std::cout << fMatrix[i][j] << ";"; + } + std::cout << std::endl; + } + // print rMatrix + std::cout << ";"; + for(i = 0;i < l1;++i) + std::cout << s1[i] << ";"; + std::cout << "-;" << std::endl; + + for(j = 0;j <= l2;++j) + { + if(j != l2) std::cout << s2[j] << ";"; + else std::cout << "-;"; + for(i = 0;i <= l1;++i) + { + std::cout << rMatrix[i][j] << ";"; + } + std::cout << std::endl; + } + + // fill and print target matrix + std::cout << ";-;"; + for(i = 0;i < l1;++i) + std::cout << s1[i] << ";"; + + std::cout << std::endl << "-;"; + for(j = 0;j <= l2;++j) + { + if(j != 0) std::cout << s2[j-1] << ";"; + for(i = 0;i <= l1;++i) + { + tMatrix[i][j] = fMatrix[i][j] + rMatrix[i][j]; + std::cout << tMatrix[i][j] << ";"; + } + std::cout << std::endl; + } + } #endif // ---------------------------------------------------------------------------- @@ -181,7 +181,7 @@ _globalAlignment(Gaps & gapsH, // to fit into less words. 
if (length(source(gapsH)) < length(source(gapsV))) return _globalAlignment(gapsV, gapsH, algorithmTag); - + clearGaps(gapsH); clearGaps(gapsV); clearClipping(gapsH); @@ -189,23 +189,23 @@ _globalAlignment(Gaps & gapsH, typedef int TScoreValue; - // use size of unsigned int as blocksize for bit-vectors - const unsigned int BLOCK_SIZE = BitsPerValue::VALUE; + // use size of unsigned int as blocksize for bit-vectors + const unsigned int BLOCK_SIZE = BitsPerValue::VALUE; - // saves the score value that will be returned - TScoreValue score,total_score = 0; + // saves the score value that will be returned + TScoreValue score,total_score = 0; - typedef typename Value::Type TPatternAlphabet; - typedef typename Size::Type TStringSize; + typedef typename Value::Type TPatternAlphabet; + typedef typename Size::Type TStringSize; - typedef typename Iterator::Type TSequenceHIterator; - typedef typename Iterator::Type TSequenceVIterator; + typedef typename Iterator::Type TSequenceHIterator; + typedef typename Iterator::Type TSequenceVIterator; typedef Gaps TGapsH; typedef Gaps TGapsV; - typedef typename Iterator::Type TGapsHIterator; - typedef typename Iterator::Type TGapsVIterator; + typedef typename Iterator::Type TGapsHIterator; + typedef typename Iterator::Type TGapsVIterator; - typedef typename Iterator, Rooted>::Type TMatrixIterator; + typedef typename Iterator, Rooted>::Type TMatrixIterator; TGapsHIterator target_0 = begin(gapsH); TGapsVIterator target_1 = begin(gapsV); @@ -213,28 +213,28 @@ _globalAlignment(Gaps & gapsH, TSequenceH const & x = source(gapsH); TSequenceV const & y = source(gapsV); - TStringSize len_x = length(x); - TStringSize len_y = length(y); + TStringSize len_x = length(x); + TStringSize len_y = length(y); + + // string to store the score values for the currently active cell + String c_score; + resize(c_score, len_x + 1, 0); - // string to store the score values for the currently active cell - String c_score; - resize(c_score, len_x + 1, 0); - - // 
scoring-scheme specific score values - TScoreValue score_match = 0; - TScoreValue score_mismatch = -1; - TScoreValue score_gap = -1; + // scoring-scheme specific score values + TScoreValue score_match = 0; + TScoreValue score_mismatch = -1; + TScoreValue score_gap = -1; - // additional vars - int i; + // additional vars + int i; - // stack with parts of matrix that have to be processed - std::stack to_process; - HirschbergSet_ target; + // stack with parts of matrix that have to be processed + std::stack to_process; + HirschbergSet_ target; - // myers specific vars and preprocessing - unsigned int patternAlphabetSize = ValueSize::VALUE; - unsigned int blockCount = (len_y + BLOCK_SIZE - 1) / BLOCK_SIZE; // maximal count of blocks + // myers specific vars and preprocessing + unsigned int patternAlphabetSize = ValueSize::VALUE; + unsigned int blockCount = (len_y + BLOCK_SIZE - 1) / BLOCK_SIZE; // maximal count of blocks String VP; String VN; @@ -244,507 +244,507 @@ _globalAlignment(Gaps & gapsH, resize(VP, blockCount, maxValue()); resize(VN, blockCount, 0); - // first bitMask will be constructed from the shorter sequence + // first bitMask will be constructed from the shorter sequence resize(forwardBitMask, patternAlphabetSize * blockCount, 0); resize(reverseBitMask, patternAlphabetSize * blockCount, 0); - // encoding the letters as bit-vectors + // encoding the letters as bit-vectors for (unsigned int j = 0; j < len_y; j++){ - forwardBitMask[blockCount * ordValue(getValue(y,j)) + j/BLOCK_SIZE] = forwardBitMask[blockCount * ordValue(getValue(y,j)) + j/BLOCK_SIZE] | 1 << (j%BLOCK_SIZE); - reverseBitMask[blockCount * ordValue(getValue(y,len_y - j - 1)) + j/BLOCK_SIZE] = reverseBitMask[blockCount * ordValue(getValue(y,len_y - j - 1)) + j/BLOCK_SIZE] | 1 << (j%BLOCK_SIZE); - } - - HirschbergSet_ hs_complete(0,len_x,0,len_y,1); - to_process.push(hs_complete); - - while(!to_process.empty()) - { - target = to_process.top(); - to_process.pop(); - /* if score is zero, the 
whole part of the sequence can be simply skipped */ - if(_score(target) == 0) - { - /* coukd work faster */ - for(i = 0;i < (_end1(target) - _begin1(target));++i) - { - ++target_0; - ++target_1; - } + forwardBitMask[blockCount * ordValue(getValue(y,j)) + j/BLOCK_SIZE] = forwardBitMask[blockCount * ordValue(getValue(y,j)) + j/BLOCK_SIZE] | 1 << (j%BLOCK_SIZE); + reverseBitMask[blockCount * ordValue(getValue(y,len_y - j - 1)) + j/BLOCK_SIZE] = reverseBitMask[blockCount * ordValue(getValue(y,len_y - j - 1)) + j/BLOCK_SIZE] | 1 << (j%BLOCK_SIZE); + } + + HirschbergSet_ hs_complete(0,len_x,0,len_y,1); + to_process.push(hs_complete); + + while(!to_process.empty()) + { + target = to_process.top(); + to_process.pop(); + /* if score is zero, the whole part of the sequence can be simply skipped */ + if(_score(target) == 0) + { + /* coukd work faster */ + for(i = 0;i < (_end1(target) - _begin1(target));++i) + { + ++target_0; + ++target_1; + } #ifdef MYERS_HIRSCHBERG_VERBOSE - printf("skipped %i to %i in first sequence\n",_begin1(target),_end1(target)); + printf("skipped %i to %i in first sequence\n",_begin1(target),_end1(target)); #endif - } - else if(_begin1(target) == _end1(target)) - { + } + else if(_begin1(target) == _end1(target)) + { #ifdef MYERS_HIRSCHBERG_VERBOSE - std::cout << "align y " << _begin2(target) << " to " << _end2(target) << std::endl; - std::cout << "align " << infix(y,_begin2(target),_end2(target)) << std::endl << std::endl; -#endif - for(i = 0;i < (_end2(target) - _begin2(target));++i) - { - insertGap(target_0); - ++target_0; - ++target_1; - } - } - else if(_begin2(target) + 1 == _end2(target)) - { - /* ALIGN */ + std::cout << "align y " << _begin2(target) << " to " << _end2(target) << std::endl; + std::cout << "align " << infix(y,_begin2(target),_end2(target)) << std::endl << std::endl; +#endif + for(i = 0;i < (_end2(target) - _begin2(target));++i) + { + insertGap(target_0); + ++target_0; + ++target_1; + } + } + else if(_begin2(target) + 1 == 
_end2(target)) + { + /* ALIGN */ #ifdef MYERS_HIRSCHBERG_VERBOSE - std::cout << "align x " << _begin1(target) << " to " << _end1(target) << " and y " << _begin2(target) << " to " << _end2(target) << std::endl; - std::cout << "align " << infix(x,_begin1(target),_end1(target)) << " and " << infix(y,_begin2(target),_end2(target)) << std::endl << std::endl; + std::cout << "align x " << _begin1(target) << " to " << _end1(target) << " and y " << _begin2(target) << " to " << _end2(target) << std::endl; + std::cout << "align " << infix(x,_begin1(target),_end1(target)) << " and " << infix(y,_begin2(target),_end2(target)) << std::endl << std::endl; #endif - TStringSize len_1 = _end1(target) - _begin1(target); - TStringSize len_2 = _end2(target) - _begin2(target); - - Matrix matrix_; - - setDimension(matrix_, 2); - setLength(matrix_, 0, len_1 + 1); - setLength(matrix_, 1, len_2 + 1); - resize(matrix_); - - /* init matrix */ - TSequenceHIterator xs_begin = iter(x,_begin1(target)) - 1; - TSequenceHIterator xs_end = iter(x,_end1(target)) - 1; - TSequenceVIterator ys_begin = iter(y,_begin2(target)) - 1; - TSequenceVIterator ys_end = iter(y,_end2(target)) - 1; - - TSequenceHIterator xs = xs_end; - TSequenceVIterator ys; - - TMatrixIterator col_ = end(matrix_) - 1; - TMatrixIterator finger1; - TMatrixIterator finger2; - - - TScoreValue h = 0; - TScoreValue border_ = score_gap; - TScoreValue v = border_; - - - //------------------------------------------------------------------------- - // init - - finger1 = col_; - *finger1 = 0; - for (xs = xs_end; xs != xs_begin; --xs) - { - goPrevious(finger1, 0); - *finger1 = border_; - border_ += score_gap; - } - - //------------------------------------------------------------------------- - //fill matrix - - border_ = 0; - for (ys = ys_end; ys != ys_begin; --ys) - { - TPatternAlphabet cy = *ys; - h = border_; - border_ += score_gap; - v = border_; - - finger2 = col_; - goPrevious(col_, 1); - finger1 = col_; - - *finger1 = v; - - for (xs = 
xs_end; xs != xs_begin; --xs) - { - goPrevious(finger1, 0); - goPrevious(finger2, 0); - if (*xs == cy) - { - v = h + score_match; - h = *finger2; - } - else - { - TScoreValue s1 = h + score_mismatch; - h = *finger2; - TScoreValue s2 = score_gap + ((h > v) ? h : v); - v = (s1 > s2) ? s1 : s2; - } - *finger1 = v; - } - } - - // if computed the whole matrix last value of v = alignment score - if(target == hs_complete) total_score = v; - - /* TRACE BACK */ - finger1 = begin(matrix_); - xs = iter(x,_begin1(target)); - ys = iter(y,_begin2(target)); - xs_end = iter(x,_end1(target)); - ys_end = iter(y,_end2(target)); - - while ((xs != xs_end) && (ys != ys_end)) - { - bool gv; - bool gh; - - if (*xs == *ys) - { - gv = gh = true; - } - else - { - TMatrixIterator it_ = finger1; - - goNext(it_, 0); - TScoreValue v = *it_; - - goNext(it_, 1); - TScoreValue d = *it_; - - it_ = finger1; - goNext(it_, 1); - TScoreValue h = *it_; - - gv = (v >= h) | (d >= h); - gh = (h >= v) | (d >= v); - } - - if (gv) - { - ++xs; - goNext(finger1, 0); - } - else - { - insertGap(target_0); - } - - if (gh) - { - ++ys; - goNext(finger1, 1); - } - else - { - insertGap(target_1); - } - - ++target_0; - ++target_1; - } - - // if x or y did not reached there end position, fill the rest with gaps - while(xs != xs_end) - { - insertGap(target_1); - ++target_0; - ++target_1; - ++xs; - } - - while(ys != ys_end) - { - insertGap(target_0); - ++target_0; - ++target_1; - ++ys; - } - /* END ALIGN */ + TStringSize len_1 = _end1(target) - _begin1(target); + TStringSize len_2 = _end2(target) - _begin2(target); + + Matrix matrix_; + + setDimension(matrix_, 2); + setLength(matrix_, 0, len_1 + 1); + setLength(matrix_, 1, len_2 + 1); + resize(matrix_); + + /* init matrix */ + TSequenceHIterator xs_begin = iter(x,_begin1(target)) - 1; + TSequenceHIterator xs_end = iter(x,_end1(target)) - 1; + TSequenceVIterator ys_begin = iter(y,_begin2(target)) - 1; + TSequenceVIterator ys_end = iter(y,_end2(target)) - 1; + + 
TSequenceHIterator xs = xs_end; + TSequenceVIterator ys; + + TMatrixIterator col_ = end(matrix_) - 1; + TMatrixIterator finger1; + TMatrixIterator finger2; + + + TScoreValue h = 0; + TScoreValue border_ = score_gap; + TScoreValue v = border_; + + + //------------------------------------------------------------------------- + // init + + finger1 = col_; + *finger1 = 0; + for (xs = xs_end; xs != xs_begin; --xs) + { + goPrevious(finger1, 0); + *finger1 = border_; + border_ += score_gap; + } + + //------------------------------------------------------------------------- + //fill matrix + + border_ = 0; + for (ys = ys_end; ys != ys_begin; --ys) + { + TPatternAlphabet cy = *ys; + h = border_; + border_ += score_gap; + v = border_; + + finger2 = col_; + goPrevious(col_, 1); + finger1 = col_; + + *finger1 = v; + + for (xs = xs_end; xs != xs_begin; --xs) + { + goPrevious(finger1, 0); + goPrevious(finger2, 0); + if (*xs == cy) + { + v = h + score_match; + h = *finger2; + } + else + { + TScoreValue s1 = h + score_mismatch; + h = *finger2; + TScoreValue s2 = score_gap + ((h > v) ? h : v); + v = (s1 > s2) ? 
s1 : s2; + } + *finger1 = v; + } + } + + // if computed the whole matrix last value of v = alignment score + if(target == hs_complete) total_score = v; + + /* TRACE BACK */ + finger1 = begin(matrix_); + xs = iter(x,_begin1(target)); + ys = iter(y,_begin2(target)); + xs_end = iter(x,_end1(target)); + ys_end = iter(y,_end2(target)); + + while ((xs != xs_end) && (ys != ys_end)) + { + bool gv; + bool gh; + + if (*xs == *ys) + { + gv = gh = true; + } + else + { + TMatrixIterator it_ = finger1; + + goNext(it_, 0); + TScoreValue v = *it_; + + goNext(it_, 1); + TScoreValue d = *it_; + + it_ = finger1; + goNext(it_, 1); + TScoreValue h = *it_; + + gv = (v >= h) | (d >= h); + gh = (h >= v) | (d >= v); + } + + if (gv) + { + ++xs; + goNext(finger1, 0); + } + else + { + insertGap(target_0); + } + + if (gh) + { + ++ys; + goNext(finger1, 1); + } + else + { + insertGap(target_1); + } + + ++target_0; + ++target_1; + } + + // if x or y did not reached there end position, fill the rest with gaps + while(xs != xs_end) + { + insertGap(target_1); + ++target_0; + ++target_1; + ++xs; + } + + while(ys != ys_end) + { + insertGap(target_0); + ++target_0; + ++target_1; + ++ys; + } + /* END ALIGN */ #ifdef MYERS_HIRSCHBERG_VERBOSE - std::cout << std::endl << align_ << std::endl << std::endl; + std::cout << std::endl << align_ << std::endl << std::endl; #endif - } - else - { - /* - --------------------------------------------------------------- - Calculate cut position using extended Myers-Bitvector-Algorithm - --------------------------------------------------------------- - */ + } + else + { + /* + --------------------------------------------------------------- + Calculate cut position using extended Myers-Bitvector-Algorithm + --------------------------------------------------------------- + */ - /* declare variables */ - unsigned int X, D0, HN, HP; + /* declare variables */ + unsigned int X, D0, HN, HP; - /* compute cut position */ - int mid = static_cast(floor( static_cast((_begin2(target) 
+ _end2(target))/2) )); + /* compute cut position */ + int mid = static_cast(floor( static_cast((_begin2(target) + _end2(target))/2) )); - /* debug infos */ + /* debug infos */ #ifdef MYERS_HIRSCHBERG_VERBOSE - std::cout << "calculate cut for x " << _begin1(target) << " to " << _end1(target) << " and y " << _begin2(target) << " to " << _end2(target) << std::endl; - std::cout << "calculate cut for " << infix(x,_begin1(target),_end1(target)) << " and " << infix(y,_begin2(target),_end2(target)) << std::endl; - std::cout << "cut is in row " << mid << " symbol is " << getValue(x,mid-1) << std::endl << std::endl; + std::cout << "calculate cut for x " << _begin1(target) << " to " << _end1(target) << " and y " << _begin2(target) << " to " << _end2(target) << std::endl; + std::cout << "calculate cut for " << infix(x,_begin1(target),_end1(target)) << " and " << infix(y,_begin2(target),_end2(target)) << std::endl; + std::cout << "cut is in row " << mid << " symbol is " << getValue(x,mid-1) << std::endl << std::endl; - std::cout << std::endl; - _writeDebugMatrix(infix(x,_begin1(target),_end1(target)),infix(y,_begin2(target),_end2(target))); - std::cout << std::endl; + std::cout << std::endl; + _writeDebugMatrix(infix(x,_begin1(target),_end1(target)),infix(y,_begin2(target),_end2(target))); + std::cout << std::endl; #endif - /* compute blocks and score masks */ - int fStartBlock = _begin2(target) / BLOCK_SIZE; - int fEndBlock = (mid - 1) / BLOCK_SIZE; - int fSpannedBlocks = (fEndBlock - fStartBlock) + 1; - - unsigned int fScoreMask = 1 << ((mid - 1) % BLOCK_SIZE); - - unsigned int fOffSet = _begin2(target) % BLOCK_SIZE; - unsigned int fSilencer = ~0; - fSilencer <<= fOffSet; - - /* reset v-bitvectors */ + /* compute blocks and score masks */ + int fStartBlock = _begin2(target) / BLOCK_SIZE; + int fEndBlock = (mid - 1) / BLOCK_SIZE; + int fSpannedBlocks = (fEndBlock - fStartBlock) + 1; + + unsigned int fScoreMask = 1 << ((mid - 1) % BLOCK_SIZE); + + unsigned int fOffSet = 
_begin2(target) % BLOCK_SIZE; + unsigned int fSilencer = ~0; + fSilencer <<= fOffSet; + + /* reset v-bitvectors */ std::fill(begin(VP, Standard()) + fStartBlock, end(VP, Standard()) + fEndBlock + 1, maxValue()); std::fill(begin(VN, Standard()) + fStartBlock, end(VN, Standard()) + fEndBlock + 1, 0); - /* determine start-position and start-score */ - int pos = _begin1(target); - score = (mid - _begin2(target)) * score_gap; - c_score[pos] = score; - - /* compute with myers - forward - begin */ - if(fSpannedBlocks == 1) - { - while (pos < _end1(target)) { - X = (fSilencer & forwardBitMask[(blockCount * ordValue(static_cast(getValue(x,pos)))) + fStartBlock]) | VN[fStartBlock]; - - D0 = ((VP[fStartBlock] + (X & VP[fStartBlock])) ^ VP[fStartBlock]) | X; - HN = VP[fStartBlock] & D0; - HP = VN[fStartBlock] | ~(VP[fStartBlock] | D0); - - X = (HP << 1) | (1 << fOffSet); - VN[fStartBlock] = X & D0; - VP[fStartBlock] = (HN << 1) | ~(X | D0); - - if (HP & fScoreMask) - score--; - else if (HN & fScoreMask) - score++; - - c_score[pos + 1] = score; - - ++pos; - } - } /* end - short patten */ - else - { - int shift, currentBlock; - unsigned int temp, carryD0, carryHP, carryHN; - - while (pos < _end1(target)) - { - carryD0 = carryHP = carryHN = 0; - shift = blockCount * ordValue(static_cast(getValue(x,pos))); - - // computing first the top most block - X = (fSilencer & forwardBitMask[shift + fStartBlock]) | VN[fStartBlock]; - - temp = VP[fStartBlock] + (X & VP[fStartBlock]); - carryD0 = temp < VP[fStartBlock]; - - D0 = (temp ^ VP[fStartBlock]) | X; - HN = VP[fStartBlock] & D0; - HP = VN[fStartBlock] | ~(VP[fStartBlock] | D0); - - X = (HP << 1) | (1 << fOffSet); - carryHP = HP >> (BLOCK_SIZE - 1); - - VN[fStartBlock] = X & D0; - - temp = (HN << 1); - carryHN = HN >> (BLOCK_SIZE - 1); - - VP[fStartBlock] = temp | ~(X | D0); - - // compute the remaining blocks - for (currentBlock = fStartBlock + 1; currentBlock <= fEndBlock; currentBlock++) { - X = forwardBitMask[shift + currentBlock] | 
VN[currentBlock]; - - temp = VP[currentBlock] + (X & VP[currentBlock]) + carryD0; - - carryD0 = ((carryD0) ? temp <= VP[currentBlock] : temp < VP[currentBlock]); - - D0 = (temp ^ VP[currentBlock]) | X; - HN = VP[currentBlock] & D0; - HP = VN[currentBlock] | ~(VP[currentBlock] | D0); - - X = (HP << 1) | carryHP; - carryHP = HP >> (BLOCK_SIZE-1); - - VN[currentBlock] = X & D0; - - temp = (HN << 1) | carryHN; - carryHN = HN >> (BLOCK_SIZE - 1); - - VP[currentBlock] = temp | ~(X | D0); - } - - /* update score */ - if (HP & fScoreMask) - score--; - else if (HN & fScoreMask) - score++; - - c_score[pos + 1] = score; - - ++pos; - } - - } /* end - long patten */ - /* compute with myers - forward - end */ - - /* compute blocks and score masks */ - int rStartBlock = (len_y - _end2(target)) / BLOCK_SIZE; - int rEndBlock = (len_y - mid - 1) / BLOCK_SIZE; - int rSpannedBlocks = (rEndBlock - rStartBlock) + 1; - - unsigned int rScoreMask = 1 << ((len_y - mid - 1) % BLOCK_SIZE); - unsigned int rOffSet = (len_y - _end2(target)) % BLOCK_SIZE; - unsigned int rSilencer = ~0; - rSilencer <<= rOffSet; - - /* reset v-bitvectors */ + /* determine start-position and start-score */ + int pos = _begin1(target); + score = (mid - _begin2(target)) * score_gap; + c_score[pos] = score; + + /* compute with myers - forward - begin */ + if(fSpannedBlocks == 1) + { + while (pos < _end1(target)) { + X = (fSilencer & forwardBitMask[(blockCount * ordValue(static_cast(getValue(x,pos)))) + fStartBlock]) | VN[fStartBlock]; + + D0 = ((VP[fStartBlock] + (X & VP[fStartBlock])) ^ VP[fStartBlock]) | X; + HN = VP[fStartBlock] & D0; + HP = VN[fStartBlock] | ~(VP[fStartBlock] | D0); + + X = (HP << 1) | (1 << fOffSet); + VN[fStartBlock] = X & D0; + VP[fStartBlock] = (HN << 1) | ~(X | D0); + + if (HP & fScoreMask) + score--; + else if (HN & fScoreMask) + score++; + + c_score[pos + 1] = score; + + ++pos; + } + } /* end - short patten */ + else + { + int shift, currentBlock; + unsigned int temp, carryD0, carryHP, 
carryHN; + + while (pos < _end1(target)) + { + carryD0 = carryHP = carryHN = 0; + shift = blockCount * ordValue(static_cast(getValue(x,pos))); + + // computing first the top most block + X = (fSilencer & forwardBitMask[shift + fStartBlock]) | VN[fStartBlock]; + + temp = VP[fStartBlock] + (X & VP[fStartBlock]); + carryD0 = temp < VP[fStartBlock]; + + D0 = (temp ^ VP[fStartBlock]) | X; + HN = VP[fStartBlock] & D0; + HP = VN[fStartBlock] | ~(VP[fStartBlock] | D0); + + X = (HP << 1) | (1 << fOffSet); + carryHP = HP >> (BLOCK_SIZE - 1); + + VN[fStartBlock] = X & D0; + + temp = (HN << 1); + carryHN = HN >> (BLOCK_SIZE - 1); + + VP[fStartBlock] = temp | ~(X | D0); + + // compute the remaining blocks + for (currentBlock = fStartBlock + 1; currentBlock <= fEndBlock; currentBlock++) { + X = forwardBitMask[shift + currentBlock] | VN[currentBlock]; + + temp = VP[currentBlock] + (X & VP[currentBlock]) + carryD0; + + carryD0 = ((carryD0) ? temp <= VP[currentBlock] : temp < VP[currentBlock]); + + D0 = (temp ^ VP[currentBlock]) | X; + HN = VP[currentBlock] & D0; + HP = VN[currentBlock] | ~(VP[currentBlock] | D0); + + X = (HP << 1) | carryHP; + carryHP = HP >> (BLOCK_SIZE-1); + + VN[currentBlock] = X & D0; + + temp = (HN << 1) | carryHN; + carryHN = HN >> (BLOCK_SIZE - 1); + + VP[currentBlock] = temp | ~(X | D0); + } + + /* update score */ + if (HP & fScoreMask) + score--; + else if (HN & fScoreMask) + score++; + + c_score[pos + 1] = score; + + ++pos; + } + + } /* end - long patten */ + /* compute with myers - forward - end */ + + /* compute blocks and score masks */ + int rStartBlock = (len_y - _end2(target)) / BLOCK_SIZE; + int rEndBlock = (len_y - mid - 1) / BLOCK_SIZE; + int rSpannedBlocks = (rEndBlock - rStartBlock) + 1; + + unsigned int rScoreMask = 1 << ((len_y - mid - 1) % BLOCK_SIZE); + unsigned int rOffSet = (len_y - _end2(target)) % BLOCK_SIZE; + unsigned int rSilencer = ~0; + rSilencer <<= rOffSet; + + /* reset v-bitvectors */ std::fill(begin(VP, Standard()) + 
rStartBlock, end(VP, Standard()) + rEndBlock + 1, maxValue()); std::fill(begin(VN, Standard()) + rStartBlock, end(VN, Standard()) + rEndBlock + 1, 0); - /* determine start-position and start-score */ - pos = _end1(target)-1; - score = (_end2(target) - mid) * score_gap; - - /* set start score */ - c_score[_end1(target)] += score; - - /* determine optimal cut position -- score extension */ - TScoreValue max = c_score[_end1(target)]; - TScoreValue rmax = score; - unsigned int pos_max = _end1(target); - - /* compute with myers - reverse - begin */ - if(rSpannedBlocks == 1) - { - while (pos >= _begin1(target)) { - X = (rSilencer & reverseBitMask[(blockCount * ordValue(static_cast(getValue(x,pos)))) + rStartBlock]) | VN[rStartBlock]; - - D0 = ((VP[rStartBlock] + (X & VP[rStartBlock])) ^ VP[rStartBlock]) | X; - HN = VP[rStartBlock] & D0; - HP = VN[rStartBlock] | ~(VP[rStartBlock] | D0); - - X = (HP << 1) | (1 << rOffSet); - VN[rStartBlock] = X & D0; - VP[rStartBlock] = (HN << 1) | ~(X | D0); - - if (HP & rScoreMask) - --score; - else if (HN & rScoreMask) - ++score; - - c_score[pos] += score; - - /* check for optimality -- score extension */ - if(c_score[pos]> max) - { - pos_max = pos; - max = c_score[pos]; - rmax = score; - } - - --pos; - } - } /* end - short pattern */ - else - { - int shift, currentBlock; - unsigned int temp, carryD0, carryHP, carryHN; - - while (pos >= _begin1(target)) - { - carryD0 = carryHP = carryHN = 0; - shift = blockCount * ordValue(static_cast(getValue(x,pos))); - - // compute first the top most block - X = (rSilencer & reverseBitMask[shift + rStartBlock]) | VN[rStartBlock]; - - temp = VP[rStartBlock] + (X & VP[rStartBlock]); - carryD0 = temp < VP[rStartBlock]; - - D0 = (temp ^ VP[rStartBlock]) | X; - HN = VP[rStartBlock] & D0; - HP = VN[rStartBlock] | ~(VP[rStartBlock] | D0); - - X = (HP << 1) | (1 << rOffSet); - carryHP = HP >> (BLOCK_SIZE - 1); - - VN[rStartBlock] = X & D0; - - temp = (HN << 1); - carryHN = HN >> (BLOCK_SIZE - 1); - - 
VP[rStartBlock] = temp | ~(X | D0); - - // compute the remaining blocks - for (currentBlock = rStartBlock + 1; currentBlock <= rEndBlock; currentBlock++) { - X = reverseBitMask[shift + currentBlock] | VN[currentBlock]; - - temp = VP[currentBlock] + (X & VP[currentBlock]) + carryD0; - - carryD0 = ((carryD0) ? temp <= VP[currentBlock] : temp < VP[currentBlock]); - - D0 = (temp ^ VP[currentBlock]) | X; - HN = VP[currentBlock] & D0; - HP = VN[currentBlock] | ~(VP[currentBlock] | D0); - - X = (HP << 1) | carryHP; - carryHP = HP >> (BLOCK_SIZE-1); - - VN[currentBlock] = X & D0; - - temp = (HN << 1) | carryHN; - carryHN = HN >> (BLOCK_SIZE - 1); - - VP[currentBlock] = temp | ~(X | D0); - } - - if (HP & rScoreMask) - --score; - else if (HN & rScoreMask) - ++score; - - c_score[pos] += score; - - /* check for optimality -- score extension*/ - if(c_score[pos] > max) - { - pos_max = pos; - max = c_score[pos]; - rmax = score; - } - - --pos; - } - - } /* end - long pattern */ - /* compute with myers - reverse - end */ - - // if computed the whole matrix max = alignment score - if(target == hs_complete) - total_score = max; + /* determine start-position and start-score */ + pos = _end1(target)-1; + score = (_end2(target) - mid) * score_gap; + + /* set start score */ + c_score[_end1(target)] += score; + + /* determine optimal cut position -- score extension */ + TScoreValue max = c_score[_end1(target)]; + TScoreValue rmax = score; + unsigned int pos_max = _end1(target); + + /* compute with myers - reverse - begin */ + if(rSpannedBlocks == 1) + { + while (pos >= _begin1(target)) { + X = (rSilencer & reverseBitMask[(blockCount * ordValue(static_cast(getValue(x,pos)))) + rStartBlock]) | VN[rStartBlock]; + + D0 = ((VP[rStartBlock] + (X & VP[rStartBlock])) ^ VP[rStartBlock]) | X; + HN = VP[rStartBlock] & D0; + HP = VN[rStartBlock] | ~(VP[rStartBlock] | D0); + + X = (HP << 1) | (1 << rOffSet); + VN[rStartBlock] = X & D0; + VP[rStartBlock] = (HN << 1) | ~(X | D0); + + if (HP & 
rScoreMask) + --score; + else if (HN & rScoreMask) + ++score; + + c_score[pos] += score; + + /* check for optimality -- score extension */ + if(c_score[pos]> max) + { + pos_max = pos; + max = c_score[pos]; + rmax = score; + } + + --pos; + } + } /* end - short pattern */ + else + { + int shift, currentBlock; + unsigned int temp, carryD0, carryHP, carryHN; + + while (pos >= _begin1(target)) + { + carryD0 = carryHP = carryHN = 0; + shift = blockCount * ordValue(static_cast(getValue(x,pos))); + + // compute first the top most block + X = (rSilencer & reverseBitMask[shift + rStartBlock]) | VN[rStartBlock]; + + temp = VP[rStartBlock] + (X & VP[rStartBlock]); + carryD0 = temp < VP[rStartBlock]; + + D0 = (temp ^ VP[rStartBlock]) | X; + HN = VP[rStartBlock] & D0; + HP = VN[rStartBlock] | ~(VP[rStartBlock] | D0); + + X = (HP << 1) | (1 << rOffSet); + carryHP = HP >> (BLOCK_SIZE - 1); + + VN[rStartBlock] = X & D0; + + temp = (HN << 1); + carryHN = HN >> (BLOCK_SIZE - 1); + + VP[rStartBlock] = temp | ~(X | D0); + + // compute the remaining blocks + for (currentBlock = rStartBlock + 1; currentBlock <= rEndBlock; currentBlock++) { + X = reverseBitMask[shift + currentBlock] | VN[currentBlock]; + + temp = VP[currentBlock] + (X & VP[currentBlock]) + carryD0; + + carryD0 = ((carryD0) ? 
temp <= VP[currentBlock] : temp < VP[currentBlock]); + + D0 = (temp ^ VP[currentBlock]) | X; + HN = VP[currentBlock] & D0; + HP = VN[currentBlock] | ~(VP[currentBlock] | D0); + + X = (HP << 1) | carryHP; + carryHP = HP >> (BLOCK_SIZE-1); + + VN[currentBlock] = X & D0; + + temp = (HN << 1) | carryHN; + carryHN = HN >> (BLOCK_SIZE - 1); + + VP[currentBlock] = temp | ~(X | D0); + } + + if (HP & rScoreMask) + --score; + else if (HN & rScoreMask) + ++score; + + c_score[pos] += score; + + /* check for optimality -- score extension*/ + if(c_score[pos] > max) + { + pos_max = pos; + max = c_score[pos]; + rmax = score; + } + + --pos; + } + + } /* end - long pattern */ + /* compute with myers - reverse - end */ + + // if computed the whole matrix max = alignment score + if(target == hs_complete) + total_score = max; #ifdef MYERS_HIRSCHBERG_VERBOSE - printf("Optimal cut is at %i and %i with forward score %i and reverse score %i\n\n",mid,pos_max,(max - rmax),rmax); + printf("Optimal cut is at %i and %i with forward score %i and reverse score %i\n\n",mid,pos_max,(max - rmax),rmax); #endif - /* push the two computed parts of the dp-matrix on process stack */ - to_process.push(HirschbergSet_(pos_max,_end1(target),mid,_end2(target),rmax)); - to_process.push(HirschbergSet_(_begin1(target),pos_max,_begin2(target),mid,max - rmax)); - - } - /* END CUT */ - } - - return total_score; + /* push the two computed parts of the dp-matrix on process stack */ + to_process.push(HirschbergSet_(pos_max,_end1(target),mid,_end2(target),rmax)); + to_process.push(HirschbergSet_(_begin1(target),pos_max,_begin2(target),mid,max - rmax)); + + } + /* END CUT */ + } + + return total_score; } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_MYERS_HIRSCHBERG_IMPL_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_MYERS_HIRSCHBERG_IMPL_H_ diff --git a/seqan/align/global_alignment_myers_impl.h b/seqan/align/global_alignment_myers_impl.h index ddf2743..2f38816 
100644 --- a/seqan/align/global_alignment_myers_impl.h +++ b/seqan/align/global_alignment_myers_impl.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // TODO(holtgrew): Should be called _globalAlignmentScore()! -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_MYERS_IMPL_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_MYERS_IMPL_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_MYERS_IMPL_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_MYERS_IMPL_H_ namespace seqan { @@ -70,28 +70,28 @@ _globalAlignmentScore(String const & seqH, if (length(seqH) < length(seqV)) return _globalAlignmentScore(seqV, seqH, algorithmTag); - // Use size of unsigned int as blocksize for bit-vectors. - const unsigned int BLOCK_SIZE = BitsPerValue::VALUE; + // Use size of unsigned int as blocksize for bit-vectors. 
+ const unsigned int BLOCK_SIZE = BitsPerValue::VALUE; typedef String const TSequenceH; typedef String const TSequenceV; - typedef typename Value::Type TPatternAlphabet; - typedef typename Size::Type TSourceSize; + typedef typename Value::Type TPatternAlphabet; + typedef typename Size::Type TSourceSize; TSequenceH const & x = seqH; TSequenceV const & y = seqV; - TSourceSize len_x = length(x); - unsigned int pos = 0; + TSourceSize len_x = length(x); + unsigned int pos = 0; - // init variables - unsigned int len_y = length(y); - int score = (-1)*len_y; - unsigned int patternAlphabetSize = ValueSize::VALUE; - unsigned int blockCount = (len_y + BLOCK_SIZE - 1) / BLOCK_SIZE; + // init variables + unsigned int len_y = length(y); + int score = (-1)*len_y; + unsigned int patternAlphabetSize = ValueSize::VALUE; + unsigned int blockCount = (len_y + BLOCK_SIZE - 1) / BLOCK_SIZE; - unsigned int scoreMask = 1 << ((len_y % BLOCK_SIZE) - 1); // the mask with a bit set at the position of the last active cell + unsigned int scoreMask = 1 << ((len_y % BLOCK_SIZE) - 1); // the mask with a bit set at the position of the last active cell String VP; resize(VP, blockCount, maxValue()); @@ -100,102 +100,102 @@ _globalAlignmentScore(String const & seqH, String bitMask; resize(bitMask, patternAlphabetSize * blockCount, 0); - // encoding the letters as bit-vectors + // encoding the letters as bit-vectors for (unsigned int j = 0; j < len_y; j++) - bitMask[blockCount * ordValue(getValue(y,j)) + j/BLOCK_SIZE] = bitMask[blockCount * ordValue(getValue(y,j)) + j/BLOCK_SIZE] | 1 << (j%BLOCK_SIZE); - - // compute score - unsigned int X, D0, HN, HP; - if(blockCount == 1) - { - while (pos < len_x) { - X = bitMask[ordValue(static_cast(getValue(x,pos)))] | VN[0]; - - D0 = ((VP[0] + (X & VP[0])) ^ VP[0]) | X; - HN = VP[0] & D0; - HP = VN[0] | ~(VP[0] | D0); - - // customized to compute edit distance - X = (HP << 1) | 1; - VN[0] = X & D0; - VP[0] = (HN << 1) | ~(X | D0); - - if (HP & scoreMask) - score--; 
- else if (HN & scoreMask) - score++; - - ++pos; - } - } // end compute score - short pattern - else - { - unsigned int temp, shift, currentBlock; - unsigned int carryD0, carryHP, carryHN; - - while (pos < len_x) - { - // set vars - carryD0 = carryHP = carryHN = 0; - shift = blockCount * ordValue(static_cast(getValue(x,pos))); - - // computing first the top most block - X = bitMask[shift] | VN[0]; - - temp = VP[0] + (X & VP[0]); - carryD0 = temp < VP[0]; - - D0 = (temp ^ VP[0]) | X; - HN = VP[0] & D0; - HP = VN[0] | ~(VP[0] | D0); - - // customized to compute edit distance - X = (HP << 1) | 1; - carryHP = HP >> (BLOCK_SIZE - 1); - - VN[0] = X & D0; - - temp = (HN << 1); - carryHN = HN >> (BLOCK_SIZE - 1); - - VP[0] = temp | ~(X | D0); - - // computing the necessary blocks, carries between blocks following one another are stored - for (currentBlock = 1; currentBlock < blockCount; currentBlock++) { - X = bitMask[shift + currentBlock] | VN[currentBlock]; - - temp = VP[currentBlock] + (X & VP[currentBlock]) + carryD0; - - carryD0 = ((carryD0) ? 
temp <= VP[currentBlock] : temp < VP[currentBlock]); - - D0 = (temp ^ VP[currentBlock]) | X; - HN = VP[currentBlock] & D0; - HP = VN[currentBlock] | ~(VP[currentBlock] | D0); - - X = (HP << 1) | carryHP; - carryHP = HP >> (BLOCK_SIZE-1); - - VN[currentBlock] = X & D0; - - temp = (HN << 1) | carryHN; - carryHN = HN >> (BLOCK_SIZE - 1); - - VP[currentBlock] = temp | ~(X | D0); - } - - // update score with the HP and HN values of the last block the last block - if (HP & scoreMask) - score--; - else if (HN & scoreMask) - score++; - ++pos; - } - - } // end compute score - long pattern - - return score; + bitMask[blockCount * ordValue(getValue(y,j)) + j/BLOCK_SIZE] = bitMask[blockCount * ordValue(getValue(y,j)) + j/BLOCK_SIZE] | 1 << (j%BLOCK_SIZE); + + // compute score + unsigned int X, D0, HN, HP; + if(blockCount == 1) + { + while (pos < len_x) { + X = bitMask[ordValue(static_cast(getValue(x,pos)))] | VN[0]; + + D0 = ((VP[0] + (X & VP[0])) ^ VP[0]) | X; + HN = VP[0] & D0; + HP = VN[0] | ~(VP[0] | D0); + + // customized to compute edit distance + X = (HP << 1) | 1; + VN[0] = X & D0; + VP[0] = (HN << 1) | ~(X | D0); + + if (HP & scoreMask) + score--; + else if (HN & scoreMask) + score++; + + ++pos; + } + } // end compute score - short pattern + else + { + unsigned int temp, shift, currentBlock; + unsigned int carryD0, carryHP, carryHN; + + while (pos < len_x) + { + // set vars + carryD0 = carryHP = carryHN = 0; + shift = blockCount * ordValue(static_cast(getValue(x,pos))); + + // computing first the top most block + X = bitMask[shift] | VN[0]; + + temp = VP[0] + (X & VP[0]); + carryD0 = temp < VP[0]; + + D0 = (temp ^ VP[0]) | X; + HN = VP[0] & D0; + HP = VN[0] | ~(VP[0] | D0); + + // customized to compute edit distance + X = (HP << 1) | 1; + carryHP = HP >> (BLOCK_SIZE - 1); + + VN[0] = X & D0; + + temp = (HN << 1); + carryHN = HN >> (BLOCK_SIZE - 1); + + VP[0] = temp | ~(X | D0); + + // computing the necessary blocks, carries between blocks following one another are 
stored + for (currentBlock = 1; currentBlock < blockCount; currentBlock++) { + X = bitMask[shift + currentBlock] | VN[currentBlock]; + + temp = VP[currentBlock] + (X & VP[currentBlock]) + carryD0; + + carryD0 = ((carryD0) ? temp <= VP[currentBlock] : temp < VP[currentBlock]); + + D0 = (temp ^ VP[currentBlock]) | X; + HN = VP[currentBlock] & D0; + HP = VN[currentBlock] | ~(VP[currentBlock] | D0); + + X = (HP << 1) | carryHP; + carryHP = HP >> (BLOCK_SIZE-1); + + VN[currentBlock] = X & D0; + + temp = (HN << 1) | carryHN; + carryHN = HN >> (BLOCK_SIZE - 1); + + VP[currentBlock] = temp | ~(X | D0); + } + + // update score with the HP and HN values of the last block the last block + if (HP & scoreMask) + score--; + else if (HN & scoreMask) + score++; + ++pos; + } + + } // end compute score - long pattern + + return score; } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_MYERS_IMPL_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_MYERS_IMPL_H_ diff --git a/seqan/align/global_alignment_specialized.h b/seqan/align/global_alignment_specialized.h index 783e260..735eabf 100644 --- a/seqan/align/global_alignment_specialized.h +++ b/seqan/align/global_alignment_specialized.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ // MyersBitVector, MyersHirschberg. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_SPECIALIZED_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_SPECIALIZED_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_SPECIALIZED_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_SPECIALIZED_H_ namespace seqan { @@ -196,4 +196,4 @@ int globalAlignmentScore(StringSet const & strings, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_SPECIALIZED_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_SPECIALIZED_H_ diff --git a/seqan/align/global_alignment_unbanded.h b/seqan/align/global_alignment_unbanded.h index 190a9f8..8c4a1bd 100644 --- a/seqan/align/global_alignment_unbanded.h +++ b/seqan/align/global_alignment_unbanded.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,8 @@ // the globalFunction() fails is actually meaningful. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_UNBANDED_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_UNBANDED_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_UNBANDED_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_UNBANDED_H_ namespace seqan { @@ -75,36 +75,34 @@ class Fragment; /*! * @fn globalAlignment - * - * @headerfile seqan/align.h - * + * @headerfile * @brief Computes the best global pairwise alignment. 
- * + * * @signature TScoreVal globalAlignment(align, scoringScheme, [alignConfig,] [lowerDiag, upperDiag,] [algorithmTag]); * @signature TScoreVal globalAlignment(gapsH, gapsV, scoringScheme, [alignConfig,] [lowerDiag, upperDiag,] [algorithmTag]); * @signature TScoreVal globalAlignment(frags, strings, scoringScheme, [alignConfig,] [lowerDiag, upperDiag,] [algorithmTag]); * @signature TScoreVal globalAlignment(alignGraph, scoringScheme, [alignConfig,] [lowerDiag, upperDiag,] [algorithmTag]); - * - * @param align The @link Align @endlink object to use for storing the pairwise alignment. - * @param gapsH The @link Gaps @endlink object for the first row (horizontal in the DP matrix). - * @param gapsV The @link Gaps @endlink object for the second row (vertical in the DP matrix). - * @param frags String of @link Fragment @endlink objects to store alignment in. - * @param strings StringSet of length two with the strings to align. - * @param alignGraph Alignment Graph for the resulting alignment. Must be initialized with two strings. - * @param scoringScheme The @link Score scoring scheme @endlink to use for the alignment. Note that - * the user is responsible for ensuring that the scoring scheme is compatible with algorithmTag. - * @param alignConfig @link AlignConfig @endlink instance to use for the alignment configuration. - * @param lowerDiag Optional lower diagonal (int). - * @param upperDiag Optional upper diagonal (int). - * @param algorithmTag Tag to select the alignment algorithm (see @link AlignmentAlgorithmTags @endlink). * - * @return TScoreVal Score value of the resulting alignment. Of type Value<TScore>::Type where - * TScore is the type of scoringScheme. - * + * @param[in,out] align The @link Align @endlink object to use for storing the pairwise alignment. + * @param[in,out] gapsH The @link Gaps @endlink object for the first row (horizontal in the DP matrix). 
+ * @param[in,out] gapsV The @link Gaps @endlink object for the second row (vertical in the DP matrix). + * @param[in,out] frags String of @link Fragment @endlink objects to store alignment in. + * @param[in] strings StringSet of length two with the strings to align. + * @param[in,out] alignGraph Alignment Graph for the resulting alignment. Must be initialized with two strings. + * @param[in] scoringScheme The @link Score scoring scheme @endlink to use for the alignment. Note that + * the user is responsible for ensuring that the scoring scheme is compatible with algorithmTag. + * @param[in] alignConfig @link AlignConfig @endlink instance to use for the alignment configuration. + * @param[in] lowerDiag Optional lower diagonal (int). + * @param[in] upperDiag Optional upper diagonal (int). + * @param[in] algorithmTag Tag to select the alignment algorithm (see @link AlignmentAlgorithmTags @endlink). + * + * @return TScoreVal Score value of the resulting alignment (Metafunction: @link Score#Value @endlink of + * the type of scoringScheme). + * * There exist multiple overloads for this function with four configuration dimensions. - * + * * First, you can select whether begin and end gaps are free in either sequence using alignConfig. - * + * * Second, you can select the type of the target storing the alignment. This can be either an @link Align @endlink * object, two @link Gaps @endlink objects, a @link AlignmentGraph @endlink, or a string of @link Fragment @endlink * objects. @link Align @endlink objects provide an interface to tabular alignments with the restriction of all rows @@ -113,45 +111,45 @@ class Fragment; * Graphs @endlink provide a graph-based representation of segment-based colinear alignments. Using @link Fragment * @endlink strings is useful for collecting many pairwise alignments, for example in the construction of @link * AlignmentGraph Alignment Graphs @endlink for multiple-sequence alignments (MSA). 
- * + * * Third, you can optionally give a band for the alignment using lowerDiag and upperDiag. The center * diagonal has index 0, the ith diagonal below has index -i, the ith above has * index i. - * + * * Fourth, you can select the algorithm to use with algorithmTag. This can be one of @link * AlignmentAlgorithmTags#NeedlemanWunsch @endlink and @link AlignmentAlgorithmTags#Gotoh @endlink. The * Needleman-Wunsch algorithm supports scoring schemes with linear gap costs only while Gotoh's algorithm also allows * affine gap costs. - * + * * The available alignment algorithms all have some restrictions. Gotoh's algorithm can handle arbitrary substitution * and affine gap scores. Needleman-Wunsch is limited to linear gap scores. The implementation of Hirschberg's * algorithm is further limited that it does not support alignConfig objects or banding. The implementation of * the Myers-Hirschberg algorithm further limits this to only support edit distance (as scores, matches are scored with * 0, mismatches are scored with -1). - * + * * The examples below show some common use cases. - * + * * @section Examples - * + * * Global alignment of two sequences using an @link Align @endlink object and * the Needleman-Wunsch algorithm. * - * @include demos/align/global_alignment_unbanded.cpp + * @include demos/dox/align/global_alignment_unbanded.cpp * * The output is as follows: * - * @include demos/align/global_alignment_unbanded.cpp.stdout + * @include demos/dox/align/global_alignment_unbanded.cpp.stdout * * Global banded alignment of two sequences using two @link Gaps @endlink objects and the Gotoh algorithm. 
* - * @include demos/align/global_alignment_banded.cpp + * @include demos/dox/align/global_alignment_banded.cpp * * The output is as follows: * - * @include demos/align/global_alignment_banded.cpp.stdout - * - * http://trac.seqan.de/wiki/Tutorial/PairwiseSequenceAlignment - * + * @include demos/dox/align/global_alignment_banded.cpp.stdout + * + * http://seqan.readthedocs.org/en/develop/Tutorial/PairwiseSequenceAlignment.html + * * @section References * *
    @@ -159,123 +157,44 @@ class Fragment; * of two proteins. J Mol Biol 1970, 48(3): 443-53. *
  • Gotoh O: An improved algorithm for matching biological sequences. J Mol Biol 1982, 162(3):705-8
  • *
- * + * * @see localAlignment * @see globalAlignmentScore * @see AlignmentAlgorithmTags */ -/** -.Function.globalAlignment -..summary:Computes the best global pairwise alignment. -..cat:Alignments -..signature:globalAlignment(align, scoringScheme, [alignConfig,] [lowerDiag, upperDiag,] [algorithmTag]) -..signature:globalAlignment(gapsH, gapsV, scoringScheme, [alignConfig,] [lowerDiag, upperDiag,] [algorithmTag]) -..signature:globalAlignment(frags, strings, scoringScheme, [alignConfig,] [lowerDiag, upperDiag,] [algorithmTag]) -..signature:globalAlignment(alignmentGraph, scoringScheme, [alignConfig,] [lowerDiag, upperDiag,] [algorithmTag]) -..param.align: -An @Class.Align@ object that stores the alignment. -The number of rows must be 2 and the sequences must have already been set. -$row(align, 0)$ is the horizontal one in the alignment matrix alignment, $row(align, 1)$ is the vertical one. -...type:Class.Align -..param.gapsH:Horizontal gapped sequence in alignment matrix. -...type:Class.Gaps -..param.gapsV:Vertical gapped sequence in alignment matrix. -...type:Class.Gaps -..param.frags: -String of @Class.Fragment@ objects. -The sequence with id $0$ is the horizontal one, the sequence with id $1$ is the vertical one. -..param.alignmentGraph: -@Spec.Alignment Graph@ object to store the alignment in. -...type:Spec.Alignment Graph -...remarks:The underlying @Class.StringSet@ must be an @Spec.Owner|Owner StringSet@. -..param.strings:A @Class.StringSet@ containing two sequences. -...type:Class.StringSet -..param.scoringScheme: -The scoring scheme to use for the alignment. -Note that the user is responsible for ensuring that the scoring scheme is compatible with $algorithmTag$. -...type:Class.Score -..param.alignConfig:The @Class.AlignConfig@ to use for the alignment. -...type:Class.AlignConfig -..param.lowerDiag:Optional lower diagonal. -...type:nolink:$int$ -..param.upperDiag:Optional upper diagonal. 
-...type:nolink:$int$ -..param.algorithmTag:The Tag for picking the alignment algorithm. -...type:Tag.Pairwise Global Alignment Algorithms.tag.Gotoh -...type:Tag.Pairwise Global Alignment Algorithms.tag.NeedlemanWunsch -...type:Tag.Pairwise Global Alignment Algorithms.tag.Hirschberg -...type:Tag.Pairwise Global Alignment Algorithms.tag.MyersHirschberg -..returns:An integer with the alignment score, as given by the @Metafunction.Value@ metafunction of the @Class.Score@ type. -..remarks: -There exist multiple overloads for this function with four configuration dimensions. -..remarks: -First, you can select whether begin and end gaps are free in either sequence using $alignConfig$. -..remarks: -Second, you can select the type of the target storing the alignment. -This can be either an @Class.Align@ object, two @Class.Gaps@ objects, a @Spec.Alignment Graph@, or a string of @Class.Fragment@ objects. -@Class.Align@ objects provide an interface to tabular alignments with the restriction of all rows having the same type. -Using two @Class.Gaps@ objects has the advantage that you an align sequences with different types, for example @Shortcut.DnaString@ and @Shortcut.Dna5String@. -@Spec.Alignment Graph|Alignment Graphs@ provide a graph-based representation of segment-based colinear alignments. -Using @Class.Fragment@ strings is useful for collecting many pairwise alignments, for example in the construction of @Spec.Alignment Graph|Alignment Graphs@ for multiple-sequence alignments (MSA). -..remarks: -Third, you can optionally give a band for the alignment using $lowerDiag$ and $upperDiag$. -The center diagonal has index $0$, the $i$th diagonal below has index $-i$, the $i$th above has index $i$. -..remarks: -Fourth, you can select the algorithm to use with $algorithmTag$. -This can be one of @Tag.Pairwise Global Alignment Algorithms.value.NeedlemanWunsch@ and @Tag.Pairwise Global Alignment Algorithms.value.Gotoh@. 
-The Needleman-Wunsch algorithm supports scoring schemes with linear gap costs only while Gotoh's algorithm also allows affine gap costs. -..remarks: -The available alignment algorithms all have some restrictions. -Gotoh's algorithm can handle arbitrary substitution and affine gap scores. -Needleman-Wunsch is limited to linear gap scores. -The implementation of Hirschberg's algorithm is further limited that it does not support $alignConfig$ objects or banding. -The implementation of the Myers-Hirschberg algorithm further limits this to only support edit distance (as scores, matches are scored with 0, mismatches are scored with -1). -..remarks: -The examples below show some common use cases. -..example.text:Global alignment of two sequences using an @Class.Align@ object and the Needleman-Wunsch algorithm. The Needleman-Wunsch algorithm is automatically selected since the scoring scheme uses linear gap costs. -..example.file:demos/align/global_alignment_unbanded.cpp -..example.text:Global banded alignment of two sequences using two @Class.Gaps@ objects and the Gotoh algorithm. The Gotoh algorithm is automatically selected since the scoring scheme uses affine gap costs. -..example.file:demos/align/global_alignment_banded.cpp -..see:Function.localAlignment -..see:Function.globalAlignmentScore -..include:seqan/align.h -..wiki:Tutorial/PairwiseSequenceAlignment -..cite:Needleman SB, Wunsch CD: A general method applicable to the search for similarities in the amino acid sequence of two proteins. J Mol Biol 1970, 48(3): 443-53. -..cite:Gotoh O: An improved algorithm for matching biological sequences. J Mol Biol 1982, 162(3):705-8 -. 
-*/ - // ---------------------------------------------------------------------------- // Function globalAlignment() [unbanded, Align] // ---------------------------------------------------------------------------- -template +template TScoreValue globalAlignment(Align & align, Score const & scoringScheme, - AlignConfig const & alignConfig, - TAlgoTag const & algoTag) + AlignConfig const & /*alignConfig*/, + TAlgoTag const & /*algoTag*/) { typedef Align TAlign; typedef typename Size::Type TSize; typedef typename Position::Type TPosition; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig TAlignConfig; + typedef typename SubstituteAlignConfig_::Type TFreeEndGaps; + typedef AlignConfig2, TFreeEndGaps> TAlignConfig2; + typedef typename SubstituteAlgoTag_::Type TGapModel; + String trace; + TScoreValue res; + DPScoutState_ dpScoutState; + res = _setUpAndRunAlignment(trace, dpScoutState, source(row(align, 0)), source(row(align, 1)), scoringScheme, + TAlignConfig2(), TGapModel()); - // We do not need string ids for this variant and set them to 0u. They are - // only required for the Fragment String and the Alignment Graph variant. - TScoreValue res = _setUpAndRunAlignment(trace, source(row(align, 0)), source(row(align, 1)), scoringScheme, - alignConfig, algoTag); _adaptTraceSegmentsTo(row(align, 0), row(align, 1), trace); return res; } // Interface without AlignConfig<>. - template @@ -288,7 +207,6 @@ TScoreValue globalAlignment(Align & align, } // Interface without algorithm tag. - template @@ -303,7 +221,6 @@ TScoreValue globalAlignment(Align & align, } // Interface without AlignConfig<> and algorithm tag. 
- template TScoreValue globalAlignment(Align & align, @@ -325,25 +242,26 @@ template & gapsH, Gaps & gapsV, Score const & scoringScheme, - AlignConfig const & alignConfig, - TAlgoTag const & algoTag) + AlignConfig const & /*alignConfig*/, + TAlgoTag const & /*algoTag*/) { typedef typename Size::Type TSize; typedef typename Position::Type TPosition; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig TAlignConfig; + typedef typename SubstituteAlignConfig_::Type TFreeEndGaps; + typedef AlignConfig2, TFreeEndGaps> TAlignConfig2; + typedef typename SubstituteAlgoTag_::Type TGapModel; String traceSegments; - - // We do not need string ids for this variant and set them to 0u. They are - // only required for the Fragment String and the Alignment Graph variant. - TScoreValue res = _setUpAndRunAlignment(traceSegments, source(gapsH), source(gapsV), scoringScheme, alignConfig, - algoTag); + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(traceSegments, dpScoutState, source(gapsH), source(gapsV), scoringScheme, + TAlignConfig2(), TGapModel()); _adaptTraceSegmentsTo(gapsH, gapsV, traceSegments); return res; } // Interface without AlignConfig<>. - template & gapsH, } // Interface without algorithm tag. - template & gapsH, } // Interface without AlignConfig<> and algorithm tag. - template @@ -392,32 +308,36 @@ TScoreValue globalAlignment(Gaps & gapsH, // ---------------------------------------------------------------------------- // Full interface. 
- template TScoreValue globalAlignment(Graph > & alignmentGraph, Score const & scoringScheme, - AlignConfig const & alignConfig, - TAlgoTag const & algoTag) + AlignConfig const & /*alignConfig*/, + TAlgoTag const & /*algoTag*/) { typedef Graph > TGraph; typedef typename Position::Type TPosition; typedef typename Size::Type TSize; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig TAlignConfig; + typedef typename SubstituteAlignConfig_::Type TFreeEndGaps; + typedef AlignConfig2, TFreeEndGaps> TAlignConfig2; + typedef typename SubstituteAlgoTag_::Type TGapModel; String traceSegments; + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(traceSegments, dpScoutState, value(stringSet(alignmentGraph), 0), + value(stringSet(alignmentGraph), 1), scoringScheme, TAlignConfig2(), + TGapModel()); - TScoreValue res = _setUpAndRunAlignment(traceSegments, value(stringSet(alignmentGraph), 0), - value(stringSet(alignmentGraph), 1), scoringScheme, alignConfig, algoTag); _adaptTraceSegmentsTo(alignmentGraph, positionToId(stringSet(alignmentGraph), 0), positionToId(stringSet(alignmentGraph), 1), traceSegments); return res; } // Interface without AlignConfig<>. - template @@ -430,7 +350,6 @@ TScoreValue globalAlignment(Graph > & } // Interface without algorithm tag. - template @@ -445,7 +364,6 @@ TScoreValue globalAlignment(Graph > & } // Interface without AlignConfig<> and algorithm tag. - template TScoreValue globalAlignment(Graph > & alignmentGraph, @@ -460,7 +378,6 @@ TScoreValue globalAlignment(Graph > & // ---------------------------------------------------------------------------- // Full interface. 
- template , TStringSpec> & fragmentString, StringSet const & strings, Score const & scoringScheme, - AlignConfig const & alignConfig, - TAlgoTag const & algoTag) + AlignConfig const & /*alignConfig*/, + TAlgoTag const & /*algoTag*/) { typedef String, TStringSpec> TFragments; typedef typename Position::Type TPosition; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig TAlignConfig; + typedef typename SubstituteAlignConfig_::Type TFreeEndGaps; + typedef AlignConfig2, TFreeEndGaps> TAlignConfig2; + typedef typename SubstituteAlgoTag_::Type TGapModel; String traceSegments; + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(traceSegments, dpScoutState, value(strings, 0), value(strings, 1), + scoringScheme, TAlignConfig2(), TGapModel()); - TScoreValue res = _setUpAndRunAlignment(traceSegments, value(strings, 0), value(strings, 1), scoringScheme, - alignConfig, algoTag); _adaptTraceSegmentsTo(fragmentString, positionToId(strings, 0), positionToId(strings, 1), traceSegments); return res; } // Interface without AlignConfig<>. - template , TStringSpec> } // Interface without algorithm tag. - template , TStringSpec> } // Interface without AlignConfig<> and algorithm tag. - template @@ -535,13 +454,14 @@ TScoreValue globalAlignment(String, TStringSpec> /*! * @fn globalAlignmentScore + * @headerfile * @brief Computes the best global pairwise alignment score. - * + * * @signature TScoreVal globalAlignmentScore(seqH, seqV, scoringScheme[, alignConfig][, lowerDiag, upperDiag][, algorithmTag]); * @signature TScoreVal globalAlignmentScore(strings, scoringScheme[, alignConfig][, lowerDiag, upperDiag][, algorithmTag]); * @signature TScoreVal globalAlignmentScore(seqH, seqV, {MyersBitVector | MyersHirschberg}); * @signature TScoreVal globalAlignmentScore(strings, {MyersBitVector | MyersHirschberg}); - * + * * @param[in] seqH Horizontal gapped sequence in alignment matrix. Types: String * @param[in] seqV Vertical gapped sequence in alignment matrix. 
Types: String * @param[in] strings A @link StringSet @endlink containing two sequences. Type: StringSet. @@ -552,64 +472,21 @@ TScoreValue globalAlignment(String, TStringSpec> * @param[in] upperDiag Optional upper diagonal. Types: int * @param[in] algorithmTag The Tag for picking the alignment algorithm. Types: @link PairwiseLocalAlignmentAlgorithms * @endlink. - * - * @return TScoreValue The score value with the alignment score, as given by the @link Score#Value @endlink metafunction - * of the scoringScheme type. - * - * @section Remarks - * + * + * @return TScoreVal Score value of the resulting alignment (Metafunction: @link Score#Value @endlink of + * the type of scoringScheme). + * * This function does not perform the (linear time) traceback step after the (mostly quadratic time) dynamic programming * step. Note that Myers' bit-vector algorithm does not compute an alignment (only in the Myers-Hirschberg variant) but * scores can be computed using globalAlignmentScore. - * + * * The same limitations to algorithms as in @link globalAlignment @endlink apply. Furthermore, the * MyersBitVector and MyersHirschberg variants can only be used without any other parameter. - * - * @see http://trac.seqan.de/wiki/Tutorial/PairwiseSequenceAlignment + * + * @see http://seqan.readthedocs.org/en/develop/Tutorial/PairwiseSequenceAlignment.html * @see globalAlignment */ -/** -.Function.globalAlignmentScore -..summary:Computes the best global pairwise alignment score. -..cat:Alignments -..signature:globalAlignmentScore(seqH, seqV, scoringScheme, [alignConfig,] [lowerDiag, upperDiag,] [algorithmTag]) -..signature:globalAlignmentScore(strings, scoringScheme, [alignConfig,] [lowerDiag, upperDiag,] [algorithmTag]) -..signature:globalAlignmentScore(seqH, seqV, {MyersBitVector | MyersHirschberg}) -..signature:globalAlignmentScore(strings, {MyersBitVector | MyersHirschberg}) -..param.seqH:Horizontal gapped sequence in alignment matrix. 
-...type:Class.String -..param.seqV:Vertical gapped sequence in alignment matrix. -...type:Class.String -..param.strings:A @Class.StringSet@ containing two sequences. -...type:Class.StringSet -..param.scoringScheme: -The scoring scheme to use for the alignment. -Note that the user is responsible for ensuring that the scoring scheme is compatible with $algorithmTag$. -...type:Class.Score -..param.alignConfig:The @Class.AlignConfig@ to use for the alignment. -...type:Class.AlignConfig -..param.lowerDiag:Optional lower diagonal. -...type:nolink:$int$ -..param.upperDiag:Optional upper diagonal. -...type:nolink:$int$ -..param.algorithmTag:The Tag for picking the alignment algorithm. -...type:Tag.Pairwise Global Alignment Algorithms.tag.Gotoh -...type:Tag.Pairwise Global Alignment Algorithms.tag.NeedlemanWunsch -...type:Tag.Pairwise Global Alignment Algorithms.tag.Hirschberg -...type:Tag.Pairwise Global Alignment Algorithms.tag.MyersHirschberg -...type:Tag.Pairwise Global Alignment Algorithms.tag.MyersBitVector -..returns:An integer with the alignment score, as given by the @Metafunction.Value@ metafunction of the @Class.Score@ type. -..remarks: -This function does not perform the (linear time) traceback step after the (mostly quadratic time) dynamic programming step. -Note that Myers' bit-vector algorithm does not compute an alignment (only in the Myers-Hirschberg variant) but scores can be computed using $globalAlignmentScore$. -..remarks: -The same limitations to algorithms as in @Function.globalAlignment@ apply. -Furthermore, the $MyersBitVector$ and $MyersHirschberg$ variants can only be used without any other parameter. 
-..see:Function.globalAlignment -..wiki:Tutorial/PairwiseSequenceAlignment -*/ - // ---------------------------------------------------------------------------- // Function globalAlignmentScore() [unbanded, 2 Strings] // ---------------------------------------------------------------------------- @@ -622,14 +499,20 @@ template const & scoringScheme, - AlignConfig const & alignConfig, - TAlgoTag const & algoTag) + AlignConfig const & /*alignConfig*/, + TAlgoTag const & /*algoTag*/) { - return _setUpAndRunAlignment(seqH, seqV, scoringScheme, alignConfig, algoTag); + typedef AlignConfig TAlignConfig; + typedef typename SubstituteAlignConfig_::Type TFreeEndGaps; + typedef AlignConfig2, TFreeEndGaps, TracebackOff> TAlignConfig2; + typedef typename SubstituteAlgoTag_::Type TGapModel; + + DPScoutState_ dpScoutState; + String > traceSegments; // Dummy segments. + return _setUpAndRunAlignment(traceSegments, dpScoutState, seqH, seqV, scoringScheme, TAlignConfig2(), TGapModel()); } // Interface without AlignConfig<>. - template and algorithm tag. - template @@ -683,15 +564,23 @@ template TScoreValue globalAlignmentScore(StringSet const & strings, Score const & scoringScheme, - AlignConfig const & alignConfig, - TAlgoTag const & algoTag) + AlignConfig const & /*alignConfig*/, + TAlgoTag const & /*algoTag*/) { + typedef AlignConfig TAlignConfig; + typedef typename SubstituteAlignConfig_::Type TFreeEndGaps; + typedef AlignConfig2, TFreeEndGaps, TracebackOff> TAlignConfig2; + typedef typename SubstituteAlgoTag_::Type TGapModel; + SEQAN_ASSERT_EQ(length(strings), 2u); - return _setUpAndRunAlignment(strings[0], strings[1], scoringScheme, alignConfig, algoTag); + + DPScoutState_ dpScoutState; + String > traceSegments; // Dummy segments. + return _setUpAndRunAlignment(traceSegments, dpScoutState, strings[0], strings[1], scoringScheme, TAlignConfig2(), + TGapModel()); } // Interface without AlignConfig<>. 
- template @@ -706,7 +595,6 @@ TScoreValue globalAlignmentScore(StringSet const & strings, } // Interface without algorithm tag. - template @@ -723,7 +611,6 @@ TScoreValue globalAlignmentScore(StringSet const & strings, } // Interface without AlignConfig<> and algorithm tag. - template TScoreValue globalAlignmentScore(StringSet const & strings, @@ -737,4 +624,4 @@ TScoreValue globalAlignmentScore(StringSet const & strings, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_UNBANDED_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_GLOBAL_ALIGNMENT_UNBANDED_H_ diff --git a/seqan/align/local_alignment_banded.h b/seqan/align/local_alignment_banded.h index abdbbcf..5b5ea1a 100644 --- a/seqan/align/local_alignment_banded.h +++ b/seqan/align/local_alignment_banded.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Interface functions for banded local alignment. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_BANDED_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_BANDED_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_BANDED_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_BANDED_H_ namespace seqan { @@ -59,74 +59,125 @@ namespace seqan { // Function localAlignment() [banded, Align] // ---------------------------------------------------------------------------- -template +template TScoreValue localAlignment(Align & align, Score const & scoringScheme, int lowerDiag, - int upperDiag) + int upperDiag, + TTag const & tag) { typedef Align TAlign; typedef typename Size::Type TSize; typedef typename Position::Type TPosition; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig2, FreeEndGaps_<> > TAlignConfig2; SEQAN_ASSERT_EQ(length(rows(align)), 2u); - String traceSegments; - TScoreValue score = _setUpAndRunAlignment(traceSegments, source(row(align, 0)), source(row(align, 1)), - scoringScheme, lowerDiag, upperDiag, SmithWaterman()); - _adaptTraceSegmentsTo(row(align, 0), row(align, 1), traceSegments); - return score; + String trace; + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(trace, dpScoutState, source(row(align, 0)), source(row(align, 1)), + scoringScheme, TAlignConfig2(lowerDiag, upperDiag), tag); + + _adaptTraceSegmentsTo(row(align, 0), row(align, 1), trace); + return res; +} + +template +TScoreValue localAlignment(Align & align, + Score const & scoringScheme, + int lowerDiag, + int upperDiag) +{ + SEQAN_ASSERT(length(rows(align)) == 2u); + if (_usesAffineGaps(scoringScheme, source(row(align, 0)), source(row(align, 1)))) + return localAlignment(align, scoringScheme, lowerDiag, upperDiag, AffineGaps()); + else + return localAlignment(align, scoringScheme, lowerDiag, upperDiag, LinearGaps()); } // ---------------------------------------------------------------------------- // Function 
localAlignment() [banded, Gaps] // ---------------------------------------------------------------------------- -template +template TScoreValue localAlignment(Gaps & gapsH, Gaps & gapsV, Score const & scoringScheme, int lowerDiag, - int upperDiag) + int upperDiag, + TTag const & tag) { typedef typename Size::Type TSize; typedef typename Position::Type TPosition; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig2, FreeEndGaps_<> > TAlignConfig2; + + String trace; + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(trace, dpScoutState, source(gapsH), source(gapsV), scoringScheme, + TAlignConfig2(lowerDiag, upperDiag), tag); + _adaptTraceSegmentsTo(gapsH, gapsV, trace); + return res; +} - String traceSegments; - TScoreValue score = _setUpAndRunAlignment(traceSegments, source(gapsH), source(gapsV), scoringScheme, lowerDiag, - upperDiag, SmithWaterman()); - _adaptTraceSegmentsTo(gapsH, gapsV, traceSegments); - return score; +template +TScoreValue localAlignment(Gaps & gapsH, + Gaps & gapsV, + Score const & scoringScheme, + int lowerDiag, + int upperDiag) +{ + if (_usesAffineGaps(scoringScheme, source(gapsH), source(gapsV))) + return localAlignment(gapsH, gapsV, scoringScheme, lowerDiag, upperDiag, AffineGaps()); + else + return localAlignment(gapsH, gapsV, scoringScheme, lowerDiag, upperDiag, LinearGaps()); } // ---------------------------------------------------------------------------- // Function localAlignment() [banded, Graph >] // ---------------------------------------------------------------------------- -template +template TScoreValue localAlignment(Graph > & alignmentGraph, Score const & scoringScheme, int lowerDiag, - int upperDiag) + int upperDiag, + TTag const & tag) { typedef Graph > TGraph; typedef typename Size::Type TSize; typedef typename Position::Type TPosition; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig2, FreeEndGaps_<> > TAlignConfig2; + + String trace; + DPScoutState_ dpScoutState; + TScoreValue res = 
_setUpAndRunAlignment(trace, dpScoutState, value(stringSet(alignmentGraph), 0), + value(stringSet(alignmentGraph), 1), scoringScheme, + TAlignConfig2(lowerDiag, upperDiag), tag); - String traceSegments; - TScoreValue score = _setUpAndRunAlignment(traceSegments, value(stringSet(alignmentGraph), 0), - value(stringSet(alignmentGraph), 1), scoringScheme, lowerDiag, upperDiag, - SmithWaterman()); _adaptTraceSegmentsTo(alignmentGraph, positionToId(stringSet(alignmentGraph), 0), - positionToId(stringSet(alignmentGraph), 1), traceSegments); - return score; + positionToId(stringSet(alignmentGraph), 1), trace); + return res; +} + +template +TScoreValue localAlignment(Graph > & alignmentGraph, + Score const & scoringScheme, + int lowerDiag, + int upperDiag) +{ + SEQAN_ASSERT(length(stringSet(alignmentGraph)) == 2u); + + if (_usesAffineGaps(scoringScheme, stringSet(alignmentGraph)[0], stringSet(alignmentGraph)[1])) + return localAlignment(alignmentGraph, scoringScheme, lowerDiag, upperDiag, AffineGaps()); + else + return localAlignment(alignmentGraph, scoringScheme, lowerDiag, upperDiag, LinearGaps()); } // ---------------------------------------------------------------------------- @@ -134,6 +185,27 @@ TScoreValue localAlignment(Graph > & a // ---------------------------------------------------------------------------- // Full interface. 
+template +TScoreValue localAlignment(String, TStringSpec> & fragmentString, + StringSet const & strings, + Score const & scoringScheme, + int lowerDiag, + int upperDiag, + TTag const & tag) +{ + typedef String, TStringSpec> TFragments; + typedef typename Position::Type TPosition; + typedef TraceSegment_ TTraceSegment; + typedef AlignConfig2, FreeEndGaps_<> > TAlignConfig2; + + String trace; + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(trace, dpScoutState, value(strings, 0), value(strings, 1), scoringScheme, + TAlignConfig2(lowerDiag, upperDiag), tag); + _adaptTraceSegmentsTo(fragmentString, positionToId(strings, 0), positionToId(strings, 1), trace); + return res; +} template , TStringSpec> & int lowerDiag, int upperDiag) { - typedef String, TStringSpec> TFragments; - typedef typename Position::Type TPosition; - typedef TraceSegment_ TTraceSegment; - - String traceSegments; + SEQAN_ASSERT(length(strings) == 2u); - TScoreValue score = _setUpAndRunAlignment(traceSegments, value(strings, 0), value(strings, 1), scoringScheme, - lowerDiag, upperDiag, SmithWaterman()); - _adaptTraceSegmentsTo(fragmentString, positionToId(strings, 0), positionToId(strings, 1), traceSegments); - return score; + if (_usesAffineGaps(scoringScheme, strings[0], strings[1])) + return localAlignment(fragmentString, strings, scoringScheme, lowerDiag, upperDiag, AffineGaps()); + else + return localAlignment(fragmentString, strings, scoringScheme, lowerDiag, upperDiag, LinearGaps()); } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_BANDED_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_BANDED_H_ diff --git a/seqan/align/local_alignment_banded_waterman_eggert_impl.h b/seqan/align/local_alignment_banded_waterman_eggert_impl.h index 763484b..8465129 100644 --- a/seqan/align/local_alignment_banded_waterman_eggert_impl.h +++ b/seqan/align/local_alignment_banded_waterman_eggert_impl.h @@ -1,7 +1,7 @@ // 
========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ // Author: Birte Kehr // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_BANDED_WATERMAN_EGGERT_IMPL_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_BANDED_WATERMAN_EGGERT_IMPL_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_BANDED_WATERMAN_EGGERT_IMPL_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_BANDED_WATERMAN_EGGERT_IMPL_H_ namespace seqan { @@ -100,7 +100,7 @@ _alignBandedSmithWaterman(LocalAlignmentFinder& finder, for (TSize row = 1; row < height; ++row) { actualRow = row + lo_row; hori_val = 0; - + for (TSize col = 0; col < diagonalWidth; ++col, ++matIt) { // handle begin and end triangle of band if ((int)col + diagL + (int)actualRow < 0) {++matIt2; continue;} @@ -153,13 +153,13 @@ _alignBandedSmithWaterman(LocalAlignmentFinder& finder, } // // Debug code // std::cerr << std::endl; - //for(TSize i= 0; i 0) std::cerr << " " << str2[i-1] << std::endl; - // else std::cerr << std::endl; - //} + //for(TSize i= 0; i 0) std::cerr << " " << str2[i-1] << std::endl; + // else std::cerr << std::endl; + //} // if(length(finder.pQ) > 0) std::cerr << "Max score: " << top(finder.pQ).value_ << std::endl; if(!empty(finder.pQ)) { @@ -177,20 +177,20 @@ _alignBandedSmithWaterman(LocalAlignmentFinder& finder, template inline void _setForbiddenCell(String& forbidden, - TSize len1, - TSize len2, - TSize numRows) + TSize len1, + TSize len2, + TSize numRows) { - forbidden[(len1 - 1)*numRows + (len2 - 1)] = true; + forbidden[(len1 - 1)*numRows + (len2 - 1)] = true; } 
template inline void _setForbiddenCell(Nothing&, - TSize, - TSize, - TSize) + TSize, + TSize, + TSize) { } @@ -213,7 +213,7 @@ _alignBandedSmithWatermanDeclump(LocalAlignmentFinder& finder, typedef unsigned char TTraceValue; // Traceback values - TTraceValue Diagonal = 0; TTraceValue Horizontal = 1; TTraceValue Vertical = 2; + TTraceValue Diagonal = 0; TTraceValue Horizontal = 1; TTraceValue Vertical = 2; TSequenceH const& str1 = seqH; TSequenceV const& str2 = seqV; @@ -279,7 +279,7 @@ _alignBandedSmithWatermanDeclump(LocalAlignmentFinder& finder, maxCol = traceCol + 1; } } - + // iterate over columns that have to be re-calculated if (maxCol > minCol) { col = minCol; @@ -299,18 +299,18 @@ _alignBandedSmithWatermanDeclump(LocalAlignmentFinder& finder, ++matIt2; // horizontal - if (col > 0) { - newVal = _max(newVal, *(matIt-1) + - scoreGapExtendHorizontal(sc, sequenceEntryForScore(sc, str1, (int) actualCol-1), - sequenceEntryForScore(sc, str2, (int)actualRow-1))); - } + if (col > 0) { + newVal = _max(newVal, *(matIt-1) + + scoreGapExtendHorizontal(sc, sequenceEntryForScore(sc, str1, (int) actualCol-1), + sequenceEntryForScore(sc, str2, (int)actualRow-1))); + } // vertical - if (col+1 < maxCol) { - newVal = _max(newVal, *matIt2 + - scoreGapExtendVertical(sc, sequenceEntryForScore(sc, str1, (int)actualCol-1), - sequenceEntryForScore(sc, str2, (int)actualRow-1))); - } + if (col+1 < maxCol) { + newVal = _max(newVal, *matIt2 + + scoreGapExtendVertical(sc, sequenceEntryForScore(sc, str1, (int)actualCol-1), + sequenceEntryForScore(sc, str2, (int)actualRow-1))); + } if (newVal != *matIt) { // matrix entry changed @@ -350,13 +350,13 @@ _alignBandedSmithWatermanDeclump(LocalAlignmentFinder& finder, // for(TSize j = 0; j < diagonalWidth; ++j) { // std::cerr << value(finder.matrix, j, i) << ','; // } - //if (i > 0) std::cerr << " " << str2[i-1] << " "; - //else std::cerr << " "; + //if (i > 0) std::cerr << " " << str2[i-1] << " "; + //else std::cerr << " "; // for (TSize j= 0; j 
0) std::cerr << " " << str2[i-1] << std::endl; - //else std::cerr << std::endl; + //else std::cerr << std::endl; // } } @@ -383,7 +383,7 @@ _alignBandedSmithWatermanTrace(LocalAlignmentFinder & finder, clear(finder.trace.tvs); // Traceback values - TTraceValue Diagonal = 0; TTraceValue Horizontal = 1; TTraceValue Vertical = 2; TTraceValue Stop = 3; + TTraceValue Diagonal = 0; TTraceValue Horizontal = 1; TTraceValue Vertical = 2; TTraceValue Stop = 3; // Initialization TSequenceH const& str1 = seqH; @@ -404,18 +404,18 @@ _alignBandedSmithWatermanTrace(LocalAlignmentFinder & finder, TSize endRow = row + lo_row; TSize endCol = static_cast(col + diagL + endRow); - TSize actualRow = row + lo_row; + TSize actualRow = row + lo_row; TSize actualCol = static_cast(col + diagL + actualRow); - if ((actualCol == 0) || (actualRow == 0)) + if ((actualCol == 0) || (actualRow == 0)) return Pair >(); - if (actualCol < len1) _alignTracePrint(finder.trace, seqH, seqV, id1, actualCol, id2, actualRow, len1 - actualCol, Horizontal); - if (actualRow < len2) _alignTracePrint(finder.trace, seqH, seqV, id1, actualCol, id2, actualRow, len2 - actualRow, Vertical); - + if (actualCol < len1) _alignTracePrint(finder.trace, seqH, seqV, id1, actualCol, id2, actualRow, len1 - actualCol, Horizontal); + if (actualRow < len2) _alignTracePrint(finder.trace, seqH, seqV, id1, actualCol, id2, actualRow, len2 - actualRow, Vertical); + TTraceValue traceValue = Stop; TTraceValue nextTraceValue = Horizontal; TSize segLen = 0; - + while (nextTraceValue != Stop) { traceValue = nextTraceValue; if (*matIt == 0) { @@ -428,14 +428,14 @@ _alignBandedSmithWatermanTrace(LocalAlignmentFinder & finder, nextTraceValue = Diagonal; --actualRow; --actualCol; --row; - goPrevious(matIt, 1); + goPrevious(matIt, 1); goPrevious(matIt2, 1); } else if (*matIt == *(matIt2+1) + scoreGapExtendVertical(sc, sequenceEntryForScore(sc, str1, (int)actualCol-1), sequenceEntryForScore(sc, str2, (int)actualRow-1))) { nextTraceValue = Vertical; 
- --actualRow; + --actualRow; --row; ++col; goPrevious(matIt, 1); goNext(matIt, 0); goPrevious(matIt2, 1); goNext(matIt2, 0); @@ -444,7 +444,7 @@ _alignBandedSmithWatermanTrace(LocalAlignmentFinder & finder, scoreGapExtendHorizontal(sc, sequenceEntryForScore(sc, str1, (int) actualCol-1), sequenceEntryForScore(sc, str2, (int) actualRow-1))); nextTraceValue = Horizontal; - --actualCol; + --actualCol; --col; goPrevious(matIt, 0); goPrevious(matIt2, 0); @@ -456,10 +456,10 @@ _alignBandedSmithWatermanTrace(LocalAlignmentFinder & finder, segLen = 1; } } - - // Handle the remaining sequence - if (actualCol != 0) _alignTracePrint(finder.trace, seqH, seqV, (TId) id1, (TSize) 0, (TId) 0, (TSize) 0, (TSize) actualCol, Horizontal); - if (actualRow != 0) _alignTracePrint(finder.trace, seqH, seqV, (TId) 0, (TSize) 0, (TId) id2, (TSize) 0, (TSize) actualRow, Vertical); + + // Handle the remaining sequence + if (actualCol != 0) _alignTracePrint(finder.trace, seqH, seqV, (TId) id1, (TSize) 0, (TId) 0, (TSize) 0, (TSize) actualCol, Horizontal); + if (actualRow != 0) _alignTracePrint(finder.trace, seqH, seqV, (TId) 0, (TSize) 0, (TId) id2, (TSize) 0, (TSize) actualRow, Vertical); goNext(matIt, 1); // assumes that each trace ends with a diagonal finder.bestBeginPos = position(matIt); @@ -488,7 +488,7 @@ _initLocalAlignmentFinder(TSequenceH const & seqH, TSize len0 = length(seqH); if (len0 - lowerDiag < hi_row) hi_row = static_cast(len0 - lowerDiag); TSize height = hi_row - lo_row + 1; - SEQAN_ASSERT_GEQ(upperDiag, lowerDiag); + SEQAN_ASSERT_GEQ(upperDiag, lowerDiag); TSize diagonalWidth = (TSize) (upperDiag - lowerDiag + 1); setDimension(finder.matrix, 2); @@ -498,8 +498,8 @@ _initLocalAlignmentFinder(TSequenceH const & seqH, resize(finder.forbidden, height * diagonalWidth, false); - finder.bestEndPos = minValue(); - finder.bestBeginPos = minValue(); + finder.bestEndPos = minValue(); + finder.bestBeginPos = minValue(); } // 
---------------------------------------------------------------------------- @@ -524,7 +524,7 @@ _localAlignment(LocalAlignmentFinder & finder, _initLocalAlignmentFinder(source(gapsH), source(gapsV), finder, BandedWatermanEggert(), diag1, diag2); finder.needReinit = false; - + // Fill the matrix TScoreValue maxScore = _alignBandedSmithWaterman(finder, source(gapsH), source(gapsV), sc, cutoff, diag1, diag2); if (maxScore < cutoff) return 0; @@ -533,7 +533,7 @@ _localAlignment(LocalAlignmentFinder & finder, Pair > alignmentPositions = _alignBandedSmithWatermanTrace(finder, source(gapsH), source(gapsV), 0u, 0u, sc, diag1, diag2); // Fill the gaps following the trace path. - _pumpTraceToGaps(gapsH, gapsV, finder.trace); + _pumpTraceToGaps(gapsH, gapsV, finder.trace); // The following (using rightOfGaps = false) only works if the gap open cost is negative. In this case, we can // assume that there are no leading or trailing gaps in either sequence. @@ -544,10 +544,10 @@ _localAlignment(LocalAlignmentFinder & finder, // TODO(holtgrew): This should be done in a more robust way. It would probably be better if alignmentPositions gave us the lengths of the local alignments! 
setClippedEndPosition(gapsH, toViewPosition(gapsH, alignmentPositions.i1.i2, false)); setClippedEndPosition(gapsV, toViewPosition(gapsV, alignmentPositions.i2.i2, false)); - setClippedBeginPosition(gapsH, toViewPosition(gapsH, alignmentPositions.i1.i1)); - setClippedBeginPosition(gapsV, toViewPosition(gapsV, alignmentPositions.i2.i1)); + setClippedBeginPosition(gapsH, toViewPosition(gapsH, alignmentPositions.i1.i1)); + setClippedBeginPosition(gapsV, toViewPosition(gapsV, alignmentPositions.i2.i1)); - pop(finder.pQ); + pop(finder.pQ); return maxScore; } @@ -569,17 +569,17 @@ _localAlignmentNext(LocalAlignmentFinder & finder, // Declump the matrix and find new maximum score _alignBandedSmithWatermanDeclump(finder, source(gapsH), source(gapsV), sc, cutoff, diag1, diag2); typename LocalAlignmentFinder::TMatrixPosition nextBestEnd; - nextBestEnd = _getNextBestEndPosition(finder, cutoff); - if(nextBestEnd==0) - return 0; + nextBestEnd = _getNextBestEndPosition(finder, cutoff); + if(nextBestEnd==0) + return 0; TScoreValue maxScore = getValue(finder.matrix, nextBestEnd); - if(maxScore == 0) return 0; + if(maxScore == 0) return 0; // Follow the trace matrix and create a trace path Pair > alignmentPositions = _alignBandedSmithWatermanTrace(finder, source(gapsH), source(gapsV), 0u, 0u, sc, diag1, diag2); // Fill the gaps following the trace path. - _pumpTraceToGaps(gapsH, gapsV, finder.trace); + _pumpTraceToGaps(gapsH, gapsV, finder.trace); // The following (using rightOfGaps = false) only works if the gap open cost is negative. In this case, we can // assume that there are no leading or trailing gaps in either sequence. @@ -590,12 +590,12 @@ _localAlignmentNext(LocalAlignmentFinder & finder, // TODO(holtgrew): This should be done in a more robust way. It would probably be better if alignmentPositions gave us the lengths of the local alignments! 
setClippedEndPosition(gapsH, toViewPosition(gapsH, alignmentPositions.i1.i2, false)); setClippedEndPosition(gapsV, toViewPosition(gapsV, alignmentPositions.i2.i2, false)); - setClippedBeginPosition(gapsH, toViewPosition(gapsH, alignmentPositions.i1.i1)); - setClippedBeginPosition(gapsV, toViewPosition(gapsV, alignmentPositions.i2.i1)); + setClippedBeginPosition(gapsH, toViewPosition(gapsH, alignmentPositions.i1.i1)); + setClippedBeginPosition(gapsV, toViewPosition(gapsV, alignmentPositions.i2.i1)); return maxScore; } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_BANDED_WATERMAN_EGGERT_IMPL_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_BANDED_WATERMAN_EGGERT_IMPL_H_ diff --git a/seqan/align/local_alignment_enumeration.h b/seqan/align/local_alignment_enumeration.h index d797606..5af3901 100644 --- a/seqan/align/local_alignment_enumeration.h +++ b/seqan/align/local_alignment_enumeration.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Manuel Holtgrewe // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_H_ namespace seqan { @@ -55,6 +55,7 @@ class LocalAlignmentEnumerator; /*! * @class LocalAlignmentEnumerator * @headerfile + * @brief Enumeration of local alignments. 
* * @signature template * class LocalAlignmentEnumerator; @@ -83,22 +84,22 @@ class LocalAlignmentEnumerator; * * @tparam TScore The @link Score @endlink type. * - * @section Example + * @section Examples * * Enumerate all alignments into a @link Align @endlink object. * * @code{.cpp} * SimpleScore scoringScheme(2, -1, -1, -2); * LocalAlignmentEnumerator enumerator(scoringScheme, 5); - * + * * Dna5String seqH = "CGAGAGAGACCGAGA"; * Dna5String seqV = "TTCTGAGATCCGTTTTT"; - * + * * Align align; - * resize(rows(align), 2); + * resize(rows(align), 2);@s * assignSource(row(align), 0, seqH); * assignSource(row(align), 1, seqV); - * + * * int i = 0; * while (nextLocalAlignment(align, enumerator)) * { @@ -115,8 +116,8 @@ class LocalAlignmentEnumerator; * * @signature LocalAlignmentEnumerator::LocalAlignmentEnumerator(scheme[, cutoff]); * - * @param scheme The @link Score @endlink object to use for the alignment score. - * @param cutoff Alignments with scores < cutoff will be discarded (int, default 0). + * @param[in] scheme The @link Score @endlink object to use for the alignment score. + * @param[in] cutoff Alignments with scores < cutoff will be discarded (int, default 0). */ /*! @@ -130,22 +131,22 @@ class LocalAlignmentEnumerator; * * @tparam TScore The @link Score @endlink type. * - * @section Example + * @section Examples * * Enumerate all alignments in the band between -3 and 0 into an @link Align @endlink object. 
* * @code{.cpp} * SimpleScore scoringScheme(2, -1, -1, -2); * LocalAlignmentEnumerator enumerator(scoringScheme, -3, 0, 5); - * + * * Dna5String seqH = "CGAGAGAGACCGAGA"; * Dna5String seqV = "TTCTGAGATCCGTTTTT"; - * + * * Align align; * resize(rows(align), 2); * assignSource(row(align), 0, seqH); * assignSource(row(align), 1, seqV); - * + * * int i = 0; * while (nextLocalAlignment(align, enumerator)) * { @@ -162,104 +163,12 @@ class LocalAlignmentEnumerator; * * @signature LocalAlignmentEnumerator::LocalAlignmentEnumerator(scheme, upperDiag, lowerDiag[, cutoff]); * - * @param scheme The @link Score @endlink object to use for the alignment score. - * @param upperDiag An int with the upper diagonal. - * @param lowerDiag An int with the lower diagonal. - * @param cutoff Alignments with scores < cutoff will be discarded (int, default 0). + * @param[in] scheme The @link Score @endlink object to use for the alignment score. + * @param[in] upperDiag An int with the upper diagonal. + * @param[in] lowerDiag An int with the lower diagonal. + * @param[in] cutoff Alignments with scores < cutoff will be discarded (int, default 0). */ -/** -.Class.LocalAlignmentEnumerator -..cat:Alignments -..summary:Enumerate local alignments using the Waterman-Eggert algorithm. -..description:This is an abstract base class for the alignment enumeration; the specializations provide the actual implementaiton of banded and unbanded local aligment search. -..signature:LocalAlignmentEnumerator -..param.TScore:The @Class.Score@ type to use. -...type:Class.Score -..param.TSpec:Specialization tag. -..example.text:See the specializations for usage examples. -..cite:Waterman MS, Eggert M: A new algorithm for best subsequence alignments with application to tRNA-rRNA comparisons. J Mol Biol 1987, 197(4):723-728. 
-..include:seqan/align.h - -.Spec.Unbanded LocalAlignmentEnumerator -..cat:Alignments -..general:Class.LocalAlignmentEnumerator -..summary:Unbanded enumeration of local alignments using the Waterman-Eggert algorithm. -..signature:LocalAlignmentEnumerator -..example.text:Enumerate all alignments into an @Class.Align@ object. -..example.code: -SimpleScore scoringScheme(2, -1, -1, -2); -LocalAlignmentEnumerator enumerator(scoringScheme, 5); - -Dna5String seqH = "CGAGAGAGACCGAGA"; -Dna5String seqV = "TTCTGAGATCCGTTTTT"; - -Align align; -resize(rows(align), 2); -assignSource(row(align), 0, seqH); -assignSource(row(align), 1, seqV); - -int i = 0; -while (nextLocalAlignment(align, enumerator)) -{ - std::cout << i << "-th alignment:\n"; - std::cout << align << "\n\n"; - std::cout << "score == " << getScore(enumerator) << "\n"; -} -..include:seqan/align.h - -.Memfunc.Unbanded LocalAlignmentEnumerator#LocalAlignmentEnumerator -..class:Spec.Unbanded LocalAlignmentEnumerator -..summary:Constructor -..signature:LocalAlignmentEnumerator(score, [cutoff]) -..param.score:The scoring scheme to use for the alignments. -...type:Class.Score -..param.cutoff:Alignments with scores < $cutoff$ will be discarded. -...default:0 -...type:nolink:$int$ - -.Spec.Banded LocalAlignmentEnumerator -..cat:Alignments -..general:Class.LocalAlignmentEnumerator -..signature:LocalAlignmentEnumerator -..summary:Banded enumeration of local alignments using the Waterman-Eggert algorithm. -..example.text:Enumerate all alignments in the band between $-3$ and $0$ into an @Class.Align@ object. 
-..example.code: -SimpleScore scoringScheme(2, -1, -1, -2); -LocalAlignmentEnumerator enumerator(scoringScheme, 5, -3, 0); - -Dna5String seqH = "CGAGAGAGACCGAGA"; -Dna5String seqV = "TTCTGAGATCCGTTTTT"; - -Align align; -resize(rows(align), 2); -assignSource(row(align), 0, seqH); -assignSource(row(align), 1, seqV); - -int i = 0; -while (nextLocalAlignment(align, enumerator)) -{ - std::cout << i << "-th alignment:\n"; - std::cout << align << "\n\n"; - std::cout << "score == " << getScore(enumerator) << "\n"; -} -..include:seqan/align.h - -.Memfunc.Banded LocalAlignmentEnumerator#LocalAlignmentEnumerator -..class:Spec.Banded LocalAlignmentEnumerator -..summary:Constructor -..signature:LocalAlignmentEnumerator(score, upperDiag, lowerDiag, [cutoff]) -..param.score:The scoring scheme to use for the alignments. -...type:Class.Score -..param.upperDiag:Upper diagonal of the band. -...type:nolink:$int$ -..param.lowerDiag:Lower diagonal of the band. -...type:nolink:$int$ -..param.cutoff:Alignments with scores < $cutoff$ will be discarded. -...type:nolink:$int$ -...default:0 -*/ - // ============================================================================ // Metafunctions // ============================================================================ @@ -279,22 +188,9 @@ while (nextLocalAlignment(align, enumerator)) * * @signature TScoreVal getScore(enumerator); * - * @param enumerator The LocalAlignmentEnumerator to query. - * - * @return TScoreVal The current alignment score. - */ - -/** -.Function.LocalAlignmentEnumerator#getScore -..cat:Alignments -..summary:Compute next suboptimal local alignment. -..signature:getScore(enumerator) -..param.enumerator:The local alignment enumerator to use. -...type:Class.LocalAlignmentEnumerator -..returns: -The score of the previously computed alignment. -(Type: @Metafunction.Value@ of $enumerator$'s class.) -..include:seqan/align.h + * @param[in] enumerator The LocalAlignmentEnumerator to query. 
+ * + * @return TScoreVal The current alignment score (@link Score#Value @endlink of TScore). */ // ---------------------------------------------------------------------------- @@ -309,34 +205,15 @@ The score of the previously computed alignment. * @signature bool nextLocalAlignment(align, enumerator); * @signature bool nextLocalAlignment(gapsH, gapsV, enumerator); * - * @param align @link Align @endlink object to use for the alignment representation. - * @param gapsH @link Gaps @endlink object to use for the first/horizontal sequence in the alignment matrix. - * @param gapsV @link Gaps @endlink object to use for the second/vertical sequence in the alignment matrix. - * @param enumerator The LocalAlignmentEnumerator to advance. - * + * @param[in] align @link Align @endlink object to use for the alignment representation. + * @param[in] gapsH @link Gaps @endlink object to use for the first/horizontal sequence in the alignment matrix. + * @param[in] gapsV @link Gaps @endlink object to use for the second/vertical sequence in the alignment matrix. + * @param[in] enumerator The LocalAlignmentEnumerator to advance. + * * @return bool true if another suboptimal alignment above the given threshold was found and false * otherwise. */ -/** -.Function.nextLocalAlignment -..cat:Alignments -..summary:Compute next suboptimal local alignment. -..signature:nextLocalAlignment(align, enumerator) -..signature:nextLocalAlignment(gapsH, gapsV, enumerator) -..param.align:The @Class.Align@ object to use for the alignment representation. -...type:Class.Align -..param.gapsH:The @Class.Gaps@ object to use for the horizontal sequence in the alignment matrix. -...type:Class.Gaps -..param.gapsV:The @Class.Gaps@ object to use for the vertical sequence in the alignment matrix. -...type:Class.Gaps -..param.enumerator:The @Class.LocalAlignmentEnumerator@ object to use. 
-...type:Class.LocalAlignmentEnumerator -..returns:$true$ if another suboptimal alignment above the given threshold was found, $false$ otherwise. -...type:nolink:$bool$ -..include:seqan/align.h -*/ - } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_H_ diff --git a/seqan/align/local_alignment_enumeration_banded.h b/seqan/align/local_alignment_enumeration_banded.h index 2d50d72..d8a5828 100644 --- a/seqan/align/local_alignment_enumeration_banded.h +++ b/seqan/align/local_alignment_enumeration_banded.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Manuel Holtgrewe // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_BANDED_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_BANDED_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_BANDED_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_BANDED_H_ namespace seqan { @@ -57,13 +57,13 @@ class LocalAlignmentEnumerator { public: typedef typename Value::Type TScoreValue_; - + TScore _score; int _lowerDiag; int _upperDiag; TScoreValue_ _cutoff; LocalAlignmentFinder _finder; - + LocalAlignmentEnumerator(TScore const & score, int lowerDiag, int upperDiag) : _score(score), _lowerDiag(lowerDiag), _upperDiag(upperDiag), _cutoff(0) {} @@ -123,4 +123,4 @@ nextLocalAlignment(Align & align, } // namespace seqan -#endif // #ifndef 
SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_BANDED_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_BANDED_H_ diff --git a/seqan/align/local_alignment_enumeration_unbanded.h b/seqan/align/local_alignment_enumeration_unbanded.h index 35914ea..4df4a2b 100644 --- a/seqan/align/local_alignment_enumeration_unbanded.h +++ b/seqan/align/local_alignment_enumeration_unbanded.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Manuel Holtgrewe // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_UNBANDED_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_UNBANDED_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_UNBANDED_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_UNBANDED_H_ namespace seqan { @@ -57,11 +57,11 @@ class LocalAlignmentEnumerator { public: typedef typename Value::Type TScoreValue_; - + TScore _scoringScheme; TScoreValue_ _cutoff; LocalAlignmentFinder _finder; - + LocalAlignmentEnumerator(TScore const & score) : _scoringScheme(score), _cutoff(0) {} @@ -119,4 +119,4 @@ nextLocalAlignment(Align & align, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_UNBANDED_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_ENUMERATION_UNBANDED_H_ diff --git a/seqan/align/local_alignment_unbanded.h b/seqan/align/local_alignment_unbanded.h index f560b1c..fe13192 100644 --- a/seqan/align/local_alignment_unbanded.h +++ 
b/seqan/align/local_alignment_unbanded.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Interface functions for unbanded local alignment. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_UNBANDED_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_UNBANDED_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_UNBANDED_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_UNBANDED_H_ namespace seqan { @@ -63,43 +63,40 @@ namespace seqan { * @fn localAlignment * @headerfile * @brief Computes the best pairwise local alignment using the Smith-Waterman algorithm. - * + * * @signature TScoreVal localAlignment(align, scoringScheme, [lowerDiag, upperDiag]); * @signature TScoreVal localAlignment(gapsH, gapsV, scoringScheme, [lowerDiag, upperDiag]); * @signature TScoreVal localAlignment(fragmentString, scoringScheme, [lowerDiag, upperDiag]); - * - * @param lowerDiag Optional lower diagonal (int). - * @param lowerDiag Optional upper diagonal (int). * - * @param gapsH Horizontal gapped sequence in alignment matrix. Types: Gaps - * @param align An @link Align @endlink object that stores the alignment. The - * number of rows must be 2 and the sequences must have already - * been set. align[0] is the horizontal one in the - * alignment matrix alignment, align[1] is the vertical - * one. Types: Align - * @param fragmentString String of @link Fragment @endlink objects. The sequence - * with id 0 is the horizontal one, the sequence - * with id 1 is the vertical one. 
- * @param gapsV Vertical gapped sequence in alignment matrix. Types: Gaps - * @param scoringScheme The scoring scheme to use for the alignment. Note that - * the user is responsible for ensuring that the scoring - * scheme is compatible with algorithmTag. Types: - * Score - * - * @return TScoreVal The score value of the alignmetn. - * - * @section Remarks - * + * @param[in,out] gapsH Horizontal gapped sequence in alignment matrix. Types: @link Gaps @endlink + * @param[in,out] gapsV Vertical gapped sequence in alignment matrix. Types: @link Gaps @endlink + * @param[in,out] align An @link Align @endlink object that stores the alignment. The + * number of rows must be 2 and the sequences must have already + * been set. align[0] is the horizontal one in the + * alignment matrix alignment, align[1] is the vertical + * one. + * @param[in,out] fragmentString + * String of @link Fragment @endlink objects. The sequence + * with id 0 is the horizontal one, the sequence + * with id 1 is the vertical one. + * @param[in] scoringScheme + * The @link Score scoring scheme @endlink to use for the alignment. + * @param[in] lowerDiag Optional lower diagonal (int). + * @param[in] upperDiag Optional upper diagonal (int). + * + * @return TScoreVal Score value of the resulting alignment (Metafunction @link Score#Value @endlink of the type of + * scoringScheme). + * * The Waterman-Eggert algorithm (local alignment with declumping) is available through the @link * LocalAlignmentEnumerator @endlink class. - * + * * When using @link Gaps @endlink and @link Align @endlink objects, only parts (i.e. one infix) of each sequence will be * aligned. This will be presented to the user by setting the clipping begin and end position of the gaps (the rows in * the case of @link Align @endlink objects). When using @link Fragment @endlink strings, these parts of the sequences * will not appear in any fragment. - * + * * There exist multiple overloads for this function with two configuration dimensions. 
- * + * * First, you can select the type of the target storing the alignment. This can be either an @link Align @endlink * object, two @link Gaps @endlink objects, or a string of @link Fragment @endlink objects. @link Align @endlink objects * provide an interface to tabular alignments with the restriction of all rows having the same type. Using two @link @@ -107,45 +104,45 @@ namespace seqan { * @endlink and @link Dna5String @endlink. Using @link Fragment @endlink strings is useful for collecting many pairwise * alignments, for example in the construction of @link AlignmentGraph Alignment Graphs @endlink for multiple- sequence * alignments (MSA). - * + * * Second, you can optionally give a band for the alignment using lowerDiag and upperDiag. The center * diagonal has index 0, the ith diagonal below has index -i, the ith above has * index i. - * + * * The examples below show some common use cases. - * + * * @section Examples - * + * * Local alignment of two sequences using an @link Align @endlink object. - * + * * @code{.cpp} * Dna5String seqH = "CGATT"; * Dna5String seqV = "CGAAATT"; - * + * * Align align; * resize(rows(align), 2); * assignSource(row(align, 0), seqH); * assignSource(row(align, 0), seqV); * Score scoringScheme(2, -1, -2); - * + * * int result = localAlignment(align, scoringScheme); * @endcode * * Local banded alignment of two sequences using two @link Gaps @endlink objects. - * + * * @code{.cpp} * Dna5String seqH = "CGATT"; * Gaps gapsH(seqH); * DnaString seqV = "CGAAATT"; * Gaps > gapsV(seqV); - * + * * Score scoringScheme(5, -3, -1, -5); - * + * * int result = localAlignment(gapsH, gapsV, scoringScheme, -2, 2); * @endcode * - * http://trac.seqan.de/wiki/Tutorial/PairwiseSequenceAlignment - * + * http://seqan.readthedocs.org/en/develop/Tutorial/PairwiseSequenceAlignment.html + * * @section References * *
    @@ -157,124 +154,78 @@ namespace seqan { * @see PairwiseLocalAlignmentAlgorithms */ -/** -.Function.localAlignment -..summary:Computes the best pairwise local alignment using the Smith-Waterman algorithm. -..cat:Alignments -..signature:localAlignment(align, scoringScheme, [lowerDiag, upperDiag]) -..signature:localAlignment(gapsH, gapsV, scoringScheme, [lowerDiag, upperDiag]) -..signature:localAlignment(fragmentString, scoringScheme, [lowerDiag, upperDiag]) -..param.align: -An @Class.Align@ object that stores the alignment. -The number of rows must be 2 and the sequences must have already been set. -$align[0]$ is the horizontal one in the alignment matrix alignment, $align[1]$ is the vertical one. -...type:Class.Align -..param.gapsH:Horizontal gapped sequence in alignment matrix. -...type:Class.Gaps -..param.gapsV:Vertical gapped sequence in alignment matrix. -...type:Class.Gaps -..param.fragmentString: -String of @Class.Fragment@ objects. -The sequence with id $0$ is the horizontal one, the sequence with id $1$ is the vertical one. -..param.scoringScheme: -The scoring scheme to use for the alignment. -Note that the user is responsible for ensuring that the scoring scheme is compatible with $algorithmTag$. -...type:Class.Score -..param.lowerDiag:Optional lower diagonal. -...type:nolink:$int$ -..param.upperDiag:Optional upper diagonal. -...type:nolink:$int$ -..returns:An integer with the alignment score, as given by the @Metafunction.Value@ metafunction of the @Class.Score@ type. -..remarks:The Waterman-Eggert algorithm (local alignment with declumping) is available through the @Class.LocalAlignmentEnumerator@ class. -..remarks: -When using @Class.Gaps@ and @Class.Align@ objects, only parts (i.e. one infix) of each sequence will be aligned. -This will be presented to the user by setting the clipping begin and end position of the gaps (the rows in the case of @Class.Align@ objects). 
-When using @Class.Fragment@ strings, these parts of the sequences will not appear in any fragment. -..remarks: -There exist multiple overloads for this function with two configuration dimensions. -..remarks: -First, you can select the type of the target storing the alignment. -This can be either an @Class.Align@ object, two @Class.Gaps@ objects, or a string of @Class.Fragment@ objects. -@Class.Align@ objects provide an interface to tabular alignments with the restriction of all rows having the same type. -Using two @Class.Gaps@ objects has the advantage that you an align sequences with different types, for example @Shortcut.DnaString@ and @Shortcut.Dna5String@. -Using @Class.Fragment@ strings is useful for collecting many pairwise alignments, for example in the construction of @Spec.Alignment Graph|Alignment Graphs@ for multiple-sequence alignments (MSA). -..remarks: -Second, you can optionally give a band for the alignment using $lowerDiag$ and $upperDiag$. -The center diagonal has index $0$, the $i$th diagonal below has index $-i$, the $i$th above has index $i$. -..remarks: -The examples below show some common use cases. -..example.text:Local alignment of two sequences using an @Class.Align@ object. -..example.code: -Dna5String seqH = "CGATT"; -Dna5String seqV = "CGAAATT"; - -Align align; -resize(rows(align), 2); -assignSource(row(align, 0), seqH); -assignSource(row(align, 0), seqV); -Score scoringScheme(2, -1, -2); - -int result = localAlignment(align, scoringScheme); -..example.text:Local banded alignment of two sequences using two @Class.Gaps@ objects. 
-..example.code: -Dna5String seqH = "CGATT"; -Gaps gapsH(seqH); -DnaString seqV = "CGAAATT"; -Gaps > gapsV(seqV); - -Score scoringScheme(5, -3, -1, -5); - -int result = localAlignment(gapsH, gapsV, scoringScheme, -2, 2); -..see:Function.globalAlignment -..see:Class.LocalAlignmentEnumerator -..include:seqan/align.h -..wiki:Tutorial/PairwiseSequenceAlignment -..cite:Smith TF, Waterman, MS: Identification of Common Molecular Subsequences. J Mol Biol 1981, 147(1):195-7. -. -*/ - // ---------------------------------------------------------------------------- // Function localAlignment() [unbanded, Align] // ---------------------------------------------------------------------------- -template +template TScoreValue localAlignment(Align & align, - Score const & scoringScheme) + Score const & scoringScheme, + TTag const & tag) { SEQAN_ASSERT_EQ(length(rows(align)), 2u); typedef Align TAlign; typedef typename Size::Type TSize; typedef typename Position::Type TPosition; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig2, FreeEndGaps_<> > TAlignConfig2; + + String trace; + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(trace, dpScoutState, source(row(align, 0)), source(row(align, 1)), + scoringScheme, TAlignConfig2(), tag); - String traceSegments; - TScoreValue score = _setUpAndRunAlignment(traceSegments, source(row(align, 0)), source(row(align, 1)), - scoringScheme, SmithWaterman()); - _adaptTraceSegmentsTo(row(align, 0), row(align, 1), traceSegments); - return score; + _adaptTraceSegmentsTo(row(align, 0), row(align, 1), trace); + return res; } + template + TScoreValue localAlignment(Align & align, + Score const & scoringScheme) + { + SEQAN_ASSERT(length(rows(align)) == 2u); + if (_usesAffineGaps(scoringScheme, source(row(align, 0)), source(row(align, 1)))) + return localAlignment(align, scoringScheme, AffineGaps()); + else + return localAlignment(align, scoringScheme, LinearGaps()); + } + // 
---------------------------------------------------------------------------- // Function localAlignment() [unbanded, Gaps] // ---------------------------------------------------------------------------- -template + TScoreValue localAlignment(Gaps & gapsH, + Gaps & gapsV, + Score const & scoringScheme, + TTag const & tag) + { + typedef typename Size::Type TSize; + typedef typename Position::Type TPosition; + typedef TraceSegment_ TTraceSegment; + typedef AlignConfig2, FreeEndGaps_<> > TAlignConfig2; + + String trace; + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(trace, dpScoutState, source(gapsH), source(gapsV), scoringScheme, + TAlignConfig2(), tag); + _adaptTraceSegmentsTo(gapsH, gapsV, trace); + return res; + } + + template TScoreValue localAlignment(Gaps & gapsH, Gaps & gapsV, Score const & scoringScheme) { - typedef typename Size::Type TSize; - typedef typename Position::Type TPosition; - typedef TraceSegment_ TTraceSegment; - - String traceSegments; - TScoreValue score = _setUpAndRunAlignment(traceSegments, source(gapsH), source(gapsV), scoringScheme, - SmithWaterman()); - _adaptTraceSegmentsTo(gapsH, gapsV, traceSegments); - return score; + if (_usesAffineGaps(scoringScheme, source(gapsH), source(gapsV))) + return localAlignment(gapsH, gapsV, scoringScheme, AffineGaps()); + else + return localAlignment(gapsH, gapsV, scoringScheme, LinearGaps()); } // ---------------------------------------------------------------------------- @@ -284,21 +235,38 @@ TScoreValue localAlignment(Gaps & gapsH, // Full interface. 
template + typename TScoreValue, typename TScoreSpec, typename TTag> TScoreValue localAlignment(Graph > & alignmentGraph, - Score const & scoringScheme) + Score const & scoringScheme, + TTag const & tag) { typedef Graph > TGraph; typedef typename Size::Type TSize; typedef typename Position::Type TPosition; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig2, FreeEndGaps_<> > TAlignConfig2; + + String trace; + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(trace, dpScoutState, value(stringSet(alignmentGraph), 0), + value(stringSet(alignmentGraph), 1), scoringScheme, TAlignConfig2(), tag); - String traceSegments; - TScoreValue score = _setUpAndRunAlignment(traceSegments, value(stringSet(alignmentGraph), 0), - value(stringSet(alignmentGraph), 1), scoringScheme, SmithWaterman()); _adaptTraceSegmentsTo(alignmentGraph, positionToId(stringSet(alignmentGraph), 0), - positionToId(stringSet(alignmentGraph), 1), traceSegments); - return score; + positionToId(stringSet(alignmentGraph), 1), trace); + return res; +} + +template +TScoreValue localAlignment(Graph > & alignmentGraph, + Score const & scoringScheme) +{ + SEQAN_ASSERT(length(stringSet(alignmentGraph)) == 2u); + + if (_usesAffineGaps(scoringScheme, stringSet(alignmentGraph)[0], stringSet(alignmentGraph)[1])) + return localAlignment(alignmentGraph, scoringScheme, AffineGaps()); + else + return localAlignment(alignmentGraph, scoringScheme, LinearGaps()); } // ---------------------------------------------------------------------------- @@ -309,22 +277,41 @@ TScoreValue localAlignment(Graph > & a template + typename TScoreValue, typename TScoreSpec, typename TTag> TScoreValue localAlignment(String, TStringSpec> & fragmentString, StringSet const & strings, - Score const & scoringScheme) + Score const & scoringScheme, + TTag const & tag) { typedef String, TStringSpec> TFragments; typedef typename Position::Type TPosition; typedef TraceSegment_ TTraceSegment; + typedef AlignConfig2, 
FreeEndGaps_<> > TAlignConfig2; + + String trace; + DPScoutState_ dpScoutState; + TScoreValue res = _setUpAndRunAlignment(trace, dpScoutState, value(strings, 0), value(strings, 1), scoringScheme, + TAlignConfig2(), tag); + + _adaptTraceSegmentsTo(fragmentString, positionToId(strings, 0), positionToId(strings, 1), trace); + return res; +} + +template +TScoreValue localAlignment(String, TStringSpec> & fragmentString, + StringSet const & strings, + Score const & scoringScheme) +{ + SEQAN_ASSERT(length(strings) == 2u); - String traceSegments; - TScoreValue score = _setUpAndRunAlignment(traceSegments, value(strings, 0), value(strings, 1), scoringScheme, - SmithWaterman()); - _adaptTraceSegmentsTo(fragmentString, positionToId(strings, 0), positionToId(strings, 1), traceSegments); - return score; + if (_usesAffineGaps(scoringScheme, strings[0], strings[1])) + return localAlignment(fragmentString, strings, scoringScheme, AffineGaps()); + else + return localAlignment(fragmentString, strings, scoringScheme, LinearGaps()); } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_UNBANDED_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_UNBANDED_H_ diff --git a/seqan/align/local_alignment_waterman_eggert_impl.h b/seqan/align/local_alignment_waterman_eggert_impl.h index 5723c54..a4d554f 100644 --- a/seqan/align/local_alignment_waterman_eggert_impl.h +++ b/seqan/align/local_alignment_waterman_eggert_impl.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -36,8 +36,8 @@ // Smith-Waterman algorithm with declumping. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_WATERMAN_EGGERT_IMPL_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_WATERMAN_EGGERT_IMPL_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_WATERMAN_EGGERT_IMPL_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_WATERMAN_EGGERT_IMPL_H_ namespace seqan { @@ -58,20 +58,20 @@ namespace seqan { // Simple class that stores a value with an ID. template -class ScoreAndID +class ScoreAndID { public: - TValue value_; - TID id_; + TValue value_; + TID id_; - ScoreAndID() : value_(MinValue::VALUE), id_(MaxValue::VALUE) - {} + ScoreAndID() : value_(MinValue::VALUE), id_(MaxValue::VALUE) + {} - ScoreAndID(TValue score, TID id_pos) - { - value_ = score; - id_ = id_pos; - } + ScoreAndID(TValue score, TID id_pos) + { + value_ = score; + id_ = id_pos; + } inline bool operator>(ScoreAndID const & other) const { @@ -82,64 +82,47 @@ class ScoreAndID { return value_ < other.value_; } -}; +}; // ---------------------------------------------------------------------------- // Class LocalAlignmentFinder // ---------------------------------------------------------------------------- -/** -.Class.LocalAlignmentFinder: -..cat:Miscellaneous -..summary:Stores the information necessary for local alignment dynamic programming. -..signature:LocalAlignmentFinder -..param.TScoreValue:The value type that is used for scoring the alignments. -...remarks:Use @Metafunction.Value@ to get the value type for a given class. -..see:Function.localAlignment -.Memfunc.LocalAlignmentFinder#LocalAlignmentFinder -..class:Class.LocalAlignmentFinder -..summary:Constructor -..signature:LocalAlignmentFinder(align) -..param.align:An @Class.Align@ object that is already initialized with the sequences. 
-..include:seqan/align.h -*/ - template class LocalAlignmentFinder { public: - typedef Matrix TMatrix; - typedef typename Position::Type TMatrixPosition; + typedef Matrix TMatrix; + typedef typename Position::Type TMatrixPosition; typedef typename Size::Type TSize; - typedef ScoreAndID TPQEntry; - - typedef Iter TMatrixIterator; - typedef PriorityType TPriorityQ; - typedef String TBoolMatrix; - - //DP-matrix - TMatrix matrix; - //matrix that memorizes the cells from which not to go diagonal - TBoolMatrix forbidden; - //priority queue for quickly finding the maximum score in the DP-matrix - TPriorityQ pQ; - //position of maximum score (where traceback is started from) - TMatrixPosition bestEndPos; - //position where traceback ended and where declumping begins - TMatrixPosition bestBeginPos; + typedef ScoreAndID TPQEntry; + + typedef Iter TMatrixIterator; + typedef PriorityType TPriorityQ; + typedef String TBoolMatrix; + + //DP-matrix + TMatrix matrix; + //matrix that memorizes the cells from which not to go diagonal + TBoolMatrix forbidden; + //priority queue for quickly finding the maximum score in the DP-matrix + TPriorityQ pQ; + //position of maximum score (where traceback is started from) + TMatrixPosition bestEndPos; + //position where traceback ended and where declumping begins + TMatrixPosition bestBeginPos; //traceback path that is set to forbidden while declumping AlignTraceback trace; - bool needReinit; //true: call "smithWaterman", false: call "smithWatermanGetNext" + bool needReinit; //true: call "smithWaterman", false: call "smithWatermanGetNext" - LocalAlignmentFinder() : needReinit(true) - {} + LocalAlignmentFinder() : bestEndPos(0), bestBeginPos(0), needReinit(true) + {} // TODO(holtgrew): Remove and replace all occurrences with default constructor. 
template - LocalAlignmentFinder(TAlign const &) - : needReinit(true) - {} + LocalAlignmentFinder(TAlign const &) : bestEndPos(0), bestBeginPos(0), needReinit(true) + {} }; // ============================================================================ @@ -173,9 +156,9 @@ _initLocalAlignmentFinder(TSequenceH const & seqH, resize(finder.matrix); resize(finder.forbidden, (len0 + 1) * (len1 + 1), false); - - finder.bestEndPos = maxValue(); - finder.bestBeginPos = maxValue(); + + finder.bestEndPos = maxValue(); + finder.bestBeginPos = maxValue(); } // ---------------------------------------------------------------------------- @@ -185,7 +168,7 @@ _initLocalAlignmentFinder(TSequenceH const & seqH, template void clear(LocalAlignmentFinder & sw_finder) { - sw_finder.needReinit = true; + sw_finder.needReinit = true; } // ---------------------------------------------------------------------------- @@ -195,10 +178,10 @@ void clear(LocalAlignmentFinder & sw_finder) template TScoreValue getScore(LocalAlignmentFinder const & sw) { - typedef LocalAlignmentFinder TFinder; - if(sw.bestEndPos != maxValue()) - return getValue(const_cast(sw.matrix), sw.bestEndPos); - return 0; + typedef LocalAlignmentFinder TFinder; + if(sw.bestEndPos != maxValue()) + return getValue(const_cast(sw.matrix), sw.bestEndPos); + return 0; } // ---------------------------------------------------------------------------- @@ -213,106 +196,106 @@ _smithWatermanGetMatrix(LocalAlignmentFinder & sw, Score const & score_, TScoreValue cutoff) { - // typedefs - typedef Matrix TMatrix; - typedef typename Position::Type TMatrixPosition; - typedef Iter TMatrixIterator; - - typedef typename Iterator::Type TStringIteratorH; - //typedef typename Value::Type TValueH; - typedef typename Iterator::Type TStringIteratorV; - typedef typename Value::Type TValueV; - - //------------------------------------------------------------------------- - //define some variables - - -// TSize str1_length = length(strH); -// TSize 
str2_length = length(strV); - TStringIteratorH x_begin = begin(strH) - 1; - TStringIteratorH x_end = end(strH) - 1; - TStringIteratorV y_begin = begin(strV) - 1; - TStringIteratorV y_end = end(strV) - 1; - - TStringIteratorH x = x_end; - TStringIteratorV y; - - TScoreValue score_match = scoreMatch(score_); - TScoreValue score_mismatch = scoreMismatch(score_); - TScoreValue score_gap = scoreGapExtend(score_); - - TScoreValue h = 0; - TScoreValue v = 0; - - TMatrixIterator col_ = end(sw.matrix) - 1; - TMatrixIterator finger1; - TMatrixIterator finger2; - - //------------------------------------------------------------------------- - // init - - finger1 = col_; - *finger1 = 0; - //std::cout <<" "; - for (x = x_end; x != x_begin; --x) - { - goPrevious(finger1, 0); - *finger1 = 0; - } - - //------------------------------------------------------------------------- - //fill matrix - for (y = y_end; y != y_begin; --y) - { - TValueV cy = *y; - - h = 0; - v = 0; - - finger2 = col_; //points to last column - goPrevious(col_, 1); //points to this column - finger1 = col_; - - *finger1 = v; - - for (x = x_end; x != x_begin; --x) - { - goPrevious(finger1, 0); - goPrevious(finger2, 0); - - if (*x == cy) - { - v = h + score_match; - h = *finger2; - } - else - { - TScoreValue s1 = h + score_mismatch; - h = *finger2; - TScoreValue s2 = score_gap + ((h > v) ? h : v); - v = (s1 > s2) ? 
s1 : s2; - if (v < 0) v = 0; - - } - *finger1 = v; - if (v >= cutoff) - { - push(sw.pQ,ScoreAndID(v,position(finger1))); - } - } - } - - // check if any scores >= cutoff were found - if(!empty(sw.pQ)) - { + // typedefs + typedef Matrix TMatrix; + typedef typename Position::Type TMatrixPosition; + typedef Iter TMatrixIterator; + + typedef typename Iterator::Type TStringIteratorH; + //typedef typename Value::Type TValueH; + typedef typename Iterator::Type TStringIteratorV; + typedef typename Value::Type TValueV; + + //------------------------------------------------------------------------- + //define some variables + + +// TSize str1_length = length(strH); +// TSize str2_length = length(strV); + TStringIteratorH x_begin = begin(strH) - 1; + TStringIteratorH x_end = end(strH) - 1; + TStringIteratorV y_begin = begin(strV) - 1; + TStringIteratorV y_end = end(strV) - 1; + + TStringIteratorH x = x_end; + TStringIteratorV y; + + TScoreValue score_match = scoreMatch(score_); + TScoreValue score_mismatch = scoreMismatch(score_); + TScoreValue score_gap = scoreGapExtend(score_); + + TScoreValue h = 0; + TScoreValue v = 0; + + TMatrixIterator col_ = end(sw.matrix) - 1; + TMatrixIterator finger1; + TMatrixIterator finger2; + + //------------------------------------------------------------------------- + // init + + finger1 = col_; + *finger1 = 0; + //std::cout <<" "; + for (x = x_end; x != x_begin; --x) + { + goPrevious(finger1, 0); + *finger1 = 0; + } + + //------------------------------------------------------------------------- + //fill matrix + for (y = y_end; y != y_begin; --y) + { + TValueV cy = *y; + + h = 0; + v = 0; + + finger2 = col_; //points to last column + goPrevious(col_, 1); //points to this column + finger1 = col_; + + *finger1 = v; + + for (x = x_end; x != x_begin; --x) + { + goPrevious(finger1, 0); + goPrevious(finger2, 0); + + if (*x == cy) + { + v = h + score_match; + h = *finger2; + } + else + { + TScoreValue s1 = h + score_mismatch; + h = *finger2; + 
TScoreValue s2 = score_gap + ((h > v) ? h : v); + v = (s1 > s2) ? s1 : s2; + if (v < 0) v = 0; + + } + *finger1 = v; + if (v >= cutoff) + { + push(sw.pQ,ScoreAndID(v,position(finger1))); + } + } + } + + // check if any scores >= cutoff were found + if(!empty(sw.pQ)) + { ScoreAndID best = top(sw.pQ); - v = getValue(sw.matrix,best.id_); - sw.bestEndPos = best.id_; - } - else - v=0; + v = getValue(sw.matrix,best.id_); + sw.bestEndPos = best.id_; + } + else + v=0; - return v; + return v; } // ---------------------------------------------------------------------------- @@ -329,9 +312,9 @@ _smithWatermanDeclump(LocalAlignmentFinder & sw , { //------------------------------------------------------------------------- //typedefs - //typedef typename LocalAlignmentFinder::TMatrixPosition TMatrixPosition; + //typedef typename LocalAlignmentFinder::TMatrixPosition TMatrixPosition; typedef typename LocalAlignmentFinder::TMatrix TMatrix; - typedef Iter TMatrixIterator; + typedef Iter TMatrixIterator; typedef Gaps TGapsH; typedef typename Iterator::Type TGapsHIter; @@ -345,193 +328,193 @@ _smithWatermanDeclump(LocalAlignmentFinder & sw , //------------------------------------------------------------------------- //variables - // TRow row0 = row(align_,0); - // TRow row1 = row(align_,1); + // TRow row0 = row(align_,0); + // TRow row1 = row(align_,1); // beginPosition == # leading gaps // endPosition == length of clipped region without trailing gaps // clippedEndPosition == source position of clipping end. 
- // TAlignIterator ali_it0_stop = iter(row0,beginPosition(row0)); - // TAlignIterator ali_it1_stop = iter(row1,beginPosition(row1)); - TGapsHIter ali_it0_stop = begin(gapsH); - TGapsVIter ali_it1_stop = begin(gapsV); - - // SEQAN_ASSERT( endPosition(row0)- beginPosition(row0) == endPosition(row1)- beginPosition(row1) ); - - // TAlignIterator ali_it0 = iter(row0,endPosition(row0)); - // TAlignIterator ali_it1 = iter(row1,endPosition(row1)); - TGapsHIter ali_it0 = end(gapsH); - TGapsVIter ali_it1 = end(gapsV); - - // TStringIterator x_begin = begin(source(row0))-1; - // TStringIterator y_begin = begin(source(row1))-1; - // TStringIterator x_end = iter(source(row0),clippedEndPosition(row0))-1; - // TStringIterator y_end = iter(source(row1),clippedEndPosition(row1))-1; - TSequenceHIter x_begin = begin(source(gapsH))-1; - TSequenceVIter y_begin = begin(source(gapsV))-1; - TSequenceHIter x_end = iter(source(gapsH), endPosition(gapsH) - 1); - TSequenceVIter y_end = iter(source(gapsV), endPosition(gapsV) - 1); - - // TStringIterator x = x_end; - // TStringIterator y = y_end; - // TStringIterator x_stop = x_end; - TSequenceHIter x = x_end; - TSequenceVIter y = y_end; - TSequenceHIter x_stop = x_end; - - - TScoreValue score_match = scoreMatch(score_); - TScoreValue score_mismatch = scoreMismatch(score_); - TScoreValue score_gap = scoreGapExtend(score_); - TScoreValue h,v; - - TMatrixIterator finger0 = iter(sw.matrix,sw.bestBeginPos); - TMatrixIterator end_col = finger0; - TMatrixIterator finger1 = finger0; - TMatrixIterator forbidden = finger0; - - bool different = true; - bool forbidden_reached = true; - bool end_reached = false; - bool skip_row = false; - - -/* int str0_length = length(source(row(align_,0)))+1; - int str1_length = length(source(row(align_,1)))+1; - for(int i = 0; i v) ? h : v); - v = (s1 > s2) ? 
s1 : s2; - if (v < 0) v = 0; - - } - - // value is the same as in the original matrix - if(*finger0==v) - { - //x_stop is as far as we have to go at least - if(x v) ? h : v); + v = (s1 > s2) ? s1 : s2; + if (v < 0) v = 0; + + } + + // value is the same as in the original matrix + if(*finger0==v) + { + //x_stop is as far as we have to go at least + if(x, Standard >::Type _smithWatermanTrace(Gaps & gapsH, Gaps & gapsV, - typename LocalAlignmentFinder::TBoolMatrix & fb_matrix, + typename LocalAlignmentFinder::TBoolMatrix & fb_matrix, Iter< Matrix, PositionIterator > source_, Score const & scoring_) { - //typedefs - typedef Iter, PositionIterator > TMatrixIterator; - typedef typename Position >::Type TPosition; + //typedefs + typedef Iter, PositionIterator > TMatrixIterator; + typedef typename Position >::Type TPosition; -// typedef Segment TTargetSourceSegment; - typedef typename Iterator::Type TSourceIteratorH; - typedef typename Iterator::Type TSourceIteratorV; +// typedef Segment TTargetSourceSegment; + typedef typename Iterator::Type TSourceIteratorH; + typedef typename Iterator::Type TSourceIteratorV; typedef Gaps TGapsH; typedef Gaps TGapsV; - typedef typename Iterator::Type TTargetIteratorH; - typedef typename Iterator::Type TTargetIteratorV; - - //------------------------------------------------------------------------- - //variables - TPosition pos_0 = coordinate(source_, 0); - TPosition pos_1 = coordinate(source_, 1); - - TSourceH strH = source(gapsH); - TSourceV strV = source(gapsV); - - TTargetIteratorH target_0 = iter(gapsH, pos_0); - TTargetIteratorV target_1 = iter(gapsV, pos_1); - - TSourceIteratorH it_0 = iter(strH, pos_0, Standard()); - TSourceIteratorH it_0_end = end(strH); - - TSourceIteratorV it_1 = iter(strV, pos_1, Standard()); - TSourceIteratorV it_1_end = end(strV); - - TScoreValue score_mismatch = scoreMismatch(scoring_); - TScoreValue score_gap = scoreGapExtend(scoring_); - - 
//------------------------------------------------------------------------- - //follow the trace until 0 is reached - while ((*source_!=0) && (it_0 != it_0_end) && (it_1 != it_1_end)) - { - bool gv; - bool gh; - bool forbidden = fb_matrix[position(source_)]; - - if (*it_0 == *it_1 && !forbidden) - { - gv = gh = true; - } - else - { - TMatrixIterator it_ = source_; - - goNext(it_, 0); - TScoreValue v = *it_ + score_gap; - - TScoreValue d; - if(forbidden) - d = 0; - else{ - goNext(it_, 1); - d = *it_ + score_mismatch; - } - - it_ = source_; - goNext(it_, 1); - TScoreValue h = *it_ + score_gap; - - gv = (v >= h) | (d >= h); - gh = (h > v) | (d >= v); - } - - if (gv) - { - ++it_0; - goNext(source_, 0); - } - else - { - insertGap(target_0); - } - - if (gh) - { - ++it_1; - goNext(source_, 1); - } - else - { - insertGap(target_1); - } - ++target_0; - ++target_1; - } + typedef typename Iterator::Type TTargetIteratorH; + typedef typename Iterator::Type TTargetIteratorV; + + //------------------------------------------------------------------------- + //variables + TPosition pos_0 = coordinate(source_, 0); + TPosition pos_1 = coordinate(source_, 1); + + TSourceH strH = source(gapsH); + TSourceV strV = source(gapsV); + + TTargetIteratorH target_0 = iter(gapsH, pos_0); + TTargetIteratorV target_1 = iter(gapsV, pos_1); + + TSourceIteratorH it_0 = iter(strH, pos_0, Standard()); + TSourceIteratorH it_0_end = end(strH); + + TSourceIteratorV it_1 = iter(strV, pos_1, Standard()); + TSourceIteratorV it_1_end = end(strV); + + TScoreValue score_mismatch = scoreMismatch(scoring_); + TScoreValue score_gap = scoreGapExtend(scoring_); + + //------------------------------------------------------------------------- + //follow the trace until 0 is reached + while ((*source_!=0) && (it_0 != it_0_end) && (it_1 != it_1_end)) + { + bool gv; + bool gh; + bool forbidden = fb_matrix[position(source_)]; + + if (*it_0 == *it_1 && !forbidden) + { + gv = gh = true; + } + else + { + TMatrixIterator it_ = 
source_; + + goNext(it_, 0); + TScoreValue v = *it_ + score_gap; + + TScoreValue d; + if(forbidden) + d = 0; + else{ + goNext(it_, 1); + d = *it_ + score_mismatch; + } + + it_ = source_; + goNext(it_, 1); + TScoreValue h = *it_ + score_gap; + + gv = (v >= h) | (d >= h); + gh = (h > v) | (d >= v); + } + + if (gv) + { + ++it_0; + goNext(source_, 0); + } + else + { + insertGap(target_0); + } + + if (gh) + { + ++it_1; + goNext(source_, 1); + } + else + { + insertGap(target_1); + } + ++target_0; + ++target_1; + } // We have removed all gaps and clippings from gapsH and gapsV in the calling functions, so the following works. // Note that we have to set the end position first. // TODO(holtgrew): Use setBegin/EndPosition(). setClippedEndPosition(gapsH, toViewPosition(gapsH, position(it_0, strH))); setClippedEndPosition(gapsV, toViewPosition(gapsV, position(it_1, strV))); - setClippedBeginPosition(gapsH, toViewPosition(gapsH, pos_0)); - setClippedBeginPosition(gapsV, toViewPosition(gapsV, pos_1)); - - return source_; + setClippedBeginPosition(gapsH, toViewPosition(gapsH, pos_0)); + setClippedBeginPosition(gapsV, toViewPosition(gapsV, pos_1)); + + return source_; } // ---------------------------------------------------------------------------- @@ -659,35 +642,35 @@ _getNextBestEndPosition(LocalAlignmentFinder & sw , TScoreValue cutoff) { // get maximal score from priority queue - TScoreValue topScore = 0; + TScoreValue topScore = 0; if (!empty(sw.pQ)) topScore = getValue(sw.matrix, top(sw.pQ).id_); // check if matrix entry of topScore did not change while declumping - if (!empty(sw.pQ)) { - while (top(sw.pQ).value_ != topScore) { - if (topScore >= cutoff) { - ((sw.pQ).heap[0]).value_ = topScore; - adjustTop(sw.pQ); - } else { - pop(sw.pQ); - } - if (!empty(sw.pQ)) topScore = getValue(sw.matrix, top(sw.pQ).id_); - else break; - } - } + if (!empty(sw.pQ)) { + while (top(sw.pQ).value_ != topScore) { + if (topScore >= cutoff) { + ((sw.pQ).heap[0]).value_ = topScore; + 
adjustTop(sw.pQ); + } else { + pop(sw.pQ); + } + if (!empty(sw.pQ)) topScore = getValue(sw.matrix, top(sw.pQ).id_); + else break; + } + } // priority queue with top scores is empty if(empty(sw.pQ)) {//||top(sw.pQ).value_::TMatrixPosition ret_pos = top(sw.pQ).id_; - sw.bestEndPos = ret_pos; - pop(sw.pQ); - - return ret_pos; + sw.needReinit = true; + return 0; + } + + typename LocalAlignmentFinder::TMatrixPosition ret_pos = top(sw.pQ).id_; + sw.bestEndPos = ret_pos; + pop(sw.pQ); + + return ret_pos; } // ---------------------------------------------------------------------------- @@ -700,7 +683,7 @@ TScoreValue _smithWaterman(Gaps & gapsH, Gaps & gapsV, LocalAlignmentFinder & sw_finder, - Score const & score_, + Score const & score_, TScoreValue cutoff) { // TODO(holtgrew): This sourceSegment() stuff is confusing... Do we *really* need this? @@ -711,25 +694,25 @@ _smithWaterman(Gaps & gapsH, clearClipping(gapsV); _initLocalAlignmentFinder(sourceSegment(gapsH), sourceSegment(gapsV), sw_finder, WatermanEggert()); - - TScoreValue ret = _smithWatermanGetMatrix(sw_finder, sourceSegment(gapsH), sourceSegment(gapsV), score_,cutoff); - - if(ret==0) - return ret; - sw_finder.needReinit = false; - typedef Iter::TMatrix,PositionIterator > TMatrixIterator; - TMatrixIterator best_begin; + TScoreValue ret = _smithWatermanGetMatrix(sw_finder, sourceSegment(gapsH), sourceSegment(gapsV), score_,cutoff); + + if(ret==0) + return ret; + sw_finder.needReinit = false; + + typedef Iter::TMatrix,PositionIterator > TMatrixIterator; + TMatrixIterator best_begin; // TODO(holtgrew): What does the following comment mean? 
- // TODO: sw_finder statt kram - best_begin = _smithWatermanTrace(gapsH, gapsV, sw_finder.forbidden,iter(sw_finder.matrix,(top(sw_finder.pQ)).id_), score_); + // TODO: sw_finder statt kram + best_begin = _smithWatermanTrace(gapsH, gapsV, sw_finder.forbidden,iter(sw_finder.matrix,(top(sw_finder.pQ)).id_), score_); + + sw_finder.bestBeginPos = position(best_begin); - sw_finder.bestBeginPos = position(best_begin); - - pop(sw_finder.pQ); + pop(sw_finder.pQ); - return ret; + return ret; } // ---------------------------------------------------------------------------- @@ -742,27 +725,27 @@ TScoreValue _smithWatermanGetNext(Gaps & gapsH, Gaps & gapsV, LocalAlignmentFinder & sw_finder , - Score const & score_, + Score const & score_, TScoreValue cutoff) -{ - _smithWatermanDeclump(sw_finder, gapsH, gapsV, score_); +{ + _smithWatermanDeclump(sw_finder, gapsH, gapsV, score_); - clearGaps(gapsH); - clearGaps(gapsV); + clearGaps(gapsH); + clearGaps(gapsV); clearClipping(gapsH); clearClipping(gapsV); - typename LocalAlignmentFinder::TMatrixPosition next_best_end; - next_best_end = _getNextBestEndPosition(sw_finder,cutoff); - if(next_best_end==0) - return 0; - typename LocalAlignmentFinder::TMatrixIterator next_best_begin; - next_best_begin= _smithWatermanTrace(gapsH, gapsV, sw_finder.forbidden,iter(sw_finder.matrix,next_best_end), score_); - sw_finder.bestBeginPos = position(next_best_begin); - - return getValue(sw_finder.matrix,next_best_end); + typename LocalAlignmentFinder::TMatrixPosition next_best_end; + next_best_end = _getNextBestEndPosition(sw_finder,cutoff); + if(next_best_end==0) + return 0; + typename LocalAlignmentFinder::TMatrixIterator next_best_begin; + next_best_begin= _smithWatermanTrace(gapsH, gapsV, sw_finder.forbidden,iter(sw_finder.matrix,next_best_end), score_); + sw_finder.bestBeginPos = position(next_best_begin); + + return getValue(sw_finder.matrix,next_best_end); } } // namespace seqan -#endif // #ifndef 
SEQAN_CORE_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_WATERMAN_EGGERT_IMPL_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_LOCAL_ALIGNMENT_WATERMAN_EGGERT_IMPL_H_ diff --git a/seqan/align/matrix_base.h b/seqan/align/matrix_base.h index f1438ea..925cd24 100644 --- a/seqan/align/matrix_base.h +++ b/seqan/align/matrix_base.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -54,9 +54,9 @@ template struct SizeArr_; template struct SizeArr_ > { - typedef Matrix TMatrix_; - typedef typename Size::Type TSize_; - typedef String Type; + typedef Matrix TMatrix_; + typedef typename Size::Type TSize_; + typedef String Type; }; ////////////////////////////////////////////////////////////////////////////// @@ -64,7 +64,13 @@ struct SizeArr_ > template struct Host > { - typedef String Type; + typedef String Type; +}; + +template +struct Host const> +{ + typedef String const Type; }; ////////////////////////////////////////////////////////////////////////////// @@ -77,25 +83,14 @@ struct Host > * @headerfile * @brief A simple n-dimensional matrix type. * - * @signature template + * @signature template * class Matrix; * * @tparam TValue Type of matrix entries. * @tparam DIMENSION Dimension of the matrix. Use 0 for n-dimensional, values > 0 for a matrix with - * DIMENSION dimensions. + * DIMENSION dimensions. Defaults to 0. */ -/** -.Class.Matrix: -..cat:Miscellaneous -..summary:A simple n-dimensional matrix type. -..signature:Matrix -..param.TValue:Type of matrix entries. -..param.unsigned DIMENSION:The specializing type (0: NDimensional matrix; 2: two dimensional matrix). 
-..remarks: The following operators and functions are supported: A*B, A*a, A+B,A-B,<<, transpose -..include:seqan/align.h -*/ - template class Matrix @@ -103,59 +98,59 @@ class Matrix //____________________________________________________________________________ public: - typedef typename Size::Type TSize; - typedef String TSizeArr; - typedef String THost; + typedef typename Size::Type TSize; + typedef String TSizeArr; + typedef String THost; - TSizeArr data_lengths; //Length of every dimension - TSizeArr data_factors; //used for positions of dimensions in host ("size of jumps" to get to next entry of specified dimension) + TSizeArr data_lengths; //Length of every dimension + TSizeArr data_factors; //used for positions of dimensions in host ("size of jumps" to get to next entry of specified dimension) - Holder data_host; + Holder data_host; //____________________________________________________________________________ public: - Matrix() - { - create(data_host); - } - Matrix(Matrix const & other_): - data_lengths(other_.data_lengths), - data_factors(other_.data_factors), - data_host(other_.data_host) - { - } - inline Matrix const & - operator = (Matrix const & other_) - { - data_lengths = other_.data_lengths; - data_factors = other_.data_factors; - data_host = other_.data_host; - - return *this; - } - ~Matrix() - { - } + Matrix() + { + create(data_host); + } + Matrix(Matrix const & other_): + data_lengths(other_.data_lengths), + data_factors(other_.data_factors), + data_host(other_.data_host) + { + } + inline Matrix const & + operator = (Matrix const & other_) + { + data_lengths = other_.data_lengths; + data_factors = other_.data_factors; + data_host = other_.data_host; + + return *this; + } + ~Matrix() + { + } //____________________________________________________________________________ //____________________________________________________________________________ - inline TValue & - operator () (TSize x1, TSize x2) - { - return value(*this, x1, x2); - } - inline 
TValue & - operator () (TSize x1, TSize x2, TSize x3) - { - return value(*this, x1, x2, x3); - } - inline TValue & - operator () (TSize x1, TSize x2, TSize x3, TSize x4) - { - return value(*this, x1, x2, x3, x4); - } + inline TValue & + operator () (TSize x1, TSize x2) + { + return value(*this, x1, x2); + } + inline TValue & + operator () (TSize x1, TSize x2, TSize x3) + { + return value(*this, x1, x2, x3); + } + inline TValue & + operator () (TSize x1, TSize x2, TSize x3, TSize x4) + { + return value(*this, x1, x2, x3, x4); + } //____________________________________________________________________________ }; @@ -167,57 +162,57 @@ class Matrix //____________________________________________________________________________ public: - typedef typename Size::Type TSize; - typedef String TSizeArr; - typedef String THost; + typedef typename Size::Type TSize; + typedef String TSizeArr; + typedef String THost; - TSizeArr data_lengths; - TSizeArr data_factors; + TSizeArr data_lengths; + TSizeArr data_factors; - Holder data_host; + Holder data_host; //____________________________________________________________________________ public: - Matrix() - { - create(data_host); - - //setDimension to 2 - resize(data_lengths, 2, 0); - resize(data_factors, 2, 0); - data_factors[0] = 1; - } - Matrix(Matrix const & other_): - data_lengths(other_.data_lengths), - data_factors(other_.data_factors), - data_host(other_.data_host) - { - } - inline Matrix const & - operator = (Matrix const & other_) - { - data_lengths = other_.data_lengths; - data_factors = other_.data_factors; - data_host = other_.data_host; - - return *this; - } - - ~Matrix() - { - } + Matrix() + { + create(data_host); + + //setDimension to 2 + resize(data_lengths, 2, 0); + resize(data_factors, 2, 0); + data_factors[0] = 1; + } + Matrix(Matrix const & other_): + data_lengths(other_.data_lengths), + data_factors(other_.data_factors), + data_host(other_.data_host) + { + } + inline Matrix const & + operator = (Matrix const & 
other_) + { + data_lengths = other_.data_lengths; + data_factors = other_.data_factors; + data_host = other_.data_host; + + return *this; + } + + ~Matrix() + { + } //____________________________________________________________________________ //____________________________________________________________________________ - inline TValue & - operator () (TSize x1, TSize x2) - { - return value(*this, x1, x2); - } + inline TValue & + operator () (TSize x1, TSize x2) + { + return value(*this, x1, x2); + } //____________________________________________________________________________ }; @@ -228,57 +223,57 @@ class Matrix //____________________________________________________________________________ public: - typedef typename Size::Type TSize; - typedef String TSizeArr; - typedef String THost; + typedef typename Size::Type TSize; + typedef String TSizeArr; + typedef String THost; - TSizeArr data_lengths; - TSizeArr data_factors; + TSizeArr data_lengths; + TSizeArr data_factors; - Holder data_host; + Holder data_host; //____________________________________________________________________________ public: - Matrix() - { - create(data_host); - - //setDimension to 3 - resize(data_lengths, 3, 0); - resize(data_factors, 3); - data_factors[0] = 1; - } - Matrix(Matrix const & other_): - data_lengths(other_.data_lengths), - data_factors(other_.data_factors), - data_host(other_.data_host) - { - } - inline Matrix const & - operator = (Matrix const & other_) - { - data_lengths = other_.data_lengths; - data_factors = other_.data_factors; - data_host = other_.data_host; - - return *this; - } - - ~Matrix() - { - } + Matrix() + { + create(data_host); + + //setDimension to 3 + resize(data_lengths, 3, 0); + resize(data_factors, 3); + data_factors[0] = 1; + } + Matrix(Matrix const & other_): + data_lengths(other_.data_lengths), + data_factors(other_.data_factors), + data_host(other_.data_host) + { + } + inline Matrix const & + operator = (Matrix const & other_) + { + data_lengths = 
other_.data_lengths; + data_factors = other_.data_factors; + data_host = other_.data_host; + + return *this; + } + + ~Matrix() + { + } //____________________________________________________________________________ //____________________________________________________________________________ - inline TValue & - operator () (TSize x1, TSize x2, TSize x3) - { - return value(*this, x1, x2, x3); - } + inline TValue & + operator () (TSize x1, TSize x2, TSize x3) + { + return value(*this, x1, x2, x3); + } //____________________________________________________________________________ }; @@ -287,28 +282,28 @@ template inline typename SizeArr_ >::Type & _dataLengths(Matrix & me) { - return me.data_lengths; + return me.data_lengths; } template inline typename SizeArr_ >::Type const & _dataLengths(Matrix const & me) { - return me.data_lengths; + return me.data_lengths; } template inline typename SizeArr_ >::Type & _dataFactors(Matrix & me) { - return me.data_factors; + return me.data_factors; } template inline typename SizeArr_ >::Type const & _dataFactors(Matrix const & me) { - return me.data_factors; + return me.data_factors; } //____________________________________________________________________________ @@ -318,33 +313,23 @@ template inline bool dependent(Matrix & me) { - return dependent(me.data_host); -} - -//____________________________________________________________________________ - -template -inline void -setHost(Matrix & me, THost & host_) -{ - setValue(me.data_host, host_); + return dependent(me.data_host); } //____________________________________________________________________________ - template -inline typename Host >::Type & -host(Matrix & me) +inline Holder >::Type> & +_dataHost(Matrix & matrix) { - return value(me.data_host); + return matrix.data_host; } template -inline typename Host >::Type const & -host(Matrix const & me) +inline Holder >::Type> const & +_dataHost(Matrix const & matrix) { - return value(me.data_host); + return matrix.data_host; } 
//____________________________________________________________________________ @@ -353,7 +338,7 @@ template inline void assignHost(Matrix & me, THost const & value_) { - assignValue(me.data_host, value_); + assignValue(me.data_host, value_); } //____________________________________________________________________________ @@ -362,7 +347,7 @@ template inline void moveHost(Matrix & me, THost const & value_) { - moveValue(me.data_host, value_); + moveValue(me.data_host, value_); } ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// @@ -370,7 +355,7 @@ moveHost(Matrix & me, THost const & value_) template struct Value< Matrix > { - typedef TValue Type; + typedef TValue Type; }; ////////////////////////////////////////////////////////////////////////////// @@ -378,23 +363,23 @@ struct Value< Matrix > template struct Iterator< Matrix, TIteratorSpec > { - typedef Iter, PositionIterator> Type; + typedef Iter, PositionIterator> Type; }; template struct Iterator< Matrix const, TIteratorSpec > { - typedef Iter const, PositionIterator> Type; + typedef Iter const, PositionIterator> Type; }; ////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// template -inline unsigned int +inline typename Size const>::Type dimension(Matrix const & me) { - return length(_dataLengths(me)); + return length(_dataLengths(me)); } ////////////////////////////////////////////////////////////////////////////// @@ -402,16 +387,16 @@ dimension(Matrix const & me) template inline void setDimension(Matrix & me, - unsigned int dim_) + unsigned int dim_) { - SEQAN_ASSERT_GT(dim_, 0u); + SEQAN_ASSERT_GT(dim_, 0u); //std::cout<<"\npress enter1\n"; //std::cin.get(); - resize(_dataLengths(me), dim_, 0); + resize(_dataLengths(me), dim_, 0); - resize(_dataFactors(me), dim_); - _dataFactors(me)[0] = 1; + 
resize(_dataFactors(me), dim_); + _dataFactors(me)[0] = 1; } ////////////////////////////////////////////////////////////////////////////// @@ -419,22 +404,22 @@ setDimension(Matrix & me, template inline typename Size >::Type length(Matrix const & me, - unsigned int dim_) + unsigned int dim_) { - return me.data_lengths[dim_]; + return me.data_lengths[dim_]; } template inline typename Size >::Type length(Matrix const & me) { - return length(host(me)); + return length(host(me)); } template inline bool empty(Matrix const & me) { - return empty(host(me)); + return empty(host(me)); } ////////////////////////////////////////////////////////////////////////////// @@ -442,65 +427,76 @@ inline bool empty(Matrix const & me) template inline void setLength(Matrix & me, - unsigned int dim_, - TLength length_) + unsigned int dim_, + TLength length_) { - SEQAN_ASSERT_GT(length_, static_cast(0)); - SEQAN_ASSERT_LT(dim_, dimension(me)); + SEQAN_ASSERT_GT(length_, static_cast(0)); + SEQAN_ASSERT_LT(dim_, dimension(me)); typedef typename SizeArr_ >::TSize_ TSize_; - _dataLengths(me)[dim_] = static_cast(length_); + _dataLengths(me)[dim_] = static_cast(length_); } ////////////////////////////////////////////////////////////////////////////// +/*! + * @fn Matrix#resize + * @brief Resize the matrix and fill it with a given value or zeroes. + * + * @signature void resize(matrix[, val]); + * + * @param[in,out] matrix The Matrix to fill. + * @param[in] val The optional value to fill the matrix with. 
+ */ + + template inline void resize(Matrix & me) { - typedef Matrix TMatrix; - typedef typename Size::Type TSize; + typedef Matrix TMatrix; + typedef typename Size::Type TSize; - unsigned int dimension_ = dimension(me); + unsigned int dimension_ = dimension(me); - SEQAN_ASSERT_GT(dimension_, 0u); + SEQAN_ASSERT_GT(dimension_, 0u); - TSize factor_ = _dataFactors(me)[0] * length(me, 0); - for (unsigned int i = 1; (factor_ > 0) && (i < dimension_); ++i) - { - _dataFactors(me)[i] = factor_; - factor_ *= length(me, i); - } + TSize factor_ = _dataFactors(me)[0] * length(me, 0); + for (unsigned int i = 1; (factor_ > 0) && (i < dimension_); ++i) + { + _dataFactors(me)[i] = factor_; + factor_ *= length(me, i); + } - if (factor_ > 0) - { - resize(host(me), factor_); - } + if (factor_ > 0) + { + resize(host(me), factor_); + } } ////////////////////////////////////////////////////////////////////////////// template inline void -resize(Matrix & me, TFillValue myValue) //resize the matrix and fill with value +resize(Matrix & me, TFillValue myValue) //resize the matrix and fill with value { - typedef Matrix TMatrix; - typedef typename Size::Type TSize; + typedef Matrix TMatrix; + typedef typename Size::Type TSize; - unsigned int dimension_ = dimension(me); + unsigned int dimension_ = dimension(me); - SEQAN_ASSERT_GT(dimension_, 0u); + SEQAN_ASSERT_GT(dimension_, 0u); - TSize factor_ = _dataFactors(me)[0] * length(me, 0); - for (unsigned int i = 1; (factor_ > 0) && (i < dimension_); ++i) - { - _dataFactors(me)[i] = factor_; - factor_ *= length(me, i); - } + TSize factor_ = _dataFactors(me)[0] * length(me, 0); + for (unsigned int i = 1; (factor_ > 0) && (i < dimension_); ++i) + { + _dataFactors(me)[i] = factor_; + factor_ *= length(me, i); + } - if (factor_ > 0) - resize(host(me), factor_, myValue); + if (factor_ > 0) + resize(host(me), factor_, myValue); } @@ -509,37 +505,37 @@ resize(Matrix & me, TFillValue myValue) //resize the matrix a template inline typename Position >::Type 
nextPosition(Matrix & me, - TPosition position_, - unsigned int dimension_) + TPosition position_, + unsigned int dimension_) { - return position_ + _dataFactors(me)[dimension_]; + return position_ + _dataFactors(me)[dimension_]; } template inline typename Position >::Type nextPosition(Matrix const & me, - TPosition position_, - unsigned int dimension_) + TPosition position_, + unsigned int dimension_) { - return position_ + _dataFactors(me)[dimension_]; + return position_ + _dataFactors(me)[dimension_]; } template inline typename Position >::Type previousPosition(Matrix & me, - TPosition position_, - unsigned int dimension_) + TPosition position_, + unsigned int dimension_) { - return position_ - _dataFactors(me)[dimension_]; + return position_ - _dataFactors(me)[dimension_]; } template inline typename Position >::Type previousPosition(Matrix const & me, - TPosition position_, - unsigned int dimension_) + TPosition position_, + unsigned int dimension_) { - return position_ - _dataFactors(me)[dimension_]; + return position_ - _dataFactors(me)[dimension_]; } ////////////////////////////////////////////////////////////////////////////// @@ -547,19 +543,19 @@ previousPosition(Matrix const & me, template inline typename Size< Matrix >::Type coordinate(Matrix const & me, - TPosition position_, - unsigned int dimension_) -{ - SEQAN_ASSERT_LT(dimension_, dimension(me)); - - if (dimension_ < dimension(me) - 1) - { - return (position_ / _dataFactors(me)[dimension_]) % _dataFactors(me)[dimension_ + 1]; - } - else - { - return position_ / _dataFactors(me)[dimension_]; - } + TPosition position_, + unsigned int dimension_) +{ + SEQAN_ASSERT_LT(dimension_, dimension(me)); + + if (dimension_ < dimension(me) - 1) + { + return (position_ / _dataFactors(me)[dimension_]) % _dataFactors(me)[dimension_ + 1]; + } + else + { + return position_ / _dataFactors(me)[dimension_]; + } } ////////////////////////////////////////////////////////////////////////////// @@ -567,16 +563,16 @@ 
coordinate(Matrix const & me, template inline typename Iterator, Tag const>::Type begin(Matrix & me, - Tag const) + Tag const) { - return typename Iterator, Tag const >::Type(me, 0); + return typename Iterator, Tag const >::Type(me, 0); } template inline typename Iterator const, Tag const>::Type begin(Matrix const & me, - Tag const) + Tag const) { - return typename Iterator const, Tag const >::Type(me, 0); + return typename Iterator const, Tag const >::Type(me, 0); } ////////////////////////////////////////////////////////////////////////////// @@ -584,16 +580,16 @@ begin(Matrix const & me, template inline typename Iterator, Tag const >::Type end(Matrix & me, - Tag const) + Tag const) { - return typename Iterator, Tag const >::Type(me, length(host(me))); + return typename Iterator, Tag const >::Type(me, length(host(me))); } template inline typename Iterator const, Tag const >::Type end(Matrix const & me, - Tag const) + Tag const) { - return typename Iterator, Tag const >::Type(me, length(host(me))); + return typename Iterator, Tag const >::Type(me, length(host(me))); } ////////////////////////////////////////////////////////////////////////////// @@ -601,9 +597,9 @@ end(Matrix const & me, template inline typename Reference >::Type value(Matrix & me, - TPosition position_) + TPosition position_) { - return value(host(me), position_); + return value(host(me), position_); } template @@ -620,10 +616,10 @@ value(Matrix const & me, template inline typename Reference >::Type value(Matrix & me, - TOrdinate1 i1, - TOrdinate2 i2) + TOrdinate1 i1, + TOrdinate2 i2) { - return value(host(me), i1 + i2 * _dataFactors(me)[1]); + return value(host(me), i1 + i2 * _dataFactors(me)[1]); } template @@ -642,11 +638,11 @@ value(Matrix const & me, template inline typename Reference >::Type value(Matrix & me, - TOrdinate1 i1, - TOrdinate2 i2, - TOrdinate3 i3) + TOrdinate1 i1, + TOrdinate2 i2, + TOrdinate3 i3) { - return value(host(me), i1 + i2 * _dataFactors(me)[1] + i3 * 
_dataFactors(me)[2]); + return value(host(me), i1 + i2 * _dataFactors(me)[1] + i3 * _dataFactors(me)[2]); } //____________________________________________________________________________ @@ -656,12 +652,12 @@ value(Matrix & me, template inline typename Reference >::Type value(Matrix & me, - TOrdinate1 i1, - TOrdinate2 i2, - TOrdinate3 i3, - TOrdinate4 i4) + TOrdinate1 i1, + TOrdinate2 i2, + TOrdinate3 i3, + TOrdinate4 i4) { - return value(host(me), i1 + i2 * _dataFactors(me)[1] + i3 * _dataFactors(me)[2] + i4 * _dataFactors(me)[3]); + return value(host(me), i1 + i2 * _dataFactors(me)[1] + i3 * _dataFactors(me)[2] + i4 * _dataFactors(me)[3]); } ////////////////////////////////////////////////////////////////////////////// @@ -672,31 +668,31 @@ value(Matrix & me, template inline void goNext(Iter, PositionIterator> & me, - unsigned int dimension_) + unsigned int dimension_) { - setPosition(me, nextPosition(container(me), position(me), dimension_)); + setPosition(me, nextPosition(container(me), position(me), dimension_)); } template inline void goNext(Iter const, PositionIterator> & me, - unsigned int dimension_) + unsigned int dimension_) { - setPosition(me, nextPosition(container(me), position(me), dimension_)); + setPosition(me, nextPosition(container(me), position(me), dimension_)); } template inline void goNext(Iter, PositionIterator> & me) { - goNext(me, 0); + goNext(me, 0); } template inline void goNext(Iter const, PositionIterator> & me) { - goNext(me, 0); + goNext(me, 0); } ////////////////////////////////////////////////////////////////////////////// @@ -706,31 +702,31 @@ goNext(Iter const, PositionIterator> & me) template inline void goPrevious(Iter< Matrix, PositionIterator > & me, - unsigned int dimension_) + unsigned int dimension_) { - setPosition(me, previousPosition(container(me), position(me), dimension_)); + setPosition(me, previousPosition(container(me), position(me), dimension_)); } template inline void goPrevious(Iter< Matrix const, 
PositionIterator > & me, - unsigned int dimension_) + unsigned int dimension_) { - setPosition(me, previousPosition(container(me), position(me), dimension_)); + setPosition(me, previousPosition(container(me), position(me), dimension_)); } template inline void goPrevious(Iter< Matrix, PositionIterator > & me) { - goPrevious(me, 0); + goPrevious(me, 0); } template inline void goPrevious(Iter< Matrix const, PositionIterator > & me) { - goPrevious(me, 0); + goPrevious(me, 0); } ////////////////////////////////////////////////////////////////////////////// @@ -774,102 +770,102 @@ goTo(Iter const, PositionIterator> & me, TPosition0 po template inline typename Size< Matrix >::Type coordinate(Iter, PositionIterator > & me, - unsigned int dimension_) + unsigned int dimension_) { - return coordinate(container(me), position(me), dimension_); + return coordinate(container(me), position(me), dimension_); } template inline typename Size< Matrix >::Type coordinate(Iter const, PositionIterator > & me, - unsigned int dimension_) + unsigned int dimension_) { - return coordinate(container(me), position(me), dimension_); + return coordinate(container(me), position(me), dimension_); } -/* -operator + -Computes the matricial sum between two matrices -..signature:Matrix +(matrix1,matrix2) -..param.matrix1:The first matrix. -...type:Class.Matrix -..param.matrix2:The second matrix. -...type:Class.Matrix -..returns:The sum of the two matrices (another nxm matrix). -..remarks:The number of rows and columns of matrix1 must be equal to the number of rows and columns of matrix2 (length of dimensions for NDimensional matrices) -*/ +/*! + * @fn Matrix::operator+ + * @brief Sum operator for the Matrix type. + * + * @signature TMatrix Matrix::operator+(lhs, rhs); + * + * @param[in] lhs First summand. + * @param[in] rhs Second summand. + * + * @return TMatrix The resulting matrix of same type as lhs and rhs. 
+ */ template Matrix operator + (Matrix const & matrix1,Matrix const & matrix2) { - //the two matrices must have same dimension - SEQAN_ASSERT(_dataLengths(matrix1) == _dataLengths(matrix2)); - - Matrix result; - //copy the first matrix - setDimension(result,length(_dataLengths(matrix1))); - _dataLengths(result) = _dataLengths(matrix1); - resize(result); - - //add the matrices - for(unsigned int i = 0;i< length(host(result));++i) - { - value(host(result), i)=value(host(matrix1), i)+value(host(matrix2), i); - } - //Return matrix sum - return result; + //the two matrices must have same dimension + SEQAN_ASSERT(_dataLengths(matrix1) == _dataLengths(matrix2)); + + Matrix result; + //copy the first matrix + setDimension(result,length(_dataLengths(matrix1))); + _dataLengths(result) = _dataLengths(matrix1); + resize(result); + + //add the matrices + for(unsigned int i = 0;i< length(host(result));++i) + { + value(host(result), i)=value(host(matrix1), i)+value(host(matrix2), i); + } + //Return matrix sum + return result; } template Matrix operator - (Matrix const & matrix1,Matrix const & matrix2) { - //the two matrices must have same dimension - SEQAN_ASSERT(_dataLengths(matrix1) == _dataLengths(matrix2)); - - Matrix result; - //resize the matrix - setDimension(result,length(_dataLengths(matrix1))); - _dataLengths(result) = _dataLengths(matrix1); - resize(result); - - //subtract the matrices - for(unsigned int i = 0;i< length(host(result));++i) - { - value(host(result), i)=value(host(matrix1), i)-value(host(matrix2), i); - } - //Return matrix difference - return result; + //the two matrices must have same dimension + SEQAN_ASSERT(_dataLengths(matrix1) == _dataLengths(matrix2)); + + Matrix result; + //resize the matrix + setDimension(result,length(_dataLengths(matrix1))); + _dataLengths(result) = _dataLengths(matrix1); + resize(result); + + //subtract the matrices + for(unsigned int i = 0;i< length(host(result));++i) + { + value(host(result), i)=value(host(matrix1), 
i)-value(host(matrix2), i); + } + //Return matrix difference + return result; } template Matrix operator * (Matrix const & matrix1, Matrix const & matrix2) { - SEQAN_ASSERT_EQ(length(matrix1,1), length(matrix2,0)); - - unsigned int nrow1=length(matrix1,0); - unsigned int ncol2=length(matrix2,1); - Matrix result; - //resize the matrix - setLength(result, 0, nrow1); - setLength(result, 1, ncol2); - resize(result,(TValue) 0); - - //Matrix product - for(unsigned int row = 0; row < nrow1; row++) - { - for(unsigned int col = 0; col < ncol2; col++) - { - for(unsigned int colRes = 0; colRes < length(matrix1,1); colRes++) - { - value(result,row,col)+= value(host(matrix1), row + colRes * matrix1.data_factors[1])*value(host(matrix2), colRes + col * matrix2.data_factors[1]); - } - } - } - //return the matrix product - return result; + SEQAN_ASSERT_EQ(length(matrix1,1), length(matrix2,0)); + + unsigned int nrow1=length(matrix1,0); + unsigned int ncol2=length(matrix2,1); + Matrix result; + //resize the matrix + setLength(result, 0, nrow1); + setLength(result, 1, ncol2); + resize(result,(TValue) 0); + + //Matrix product + for(unsigned int row = 0; row < nrow1; row++) + { + for(unsigned int col = 0; col < ncol2; col++) + { + for(unsigned int colRes = 0; colRes < length(matrix1,1); colRes++) + { + value(result,row,col)+= value(host(matrix1), row + colRes * matrix1.data_factors[1])*value(host(matrix2), colRes + col * matrix2.data_factors[1]); + } + } + } + //return the matrix product + return result; } @@ -877,30 +873,30 @@ template Matrix operator * (TValue const & scalar, Matrix const & matrix) { - Matrix result; - result= matrix; - //scalar multiplication - for(unsigned int i = 0;i< length(host(result));++i) - { - value(host(result), i)*=scalar; - } - //return the matrix product - return result; + Matrix result; + result= matrix; + //scalar multiplication + for(unsigned int i = 0;i< length(host(result));++i) + { + value(host(result), i)*=scalar; + } + //return the matrix product 
+ return result; } template Matrix operator * (Matrix const & matrix, TValue const & scalar) { - Matrix result; - result= matrix; - //scalar multiplication - for(unsigned int i = 0;i< length(host(result));++i) - { - value(host(result), i)*=scalar; - } - //return the matrix product - return result; + Matrix result; + result= matrix; + //scalar multiplication + for(unsigned int i = 0;i< length(host(result));++i) + { + value(host(result), i)*=scalar; + } + //return the matrix product + return result; } @@ -908,50 +904,39 @@ template bool operator == (Matrix const & matrix1, Matrix const & matrix2) { - bool result; - result= (matrix1.data_lengths==matrix2.data_lengths)&&(matrix1.data_factors==matrix2.data_factors)&&(value(matrix1.data_host)==value(matrix2.data_host))&&(DIMENSION1==DIMENSION2); - return result; + bool result; + result= (matrix1.data_lengths==matrix2.data_lengths)&&(matrix1.data_factors==matrix2.data_factors)&&(value(matrix1.data_host)==value(matrix2.data_host))&&(DIMENSION1==DIMENSION2); + return result; } -/* -.Function.matricialSum: -..summary:Computes the matricial sum between two nxm matrixes -..signature:matricialSum(matrix1,matrix2) -..param.matrix1:The first matrix. -...type:Matrix& -..param.matrix2:The second matrix. -...type:Matrix& -..returns:The sum of the two matrices (another nxm matrix). -..remarks:The number of rows and columns of matrix1 must be equal to the number of rows and columns of matrix2. 
-..include:seqan/align.h -*/ + /* template Matrix matricialSum(Matrix &matrix1,Matrix &matrix2) { - //the two matrices must have same dimension - if(length(matrix1,0) != length(matrix2,0)||length(matrix1,1) != length(matrix2,1)) - { - fprintf(stderr,"Error: The two matrices have different dimensions"); - } + //the two matrices must have same dimension + if(length(matrix1,0) != length(matrix2,0)||length(matrix1,1) != length(matrix2,1)) + { + fprintf(stderr,"Error: The two matrices have different dimensions"); + } - unsigned int nrow=length(matrix1,0); - unsigned int ncol=length(matrix1,1); + unsigned int nrow=length(matrix1,0); + unsigned int ncol=length(matrix1,1); - Matrix result; - //resize the matrix - setLength(result, 0, nrow); - setLength(result, 1, ncol); - resize(result); + Matrix result; + //resize the matrix + setLength(result, 0, nrow); + setLength(result, 1, ncol); + resize(result); - //add the matrices - for(unsigned int i = 0;i< nrow*ncol;++i) - { - value(host(result), i)=value(host(matrix1), i)+value(host(matrix2), i); - } - //Return matrix difference - return result; + //add the matrices + for(unsigned int i = 0;i< nrow*ncol;++i) + { + value(host(result), i)=value(host(matrix1), i)+value(host(matrix2), i); + } + //Return matrix difference + return result; } */ @@ -959,154 +944,129 @@ matricialSum(Matrix &matrix1,Matrix &matrix2) // _matricialDifference ////////////////////////////////////////////////////////////////////////////// -/* -.Function.matricialDifference: -..summary:Computes the matricial difference between two matrixes -..signature:matricialDifference(matrix1,matrix2) -..param.matrix1:The first matrix. -...type:Matrix& -..param.matrix2:The second matrix. -...type:Matrix& -..returns:The difference of the two matrices (another matrix). -..remarks:The number of rows and columns of matrix1 must be equal to the number of rows and columns of matrix2. 
-..include:seqan/align.h -*/ /* template inline Matrix matricialDifference(Matrix & matrix1, Matrix & matrix2) { - //the two matrices must have same dimension - if(length(matrix1,0) != length(matrix2,0)||length(matrix1,1) != length(matrix2,1)) - { - fprintf(stderr,"Error: The two matrices have different dimensions"); - } - - unsigned int nrow=length(matrix1,0); - unsigned int ncol=length(matrix1,1); - - Matrix result; - //resize the matrix - //setDimension(result, 2); - setLength(result, 0, nrow); - setLength(result, 1, ncol); - resize(result); - - //Substract the matrices - for(unsigned int i1 = 0;i1< nrow;++i1) - { - for(unsigned int i2 = 0;i2 result; + //resize the matrix + //setDimension(result, 2); + setLength(result, 0, nrow); + setLength(result, 1, ncol); + resize(result); + + //Substract the matrices + for(unsigned int i1 = 0;i1< nrow;++i1) + { + for(unsigned int i2 = 0;i2& -..param.matrix2:The second matrix (nxp). -...type:Matrix& -..returns:The products of the two matrices (another matrix, mxp). -..remarks:The number of columns of matrix1 (left matrix) must be equal to the number of rows of matrix2(right matrix). 
-..include:seqan/align.h -*/ + /* template inline Matrix matricialProduct(Matrix &matrix1, - Matrix &matrix2) -{ - //SEQAN_ASSERT_LT(dimension_, dimension(me)); - if(length(matrix1,1) != length(matrix2,0)) - { - fprintf(stderr,"Error: Number of columns of matrix1 is unequal to number of rows of matrix2"); - } - - unsigned int nrow1=length(matrix1,0); - unsigned int ncol2=length(matrix2,1); - Matrix result; - //resize the matrix - setLength(result, 0, nrow1); - setLength(result, 1, ncol2); - resize(result,(TValue) 0); - - //Matrix product - for(unsigned int row = 0; row < nrow1; row++) - { - for(unsigned int col = 0; col < ncol2; col++) - { - for(unsigned int colRes = 0; colRes < length(matrix1,1); colRes++) - { - value(result,row,col)+=value(matrix1, row,colRes)*value(matrix2,colRes,col); - } - } - } - //return the matrix product - return result; + Matrix &matrix2) +{ + //SEQAN_ASSERT_LT(dimension_, dimension(me)); + if(length(matrix1,1) != length(matrix2,0)) + { + fprintf(stderr,"Error: Number of columns of matrix1 is unequal to number of rows of matrix2"); + } + + unsigned int nrow1=length(matrix1,0); + unsigned int ncol2=length(matrix2,1); + Matrix result; + //resize the matrix + setLength(result, 0, nrow1); + setLength(result, 1, ncol2); + resize(result,(TValue) 0); + + //Matrix product + for(unsigned int row = 0; row < nrow1; row++) + { + for(unsigned int col = 0; col < ncol2; col++) + { + for(unsigned int colRes = 0; colRes < length(matrix1,1); colRes++) + { + value(result,row,col)+=value(matrix1, row,colRes)*value(matrix2,colRes,col); + } + } + } + //return the matrix product + return result; } */ -// TODO(holtgrew): Should work as the graph-transpose. -/** -.Function.Matrix#transpose -..summary:Transposes matrix -..class:Class.Matrix -..signature:Matrix transpose(matrix) -..param.matrix:The matrix (mxn) to transpose. 
-...type:Class.Matrix -...remarks: must be of type Matrix (two dimensional) -..returns:Transposed matrix -..remarks:Only works on two dimensional matrices -..include:seqan/align.h -*/ + +/*! + * @fn Matrix#transpose + * @brief Tranpose a 2D Matrix. + * + * @signature TMatrix transpose(matrix); + * + * @param[in] matrix The matrix to tranpose. + * @return TMatrix The resulting tranposed matrix. + */ + template Matrix transpose(Matrix const & matrix) { - unsigned int nrow=length(matrix,0); - unsigned int ncol=length(matrix,1); + unsigned int nrow=length(matrix,0); + unsigned int ncol=length(matrix,1); - Matrix result; - //resize the matrix - setLength(result, 0, ncol); - setLength(result, 1, nrow); - resize(result); + Matrix result; + //resize the matrix + setLength(result, 0, ncol); + setLength(result, 1, nrow); + resize(result); - for(unsigned int i1 = 0;i1< nrow;++i1) - { - for(unsigned int i2 = 0;i2 -::std::ostream& operator<<(::std::ostream &out, const Matrix &matrix) -{ - for(unsigned int i1 = 0;i1< matrix.data_lengths[0];++i1) - { - for(unsigned int i2 = 0;i2<(matrix.data_lengths[1]-1);++i2) - { - out< &matrix) +{ + for(unsigned int i1 = 0;i1< matrix.data_lengths[0];++i1) + { + for(unsigned int i2 = 0;i2<(matrix.data_lengths[1]-1);++i2) + { + out< &matrix) // template < typename TValue > // void read(FILE *file, Matrix & matrix) // { -// //unsigned int column_size=3; -// unsigned int column_size=pow(4,5); -// //read the transition matrix -// setLength(matrix, 0, column_size); -// setLength(matrix, 1, column_size); +// //unsigned int column_size=3; +// unsigned int column_size=pow(4,5); +// //read the transition matrix +// setLength(matrix, 0, column_size); +// setLength(matrix, 1, column_size); // resize(matrix,0.0); -// for(unsigned int row=0; row +// ========================================================================== +// Extras for the align module +// ========================================================================== + +#ifndef 
SEQAN_ALIGN_EXTEND_H +#define SEQAN_ALIGN_EXTEND_H + +#include +#include +#include + +#include +#include +#include + +#endif diff --git a/seqan/align_extend/align_extend.h b/seqan/align_extend/align_extend.h new file mode 100644 index 0000000..038c28b --- /dev/null +++ b/seqan/align_extend/align_extend.h @@ -0,0 +1,647 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2013, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Hannes Hauswedell +// ========================================================================== +// This file contains routines to extend an existing Align object +// ========================================================================== + +#ifndef INCLUDE_ALIGN_ALIGN_EXTEND_H +#define INCLUDE_ALIGN_ALIGN_EXTEND_H + +namespace seqan { + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Class AliExtContext_ +// ---------------------------------------------------------------------------- + +// Context with memory holding objects for alignment extension +// This can be reused to prevent repeated memory allocations +template +struct AliExtContext_ +{ + typedef typename Size::Type TSize; + typedef typename Position::Type TPosition; + + TGaps0 leftRow0, centerRow0, rightRow0; + TGaps1 leftRow1, centerRow1, rightRow1; + + TDPContext dpContext; + + String > traceSegment; +}; + +template +inline void +clear(AliExtContext_ & prov) +{ + // gaps don't need to be cleared, because they are always + // re-assigned before use; dpContext, too! 
+ + // trace segment always needs to be cleared + clear(prov.traceSegment); + // dpContext doesn't need to be cleared +} + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function _reverseTrace() +// ---------------------------------------------------------------------------- + +// Reverse a trace string and adapt internal position. +template +void _reversePartialTrace(String, TSpec> & trace, + TPos2 const lengthH, + TPos2 const lengthV) +{ + typedef String, TSpec> TTrace; + typedef typename Iterator::Type TTraceIter; + + if (empty(trace)) + return; + + for (TTraceIter it = begin(trace, Rooted()); !atEnd(it); goNext(it)) + { + it->_horizontalBeginPos = lengthH - _getEndHorizontal(*it); + it->_verticalBeginPos = lengthV - _getEndVertical(*it); + } + reverse(trace); +} + +// ---------------------------------------------------------------------------- +// Function _setUpAndRunAlignImpl() +// ---------------------------------------------------------------------------- + +template +inline TScoreValue +_setUpAndRunAlignImpl(TAliExtContext_ & alignContext, + TString0 const & str0, + TString1 const & str1, + Score const & scoreScheme, + int const /*lowerDiag*/, + int const /*upperDiag*/, + TScoreValue const /*xDrop*/, + TTracebackConfig const & /*gapOrientation*/, + False const & /*TBoolBanded*/, + False const & /*TBoolXDrop*/) +{ + typedef FreeEndGaps_ TFreeEndGaps; + typedef AlignConfig2, DPBandConfig, TFreeEndGaps, + TracebackOn > > TAlignConfig; + + DPScoutState_ scoutState; + return _setUpAndRunAlignment(alignContext.dpContext, alignContext.traceSegment, scoutState, str0, str1, 
scoreScheme, + TAlignConfig()); +} + +template +inline TScoreValue +_setUpAndRunAlignImpl(TAliExtContext_ & alignContext, + TString0 const & str0, + TString1 const & str1, + Score const & scoreScheme, + int const lowerDiag, + int const upperDiag, + TScoreValue const /*xDrop*/, + TTracebackConfig const & /*gapOrientation*/, + True const & /*TBoolBanded*/, + False const & /*TBoolXDrop*/) +{ + typedef FreeEndGaps_ TFreeEndGaps; + typedef AlignConfig2, DPBandConfig, TFreeEndGaps, + TracebackOn > > TAlignConfig; + + DPScoutState_ scoutState; + return _setUpAndRunAlignment(alignContext.dpContext, alignContext.traceSegment, scoutState, str0, str1, scoreScheme, + TAlignConfig(lowerDiag, upperDiag)); +} + +template +inline TScoreValue +_setUpAndRunAlignImpl(TAliExtContext_ & alignContext, + TString0 const & str0, + TString1 const & str1, + Score const & scoreScheme, + int const /*lowerDiag*/, + int const /*upperDiag*/, + TScoreValue const xDrop, + TTracebackConfig const & /*gapOrientation*/, + False const & /*TBoolBanded*/, + True const & /*TBoolXDrop*/) +{ + typedef FreeEndGaps_ TFreeEndGaps; + typedef AlignConfig2 >, DPBandConfig, TFreeEndGaps, + TracebackOn > > TAlignConfig; + + DPScoutState_ > > scoutState(xDrop); + return _setUpAndRunAlignment(alignContext.dpContext, alignContext.traceSegment, scoutState, str0, str1, scoreScheme, + TAlignConfig()); +} + +template +inline TScoreValue +_setUpAndRunAlignImpl(TAliExtContext_ & alignContext, + TString0 const & str0, + TString1 const & str1, + Score const & scoreScheme, + int const lowerDiag, + int const upperDiag, + TScoreValue const xDrop, + TTracebackConfig const & /*gapOrientation*/, + True const & /*TBoolBanded*/, + True const & /*TBoolXDrop*/) +{ + typedef FreeEndGaps_ TFreeEndGaps; + typedef AlignConfig2 >, DPBandConfig, TFreeEndGaps, + TracebackOn > > TAlignConfig; + + DPScoutState_ > > scoutState(xDrop); + return _setUpAndRunAlignment(alignContext.dpContext, alignContext.traceSegment, scoutState, str0, str1, 
scoreScheme, + TAlignConfig(lowerDiag, upperDiag)); +} + +// ---------------------------------------------------------------------------- +// Function _extendAlignmentImpl() +// ---------------------------------------------------------------------------- + +template +inline TScoreValue +_extendAlignmentImpl(Gaps & row0, + Gaps & row1, + TScoreValue const & origScore, + TString0 const & hSeq, + TString1 const & vSeq, + Tuple const & positions, + ExtensionDirection const & direction, + int const lowerDiag, + int const upperDiag, + TScoreValue const & xDrop, + Score const & scoreScheme, + TBoolBanded const & /**/, + TBoolXDrop const & /**/, + TAliExtContext_ & alignContext) +{ + typedef typename Infix::Type TInf0; + typedef typename Infix::Type TInf1; + + TPos const hBeginPos = positions[0]; + TPos const vBeginPos = positions[1]; + TPos const hEndPos = positions[2]; + TPos const vEndPos = positions[3]; + + SEQAN_ASSERT_EQ(infix(source(row0), beginPosition(row0), endPosition(row0)), + infix(hSeq, hBeginPos, hEndPos)); + SEQAN_ASSERT_EQ(infix(source(row1), beginPosition(row1), endPosition(row1)), + infix(vSeq, vBeginPos, vEndPos)); + + bool extendLeft = ((direction & EXTEND_LEFT) && (hBeginPos > 0u) && (vBeginPos > 0u)); + bool extendRight = ((direction & EXTEND_RIGHT) && (hEndPos < length(hSeq)) && (vEndPos < length(vSeq))); + + clear(alignContext); + alignContext.centerRow0 = row0; + alignContext.centerRow1 = row1; + + TScoreValue leftScore = 0; + TScoreValue centerScore = origScore; + TScoreValue rightScore = 0; + + TPos newAlignLen = length(row0); + + // centerScore was set to "compute yourself" by interface function without score parameter + if (centerScore == minValue()) + { + centerScore = 0; + + for (TPos i = 0; i < length(row0); ++i) + { + if ( ( isGap(row0, i)) || ( isGap(row1, i)) ) + { + if (( i==0 ) || + (isGap(row0, i-1) != isGap(row0, i)) || + (isGap(row1, i-1) != isGap(row1, i)) ) + { + centerScore += scoreGapOpen(scoreScheme); + } + else + { + 
centerScore += scoreGapExtend(scoreScheme); + } + } + else + { + centerScore += score(scoreScheme, row0[i], row1[i]); + } + } + } + + // "reset" original alignment to full length on sequences and no gaps + assignSource(row0, infix(hSeq, 0, length(hSeq))); + assignSource(row1, infix(vSeq, 0, length(vSeq))); + + // left + if (extendLeft) + { + TInf0 inf0 = infix(hSeq, 0, hBeginPos); + TInf1 inf1 = infix(vSeq, 0, vBeginPos); + + // reverse input + ModifiedString const r_inf0(inf0); + ModifiedString const r_inf1(inf1); + + leftScore = _setUpAndRunAlignImpl(alignContext, r_inf0, r_inf1, scoreScheme, lowerDiag, upperDiag, xDrop, + TracebackConfig_(), TBoolBanded(), TBoolXDrop()); + // un-reverve + _reversePartialTrace(alignContext.traceSegment, length(inf0), length(inf1)); + + setSource(alignContext.leftRow0, inf0); + setSource(alignContext.leftRow1, inf1); + + _adaptTraceSegmentsTo(alignContext.leftRow0, alignContext.leftRow1, alignContext.traceSegment); + + if (length(alignContext.leftRow0) > 0) + { + integrateGaps(row0, alignContext.leftRow0); + integrateGaps(row1, alignContext.leftRow1); + setClippedBeginPosition(row0, clippedBeginPosition(alignContext.leftRow0)); + setClippedBeginPosition(row1, clippedBeginPosition(alignContext.leftRow1)); + + newAlignLen += length(alignContext.leftRow0); + } + else + { + extendLeft = false; + } + } + + // center + if (extendLeft) + { + integrateGaps(row0, alignContext.centerRow0, length(alignContext.leftRow0)); + integrateGaps(row1, alignContext.centerRow1, length(alignContext.leftRow1)); + } + else + { + integrateGaps(row0, alignContext.centerRow0, hBeginPos); + integrateGaps(row1, alignContext.centerRow1, vBeginPos); + TPos leadGaps0 = countGaps(begin(alignContext.centerRow0)); + TPos leadGaps1 = countGaps(begin(alignContext.centerRow1)); + + TPos sourceBeginPos0 = toSourcePosition(alignContext.centerRow0, leadGaps0) + hBeginPos - + beginPosition(alignContext.centerRow0); + TPos sourceBeginPos1 = 
toSourcePosition(alignContext.centerRow1, leadGaps1) + vBeginPos - + beginPosition(alignContext.centerRow1); + + setClippedBeginPosition(row0, toViewPosition(row0, sourceBeginPos0) - leadGaps0); + setClippedBeginPosition(row1, toViewPosition(row1, sourceBeginPos1) - leadGaps1); + } + + // right + if (extendRight) + { + TInf0 inf0 = infix(hSeq, hEndPos, length(hSeq)); + TInf1 inf1 = infix(vSeq, vEndPos, length(vSeq)); + + clear(alignContext.traceSegment); + rightScore = _setUpAndRunAlignImpl(alignContext, inf0, inf1, scoreScheme, lowerDiag, upperDiag, xDrop, + TracebackConfig_(), TBoolBanded(), TBoolXDrop()); + + setSource(alignContext.rightRow0, inf0); + setSource(alignContext.rightRow1, inf1); + _adaptTraceSegmentsTo(alignContext.rightRow0, alignContext.rightRow1, alignContext.traceSegment); + + if (length(alignContext.rightRow0) > 0) + { + integrateGaps(row0, alignContext.rightRow0); + integrateGaps(row1, alignContext.rightRow1); + + newAlignLen += length(alignContext.rightRow0); + } + } + + setClippedEndPosition(row0, clippedBeginPosition(row0) + newAlignLen); + setClippedEndPosition(row1, clippedBeginPosition(row1) + newAlignLen); + + return leftScore + centerScore + rightScore; +} + +// get rows from align object +template +inline TScoreValue +_extendAlignmentImpl(Align & align, + TScoreValue const & origScore, + TString const & hSeq, + TString const & vSeq, + Tuple const & positions, + ExtensionDirection const & direction, + int const lowerDiag, + int const upperDiag, + TScoreValue const & xDrop, + Score const & scoreScheme, + TBoolBanded const & /**/, + TBoolXDrop const & /**/, + TAliExtContext_ & alignContext) +{ + SEQAN_ASSERT_EQ_MSG(length(rows(align)), 2u, "Only works with pairwise alignments."); + SEQAN_ASSERT_EQ_MSG(length(row(align, 0)), length(row(align, 1)), "Invalid alignment!"); + + return _extendAlignmentImpl(row(align, 0), row(align, 1), origScore, hSeq, vSeq, positions, direction, lowerDiag, + upperDiag, xDrop, scoreScheme, TBoolBanded(), 
TBoolXDrop(), alignContext); +} + +// create AlignContext +template +inline TScoreValue +_extendAlignmentImpl(Align & align, + TScoreValue const & origScore, + TString const & hSeq, + TString const & vSeq, + Tuple const & positions, + ExtensionDirection const & direction, + int const lowerDiag, + int const upperDiag, + TScoreValue const & xDrop, + Score const & scoreScheme, + TBoolBanded const & /**/, + TBoolXDrop const & /**/) +{ + if (scoreGapOpen(scoreScheme) == scoreGapExtend(scoreScheme)) + { + typedef DPContext TDPContext; + typedef AliExtContext_, + Gaps, + TDPContext> TAliExtContext_; + TAliExtContext_ alignContext; + return _extendAlignmentImpl(align, origScore, hSeq, vSeq, positions, direction, lowerDiag, upperDiag, xDrop, + scoreScheme, TBoolBanded(), TBoolXDrop(), alignContext); + } + else + { + typedef DPContext TDPContext; + typedef AliExtContext_, + Gaps, + TDPContext> TAliExtContext_; + TAliExtContext_ alignContext; + return _extendAlignmentImpl(align, origScore, hSeq, vSeq, positions, direction, lowerDiag, upperDiag, xDrop, + scoreScheme, TBoolBanded(), TBoolXDrop(), alignContext); + } +} + +// ---------------------------------------------------------------------------- +// Function extendAlignment() +// ---------------------------------------------------------------------------- + +/*! + * @fn extendAlignment + * @headerfile + * @brief X-Drop extension for alignment objects. + * @signature TScoreValue extendAlignment(align, [origScore,] hSeq, vSeq, positions, extensionDirection, + * [lowerDiag, upperDiag,] [xDrop,] scoreScheme); + * + * @param[in,out] align The @link Align @endlink object to work on. Must be an alignment over the + * @link InfixSegment infix @endlink of the const type of hSeq + * and vSeq. Also see section "Returned Alignment". + * @param[in] origScore Original score value of the alignment (optional; computed if not provided). + * @param[in] hSeq Full horizontal sequence. + * @param[in] vSeq Full vertical sequence. 
+ * @param[in] positions A @link Tuple @endlink of length 4 with the begin and end position of the + * infixes in align. + * @param[in] extensionDirection + * The extension direction (@link ExtensionDirection @endlink). + * @param[in] lowerDiag Lower alignment diagonal to use (int). + * @param[in] upperDiag Upper alignment diagonal to use (int). + * @param[in] xDrop The X-drop value to use (integral value). It only limits computation of new + * columns in the DP-Matrix and has no influence on the diagonals (but can be combined with them). + * @param[in] scoringScheme + * The @link Score @endlink to use. + * + * @return TScoreValue + * The score of the new alignment. TScoreValue is the value type of + * scoringScheme. + * + * @section Returned Alignment + * + * The resulting alignment has the infixes extended to the whole underlying sequence. The alignment + * is clipped to give the parts of the aligned sequences. + * + * @section Example + * + * @include demos/dox/align_extend/extend_alignment.cpp + * + * The output is as follows: + * + * @include demos/dox/align_extend/extend_alignment.cpp.stdout + * + * @section Remarks + * + * It is necessary to explicitly pass hSeq, vSeq and the positions, because the + * original hSeq and vSeq (that Align was created on), might have been infixes, + * (especially if they are members of a ConcatDirect set) in which cases their + * actual begin and end positions cannot be inferred from the Align object's + * rows' source(). 
+ */ + +// NO BAND, NO XDROP +template +inline TScoreValue +extendAlignment(Align & align, + TString const & hSeq, + TString const & vSeq, + Tuple const & positions, + ExtensionDirection const & direction, + Score const & scoreScheme) +{ + return _extendAlignmentImpl(align, minValue(), hSeq, vSeq, positions, direction, 0, 0, 0, scoreScheme, + False(), False()); +} + +template +inline TScoreValue +extendAlignment(Align & align, + TScoreValue const & origScore, + TString const & hSeq, + TString const & vSeq, + Tuple const & positions, + ExtensionDirection const & direction, + Score const & scoreScheme) +{ + return _extendAlignmentImpl(align, origScore, hSeq, vSeq, positions, direction, 0, 0, 0, scoreScheme, False(), + False()); +} + +// BAND, NO XDROP +template +inline TScoreValue +extendAlignment(Align & align, + TString const & hSeq, + TString const & vSeq, + Tuple const & positions, + ExtensionDirection const & direction, + int const lowerDiag, + int const upperDiag, + Score const & scoreScheme) +{ + return _extendAlignmentImpl(align, minValue(), hSeq, vSeq, positions, direction, lowerDiag, upperDiag, + 0, scoreScheme, True(), False()); +} + +template +inline TScoreValue +extendAlignment(Align & align, + TScoreValue const & origScore, + TString const & hSeq, + TString const & vSeq, + Tuple const & positions, + ExtensionDirection const & direction, + int const lowerDiag, + int const upperDiag, + Score const & scoreScheme) +{ + return _extendAlignmentImpl(align, origScore, hSeq, vSeq, positions, direction, lowerDiag, upperDiag, 0, + scoreScheme, True(), False()); +} + +// NO BAND, XDROP +template +inline TScoreValue +extendAlignment(Align & align, + TString const & hSeq, + TString const & vSeq, + Tuple const & positions, + ExtensionDirection const & direction, + TScoreValue const & xDrop, + Score const & scoreScheme) +{ + return _extendAlignmentImpl(align, minValue(), hSeq, vSeq, positions, direction, 0, 0, xDrop, + scoreScheme, False(), True()); +} + +template 
+inline TScoreValue +extendAlignment(Align & align, + TScoreValue const & origScore, + TString const & hSeq, + TString const & vSeq, + Tuple const & positions, + ExtensionDirection const & direction, + TScoreValue const & xDrop, + Score const & scoreScheme) +{ + return _extendAlignmentImpl(align, origScore, hSeq, vSeq, positions, direction, 0, 0, xDrop, scoreScheme, False(), + True()); +} + +// BAND, XDROP +template +inline TScoreValue +extendAlignment(Align & align, + TString const & hSeq, + TString const & vSeq, + Tuple const & positions, + ExtensionDirection const & direction, + int const lowerDiag, + int const upperDiag, + TScoreValue const & xDrop, + Score const & scoreScheme) +{ + return _extendAlignmentImpl(align, minValue(), hSeq, vSeq, positions, direction, lowerDiag, upperDiag, + xDrop, scoreScheme, True(), True()); +} + +template +inline TScoreValue +extendAlignment(Align & align, + TScoreValue const & origScore, + TString const & hSeq, + TString const & vSeq, + Tuple const & positions, + ExtensionDirection const & direction, + int const lowerDiag, + int const upperDiag, + TScoreValue const & xDrop, + Score const & scoreScheme) +{ + return _extendAlignmentImpl(align, origScore, hSeq, vSeq, positions, direction, lowerDiag, upperDiag, xDrop, + scoreScheme, True(), True()); +} + +template +inline TScoreValue +extendAlignment(Align & align, + TAliExtContext & alignContext, + TScoreValue const & origScore, + TString const & hSeq, + TString const & vSeq, + Tuple const & positions, + ExtensionDirection const & direction, + int const lowerDiag, + int const upperDiag, + TScoreValue const & xDrop, + Score const & scoreScheme) +{ + return _extendAlignmentImpl(align, origScore, hSeq, vSeq, positions, direction, lowerDiag, upperDiag, xDrop, + scoreScheme, True(), True(), alignContext); +} + +} + +#endif // INCLUDE_ALIGN_ALIGN_EXTEND_H diff --git a/seqan/align_extend/align_extend_base.h b/seqan/align_extend/align_extend_base.h new file mode 100644 index 
0000000..d27d0e9 --- /dev/null +++ b/seqan/align_extend/align_extend_base.h @@ -0,0 +1,201 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2013, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. 
+// +// ========================================================================== +// Author: Hannes Hauswedell +// ========================================================================== +// This file contains routines to extend an existing Align object +// ========================================================================== + + +#ifndef INCLUDE_ALIGN_ALIGN_EXTEND_BASE_H +#define INCLUDE_ALIGN_ALIGN_EXTEND_BASE_H + +namespace seqan { + + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +template +struct AlignExtend_ +{ +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + + +// overrides for AligExtend general case +template +struct SetupAlignmentProfile_, TAlignConfig, TGapCosts, + TTraceSwitch> +{ + typedef DPProfile_, TGapCosts, TracebackOn<> > Type; +}; + +template +struct TraceTail_ > : False +{}; + +template +struct TraceHead_ > : True +{}; + +template +struct IsFreeEndGap_, DPLastRow> : True +{}; + +template +struct IsFreeEndGap_, DPLastColumn> : True +{}; + + + +// ---------------------------------------------------------------------------- +// Class DPMetaColumn_ [FullColumn] +// ---------------------------------------------------------------------------- + +template +struct DPMetaColumn_, TGapCosts, + TTraceback>, + MetaColumnDescriptor > +{ + + typedef typename If, RecursionDirectionZero, + RecursionDirectionHorizontal + >::Type TRecursionTypeFirstCell_; + typedef typename If, RecursionDirectionVertical, + RecursionDirectionAll + >::Type TRecursionTypeInnerCell_; + typedef typename If, RecursionDirectionVertical, + RecursionDirectionAll + >::Type TRecursionTypeLastCell_; + + typedef DPMetaCell_ TFirstCell_; + typedef DPMetaCell_ TInnerCell_; + typedef DPMetaCell_ TLastCell_; 
+}; + + +// ---------------------------------------------------------------------------- +// Class DPMetaColumn_ [PartialColumnTop] +// ---------------------------------------------------------------------------- + +template +struct DPMetaColumn_, TGapCosts, + TTraceback>, + MetaColumnDescriptor > +{ + + typedef typename If, RecursionDirectionZero, + RecursionDirectionHorizontal + >::Type TRecursionTypeFirstCell_; + typedef typename If, RecursionDirectionVertical, + RecursionDirectionAll + >::Type TRecursionTypeInnerCell_; + typedef typename If, RecursionDirectionVertical, + RecursionDirectionLowerDiagonal + >::Type TRecursionTypeLastCell_; + + typedef DPMetaCell_ TFirstCell_; + typedef DPMetaCell_ TInnerCell_; + typedef DPMetaCell_ TLastCell_; +}; + +// ---------------------------------------------------------------------------- +// Class DPMetaColumn_ [PartialColumnMiddle] +// ---------------------------------------------------------------------------- + +template +struct DPMetaColumn_, TGapCosts, + TTraceback>, + MetaColumnDescriptor > +{ + typedef typename If, RecursionDirectionZero, + RecursionDirectionUpperDiagonal + >::Type TRecursionTypeFirstCell_; + typedef typename If, RecursionDirectionVertical, + RecursionDirectionAll + >::Type TRecursionTypeInnerCell_; + typedef typename If, RecursionDirectionVertical, + RecursionDirectionLowerDiagonal + >::Type TRecursionTypeLastCell_; + + typedef DPMetaCell_ TFirstCell_; + typedef DPMetaCell_ TInnerCell_; + typedef DPMetaCell_ TLastCell_; +}; + +// ---------------------------------------------------------------------------- +// Class DPMetaColumn_ [PartialColumnBottom] +// ---------------------------------------------------------------------------- + +template +struct DPMetaColumn_, TGapCosts, + TTraceback>, + MetaColumnDescriptor > +{ + typedef typename If, RecursionDirectionZero, + RecursionDirectionUpperDiagonal + >::Type TRecursionTypeFirstCell_; + typedef typename If, RecursionDirectionVertical, + 
RecursionDirectionAll + >::Type TRecursionTypeInnerCell_; + typedef typename If, RecursionDirectionVertical, + RecursionDirectionAll + >::Type TRecursionTypeLastCell_; + + typedef DPMetaCell_ TFirstCell_; + typedef DPMetaCell_ TInnerCell_; + typedef DPMetaCell_ TLastCell_; +}; + +} +#endif diff --git a/seqan/align_extend/dp_scout_xdrop.h b/seqan/align_extend/dp_scout_xdrop.h new file mode 100644 index 0000000..b7d0045 --- /dev/null +++ b/seqan/align_extend/dp_scout_xdrop.h @@ -0,0 +1,206 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2013, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Hannes Hauswedell +// ========================================================================== +// This file contains routines to extend an existing Align object +// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_DP_SCOUT_EXTEND_H_ +#define INCLUDE_SEQAN_ALIGN_DP_SCOUT_EXTEND_H_ + +namespace seqan { + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Tag XDropScout. 
+// ---------------------------------------------------------------------------- + +template +struct XDrop_ +{ +}; + +// ---------------------------------------------------------------------------- +// Class DPScoutState_ +// ---------------------------------------------------------------------------- + +template +class DPScoutState_ > > +{ +public: + TScoreValue const terminationThreshold; + TScoreValue columnMax; + + DPScoutState_() : + terminationThreshold(MaxValue::VALUE), + columnMax(MinValue::VALUE) + { + } + + DPScoutState_(TScoreValue const & _terminationThreshold) : + terminationThreshold(_terminationThreshold), + columnMax(MinValue::VALUE) + { + } +}; + + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// overrides for AligExtend XDrop case +template +struct HasTerminationCriterium_ > > : True {}; + +template +struct ScoutSpecForAlignmentAlgorithm_ > > +{ + typedef Terminator_ > Type; +}; + +template +struct ScoutSpecForAlignmentAlgorithm_ > const> +{ + typedef Terminator_ > Type; +}; + +template +struct ScoutStateSpecForScout_< + DPScout_< + TDPCell, Terminator_::Type> > > > +{ + typedef Terminator_::Type> > Type; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function _scoutBestScore() [DPScout_] +// ---------------------------------------------------------------------------- + +// NOTE: The original code here used Value::Type instead of TDPCellValue but this caused ambiguous call +// errors in MSVC. 
+ +template +inline void +_scoutBestScore(DPScout_ > > & dpScout, + TDPCell const & activeCell, + TTraceMatrixNavigator const & navigator, + TIsLastColumn const & /**/, + False const & /*IsLastRow*/) +{ + typedef typename Value::Type TScoreValue; + typedef XDrop_ TXDrop; + typedef DPScout_ > TDPScout; + typedef typename TDPScout::TParent TParent; + + // global maximum + _scoutBestScore(static_cast( dpScout ), activeCell, navigator); + + // column maximum + dpScout.state->columnMax = _max(dpScout.state->columnMax, _scoreOfCell(activeCell)); +} + +template +inline void +_scoutBestScore(DPScout_ > > & dpScout, + TDPCell const & activeCell, + TTraceMatrixNavigator const & navigator, + TIsLastColumn const & /**/, + True const & /*IsLastRow*/) +{ + typedef typename Value::Type TScoreValue; + + _scoutBestScore(dpScout, activeCell, navigator, TIsLastColumn(), False()); + + // check termination condition + if (_scoreOfCell(dpScout._maxScore) - dpScout.state->columnMax >= dpScout.state->terminationThreshold) + terminateScout(dpScout); + else // reset columnMax at end of column + dpScout.state->columnMax = MinValue::VALUE; +} + +// ---------------------------------------------------------------------------- +// Function _computeCell() +// ---------------------------------------------------------------------------- + +// Computes the score and tracks it if enabled. +template +inline void +_computeCell(TDPScout & scout, + TTraceMatrixNavigator & traceMatrixNavigator, + DPCell_ & activeCell, + DPCell_ const & previousDiagonal, + DPCell_ const & previousHorizontal, + DPCell_ const & previousVertical, + TSequenceHValue const & seqHVal, + TSequenceVValue const & seqVVal, + TScoringScheme const & scoringScheme, + TColumnDescriptor const &, + TCellDescriptor const &, // One of FirstCell, InnerCell or LastCell.
+ DPProfile_ >, TGapCosts, + TTraceback> const &) +{ + typedef DPProfile_ >, TGapCosts, TTraceback> TDPProfile; + typedef DPMetaColumn_ TMetaColumn; + + assignValue(traceMatrixNavigator, _computeScore(activeCell, previousDiagonal, previousHorizontal, previousVertical, + seqHVal, seqVVal, scoringScheme, + typename RecursionDirection_::Type(), + TDPProfile())); + if (TrackingEnabled_::VALUE) + { + typedef typename IsSameType< typename TColumnDescriptor::TColumnProperty, DPFinalColumn>::Type TIsLastColumn; + + // the following is the only change to the regular _computeCell: + // for the evaluation of the termination criterium we treat + // all lastCells as lastRows + typedef typename IsSameType::Type TIsLastRow; + _scoutBestScore(scout, activeCell, traceMatrixNavigator, TIsLastColumn(), TIsLastRow()); + } +} + +} // namespace seqan + +#endif // #ifndef INCLUDE_SEQAN_ALIGN_DP_SCOUT_EXTEND_H_ diff --git a/seqan/align_profile.h b/seqan/align_profile.h new file mode 100644 index 0000000..21e5317 --- /dev/null +++ b/seqan/align_profile.h @@ -0,0 +1,57 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Manuel Holtgrewe +// ========================================================================== +// Facade header for module align_profile. 
+// ========================================================================== + +#ifndef INCLUDE_SEQAN_ALIGN_PROFILE_H_ +#define INCLUDE_SEQAN_ALIGN_PROFILE_H_ + +// =========================================================================== +// Prerequisites +// =========================================================================== + +#include +#include +#include +#include +#include + +// =========================================================================== +// Module Contents +// =========================================================================== + +#include +#include + +#endif // INCLUDE_SEQAN_ALIGN_PROFILE_H_ diff --git a/seqan/align_profile/add_to_profile.h b/seqan/align_profile/add_to_profile.h new file mode 100644 index 0000000..0967125 --- /dev/null +++ b/seqan/align_profile/add_to_profile.h @@ -0,0 +1,165 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Manuel Holtgrewe +// ========================================================================== +// Algorithm for profile-to-sequence alignment that then adds the resulting +// sequence alignment to the profile. 
+// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_PROFILE_ADD_TO_PROFILE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_PROFILE_ADD_TO_PROFILE_H_ + +namespace seqan { + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function addToProfile() +// ---------------------------------------------------------------------------- + +/*! + * @fn addToProfile + * @headerfile + * @brief Sequence-to-profile alignment with sequence integration. + * + * seq will be aligned to profile using @link globalAlignment @endlink. To compute a banded alignment + * lDiag and uDiag have to be given. + * The resulting alignment will then be integrated into profile by incrementing the count values of + * the profile entries that correspond to the seq row of the alignment. + * + * @signature void addToProfile(profile, seq[, lDiag, uDiag]); + * + * @param[in,out] profile The profile to add seq to. A @link String @endlink over @link ProfileChar @endlink. + * @param[in] seq @link String @endlink to align to profile. + * @param[in] lDiag Lower diagonal for alignment (@link SignedIntegerConcept @endlink). + * @param[in] uDiag Upper diagonal for alignment (@link SignedIntegerConcept @endlink). 
+ */ + +// TODO(holtgrew): Add example? + +template +void addToProfile(String > & profile, + String /*const*/ & seq, + int lDiag = minValue(), + int uDiag = maxValue()) // non-const because of holder issues +{ + typedef ProfileChar TProfileChar; + + typedef String TProfileString; + typedef String TSequence; + + // Define gaps and scoring scheme. + Gaps gapsH(profile); + Gaps gapsV(seq); + seqan::Score sScheme(profile); + + // Perform the global alignment. + if (lDiag == minValue() || uDiag == maxValue()) + globalAlignment(gapsH, gapsV, sScheme, Gotoh()); + else + globalAlignment(gapsH, gapsV, sScheme, lDiag, uDiag, Gotoh()); + + // Construct a new profile from the alignment into buffer and finally swap out the new profile. + TProfileString buffer; + reserve(buffer, length(gapsH)); + typename Iterator, Standard>::Type + itH = begin(gapsH, Standard()), + itHEnd = end(gapsH, Standard()); + typename Iterator, Standard>::Type itV = begin(gapsV, Standard()); + + // std::cout << "--- BEFORE ----\n"; + // std::cout << "HORIZONTAL\n"; + // for (; itH != itHEnd; ++itH) + // { + // if (isGap(itH)) + // { + // std::cerr << "-\n"; + // } + // else + // { + // TProfileChar c = (TProfileChar)*itH; + // std::cout << "(" << c.count[0] << ", " << c.count[1] << ", " << c.count[2] << ", " << c.count[3] << ", " << c.count[4] << ")\n"; + // } + //} + //std::cout << "VERTICAL\n"; + //std::cout << gapsV << "\n"; + + itH = begin(gapsH, Standard()); + SEQAN_ASSERT_EQ(length(gapsH), length(gapsV)); + for (; itH != itHEnd; ++itH, ++itV) + { + if (isGap(itH)) + { + SEQAN_ASSERT_NOT_MSG(isGap(itV), "Must not generate gaps columns!"); + appendValue(buffer, TProfileChar()); + back(buffer).count[valueSize()] += 1; + back(buffer).count[ordValue(TChar(*itV))] += 1; + } + else + { + TProfileChar c = *itH; + appendValue(buffer, c); + if (isGap(itV)) + back(buffer).count[valueSize()] += 1; + else + back(buffer).count[ordValue(TChar(*itV))] += 1; + } + } + + //std::cout << "--- AFTER ----\n"; + 
//std::cout << "HORIZONTAL\n"; + //for (typename Iterator::Type it = begin(buffer, Standard()); it != end(buffer, Standard()); ++it) + //{ + // std::cout << "(" << it->count[0] << ", " << it->count[1] << ", " << it->count[2] << ", " + // << it->count[3] << ", " << it->count[4] << ")\n"; + //} + + swap(buffer, profile); +} + +} // namespace seqan + +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_PROFILE_ADD_TO_PROFILE_H_ diff --git a/seqan/align_profile/score_profile_seq.h b/seqan/align_profile/score_profile_seq.h new file mode 100644 index 0000000..c557340 --- /dev/null +++ b/seqan/align_profile/score_profile_seq.h @@ -0,0 +1,536 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Manuel Holtgrewe +// ========================================================================== + +#ifndef SEQAN_INCLUDE_PROFILE_SPROFILE_SEQ_H_ +#define SEQAN_INCLUDE_PROFILE_SPROFILE_SEQ_H_ + +namespace seqan { + +// ============================================================================ +// Forwards +// ============================================================================ + +// ProfileSeqScore + +struct ProfileSeqScore_; +typedef Tag ProfileSeqScore; + +template +class Score; + +template +inline void +assignProfile(Score & me, TString const & profile); + +// ProfileSeqFracScore + +struct ProfileSeqFracScore_; +typedef Tag ProfileSeqFracScore; + +template +class Score; + +template +inline void +assignProfile(Score & me, TString const & profile); + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Class ProfileSeq Score +// ---------------------------------------------------------------------------- + +struct ProfileSeqScore_; +typedef Tag ProfileSeqScore; + +/*! + * @class ProfileSeqScore ProfileSeq Score + * @extends Score + * @headerfile + * @brief Score for sequence-to-profile alignments. 
+ * + * Using this class, you can align sequences to profiles. The profile is assumed to be in the horizontal direction + * (first row), the sequence in the vertical direction (second row). + * + * Scoring works as follows. + * + * The integer SEQAN_CONSENSUS_UNITY and fractions thereof are used to express scores. Gap opens in the + * profile are scored proportional to the number of gaps in the profile with two times unity, gap extends with one times + * unity at the position. + * + * Gap opens in the sequence are scored with two times unity, gap extends in the sequence with unity. Alignments of + * profile characters to sequence characters are scored with the fraction of profile characters that match the sequence + * characters times unity. + * + * @signature template + * class Score; + * + * @tparam TValue The integer type to use for representing scores. + * + * @section Examples + * + * The following example uses the ProfileSeq Score to align a sequence against a profile. + * Note that we print the gap state for each position since profiles cannot be printed to one stdout character. + * + * @include demos/dox/align_profile/profile_seq_score.cpp + * + * The output is as follows: + * + * @code{.output} + * score value = -2097152 + * gaps in profile/sequence + * pos G S + * 0 0 0 + * 1 1 0 + * 2 0 0 + * 3 1 0 + * 4 0 0 + * 5 0 0 + * @endcode + */ + +/*! + * @fn ProfileSeqScore::Score + * @brief Constructor + * + * @signature Score::Score(); + * @signature Score::Score(profile); + * + * @param[in] profile The profile to copy from (@link AllocString @endlink of @link ProfileChar @endlink objects). + * + * When providing profile, the function @link ProfileSeqScore#assignProfile @endlink is automatically used to + * assign the profile to this class. + */ + +template +class Score +{ + public: + // A table of position x (ord value) giving the counts of the characters at the given positions. + String consensusSet; + + Score() {} + + // Construct given a profile string. 
+ template + explicit + Score(TProfile const & profile) + { + assignProfile(*this, profile); + } +}; + +// ---------------------------------------------------------------------------- +// Class ProfileSeqFrac Score +// ---------------------------------------------------------------------------- + +struct ProfileSeqFracScore_; +typedef Tag ProfileSeqFracScore; + +/*! + * @class ProfileSeqFracScore ProfileSeqFrac Score + * @extends Score + * @headerfile + * @brief Score for sequence-to-profile alignments. + * + * Using this class, you can align sequences to profiles. The profile is assumed to be in the horizontal direction + * (first row), the sequence in the vertical direction (second row). + * + * Scoring works as follows. + * + * The integer SEQAN_CONSENSUS_UNITY and fractions thereof are used to express scores. Gap opens in the + * profile are scored proportional to the number of gaps in the profile two times unity, gap extends are scored + * proportional to the number of gaps in the profile at the position. Gap opens in the sequence are scored with two + * times unity, gap extends with one times unity. + * + * @signature template + * class Score; + * + * @tparam TValue The integer type to use for representing scores. + * + * @section Examples + * + * The following example uses the ProfileSeqFrac Score to align a sequence against a profile. Note that we print the + * gap state for each position since profiles cannot be printed to one stdout character. + * + * @include demos/dox/align_profile/profile_seq_frac_score.cpp + * + * The output is as follows: + * + * @code + * score value = -2097152 + * gaps in profile/sequence + * pos G S + * 0 0 0 + * 1 1 0 + * 2 0 0 + * 3 1 0 + * 4 0 0 + * 5 0 0 + * @endcode + */ + +/*! + * @fn ProfileSeqFracScore::Score + * @brief Constructor + * + * @signature Score::Score(); + * @signature Score::Score(profile); + * + * @param[in] profile The profile to copy from (@link AllocString @endlink of @link ProfileChar @endlink objects). 
+ * + * When providing profile, the function @link ProfileSeqFracScore#assignProfile @endlink is automatically used to + * assign the profile to this class. + */ + + +template +class Score +{ + public: + // Total number of profile characters in each column + String sum; + + Score() {} + + // Construct given a profile string. + template + explicit + Score(TProfile const & profile) + { + assignProfile(*this, profile); + } +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// -------------------------------------------------------------------------- +// Metafunction SequenceEntryForScore [ProfileSeq Score] +// -------------------------------------------------------------------------- + +// Returns the type that holds a sequence entry. This is used for abstracting away the access to sequence characters. + +template +struct SequenceEntryForScore, TSequence> +{ + typedef ConsensusScoreSequenceEntry Type; +}; + +template +struct SequenceEntryForScore const, TSequence> : + SequenceEntryForScore, TSequence> +{}; + +// -------------------------------------------------------------------------- +// Metafunction SequenceEntryForScore [ProfileSeqFrac Score] +// -------------------------------------------------------------------------- + +// Returns the type that holds a sequence entry. This is used for abstracting away the access to sequence characters. 
+ +template +struct SequenceEntryForScore, TSequence> +{ + typedef ConsensusScoreSequenceEntry Type; +}; + +template +struct SequenceEntryForScore const, TSequence> : + SequenceEntryForScore, TSequence> +{}; + +// ============================================================================ +// Functions +// ============================================================================ + +// -------------------------------------------------------------------------- +// Function sequenceEntryForScore() [ProfileSeq Score] +// -------------------------------------------------------------------------- + +template +inline ConsensusScoreSequenceEntry +sequenceEntryForScore(Score const & /*sScheme*/, + TSequence const & seq, TPosition pos) +{ + return ConsensusScoreSequenceEntry(seq, pos); +} + +// -------------------------------------------------------------------------- +// Function assignProfile() [ProfileSeq Score] +// -------------------------------------------------------------------------- + +/*! + * @fn ProfileSeqScore#assignProfile + * @brief Assign profile to ProfileSeqScore. + * + * @signature void assignProfile(score, profile); + * + * @param[out] score The ProfileSeqScore object to assign the profile for. + * @param[in] profile The profile to assign to the score. @link AllocString @endlink of @link ProfileChar @endlink. 
+ */ + +template +inline void +assignProfile(Score & me, + TString const & profile) +{ + typedef typename Size::Type TSize; + TSize alphSize = ValueSize::Type>::VALUE; + resize(me.consensusSet, alphSize * length(profile)); + + typedef typename Iterator::Type TIter; + typedef typename Iterator, Standard>::Type TConsSetIter; + TConsSetIter itConsSet = begin(me.consensusSet, Standard()); + TIter it = begin(profile, Standard()); + TIter itEnd = end(profile, Standard()); + TSize maxCount = 0; + for (;it!=itEnd;++it) + { + maxCount = 0; + for (TSize i = 0; i maxCount) + maxCount = (*it).count[i]; + for (TSize i = 0; i +inline TValue +scoreGapExtendHorizontal( + Score const & me, + ConsensusScoreSequenceEntry const & entry1, + ConsensusScoreSequenceEntry const & entry2) +{ + typedef typename Value::Type TValue1; + if ((int)position(entry2) < 0) + return -SEQAN_CONSENSUS_UNITY; + else + return me.consensusSet[position(entry1) * (ValueSize::VALUE) + (ValueSize::VALUE - 1)]; +} + +// -------------------------------------------------------------------------- +// Function scoreGapOpenHorizontal() [ProfileSeq Score] +// -------------------------------------------------------------------------- + +template +inline TValue +scoreGapOpenHorizontal( + Score const & me, + ConsensusScoreSequenceEntry const & entry1, + ConsensusScoreSequenceEntry const & entry2) +{ + typedef typename Value::Type TValue1; + if ((int)position(entry2) < 0) + return -2 * SEQAN_CONSENSUS_UNITY; + else + return 2 * me.consensusSet[position(entry1) * (ValueSize::VALUE) + (ValueSize::VALUE - 1)]; +} + +// -------------------------------------------------------------------------- +// Function scoreGapOpenVertical() [ProfileSeq Score] +// -------------------------------------------------------------------------- + +template +inline TValue +scoreGapOpenVertical( + Score const &, + ConsensusScoreSequenceEntry const & /*entry1*/, + ConsensusScoreSequenceEntry const & /*entry2*/) +{ + return -2 *
SEQAN_CONSENSUS_UNITY; +} + +// -------------------------------------------------------------------------- +// Function scoreGapExtendVertical() [ProfileSeq Score] +// -------------------------------------------------------------------------- + +template +inline TValue +scoreGapExtendVertical( + Score const &, + ConsensusScoreSequenceEntry const & /*entry1*/, + ConsensusScoreSequenceEntry const & /*entry2*/) +{ + return -SEQAN_CONSENSUS_UNITY; +} + +// -------------------------------------------------------------------------- +// Function score() [ProfileSeq Score] +// -------------------------------------------------------------------------- + +template +inline TValue +score(Score const & me, + ConsensusScoreSequenceEntry const & entry1, + ConsensusScoreSequenceEntry const & entry2) +{ + typedef typename Value::Type TValue1; + return me.consensusSet[position(entry1) * (ValueSize::VALUE) + ordValue(value(entry2))]; +} + +// -------------------------------------------------------------------------- +// Function sequenceEntryForScore() [ProfileSeqFrac Score] +// -------------------------------------------------------------------------- + +template +inline ConsensusScoreSequenceEntry +sequenceEntryForScore(Score const & /*sScheme*/, + TSequence const & seq, TPosition pos) +{ + return ConsensusScoreSequenceEntry(seq, pos); +} + +// -------------------------------------------------------------------------- +// Function assignProfile() [ProfileSeqFrac Score] +// -------------------------------------------------------------------------- + +/*! + * @fn ProfileSeqFracScore#assignProfile + * @brief Assign profile to ProfileSeqFrac Score. + * + * @signature void assignProfile(score, profile); + * + * @param[out] score The ProfileSeqFracScore object to assign the profile for. + * @param[in] profile The profile to assign to the score. @link AllocString @endlink of @link ProfileChar @endlink.
+ */ + +template +inline void +assignProfile(Score & me, + TString const & profile) +{ + typedef typename Size::Type TSize; + resize(me.sum, length(profile)); + typedef typename Iterator::Type TIter; + typedef typename Iterator, Standard>::Type TSumIter; + TSumIter itSum = begin(me.sum, Standard()); + TIter it = begin(profile, Standard()); + TIter itEnd = end(profile, Standard()); + for (; it!=itEnd; ++it, ++itSum) + { + *itSum = 0; + for (TSize i = 0; i < (TSize) ValueSize::Type>::VALUE; ++i) + *itSum += (*it).count[i]; + } +} + +// -------------------------------------------------------------------------- +// Function scoreGapExtendHorizontal() [ProfileSeqFrac Score] +// -------------------------------------------------------------------------- + +template +inline TValue +scoreGapExtendHorizontal( + Score const & me, + ConsensusScoreSequenceEntry const & entry1, + ConsensusScoreSequenceEntry const & entry2) +{ + if (((int)position(entry2) < 0) || (!me.sum[position(entry1)])) + return -SEQAN_CONSENSUS_UNITY; + else + return ((TValue) (( (int)value(entry1).count[ValueSize::Type>::VALUE - 1] - me.sum[position(entry1)]) * SEQAN_CONSENSUS_UNITY) / me.sum[position(entry1)]); +} + +// -------------------------------------------------------------------------- +// Function scoreGapOpenHorizontal() [ProfileSeqFrac Score] +// -------------------------------------------------------------------------- + +template +inline TValue +scoreGapOpenHorizontal( + Score const & me, + ConsensusScoreSequenceEntry const & entry1, + ConsensusScoreSequenceEntry const & entry2) +{ + if (((int)position(entry2) < 0) || (!me.sum[position(entry1)])) + return -SEQAN_CONSENSUS_UNITY; + else + return ((TValue) (((int)value(entry1).count[ValueSize::Type>::VALUE - 1] - me.sum[position(entry1)]) * SEQAN_CONSENSUS_UNITY) / me.sum[position(entry1)]); +} + +// -------------------------------------------------------------------------- +// Function scoreGapOpenVertical() [ProfileSeqFrac Score] +// 
-------------------------------------------------------------------------- + +template +inline TValue +scoreGapOpenVertical( + Score const &, + ConsensusScoreSequenceEntry const & /*entry1*/, + ConsensusScoreSequenceEntry const & /*entry2*/) +{ + return -SEQAN_CONSENSUS_UNITY; +} + +// -------------------------------------------------------------------------- +// Function scoreGapExtendVertical() [ProfileSeqFrac Score] +// -------------------------------------------------------------------------- + +template +inline TValue +scoreGapExtendVertical( + Score const &, + ConsensusScoreSequenceEntry const & /*entry1*/, + ConsensusScoreSequenceEntry const & /*entry2*/) +{ + return -SEQAN_CONSENSUS_UNITY; +} + +// -------------------------------------------------------------------------- +// Function score() [ProfileSeqFrac Score] +// -------------------------------------------------------------------------- + +template +inline TValue +score(Score const & me, + ConsensusScoreSequenceEntry const & entry1, + ConsensusScoreSequenceEntry const & entry2) +{ + if (!me.sum[position(entry1)]) + return -SEQAN_CONSENSUS_UNITY; + else + return ((TValue) (((int)value(entry1).count[ordValue(value(entry2))] - me.sum[position(entry1)]) * SEQAN_CONSENSUS_UNITY) / me.sum[position(entry1)]); +} + +} // namespace seqan + +#endif // #ifndef SEQAN_INCLUDE_PROFILE_SPROFILE_SEQ_H_ diff --git a/seqan/align_split.h b/seqan/align_split.h new file mode 100644 index 0000000..4352e2a --- /dev/null +++ b/seqan/align_split.h @@ -0,0 +1,55 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Manuel Holtgrewe +// ========================================================================== +// Facade header for module align_split. 
+// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_SPLIT_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_SPLIT_H_ + +// =========================================================================== +// Prerequisites. +// =========================================================================== + +#include +#include +#include + +// =========================================================================== +// Split Alignment Impelmentation. +// =========================================================================== + +#include +#include + +#endif // SEQAN_INCLUDE_SEQAN_ALIGN_SPLIT_H_ diff --git a/seqan/align_split/align_split_interface.h b/seqan/align_split/align_split_interface.h new file mode 100644 index 0000000..e0623e2 --- /dev/null +++ b/seqan/align_split/align_split_interface.h @@ -0,0 +1,568 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Manuel Holtgrewe +// ========================================================================== +// Split alignment implementation. +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_SPLIT_ALIGN_SPLIT_INTERFACE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_SPLIT_ALIGN_SPLIT_INTERFACE_H_ + +#include "dp_scout_split.h" + +namespace seqan { + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// Tag for the split alignment algorithm in DPProfile_. + +template +struct SplitAlignment_ {}; + +// Tag for the split alignment algorithm. 
+ +struct SplitAlignmentAlgo_; +typedef Tag SplitAlignmentAlgo; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Metafunction ScoutSpecForAlignmentAlgorithm_ +// ---------------------------------------------------------------------------- + +// For the split alignment, we will use our SplitAlignmentScout specialization of DPScout. + +template +struct ScoutSpecForAlignmentAlgorithm_ > +{ + typedef SplitAlignmentScout Type; +}; + +template +struct ScoutSpecForAlignmentAlgorithm_ const> +{ + typedef SplitAlignmentScout Type; +}; + +// ---------------------------------------------------------------------------- +// Metafunction IsSplitAlignment_ +// ---------------------------------------------------------------------------- + +// Convenience function used in the DP configuration below. + +template +struct IsSplitAlignment_ : False {}; + +template +struct IsSplitAlignment_ >: + True {}; + +template +struct IsSplitAlignment_ const>: + True {}; + +template +struct IsSplitAlignment_ >: + IsSplitAlignment_ {}; + +template +struct IsSplitAlignment_ const>: + IsSplitAlignment_ {}; + +// ---------------------------------------------------------------------------- +// Metafunction IsFreeEndGap_ +// ---------------------------------------------------------------------------- + +// We want the same free endgaps configuration as for global alignments. + +template +struct IsFreeEndGap_, TRow> : + IsFreeEndGap_, TRow> +{}; + +template +struct IsFreeEndGap_ const, TRow> : + IsFreeEndGap_ const, TRow> +{}; + +// ---------------------------------------------------------------------------- +// Metafunction IsGlobalAlignment_ +// ---------------------------------------------------------------------------- + +// We use similar functionality as the global alignment. 
+ +template +struct IsGlobalAlignment_ > : + True +{}; + +template +struct IsGlobalAlignment_ const> : + True +{}; + +// ---------------------------------------------------------------------------- +// Metafunction DPMetaColumn_ +// ---------------------------------------------------------------------------- + +template +struct DPMetaColumn_, TGapCosts, TTraceFlag>, MetaColumnDescriptor > +{ + typedef DPProfile_, TGapCosts, TTraceFlag> TDPProfile; + typedef typename IsLocalAlignment_::Type TIsLocal; + + // If InitialColumn -> Zero, Vertical | Zero, Vertical | Zero // Within the algorithm we need to define the first row as only one cell if it is no initial column + // If InnerColumn -> Horizontal | Zero, All, All + // If FinalColumn -> Horizontal | Zero, All, All + + typedef typename If, + IsFreeEndGap_ >, RecursionDirectionZero, RecursionDirectionHorizontal>::Type TRecursionTypeFirstCell_; + typedef typename If, + typename If, RecursionDirectionZero, RecursionDirectionVertical>::Type, + RecursionDirectionAll>::Type TRecursionTypeInnerCell_; + typedef typename If, + typename If, RecursionDirectionZero, RecursionDirectionVertical>::Type, + RecursionDirectionAll>::Type TRecursionTypeLastCell_; + + typedef DPMetaCell_ TFirstCell_; + typedef DPMetaCell_ TInnerCell_; + typedef DPMetaCell_ TLastCell_; +}; + + +template +struct DPMetaColumn_, TGapCosts, TTraceFlag>, MetaColumnDescriptor > +{ + typedef DPProfile_, TGapCosts, TTraceFlag> TDPProfile; + typedef typename IsLocalAlignment_::Type TIsLocal; + + // How does the recursion directions look like? 
+ + // If InitialColumn -> Zero, Vertical | Zero, Vertical | Zero // Within the algorithm we need to define the first row as only one cell if it is no initial column + // If InnerColumn -> Horizontal | Zero, All, LowerBand + // If FinalColumn -> Horizontal | Zero, All, LowerBand + + typedef typename If, + IsFreeEndGap_ >, RecursionDirectionZero, RecursionDirectionHorizontal>::Type TRecursionTypeFirstCell_; + typedef typename If, + typename If, RecursionDirectionZero, RecursionDirectionVertical>::Type, + RecursionDirectionAll>::Type TRecursionTypeInnerCell_; + typedef typename If, + typename If, RecursionDirectionZero, RecursionDirectionVertical>::Type, + RecursionDirectionLowerDiagonal>::Type TRecursionTypeLastCell_; + + typedef DPMetaCell_ TFirstCell_; + typedef DPMetaCell_ TInnerCell_; + typedef DPMetaCell_ TLastCell_; +}; + +template +struct DPMetaColumn_, TGapCosts, TTraceFlag>, MetaColumnDescriptor > +{ + typedef DPProfile_, TGapCosts, TTraceFlag> TDPProfile; + typedef typename IsLocalAlignment_::Type TIsLocal; + + // If InitialColumn -> Zero, Vertical | Zero, Vertical | Zero // Within the algorithm we need to define the first row as only one cell if it is no initial column + // If InnerColumn -> UpperDiagonal, All, LowerDiagonal + // If FinalColumn -> UpperDiagonal, All, LowerDiagonal + + typedef typename If, RecursionDirectionZero, RecursionDirectionUpperDiagonal>::Type TRecursionTypeFirstCell_; + typedef typename If, + typename If, RecursionDirectionZero, RecursionDirectionVertical>::Type, + RecursionDirectionAll>::Type TRecursionTypeInnerCell_; + typedef typename If, + typename If, RecursionDirectionZero, RecursionDirectionVertical>::Type, + RecursionDirectionLowerDiagonal>::Type TRecursionTypeLastCell_; + + typedef DPMetaCell_ TFirstCell_; + typedef DPMetaCell_ TInnerCell_; + typedef DPMetaCell_ TLastCell_; +}; + +template +struct DPMetaColumn_, TGapCosts, TTraceFlag>, MetaColumnDescriptor > +{ + typedef DPProfile_, TGapCosts, TTraceFlag> TDPProfile; + 
typedef typename IsLocalAlignment_::Type TIsLocal; + + // If InitialColumn -> Zero, Vertical | Zero, Vertical | Zero // Within the algorithm we need to define the first row as only one cell if it is no initial column + // If InnerColumn -> UpperDiagonal, All, All + // If FinalColumn -> UpperDiagonal, All, All + + typedef typename If, RecursionDirectionZero, RecursionDirectionUpperDiagonal>::Type TRecursionTypeFirstCell_; + typedef typename If, + typename If, RecursionDirectionZero, RecursionDirectionVertical>::Type, + RecursionDirectionAll>::Type TRecursionTypeInnerCell_; + typedef typename If, + typename If, RecursionDirectionZero, RecursionDirectionVertical>::Type, + RecursionDirectionAll>::Type TRecursionTypeLastCell_; + + typedef DPMetaCell_ TFirstCell_; + typedef DPMetaCell_ TInnerCell_; + typedef DPMetaCell_ TLastCell_; +}; + +// ---------------------------------------------------------------------------- +// Metafunction SetupAlignmentProfile_ +// ---------------------------------------------------------------------------- + +template +struct SetupAlignmentProfile_ +{ + typedef DPProfile_, TGapCosts, TTraceSwitch> Type; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function _reverseTrace() +// ---------------------------------------------------------------------------- + +// Reverse a trace string and adapt internal position. 
+template +void _reverseTrace(String, TSpec> & trace) +{ + typedef String, TSpec> TTrace; + typedef typename Iterator::Type TTraceIter; + + if (empty(trace)) + return; + TPosition lengthH = _getEndHorizontal(front(trace)); + TPosition lengthV = _getEndVertical(front(trace)); + + for (TTraceIter it = begin(trace, Rooted()); !atEnd(it); goNext(it)) + { + it->_horizontalBeginPos = lengthH - _getEndHorizontal(*it); + it->_verticalBeginPos = lengthV - _getEndVertical(*it); + } + reverse(trace); +} + +// ---------------------------------------------------------------------------- +// Function _splitAlignmentImpl() +// ---------------------------------------------------------------------------- + +// We call the long sequence contig and the shorter one read but could be changed roles. +template +int _splitAlignmentImpl(Gaps & gapsContigL, + Gaps & gapsReadL, + Gaps & gapsContigR, + Gaps & gapsReadR, + int lowerDiagonal, + int upperDiagonal, + Score const & scoringScheme) +{ + typedef Gaps TGaps; + typedef typename Size::Type TSize; + typedef typename Position::Type TPosition; + typedef TraceSegment_ TTraceSegment; + + typedef FreeEndGaps_ TFreeEndGaps; + //alignConfig; + + // Check whether we need to run the banded versions. + bool banded = (lowerDiagonal != minValue() && upperDiagonal != maxValue()); + + // Compute trace and split score sequence for the left alignment. 
+ + DPScoutState_ scoutStateL; + resize(scoutStateL.splitScore, length(source(gapsContigL)) + 1, minValue() / 2); + + String traceL; + if (!banded) + { + typedef AlignConfig2, TFreeEndGaps, TracebackOn > > TAlignConfig; + _setUpAndRunAlignment(traceL, scoutStateL, source(gapsContigL), source(gapsReadL), scoringScheme, TAlignConfig()); + } + else + { + typedef AlignConfig2, TFreeEndGaps, TracebackOn > > TAlignConfig; + _setUpAndRunAlignment(traceL, scoutStateL, source(gapsContigL), source(gapsReadL), scoringScheme, + TAlignConfig(lowerDiagonal, upperDiagonal)); + } + _adaptTraceSegmentsTo(gapsContigL, gapsReadL, traceL); + + // Get reversed versions of the right contig and read sequence. + ModifiedString revContigR(source(gapsContigR)); + ModifiedString revReadR(source(gapsReadR)); + + // Compute trace and split score sequence for the right alignment. + + DPScoutState_ scoutStateR; + resize(scoutStateR.splitScore, length(source(gapsContigR)) + 1, minValue() / 2); + + String traceR; + if (!banded) + { + typedef AlignConfig2, TFreeEndGaps, TracebackOn > > TAlignConfig; + _setUpAndRunAlignment(traceR, scoutStateR, revContigR, revReadR, scoringScheme, TAlignConfig()); + } + else + { + typedef AlignConfig2, TFreeEndGaps, TracebackOn > > TAlignConfig; + _setUpAndRunAlignment(traceR, scoutStateR, revContigR, revReadR, scoringScheme, + TAlignConfig(lowerDiagonal, upperDiagonal)); + } + // Reverse trace so it fits to the forward right sequences. Also reverse the trace such that we can directly apply + // it for the right alignment. + _reverseTrace(traceR); + reverse(scoutStateR.splitScore); + _adaptTraceSegmentsTo(gapsContigR, gapsReadR, traceR); + + SEQAN_ASSERT_EQ(length(scoutStateL.splitScore), length(scoutStateR.splitScore)); + + // We will split the left and right alignments into two parts such that the alignment score is optimal. We compute + // the leftmost best position for a split (equivalent to the best prefix of the first left alignment). 
Note that + // placing the breakpoint at the leftmost position is coherent with the SNPdb semantics but there are other data + // bases that use rightmost placement. + + // TODO(holtgrew): Make selecting the left/right split position from interface possible? Maybe not necessary. + + int bestScore = minValue() / 2; + unsigned bestPrefixLength = 0; + for (unsigned i = 0; i < length(scoutStateL.splitScore); ++i) + { + int s = scoutStateL.splitScore[i] + scoutStateR.splitScore[i]; + if (s > bestScore) + { + bestScore = s; + bestPrefixLength = i; + } + } + + // std::cerr << "bestPrefixLength = " << bestPrefixLength << "\n"; + + // std::cerr << "split store left "; + // for (unsigned i = 0; i < length(scoutStateL.splitScore); ++i) + // fprintf(stderr, " %3d", scoutStateL.splitScore[i]); + // std::cerr << "\n"; + // std::cerr << "split store right"; + // for (unsigned i = 0; i < length(scoutStateR.splitScore); ++i) + // fprintf(stderr, " %3d", scoutStateR.splitScore[i]); + // std::cerr << "\n"; + + // Set the clipping positions. + TPosition cePosR = toViewPosition(gapsContigR, bestPrefixLength); + setClippedBeginPosition(gapsContigR, cePosR); + setClippedBeginPosition(gapsReadR, cePosR); + // We have to correct the clipping position for the left alignment because of the to-right projection. The + // insertion itself is not part of the alignment. + TPosition cePosL = toViewPosition(gapsContigL, bestPrefixLength); + if (bestPrefixLength > 0) + cePosL = toViewPosition(gapsContigL, bestPrefixLength - 1) + 1; + setClippedEndPosition(gapsContigL, cePosL); + setClippedEndPosition(gapsReadL, cePosL); + + return bestScore; +} + +// ---------------------------------------------------------------------------- +// Function splitAlignment() +// ---------------------------------------------------------------------------- + +/*! + * @fn splitAlignment + * @headerfile + * @brief Compute split alignments. 
+ * + * @signature TScoreValue splitAlignment(alignL, alignR, scoringScheme[, lowerDiag, upperDiag]); + * @signature TScoreValue splitAlignment(gapsHL, gapsVL, gapsHR, gapsVR, scoringScheme[, lowerDiag, upperDiag]); + * + * @param[in,out] alignL @link Align @endlink object with two rows for the left alignment. + * @param[in,out] alignR @link Align @endlink object with two rows for the right alignment. + * @param[in,out] gapsHL @link Gaps @endlink object with the horizontal/contig row for the left alignment. + * @param[in,out] gapsVL @link Gaps @endlink object with the vertical/read row for the left alignment. + * @param[in,out] gapsHR @link Gaps @endlink object with the horizontal/contig row for the right alignment. + * @param[in,out] gapsVR @link Gaps @endlink object with the vertical/read row for the right alignment. + * @param[in] scoringScheme The scoring scheme to use for the alignment. + * @param[in] lowerDiag The lower diagonal.You have to specify the upper and lower diagonals for the left + * alignment. For the right alignment, the corresponding diagonals are chosen for the + * lower right part of the DP matrix, int. + * @param[in] upperDiag The lower diagonal. Also see remark for lowerDiag, int. + * + * @return TScoreValue The sum of the alignment scores of both alignments (Metafunction: @link Score#Value @endlink + * of the type of scoringScheme). + * + * There are two variants of the split alignment problem. In the first variant, we wan to align two sequences where the + * first (say the reference) one is shorter than the second (say a read) and the read contains an insertion with respect + * to the reference. We now want to align the read agains the reference such that the left part of the read aligns well + * against the left part of the reference and the right part of the read aligns well against the right part of the + * reference. The center gap in the reference is free. 
+ * + * For example: + * + * @code{.console} + * reference AGCATGTTAGATAAGATAGC-----------TGTGCTAGTAGGCAGTCAGCGCCAT + * |||||||||||||||||||| ||||||||||||||||||||||||| + * read AGCATGTTAGATAAGATAGCCCCCCCCCCCCTGTGCTAGTAGGCAGTCAGCGCCAT + * @endcode + * + * The second variant is to align two sequences A and B against a reference such that the left part of A aligns well to + * the left part of the reference and the right part of B aligns well to the right part of the reference. Together, + * both reads span the whole reference and overlap with an insertion in the reference. + * + * @code{.console} + * reference AGCATGTTAGATAAGATAGCTGTGCTAGTAGGCAGTCAGCGCCAT + * |||||||||||||||||| | || + * AGCATGTTAGATAAGATATCCGTCC + * read 1 + * ||| ||||||||||||||||||||||| + * CCGCTATGCTAGTAGGCAGTCAGCGCCAT + * read 2 + * @endcode + * + * The resulting alignment of the left/right parts is depicted below. The square brackets indicate clipping positions. + * + * @code{.console} + * reference AGCATGTTAGATAAGATA [GCTGTGCTAGTAGGCAGTCAGCGCCAT + * |||||||||||||||||| [ | || + * AGCATGTTAGATAAGATA [TCCGTCC + * read 1 + * reference AGCATGTTAGATAAGATA] GTGCTAGTAGGCAGTCAGCGCCAT + * ] ||||||||||||||||||||||| + * CCGCT] ATGCTAGTAGGCAGTCAGCGCCAT + * read 2 + * @endcode + * + * In the first case, we want to find the one breakpoint in the reference and the two breakpoints in the reads and the + * alignment of the left and right well-aligning read parts. In the second case, we want to find the one breakpoint in + * the reference and the breakpoint/clipping position in each read. + * + * The splitAlignment() function takes as the input two alignments. The sequence in each alignment's first row + * is the reference and the sequence of the second row is the read. The sequence has to be the same sequence whereas + * the reads might differ. If the reads are the same then this is the same as the first case and if the reads differ + * then this is the second case. 
+ * + * The result is two alignments of the left and right contig path clipped appropriately. The resulting score is the sum + * of the scores of both alignments. + * + * @section Remarks + * + * The DP algorithm is chosen automatically depending on whether the gap open and extension costs are equal. + * + * @section Example + * + * The following example demonstrates the usage of splitAlignment in the first case. The second case + * works accordingly. + * + * @include demos/dox/align_split/split_alignment.cpp + * + * The output is as follows. + * + * @include demos/dox/align_split/split_alignment.cpp.stdout + */ + +// Variant: unbanded, with Align objects. + +template +int splitAlignment(Align & alignL, + Align & alignR, + Score const & scoringScheme) +{ + SEQAN_ASSERT_EQ_MSG(source(row(alignL, 0)), source(row(alignR, 0)), + "Contig must be the same for left and right split alignment."); + + return _splitAlignmentImpl(row(alignL, 0), row(alignL, 1), row(alignR, 0), row(alignR, 1), + minValue(), maxValue(), + scoringScheme); +} + +// Variant: unbanded, with Gaps objects. + +template +int splitAlignment(Gaps & gapsHL, + Gaps & gapsVL, + Gaps & gapsHR, + Gaps & gapsVR, + Score const & scoringScheme) +{ + SEQAN_ASSERT_EQ_MSG(source(gapsHL), source(gapsHR), + "Contig must be the same for left and right split alignment."); + + return _splitAlignmentImpl(gapsHL, gapsVL, gapsHR, gapsVR, minValue(), maxValue(), + scoringScheme); +} + +// Variant: banded, with Align objects. + +template +int splitAlignment(Align & alignL, + Align & alignR, + Score const & scoringScheme, + int lowerDiagonal, + int upperDiagonal) +{ + SEQAN_ASSERT_EQ_MSG(source(row(alignL, 0)), source(row(alignR, 0)), + "Contig must be the same for left and right split alignment."); + + return _splitAlignmentImpl(row(alignL, 0), row(alignL, 1), row(alignR, 0), row(alignR, 1), + lowerDiagonal, upperDiagonal, scoringScheme); +} + +// Variant: banded, with Gaps objects. 
+ +template +int splitAlignment(Gaps & gapsHL, + Gaps & gapsVL, + Gaps & gapsHR, + Gaps & gapsVR, + Score const & scoringScheme, + int lowerDiagonal, + int upperDiagonal) +{ + SEQAN_ASSERT_EQ_MSG(source(gapsHL), source(gapsHR), + "Contig must be the same for left and right split alignment."); + + return _splitAlignmentImpl(gapsHL, gapsVL, gapsHR, gapsVR, lowerDiagonal, upperDiagonal, + scoringScheme); +} + +} // namespace seqan + +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_SPLIT_ALIGN_SPLIT_INTERFACE_H_ diff --git a/seqan/stream/adapt_mmap.h b/seqan/align_split/dp_scout_split.h similarity index 54% rename from seqan/stream/adapt_mmap.h rename to seqan/align_split/dp_scout_split.h index c74ee3c..2eab23e 100644 --- a/seqan/stream/adapt_mmap.h +++ b/seqan/align_split/dp_scout_split.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -29,15 +29,13 @@ // DAMAGE. // // ========================================================================== -// Author: Hannes Hauswedell +// Author: Manuel Holtgrewe // ========================================================================== -// Adaptions for the SeqAn Strings +// DPScout_ specialization for the split alignment implementation. // ========================================================================== -// TODO(holtgrew): Should better be string adaption! 
- -#ifndef SEQAN_STREAM_ADAPT_MMAP_H_ -#define SEQAN_STREAM_ADAPT_MMAP_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ALIGN_SPLIT_DP_SCOUT_SPLIT_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGN_SPLIT_DP_SCOUT_SPLIT_H_ namespace seqan { @@ -49,118 +47,85 @@ namespace seqan { // Tags, Classes, Enums // ============================================================================ -/* -.Adaption.String -..cat:Input/Output -..summary:Adaption from $fstream$, $ifstream$ and $ofstream$ to the @Concept.StreamConcept@ concept. -..include:seqan/stream.h - */ - -// ============================================================================ -// Metafunctions -// ============================================================================ - - -// ============================================================================ -// Functions -// ============================================================================ - // ---------------------------------------------------------------------------- -// Function streamWriteChar() +// Tag SplitAlignmentScout. // ---------------------------------------------------------------------------- -template -inline int -streamWriteChar(String & stream, TChar const & c) -{ - appendValue(stream, c); - return 0; -} +struct SplitAlignmentScout_; +typedef Tag SplitAlignmentScout; // ---------------------------------------------------------------------------- -// Function streamWriteBlock() +// Class DPScoutState_ // ---------------------------------------------------------------------------- -template -inline typename Size >::Type -streamWriteBlock(String & stream, char const * ptr, unsigned count) +template <> +class DPScoutState_ { - reserve(stream, length(stream) + count); - for (unsigned i = 0; i < count; ++i, ++ptr) - appendValue(stream, *ptr); - return count; -} +public: + // The best score for each column. Initialized with 0.5*minValue(). 
+ seqan::String splitScore; + + DPScoutState_() + {} +}; // ---------------------------------------------------------------------------- -// Function streamPut() +// Class DPScout_ // ---------------------------------------------------------------------------- -template -inline int -streamPut(String & stream, char const c) -{ - appendValue(stream, c); - return 0; -} - -template -inline int -streamPut(String & stream, - SimpleType const & c) +template +class DPScout_ : public DPScout_ { - appendValue(stream, c); - return 0; -} +public: + typedef DPScout_ TParent; + DPScoutState_ * state; -// template -// inline int -// streamPut(String > & stream, -// String, TSpec2> const & source) -// { -// String buf = source; -// append(stream, toCString(buf)); -// return 0; -// } - -template -inline int -streamPut(String & stream, - String const & source) -{ - append(stream, source); - return 0; -} + DPScout_() : TParent(), state(0) + {} + DPScout_(DPScoutState_ & state) : TParent(), state(&state) + {} +}; -template -inline int -_appendWithoutTrailing0(String & stream, - TSource const & source) -{ - for (int i = 0; source[i] != 0; ++i) - appendValue(stream, source[i]); - return 0; -} +// ============================================================================ +// Metafunctions +// ============================================================================ -template -inline int -streamPut(String & stream, char const *source) -{ - return _appendWithoutTrailing0(stream, source); -} +// ============================================================================ +// Functions +// ============================================================================ +// ---------------------------------------------------------------------------- +// Function _scoutBestScore() [DPScout_] +// ---------------------------------------------------------------------------- -// for numerical types -template -inline int -streamPut(String & stream, TSource const & source) +template +inline void 
+_scoutBestScore(DPScout_ & dpScout, + TDPCell const & activeCell, + TTraceMatrixNavigator const & navigator, + TIsLastColumn /*isLastColumn*/, + TIsLastRow /*isLastRow*/ ) { - return _streamPut(stream, source, typename IsSequence::Type()); + //typedef typename Value::Type TScoreValue; + // Note that the underlying matrix has the coordinates flipped. We use posH/posV as we would in pairwise alignments + // and thus this is the reverse from the matrix representation. + unsigned posH = coordinate(navigator, +DPMatrixDimension_::HORIZONTAL); + // unsigned posV = coordinate(navigator, +DPMatrixDimension_::VERTICAL); + + int & i = dpScout.state->splitScore[posH]; + i = std::max(i, _scoreOfCell(activeCell)); + + // We track only the last row for the best traceback score. + if (TIsLastColumn::VALUE || TIsLastRow::VALUE) + { + typedef DPScout_ TDPScout; + typedef typename TDPScout::TParent TParent; + _scoutBestScore(static_cast(dpScout), activeCell, navigator); + } } - } // namespace seqan -#endif // #ifndef SEQAN_STREAM_ADAPT_MMAP_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_SPLIT_DP_SCOUT_SPLIT_H_ diff --git a/seqan/alignment_free.h b/seqan/alignment_free.h new file mode 100644 index 0000000..073825a --- /dev/null +++ b/seqan/alignment_free.h @@ -0,0 +1,63 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Jonathan Goeke +// ========================================================================== +// Facade header for module alignment_free. +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_H_ + +// =========================================================================== +// Prerequisites. +// =========================================================================== + +#include +#include +#include +#include +#include + +// =========================================================================== +// Module's headers. 
+// =========================================================================== + +#include +#include +#include +#include +#include +#include + +#include + +#endif // SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_H_ diff --git a/seqan/alignment_free/af_d2.h b/seqan/alignment_free/af_d2.h new file mode 100644 index 0000000..a7222d4 --- /dev/null +++ b/seqan/alignment_free/af_d2.h @@ -0,0 +1,124 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Jonathan Goeke +// ========================================================================== +// This header contains the implementation of the D2 score for alignment free +// sequence comparison (inner product of kmer counts). +// +// These functions can be called with alignmentFreeComparison(). +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_D2_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_D2_H_ + +namespace seqan { + +/* + * _alignmentFreeComparison is called by alignmentFreeComparison() (see alignment_free_comparison.h) + */ +template +void _alignmentFreeComparison(Matrix & scoreMatrix, + TStringSet const & sequenceSet, + AFScore const & score) +{ + //typedef typename Value::Type TString; + //typedef typename Value::Type TAlphabet; + typedef typename Iterator::Type TIteratorSet; + typedef typename Iterator > >::Type TIteratorSetInt; + //typedef Matrix TMatrix; + + unsigned seqNumber = length(sequenceSet); + + // Resize the scoreMatrix + setLength(scoreMatrix, 0, seqNumber); + setLength(scoreMatrix, 1, seqNumber); + resize(scoreMatrix, (TValue) 0); + + StringSet > kmerCounts; + resize(kmerCounts, seqNumber); + + // Count all kmers + TIteratorSetInt itKmerCounts = begin(kmerCounts); + TIteratorSet itSeqSet = begin(sequenceSet); + + for (; itSeqSet < 
end(sequenceSet); ++itSeqSet) + { + countKmers(value(itKmerCounts), value(itSeqSet), score.kmerSize); + ++itKmerCounts; + } + if(score.verbose) + { + std::cout << "\ncounted words"; + } + + // Calculate all pairwise scores and store them in scoreMatrix + for (unsigned rowIndex = 0; rowIndex < seqNumber; ++rowIndex) + { + if(score.verbose) + { + std::cout << "\nSequence number " << rowIndex; + } + for (unsigned colIndex = rowIndex; colIndex < seqNumber; ++colIndex) + { + _alignmentFreeCompareCounts(value(scoreMatrix, rowIndex, colIndex), kmerCounts[rowIndex], kmerCounts[colIndex], score); + value(scoreMatrix, colIndex, rowIndex) = value(scoreMatrix, rowIndex, colIndex); // Copy symmetric entries + } + } + +} + +/* + * Calculate pairwise score given the counts of all kmers + */ +template +void +_alignmentFreeCompareCounts(TValue & result, + String const & kmerCounts1, + String const & kmerCounts2, + AFScore const & /*score*/) +{ + typedef typename Iterator const>::Type TIteratorInt; + + TIteratorInt it1 = begin(kmerCounts1); + TIteratorInt it2 = begin(kmerCounts2); + + result = 0; + for (; it1 < end(kmerCounts1); ++it1) + { + result += value(it1) * value(it2); + ++it2; + } +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_D2_H_ diff --git a/seqan/alignment_free/af_d2star.h b/seqan/alignment_free/af_d2star.h new file mode 100644 index 0000000..2791818 --- /dev/null +++ b/seqan/alignment_free/af_d2star.h @@ -0,0 +1,224 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Jonathan Goeke +// ========================================================================== +// This header contains the implementation of the D2star score for alignment +// free sequence comparison. +// +// See: Reinert et al. J Comput Biol. 2009 Dec;16(12):1615-34. +// +// These functions can be called with alignmentFreeComparison(). 
+// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_D2STAR_ORIGINAL_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_D2STAR_ORIGINAL_H_ + +namespace seqan { + +/* + * _alignmentFreeComparison is called by alignmentFreeComparison() (see alignment_free_comparison.h) + */ +template +void _alignmentFreeComparison(Matrix & scoreMatrix, + TStringSet const & sequenceSet, + AFScore const & score) +{ + + //typedef typename Iterator::Type TIteratorSet; + //typedef typename Iterator > >::Type TIteratorSetDouble; + + //typedef Matrix TMatrix; + + unsigned seqNumber = length(sequenceSet); + + // Resize the scoreMatrix + setLength(scoreMatrix, 0, seqNumber); + setLength(scoreMatrix, 1, seqNumber); + resize(scoreMatrix, (TValue) 0); + StringSet > standardisedKmerCounts; + resize(standardisedKmerCounts, seqNumber); + + // Calculate all pairwise scores and store them in scoreMatrix + for (unsigned rowIndex = 0; rowIndex < seqNumber; ++rowIndex) + { + if(score.verbose) + { + std::cout << "\nSequence number " << rowIndex; + } + for (unsigned colIndex = rowIndex; colIndex < seqNumber; ++colIndex) + { + _d2star(value(scoreMatrix, rowIndex, colIndex), sequenceSet[rowIndex], sequenceSet[colIndex], score); + value(scoreMatrix, colIndex, rowIndex) = value(scoreMatrix, rowIndex, colIndex); // Copy symmetric entries + } + } +} + +/* + * _d2star calculates the pairwise score of two sequences according to the paper referenced above. + */ +template +void _d2star(TValue & result, + TSequence const & sequence1, + TSequence const & sequence2, + AFScore const & score) +{ + typedef typename Value::Type TAlphabet; + typedef typename UnmaskedAlphabet_::Type TUnmaskedAlphabet; + + TValue missing = -pow(10.0, 10); + TSequence seq1seq2; + append(seq1seq2, sequence1); + append(seq1seq2, sequence2); + result = 0.0; + + // Note that there is some code below that looks like copy-and-paste. 
However, pulling this out into another + // function is the only way to get rid of the duplicate lines since we use different types. After some discussion, + // weese, goeke and holtgrew agreed that it is probably easier to read and maintain this way than to spread the code + // over to one more function. + if (score.bgModelOrder == 0) + { + // -------------------------------------------------------------------- + // Order 0 Background Model + // -------------------------------------------------------------------- + + String kmerCounts1; + String kmerCounts2; + String backgroundCounts; + String backgroundFrequencies; + resize(backgroundFrequencies, 4, 0); + countKmers(kmerCounts1, sequence1, score.kmerSize); + countKmers(kmerCounts2, sequence2, score.kmerSize); + countKmers(backgroundCounts, seq1seq2, 1); + int sumBG = 0; + for (unsigned i = 0; i < length(backgroundCounts); ++i) + { + sumBG += backgroundCounts[i]; + } + for (unsigned i = 0; i < length(backgroundCounts); ++i) + { + backgroundFrequencies[i] = backgroundCounts[i] / ((double)sumBG); + } + unsigned nvals = length(kmerCounts1); // Number of kmers + int len1 = 0; + int len2 = 0; + + for (unsigned l = 0; l < nvals; l++) + { + len1 += kmerCounts1[l]; + len2 += kmerCounts2[l]; + } + + String probabilities; // String of TValue to store the word probabilities p_w + resize(probabilities, nvals, missing); + + for (unsigned i = 0; i < nvals; ++i) + { + TValue p_w = 1; // Probability of kmer + + String w; + unhash(w, i, score.kmerSize); + calculateProbability(p_w, w, backgroundFrequencies); + TValue variance1 = 0.0; + TValue variance2 = 0.0; + + variance1 = pow(len1 * p_w, 0.5); + variance2 = pow(len2 * p_w, 0.5); + + // Test if variance is larger than 0 and smaller than inf before dividing + if ((variance1 > missing) && (variance1 < pow(10.0, 10))) + { + if (p_w > 0) + { + TValue stCount1 = (kmerCounts1[i] - p_w * len1) / variance1; + TValue stCount2 = (kmerCounts2[i] - p_w * len2) / variance2; + result +=
stCount1 * stCount2; + } + } + } + } + else + { + // -------------------------------------------------------------------- + // Higher Order Background Model + // -------------------------------------------------------------------- + + String kmerCounts1; + String kmerCounts2; + StringSet > bgSequences; + stringToStringSet(bgSequences, seq1seq2); // Create unmasked sequences + MarkovModel backgroundModel(score.bgModelOrder); + buildMarkovModel(backgroundModel, bgSequences); + countKmers(kmerCounts1, sequence1, score.kmerSize); + countKmers(kmerCounts2, sequence2, score.kmerSize); + + unsigned nvals = length(kmerCounts1); // Number of kmers + int len1 = 0; + int len2 = 0; + + for (unsigned l = 0; l < nvals; l++) + { + len1 += kmerCounts1[l]; + len2 += kmerCounts2[l]; + } + String probabilities; + resize(probabilities, nvals, missing); + for (unsigned i = 0; i < nvals; ++i) + { + TValue p_w = 1.0; // Probability of kmer + TValue variance = 0.0; + String w; + unhash(w, i, score.kmerSize); + p_w = emittedProbability(backgroundModel, w); + variance = ((TValue) pow(((TValue) len1 * len2), 0.5)) * p_w; + TValue variance1 = 0.0; + TValue variance2 = 0.0; + + variance1 = pow(len1 * p_w, 0.5); + variance2 = pow(len2 * p_w, 0.5); + + // Calculate standardised kmer Count + if ((variance > pow(10.0, -10)) && (variance < pow(10.0, 10))) + { + if (p_w > 0) + { + TValue stCount1 = (kmerCounts1[i] - p_w * len1) / variance1; + TValue stCount2 = (kmerCounts2[i] - p_w * len2) / variance2; + result += stCount1 * stCount2; + } + } + } + } +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_D2STAR_ORIGINAL_H_ diff --git a/seqan/alignment_free/af_d2z.h b/seqan/alignment_free/af_d2z.h new file mode 100644 index 0000000..d2a45a8 --- /dev/null +++ b/seqan/alignment_free/af_d2z.h @@ -0,0 +1,631 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// 
========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Jonathan Goeke +// ========================================================================== +// This header contains the implementation of the D2z score for +// alignment free sequence comparison. +// +// See Kantorovitz et al. 
Bioinformatics 2007, Volume23, Issue13, +// Pp. i249-i255. +// +// These functions can be called with alignmentFreeComparison(). +// ========================================================================== + +// TODO(goeke): const could be added below for the input variables but the function value() in matrix_base (align) is not defined for const. Similarly, the function emittedProbability is not defined for const in statistics_markov_model.h + +#ifndef SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_D2Z_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_D2Z_H_ + +namespace seqan { + +/* + * _alignmentFreeComparison is called by alignmentFreeComparison() (see alignment_free_comparison.h) + */ +template +void _alignmentFreeComparison(Matrix & scoreMatrix, + TStringSet const & sequenceSet, + AFScore const & score) +{ + typedef typename Value::Type TString; + typedef typename Value::Type TAlphabet; + typedef typename UnmaskedAlphabet_::Type TUnmaskedAlphabet; + typedef typename Iterator::Type TIteratorSet; + typedef typename Iterator > >::Type TIteratorSetUnsigned; + typedef typename Iterator > >::Type TIteratorSetDouble; + typedef typename Iterator > >::Type TIteratorMarkovModel; + + unsigned seqNumber = length(sequenceSet); + + // Resize scoreMatrix + setLength(scoreMatrix, 0, seqNumber); + setLength(scoreMatrix, 1, seqNumber); + resize(scoreMatrix, (TValue) 0); + + StringSet > kmerCounts; + resize(kmerCounts, seqNumber); + + // Note that there is some code below that looks like copy-and-paste. However, pulling this out into another + // function is the only way to get rid of the duplicate lines since we use different types. After some discussion, + // weese, goeke and holtgrew agreed that it is probably easier to read and maintain this way than to spread the code + // over to one more function.
+ if (score.bgModelOrder == 0) + { + // -------------------------------------------------------------------- + // Order 0 Background Model + // -------------------------------------------------------------------- + + StringSet > backgroundFrequencies; + resize(backgroundFrequencies, seqNumber); + + // Count all kmers and all background nucleotide frequencies and store them in stringSets + TIteratorSetUnsigned itKmerCounts = begin(kmerCounts); + TIteratorSetDouble itBackgroundFrequencies = begin(backgroundFrequencies); + + TIteratorSet itSeqSet = begin(sequenceSet); + for (; itSeqSet < end(sequenceSet); ++itSeqSet) + { + countKmers(*itKmerCounts, *itBackgroundFrequencies, *itSeqSet, score.kmerSize); + ++itKmerCounts; + ++itBackgroundFrequencies; + } + if(score.verbose) + { + std::cout << "\ncounted words"; + } + // Calculate all pairwise scores and store them in scoreMatrix + for (unsigned rowIndex = 0; rowIndex < seqNumber; ++rowIndex) + { + if(score.verbose) + { + std::cout << "\nSequence number " << rowIndex; + } + for (unsigned colIndex = rowIndex; colIndex < seqNumber; ++colIndex) + { + _alignmentFreeCompareCounts(value(scoreMatrix, rowIndex, colIndex), kmerCounts[rowIndex], + backgroundFrequencies[rowIndex], kmerCounts[colIndex], + backgroundFrequencies[colIndex], score); + value(scoreMatrix, colIndex, rowIndex) = value(scoreMatrix, rowIndex, colIndex); // Copy symmetric entries + } + } + } + else + { + // -------------------------------------------------------------------- + // Higher Order Background Model + // -------------------------------------------------------------------- + + String > backgroundModels; + resize(backgroundModels, seqNumber, MarkovModel(score.bgModelOrder)); + TIteratorMarkovModel itMM = begin(backgroundModels); + TIteratorSet itSeqSet = begin(sequenceSet); + // Count all kmers and all background nucleotide frequencies and store them in StringSets + TIteratorSetUnsigned itKmerCounts = begin(kmerCounts); + + for (; itSeqSet < 
end(sequenceSet); ++itSeqSet) + { + countKmers(*itKmerCounts, *itMM, *itSeqSet, score.kmerSize); + ++itKmerCounts; + if (itMM < end(backgroundModels)) + { + itMM->_computeAuxiliaryMatrices(); + ++itMM; + } + + } + if(score.verbose) + { + std::cout << "\ncounted words"; + } + // Calculate all pairwise scores and store them in scoreMatrix + for (unsigned rowIndex = 0; rowIndex < seqNumber; ++rowIndex) + { + if(score.verbose) + { + std::cout << "\nSequence number " << rowIndex; + } + for (unsigned colIndex = rowIndex; colIndex < seqNumber; ++colIndex) + { + _alignmentFreeCompareCounts(value(scoreMatrix, rowIndex, colIndex), kmerCounts[rowIndex], + backgroundModels[rowIndex], kmerCounts[colIndex], + backgroundModels[colIndex], score); + value(scoreMatrix, colIndex, rowIndex) = value(scoreMatrix, rowIndex, colIndex); // Copy symmetric entries + } + } + } +} + +/* + * computeExpectationD2 calculates the expected value of the D2 score given a Bernoulli model, + * see paper referenced above + */ +template +double computeExpectationD2(int const len1, int const len2, unsigned const k, TValue const * q1, TValue const * q2) +{ + TValue p2 = 0; + for (int i = 0; i < 4; i++) + p2 += q1[i] * q2[i]; + + int nbar1 = len1 - k + 1; + int nbar2 = len2 - k + 1; + + TValue retval = nbar1; + retval *= nbar2; + retval *= pow((double)p2, (int)k); + return retval; +} + +/* + * computeExpectationD2 calculates the expected value of the D2 score given a Markov model, + * see paper referenced above + */ +template +double computeExpectationD2(int const slen1, + int const slen2, + unsigned const k, + MarkovModel /*const*/ & bkg1, + MarkovModel /*const*/ & bkg2, + TValue & indicatorexpectation) +{ + unsigned mo = bkg1.order; + if (mo >= k) + { + // Error: Can't support markov order greater or equal to word length + exit(1); + } + + long emo = 1 << (2 * mo); // This is equal to pow(4, mo) + long ekmo = 1 << (2 * (k - mo)); // This is equal to pow(4, k - mo) + + TValue mean = 0; + for (long i = 0;
i < emo; i++) + { + TValue term = value(bkg1.stationaryDistribution, i) * value(bkg2.stationaryDistribution, i); + TValue subterm = 0; + + for (long j = 0; j < ekmo; j++) + { + TValue subprob1 = _computeWordProbGivenPrefix(i, j, bkg1, k, mo); + TValue subprob2 = _computeWordProbGivenPrefix(i, j, bkg2, k, mo); + subterm += subprob1 * subprob2; + } + mean += term * subterm; + } + + indicatorexpectation = mean; // This is E[Y_ij], see paper referenced above. + int nbar1 = slen1 - k + 1; // Number of kmers in sequence1 + int nbar2 = slen2 - k + 1; // Number of kmers in sequence2 + return mean * ((TValue) nbar1 * nbar2); +} + +/* + * computeVarianceD2 calculates the variance of the D2 score given a Bernoulli model, + * see paper referenced above + */ +template +double computeVarianceD2(int len1, int len2, unsigned k, TValue * q1, TValue * q2) +{ + int nbar1 = len1 - k + 1; // Number of kmers in sequence 1 + int nbar2 = len2 - k + 1; // Number of kmers in sequence 2 + + int qbar1 = len1 - 2 * k + 2; // Number of overlapping kmers in sequence 1 + int qbar2 = len2 - 2 * k + 2; + + TValue p2 = 0, p31 = 0, p32 = 0; + for (int i = 0; i < 4; i++) + { + p2 += q1[i] * q2[i]; + p31 += q1[i] * q2[i] * q1[i]; + p32 += q1[i] * q2[i] * q2[i]; + } + + TValue variance = 0; + // 'Crabgrass' with l = 0 (= complete overlap), see paper referenced above + TValue power1 = pow((double)p32, (int)k) - pow((double)p2, 2 * (int)k); + power1 *= TValue(nbar1) * TValue(qbar2) * TValue(qbar2 - 1); + TValue power2 = pow((double)p31, (int)k) - pow((double)p2, 2 * (int)k); + power2 *= TValue(nbar2) * TValue(qbar1) * TValue(qbar1 - 1); + variance += power1 + power2; + + // 'Crabgrasses' with l > 0, see paper referenced above + for (unsigned l = 1; l <= k - 1; l++) + variance += 2 * TValue(nbar1 - l) * TValue(qbar2) * TValue(qbar2 - 1) * (pow((double)p2, (int)(2 * l)) * pow((double)p32, (int)(k - l)) - pow((double)p2, (int)(2 * k))) + 2 * TValue(nbar2 - l) * TValue(qbar1) * TValue(qbar1 - 1) * 
(pow((double)p2, (int)(2 * l)) * pow((double)p31, (int)(k - l)) - pow((double)p2, (int)(2 * k))); + + // Accordion main diagonal, see paper referenced above + variance += TValue(nbar1) * TValue(nbar2) * (pow((double)p2, (int)(k)) - pow((double)p2, (int)(2 * k))); + for (unsigned l = 1; l <= k - 1; l++) + variance += 2 * TValue(nbar1 - l) * TValue(nbar2 - l) * (pow((double)p2, (int)(k + l)) - pow((double)p2, (int)(2 * k))); + + return variance; +} + + +/* + * computeVarianceD2 calculates the variance of the D2 score given a Markov model, + * see paper referenced above + */ +template +double computeVarianceD2(int const slen1, + int const slen2, + unsigned const k, + MarkovModel /*const*/ & bkg1, + MarkovModel /*const*/ & bkg2, + TValue indicatorexpectation) +{ + unsigned mo = bkg1.order; + if (mo >= k) + { + // Error: Can't support markov order greater or equal to word length + exit(1); + } + + long emo = 1 << (2 * mo); // Equivalent to pow(4, mo); + long ekmo = 1 << (2 * (k - mo)); // Equivalent to pow(4, k - mo); + + int q1 = slen1 - 2 * k + 2; + int q2 = slen2 - 2 * k + 2; + int nbar1 = slen1 - k + 1; + int nbar2 = slen2 - k + 1; + + // Create matrix for Sbctilde + Matrix Sbctilde1; + setLength(Sbctilde1, 0, emo); + setLength(Sbctilde1, 1, emo); + resize(Sbctilde1); + + Matrix Sbctilde2; + setLength(Sbctilde2, 0, emo); + setLength(Sbctilde2, 1, emo); + resize(Sbctilde2); + + for (long i = 0; i < emo; i++) + { + for (long j = 0; j < emo; j++) + { + value(Sbctilde1, i, j) = (q1 * (q1 + 1) / 2) * value(bkg1.stationaryDistribution, j) - (q1 - 1) * value(bkg1._qppp, i, j) - value(bkg1._qppqpp, i, j); + value(Sbctilde2, i, j) = (q2 * (q2 + 1) / 2) * value(bkg2.stationaryDistribution, j) - (q2 - 1) * value(bkg2._qppp, i, j) - value(bkg2._qppqpp, i, j); + } + } + + // Compute sums of word probabilities and star probabilities of x-mers (x between mo + 1 and k) conditional on last or first (resp) mo-word, see paper referenced above + Matrix sump; // sump[x][wk] is the total 
word probability of every x-word ending with wk (which is of length mo) + setLength(sump, 0, k + 1); + setLength(sump, 1, emo); + resize(sump, 0.0); + + Matrix sumpstar; + setLength(sumpstar, 0, k + 1); + setLength(sumpstar, 1, emo); + resize(sumpstar, 0.0); + + + for (unsigned x = 0; x <= k; x++) + { + if (x <= mo) + { + + continue; + } + + for (long wk = 0; wk < emo; wk++) + { + TValue sum = 0; + long exmo = 1 << (2 * (x - mo)); // Equivalent to pow(4, x - mo); + for (long wpre = 0; wpre < exmo; wpre++) + { + sum += _computeWordProb((wpre << (2 * mo)) + wk, bkg1, x, mo) * _computeWordProb((wpre << (2 * mo)) + wk, bkg2, x, mo); + } + value(sump, x, wk) = sum; + } + + for (long u1 = 0; u1 < emo; u1++) + { + TValue sum = 0; + long exmo = 1 << (2 * (x - mo)); // Equivalent to pow(4, x - mo); + for (long usuf = 0; usuf < exmo; usuf++) + { + sum += _computeWordProbGivenPrefix(u1, usuf, bkg1, x, mo) * _computeWordProbGivenPrefix(u1, usuf, bkg2, x, mo); + } + value(sumpstar, x, u1) = sum; + } + } + + // Handle the non overlap terms first + TValue covnonoverlap = 0; + for (long wk = 0; wk < emo; wk++) + { + for (long u1 = 0; u1 < emo; u1++) + { + TValue term = value(Sbctilde1, wk, u1) * value(Sbctilde2, wk, u1) * value(sump, k, wk) * value(sumpstar, k, u1); + covnonoverlap += 4 * term; + } + } + TValue subtractFromNonOverlap = (TValue)q1 * (q1 - 1) * q2 * (q2 - 1); + subtractFromNonOverlap *= pow(indicatorexpectation, 2); + covnonoverlap -= subtractFromNonOverlap; + // Compute 'crabgrass' terms, see paper referenced above + TValue covcrabgrass = 0; + + // Case 1: overlap >= mo + for (unsigned m = 1; m <= k - mo; m++) + { + long ekm = 1 << (2 * (k - m)); // Equivalent to pow(4, k - m) + long moonesflag = (1 << (2 * mo)) - 1; + long kmmoonesflag = (1 << (2 * (k - m - mo))) - 1; + for (long v = 0; v < ekm; v++) + { + long vsuf = v & moonesflag; // Last morder chars of v + long vpre = v >> (2 * (k - m - mo)); // First morder chars of v, equivalent to v / pow(4, k - m - mo) + 
long vsuf2 = v & kmmoonesflag; // Remaining k-m-morder chars of v; equivalent to v % pow(4, k - m - mo) + // In the following, if m = k - mo, _computeWordProbGivenPrefix(.,.,.,k - m, mo) will return 1, as it should + // Overlap in A, separate in B + TValue term1 = value(Sbctilde2, vsuf, vpre); + term1 *= _computeWordProbGivenPrefix(vpre, vsuf2, bkg1, k - m, mo); + term1 *= pow(_computeWordProbGivenPrefix(vpre, vsuf2, bkg2, k - m, mo), 2); + term1 *= value(sump, m + mo, vpre); + term1 *= value(sumpstar, m + mo, vsuf); + // Overlap in B, separate in A + TValue term2 = value(Sbctilde1, vsuf, vpre); + term2 *= _computeWordProbGivenPrefix(vpre, vsuf2, bkg2, k - m, mo); + term2 *= pow(_computeWordProbGivenPrefix(vpre, vsuf2, bkg1, k - m, mo), 2); + term2 *= value(sump, m + mo, vpre); + term2 *= value(sumpstar, m + mo, vsuf); + + covcrabgrass += 4 * q1 * term1 + 4 * q2 * term2; + } + } + // Case 2: overlap < morder + for (unsigned m = k - mo + 1; m <= k - 1; m++) + { + int tlen = 2 * mo - (k - m); + long et = 1 << (2 * tlen); // Equivalent to pow(4, tlen); + long moonesflag = (1 << (2 * mo)) - 1; + long tmoonesflag = (1 << (2 * (tlen - mo))) - 1; + for (long t = 0; t < et; t++) + { + long tsuf = t & moonesflag; // Last morder chars of t; equivalent to t % emo; + long tpre = t >> (2 * (tlen - mo)); // First morder chars of t, equivalent to t / pow(4, tlen - mo) + long tsuf2 = t & tmoonesflag; // Remaining tlen-morder chars of t, equivalent to t % etmo; + // Overlap in A, separate in B + TValue term1 = value(Sbctilde2, tpre, tsuf); + term1 *= _computeWordProbGivenPrefix(tpre, tsuf2, bkg1, tlen, mo); + term1 *= value(sump, k, tpre); + term1 *= value(sumpstar, k, tsuf); + // Overlap in B, separate in A + TValue term2 = value(Sbctilde1, tpre, tsuf); + term2 *= _computeWordProbGivenPrefix(tpre, tsuf2, bkg2, tlen, mo); + term2 *= value(sump, k, tpre); + term2 *= value(sumpstar, k, tsuf); + + covcrabgrass += 4 * q1 * term1 + 4 * q2 * term2; + } + } + + // Case 3: m=0 (complete 
overlap) + long moonesflag = (1 << (2 * mo)) - 1; + for (long wpre = 0; wpre < emo; wpre++) + { + // First morder chars of w + TValue term1 = 0; + TValue term2 = 0; + for (long wsuf2 = 0; wsuf2 < ekmo; wsuf2++) + { + // Remaining k-morder chars of w + long wsuf = ((wpre << (2 * (k - mo))) + wsuf2) & moonesflag; // Last morder chars of w + term1 += 2 * nbar1 * value(Sbctilde2, wsuf, wpre) * _computeWordProbGivenPrefix(wpre, wsuf2, bkg1, k, mo) * pow(_computeWordProbGivenPrefix(wpre, wsuf2, bkg2, k, mo), 2); + term2 += 2 * nbar2 * value(Sbctilde1, wsuf, wpre) * _computeWordProbGivenPrefix(wpre, wsuf2, bkg2, k, mo) * pow(_computeWordProbGivenPrefix(wpre, wsuf2, bkg1, k, mo), 2); + } + covcrabgrass += (term1 + term2) * value(bkg1.stationaryDistribution, wpre) * value(bkg2.stationaryDistribution, wpre); + } + + // Ignored edge effects, see paper referenced above + // Subtract expectations + TValue subtractfromcrabgrass = (((2 * q1 * k - nbar1) * (double)q2 * double(q2 - 1)) + ((2 * q2 * k - nbar2) * (double)q1 * double(q1 - 1))); + subtractfromcrabgrass *= pow(indicatorexpectation, 2); + covcrabgrass -= subtractfromcrabgrass; + + // Compute the accordion main diagonal terms + TValue covaccordiondiag = 0; + + for (unsigned m = 1; m <= k - 1; m++) + { + long tlen; + if (m <= mo - 1) + tlen = m + mo; + else + tlen = 2 * mo - 1; + long et = 1 << (2 * tlen); // Equivalent to pow(4, tlen) + long moonesflag = (1 << (2 * mo)) - 1; + long tmoonesflag = (1 << (2 * (tlen - mo))) - 1; + + TValue mterm = 0; + for (long t = 0; t < et; t++) + { + TValue term = 1; + long tpre = t >> (2 * (tlen - mo)); // First morder chars of t, equivalent to t / pow(4, tlen - mo) + long tsuf2 = t & tmoonesflag; // The remaining tlen - morder chars of t, equivalent to t % etmo; + term *= _computeWordProbGivenPrefix(tpre, tsuf2, bkg1, tlen, mo) * _computeWordProbGivenPrefix(tpre, tsuf2, bkg2, tlen, mo); + term *= value(sump, k, tpre); + if (m >= mo) + { + long tsuf = t & moonesflag; // Last morder chars
of t, equivalent to t%emo; + term *= value(sumpstar, m + 1, tsuf); + } + mterm += term; + } + covaccordiondiag += 2 * (nbar1 - m) * (nbar2 - m) * mterm; + } + covaccordiondiag += nbar1 * nbar2 * indicatorexpectation; // Complete overlap term + covaccordiondiag -= (nbar1 * nbar2 * (2 * k - 1) - (nbar1 + nbar2) * k * (k - 1) + (k - 1) * k * (2 * k - 1) / 3) * pow(indicatorexpectation, 2); + TValue variance = covnonoverlap + covcrabgrass + covaccordiondiag; + return variance; + +} + +/* + * Calculate pairwise score given the counts of all kmers and the background Bernoulli models + */ +template +void _alignmentFreeCompareCounts(TValue & result, + String const & kmerCounts1, + TStringBG const & backgroundFrequencies1, + String const & kmerCounts2, + TStringBG const & backgroundFrequencies2, + AFScore const & score) +{ + typedef typename Value::Type TValueBG; + TValueBG sum = 0; + unsigned len1 = score.kmerSize - 1; + unsigned len2 = score.kmerSize - 1; + unsigned nvals = length(kmerCounts1); + + for (unsigned l = 0; l < nvals; l++) + { + len1 += kmerCounts1[l]; + len2 += kmerCounts2[l]; + + sum += kmerCounts1[l] * kmerCounts2[l]; + } + + TValueBG q1[4]; + TValueBG q2[4]; + for (int l = 0; l < 4; l++) + { + q1[l] = backgroundFrequencies1[l]; + q2[l] = backgroundFrequencies2[l]; + } + + // Compute expected value and variance (IID) + double E = computeExpectationD2(len1, len2, score.kmerSize, q1, q2); + double var = computeVarianceD2(len1, len2, score.kmerSize, q1, q2); + + if ((var <= 0)) + { + if(score.verbose) + { + std::cout << "Error: negative variance\n"; + } + result = 0; + return; + } + // Calculate z-score + result = (TValue) (sum - E) / pow(var, 0.5); +} + +/* + * Calculate pairwise score given the counts of all kmers and the background Markov models + */ +template +void _alignmentFreeCompareCounts(TValue & result, + String const & kmerCounts1, + MarkovModel /*const*/ & bgModel1, + String const & kmerCounts2, + MarkovModel /*const*/ & bgModel2, + AFScore const & 
score) +{ + unsigned nvals = length(kmerCounts1); + int sum = 0; + int sumCounts1 = score.kmerSize - 1; + int sumCounts2 = score.kmerSize - 1; + + for (unsigned l = 0; l < nvals; l++) + { + sumCounts1 += kmerCounts1[l]; + sumCounts2 += kmerCounts2[l]; + sum += value(kmerCounts1, l) * value(kmerCounts2, l); // Calculate the inner product + } + + TValue D2 = (TValue) sum; + + // Compute mean and variance + TValue indicatorexpectation = 0; + + double E = computeExpectationD2(sumCounts1, sumCounts2, score.kmerSize, bgModel1, bgModel2, indicatorexpectation); + double var = computeVarianceD2(sumCounts1, sumCounts2, score.kmerSize, bgModel1, bgModel2, indicatorexpectation); + + if (var <= 0) + { + result = 0; + return; + } + // Return z-score of D2 + result = (D2 - E) / pow(var, 0.5); +} + +/* + * Compute the word probability given a Markov model + * see paper referenced on top + */ +template +double _computeWordProb(long const word, MarkovModel /*const*/ & bkg, unsigned const k, int const mo) +{ + // mo needs to be passed, since that decides the prefix + // Similarly, this function needs to know the total length of the word k + long prefix = word >> (2 * (k - mo)); // Equivalent to word / pow(4, k - mo); gets the first mo chars of word + long suffix = word & ((1 << (2 * (k - mo))) - 1); // Get the last (k - mo) chars of word + return value(bkg.stationaryDistribution, prefix) * _computeWordProbGivenPrefix(prefix, suffix, bkg, k, mo); +} + +/* + * Compute the word probability given a Markov model + * see paper referenced on top + */ +template +double _computeWordProbGivenPrefix(long const prefix, long const suffix, MarkovModel /*const*/ & bkg, unsigned const k, unsigned const mo) +{ + // Computes p_ * (prefix suffix) + // Calculate only if k - mo >= 1; otherwise return 1 + if (k - mo <= 0) + return 1; + + TValue prob = 1; + long pre = prefix; + long jcopy = suffix; + + // Iterate through successive positions of the suffix + for (unsigned l = 0; l < k - mo; l++) + { + // 
Get the first char of jcopy, which is of length k - mo; equivalent to jcopy / pow(4, k - mo - 1) + long jcopyfirst = jcopy >> (2 * (k - mo - 1)); + long suf = ((pre & ((1 << (2 * (mo - 1))) - 1)) << 2) + jcopyfirst; + + // prob *=P[pre][suf]; P is the transition matrix + prob *= value(bkg.transition, pre, suf); + // Erase the first char of jcopy, and preserve its length at k - mo by shifting one char to the left; equivalent to (jcopy % pow(4, k - mo - 1)) * 4; + jcopy = (jcopy & ((1 << (2 * (k - mo - 1))) - 1)) << 2; + pre = suf; + } + return prob; +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_D2Z_H_ diff --git a/seqan/alignment_free/af_n2.h b/seqan/alignment_free/af_n2.h new file mode 100644 index 0000000..0268992 --- /dev/null +++ b/seqan/alignment_free/af_n2.h @@ -0,0 +1,641 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Jonathan Goeke +// ========================================================================== +// This header contains the implementation of the N2 score for alignment free +// sequence comparison with word neighbourhood counts +// +// See: Goeke et al, to appear. +// +// These functions can be called with alignmentFreeComparison(). 
+// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_N2_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_N2_H_ + +namespace seqan { + +/* + * This function returns a string with indices indicating which k-mer is the + * reverse complement k-mer: i=revComIndex[revComIndex[i]] + */ +inline void _initialiseRevComIndex(String & revComIndex, unsigned const k) +{ + unsigned myLength = (unsigned)pow(4.0, (int)k); + resize(revComIndex, myLength, 0); + Shape myShape; + resize(myShape, k); + for (unsigned i = 0; i < myLength; ++i) + { + String w; + unhash(w, i, k); + DnaStringReverseComplement wRC(w); + unsigned hashValue = hash(myShape, begin(wRC)); + revComIndex[i] = hashValue; + } + +} + +/* + * This function returns a stringSet with strings of indices indicating which + * k-mers belong to the word neighbourhood for every k-mer (all k-mers with + * one mismatch) + */ +inline void _initialiseKmerNeighbourhood(StringSet > & kmerNeighbourhood, + unsigned const k, bool const revCom, + String const & revComIndex) +{ + unsigned myLength = (unsigned)pow(4.0, (int)k); + Shape myShape; + resize(myShape, k); + resize(kmerNeighbourhood, myLength); + for (unsigned i = 0; i < myLength; ++i) + { + resize(kmerNeighbourhood[i], 1, i); + + String w; + unhash(w, i, k); + if ((revComIndex[i] != i) && (revCom == true)) + { + appendValue(kmerNeighbourhood[i], revComIndex[i]); + } + for (unsigned j = 0; j < k; ++j) + { + for (unsigned l = 0; l < 4; ++l) + { + String wTMP; + wTMP = w; + if (wTMP[j] != l) + { + wTMP[j] = l; + unsigned hashValue = hash(myShape, begin(wTMP)); + // Check for double word occurrences + bool duplicate = false; + if (revCom == true) + { + + for (unsigned n = 0; n < length(kmerNeighbourhood[i]); ++n) + { + if ((hashValue) == kmerNeighbourhood[i][n]) + { + duplicate = true; + break; + } + } + + } + + if (duplicate == false) + { + appendValue(kmerNeighbourhood[i], hashValue); + if (revCom == true) 
+ { + if (revComIndex[hashValue] != hashValue) + { + appendValue(kmerNeighbourhood[i], revComIndex[hashValue]); + } + } + } + } + } + } + } +} + +/* + * _alignmentFreeComparison is called by alignmentFreeComparison() (see alignment_free_comparison.h) + */ +template +void _alignmentFreeComparison(Matrix & scoreMatrix, + TStringSet const & sequenceSet, + AFScore const & score) +{ + + + + typedef typename Value::Type TString; + typedef typename Value::Type TAlphabet; + typedef typename UnmaskedAlphabet_::Type TUnmaskedAlphabet; + typedef typename Iterator::Type TIteratorSet; + typedef typename Iterator > >::Type TIteratorSetDouble; + + + // Initialise the reverse complement hash table + String revComIndex; + StringSet > kmerNeighbourhood; + _initialiseRevComIndex(revComIndex, score.kmerSize); + if (score.revCom == "both_strands") + { + _initialiseKmerNeighbourhood(kmerNeighbourhood, score.kmerSize, true, revComIndex); + } + else + { + _initialiseKmerNeighbourhood(kmerNeighbourhood, score.kmerSize, false, revComIndex); + } + + unsigned seqNumber = length(sequenceSet); + + setLength(scoreMatrix, 0, seqNumber); + setLength(scoreMatrix, 1, seqNumber); + resize(scoreMatrix, (TValue) 0); + + StringSet > standardisedKmerCounts; + resize(standardisedKmerCounts, seqNumber); + // Count all kmers and all background nucleotide frequencies and store them in StringSets + TIteratorSetDouble itStandardisedKmerCounts = begin(standardisedKmerCounts); + TIteratorSet itSeqSet = begin(sequenceSet); + for (; itSeqSet < end(sequenceSet); ++itSeqSet) + { + + _standardiseCounts(*itStandardisedKmerCounts, revComIndex, kmerNeighbourhood, *itSeqSet, score); + if(score.verbose) + { + std::cout << "\n" << position(itSeqSet); + } + ++itStandardisedKmerCounts; + } + + if (score.norm == true) // Normalise the score so that sequence-self-comparisons are always 1 + { + itStandardisedKmerCounts = begin(standardisedKmerCounts); + for (; itStandardisedKmerCounts < end(standardisedKmerCounts); 
++itStandardisedKmerCounts) + { + TValue normValue = 0.0; + for (unsigned i = 0; i < length(value(itStandardisedKmerCounts)); ++i) + { + normValue += value(itStandardisedKmerCounts)[i] * value(itStandardisedKmerCounts)[i]; + } + for (unsigned i = 0; i < length(value(itStandardisedKmerCounts)); ++i) + { + value(itStandardisedKmerCounts)[i] /= sqrt(normValue); + } + + } + } + + if (!(score.outputFile == "")) + { + std::ofstream myfile; + myfile.open(toCString(score.outputFile)); + for (unsigned i = 0; seqNumber > 0 && i < length(standardisedKmerCounts[0]); ++i) // Header row of k-mers; skipped for an empty sequence set so element 0 is never read + { + String w; + unhash(w, i, score.kmerSize); + myfile << "\t" << w; + } + myfile << "\n"; + for (unsigned seqIndex = 0; seqIndex < seqNumber; ++seqIndex) + { + myfile << "Seq" << seqIndex; + for (unsigned i = 0; i < length(standardisedKmerCounts[seqIndex]); ++i) + { + myfile << "\t" << standardisedKmerCounts[seqIndex][i]; + } + myfile << "\n"; + } + myfile.close(); + } + + if(score.verbose) + { + std::cout << "\ncounted words"; + } + + // Calculate all pairwise scores and store them in scoreMatrix + for (unsigned rowIndex = 0; rowIndex < seqNumber; ++rowIndex) + { + if(score.verbose) + { + std::cout << "\nSequence number " << rowIndex; + } + for (unsigned colIndex = rowIndex; colIndex < seqNumber; ++colIndex) + { + _alignmentFreeCompareCounts(value(scoreMatrix, rowIndex, colIndex), revComIndex, standardisedKmerCounts[rowIndex], standardisedKmerCounts[colIndex], score); + value(scoreMatrix, colIndex, rowIndex) = value(scoreMatrix, rowIndex, colIndex); // Copy symmetric entries + } + } +} + +/* + * Calculate pairwise score given the counts of all kmers + */ +template +void +_alignmentFreeCompareCounts(TValue & result, + String const & revComIndex, // Read-only lookup table; taken by reference so it is not copied for every sequence pair + TString const & kmerCounts1, + TString const & kmerCounts2, + AFScore const & score) +{ + typedef typename Iterator::Type TIteratorTString; + + TIteratorTString it1 = begin(kmerCounts1); + TIteratorTString it2 = begin(kmerCounts2); + result = 0.0; + TValue resultRC = 0.0; + for (;
it1 < end(kmerCounts1); ++it1) + { + result += (TValue)(value(it1) * value(it2)); + // Computation of the reverse complement strand score + if ((score.revCom != "") && (score.revCom != "both_strands")) + { + unsigned hashValue = revComIndex[position(it1)]; + resultRC += (TValue)(value(it1) * kmerCounts2[hashValue]); + } + ++it2; + } + + if (score.revCom == "mean") + { + result = (TValue) (resultRC + result) / 2; + } + else if (score.revCom == "max") + { + result = std::max(resultRC, result); + } + else if (score.revCom == "min") + { + result = std::min(resultRC, result); + } +} + +/* + * count kmers and standardise count vectors for Dna5 and markov model background + */ +template +void _standardiseCounts(TString & standardisedCounts, + String const & revComIndex, + StringSet > const & kmerNeighbourhood, + TSequence const & sequence, + AFScore const & score) +{ + typedef typename Value::Type TAlphabet; + typedef typename UnmaskedAlphabet_::Type TUnmaskedAlphabet; + typedef typename Value::Type TValue; + typedef typename Iterator, Rooted>::Type TIteratorUnsigned; + typedef typename Iterator::Type TIteratorTString; + + unsigned alphabetSize = ValueSize::VALUE; + + // Save all word covariances which are computed in covariance Matrix to avoid double computations + Matrix covarianceMatrix; + TValue missing = -pow(10.0, 10); + if (score.mismatches > 0) + { + setLength(covarianceMatrix, 0, pow((double)alphabetSize, (int)score.kmerSize)); + setLength(covarianceMatrix, 1, pow((double)alphabetSize, (int)score.kmerSize)); + resize(covarianceMatrix, missing); + } + + // Note that there is some code below that looks like copy-and-paste. However, pulling this out into another + // function is the only way to get rid of the duplicate lines since we use different types. After some discussion, + // weese, goeke and holtgrew agreed that it is probably easier to read and maintain this way than to spread the code + // over to one more function. 
+ if (score.bgModelOrder == 0) + { + // ---------------------------------------------------------------------- + // Order 0 Background Model + // ---------------------------------------------------------------------- + + String kmerCounts; + String backgroundFrequencies; + countKmers(kmerCounts, backgroundFrequencies, sequence, score.kmerSize); + int nvals = length(kmerCounts); // Number of kmers + int len1 = 0; + for (int l = 0; l < nvals; l++) + { + len1 += kmerCounts[l]; + } + resize(standardisedCounts, nvals, (TValue) 0.0); + + // String of TValue to store the word probabilites p_w + String probabilities; + resize(probabilities, nvals, missing); + + TIteratorUnsigned itCounts; + TIteratorTString itStandardisedCounts; + + itCounts = begin(kmerCounts); + itStandardisedCounts = begin(standardisedCounts); + + for (; itCounts < end(kmerCounts); ++itCounts) + { + // Temporary counter for mismatch kmer counting + TValue counterTMP = 0; + TValue p_w = 1; // Probability of kmer + + String w; + unhash(w, (unsigned)position(itCounts), score.kmerSize); + calculateProbability(p_w, w, backgroundFrequencies); + TValue variance = 0; + if ((score.mismatches == 1)) // Mismatch score calculation + { + p_w = 0; + // The first word in the kmerNeighbourhood is the kmer itself, it is weighted normally + // Sum of all entries in the covariance matrix. 
only once computed + unsigned wordHash = position(itCounts); + unsigned wordRCHash = revComIndex[wordHash]; + + for (unsigned row = 0; row < length(kmerNeighbourhood[wordHash]); ++row) + { + unsigned wordRowHash = kmerNeighbourhood[wordHash][row]; + // The kmer itself is weighted normally + if (wordRowHash == wordHash) // The first word in the kmerNeighbourhood is the kmer itself, it is weighted normally + { + counterTMP += (TValue) kmerCounts[wordRowHash]; + } + else if ((score.revCom == "both_strands") && (wordRowHash == wordRCHash)) + { + counterTMP += ((TValue) kmerCounts[wordRowHash]); + } + else + { + counterTMP += ((TValue) kmerCounts[wordRowHash]) * score.mismatchWeight; + } + String wMM1; + unhash(wMM1, wordRowHash, score.kmerSize); + + for (unsigned col = row; col < length(kmerNeighbourhood[wordHash]); ++col) + { + unsigned wordColHash = kmerNeighbourhood[wordHash][col]; + if (value(covarianceMatrix, wordColHash, wordRowHash) == missing) + { + String wMM2; + + unhash(wMM2, wordColHash, score.kmerSize); + calculateCovariance(value(covarianceMatrix, wordColHash, wordRowHash), wMM1, wMM2, backgroundFrequencies, (len1 + score.kmerSize - 1)); + value(covarianceMatrix, wordRowHash, wordColHash) = value(covarianceMatrix, wordColHash, wordRowHash); + } + if (row == col) // Variance of weighted variables + { + if ((wordRowHash == wordHash) || (score.revCom == "both_strands" && (wordRowHash == wordRCHash))) // The variance of the kmer is counted full + { + variance += value(covarianceMatrix, wordRowHash, wordColHash); + } + else + { + variance += pow(score.mismatchWeight, 2) * value(covarianceMatrix, wordRowHash, wordColHash); // Calculate weighted variances + } + } + // The covariance of the kmer and the reverse complement is weighted full + else if ((score.revCom == "both_strands") && (((wordRowHash == wordHash) && (wordColHash == wordRCHash)) || ((wordRowHash == wordRCHash) && (wordColHash == wordHash)))) + { + variance += (2.0) * value(covarianceMatrix, 
wordRowHash, wordColHash); + } + else if ((wordRowHash == wordHash || wordColHash == wordHash) || (score.revCom == "both_strands" && (wordRowHash == wordRCHash || wordColHash == wordRCHash))) // The covariance is weighted half + { + variance += (2.0) * score.mismatchWeight * value(covarianceMatrix, wordRowHash, wordColHash); + } + else // The covariance is weighted^2 + { + variance += (2.0) * pow(score.mismatchWeight, 2) * value(covarianceMatrix, wordRowHash, wordColHash); + } + } + if (probabilities[wordRowHash] == missing) + { + calculateProbability(probabilities[wordRowHash], wMM1, backgroundFrequencies); + } + if (wordRowHash == wordHash) //Weight the probabilities and expected values, normal weight for the kmer itself + { + p_w += probabilities[wordRowHash]; + } + else if ((score.revCom == "both_strands") && (wordRowHash == wordRCHash)) // Weight the probabilities and expected values, normal weight for the reverse complement kmer itself + { + p_w += probabilities[wordRowHash]; + } + else + { + p_w += score.mismatchWeight * probabilities[wordRowHash]; + } + } + variance = pow(variance, 0.5); // Calculate the standard deviation + } // End of mismatch calculation + else if (score.revCom == "both_strands") + { + TValue variance1; + TValue variance2; + TValue covariance; + String wRC; + unhash(wRC, (unsigned) revComIndex[(unsigned)position(itCounts)], score.kmerSize); + calculateVariance(variance1, w, backgroundFrequencies, (len1 + score.kmerSize - 1)); + calculateVariance(variance2, wRC, backgroundFrequencies, (len1 + score.kmerSize - 1)); + calculateCovariance(covariance, w, wRC, backgroundFrequencies, (len1 + score.kmerSize - 1)); + variance = pow((variance1 + variance2 + (2.0) * covariance), 0.5); + TValue p_wRC = 1; // Probability of the reverse complement kmer + calculateProbability(p_wRC, wRC, backgroundFrequencies); + p_w += p_wRC; + } + else + { + calculateVariance(variance, w, backgroundFrequencies, (len1 + score.kmerSize - 1)); + variance = pow(variance, 0.5); + } + if ((variance >
pow(10.0, -10)) && (variance < pow(10.0, 10))) + { + if (p_w > 0) + { + if (score.mismatches > 0) + { + value(itStandardisedCounts) = ((TValue) ((TValue) counterTMP) - p_w * ((TValue)len1)) / variance; + } + else if (score.revCom == "both_strands") + { + value(itStandardisedCounts) = ((TValue) ((TValue) value(itCounts) + kmerCounts[revComIndex[(unsigned)position(itCounts)]]) - p_w * ((TValue)len1)) / variance; + } + else + { + value(itStandardisedCounts) = ((TValue) ((TValue) value(itCounts)) - p_w * ((TValue)len1)) / variance; + } + } + } + ++itStandardisedCounts; + } + } + else + { + // ---------------------------------------------------------------------- + // Higher Order Background Model + // ---------------------------------------------------------------------- + + String kmerCounts; + MarkovModel backgroundModel(score.bgModelOrder); + countKmers(kmerCounts, backgroundModel, sequence, score.kmerSize); + + int nvals = length(kmerCounts); // Number of kmers + int len1 = 0; + for (int l = 0; l < nvals; l++) + { + len1 += kmerCounts[l]; + } + resize(standardisedCounts, nvals, (TValue) 0.0); + String probabilities; + resize(probabilities, nvals, missing); + TIteratorUnsigned itCounts; + TIteratorTString itStandardisedCounts; + itCounts = begin(kmerCounts); + itStandardisedCounts = begin(standardisedCounts); + + for (; itCounts < end(kmerCounts); ++itCounts) + { + TValue p_w = 1; // Probability of kmer + TValue variance = 0; + String w; + unhash(w, (unsigned)position(itCounts), score.kmerSize); + p_w = emittedProbability(backgroundModel, w); + + TValue counterTMP = 0.0; + if ((score.mismatches == 1)) // Start of mismatch calculations + { + p_w = 0; + // The first word in the kmerNeighbourhood is the kmer itself, it is weighted normally + // Sum of all entries in the covariance matrix, computed and stored dynamically + unsigned wordHash = position(itCounts); + unsigned wordRCHash = revComIndex[wordHash]; + + for (unsigned row = 0; row < 
length(kmerNeighbourhood[wordHash]); ++row) + { + unsigned wordRowHash = kmerNeighbourhood[wordHash][row]; + // The kmer itself is weighted normally + if (wordRowHash == wordHash) // The first word in the kmerNeighbourhood is the kmer itself, it is weighted normally + { + counterTMP += (TValue) kmerCounts[wordRowHash]; + } + else if ((score.revCom == "both_strands") && (wordRowHash == wordRCHash)) + { + counterTMP += ((TValue) kmerCounts[wordRowHash]); + } + else + { + counterTMP += ((TValue) kmerCounts[wordRowHash]) * score.mismatchWeight; + } + String wMM1; + unhash(wMM1, wordRowHash, score.kmerSize); + for (unsigned col = row; col < length(kmerNeighbourhood[wordHash]); ++col) + { + unsigned wordColHash = kmerNeighbourhood[wordHash][col]; + if (value(covarianceMatrix, wordColHash, wordRowHash) == missing) + { + String wMM2; + unhash(wMM2, wordColHash, score.kmerSize); + calculateCovariance(value(covarianceMatrix, wordColHash, wordRowHash), wMM1, wMM2, backgroundModel, (len1 + score.kmerSize - 1)); + value(covarianceMatrix, wordRowHash, wordColHash) = value(covarianceMatrix, wordColHash, wordRowHash); + } + if (row == col) // Variance of weighted variables + { + if ((wordRowHash == wordHash) || (score.revCom == "both_strands" && (wordRowHash == wordRCHash))) // The variance of the kmer is counted full + { + variance += value(covarianceMatrix, wordRowHash, wordColHash); + } + else + { + variance += pow(score.mismatchWeight, 2) * value(covarianceMatrix, wordRowHash, wordColHash); + } + } + // The covariance of the kmer and the reverse complement is weighted full + else if ((score.revCom == "both_strands") && (((wordRowHash == wordHash) && (wordColHash == wordRCHash)) || ((wordRowHash == wordRCHash) && (wordColHash == wordHash)))) + { + variance += (2.0) * value(covarianceMatrix, wordRowHash, wordColHash); + } + else if ((wordRowHash == wordHash || wordColHash == wordHash) || (score.revCom == "both_strands" && (wordRowHash == wordRCHash || wordColHash == 
wordRCHash))) // The covariance is weighted half + { + variance += (2.0) * score.mismatchWeight * value(covarianceMatrix, wordRowHash, wordColHash); + } + else // The covariance is weighted^2 + { + variance += (2.0) * pow(score.mismatchWeight, 2) * value(covarianceMatrix, wordRowHash, wordColHash); + } + } + if (probabilities[wordRowHash] == missing) + { + probabilities[wordRowHash] = emittedProbability(backgroundModel, wMM1); + } + if (wordRowHash == wordHash) //Weight the probabilities and expected values, normal weight for the kmer itself + { + p_w += probabilities[wordRowHash]; + } + else if ((score.revCom == "both_strands") && (wordRowHash == wordRCHash)) // Weight the probabilities and expected values, normal weight for the reverse complement kmer itself + { + p_w += probabilities[wordRowHash]; + } + else + { + p_w += score.mismatchWeight * probabilities[wordRowHash]; + } + } + variance = pow(variance, 0.5); // Calculate the standard deviation + } // End of mismatch calculations + else if (score.revCom == "both_strands") + { + TValue variance1; + TValue variance2; + TValue covariance; + String wRC; + unhash(wRC, (unsigned)revComIndex[(unsigned)position(itCounts)], score.kmerSize); + calculateVariance(variance1, w, backgroundModel, (len1 + score.kmerSize - 1)); + calculateVariance(variance2, wRC, backgroundModel, (len1 + score.kmerSize - 1)); + calculateCovariance(covariance, w, wRC, backgroundModel, (len1 + score.kmerSize - 1)); + variance = pow((variance1 + variance2 + (2.0) * covariance), 0.5); + TValue p_wRC = 1; // Probability of the reverse complement kmer + p_wRC = emittedProbability(backgroundModel, wRC); + p_w += p_wRC; + } + else + { + calculateVariance(variance, w, backgroundModel, (len1 + score.kmerSize - 1)); + variance = pow(variance, 0.5); // Calculate the standard deviation + } + if ((variance > pow(10.0, -10)) && (variance < pow(10.0, 10))) + { + if (p_w > 0) + { + if (score.mismatches > 0) + { + value(itStandardisedCounts) = ((TValue) ((TValue) counterTMP) - p_w * ((TValue)len1))
/ variance; + } + else if (score.revCom == "both_strands") + { + value(itStandardisedCounts) = ((TValue) ((TValue) value(itCounts) + kmerCounts[revComIndex[(unsigned)position(itCounts)]]) - p_w * ((TValue)len1)) / variance; + } + else + { + value(itStandardisedCounts) = ((TValue) ((TValue) value(itCounts)) - p_w * ((TValue)len1)) / ((TValue) variance); + } + } + } + ++itStandardisedCounts; // Advance for every k-mer, also when the variance check fails, so each score stays in its own k-mer's slot (as in the order-0 branch) + } + } +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_AF_N2_H_ diff --git a/seqan/alignment_free/alignment_free_base.h b/seqan/alignment_free/alignment_free_base.h new file mode 100644 index 0000000..490a96d --- /dev/null +++ b/seqan/alignment_free/alignment_free_base.h @@ -0,0 +1,340 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED.
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Jonathan Goeke +// ========================================================================== +// Definition of all AFScore and the specialisations, D2, D2star, D2z +// and N2. +// ========================================================================== + +// TODO(holtgrew): Make struct a class here. + +#ifndef SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_ALIGNMENT_FREE_BASE_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_ALIGNMENT_FREE_BASE_H_ + +namespace seqan { + +/*! + * @class AFScore + * @headerfile + * @brief Used to specify parameters and methods for alignment-free sequence comparison. + * + * @signature template + * struct AFScore; + * + * @tparam TSpec Tag for specialization. + * + * @see alignmentFreeComparison + */ + +template +struct AFScore; + +/*! + * @class D2AFScore D2 AFScore + * @extends AFScore + * @headerfile + * + * @brief D2 computes the inner product of the kmer count vectors. + * + * @signature template <> + * struct AFScore; + * + * To be used for alignment free comparison. + * + * @section References + * + * Lippert RA, et al. Distributional regimes for the number of k-word matches between two random sequences. Proc. Natl + * Acad. Sci. USA 2002. + * + * @see alignmentFreeComparison + * + * @var unsigned D2AFScore::kmerSize; + * @brief Size of the kmers. 
+ * + * @var bool D2AFScore::verbose; + * @brief true to enable verbose debug output. + * + * @fn D2AFScore::AFScore + * + * @brief Constructor + * + * @signature AFScore::AFScore(kmerSize, verbose); + * + * @param[in] verbose This option will report progress to standard output (boolunsigned). + * + * @see alignmentFreeComparison + */ + +struct D2_; // Inner product of k-mer counts, d2 score +typedef Tag const D2; + +template <> +struct AFScore +{ + unsigned kmerSize; + bool verbose; + AFScore(unsigned k, bool verbose_ = false) : kmerSize(k), verbose(verbose_) + {} +}; + +/*! + * @class D2StarAFScore D2Star AFSScore + * @extends AFScore + * @headerfile + * @brief D2Star computes the inner product of the standardised kmer count vectors. + * + * @signature template <> + * struct AFScore; + * + * D2Star can be used for alignment-free sequence comparison, this version calculates the background model on the + * concatenation of both sequences + * + * @section References + * + * Reinert, G.; Chew, D.; Sun, F., Waterman, M. S. Alignment-Free Sequence Comparison (I): Statistics and Power. J + * Comput Biol, 2009. + * + * @see alignmentFreeComparison + * + * @fn D2StarAFScore::AFScore + * @brief Constructor + * @signature AFScore::AFScore(kmerSize, bgModelOrder, verbose); + * + * @param[in] kmerSize Size of kmer, unsigned. + * @param[in] bgModelOrder Order of the background Markov model, unsigned. + * @param[in] verbose This option will report progress to standard output, bool. + * + * @var unsigned D2StarAFScore::kmerSize + * @brief Size of the kmers. + * + * @var CharString D2StarAFScore::outputFile + * @brief When specified, all kmerWeights will be written to this file, for every sequence, and for every sequence + * comparison. + * + * @var unsigned D2StarAFScore::bgModelOrder + * @brief Order of the background model. 
+ */ + +struct D2Star_; // Reinert and Waterman, D2 with centralised and standardised counts +typedef Tag const D2Star; + +template <> +struct AFScore +{ + unsigned kmerSize; + unsigned bgModelOrder; + bool verbose; + + + AFScore(unsigned k, unsigned m, bool verbose_ = false) : + kmerSize(k), bgModelOrder(m), verbose(verbose_) + {} +}; + +/*! + * @class N2AFScore N2 AFScore + * @extends AFScore + * @headerfile + * @brief N2 computes the inner product of the standardised neighbourhood kmer count vectors. + * + * @signature template <> + * struct AFScore; + * + * N2 can be used for alignment-free sequence comparison. + * + * @section References + * + * Jonathan Goeke, Marcel H. Schulz, Julia Lasserre, and Martin Vingron. + Estimation of Pairwise Sequence Similarity of Mammalian Enhancers with Word Neighbourhood Counts. Bioinformatics + (2012). + * + * @fn N2AFScore::AFScore + * + * @brief Constructor + * + * @signature AFScore::AFScore(kmerSize, bgModelOrder, outputFile, verbose); + * @signature AFScore::AFScore(kmerSize, bgModelOrder, revCom, outputFile, verbose); + * @signature AFScore::AFScore(kmerSize, bgModelOrder, revCom, mismatches, mismatchWeight, outputFile, verbose); + * + * @param[in] kmerSize Size of kmer, unsigned. + * @param[in] bgModelOrder Order of the background Markov model, unsigned. + * @param[in] outputFile When specified, all normalised and standardised kmer neighbourhood counts will be written + * to this file for every sequence, @link CharString @endlink. + * @param[in] revCom Scoring of reverse complements words [''/'max'/'min'/'mean'/'both_strands'/], + * @link CharString @endlink. + * @param[in] verbose This option will report progress to standard output, bool, defaults to + * false. + * @param[in] mismatches Includes words with one mismatch into the word neighbourhood, unsigned, 0 or 1. + * @param[in] mismatchWeight Weight of word counts with one mismatch, double. 
+ * + * @see alignmentFreeComparison + * + * @var unsigned N2AFScore::kmerSize; + * @brief Size of the kmers. + * + * @var double N2AFScore::mismatchWeight; + * @brief Weight for approximate word matches + * + * @var CharString N2AFScore::revCom; + * @brief Scoring of reverse complements words, @link CharString @endlink [''/'max'/'min'/'mean'/'both_strands'/]. + * + * @var CharString N2AFScore::outputFile; + * @brief When specified, all kmerWeights for every sequence will be written to this file. + * + * @var unsigned N2AFScore::bgModelOrder; + * @brief Order of the background model + * + * @var unsigned N2AFScore::mismatches; + * @brief Approximate word matches [0(exact)/1(one mismatch)] + * + * @var bool N2AFScore::verbose; + * @brief true to enable verbose debug output. + */ + +struct N2_; // Reinert and Waterman, D2 with centralised and standardised counts +typedef Tag const N2; + +template <> +struct AFScore +{ + unsigned kmerSize; + unsigned bgModelOrder; + String revCom; // Count reverse complement words? + // revCom="";"mean","max","both_strands" + unsigned mismatches; // Currently 0 or 1 + double mismatchWeight; // Weight of words in the mismatch neighbourhood + bool verbose; + bool norm; // Normalize score? 
Needed to provide a proper similarity measure + String outputFile; // Output of all kmer weights for every sequence into this file + + // Constructor for the simple case with only exact word counts (N2*) + AFScore(unsigned k, unsigned m, String kmerWeightsFile = "", bool verbose_ = false) + { + kmerSize = k; + bgModelOrder = m; + outputFile = kmerWeightsFile; + verbose = verbose_; + revCom = ""; + mismatches = 0; + mismatchWeight = 1.0; + norm = true; + + }; + + // Constructor for the case with exact word counts and reverse complement (N2rc) + AFScore(unsigned k, unsigned m, String revCom_, String kmerWeightsFile = "", bool verbose_ = false) + { + kmerSize = k; + bgModelOrder = m; + revCom = revCom_; + outputFile = kmerWeightsFile; + verbose = verbose_; + mismatches = 0; + mismatchWeight = 1.0; + norm = true; + }; + + // Constructor for the case with mismatch-neighbourhood word counts and reverse complement (N2mmrc) + AFScore(unsigned k, + unsigned m, + String revCom_, + unsigned mm, double mmw, + String kmerWeightsFile = "", + bool verbose_ = false) + { + kmerSize = k; + bgModelOrder = m; + revCom = revCom_; + mismatches = mm; + mismatchWeight = mmw; + outputFile = kmerWeightsFile; + verbose = verbose_; + norm = true; + }; +}; + +/*! + * @class D2zAFScore D2zAFScore + * @extends AFScore + * @headerfile + * @brief D2z computes a z-score of the inner product of kmer count vectors + * + * @signature template <> + * struct AFScore; + * + * D2z can be used for alignment-free sequence comparison. The algorithm differs from the original implementation by + * the way masked sequences are handled + * + * @section References + * + * Kantorovitz, M. R.; Robinson, G. E., Sinha, S. A statistical method for alignment-free comparison of regulatory + * sequences. Bioinformatics, 2007. + * + * @fn D2zAFScore::AFScore + * + * @brief Constructor + * + * @signature AFScore::AFScore(kmerSize, bgModelOrder[, verbose]); + * + * @param[in] kmerSize Size of kmer, unsigned. 
+ * @param[in] bgModelOrder Order of the background Markov model, unsigned.
+ * @param[in] verbose This option will report progress to standard output; bool, defaults to
+ *                    false.
+ *
+ * @var unsigned D2zAFScore::bgModelOrder;
+ * @brief Order of the background model
+ *
+ * @var unsigned D2zAFScore::kmerSize;
+ * @brief Size of the kmers
+ *
+ * @var bool D2zAFScore::verbose;
+ * @brief true to enable verbose debug output.
+ */
+
+struct D2z_;    // Inner product of k-mer counts, d2 score with z-score
+typedef Tag const D2z;
+
+template <>
+struct AFScore
+{
+    unsigned kmerSize;      // Size of the kmers
+    unsigned bgModelOrder;  // Order of the background model
+    bool verbose;           // true to enable verbose debug output
+    AFScore(unsigned k, unsigned m, bool verbose_ = false) :
+        kmerSize(k), bgModelOrder(m), verbose(verbose_)
+    {}
+};
+
+}  // namespace seqan
+
+#endif  // SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_ALIGNMENT_FREE_BASE_H_
diff --git a/seqan/alignment_free/alignment_free_comparison.h b/seqan/alignment_free/alignment_free_comparison.h
new file mode 100644
index 0000000..0809f09
--- /dev/null
+++ b/seqan/alignment_free/alignment_free_comparison.h
@@ -0,0 +1,106 @@
+// ==========================================================================
+//                 SeqAn - The Library for Sequence Analysis
+// ==========================================================================
+// Copyright (c) 2006-2015, Knut Reinert, FU Berlin
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above copyright
+//       notice, this list of conditions and the following disclaimer in the
+//       documentation and/or other materials provided with the distribution.
+// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Jonathan Goeke +// ========================================================================== +// This file contains the function that calls alignment free sequence +// comparisons algorithms (see AFScore): +// alignmentFreeComparison() +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_ALIGNMENT_FREE_COMPARISON_H_ +#define SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_ALIGNMENT_FREE_COMPARISON_H_ + +namespace seqan { + +/*! + * @fn alignmentFreeComparison + * @headerfile + * @brief Computes the pairwise similarity scores for a set of sequences. + * + * @signature void alignmentFreeComparison(scoreMatrix, sequenceSet, score); + * + * @param[out] scoreMatrix A two-dimensional @link Matrix @endlink, used to store all pairwise scores. 
+ * @param[in] sequenceSet @link StringSet @endlink containing all sequences for which pairwise scores will be + * computed. + * @param[in] score The @link AFScore @endlink object to be used for computing the alignment. + * + * @section Examples + * + * Calculate the alignment free sequence similarity of two masked DNA sequences. + * + * @code{.cpp} + * using namespace seqan; + * StringSet sequences; + * Dna5String seq1 = + * "TAGGTTTTCCGAAAAGGTAGCAACTTTACGTGATCAAACCTCTGACGGGGTTTTCCCCGTCGAAATTGGGTG" + * "TTTCTTGTCTTGTTCTCACTTGGGGCATCTCCGTCAAGCCAAGAAAGTGCTCCCTGGATTCTGTTGCTAACG" + * "AGTCTCCTCTGCATTCCTGCTTGACTGATTGGGCGGACGGGGTGTCCACCTGACGCTGAGTATCGCCGTCAC" + * "GGTGCCACATGTCTTATCTATTCAGGGATCAGAATTTATTCAGGAAATCAGGAGATGCTACACTTGGGTTAT" + * "CGAAGCTCCTTCCAAGGCGTAGCAAGGGCGACTGAGCGCGTAAGCTCTAGATCTCCTCGTGTTGCAACTACA" + * "CGCGCGGGTCACTCGAAACACATAGTATGAACTTAACGACTGCTCGTACTGAACAATGCTGAGGCAGAAGAT" + * "CGCAGACCAGGCATCCCACTGCTTGAAAAAACTATNNNNCTACCCGCCTTTTTATTATCTCATCAGATCAAG"; + * Dna5String seq2 = + * "ACCGACGATTAGCTTTGTCCGAGTTACAACGGTTCAATAATACAAAGGATGGCATAAACCCATTTGTGTGAA" + * "AGTGCCCATCACATTATGATTCTGTCTACTATGGTTAATTCCCAATATACTCTCGAAAAGAGGGTATGCTCC" + * "CACGGCCATTTACGTCACTAAAAGATAAGATTGCTCAAANNNNNNNNNACTGCCAACTTGCTGGTAGCTTCA" + * "GGGGTTGTCCACAGCGGGGGGTCGTATGCCTTTGTGGTATACCTTACTAGCCGCGCCATGGTGCCTAAGAAT" + * "GAAGTAAAACAATTGATGTGAGACTCGACAGCCAGGCTTCGCGCTAAGGACGCAAAGAAATTCCCTACATCA" + * "GACGGCCGCGNNNAACGATGCTATCGGTTAGGACATTGTGCCCTAGTATGTACATGCCTAATACAATTGGAT" + * "CAAACGTTATTCCCACACACGGGTAGAAGAACNNNNATTACCCGTAGGCACTCCCCGATTCAAGTAGCCGCG"; + * + * clear(sequences); + * appendValue(sequences, seq1); + * appendValue(sequences, seq2); + * + * Matrix myMatrix; + * + * unsigned kmerSize = 5; + * unsigned bgModelOrder = 1; + * String revCom = "both_strands"; + * unsigned mismatches = 1; + * double mismatchWeight = 0.5; + * AFScore myScoreN2(kmerSize, bgModelOrder, revCom, mismatches, mismatchWeight); + * + * alignmentFreeComparison(myMatrix, sequences, myScoreN2); + * std::cout << 
myMatrix;
+ * @endcode
+ */
+
+template
+void alignmentFreeComparison(Matrix & scoreMatrix, TStringSet const & sequenceSet, TComparisonMethod const & comparisonMethod)
+{
+    // Public entry point: forwards all three arguments unchanged to _alignmentFreeComparison().
+    _alignmentFreeComparison(scoreMatrix, sequenceSet, comparisonMethod);
+}
+
+}  // namespace seqan
+
+#endif  // SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_ALIGNMENT_FREE_COMPARISON_H_
diff --git a/seqan/alignment_free/kmer_functions.h b/seqan/alignment_free/kmer_functions.h
new file mode 100644
index 0000000..c67935e
--- /dev/null
+++ b/seqan/alignment_free/kmer_functions.h
@@ -0,0 +1,965 @@
+// ==========================================================================
+//                 SeqAn - The Library for Sequence Analysis
+// ==========================================================================
+// Copyright (c) 2006-2015, Knut Reinert, FU Berlin
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above copyright
+//       notice, this list of conditions and the following disclaimer in the
+//       documentation and/or other materials provided with the distribution.
+//     * Neither the name of Knut Reinert or the FU Berlin nor the names of
+//       its contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED.
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+// DAMAGE.
+//
+// ==========================================================================
+// Author: Jonathan Goeke
+// ==========================================================================
+// This file contains helper functions to count words in sequences and to
+// calculate probabilities and variances of word occurrences.
+// ==========================================================================
+
+// TODO (goeke) const could be added below for the input variables but the function value() in matrix_base (align) is not defined for const. Similarly, the function emittedProbability is not defined for const in statistics_markov_model.h
+
+#ifndef SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_KMER_FUNCTIONS_H_
+#define SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_KMER_FUNCTIONS_H_
+
+namespace seqan {
+
+// Metafunction giving the alphabet used for hashing k-mers; the general case maps an alphabet to itself.
+template
+struct UnmaskedAlphabet_
+{
+    typedef TAlphabet Type;
+};
+
+// Specialization whose result is Dna.
+// NOTE(review): presumably this is the specialization for the masked Dna5 alphabet - TODO confirm.
+template <>
+struct UnmaskedAlphabet_
+{
+    typedef Dna Type;
+};
+
+// Variant that const-qualifies the result of a nested UnmaskedAlphabet_ lookup.
+// NOTE(review): presumably the partial specialization for const alphabets - TODO confirm.
+template
+struct UnmaskedAlphabet_
+{
+    typedef const typename UnmaskedAlphabet_::Type Type;
+};
+
+/*!
+ * @fn countKmers
+ * @headerfile
+ * @brief Counts kmers in a sequence. Optionally, a background model is returned.
+ *
+ * @signature void countKmers(kmerCounts, sequence, k);
+ * @signature void countKmers(kmerCounts, bgFrequencies, sequence, k);
+ * @signature void countKmers(kmerCounts, bgModel, sequence, k);
+ *
+ * @param[out] kmerCounts @link String @endlink of unsigned with kmer counts for every k-mer.
+ * @param[out] bgFrequencies @link String @endlink of background frequencies (double) representing the model.
+ * @param[out] bgModel @link MarkovModel @endlink to use.
+ * @param[in] sequence @link String @endlink (sequence) where k-mers are counted.
+ * @param[in] k k-mer length (unsigned).
+ *
+ * k-mers overlapping masked (aka 'N') letters are not counted in case of Dna5Strings. A Bernoulli or Markov Model
+ * can be chosen as a background model.
+ *
+ * @section Examples
+ *
+ * Count all 2-mers of a masked DNA sequence and estimate Bernoulli and Markov background models.
+ *
+ * @code{.cpp}
+ * using namespace seqan;
+ * // Masked sequence, we do not want to count words overlapping 'N'
+ * Dna5String sequenceDna5 =
+ *     "TAGGTTTTCCGAAAAGGTAGCAACTTTACGTGATCAAACCTCTGACGGGGTTTTCCCCGTCGAAATTGGGTG"
+ *     "TTTCTTGTCTTGTTCTCACTTGGGGCATCTCCGTCAAGCCAAGAAAGTGCTCCCTGGATTCTGTTGCTAACG"
+ *     "AGTCTCCTCTGCATTCCTGCTTGACTGATTGGGCGGACGGGGTGTCCACCTGACGCTGAGTATCGCCGTCAC"
+ *     "GGTGCCACATGTCTTATCTATTCAGGGATCAGAATTTATTCAGGAAATCAGGAGATGCTACACTTGGGTTAT"
+ *     "CGAAGCTCCTTCCAAGGCGTAGCAAGGGCGACTGAGCGCGTAAGCTCTAGATCTCCTCGTGTTGCAACTACA"
+ *     "CGCGCGGGTCACTCGAAACACATAGTATGAACTTAACGACTGCTCGTACTGAACAATGCTGAGGCAGAAGAT"
+ *     "CGCAGACCAGGCATCCCACTGCTTGAAAAAACTATNNNNCTACCCGCCTTTTTATTATCTCATCAGATCAAG";
+ *
+ * String kmerCounts;
+ * unsigned k = 2;  // Count all 2-mers
+ * countKmers(kmerCounts, sequenceDna5, k);
+ *
+ * for(unsigned i = 0; i<16; ++i)  // Print the 2-mer counts
+ *     std::cout< nucleotideFrequencies;  // Defines a Bernoulli model for DNA sequences.
+ * // Count all 2-mers and save the nucleotide frequencies
+ * countKmers(kmerCounts, nucleotideFrequencies, sequenceDna5, k);
+ *
+ * for(unsigned i = 0; i<4; ++i)  // Print the nucleotide frequencies
+ *     std::cout << nucleotideFrequencies[i] << "\n";
+ * // => p(A) = 0.238; p(C) = 0.254; p(G) = 0.238; p(T) = 0.27;
+ *
+ * MarkovModel backgroundModel(1);  // Markov model of order 1
+ * // Count all 2-mers and return a Markov model
+ * countKmers(kmerCounts, backgroundModel, sequenceDna5, k);
+ * std::cout<
+void countKmers(String & kmerCounts, TString const & sequence, unsigned const k)
+{
+    typedef typename Value::Type TAlphabet;
+    typedef typename UnmaskedAlphabet_::Type TUnmaskedAlphabet;
+    typedef typename Iterator::Type TIterator;
+    typedef typename Position::Type TPosition;
+    typedef Shape TShape;
+    // Declare variables
+    TShape myShape;  // Shape, length can be changed (kmer_length)
+    resize(myShape, k);
+    // Calculate the number of kmers, length of count vector
+    int kmerNumber = _intPow((unsigned)ValueSize::VALUE, weight(myShape));
+    clear(kmerCounts);
+    resize(kmerCounts, kmerNumber, 0);
+    TIterator itSequence = begin(sequence);
+    // Number of upcoming window positions that still overlap the most recent masked letter;
+    // a k-mer is only counted while this is <= 0.
+    int counterN = 0;
+
+    // Check for any N that destroys the first kmers
+    // NOTE(review): sequence[0..k-1] is indexed without a length check, and k - 1 wraps for k == 0;
+    // presumably callers guarantee length(sequence) >= k >= 1 - confirm.
+    unsigned j = k - 1;
+    for (TPosition i = position(itSequence); i <= j; ++i)
+    {
+        if (_repeatMaskValue(sequence[i]))
+        {
+            counterN = i + 1;
+        }
+    }
+    for (; itSequence <= (end(sequence) - k); ++itSequence)
+    {
+        // Check if there is a "N" at the end of the new kmer
+        if (_repeatMaskValue(value(itSequence + (k - 1))))
+            counterN = k;  // Do not consider any kmer covering this "N"
+
+        // If there is no "N" overlapping with the current kmer, count it
+        if (counterN <= 0)
+        {
+            unsigned hashValue = hash(myShape, itSequence);
+            ++kmerCounts[hashValue];
+        }
+        counterN--;
+    }
+}
+
+/*
+ * Function to count kmers and background nucleotide frequencies, Ns are not considered
+ * (for zero order background model).
+ *
+ * backgroundFrequencies receives one entry per letter: the count of that letter divided by the
+ * number of unmasked letters seen (the division is skipped when no unmasked letter was counted).
+ * NOTE(review): kmerCounts is cleared before it is resized, backgroundFrequencies is only resized;
+ * presumably callers pass an empty string - confirm.
+ */
+template
+void countKmers(String & kmerCounts, String & backgroundFrequencies, TString const & sequence, unsigned const k)
+{
+    typedef typename Value::Type TAlphabet;
+    typedef typename UnmaskedAlphabet_::Type TUnmaskedAlphabet;
+    typedef typename Iterator::Type TIterator;
+    typedef typename Iterator >::Type TIteratorTStringBG;
+    typedef typename Position::Type TPosition;
+    typedef Shape TShape;
+    unsigned alphabetSize = ValueSize::VALUE;
+
+    // Declare variables
+    TShape myShape;  // Shape, length can be changed (kmer_length)
+    TShape myShapeBG;  // Shape for background, set to markovlen+1, here zero order only
+    resize(myShape, k);
+    resize(myShapeBG, 1);  // Markov model of zero order (count background frequencies)
+
+    // Calculate number of kmers/ length of count vector, respectively background vector
+    unsigned kmerNumber = _intPow(alphabetSize, k);
+    unsigned kmerNumberBG = alphabetSize;  // Zero order model for DNA sequences (Bernoulli model)
+    clear(kmerCounts);
+    resize(kmerCounts, kmerNumber, 0);
+    resize(backgroundFrequencies, kmerNumberBG, (TValueBG) 0);
+    TIterator itSequence = begin(sequence);
+
+    int counterN = 0;  // Counter that counts how many kmers are affected by a N
+    int counterNbg = 0;  // Counter for background model (different shape size)
+
+    // Check for any N that destroys the first kmers
+    // NOTE(review): sequence[0..k-1] is indexed without a length check - confirm callers guarantee it.
+    unsigned j = k - 1;
+    for (TPosition i = position(itSequence); i <= j; ++i)
+    {
+        if (_repeatMaskValue(sequence[i]))
+            counterN = i + 1;
+    }
+
+    int sumBG = 0;  // Count the number of nucleotides for the nucleotide frequency calculation (Ns are not considered anymore).
+    for (; itSequence <= (end(sequence) - k); ++itSequence)
+    {
+        // Check if there is a "N" at the end of the new kmer
+        if (_repeatMaskValue(value(itSequence + (k - 1))))
+        {
+            counterN = k;  // Do not consider any kmer covering this "N"
+        }
+        // If there is no "N" overlapping with the current kmer, count it.
+        if (counterN <= 0)
+        {
+            unsigned hashValue = hash(myShape, itSequence);
+            ++kmerCounts[hashValue];
+        }
+        // Check if there is a "N" at the end of the new background word, here single letters only.
+        if (_repeatMaskValue(value(itSequence)))
+        {
+            counterNbg = 1;
+        }
+        if (counterNbg <= 0)
+        {
+            unsigned hashValueBG = hash(myShapeBG, itSequence);
+            backgroundFrequencies[hashValueBG] += 1.0;
+            ++sumBG;
+        }
+        counterN--;
+        counterNbg--;
+    }
+    // The background counts are updated until the last base is covered.
+    for (; itSequence < end(sequence); ++itSequence)
+    {
+        if (_repeatMaskValue(value(itSequence)))
+        {
+            counterNbg = 1;
+        }
+        if (counterNbg <= 0)
+        {
+            unsigned hashValueBG = hash(myShapeBG, itSequence);
+            ++backgroundFrequencies[hashValueBG];
+            ++sumBG;
+        }
+        counterNbg--;
+    }
+    // Normalise the background counts to obtain the nucleotide frequencies (Bernoulli model of DNA sequences).
+    TIteratorTStringBG itBackground = begin(backgroundFrequencies);
+    for (; itBackground < end(backgroundFrequencies); ++itBackground)
+        if (sumBG != 0)
+            value(itBackground) /= ((TValueBG) sumBG);
+}
+
+/*
+ * Function to count kmers and build a background markov model with masked sequences.
+ *
+ * The sequence is split at masked letters; the unmasked pieces are collected in a string set
+ * that is handed to buildMarkovModel() at the end.
+ */
+template
+void countKmers(String & kmerCounts, MarkovModel & backgroundModel, TString const & sequence, unsigned k)
+{
+    //typedef typename Value::Type TAlphabet;
+    //typedef typename UnmaskedAlphabet_::Type TUnmaskedAlphabet;
+    typedef typename Iterator::Type TIterator;
+    //typedef typename Iterator, Rooted>::Type TIteratorInt;
+    typedef typename Position::Type TPosition;
+    typedef Shape TShape;
+
+    // Declare variables
+    TShape myShape;  // Shape, length can be changed (kmer_length)
+    resize(myShape, k);
+    // Only consider kmers without N
+    int kmerNumber = _intPow((unsigned)ValueSize::VALUE, weight(myShape));
+    clear(kmerCounts);
+    resize(kmerCounts, kmerNumber, 0);
+
+    // Create sequence set for the markov model, if Ns occur, the sequence is split and Ns are removed
+    StringSet > seqSetMM;
+
+    TIterator itSeq = begin(sequence);
+
+    // Check for any N that destroys the first kmers
+    // Each masked letter found moves itSeq just past it and extends the scanned range (j) by k - 1 letters.
+    // NOTE(review): sequence[i] is indexed without a length check - confirm callers guarantee it.
+    unsigned j = (k - 1);
+    for (TPosition i = position(itSeq); i <= j; ++i)
+    {
+        if (_repeatMaskValue(sequence[i]))
+        {
+            if ((i - position(itSeq)) > 0)
+            {
+                appendValue(seqSetMM, infix(sequence, position(itSeq), i));
+            }
+            goFurther(itSeq, i + 1 - position(itSeq));
+            j = i + k - 1;
+        }
+    }
+
+    int counterN = 0;
+    TPosition startSplitSequence = position(itSeq);  // The position of possible start of a sequence after NNs is stored to split sequences.
+
+    for (; itSeq <= (end(sequence) - k); ++itSeq)
+    {
+        if (_repeatMaskValue(value(itSeq + (k - 1))))
+        {
+            counterN = k;
+
+            // Store the unmasked piece that ends just before this N (if it is non-empty).
+            if (((position(itSeq) + k - 1) > startSplitSequence))
+                appendValue(seqSetMM, infix(sequence, startSplitSequence, (position(itSeq) + k - 1)));
+
+            startSplitSequence = (position(itSeq) + k);  // Position after N, possible start
+        }
+        if (counterN <= 0)
+        {
+            unsigned hashValue = hash(myShape, itSeq);
+            ++kmerCounts[hashValue];
+        }
+
+        counterN--;
+    }
+    // Create a stringSet, needed to create the Markov model
+    if ((position(itSeq) + k - 1) > startSplitSequence)
+    {
+        appendValue(seqSetMM, infix(sequence, startSplitSequence, (position(itSeq) + k - 1)));
+    }
+    // Build background Markov model
+    buildMarkovModel(backgroundModel, seqSetMM);
+}
+
+/*!
+ * @fn calculateProbability
+ * @headerfile
+ * @brief Calculates the probability of a sequence given a Bernoulli model.
+ *
+ * @signature void calculateProbability(probability, sequence, bgFrequencies);
+ *
+ * @param[out] probability Probability (double) of the sequence given the model.
+ * @param[in] sequence @link String @endlink, usually of Dna characters.
+ * @param[in] bgFrequencies @link String @endlink of background frequencies (double) representing the model.
+ *
+ * @section Examples
+ *
+ * Calculate the probability for the word CCCAAGTTT with p(A) = p(T) = 0.3 and p(C) = p(G) = 0.2.
+ *
+ * @code{.cpp}
+ * using namespace seqan;
+ * double p = 0.0;
+ * DnaString word = "CCCAAGTTT";
+ * String model;
+ * resize(model, 4);
+ * model[0] = 0.3;  // p(A)
+ * model[1] = 0.2;  // p(C)
+ * model[2] = 0.2;  // p(G)
+ * model[3] = 0.3;  // p(T)
+ * calculateProbability(p, word, model);  // p = 3.888e-06
+ * @endcode
+ *
+ * @see calculateVariance
+ * @see alignmentFreeComparison
+ * @see calculateCovariance
+ * @see countKmers
+ */
+
+template
+void calculateProbability(TValue & probability, TString const & sequence, TStringBG const & backgroundFrequencies)
+{
+    typedef typename Iterator::Type TIteratorTString;
+
+    TIteratorTString itSequence = begin(sequence);
+    // Start from 1 (empty product), so an empty sequence yields probability 1.
+    probability = (TValue) 1;
+    // Multiply the frequencies of all letters; each letter's ordinal value is its index into the model.
+    for (; itSequence < end(sequence); ++itSequence)
+        probability *= backgroundFrequencies[ordValue(*itSequence)];
+}
+
+/*!
+ * @fn calculateVariance
+ * @headerfile
+ * @brief Calculates the variance for the number of word occurrences of a word in a sequence of length n given a
+ *        background model.
+ *
+ * @signature void calculateVariance(variance, word, bgFrequencies, n);
+ * @signature void calculateVariance(variance, word, bgModel, n);
+ *
+ * @param[out] variance Variance of the number of occurrences of the word in a sequence of length n given the
+ *                      model; double.
+ * @param[in] word @link String @endlink, usually of Dna to compute variance for.
+ * @param[in] bgFrequencies @link String @endlink of bg frequencies representing the model.
+ * @param[in] bgModel @link MarkovModel @endlink to use.
+ * @param[in] n Length of the sequence where the occurrences of word are counted, int.
+ *
+ * Calculates the variance for the number of word occurrences of a word in a sequence of length n given a background
+ * model (Markov model or Bernoulli model). The formula is obtained from (Robin et al., 2005).
+ *
+ * @section References
+ *
+ * Robin, S., Rodolphe, F., and Schbath, S. (2005). DNA, Words and Models. Cambridge University Press.
See Jonathan
+ * Goeke et al (to appear) for details on the implementation.
+ *
+ * @section Examples
+ *
+ * Calculate the variance for the number of occurrences of CAAGTC in a sequence of length 10000bp with
+ * p(A) = p(T) = 0.3 and p(C) = p(G) = 0.2.
+ *
+ * @code{.cpp}
+ * using namespace seqan;
+ * double var = 0.0;
+ * int n = 10000;
+ * DnaString word = "CAAGTC";
+ * String model;
+ * resize(model, 4);
+ * model[0] = 0.3;  // p(A)
+ * model[1] = 0.2;  // p(C)
+ * model[2] = 0.2;  // p(G)
+ * model[3] = 0.3;  // p(T)
+ * calculateVariance(var, word, model, n);  // var = 2.16
+ * @endcode
+ *
+ * Estimate a Markov model on a set of sequences and calculate the variance for the number of occurrences of the word
+ * CAAGTC in a sequence of length 10000bp.
+ *
+ * @code{.cpp}
+ * using namespace seqan;
+ * double var = 0.0;
+ * int n = 10000;
+ * DnaString word = "CAAGTC";
+ * StringSet sequences;
+ * appendValue(sequences, "CAGAAAAAAACACTGATTAACAGGAATAAGCAGTTTACTTATTTTGGGCCTGGGACCCGTGTCTCTAATTTAATTAGGTGATCCCTGCGAAGTTTCTCCA");
+ * MarkovModel model(0);  // Bernoulli model
+ * model.build(sequences);
+ * calculateVariance(var, word, model, n);  // var = 2.16
+ * MarkovModel model1(1);  // First order Markov model
+ * model1.build(sequences);
+ * calculateVariance(var, word, model1, n);  // var = 1.69716
+ * @endcode
+ *
+ * @see calculateProbability
+ * @see calculateCovariance
+ * @see MarkovModel
+ * @see alignmentFreeComparison
+ * @see calculatePeriodicity
+ * @see countKmers
+ * @see calculateOverlapIndicator
+ */
+
+template
+void calculateVariance(TValue & variance, TString const & word, TStringBG const & backgroundFrequencies, int const n)
+{
+    typedef typename Value::Type TAlphabet;
+    typedef typename Value::Type TValueBG;
+    typedef typename Iterator, Rooted>::Type TIteratorInt;
+
+    int l = length(word);
+    TValueBG p_w;
+    calculateProbability(p_w, word, backgroundFrequencies);
+
+    // Offsets at which the word can overlap a shifted copy of itself.
+    String periodicity;
+    calculatePeriodicity(periodicity, word, word);
+    // First term: (number of start positions) * P(word).
+    variance = (TValue) (n - l + 1) * p_w;
+    // One term per self-overlap offset: P(clump) = P(prefix of that length) * P(word).
+    for (TIteratorInt i = begin(periodicity); i < end(periodicity); ++i)
+    {
+        TValueBG p_clump;
+        TValueBG p_tmp;
+        // Same word and model as for p_w above, recomputed on every iteration.
+        calculateProbability(p_tmp, word, backgroundFrequencies);
+        String wordPrefix = prefix(word, value(i));
+        calculateProbability(p_clump, wordPrefix, backgroundFrequencies);
+        p_clump *= p_tmp;
+        variance += (TValue) 2 * (n - l + 1 - value(i)) * p_clump;
+    }
+    variance += (TValue) p_w * p_w * (n - 2 * n * l + 3 * l * l - 4 * l + 1);
+}
+
+
+// Markov model variant: same three terms as above, but probabilities come from emittedProbability()
+// and each clump is built explicitly as (prefix of the word up to the overlap offset) + word.
+template
+void calculateVariance(TValue & variance, String const & word, MarkovModel /*const*/ & bgModel, int const n)
+{
+    typedef typename Iterator, Rooted>::Type TIteratorInt;
+
+    int l = length(word);
+    TValue p_w;
+    p_w = emittedProbability(bgModel, word);
+    String periodicity;
+    calculatePeriodicity(periodicity, word, word);
+    variance = (TValue) (n - l + 1) * p_w;
+    for (TIteratorInt i = begin(periodicity); i < end(periodicity); ++i)
+    {
+        TValue p_clump;
+        String clump = prefix(word, value(i));
+        append(clump, word);
+        p_clump = emittedProbability(bgModel, clump);
+        variance += (TValue) 2 * (n - l + 1 - value(i)) * p_clump;
+    }
+    variance += (TValue) p_w * p_w * (n - 2 * n * l + 3 * l * l - 4 * l + 1);
+}
+
+/*!
+ * @fn calculateCovariance
+ * @headerfile
+ * @brief Calculates the covariance for the number of word occurrences for two words in a sequence of length n, given a
+ *        background model.
+ *
+ * @signature void calculateCovariance(covariance, word1, word2, bgFrequencies, n);
+ * @signature void calculateCovariance(covariance, word1, word2, bgModel, n);
+ *
+ * @param[out] covariance Covariance of the number of occurrences of the two words in a sequence of length n given the
+ *                        model, double.
+ * @param[in] word1 @link String @endlink, usually of Dna.
+ * @param[in] word2 @link String @endlink, usually of Dna.
+ * @param[in] bgFrequencies @link String @endlink of double with the background frequencies representing the model.
+ * @param[in] bgModel @link MarkovModel @endlink to use.
+ * @param[in] n Length of the sequence where the occurrences of word are counted, int. + * + * Calculates the covariance for the number of word occurrences for two words in a sequence of length n given a + * background model (Markov model or Bernoulli model). The covariance is influenced by the property of words to overlap, + * for example, the words ATAT and TATA have a high covariance since they are likely to overlap. The formula is based on + * (Robin et al., 2005). + * + * @section References + * + * Robin, S., Rodolphe, F., and Schbath, S. (2005). DNA, Words and Models. Cambridge University Press. See Jonathan + * Goeke et al (to appear) for details on the implementation. + * + * @section Examples + * + * Calculate the covariance for the number of occurrences of ATATAT and TATATA in a sequence of length 10000bp with + * p(A) = p(T) = 0.3 and p(C) = p(G) = 0.2. + * + * @code{.cpp} + * using namespace seqan; + * double covar = 0.0; + * int n = 10000; + * DnaString word1 = "ATATAT"; + * DnaString word2 = "TATATA"; + * String model; + * resize(model, 4); + * model[0] = 0.3; // p(A) + * model[1] = 0.2; // p(C) + * model[2] = 0.2; // p(G) + * model[3] = 0.3; // p(T) + * calculateCovariance(covar, word1, word2, model, n); // covar = 4.74 + * @endcode + * + * Estimate a Markov model on a set of sequences and calculate the covariance for the number of occurrences of ATATAT + * and TATATA in a sequence of length 10000bp. 
+ *
+ * @code{.cpp}
+ * using namespace seqan;
+ * double covar = 0.0;
+ * int n = 10000;
+ * DnaString word1 = "ATATAT";
+ * DnaString word2 = "TATATA";
+ * StringSet sequences;
+ * appendValue(sequences, "CAGCACTGATTAACAGGAATAAGCAGTTTACTTCTGTCAGAATATTGGGCATATATA"
+ *                        "CTGGGACCCGTGTAATACTCTAATTTAATTAGGTGATCCCTGCGAAGTCTCCA");
+ * MarkovModel modelMM0(0);  // Bernoulli model
+ * modelMM0.build(sequences);
+ * calculateCovariance(covar, word1, word2, modelMM0, n);  // covar = 4.74
+ * MarkovModel modelMM1(1);  // First order Markov model
+ * modelMM1.build(sequences);
+ * calculateCovariance(covar, word1, word2, modelMM1, n);  // covar = 13.1541
+ * @endcode
+ *
+ * @see calculateProbability
+ * @see calculateVariance
+ * @see MarkovModel
+ * @see alignmentFreeComparison
+ * @see calculatePeriodicity
+ * @see countKmers
+ * @see calculateOverlapIndicator
+ */
+
+template
+void calculateCovariance(TValue & covariance, TString const & word1, TString const & word2, TStringBG const & backgroundFrequencies, int const n)
+{
+    // Identical words: the covariance is the variance.
+    if (word1 == word2)
+    {
+        calculateVariance(covariance, word1, backgroundFrequencies, n);
+        return;
+    }
+    typedef typename Value::Type TAlphabet;
+    typedef typename Value::Type TValueBG;
+    typedef typename Iterator, Rooted>::Type TIteratorInt;
+
+    covariance = 0;
+    int l1 = length(word1);
+    TValueBG p_w1;
+    calculateProbability(p_w1, word1, backgroundFrequencies);
+    // Overlaps where word1 comes first and word2 starts inside it.
+    String periodicity1;
+    calculatePeriodicity(periodicity1, word1, word2);
+    for (TIteratorInt i = begin(periodicity1); i < end(periodicity1); ++i)
+    {
+        TValueBG p_clump;
+        TValueBG p_tmp;
+        calculateProbability(p_tmp, word2, backgroundFrequencies);
+        String wordPrefix = prefix(word1, value(i));
+        calculateProbability(p_clump, wordPrefix, backgroundFrequencies);
+        p_clump *= p_tmp;
+        covariance += (TValue) (n - l1 + 1 - value(i)) * p_clump;
+    }
+
+    int l2 = length(word2);
+    TValueBG p_w2;
+    calculateProbability(p_w2, word2, backgroundFrequencies);
+    // Overlaps where word2 comes first and word1 starts inside it.
+    String periodicity2;
+    calculatePeriodicity(periodicity2, word2, word1);
+    for (TIteratorInt i = begin(periodicity2); i < end(periodicity2); ++i)
+    {
+        TValueBG p_clump;
+        TValueBG p_tmp;
+        calculateProbability(p_tmp, word1, backgroundFrequencies);
+        String wordPrefix = prefix(word2, value(i));
+        calculateProbability(p_clump, wordPrefix, backgroundFrequencies);
+        p_clump *= p_tmp;
+        covariance += (TValue) (n - l2 + 1 - value(i)) * p_clump;
+    }
+    // NOTE(review): this closing term only uses l1, so both words are presumably expected to have
+    // the same length - confirm.
+    covariance += (TValue) p_w1 * p_w2 * (n - 2 * n * l1 + 3 * l1 * l1 - 4 * l1 + 1);
+}
+
+// Markov model variant: probabilities come from emittedProbability() and each clump is built
+// explicitly as (prefix of the first word up to the overlap offset) + second word.
+template
+void calculateCovariance(TValue & covariance, String const & word1, String const & word2, MarkovModel /*const*/ & bgModel, int const n)
+{
+    // Identical words: the covariance is the variance.
+    if (word1 == word2)
+    {
+        calculateVariance(covariance, word1, bgModel, n);
+        return;
+    }
+    typedef typename Iterator, Rooted>::Type TIteratorInt;
+
+    covariance = 0;
+    int l1 = length(word1);
+    TValue p_w1;
+    p_w1 = emittedProbability(bgModel, word1);
+    String periodicity1;
+    calculatePeriodicity(periodicity1, word1, word2);  // word2 is right
+    for (TIteratorInt i = begin(periodicity1); i < end(periodicity1); ++i)
+    {
+        TValue p_clump;
+        String clump = prefix(word1, value(i));
+        append(clump, word2);
+
+        p_clump = emittedProbability(bgModel, clump);
+
+        covariance += (TValue) (n - l1 + 1 - value(i)) * p_clump;
+    }
+    TValue p_w2;
+    p_w2 = emittedProbability(bgModel, word2);
+    String periodicity2;
+    calculatePeriodicity(periodicity2, word2, word1);
+    for (TIteratorInt i = begin(periodicity2); i < end(periodicity2); ++i)
+    {
+        TValue p_clump;
+        String clump = prefix(word2, value(i));
+        append(clump, word1);
+
+        p_clump = emittedProbability(bgModel, clump);
+
+        // NOTE(review): the Bernoulli overload uses length(word2) at this point, this overload uses l1;
+        // the two agree only for words of equal length - confirm that equal lengths are required.
+        covariance += (TValue) (n - l1 + 1 - value(i)) * p_clump;
+    }
+    covariance += (TValue) p_w1 * p_w2 * (n - 2 * n * l1 + 3 * l1 * l1 - 4 * l1 + 1);
+}
+
+/*!
+ * @fn calculatePeriodicity + * @headerfile + * @brief Calculate word periodicity (indicator for overlaps) + * + * @signature void calculatePeriodicity(periodicity, word1, word2); + * + * @param[out] periodicity String of int values giving the periodicity (overlap indicator) of + * word1 and word2. + * @param[int] word1 String, usually of Dna characters. + * @param[int] word2 String, usually of Dna characters. + * + * Calculate word periodicity (indicator for overlaps) for two words. + * + * @section Examples + * + * Calculate the periodicity of two words (At which positions can they overlap?) + * + * @code{.cpp} + * using namespace seqan; + * DnaString word1 = "ATATA"; + * DnaString word2 = "TATAT"; + * String periodicity; + * calculatePeriodicity(periodicity, word1, word2); + * for(unsigned i = 0; i < length(periodicity); ++i) // Print the periodicity + * std::cout << periodicity[i] << "\t"; + * + * // periodocity[0] = 1: + * // i = 01234 + * // word1 = ATATA + * // word2 = -TATAT + * + * // periodocity[1] = 3: + * // i = 01234 + * // word1 = ATATA + * // word2 = ---TATAT + * @endcode + * + * @see calculateVariance + * @see calculateCovariance + * @see calculateOverlapIndicator + * @see alignmentFreeComparison + */ + +template +void calculatePeriodicity(String & periodicity, TString const & word1, TString const & word2) +{ + typedef typename Value::Type TAlphabet; + //typedef typename Iterator::Type TIterator; + typedef typename Size::Type TSize; + + TSize length1 = length(word1); + TSize length2 = length(word2); + for (TSize i = 1; i < length1; ++i) + { + String my_suffix = suffix(word1, i); // Overlap of suffix of word1 with prefix of word2 + TSize my_min = std::min(length2, (length1 - i)); + String my_prefix = prefix(word2, my_min); + if (my_suffix == my_prefix) + { + appendValue(periodicity, i); + } + } +} + +/*! 
+ * @fn calculateOverlapIndicator
+ * @headerfile
+ * @brief Calculate word overlaps: epsilon(word1, word2) = 1 where word2[j] = word1[j+p] for
+ *        all j = 1..(k-p).
+ *
+ * @signature void calculateOverlapIndicator(epsilon, word1, word2);
+ *
+ * @param[out] epsilon String of int giving the periodicity (overlap indicator) of word1 and word2.
+ * @param[in] word1 String (for example a DNA sequence).
+ * @param[in] word2 String (for example a DNA sequence).
+ *
+ * Calculate the indicator for overlaps of two words. The formula is based on (Robin et al., 2005).
+ *
+ * @section References
+ *
+ * Robin, S., Rodolphe, F., and Schbath, S. (2005). DNA, Words and Models. Cambridge University Press. See Jonathan
+ * Goeke et al (to appear) for details on the implementation.
+ *
+ * @section Examples
+ *
+ * Calculate the overlap indicator (epsilon) for two words
+ *
+ * @code{.cpp}
+ * using namespace seqan;
+ * DnaString word1 = "ATATA";
+ * DnaString word2 = "TATAT";
+ * String epsilon;
+ * calculateOverlapIndicator(epsilon, word1, word2);
+ * for(unsigned i = 0; i < length(epsilon); ++i)
+ *     std::cout << epsilon[i] << "\t";
+ * // epsilon = 01010:
+ * // word1            ATATA
+ * // word2 overlap 1: -TATAT
+ * // word2 overlap 2: ---TATAT
+ * @endcode
+ *
+ * @see calculateVariance
+ * @see calculateCovariance
+ * @see calculatePeriodicity
+ * @see alignmentFreeComparison
+ */
+
+template
+void calculateOverlapIndicator(String & epsilon, TString const & word1, TString const & word2)
+{
+    typedef typename Value::Type TAlphabet;
+    //typedef typename Iterator::Type TIterator;
+    typedef typename Size::Type TSize;
+
+    TSize length1 = length(word1);
+    TSize length2 = length(word2);
+    // One entry per letter of word1, all initialised to 0.
+    clear(epsilon);
+    resize(epsilon, length1, 0);
+    // epsilon[i] becomes 1 when the last i + 1 letters of word1 equal the first
+    // min(length2, i + 1) letters of word2.
+    for (TSize i = 0; i < length1; ++i)
+    {
+        String my_suffix = suffix(word1, length1 - i - 1);  // Overlap of suffix of word1 with prefix of word2
+        TSize my_min = std::min(length2, i + 1);
+        String my_prefix = prefix(word2, my_min);
+        if (my_suffix == my_prefix)
+            epsilon[i] = 1;
+    }
+}
+
+/*!
+ * @fn stringToStringSet
+ * @headerfile
+ * @brief Transform a String into a StringSet containing this String.
+ *
+ * @signature void stringToStringSet(stringSet, string);
+ * @signature void stringToStringSet(dnaStringSet, dna5String);
+ *
+ * @param[out] stringSet @link StringSet @endlink to create with one sequence.
+ * @param[in] string @link String @endlink to create the string set of.
+ * @param[out] dnaStringSet @link StringSet @endlink of @link String Strings @endlink over the alphabet @link Dna @endlink.
+ * @param[in] dna5String @link String @endlink over the alphabet @link Dna5 @endlink to convert.
+ *
+ * @note The second variant removes all N characters from the @link Dna5String @endlink.
+ *
+ * @section Examples
+ *
+ * Transform a masked DNA sequence into a set of sequences with all masked parts removed.
+ *
+ * @code{.cpp}
+ * using namespace seqan;
+ * Dna5String sequenceDna5 =
+ *     "NNNNNNTTTCCGAAAAGGTANNNNNGCAACTTTANNNCGTGATCAAAGTTTTCCCCGTCGAAATTGGGNNTG";
+ * StringSet sequencesDna;
+ * stringToStringSet(sequencesDna, sequenceDna5);
+ * // Print the masked sequence
+ * std::cout<
+void
+stringToStringSet(StringSet & stringSet, TString const & sequence)
+{
+    // Generic variant: the set holds exactly one entry, a copy of the sequence.
+    resize(stringSet, 1);
+    stringSet[0] = sequence;
+}
+
+// Masked variant: splits the sequence at every 'N' and appends each non-empty piece between Ns
+// to dnaStringSet. Pieces are appended; dnaStringSet is not cleared here.
+inline void
+stringToStringSet(StringSet > & dnaStringSet, String const & sequence)
+{
+    typedef Iterator const, Rooted>::Type TIterator;
+    typedef Position::Type TPosition;
+
+    TIterator itSeq = begin(sequence);
+    // Check for any N that destroys the first kmers
+    // With j = 0 this loop only inspects the first letter and skips it if it is an 'N'.
+    // NOTE(review): sequence[0] is read without checking for an empty sequence - confirm callers
+    // never pass one.
+    unsigned j = 0;
+    for (TPosition i = position(itSeq); i <= j; ++i)
+    {
+        if (sequence[i] == 'N')
+        {
+            if ((i - position(itSeq)) > 0)
+                appendValue(dnaStringSet, infix(sequence, position(itSeq), i));
+            goFurther(itSeq, i + 1 - position(itSeq));
+            j = i;
+        }
+    }
+    // counterN is written and decremented below but never read in this function.
+    int counterN = 0;
+    TPosition startSplitSequence = position(itSeq);  // The position of possible starts of a sequence after Ns is stored to split the sequence.
+    for (; itSeq <= (end(sequence) - 1); ++itSeq)
+    {
+        if (value(itSeq) == 'N')
+        {
+            counterN = 1;
+            // Store the piece that ends just before this N (if it is non-empty).
+            if (((position(itSeq)) > startSplitSequence))
+            {
+                appendValue(dnaStringSet, infix(sequence, startSplitSequence, position(itSeq)));
+            }
+            startSplitSequence = (position(itSeq) + 1);  // Position after N, possible start
+        }
+        counterN--;
+    }
+    // Create the stringSet, the stringSet can be used to create a Markov model
+    if (position(itSeq) > startSplitSequence)
+        appendValue(dnaStringSet, infix(sequence, startSplitSequence, position(itSeq)));
+}
+
+/*!
+ * @fn cutNs
+ * @headerfile
+ * @brief Cut out all masked sequences from a Dna5String.
+ *
+ * @signature void cutNs(sequenceCut, sequence);
+ *
+ * @param[out] sequenceCut Dna5String similar to sequence with all Ns cut out.
+ * @param[in] sequence Masked DNA sequence.
+ *
+ * This function concatenates the nonmasked parts of the sequence, thereby changing the word content. If you want to
+ * remove the masked parts of a sequence without concatenation, use stringToStringSet.
+ * + * @section Examples + * + * Transform a masked DNA sequence into an unmasked sequences with all masked parts cut out + * + * @code{.cpp} + * using namespace seqan; + * Dna5String sequenceMasked = + * "NNNNNNTTTCCGAAAAGGTANNNNNGCAACTTTANNNCGTGATCAAAGTTTTCCCCGTCGAAATTGGGNNTG"; + * Dna5String sequenceMaskedPartsRemoved; + * cutNs(sequenceMaskedPartsRemoved, sequenceMasked); + * // Print the masked sequence + * std::cout< & sequenceCut, String const & sequence) +{ + typedef Iterator const, Rooted>::Type TIterator; + typedef Position::Type TPosition; + + sequenceCut = ""; + TIterator itSeq = begin(sequence); + + // Check for any N that destroys the first kmers + unsigned j = 0; + for (TPosition i = position(itSeq); i <= j; ++i) + { + if (sequence[i] == 'N') + { + if ((i - position(itSeq)) > 0) + sequenceCut += infix(sequence, position(itSeq), i); + goFurther(itSeq, i + 1 - position(itSeq)); + j = i; + } + } + int counterN = 0; + TPosition startSplitSequence = position(itSeq); // The position of possible starts of a sequence after Ns is stored to split the sequence. + for (; itSeq <= (end(sequence) - 1); ++itSeq) + { + if (value(itSeq) == 'N') + { + counterN = 1; + if (((position(itSeq)) > startSplitSequence)) + sequenceCut += infix(sequence, startSplitSequence, position(itSeq)); + startSplitSequence = (position(itSeq) + 1); // Position after N, possible start + } + counterN--; + } + // Create the sequence with any N cut out. 
+ if (position(itSeq) > startSplitSequence) + { + sequenceCut += infix(sequence, startSplitSequence, position(itSeq)); + } +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_ALIGNMENT_FREE_KMER_FUNCTIONS_H_ diff --git a/seqan/arg_parse.h b/seqan/arg_parse.h index b134b28..f75a094 100644 --- a/seqan/arg_parse.h +++ b/seqan/arg_parse.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,17 +34,24 @@ // Facade header for module arg_parse. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ARG_PARSE_H_ +#define SEQAN_INCLUDE_SEQAN_ARG_PARSE_H_ // =========================================================================== // Prerequisites. // =========================================================================== #include -#include +//#include +#include #include +#include +#include +#ifndef PLATFORM_WINDOWS +#include +#endif + // =========================================================================== // The ArgParse Headers. 
// =========================================================================== @@ -64,4 +71,4 @@ #include -#endif // SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_H_ +#endif // SEQAN_INCLUDE_SEQAN_ARG_PARSE_H_ diff --git a/seqan/arg_parse/INFO b/seqan/arg_parse/INFO deleted file mode 100644 index 2d19c8a..0000000 --- a/seqan/arg_parse/INFO +++ /dev/null @@ -1,10 +0,0 @@ -Name: arg_parse -Author: Stephan Aiche -Maintainer: Stephan Aiche -License: BSD 3-clause -Copyright: 2006-2013, FU Berlin -Status: under development -Description: Command Line Parsing functionality. - The module arg_parse provides an argument/command line parser for SeqAn apps - including functionality to document tools and export the documentation/command - line information to different formats like man, html, or CTD. diff --git a/seqan/arg_parse/arg_parse_argument.h b/seqan/arg_parse/arg_parse_argument.h index 498d2b9..c33a1be 100644 --- a/seqan/arg_parse/arg_parse_argument.h +++ b/seqan/arg_parse/arg_parse_argument.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Stephan Aiche // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_ARGUMENT_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_ARGUMENT_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_ARGUMENT_H_ +#define SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_ARGUMENT_H_ #include #include @@ -62,7 +62,7 @@ inline std::string getFileExtension(ArgParseArgument const & me, unsigned pos); /*! 
* @class ArgParseArgument - * @extends AssignableConcept + * @implements AssignableConcept * @headerfile * @brief Information for a specific command line argument. * @@ -75,76 +75,47 @@ inline std::string getFileExtension(ArgParseArgument const & me, unsigned pos); * @brief Define the type of an @link ArgParseArgument @endlink. * * @signature enum ArgParseArgument::ArgumentType; + * + * @section Examples + * + * In the following example, the types INPUT_FILE, OUTPUT_FILE, and DOUBLE are used. + * + * @include demos/dox/arg_parse/argument_parser.cpp */ /*! - * @var ArgParseArgument::ArgumentType ArgParseArgument::STRING + * @val ArgParseArgument::ArgumentType STRING * @brief Argument is a string. * - * @var ArgParseArgument::ArgumentType ArgParseArgument::INTEGER + * @val ArgParseArgument::ArgumentType ArgParseArgument::INTEGER; * @brief Argument is a signed 32 bit integer. * - * @var ArgParseArgument::ArgumentType ArgParseArgument::INT64 + * @val ArgParseArgument::ArgumentType ArgParseArgument::INT64; * @brief Argument is a signed 64 bit integer. * - * @var ArgParseArgument::ArgumentType ArgParseArgument::DOUBLE + * @val ArgParseArgument::ArgumentType ArgParseArgument::DOUBLE; * @brief Argument is a floating point number stored as double. * - * @var ArgParseArgument::ArgumentType ArgParseArgument::INPUTFILE + * @val ArgParseArgument::ArgumentType ArgParseArgument::INPUT_FILE; * @brief Argument is an input file. * - * @var ArgParseArgument::ArgumentType ArgParseArgument::OUTPUTFILE + * @val ArgParseArgument::ArgumentType ArgParseArgument::OUTPUT_FILE; * @brief Argument is an output file. */ -/** -.Class.ArgParseArgument -..cat:Miscellaneous -..summary:Stores information for a specific command line argument. It can be either an argument of -a ArgParseArgument or directly an Argument on the command line. -..signature:ArgParseArgument -..include:seqan/arg_parse.h -..see:Class.ArgParseOption -..see:Class.ArgumentParser -*/ - /*! 
* @fn ArgParseArgument::ArgParseArgument * @brief Constructor * * @signature ArgParseArgument::ArgParseArgument(argumentType[, argumentLabel[, isListArgument[, numberOfArgument]]]); * - * @param argumentType Type of the argument (ArgParseArgument::ArgumentType). - * @param argumentLabel Label for the argument (char const *). - * @param isListArgument Whether or not this argument can be given multiple times (bool). - * @param numberOfArguments Number of times the argument must be given. E.g. set to 2 for the parser to always - * expect two values (int, default is 1). + * @param[in] argumentType Type of the argument (ArgParseArgument::ArgumentType). + * @param[in] argumentLabel Label for the argument (char const *). + * @param[in] isListArgument Whether or not this argument can be given multiple times (bool). + * @param[in] numberOfArguments Number of times the argument must be given. E.g. set to 2 for the parser to always + * expect two values (int, default is 1). */ -/** -.Memfunc.ArgParseArgument#ArgParseArgument -..class:Class.ArgParseArgument -..summary:Constructor -..signature:ArgParseArgument (argumentType [, argumentLabel, isListArgument, numberOfArguments]) -..param.argumentType:A ArgParseArgument.ArgumentType value defining the type (e.g., String) of the -ArgParseArgument. -...tableheader:Flag|Description -...table:$ArgParseArgument::STRING$|Argument is a string -...table:$ArgParseArgument::INTEGER$|Argument is an integer -...table:$ArgParseArgument::INT64|Argument is a 64 bit integer -...table:$ArgParseArgument::DOUBLE$|A float -...table:$ArgParseArgument::INPUTFILE$|An input file -...table:$ArgParseArgument::OUTPUTFILE$|An output file - ..param.argumentLabel:Defines a user defined argument label for the help output. If this option is - not set, ArgParseArgument will automatically define a label based on the ArgumentType. -..param.isListArgument:Defines if the argument can be given multiple times. -...default:false. 
-..param.numberOfArguments: Defines if the argument consists of defined number of elements (e.g., if -you want to provide an interval you would set this option to 2, so the parser knows that he needs -to search for exactly 2 values). -...default:1. -*/ - class ArgParseArgument { public: @@ -155,10 +126,10 @@ class ArgParseArgument INTEGER, // .. an integer INT64, // .. a 64 bit integer DOUBLE, // .. a float - INPUTFILE, // .. an inputfile (implicitly also a string) - OUTPUTFILE, // .. an outputfile (implicitly also a string) + INPUT_FILE, // .. an inputfile (implicitly also a string) + OUTPUT_FILE, // .. an outputfile (implicitly also a string) INPUTPREFIX, // .. an inputprefix (implicitly also a string) - OUTPUTPREFIX // .. an outoutprefix (implicitly also a string) + OUTPUT_PREFIX // .. an outoutprefix (implicitly also a string) }; @@ -250,11 +221,11 @@ inline std::string _typeToString(ArgParseArgument const & me) typeName = "string"; break; - case ArgParseArgument::INPUTFILE: + case ArgParseArgument::INPUT_FILE: typeName = "inputfile"; break; - case ArgParseArgument::OUTPUTFILE: + case ArgParseArgument::OUTPUT_FILE: typeName = "outputfile"; break; @@ -262,7 +233,7 @@ inline std::string _typeToString(ArgParseArgument const & me) typeName = "inputprefix"; break; - case ArgParseArgument::OUTPUTPREFIX: + case ArgParseArgument::OUTPUT_PREFIX: typeName = "outputprefix"; break; @@ -286,24 +257,11 @@ inline std::string _typeToString(ArgParseArgument const & me) * * @signature bool isListArgument(arg); * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return bool true if it can be given multiple times, false otherwise. */ -/** -.Function.isListArgument -..class:Class.ArgParseArgument -..summary:Returns whether the argument can be given multiple times. -..cat:Miscellaneous -..signature:isListArgument(argument) -..param.argument:The @Class.ArgParseArgument@ object. 
-...type:Class.ArgParseArgument -..returns:$true$ if the argument argument can be given multiple times. -..see:Memfunc.ArgParseArgument#ArgParseArgument.param.isListArgument -..include:seqan/arg_parse.h -*/ - inline bool isListArgument(ArgParseArgument const & me) { return me._isListArgument; @@ -319,33 +277,20 @@ inline bool isListArgument(ArgParseArgument const & me) * * @brief Returns whether the argument is a string. * - * @signature bool ArgParseArgument#isStringArgument(arg); + * @signature bool isStringArgument(arg); * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return bool true if it is a string, false otherwise. */ -/** -.Function.isStringArgument -..class:Class.ArgParseArgument -..summary:Returns whether the argument is a string. -..cat:Miscellaneous -..signature:isListArgument(argument) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..returns:$true$ if the argument argument is a string argument. -..see:Memfunc.ArgParseArgument#ArgParseArgument.param.argumentType -..include:seqan/arg_parse.h -*/ - inline bool isStringArgument(ArgParseArgument const & me) { return (me._argumentType == ArgParseArgument::STRING) || - (me._argumentType == ArgParseArgument::INPUTFILE) || - (me._argumentType == ArgParseArgument::OUTPUTFILE) || + (me._argumentType == ArgParseArgument::INPUT_FILE) || + (me._argumentType == ArgParseArgument::OUTPUT_FILE) || (me._argumentType == ArgParseArgument::INPUTPREFIX) || - (me._argumentType == ArgParseArgument::OUTPUTPREFIX) ; + (me._argumentType == ArgParseArgument::OUTPUT_PREFIX) ; } // ---------------------------------------------------------------------------- @@ -359,24 +304,11 @@ inline bool isStringArgument(ArgParseArgument const & me) * * @signature bool isIntegerArgument(arg); * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return bool true if it is an integer, false otherwise. 
*/ -/** -.Function.isIntegerArgument -..class:Class.ArgParseArgument -..summary:Returns whether the argument is an integer. -..cat:Miscellaneous -..signature:isIntegerArgument(argument) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..returns:$true$ if the argument argument is an integer argument. -..see:Memfunc.ArgParseArgument#ArgParseArgument.param.argumentType -..include:seqan/arg_parse.h -*/ - inline bool isIntegerArgument(ArgParseArgument const & me) { return me._argumentType == ArgParseArgument::INTEGER; @@ -393,24 +325,11 @@ inline bool isIntegerArgument(ArgParseArgument const & me) * * @signature bool isInt64Argument(arg); * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return bool true if it is a 64 bit integer, false otherwise. */ -/** -.Function.isInt64Argument -..class:Class.ArgParseArgument -..summary:Returns whether the argument is a 64 bit integer. -..cat:Miscellaneous -..signature:isInt64Argument(argument) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..returns:$true$ if the argument argument is a 64 bit integer argument. -..see:Memfunc.ArgParseArgument#ArgParseArgument.param.argumentType -..include:seqan/arg_parse.h -*/ - inline bool isInt64Argument(ArgParseArgument const & me) { return me._argumentType == ArgParseArgument::INT64; @@ -427,24 +346,11 @@ inline bool isInt64Argument(ArgParseArgument const & me) * * @signature bool isDoubleArgument(arg); * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return bool true if it is a double argument, false otherwise. */ -/** -.Function.isDoubleArgument -..class:Class.ArgParseArgument -..summary:Returns whether the argument is a double. -..cat:Miscellaneous -..signature:isDoubleArgument(argument) -..param.argument:The @Class.ArgParseArgument@ object. 
-...type:Class.ArgParseArgument -..returns:$true$ if the argument argument is a double argument. -..see:Memfunc.ArgParseArgument#ArgParseArgument.param.argumentType -..include:seqan/arg_parse.h -*/ - inline bool isDoubleArgument(ArgParseArgument const & me) { return me._argumentType == ArgParseArgument::DOUBLE; @@ -461,27 +367,14 @@ inline bool isDoubleArgument(ArgParseArgument const & me) * * @signature bool isInputFileArgument(arg); * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return bool true if it is a input file argument, false otherwise. */ -/** -.Function.isInputFileArgument -..class:Class.ArgParseArgument -..summary:Returns whether the argument is an input file. -..cat:Miscellaneous -..signature:isOutputFileArgument(argument) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..returns:$true$ if the argument argument is an input file argument. -..see:Memfunc.ArgParseArgument#ArgParseArgument.param.argumentType -..include:seqan/arg_parse.h -*/ - inline bool isInputFileArgument(ArgParseArgument const & me) { - return me._argumentType == ArgParseArgument::INPUTFILE; + return me._argumentType == ArgParseArgument::INPUT_FILE; } // ---------------------------------------------------------------------------- @@ -495,28 +388,14 @@ inline bool isInputFileArgument(ArgParseArgument const & me) * * @signature bool isOutputFileArgument(arg); * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return bool true if it is a output file argument, false otherwise. */ -/** -.Function.isOutputFileArgument -..class:Class.ArgParseArgument -..summary:Returns whether the argument is an output file. -..cat:Miscellaneous -..signature:isOutputFileArgument(argument) -..param.argument:The @Class.ArgParseArgument@ object. 
-...type:Class.ArgParseArgument -...type:Class.ArgParseOption -..returns:$true$ if the argument argument is an output file argument. -..see:Memfunc.ArgParseArgument#ArgParseArgument.param.argumentType -..include:seqan/arg_parse.h -*/ - inline bool isOutputFileArgument(ArgParseArgument const & me) { - return me._argumentType == ArgParseArgument::OUTPUTFILE; + return me._argumentType == ArgParseArgument::OUTPUT_FILE; } // ---------------------------------------------------------------------------- @@ -530,28 +409,14 @@ inline bool isOutputFileArgument(ArgParseArgument const & me) * * @signature bool isOutputPrefixArgument(arg); * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return bool true if it is an output prefix argument, false otherwise. */ -/** - .Function.isOutputPrefixArgument - ..class:Class.ArgParseArgument - ..summary:Returns whether the argument is an output file. - ..cat:Miscellaneous - ..signature:isOutputPrefixArgument(argument) - ..param.argument:The @Class.ArgParseArgument@ object. - ...type:Class.ArgParseArgument - ...type:Class.ArgParseOption - ..returns:$true$ if the argument argument is an output file argument. - ..see:Memfunc.ArgParseArgument#ArgParseArgument.param.argumentType - ..include:seqan/arg_parse.h - */ - inline bool isOutputPrefixArgument(ArgParseArgument const & me) { - return me._argumentType == ArgParseArgument::OUTPUTPREFIX; + return me._argumentType == ArgParseArgument::OUTPUT_PREFIX; } // ---------------------------------------------------------------------------- @@ -565,30 +430,16 @@ inline bool isOutputPrefixArgument(ArgParseArgument const & me) * * @signature bool isInputPrefixArgument(arg); * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return bool true if it is an input prefix argument, false otherwise. 
*/ -/** - .Function.isInputPrefixArgument - ..class:Class.ArgParseArgument - ..summary:Returns whether the argument is an output file. - ..cat:Miscellaneous - ..signature:isInputPrefixArgument(argument) - ..param.argument:The @Class.ArgParseArgument@ object. - ...type:Class.ArgParseArgument - ...type:Class.ArgParseOption - ..returns:$true$ if the argument argument is an input prefix argument. - ..see:Memfunc.ArgParseArgument#ArgParseArgument.param.argumentType - ..include:seqan/arg_parse.h - */ - inline bool isInputPrefixArgument(ArgParseArgument const & me) { return me._argumentType == ArgParseArgument::INPUTPREFIX; } - + // ---------------------------------------------------------------------------- // Function getArgumentLabel() // ---------------------------------------------------------------------------- @@ -600,24 +451,11 @@ inline bool isInputPrefixArgument(ArgParseArgument const & me) * * @signature std::string getArgumentLabel(arg); * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return std::string The argument label as a STL string. */ -/** -.Function.getArgumentLabel -..class:Class.ArgParseArgument -..summary:Returns the label for the given @Class.ArgParseArgument@. Either the user defined label -is returned or a default label (based on the ArgumentType is used). -..cat:Miscellaneous -..signature:getArgumentLabel(argument) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..returns:A $ShortCut.std::string$ containing the label. 
-..include:seqan/arg_parse.h -*/ - inline std::string const getArgumentLabel(ArgParseArgument const & me) { if (me._argumentLabel != "") @@ -661,14 +499,14 @@ inline std::string const getArgumentLabel(ArgParseArgument const & me) // Helper Function _intervalAssert() // ---------------------------------------------------------------------------- -// this methods ensures that the given arguments define a non emtpy value interval +// this method ensures that the given arguments define a valid interval // otherwise it will trigger a SEQAN_CHECK failure template inline void _intervalAssert(const std::string minValueAsString, const std::string maxValueAsString) { if (minValueAsString != "" && maxValueAsString != "") - SEQAN_CHECK(_cast(minValueAsString) < _cast(maxValueAsString), - "The interval [%s:%s] is empty. Please specify a valid, non-empty interval.", + SEQAN_CHECK(_cast(minValueAsString) <= _cast(maxValueAsString), + "The interval [%s:%s] is invalid. Please specify a valid interval.", minValueAsString.c_str(), maxValueAsString.c_str()); } @@ -684,25 +522,10 @@ inline void _intervalAssert(const std::string minValueAsString, const std::strin * * @signature void setMinValue(arg, minValue); * - * @param arg The ArgParseArgument to set the smallest value of. - * @param minValue The smallest value to set (std::string). - * - * @return std::string The argument label as a STL string. + * @param[in,out] arg The ArgParseArgument to set the smallest value of. + * @param[in] minValue The smallest value to set (std::string). */ -/** -.Function.setMinValue -..class:Class.ArgParseArgument -..summary:Sets the minimum value of a @Class.ArgParseArgument@ object. -..cat:Miscellaneous -..signature:setMinValue(argument,minValue) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..param.minValue:A std::string containing a string representation of the minimum value -of the @Class.ArgParseArgument@.
-..include:seqan/arg_parse.h -*/ - inline void setMinValue(ArgParseArgument & me, const std::string minValue) { if (isDoubleArgument(me)) @@ -738,25 +561,10 @@ inline void setMinValue(ArgParseArgument & me, const std::string minValue) * * @signature void setMaxValue(arg, maxValue); * - * @param arg The ArgParseArgument to set the smallest value of. - * @param maxValue The largest value to set (std::string). - * - * @return std::string The argument label as a STL string. + * @param[in,out] arg The ArgParseArgument to set the largest value of. + * @param[in] maxValue The largest value to set (std::string). */ -/** -.Function.setMaxValue -..class:Class.ArgParseArgument -..summary:Sets the maximum value of a @Class.ArgParseArgument@ object. -..cat:Miscellaneous -..signature:setMaxValue(argument,maxValue) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..param.maxValue:A std::string containing a string representation of the maximum value -of the @Class.ArgParseArgument@. -..include:seqan/arg_parse.h -*/ - inline void setMaxValue(ArgParseArgument & me, const std::string maxValue) { if (isDoubleArgument(me)) @@ -792,9 +600,9 @@ inline void setMaxValue(ArgParseArgument & me, const std::string maxValue) * * @signature void setValidValues(arg, values); * - * @param arg The ArgParseArgument to set the valid values for. - * @param values Either a std::string containing all valid entries, separated by spaces or a - * std::vector<std::string> with the valid entries. + * @param[in,out] arg The ArgParseArgument to set the valid values for. + * @param[in] values Either a std::string containing all valid entries, separated by spaces or a + * std::vector<std::string> with the valid entries. * * If the argument is of type string then the list of valid values is the case-sensitive list of string values * allowed for this argument.
If it is an input or output file then the list of valid values is a list of @@ -814,11 +622,11 @@ inline void setMaxValue(ArgParseArgument & me, const std::string maxValue) * setValidValues(stringArg, values); // one of {"four", "five"} * @endcode * - * An example for an input file option. Note that by changing INPUTFILE to OUTPUTFILE below, + * An example for an input file option. Note that by changing INPUT_FILE to OUTPUT_FILE below, * the example would be the same for output files. * * @code{.cpp} - * seqan::ArgParseArgument fileArg(seqan::ArgParseArgument::INPUTFILE); + * seqan::ArgParseArgument fileArg(seqan::ArgParseArgument::INPUT_FILE); * setValidValues(fileArg, "fq fastq"); // file must end in ".fq" or ".fastq" * * std::vector values; @@ -828,21 +636,6 @@ inline void setMaxValue(ArgParseArgument & me, const std::string maxValue) * @endcode */ -/** -.Function.setValidValues -..class:Class.ArgParseArgument -..summary:Sets the set of allowed values of a @Class.ArgParseArgument@ object. -..cat:Miscellaneous -..signature:setValidValues(argument,values) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..param.values:A std::vector containing all valid entries for the option or a std::string -with valid values separated by spaces. -..remarks:If the argument or option is an in- or output file. The valid strings will be interpreted as -file endings and the command line parser checks if the provided file has the required file ending. -..include:seqan/arg_parse.h -*/ - inline void setValidValues(ArgParseArgument & me, std::vector const & values) { if (isDoubleArgument(me) || isIntegerArgument(me)) @@ -886,22 +679,10 @@ inline void setValidValues(ArgParseArgument & me, std::string const & valuesStri * * @signature void setHelpText(arg, text); * - * @param arg The ArgParseArgument to set the help text for. - * @param text The text to display as the description of the argument (std::string). 
+ * @param[in,out] arg The ArgParseArgument to set the help text for. + * @param[in] text The text to display as the description of the argument (std::string). */ -/** -.Function.setHelpText -..class:Class.ArgParseArgument -..summary:Sets the help text for an ArgParseArgument. -..cat:Miscellaneous -..signature:setHelpText(argument,text) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..param.text:A std::string describing the argument. -..include:seqan/arg_parse.h -*/ - inline void setHelpText(ArgParseArgument & me, std::string const & text) { me._helpText = text; @@ -972,7 +753,7 @@ inline bool _compareExtension(std::string const & str, std::string const & ext) // The parameter i gives the index of the value in the argument. -inline void _checkStringRestrictions(ArgParseArgument const & me, std::string value, +inline void _checkStringRestrictions(ArgParseArgument const & me, std::string const &value, unsigned i) { typedef std::vector::const_iterator TVectorIterator; @@ -980,6 +761,14 @@ inline void _checkStringRestrictions(ArgParseArgument const & me, std::string va // we only check valid values for files and string arguments, but not for prefix arguments if (!empty(me.validValues) && !(isInputPrefixArgument(me) || isOutputPrefixArgument(me))) { + // The file name "-" is reserved for stdin or stdout + if ((isInputFileArgument(me) || isOutputFileArgument(me)) && value == "-") + return; + + // Allow the filename to be a pipe (without checking its extension) + if (isInputFileArgument(me) && _isPipe(value.c_str())) + return; + bool isContained = false; for (TVectorIterator validValue = me.validValues.begin(); validValue != me.validValues.end(); @@ -1059,21 +848,6 @@ inline void _checkValue(ArgParseArgument const & me) // Function _assignArgumentValue() // ---------------------------------------------------------------------------- -/** -.Internal.Function._assignArgumentValue -..class:Class.ArgParseArgument -..summary:Assigns the 
given value (if applicable) to the @Class.ArgParseArgument@ object. If -the @Class.ArgParseArgument@ is a list or can hold multiple values -(@Memfunc.ArgParseArgument#ArgParseArgument.param.numberOfArguments@) the value will be appended. -Otherwise the value will be overwritten. -..cat:internal -..signature:_assignArgumentValue(argument,value [, argNo]) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..param.value:A std::string containing the value that should be assigned. -..include:seqan/arg_parse.h -*/ - inline void _assignArgumentValue(ArgParseArgument & me, std::string const & value) { // assignment @@ -1101,29 +875,13 @@ inline void _assignArgumentValue(ArgParseArgument & me, std::string const & valu * * @signature std::string getArgumentValue(arg[, argNo]); * - * @param arg The ArgParseArgument to query. - * @param argNo In case that the ArgParseArgument allowed multiple values, give the index of the argument - * that you want to retrieve (unsigned, starts at 0). + * @param[in] arg The ArgParseArgument to query. + * @param[in] argNo In case that the ArgParseArgument allowed multiple values, give the index of the argument + * that you want to retrieve (unsigned, starts at 0). * * @return std::string Const-reference to the argument value. */ -/** -.Function.ArgParseArgument#getArgumentValue -..class:Class.ArgParseArgument -..summary:Returns the value of the @Class.ArgParseArgument@ object. If -the @Class.ArgParseArgument@ is a list or can hold multiple values -(@Memfunc.ArgParseArgument#ArgParseArgument.param.numberOfArguments@) you can specify which value -you want to get. If not set the first value will be returned. -..cat:Miscellaneous -..signature:getArgumentValue(argument [, argNo]) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..param.argNo:If the argument is a list, the $argNo$-th list element is returned. -..returns:The value set at position $position$.
-..include:seqan/arg_parse.h -*/ - inline std::string const & getArgumentValue(ArgParseArgument const & me, unsigned argNo) { SEQAN_CHECK(argNo < me.value.size() || argNo < me.defaultValue.size(), @@ -1151,24 +909,11 @@ inline std::string const & getArgumentValue(ArgParseArgument const & me) * * @signature std::vector getArgumentValue(arg); * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return std::vector Const-reference to the argument values. */ -/** -.Function.getArgumentValues -..class:Class.ArgParseArgument -..summary:Returns all values of the @Class.ArgParseArgument@ object as const std::vector. -..cat:Miscellaneous -..signature:getArgumentValues(argument) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..returns:$std::vector$ containing the values. If no value was set and no -default value exists an empty vector will be returned. -..include:seqan/arg_parse.h -*/ - inline std::vector const & getArgumentValues(ArgParseArgument const & me) { if (!me.value.empty()) @@ -1188,24 +933,12 @@ inline std::vector const & getArgumentValues(ArgParseArgument const * * @signature bool hasValue(arg[, pos]); * - * @param arg The ArgParseArgument to query. - * @param pos The position of the argument in case of being a list (unsigned, 0-based, default is 0). + * @param[in] arg The ArgParseArgument to query. + * @param[in] pos The position of the argument in case of being a list (unsigned, 0-based, default is 0). * * @return bool true if pos is less than the size and the argument is non-empty. */ -/** -.Function.ArgParseArgument#hasValue -..class:Class.ArgParseArgument -..summary:Returns true if a value for the given position is available. -..cat:Miscellaneous -..signature:hasValue(argument [, position=0]) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..param.position:The position for which the availability should be tested. 
-..returns: $true$ if a value is available, $false$ if not. -..include:seqan/arg_parse.h -*/ inline bool hasValue(ArgParseArgument const & arg, unsigned position) { return arg.value.size() > position || arg.defaultValue.size() > position; @@ -1227,23 +960,11 @@ inline bool hasValue(ArgParseArgument const & arg) * * @signature bool isSet(arg): * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return bool true if a value was assigned, false otherwise. */ -/** -.Function.ArgParseArgument#isSet -..class:Class.ArgParseArgument -..summary:Returns true if a value was assigned to the argument. -..cat:Miscellaneous -..signature:isSet(argument) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..returns:$true$ if a value was assigned to the argument, $false$ if not. -..include:seqan/arg_parse.h -*/ - inline bool isSet(ArgParseArgument const & me) { return !me.value.empty(); @@ -1260,22 +981,11 @@ inline bool isSet(ArgParseArgument const & me) * * @signature bool hasDefault(arg); * - * @param arg The argument to query. + * @param[in] arg The argument to query. * * @return bool true if the argument has a default value and false if not. */ -/** -.Function.ArgParseArgument#hasDefault -..summary:Returns true if a default value was given for that argument. -..cat:Miscellaneous -..signature:hasDefault(argument) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..returns:$true$ if a default value was given for the argument, $false$ if not. -..include:seqan/arg_parse.h -*/ - inline bool hasDefault(ArgParseArgument const & me) { return !me.defaultValue.empty(); @@ -1292,23 +1002,11 @@ inline bool hasDefault(ArgParseArgument const & me) * * @signature unsigned numberOfAllowedValues(arg); * - * @param arg The ArgParseArgument to query. + * @param[in] arg The ArgParseArgument to query. * * @return unsigned The number of allowed values. 
*/ -/** -.Function.numberOfAllowedValues -..class:Class.ArgParseArgument -..summary:Returns the number of allowed values for this @Class.ArgParseArgument@. -..cat:Miscellaneous -..signature:numberOfAllowedValues(argument) -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..returns:The number of allowed values for this @Class.ArgParseArgument@. -..include:seqan/arg_parse.h -*/ - inline unsigned numberOfAllowedValues(ArgParseArgument const & me) { return me._numberOfValues; @@ -1323,7 +1021,7 @@ inline unsigned numberOfAllowedValues(ArgParseArgument const & me) * @headerfile * @brief Returns the file extension for the given file argument. * - * Only valid when argument is an INPUTFILE or OUTPUTFILE. + * Only valid when argument is an INPUT_FILE or OUTPUT_FILE. * * Halts the program if not an input or output file argument. * @@ -1333,33 +1031,16 @@ inline unsigned numberOfAllowedValues(ArgParseArgument const & me) * * @signature std::string getFileExtension(arg[, pos]); * - * @param arg The ArgParseArgument to query. - * @param pos The position of the value to retrieve if multiple values (unsigned). + * @param[in] arg The ArgParseArgument to query. + * @param[in] pos The position of the value to retrieve if multiple values (unsigned). * * @return std::string The file extension, empty if no extension or not set. */ -/** -.Function.ArgParseArgument#getFileExtension -..class:Class.ArgParseArgument -..summary:Returns the file extension for the given file argument. -..description:Only valid when argument is an INPUTFILE or OUTPUTFILE. -..cat:Miscellaenous -..signature:std::string getFileExtension(argument[, pos]); -..param.argument:The @Class.ArgParseArgument@ object. -...type:Class.ArgParseArgument -..param.pos:The position of the value if multiple values are allowed. -...type:nolink:$unsigned$ -..returns:A $std::string$ with the extension of the file, if any. Empty if no extension. 
-..remarks:Halts the program if the argument is not an input or output file argument. -..remarks:Note that you can -..include:seqan/arg_parse.h -*/ - inline std::string getFileExtension(ArgParseArgument const & me, unsigned pos = 0) { - if (me._argumentType != ArgParseArgument::INPUTFILE && - me._argumentType != ArgParseArgument::OUTPUTFILE) + if (me._argumentType != ArgParseArgument::INPUT_FILE && + me._argumentType != ArgParseArgument::OUTPUT_FILE) SEQAN_FAIL("Cannot get file extension from non-file argument/option."); // Short-circuit to override file extension if set. @@ -1399,4 +1080,4 @@ inline std::string getFileExtension(ArgParseArgument const & me, unsigned pos = } // namespace seqan -#endif // SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_ARGUMENT_H_ +#endif // SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_ARGUMENT_H_ diff --git a/seqan/arg_parse/arg_parse_ctd_support.h b/seqan/arg_parse/arg_parse_ctd_support.h index 4c46d40..3542bd9 100644 --- a/seqan/arg_parse/arg_parse_ctd_support.h +++ b/seqan/arg_parse/arg_parse_ctd_support.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -33,8 +33,8 @@ // Author: Stephan Aiche // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_CTD_SUPPORT_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_CTD_SUPPORT_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_CTD_SUPPORT_H_ +#define SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_CTD_SUPPORT_H_ #include @@ -112,11 +112,6 @@ TSequence _toText(TSequence const & input) // Function _join() // ---------------------------------------------------------------------------- -/** - * joins all elements of the the passed StringSet into a single CharString - * the provided delimiter is used to separate the single entries in the - * resulting CharString - */ template inline std::string _join(std::vector const & v, std::string const & delimiter) @@ -289,6 +284,8 @@ inline std::string _getManual(ArgumentParser const & me) // Function writeCTD() // ---------------------------------------------------------------------------- +// TODO(holtgrew): Change argument order. + /*! * @fn ArgumentParser#writeCTD * @headerfile \ @@ -296,26 +293,13 @@ inline std::string _getManual(ArgumentParser const & me) * * @signature bool writeCTD(parser[, stream]); * - * @param parser The ArgumentParser to write the CTD file for. - * @param stream A std::ostream to write to. If omitted an output file with the name form the "write-ctd" - * parameter of the parser is used. + * @param[in] parser The ArgumentParser to write the CTD file for. + * @param[out] stream A std::ostream to write to. If omitted an output file with the name from the + * "write-ctd" parameter of the parser is used. * * @return bool true on success, false on failure. */ -/** -.Function.writeCTD -..summary:Exports the app's interface description to a .ctd file. -..cat:Miscellaneous -..signature:writeCTD(parser [, ctdfile]) -..param.parser:The @Class.ArgumentParser@ object. 
-...type:Class.ArgumentParser -..param.ctdfile:The stream where the ctd file will be written to. If non is given the function writes it to the file given in the write-ctd parameter. -..param.parser:The @Class.ArgumentParser@ object. -..returns:$true$ if the ctd file could be created correctly, $false$ otherwise. -..include:seqan/arg_parse.h -*/ - inline bool writeCTD(ArgumentParser const & me, std::ostream & ctdfile) { @@ -355,6 +339,11 @@ writeCTD(ArgumentParser const & me, std::ostream & ctdfile) ++optionMapIterator) { ArgParseOption const & opt = *optionMapIterator; + + // exclude hidden + if (isHidden(opt)) + continue; + std::string optionIdentifier = _getPrefixedOptionName(opt); std::string refName = toolname + "." + _getOptionName(opt); @@ -395,6 +384,10 @@ writeCTD(ArgumentParser const & me, std::ostream & ctdfile) if (!_includeInCTD(opt)) continue; + // exclude hidden + if (isHidden(opt)) + continue; + // prefer short name for options std::string optionName = _getOptionName(opt); @@ -441,7 +434,7 @@ writeCTD(ArgumentParser const & me, std::ostream & ctdfile) ctdfile << "supported_formats=\"" << xmlEscape(_join(supported_formats, ",")) << "\" "; ctdfile << "required=\"" << (isRequired(opt) ? "true" : "false") << "\" "; - ctdfile << "advanced=\"" << (isHidden(opt) ? "true" : "false") << "\" "; + ctdfile << "advanced=\"" << (isAdvanced(opt) ? "true" : "false") << "\" "; // Write out tags attribute. 
if (!opt.tags.empty()) @@ -565,4 +558,4 @@ writeCTD(ArgumentParser const & me) } // namespace seqan -#endif // SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_CTD_SUPPORT_H_ +#endif // SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_CTD_SUPPORT_H_ diff --git a/seqan/arg_parse/arg_parse_doc.h b/seqan/arg_parse/arg_parse_doc.h index d49cb02..165a084 100644 --- a/seqan/arg_parse/arg_parse_doc.h +++ b/seqan/arg_parse/arg_parse_doc.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Stephan Aiche // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_DOC_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_DOC_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_DOC_H_ +#define SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_DOC_H_ #include #include @@ -48,18 +48,16 @@ namespace seqan { // Function getAppName() // -------------------------------------------------------------------------- -/** -.Function.ArgumentParser#getAppName -..class:Class.ArgumentParser -..summary:Get tool name of @Class.ArgumentParser@ object. -..cat:Miscellaneous -..signature:getAppName(parser) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..returns:Tool name of argument parser object. -...type:nolink:$std::string$ -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#getAppName + * @brief Return program name of ArgumentParser. + * + * @signature TCharStringRef getAppName(parser); + * + * @param[in] parser The ArgumentParser to get the app name for. 
+ * + * @return TCharStringRef The app name, const-ref to @link CharString @endlink. + */ inline CharString const & getAppName(ArgumentParser const & parser) { @@ -86,19 +84,17 @@ inline void _parseAppName(ArgumentParser & parser, std::string const & candidate // Helper Function _addLine() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#addLine: -..class:Class.ArgumentParser -..summary:Adds a line of text to the help output of the @Class.ArgumentParser@ in the block of -@Class.ArgParseOption@s. -..cat:Miscellaneous -..signature:addLine(parser, text) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.text:A line of text that will be added to the help output. -...type:Shortcut.CharString -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#addLine + * @brief Adds a line of text to the help output of the ArgumentParser. + * + * The line of text will be added to the block of the options. + * + * @signature void addLine(parser, line); + * + * @param[in,out] parser The ArgumentParser to add the line to. + * @param[in] line The line of text to add, @link StringConcept @endlink of char. + */ template inline void addLine(ArgumentParser & me, TString const & line) @@ -110,30 +106,28 @@ inline void addLine(ArgumentParser & me, TString const & line) // Function addSection() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#addSection: -..class:Class.ArgumentParser -..summary:Begins a new section of @Class.ArgParseOption@ the help output of -the @Class.ArgumentParser@. -..cat:Miscellaneous -..signature:addSection(parser, text) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.text:A section header that will be added to the help output. -...type:Shortcut.CharString -..include:seqan/arg_parse.h -..example.code: -ArgumentParser parser; - -[...] 
// init parser - -addSection(parser, "In-/Output-Options"); -addOption("i", ... ); -addOption("o", ... ); - -addSection(parser, "Other Options"); -addOption("x", ... ); -*/ +/*! + * @fn ArgumentParser#addSection + * @brief Begins a new section of the option block of the ArgumentParser help output. + * + * @signature void addSection(parser, title); + * + * @param[in,out] parser The ArgumentParser to add the line to. + * @param[in] title The title to add, @link StringConcept @endlink of char. + * + * @code{.cpp} + * ArgumentParser parser; + * + * [...] // init parser + * + * addSection(parser, "In-/Output-Options"); + * addOption("i", ... ); + * addOption("o", ... ); + * + * addSection(parser, "Other Options"); + * addOption("x", ... ); + * @endcode + */ template inline void addSection(ArgumentParser & me, TString const & line) @@ -146,17 +140,15 @@ inline void addSection(ArgumentParser & me, TString const & line) // Function addUsageLine() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#addUsageLine: -..class:Class.ArgumentParser -..summary:Adds a line of text to the usage output of the @Class.ArgumentParser@. -..cat:Miscellaneous -..signature:addUsageLine(parser, text) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.text:A text line that will be added to the usage output. -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#addUsageLine + * @brief Adds a line of text to the usage output of the ArgumentParser. + * + * @signature void addUsageLine(parser, line); + * + * @param[in,out] parser The ArgumentParser to add the line to. + * @param[in] line The line to add, a std::string. 
+ */ inline void addUsageLine(ArgumentParser & me, std::string const & line) { @@ -183,18 +175,15 @@ inline void _addUsage(ToolDoc & toolDoc, ArgumentParser const & me) // Function addDescription() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#addDescription -..class:Class.ArgumentParser -..summary:Appends a description paragraph to the @Class.ArgumentParser@ documentation. -..cat:Miscellaneous -..signature:addDescription(parser, text) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.text:The description paragraph. -..returns:$void$ -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#addDescription + * @brief Appends a description paragraph to the ArgumentParser documentation. + * + * @signature void addDescription(parser, description); + * + * @param[in,out] parser The ArgumentParser to add the line to. + * @param[in] description The description text, a std::string. + */ inline void addDescription(ArgumentParser & me, std::string const & description) { @@ -205,18 +194,15 @@ inline void addDescription(ArgumentParser & me, std::string const & description) // Function setAppName() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#setAppName -..class:Class.ArgumentParser -..summary:Sets application name of @Class.ArgumentParser@. -..cat:Miscellaneous -..signature:setAppName(parser, appName) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.appName:The name of the application. -..returns:$void$ -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#setAppName + * @brief Sets application name of ArgumentParser. + * + * @signature void setAppName(parser, name); + * + * @param[in,out] parser The ArgumentParser to set the name of. + * @param[in] name The application name, std::string. 
+ */ inline void setAppName(ArgumentParser & me, std::string const & name) { @@ -227,18 +213,15 @@ inline void setAppName(ArgumentParser & me, std::string const & name) // Function setShortDescription() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#setShortDescription -..class:Class.ArgumentParser -..summary:Sets short description of the @Class.ArgumentParser@ object. -..cat:Miscellaneous -..signature:setShortDescription(parser, text) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.text:The short description text. -..returns:$void$ -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#setShortDescription + * @brief Sets shortDescription of ArgumentParser. + * + * @signature void setShortDescription(parser, desc); + * + * @param[in,out] parser The ArgumentParser to set the short description of. + * @param[in] desc The short description, std::string. + */ inline void setShortDescription(ArgumentParser & me, std::string const & description) { @@ -249,17 +232,16 @@ inline void setShortDescription(ArgumentParser & me, std::string const & descrip // Function getShortDescription() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#getShortDescription -..class:Class.ArgumentParser -..summary:Gets short description of @Class.ArgumentParser@. -..cat:Miscellaneous -..signature:getShortDescription(parser) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..returns:The short description of the @Class.ArgumentParser@ object. -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#getShortDescription + * @brief Returns the short description. + * + * @signature CharString getShortDescription(parser); + * + * @param[in,out] parser The ArgumentParser to get short description for. + * + * @return CharString A @link CharString @endlink with the short description. 
+ */ inline CharString getShortDescription(ArgumentParser const & me) { @@ -270,65 +252,176 @@ inline CharString getShortDescription(ArgumentParser const & me) // Function setVersion() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#setVersion -..class:Class.ArgumentParser -..summary:Sets version string of @Class.ArgumentParser@. -..cat:Miscellaneous -..signature:setVersion(parser, versionString) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.versionString:The version string to set. -..returns:$void$ -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#setVersion + * @brief Sets version of ArgumentParser. + * + * @signature void setVersion(parser, version); + * + * @param[in,out] parser The ArgumentParser to set the version of. + * @param[in] version The version string to set, std::string. + */ inline void setVersion(ArgumentParser & me, std::string const & versionString) { setVersion(me._toolDoc, versionString); if (!hasOption(me, "version")) - addOption(me, ArgParseOption("", "version", "Display version information")); + addOption(me, ArgParseOption("", "version", "Display version information.")); } // -------------------------------------------------------------------------- // Function getVersion() // -------------------------------------------------------------------------- -/** -.Function.ArgumentParser#getVersion -..class:Class.ArgumentParser -..cat:Miscellaneous -..summary:Get version string from @Class.ArgumentParser@ object. -..signature:getVersion(parser) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..returns:Date string. -...type:Shortcut.CharString -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#getVersion + * @brief Returns the version string. + * + * @signature TCharStringRef getVersion(parser); + * + * @param[in,out] parser The ArgumentParser to get the version string from. 
+ * + * @return TCharString A const-ref to a @link CharString @endlink with the version string. + */ inline CharString const & getVersion(ArgumentParser const & me) { return getVersion(me._toolDoc); } +// ---------------------------------------------------------------------------- +// Function setShortCopyright() +// ---------------------------------------------------------------------------- + +/*! + * @fn ArgumentParser#setShortCopyright + * @brief Sets short copyright of ArgumentParser. + * + * @signature void setShortCopyright(parser, short copyright); + * + * @param[in,out] parser The ArgumentParser to set the short copyright of. + * @param[in] short copyright The short copyright string to set, std::string. + */ + +inline void setShortCopyright(ArgumentParser & me, CharString const & shortCopyrightString) +{ + setShortCopyright(me._toolDoc, shortCopyrightString); +} + +// -------------------------------------------------------------------------- +// Function getShortCopyright() +// -------------------------------------------------------------------------- + +/*! + * @fn ArgumentParser#getShortCopyright + * @brief Returns the short copyright string. + * + * @signature TCharStringRef getShortCopyright(parser); + * + * @param[in,out] parser The ArgumentParser to get the short copyright string from. + * + * @return TCharString A const-ref to a @link CharString @endlink with the short copyright string. + */ + +inline CharString const & getShortCopyright(ArgumentParser const & me) +{ + return getShortCopyright(me._toolDoc); +} + +// ---------------------------------------------------------------------------- +// Function setLongCopyright() +// ---------------------------------------------------------------------------- + +/*! + * @fn ArgumentParser#setLongCopyright + * @brief Sets long copyright of ArgumentParser. + * + * @signature void setLongCopyright(parser, long copyright); + * + * @param[in,out] parser The ArgumentParser to set the long copyright of. 
+ * @param[in] long copyright The long copyright string to set, std::string. + */ + +inline void setLongCopyright(ArgumentParser & me, CharString const & longCopyrightString) +{ + setLongCopyright(me._toolDoc, longCopyrightString); + if (!hasOption(me, "copyright")) + addOption(me, ArgParseOption("", "copyright", "Display long copyright information.")); +} + +// -------------------------------------------------------------------------- +// Function getLongCopyright() +// -------------------------------------------------------------------------- + +/*! + * @fn ArgumentParser#getLongCopyright + * @brief Returns the long copyright string. + * + * @signature TCharStringRef getLongCopyright(parser); + * + * @param[in,out] parser The ArgumentParser to get the long copyright string from. + * + * @return TCharString A const-ref to a @link CharString @endlink with the long copyright string. + */ + +inline CharString const & getLongCopyright(ArgumentParser const & me) +{ + return getLongCopyright(me._toolDoc); +} + + +// ---------------------------------------------------------------------------- +// Function setCitation() +// ---------------------------------------------------------------------------- + +/*! + * @fn ArgumentParser#setCitation + * @brief Sets citation of ArgumentParser. + * + * @signature void setCitation(parser, citation); + * + * @param[in,out] parser The ArgumentParser to set the citation of. + * @param[in] citation The citation string to set, std::string. + */ + +inline void setCitation(ArgumentParser & me, CharString const & citationString) +{ + setCitation(me._toolDoc, citationString); +} + +// -------------------------------------------------------------------------- +// Function getCitation() +// -------------------------------------------------------------------------- + +/*! + * @fn ArgumentParser#getCitation + * @brief Returns the citation string. 
+ * + * @signature TCharStringRef getCitation(parser); + * + * @param[in,out] parser The ArgumentParser to get the citation string from. + * + * @return TCharString A const-ref to a @link CharString @endlink with the citation string. + */ + +inline CharString const & getCitation(ArgumentParser const & me) +{ + return getCitation(me._toolDoc); +} + // -------------------------------------------------------------------------- // Function setCategory() // -------------------------------------------------------------------------- -/** -.Function.ArgumentParser#setCategory -..class:Class.ArgumentParser -..summary:Set tool category for @Class.ArgumentParser@ object. -..cat:Miscellaneous -..signature:setCategory(parser, category) -..param.parser:The @Class.ArgumentParser@ object to set the category. -...type:Class.ArgumentParser -..param.category:Category to set. -...type:Shortcut.CharString -..returns:$void$ -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#setCategory + * @brief Sets category of ArgumentParser. + * + * @signature void setCategory(parser, category); + * + * @param[in,out] parser The ArgumentParser to set the category of. + * @param[in] category The category to set, std::string. + */ inline void setCategory(ArgumentParser & parser, CharString const & category) { @@ -339,18 +432,16 @@ inline void setCategory(ArgumentParser & parser, CharString const & category) // Function getCategory() // -------------------------------------------------------------------------- -/** -.Function.ArgumentParser#getCategory -..class:Class.ArgumentParser -..summary:Get tool category of @Class.ArgumentParser@ object. -..cat:Miscellaneous -..signature:getCategory(parser) -..param.parser:The @Class.ArgumentParser@ object to get the tool category of. -...type:Class.ArgumentParser -..returns:Tool category of the @Class.ArgumentParser@ object. -...type:Shortcut.CharString -..include:seqan/arg_parse.h -*/ +/*! 
+ * @fn ArgumentParser#getCategory + * @brief Returns the category. + * + * @signature TCharStringRef getCategory(parser); + * + * @param[in,out] parser The ArgumentParser to get the category from. + * + * @return TCharString A const-ref to a @link CharString @endlink with the category. + */ inline CharString const & getCategory(ArgumentParser const & parser) { @@ -361,18 +452,15 @@ inline CharString const & getCategory(ArgumentParser const & parser) // Function setDate() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#setDate -..class:Class.ArgumentParser -..summary:Sets date string of @Class.ArgumentParser@. -..cat:Miscellaneous -..signature:setDate(parser, date) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.date:The date string. -..returns:$void$ -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#setDate + * @brief Sets date string of ArgumentParser. + * + * @signature void setDate(parser, date); + * + * @param[in,out] parser The ArgumentParser to set the date string of. + * @param[in] date The date string to set, std::string. + */ inline void setDate(ArgumentParser & me, std::string const & date) { @@ -383,19 +471,15 @@ inline void setDate(ArgumentParser & me, std::string const & date) // Function addTextSection() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#addTextSection -..class:Class.ArgumentParser -..summary:Adds a text section to the @Class.ArgumentParser@. -..cat:Miscellaneous -..signature:addTextSection(parser, title) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.title:The section title. -..returns:$void$ -..remarks:This will result in an additional section heading to be printed. -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#addTextSection + * @brief Add a text section to the ArgumentParser. 
+ * + * @signature void addTextSection(parser, title); + * + * @param[in,out] parser The ArgumentParser to add the text section title to. + * @param[in] title The section title to add, std::string. + */ inline void addTextSection(ArgumentParser & me, std::string const & title) { @@ -406,19 +490,15 @@ inline void addTextSection(ArgumentParser & me, std::string const & title) // Function addTextSubSection() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#addTextSubSection -..class:Class.ArgumentParser -..summary:Adds a text subsection to the @Class.ArgumentParser@. -..cat:Miscellaneous -..signature:addTextSubSection(parser, title) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.title:The subsection title. -..returns:$void$ -..remarks:This will result in an additional subsection heading to be printed. -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#addTextSubSection + * @brief Add a text sub section to the ArgumentParser. + * + * @signature void addTextSubSection(parser, title); + * + * @param[in,out] parser The ArgumentParser to add the sub section title to. + * @param[in] title The sub section title to add, std::string. + */ inline void addTextSubSection(ArgumentParser & me, std::string const & title) { @@ -429,18 +509,15 @@ inline void addTextSubSection(ArgumentParser & me, std::string const & title) // Function addText() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#addText -..class:Class.ArgumentParser -..summary:Appends a text paragraph to the @Class.ArgumentParser@. -..cat:Miscellaneous -..signature:addText(parser, text) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.text:The content of the text. -..returns:$void$ -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#addText + * @brief Add text to an ArgumentParser. 
+ * + * @signature void addText(parser, text); + * + * @param[in,out] parser ArgumentParser to add text to. + * @param[in] text The std::string to add to the parser. + */ inline void addText(ArgumentParser & me, std::string const & text) { @@ -451,21 +528,16 @@ inline void addText(ArgumentParser & me, std::string const & text) // Function addListItem() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#addListItem -..class:Class.ArgumentParser -..summary:Appends a list item to the @Class.ArgumentParser@. -..cat:Miscellaneous -..signature:addListItem(parser, item, description) -..description: -This method adds a list item to the parser's output. -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.item:The item text. -..param.description:The description text. -..returns:$void$ -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#addListItem + * @brief Appends a list item to the ArgumentParser + * + * @signature void addListItem(parser, item, description); + * + * @param[in,out] parser The ArgumentParser to add the list item to. + * @param[in] item The item to add, std::string. + * @param[in] description The item to add, std::string. + */ inline void addListItem(ArgumentParser & me, std::string const & item, std::string const & description) { @@ -476,17 +548,15 @@ inline void addListItem(ArgumentParser & me, std::string const & item, std::stri // Function printShortHelp() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#printShortHelp -..class:Class.ArgumentParser -..summary:Prints a short help message for the parser to a stream -..cat:Miscellaneous -..signature:printShortHelp(parser[, stream]) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.stream:Target stream (e.g. $std::cerr$). -..include:seqan/arg_parse.h -*/ +/*! 
+ * @fn ArgumentParser#printShortHelp + * @brief Prints a short help message for the parser to a stream. + * + * @signature void printShortHelp(parser, out); + * + * @param[in,out] parser The ArgumentParser to print help for. + * @param[in,out] out The std::ostream to print help to. + */ inline void printShortHelp(ArgumentParser const & me, std::ostream & stream) { @@ -512,22 +582,24 @@ inline void printShortHelp(ArgumentParser const & me) // Function printVersion() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#printVersion -..class:Class.ArgumentParser -..summary:Prints the version information of the parser to a stream. -..cat:Miscellaneous -..signature:printVersion(parser[, stream]) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.stream:Target std::ostream (e.g. $std::cerr$). -...default: $std::cerr$ -..include:seqan/arg_parse.h -*/ +/*! + * @fn ArgumentParser#printVersion + * @brief Prints the version information of the parser to a stream. + * + * @signature void printVersion(parser, stream); + * + * @param[in,out] parser The ArgumenParser to print for. + * @param[in,out] stream The std::ostream to print to. + */ inline void printVersion(ArgumentParser const & me, std::ostream & stream) { - stream << getAppName(me) << " version " << getVersion(me) << std::endl; + stream << getAppName(me) << " version: " << getVersion(me) << std::endl; + stream << "SeqAn version: " << SEQAN_VERSION_MAJOR << '.' << SEQAN_VERSION_MINOR << '.' 
+ << SEQAN_VERSION_PATCH; + if (SEQAN_VERSION_PRE_RELEASE != 0) + stream << "-pre" << SEQAN_VERSION_PRE_RELEASE; + stream << "\n"; } inline void printVersion(ArgumentParser const & me) @@ -536,9 +608,64 @@ inline void printVersion(ArgumentParser const & me) } // ---------------------------------------------------------------------------- -// Function _addNumericalRestriction() +// Function printLongCopyright() // ---------------------------------------------------------------------------- +/*! + * @fn ArgumentParser#printLongCopyright + * @brief Prints the long copyright information of the parser to a stream. + * + * @signature void printLongCopyright(parser, stream); + * + * @param[in,out] parser The ArgumenParser to print for. + * @param[in,out] stream The std::ostream to print to. + */ + +inline void printLongCopyright(ArgumentParser const & me, std::ostream & stream) +{ + stream << "=============================================================================" << std::endl + << "Copyright information for " << getAppName(me) << ":" << std::endl + << "-----------------------------------------------------------------------------" << std::endl + << me._toolDoc._longCopyright << std::endl << std::endl + << "=============================================================================" << std::endl + << "This program contains SeqAn code licensed under the following terms:" << std::endl + << "-----------------------------------------------------------------------------" << std::endl + << " Copyright (c) 2006-2015, Knut Reinert, FU Berlin" << std::endl + << " All rights reserved." << std::endl + << "" << std::endl + << " Redistribution and use in source and binary forms, with or without" << std::endl + << " modification, are permitted provided that the following conditions are met:" << std::endl + << "" << std::endl + << " * Redistributions of source code must retain the above copyright" << std::endl + << " notice, this list of conditions and the following disclaimer." 
<< std::endl + << " * Redistributions in binary form must reproduce the above copyright" << std::endl + << " notice, this list of conditions and the following disclaimer in the" << std::endl + << " documentation and/or other materials provided with the distribution." << std::endl + << " * Neither the name of Knut Reinert or the FU Berlin nor the names of" << std::endl + << " its contributors may be used to endorse or promote products derived" << std::endl + << " from this software without specific prior written permission." << std::endl + << "" << std::endl + << " THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"" << std::endl + << " AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE" << std::endl + << " IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE" << std::endl + << " ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE" << std::endl + << " FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL" << std::endl + << " DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR" << std::endl + << " SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER" << std::endl + << " CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT" << std::endl + << " LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY" << std::endl + << " OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH" << std::endl + << " DAMAGE." 
<< std::endl; +} + +inline void printLongCopyright(ArgumentParser const & me) +{ + printLongCopyright(me, std::cerr); +} + +// ---------------------------------------------------------------------------- +// Function _addNumericalRestriction() +// ---------------------------------------------------------------------------- inline void _addNumericalRestriction(std::string & text, ArgParseOption const & opt) { @@ -632,21 +759,24 @@ inline void _addValidValuesRestrictions(std::string & text, ArgParseOption const // Function printHelp() // ---------------------------------------------------------------------------- -/** -.Function.ArgumentParser#printHelp -..class:Class.ArgumentParser -..summary:Prints the complete help message for the parser to a stream. -..cat:Miscellaneous -..signature:printHelp(parser[, stream][, format]) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.stream:Target std::ostream (e.g. $std::cerr$). -...default: $std::cerr$ -..param.format:Format to print, one of "html", "man", "txt". -..include:seqan/arg_parse.h -*/ - -inline void printHelp(ArgumentParser const & me, std::ostream & stream, CharString const & format) +// TODO(holtgrew): Parameter order. + +/*! + * @fn ArgumentParser#printHelp + * @brief Prints the help message for the parser. + * + * @signature void printHelp(parser, out, format, showAdvancedOptions); + * + * @param[in,out] parser The ArgumentParser print the help for. + * @param[out] out The output stream to print to (std::ostream). + * @param[in] format The format to print, one of "html", "man", and "txt". + * @param[in] showAdvancedOptions Also show advanced options to user (default = false). + */ + +inline void printHelp(ArgumentParser const & me, + std::ostream & stream, + CharString const & format, + bool const showAdvancedOptions) { ToolDoc toolDoc(me._toolDoc); clearEntries(toolDoc); // We will append me._toolDoc later. 
@@ -670,11 +800,22 @@ inline void printHelp(ArgumentParser const & me, std::ostream & stream, CharStri continue; // Skip empty lines. // Is command line parser section, maps to ToolDoc subsection. - std::string title = opt._helpText; - append(title, ":"); - addSubSection(toolDoc, title); + for (unsigned j = i + 1; j < length(me.optionMap); ++j) + { + ArgParseOption const & nextopt = me.optionMap[j]; + if (empty(nextopt.shortName) && empty(nextopt.longName)) + break; + // has visible children + if (!isHidden(nextopt) && (!isAdvanced(nextopt) || showAdvancedOptions)) + { + std::string title = opt._helpText; + append(title, ":"); + addSubSection(toolDoc, title); + break; + } + } } - else if (!isHidden(opt)) + else if (!isHidden(opt) && (!isAdvanced(opt) || showAdvancedOptions)) { // Build list item term. std::string term; @@ -732,16 +873,21 @@ inline void printHelp(ArgumentParser const & me, std::ostream & stream, CharStri print(stream, toolDoc, format); } +inline void printHelp(ArgumentParser const & me, std::ostream & stream, CharString const & format) +{ + printHelp(me, stream, format, false); +} + inline void printHelp(ArgumentParser const & me, std::ostream & stream) { - printHelp(me, stream, "txt"); + printHelp(me, stream, "txt", false); } inline void printHelp(ArgumentParser const & me) { - printHelp(me, std::cerr, "txt"); + printHelp(me, std::cerr, "txt", false); } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_DOC_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_DOC_H_ diff --git a/seqan/arg_parse/arg_parse_exceptions.h b/seqan/arg_parse/arg_parse_exceptions.h index a6cde3d..ee7dde7 100644 --- a/seqan/arg_parse/arg_parse_exceptions.h +++ b/seqan/arg_parse/arg_parse_exceptions.h @@ -1,7 +1,7 @@ // ========================================================================== // arg_parse_exceptions.h // ========================================================================== -// Copyright (c) 2006-2013, Knut 
Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Stephan Aiche // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_EXCEPTIONS_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_EXCEPTIONS_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_EXCEPTIONS_H_ +#define SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_EXCEPTIONS_H_ namespace seqan { @@ -45,30 +45,13 @@ namespace seqan { // Class ParseError // ---------------------------------------------------------------------------- -/* -.Internal.Class.ParseError -..cat:Miscellaneous -..summary:General ParseError. -*/ - -class ParseError : public RuntimeError -{ -public: - ParseError(std::string const & option) : - RuntimeError(option) - {} -}; +// Defined in include/seqan/stream/tokenization.h +struct ParseError; // ---------------------------------------------------------------------------- // Class InvalidOption // ---------------------------------------------------------------------------- -/* -.Internal.Class.InvalidOption -..cat:Miscellaneous -..summary:Thrown if an unknown option was set on the command line. -*/ - class InvalidOption : public ParseError { public: @@ -81,13 +64,6 @@ class InvalidOption : public ParseError // Class MissingArgument // ---------------------------------------------------------------------------- -/* -.Internal.Class.MissingArgument -..cat:Miscellaneous -..summary:Thrown if an option was set on the command line but without giving the -required arguments for this option. 
-*/ - class MissingArgument : public ParseError { public: @@ -100,13 +76,6 @@ class MissingArgument : public ParseError // Class NotEnoughArguments // ---------------------------------------------------------------------------- -/* -.Internal.Class.NotEnoughArguments -..cat:Miscellaneous -..summary:Thrown if an option was set on the command line but not enough arguments for -this option were provided. -*/ - class NotEnoughArguments : public ParseError { public: @@ -117,4 +86,4 @@ class NotEnoughArguments : public ParseError } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_EXCEPTIONS_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_EXCEPTIONS_H_ diff --git a/seqan/arg_parse/arg_parse_option.h b/seqan/arg_parse/arg_parse_option.h index 5329cb4..7cfd4a3 100644 --- a/seqan/arg_parse/arg_parse_option.h +++ b/seqan/arg_parse/arg_parse_option.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Stephan Aiche // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_OPTION_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_OPTION_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_OPTION_H_ +#define SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_OPTION_H_ #include #include @@ -58,33 +58,21 @@ namespace seqan { * object. */ -/** -.Class.ArgParseOption -..base:Class.ArgParseArgument -..cat:Miscellaneous -..summary:Stores information for a specific command line option. 
-..signature:ArgParseOption -..remarks:A @Class.ArgParseOption@ object can be added to a @Class.ArgumentParser@ via @Function.ArgumentParser#addOption@. -..include:seqan/arg_parse.h -..see:Class.ArgParseArgument -..see:Class.ArgumentParser -*/ - /*! * @fn ArgParseOption::ArgParseOption * @brief Constructor. * * @signature ArgParseOption::ArgParseOption(shortName, longName, helpText, argumentType[, argumentLabel[, isList[, numValues]]]); * - * @param shortName The short name of the argument. - * @param longName The long name of the argument (std::string). - * @param helpText The text to display as help (std::string). - * @param argumentType The type of the argument (@link ArgParseArgument::ArgumentType @endlink). - * @param argumentLabel The label for the value to use in the help display, e.g. 'INT' in '--value INT' - * (std::string). - * @param isList Flag for whether this option can be given multiple times (bool, true for - * allowing multiple values). - * @param numValues Number of command line arguments that each option should bind (unsigned). + * @param[in] shortName The short name of the argument. + * @param[in] longName The long name of the argument (std::string). + * @param[in] helpText The text to display as help (std::string). + * @param[in] argumentType The type of the argument (@link ArgParseArgument::ArgumentType @endlink). + * @param[in] argumentLabel The label for the value to use in the help display, e.g. 'INT' in '--value INT' + * (std::string). + * @param[in] isList Flag for whether this option can be given multiple times (bool, true for + * allowing multiple values). + * @param[in] numValues Number of command line arguments that each option should bind (unsigned). * * @section Short and Long Option Names * @@ -101,27 +89,6 @@ namespace seqan { *
*/ -/** -.Memfunc.ArgParseOption#ArgParseOption -..class:Class.ArgParseOption -..summary:Constructor -..signature:ArgParseOption(shortName, longName, helpText, argumentType[, argumentLabel[, isList]]) -..param.shortName:A std::string containing the short-name option identifier (e.g. $"h"$ for the $-h/--help$ option). -Although not suggested the short-name can contain more than 1 character. -...remarks:Note that the leading "-" is not passed. -..param.longName:A std::string containing the long-name option identifier (e.g. $"help"$ for the $-h/--help$ option). -...remarks:Note that the leading "--" is not passed. -..param.helpText:A std::string containing the help text associated with this option. -..param.argumentType:A $ArgParseArgument::ArgumentType$ for the option (e.g., an integer argument). -...type:Class.ArgParseArgument -..param.argumentLabel:The label to use for the argument in the help text, e.g. $"NUMBER"$ for an integer. Optional. -...default:$""$ -...type:nolink:$char const *$ -..param.isList:Whether or not the argument allows multiple values. 
-...default:$false$ -...type:nolink:$bool$ -*/ - class ArgParseOption : public ArgParseArgument { @@ -141,6 +108,8 @@ class ArgParseOption : bool _isRequired; // true if this ArgParseOption must be set bool _isHidden; // true if this ArgParseOption should not be // shown on the command line + bool _isAdvanced; // true if this ArgParseOption should only + // be shown in the full help // ---------------------------------------------------------------------------- // Constructors @@ -157,7 +126,8 @@ class ArgParseOption : longName(_longName), _isFlag(false), _isRequired(false), - _isHidden(false) + _isHidden(false), + _isAdvanced(false) { _helpText = _help; } @@ -170,7 +140,8 @@ class ArgParseOption : longName(_longName), _isFlag(true), _isRequired(false), - _isHidden(false) + _isHidden(false), + _isAdvanced(false) { defaultValue.push_back("false"); setValidValues(*this, "true false"); @@ -199,25 +170,13 @@ inline bool isStringArgument(ArgParseOption const & me) * * @signature bool isBooleanOption(option); * - * @param option The ArgParseOption object to query. + * @param[in] option The ArgParseOption object to query. * * @return bool true if option is a switch and false otherwise. * * Flags are given without arguments, e.g. the -l flag in the Unix ls command. */ -/** -.Function.ArgParseOption#isBooleanOption -..class:Class.ArgParseOption -..summary:Returns whether option is a switch. -..cat:Miscellaneous -..signature:isBooleanOption(option) -..param.option:The @Class.ArgParseOption@ object. -...type:Class.ArgParseOption -..returns:$true$ if the option is a switch. -..include:seqan/arg_parse.h -*/ - inline bool isBooleanOption(ArgParseOption const & me) { return me._isFlag; @@ -234,25 +193,13 @@ inline bool isBooleanOption(ArgParseOption const & me) * * @signature bool isHidden(option); * - * @param option The ArgParseOption object to query. + * @param[in] option The ArgParseOption object to query. * * @return bool true if it is hidden, false otherwise. 
* * By default, options are not hidden. */ -/** -.Function.ArgParseOption#isHidden -..class:Class.ArgParseOption -..summary:Returns whether option is hidden on the help screen. Default is false. -..cat:Miscellaneous -..signature:isHidden(option) -..param.option:The @Class.ArgParseOption@ object. -...type:Class.ArgParseOption -..returns:$true$ if the option is hidden on the help screen. -..include:seqan/arg_parse.h -*/ - inline bool isHidden(ArgParseOption const & me) { return me._isHidden; @@ -269,28 +216,58 @@ inline bool isHidden(ArgParseOption const & me) * * @signature void hideOption(option[, hide]); * - * @param option The ArgParseOption object to set the hidden flag of. - * @param hide bool that indicates whether to hide the flag (default: true) + * @param[in,out] option The ArgParseOption object to set the hidden flag of. + * @param[in] hide bool that indicates whether to hide the flag (default: true) */ -/** -.Function.ArgParseOption#hideOption -..class:Class.ArgParseOption -..summary:Hides the ArgParseOption from the help screen. -..cat:Miscellaneous -..signature:hideOption(option [, hide]) -..param.option:The @Class.ArgParseOption@ object. -...type:Class.ArgParseOption -..param.hide:The new visibility of the option. Default is false. -...type:nolink:bool -..include:seqan/arg_parse.h -*/ - inline void hideOption(ArgParseOption & me, bool hide = true) { me._isHidden = hide; } +// ---------------------------------------------------------------------------- +// Function isHidden() +// ---------------------------------------------------------------------------- + +/*! + * @fn ArgParseOption#isAdvanced + * @headerfile + * @brief Return whether an option is only shown in the full help screen. + * + * @signature bool isAdvanced(option); + * + * @param[in] option The ArgParseOption object to query. + * + * @return bool true if it is hidden, false otherwise. + * + * By default, options are not marked as advanced. 
+ */ + +inline bool isAdvanced(ArgParseOption const & me) +{ + return me._isAdvanced; +} + +// ---------------------------------------------------------------------------- +// Function hideOption() +// ---------------------------------------------------------------------------- + +/*! + * @fn ArgParseOption#setAdvanced + * @headerfile + * @brief Shows the ArgParseOption only on the full help screen. + * + * @signature void setAdvanced(option[, advanced]); + * + * @param[in,out] option The ArgParseOption object to set the advanced flag of. + * @param[in] advanced bool that indicates whether to hide the flag (default: true) + */ + +inline void setAdvanced(ArgParseOption & me, bool advanced = true) +{ + me._isAdvanced = advanced; +} + // ---------------------------------------------------------------------------- // Function isRequired() // ---------------------------------------------------------------------------- @@ -302,25 +279,13 @@ inline void hideOption(ArgParseOption & me, bool hide = true) * * @signature bool isRequired(option); * - * @param option The ArgParseOption object to query. - * + * @param[in] option The ArgParseOption object to query. + * * @return bool true if the option is mandatory and false if it not. * * By default, options are not mandatory. */ -/** -.Function.ArgParseOption#isRequired -..class:Class.ArgParseOption -..summary:Returns whether the option is mandatory. -..cat:Miscellaneous -..signature:isRequired(option) -..param.option:The @Class.ArgParseOption@ object. -...type:Class.ArgParseOption -..returns:$true$ if the option is mandatory. -..include:seqan/arg_parse.h -*/ - inline bool isRequired(ArgParseOption const & me) { return me._isRequired; @@ -337,24 +302,12 @@ inline bool isRequired(ArgParseOption const & me) * * @signature void setDefaultValue(option, v); * - * @param option The ArgParseOption to set the default value for. - * @param v The value to set, (any type that can be streamed into an std::stringstream). 
+ * @param[in,out] option The ArgParseOption to set the default value for. + * @param[in] v The value to set, (any type that can be streamed into an std::stringstream). */ // TODO(holtgrew): Deprecate in favour of string-only variant? -/** -.Function.ArgParseOption#setDefaultValue -..summary:Sets the default value for the given option. -..cat:Miscellaneous -..remarks:Note that this overwrites any previously given default values. -..signature:setDefaultValue(option, value) -..param.option:The @Class.ArgParseOption@ object. -...type:Class.ArgParseOption -..param.value:The new default value. -..include:seqan/arg_parse.h -*/ - template inline void setDefaultValue(ArgParseOption & me, const TValue & value) { @@ -389,27 +342,15 @@ inline void setDefaultValue(ArgParseOption & me, const TValue & value) * * @signature void setDefaultValue(option, v); * - * @param option The ArgParseOption to appen the default value for. - * @param v The value to append, (any type that can be streamed into an std::stringstream). - * + * @param[in,out] option The ArgParseOption to appen the default value for. + * @param[in] v The value to append, (any type that can be streamed into an std::stringstream). + * * @section Remarks * * This function does not check any length restrictions for this value. */ -/** -.Function.ArgParseOption#addDefaultValue -..summary:Adds/appends a new value to the list of default values. -..cat:Miscellaneous -..remarks:Note that this method does not check any length restrictions for this value. -..signature:addDefaultValue(option, value) -..param.option:The @Class.ArgParseOption@ object. -...type:Class.ArgParseOption -..param.value:The new default value. -..include:seqan/arg_parse.h -*/ - template inline void addDefaultValue(ArgParseOption & me, const TValue & value) { @@ -441,25 +382,12 @@ inline void addDefaultValue(ArgParseOption & me, const TValue & value) * * @signature void setRequired(option, required); * - * @param option The ArgParseOption to modify. 
- * @param required Flag whether the option is mandatory (bool). + * @param[in,out] option The ArgParseOption to modify. + * @param[in] required Flag whether the option is mandatory (bool). * * By default, options are not mandatory. */ -/** -.Function.ArgParseOption#setRequired -..class:Class.ArgParseOption -..summary:Sets whether or not the option is mandatory. -..cat:Miscellaneous -..signature:setRequired(option, required) -..param.option:The @Class.ArgParseOption@ object. -...type:Class.ArgParseOption -..param.required:The new required value of the option. -...type:nolink:bool -..include:seqan/arg_parse.h -*/ - inline void setRequired(ArgParseOption & me, bool required) { me._isRequired = required; @@ -476,7 +404,7 @@ inline void setRequired(ArgParseOption & me, bool required) * * @signature std::string getArgumentLabel(option); * - * @param option The ArgParseOption object to query. + * @param[in] option The ArgParseOption object to query. * * @return std::string The argument label string. */ @@ -500,22 +428,11 @@ inline std::string const getArgumentLabel(ArgParseOption const & me) * * @signature std::string getOptionName(option); * - * @param option The ArgParseOption object to query. + * @param[in] option The ArgParseOption object to query. * * @return std::string The option name string. */ -/** - .Function.ArgParseOption#getOptionName - ..class:Class.ArgParseOption - ..summary:Returns the name of the @Class.ArgParseOption@ in a well formated way. - ..cat:Miscellaneous - ..signature:getOptionName(option) - ..param.option:The @Class.ArgParseOption@ object. - ...type:Class.ArgParseOption - ..include:seqan/arg_parse.h - ..returns:The name of the option as well formated string (e.g., -h, --help). 
- */ inline std::string getOptionName(ArgParseOption const & me) { std::stringstream stream; @@ -542,30 +459,18 @@ inline std::string getOptionName(ArgParseOption const & me) * * @signature void write(stream, option); * - * @param stream The @link StreamConcept stream @endlink to write to. - * @param option The ArgParseOption object to write to stream. + * @param[in,out] stream The @link StreamConcept stream @endlink to write to. + * @param[out] option The ArgParseOption object to write to stream. */ -/** -.Function.ArgParseOption#write -..class:Class.ArgParseOption -..summary:Writes the basic information about the @Class.ArgParseOption@ to the provided stream. -..cat:Miscellaneous -..signature:write(stream, option) -..param.stream:The target stream. -..param.option:The @Class.ArgParseOption@ object. -...type:Class.ArgParseOption -..include:seqan/arg_parse.h -*/ - template inline void write(TStream & target, ArgParseOption const & me) { - streamPut(target, '\t'); - streamPut(target, getOptionName(me)); - streamPut(target, '\t'); - streamPut(target, '\t'); - streamPut(target, me._helpText); + writeValue(target, '\t'); + write(target, getOptionName(me)); + writeValue(target, '\t'); + writeValue(target, '\t'); + write(target, me._helpText); } // ---------------------------------------------------------------------------- @@ -577,11 +482,11 @@ inline void write(TStream & target, ArgParseOption const & me) template inline TStream & operator<<(TStream & target, ArgParseOption const & source) { - - write(target, source); + typename DirectionIterator::Type it = directionIterator(target, Output()); + write(it, source); return target; } } // namespace seqan -#endif // SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_OPTION_H_ +#endif // SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_OPTION_H_ diff --git a/seqan/arg_parse/arg_parse_parse.h b/seqan/arg_parse/arg_parse_parse.h index af4941c..7ee7780 100644 --- a/seqan/arg_parse/arg_parse_parse.h +++ b/seqan/arg_parse/arg_parse_parse.h @@ 
-1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Stephan Aiche // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_PARSE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_PARSE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_PARSE_H_ +#define SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_PARSE_H_ #include #include @@ -50,39 +50,24 @@ namespace seqan { * @headerfile * @brief Parse command line parameters. * - * @signature TResult parse(parser, argc, argv[, outStream, errStream]]); + * @signature TResult parse(parser, argc, argv[, outStream[, errStream]]); * - * @param parser The ArgumentParser to use for parsing and for storing parse results. - * @param argc The number of arguments (int). - * @param argv The arguments (const char * argv[]). - * @param outStream The std::ostream to use for output. - * @param errStream The std::ostream to use for error output. + * @param[in,out] parser The ArgumentParser to use for parsing and for storing parse results. + * @param[in] argc The number of arguments (int). + * @param[in] argv The arguments (const char * argv[]). + * @param[in,out] outStream The std::ostream to use for output. + * @param[in,out] errStream The std::ostream to use for error output. * * @return TResult The parse result, of type @link ArgumentParser::ParseResult @endlink. * * This function must be called before retrieving any options or arguments from the parser. */ -/** -.Function.ArgumentParser#parse -..summary:Parses the command line. 
-..class:Class.ArgumentParser -..cat:Miscellaneous -..signature:parse(parser, argc, argv[, outputStream, errorStream]) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.argc:Count of the objects on the command line. -..param.argv:Array of the different command line arguments ($const char *argv[]$). -..param.errorStream:A stream where error messages are sent to. -..remarks:Must be called before retrieving options or arguments. -..returns:$true$ if all required arguments are set and parseable and neither the help nor version argument is set. -..include:seqan/arg_parse.h -*/ - // Helper class for parsing command line arguments. // // Putting things into its a class allows us to structure the parsing in a fine way. +template class ArgumentParserHelper_ { public: @@ -92,7 +77,7 @@ class ArgumentParserHelper_ ArgumentParser & parser; // The argc and argv from the main() method. int argc; - const char ** argv; + TChar ** argv; // The parser's state is stored in the following variables. @@ -102,7 +87,7 @@ class ArgumentParserHelper_ // The index of the current positional argument. TArgumentPosition currentArgument; - ArgumentParserHelper_(ArgumentParser & parser, int argc, const char * argv[]) + ArgumentParserHelper_(ArgumentParser & parser, int argc, TChar * argv[]) : parser(parser), argc(argc), argv(argv), seenDashDash(false), currentArgument(0) {} @@ -123,7 +108,12 @@ class ArgumentParserHelper_ for (int argi = 1; argi < argc; ++argi) { - if (seenDashDash || strlen(argv[argi]) == 0 || argv[argi][0] != '-') + // after "--" ever arg is treated as argument (not as option), e.g. 
"rm -rf -- --file-name" + // "-" is a treated as argument as for a filename arguments it represents stdin + // everything else that begins with "-" is an option + + size_t argLen = strlen(argv[argi]); + if (seenDashDash || argLen == 0 || ((argv[argi][0] != '-') || (argLen == 1))) // // Handle as position argument if we have seen "--" or does not start with dash. handleArgument(argv[argi]); else if (strcmp(argv[argi], "--") == 0) @@ -271,16 +261,17 @@ class ArgumentParserHelper_ }; // Parser driver function. -inline ArgumentParser::ParseResult parse(ArgumentParser & me, - int argc, - const char * argv[], - std::ostream & outputStream, - std::ostream & errorStream) +template +ArgumentParser::ParseResult parse(ArgumentParser & me, + int argc, + TChar * argv[], + std::ostream & outputStream, + std::ostream & errorStream) { SEQAN_TRY { // Perform the parsing without any valid value checking on the argument values. - ArgumentParserHelper_ parserHelper(me, argc, argv); + ArgumentParserHelper_ parserHelper(me, argc, argv); parserHelper.parseArgs(); // Copy the file extensions from the "--${NAME}-file-ext" options to "--${NAME}". 
@@ -322,6 +313,11 @@ inline ArgumentParser::ParseResult parse(ArgumentParser & me, printVersion(me, outputStream); return ArgumentParser::PARSE_VERSION; } + else if (hasOption(me, "copyright") && isSet(me, "copyright")) + { + printLongCopyright(me, outputStream); + return ArgumentParser::PARSE_COPYRIGHT; + } else if (hasOption(me, "write-ctd") && isSet(me, "write-ctd")) { if (writeCTD(me)) @@ -334,14 +330,19 @@ inline ArgumentParser::ParseResult parse(ArgumentParser & me, printHelp(me, outputStream); return ArgumentParser::PARSE_HELP; } + else if (isSet(me, "full-help")) + { + printHelp(me, outputStream, "txt", true); + return ArgumentParser::PARSE_HELP; + } else if (isSet(me, "export-help")) { std::string format; getOptionValue(format, me, "export-help"); - printHelp(me, outputStream, format); + printHelp(me, outputStream, format, true); return ArgumentParser::PARSE_EXPORT_HELP; } - else if (argc == 1 && (me.argumentList.size() > 0 || !_allRequiredSet(me))) + else if (argc == 1 && !(_allRequiredSet(me) && _allArgumentsSet(me))) { // print short help and exit printShortHelp(me, errorStream); @@ -364,13 +365,14 @@ inline ArgumentParser::ParseResult parse(ArgumentParser & me, return ArgumentParser::PARSE_ERROR; } -inline ArgumentParser::ParseResult parse(ArgumentParser & me, - int argc, - const char * argv[]) +template +ArgumentParser::ParseResult parse(ArgumentParser & me, + int argc, + TChar * argv[]) { return parse(me, argc, argv, std::cout, std::cerr); } } // namespace seqan -#endif // SEQAN_CORE_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_PARSE_H_ +#endif // SEQAN_INCLUDE_SEQAN_ARG_PARSE_ARG_PARSE_PARSE_H_ diff --git a/seqan/arg_parse/arg_parse_type_support.h b/seqan/arg_parse/arg_parse_type_support.h index 771bcdc..e483b9a 100644 --- a/seqan/arg_parse/arg_parse_type_support.h +++ b/seqan/arg_parse/arg_parse_type_support.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // 
========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // TODO(aiche): check if parts of the functionality could be merged with lexicalCast in stream module -#ifndef SEQAN_CORE_INCLUDE_ARG_PARSE_ARG_PARSE_TYPE_SUPPRT_H_ -#define SEQAN_CORE_INCLUDE_ARG_PARSE_ARG_PARSE_TYPE_SUPPRT_H_ +#ifndef SEQAN_INCLUDE_ARG_PARSE_ARG_PARSE_TYPE_SUPPRT_H_ +#define SEQAN_INCLUDE_ARG_PARSE_ARG_PARSE_TYPE_SUPPRT_H_ #include #include @@ -187,4 +187,4 @@ inline bool _convertArgumentValue(TObject & dst, ArgParseArgument const & opt, s } // namespace seqan -#endif // SEQAN_CORE_INCLUDE_ARG_PARSE_ARG_PARSE_TYPE_SUPPRT_H_ +#endif // SEQAN_INCLUDE_ARG_PARSE_ARG_PARSE_TYPE_SUPPRT_H_ diff --git a/seqan/arg_parse/argument_parser.h b/seqan/arg_parse/argument_parser.h index 674fc03..e3342cf 100644 --- a/seqan/arg_parse/argument_parser.h +++ b/seqan/arg_parse/argument_parser.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -30,8 +30,8 @@ // // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_ARG_PARSE_ARGUMENT_PARSER_H_ -#define SEQAN_CORE_INCLUDE_ARG_PARSE_ARGUMENT_PARSER_H_ +#ifndef SEQAN_INCLUDE_ARG_PARSE_ARGUMENT_PARSER_H_ +#define SEQAN_INCLUDE_ARG_PARSE_ARGUMENT_PARSER_H_ #include #include @@ -91,7 +91,7 @@ inline ArgParseArgument & getArgument(ArgumentParser & me, unsigned position); * * The following gives a simple example of how to use the ArgumentParser class. * - * @include demos/arg_parse/argument_parser.cpp + * @include demos/dox/arg_parse/argument_parser.cpp * * @code{.console} * $ demo_arg_parse_argument_parser in.fa out.txt --id 0 @@ -114,7 +114,7 @@ inline ArgParseArgument & getArgument(ArgumentParser & me, unsigned position); * * @signature ArgumentParser::ArgumentParser([appName]); * - * @param appName The name of the application (std::string), defaults to argv[0]. + * @param[in] appName The name of the application (std::string), defaults to argv[0]. */ /*! @@ -123,73 +123,28 @@ inline ArgParseArgument & getArgument(ArgumentParser & me, unsigned position); * * @signature enum ArgumentParser::ParseResult; * - * @var ArgumentParser::ParseResult ArgumentParser::PARSE_OK; + * @val ArgumentParser::ParseResult ArgumentParser::PARSE_OK; * @brief Parsing the program's arguments was successful and no builtin command was triggered. * - * @var ArgumentParser::ParseResult ArgumentParser::PARSE_ERROR; + * @val ArgumentParser::ParseResult ArgumentParser::PARSE_ERROR; * @brief There were errors parsing the arguments. * - * @var ArgumentParser::ParseResult ArgumentParser::PARSE_HELP; - * @brief Parsing was successful, built-in --help option was used. + * @val ArgumentParser::ParseResult ArgumentParser::PARSE_HELP; + * @brief Parsing was successful, built-in --help or --full-help option was used. 
* - * @var ArgumentParser::ParseResult ArgumentParser::PARSE_VERSION; + * @val ArgumentParser::ParseResult ArgumentParser::PARSE_VERSION; * @brief Parsing was successful, built-in --version option was used. * - * @var ArgumentParser::ParseResult ArgumentParser::PARSE_WRITE_CTD; + * @val ArgumentParser::ParseResult ArgumentParser::PARSE_COPYRIGHT; + * @brief Parsing was successful, built-in --copyright option was used. + * + * @val ArgumentParser::ParseResult ArgumentParser::PARSE_WRITE_CTD; * @brief Parsing was successful, built-in --write-ctd option was used. * - * @var ArgumentParser::ParseResult ArgumentParser::PARSE_EXPORT_HELP; + * @val ArgumentParser::ParseResult ArgumentParser::PARSE_EXPORT_HELP; * @brief Parsing was successful, built-in --export-help option was used. */ -/** -.Class.ArgumentParser -..cat:Miscellaneous -..summary:Stores multiple @Class.ArgParseOption@ objects and parses the command line arguments for these options. -..signature:ArgumentParser -..include:seqan/arg_parse.h -..remarks: -See the documentation of @Class.ToolDoc@ on how to format text. -Where possible, formatting is added automatically for you. -You have to use formatting in the following places: (1) usage lines, (2) option help texts, (3) description and additional text sections. -..example.text: -The following gives a simple example of how to use the @Class.ArgumentParser@. -..example.code: -ArgumentParser parser("alf"); -setShortDescription(parser, "Alignment free sequence comparison"); -setVersion(parser, "1.0"); -setDate(parser, "Jan 2010"); - -addUsageLine(parser, "[\\fIOPTIONS\\fP] \\fB-i\\fP \\fIIN\\fP \\fB-o\\fP \\fIOUT\\fP"); - -addDescription(parser, - "ALF can be used to calculate the pairwise similarity of sequences " - "using alignment-free methods. 
All methods which are implemented are " - "based on k-mer counts."); - -addOption(parser, ArgParseOption("i", "inputFile", "Name of the multi-FASTA input.", - ArgParseArgument(ArgParseArgument::INPUTFILE, "IN"))); -setRequired(parser, "i"); - -addOption(parser, ArgParseOption("o", "outputFile", "Name of the multi-FASTA input.", - ArgParseArgument(ArgParseArgument::OUTPUTFILE, "OUT"))); -setRequired(parser, "o"); - -addTextSection(parser, "See Also"); -addText(parser, "http://www.seqan.de/projects/alf"); -..see:Class.ToolDoc -..see:Class.ArgParseArgument -..see:Class.ArgParseOption - -.Memfunc.ArgumentParser#ArgumentParser -..class:Class.ArgumentParser -..summary:Constructor -..signature:ArgumentParser () -..signature:ArgumentParser (applicationName) -..param.applicationName:A std::string containing the name of the application. -..remarks:If the name of the application is not passed to the constructor it will be extracted from the command line. -*/ - class ArgumentParser { public: @@ -205,6 +160,7 @@ class ArgumentParser PARSE_ERROR, PARSE_HELP, PARSE_VERSION, + PARSE_COPYRIGHT, PARSE_WRITE_CTD, PARSE_EXPORT_HELP }; @@ -247,13 +203,15 @@ class ArgumentParser void init() { - addOption(*this, ArgParseOption("h", "help", "Displays this help message.")); + addOption(*this, ArgParseOption("h", "help", "Display the help message.")); + addOption(*this, ArgParseOption("hh", "full-help", "Display the help message with advanced options.")); + hideOption(*this, "full-help", true); // hidden by default // hidden flags used for export of man pages and ctd formats addOption(*this, ArgParseOption("", "write-ctd", "Exports the app's interface description to a .ctd file.", - ArgParseArgument::OUTPUTFILE)); + ArgParseArgument::OUTPUT_FILE)); hideOption(*this, "write-ctd", true); addOption(*this, ArgParseOption("", @@ -301,25 +259,12 @@ class ArgumentParser * * @signature bool hasOption(parser, name); * - * @param parser The ArgumentParser to query. 
- * @param name The name to query for (std::string). + * @param[in] parser The ArgumentParser to query. + * @param[in] name The name to query for (std::string). * * @return bool true if there is such an option, false otherwise. */ -/** -.Function.ArgumentParser#hasOption -..class:Class.ArgumentParser -..summary:Returns whether a certain option is registered in the parser. -..cat:Miscellaneous -..signature:hasOption(parser, optionIdentifier) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionIdentifier:A @Shortcut.CharString@ that identifies the option. -..returns:$true$ if the option is registered. -..include:seqan/arg_parse.h -*/ - inline bool hasOption(ArgumentParser const & me, std::string const & name) { return hasKey(me.shortNameMap, name) || hasKey(me.longNameMap, name); @@ -336,23 +281,10 @@ inline bool hasOption(ArgumentParser const & me, std::string const & name) * * @signature void addOption(parser, option); * - * @param parser The ArgumentParser to add the option to. - * @param option The ArgParseOption to add to parser. + * @param[in,out] parser The ArgumentParser to add the option to. + * @param[in] option The ArgParseOption to add to parser. */ -/** -.Function.ArgumentParser#addOption -..class:Class.ArgumentParser -..summary:Adds a @Class.ArgParseOption@ object to the @Class.ArgumentParser@. -..cat:Miscellaneous -..signature:addOption(parser, option) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.option:The new @Class.ArgParseOption@ object that should be added. -...type:Class.ArgParseOption -..include:seqan/arg_parse.h -*/ - inline void _copyValidValuesToFileExt(ArgumentParser & me, std::string const & name) { // Copy valid values, remove leading dots. @@ -414,23 +346,10 @@ inline void addOption(ArgumentParser & me, ArgParseOption const & opt) * * @signature void addArgument(parser, arg); * - * @param parser The ArgumentParser to add the argument to. 
- * @param arg The ArgParseArgument to add to parser. + * @param[in,out] parser The ArgumentParser to add the argument to. + * @param[in] arg The ArgParseArgument to add to parser. */ -/** -.Function.ArgumentParser#addArgument -..class:Class.ArgumentParser -..summary:Adds a @Class.ArgParseArgument@ object to the @Class.ArgumentParser@. -..cat:Miscellaneous -..signature:addArgument(parser, argument) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.arg:The new @Class.ArgParseArgument@ object that should be added. -...type:Class.ArgParseArgument -..include:seqan/arg_parse.h -*/ - inline void _copyValidValuesToFileExt(ArgumentParser & me, unsigned no) { // Copy valid values, remove leading dots. @@ -518,25 +437,12 @@ inline ArgumentParser::TOptionMapSize _getOptionIndex(ArgumentParser const & me, * * @signature TOption getOption(parser, name); * - * @param parser The parser to query. - * @param name The short or long name of the option (std::string). + * @param[in] parser The parser to query. + * @param[in] name The short or long name of the option (std::string). * * @return TOption Reference to the @link ArgParseOption @endlink with the given short or long name. */ -/** -.Function.ArgumentParser#getOption -..class:Class.ArgumentParser -..summary:Returns a reference to the specified option. -..cat:Miscellaneous -..signature:getOption(parser, optionName) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionName:The identifier of the command line option. -..returns: a reference to the specified @Class.ArgParseOption@ object. -..include:seqan/arg_parse.h -*/ - inline ArgParseOption & getOption(ArgumentParser & me, std::string const & name) { SEQAN_CHECK(hasOption(me, name), "Unknown option: %s", toCString(name)); @@ -560,27 +466,11 @@ inline ArgParseOption const & getOption(ArgumentParser const & me, std::string c * * @signature void setRequired(parser, name[, required]). 
* - * @param parser The ArgumentParser to set the flag of. - * @param name The short or long name of the option (std::string). - * @param required Whether or not the option is required (bool, default to true). + * @param[in,out] parser The ArgumentParser to set the flag of. + * @param[in] name The short or long name of the option (std::string). + * @param[in] required Whether or not the option is required (bool, default to true). */ -/** -.Function.ArgumentParser#setRequired -..class:Class.ArgumentParser -..summary:Sets whether or not the option defined by the parameter $name$ (which can be - either the short or the long name) is mandatory. -..remarks: Note that the empty string is, at least for string options, also a valid string. -Hence setting an option to required does not guarantee that the returned string is not empty. -..cat:Miscellaneous -..signature:setRequired(parser, optionName [, required]) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionName:The identifier of the command line option. -..param.required.remarks: The default value is true. -..include:seqan/arg_parse.h -*/ - inline void setRequired(ArgumentParser & me, std::string const & name, bool required = true) { SEQAN_CHECK(hasOption(me, name), "Unknown option: %s", toCString(name)); @@ -598,32 +488,42 @@ inline void setRequired(ArgumentParser & me, std::string const & name, bool requ * * @signature void hideOption(parser, name[, hide]). * - * @param parser The ArgParseOption to the the hidden flag of. - * @param name The short or long name of the option to modify. - * @param hide Whether or not to hide the flag (bool, defaults to true). + * @param[in,out] parser The ArgumentParser to set the hidden flag of. + * @param[in] name The short or long name of the option to modify. + * @param[in] hide Whether or not to hide the flag (bool, defaults to true).
*/ -/** -.Function.ArgumentParser#hideOption -..class:Class.ArgumentParser -..summary:Hides the ArgParseOption defined by the parameter $name$ (which can be - either the short or the long name) from the help screen. -..cat:Miscellaneous -..signature:hideOption(parser, optionName [, hide]) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionName:The identifier of the command line option. -..param.hide:The new visibility of the option. Default is false. -...type:nolink:bool -..include:seqan/arg_parse.h -*/ - inline void hideOption(ArgumentParser & me, std::string const & name, bool hide) { SEQAN_CHECK(hasOption(me, name), "Unknown option: %s", toCString(name)); hideOption(getOption(me, name), hide); } +// ---------------------------------------------------------------------------- +// Function setAdvanced() +// ---------------------------------------------------------------------------- + +/*! + * @fn ArgumentParser#setAdvanced + * @headerfile + * @brief Sets whether or not the option with the given name is advanced. + * + * @signature void setAdvanced(parser, name[, advanced]). + * + * @param[in,out] parser The ArgumentParser to set the flag of. + * @param[in] name The short or long name of the option (std::string). + * @param[in] advanced Whether or not the option is advanced (bool, default to true).
+ */ + +inline void setAdvanced(ArgumentParser & me, std::string const & name, bool advanced = true) +{ + SEQAN_CHECK(hasOption(me, name), "Unknown option: %s", toCString(name)); + setAdvanced(getOption(me, name), advanced); + // make sure the full-help options is visible so advanced options can be shown + if (advanced) + hideOption(me, "full-help", false); +} + // ---------------------------------------------------------------------------- // Function getArgument() // ---------------------------------------------------------------------------- @@ -635,25 +535,12 @@ inline void hideOption(ArgumentParser & me, std::string const & name, bool hide) * * @signature TArgument getArgument(parser, pos); * - * @param parser The ArgumentParser to query. - * @param pos The position of the argument to return (unsigned, starting at 0). + * @param[in] parser The ArgumentParser to query. + * @param[in] pos The position of the argument to return (unsigned, starting at 0). * - * @return TArgument Reference to the argument with the given position. + * @return TArgument Reference to the @link ArgParseArgument @endlink with the given position. */ -/** -.Function.ArgumentParser#getArgument -..class:Class.ArgumentParser -..summary:Returns a reference to the specified argument. -..cat:Miscellaneous -..signature:getArgument(parser, argumentPosition) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.argumentPosition:The index of the argument in the argument list. -..returns: a reference to the specified @Class.ArgParseArgument@ object. -..include:seqan/arg_parse.h -*/ - inline ArgParseArgument & getArgument(ArgumentParser & me, unsigned position) { SEQAN_CHECK(position < me.argumentList.size(), @@ -679,25 +566,12 @@ inline ArgParseArgument const & getArgument(ArgumentParser const & me, unsigned * * @signature bool isSet(parser, name); * - * @param parser The ArgumentParser to query. - * @param name The short or long name of the option (std::string). 
+ * @param[in] parser The ArgumentParser to query. + * @param[in] name The short or long name of the option (std::string). * * @return bool Whether or not the option was set on the command line or not. */ -/** -.Function.ArgumentParser#isSet -..class:Class.ArgumentParser -..summary:Returns whether an option was set on the parsed command line. -..cat:Miscellaneous -..signature:isSet(parser,optionIdentifier) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionIdentifier:A std::string that identifies the option (either short or long name). -..returns:$true$ if the option was set. -..include:seqan/arg_parse.h -*/ - inline bool isSet(ArgumentParser const & me, std::string const & name) { SEQAN_CHECK(hasOption(me, name), "Unknown option: %s", toCString(name)); @@ -715,24 +589,12 @@ inline bool isSet(ArgumentParser const & me, std::string const & name) * * @signature bool hasDefault(parser, name); * - * @param parser The ArgumentParser to query. - * @param name The short or long name of the option (std::string). + * @param[in] parser The ArgumentParser to query. + * @param[in] name The short or long name of the option (std::string). * * @return bool Whether or not the option has a default value. */ -/** -.Function.ArgumentParser#hasDefault -..summary:Returns whether an option has a default value or not. -..cat:Miscellaneous -..signature:hasDefault(parser,optionIdentifier) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionIdentifier:A std::string that identifies the option (either short or long name). -..returns:$true$ if the option has a default value, $false$ otherwise. 
-..include:seqan/arg_parse.h -*/ - inline bool hasDefault(ArgumentParser const & me, std::string const & name) { SEQAN_CHECK(hasOption(me, name), "Unknown option: %s", toCString(name)); @@ -776,33 +638,17 @@ inline bool _allArgumentsSet(ArgumentParser const & me) * * @signature bool getOptionValue(dest, parser, name[, pos]); * - * @param dest The variable to write the result to (the type is a template parameter and the value type of the option - * must be convertible in the type of dest for the retrieval to work, also see result value). - * @param parser The ArgumentParser to get the value from. - * @param name The short or long name of the option (std::string). - * @param pos Optional position for multi-value options (unsigned, defaults to 0). + * @param[in] dest The variable to write the result to (the type is a template parameter and the value type of the + * option must be convertible in the type of dest for the retrieval to work, also see + * result value). + * @param[in] parser The ArgumentParser to get the value from. + * @param[in] name The short or long name of the option (std::string). + * @param[in] pos Optional position for multi-value options (unsigned, defaults to 0). * * @return bool true if the requested option was given on the command line and could be coverted to the type of * dest. */ -/** -.Function.ArgumentParser#getOptionValue -..class:Class.ArgumentParser -..summary:Retrieves the value of an option given either the short or long name. -..cat:Miscellaneous -..signature:getOptionValue(value, parser, optionIdentifier[, argNo]) -..param.value:The variable where the resulting value should be stored. -...remarks:The type of $value$ must be compatible the option type. -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionIdentifier:A std::string that is either the short or long name of the option. -..param.argNo:If the option is list, the $argNo$-th list element is returned. 
-..returns: $true$ if the requested option is set and has the requested type, $false$ otherwise. -..remarks:The value passed to the method (value) was only updated if the method returns $true$. -..include:seqan/arg_parse.h -*/ - template inline bool getOptionValue(TValue & val, ArgumentParser const & me, @@ -838,29 +684,27 @@ inline bool getOptionValue(TValue & val, * * @signature std::string getOptionFileExtension(parser, name[, pos]); * - * Can be overridden by --${name}-file-ext. - * - * @param parser The ArgumentParser to get the value from. - * @param name The short or long name of the option (std::string). - * @param pos Optional position for multi-value options (unsigned, defaults to 0). + * @param[in] parser The ArgumentParser to get the value from. + * @param[in] name The short or long name of the option (std::string). + * @param[in] pos Optional position for multi-value options (unsigned, defaults to 0). * * @return std::string The extension of the option. Empty if not set or no extension. + * + * @see ArgumentParser#getArgumentFileExtension + * + * @section Overriding File Extension on the Command Line + * + * For each option with type INPUT_FILE and OUTPUT_FILE, an option with the name + * ${name}-file-ext is automatically added to the ArgumentParser (where ${name} is the name + * of the original option). The extension can be overridden by specifying the argument. Thus, the user of + * the program could give the value "file.ext" to the parameter "fname" and override the extension on the + * command line to "ext2" as follows: + * + * @code{.console} + * # program_name --fname file.ext --fname-file-ext ext2 + * @endcode */ -/** -.Function.ArgumentParser#getOptionFileExtension -..class:Class.ArgumentParser -..summary:Returns the extension of a file option. -..cat:Miscellaneous -..signature:std::string getOptionFileExtension(parser, name[, argNo]); -..param.parser:The @Class.ArgumentParser@ object. 
-...type:Class.ArgumentParsre -..param.name:The name of the option. -..param.argNo:An optional index for multi-value options. -...type:nolink:$unsigned$ -..returns:A $std::string$ with the extension. Empty if no extension or empty value. -*/ - inline std::string getOptionFileExtension(ArgumentParser const & me, std::string const & name, unsigned argNo = 0) @@ -881,23 +725,12 @@ inline std::string getOptionFileExtension(ArgumentParser const & me, * * @signature unsigned getOptionValueCount(parser, name); * - * @param parser The ArgumentParser to query. - * @param name The short or long name of the option (string). + * @param[in] parser The ArgumentParser to query. + * @param[in] name The short or long name of the option (string). + * + * @return unsigned The number of values for the option with the given name. */ -/** -.Function.ArgumentParser#getOptionValueCount -..class:Class.ArgumentParser -..summary:Returns the number of values stored in the specified option. -..cat:Miscellaneous -..signature:getOptionValueCount(parser, optionIdentifier) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionIdentifier:A std::string that is either the short or long name of the option. -..returns: The number of values stored for this option. -..include:seqan/arg_parse.h -*/ - inline unsigned getOptionValueCount(ArgumentParser const & me, std::string const & name) { SEQAN_CHECK(hasOption(me, name), "Unknown option: %s", toCString(name)); @@ -915,23 +748,12 @@ inline unsigned getOptionValueCount(ArgumentParser const & me, std::string const * * @signature unsigned getArgumentValueCount(parser, pos); * - * @param parser The ArgumentParser to query. - * @param name The position of the argument (unsigned, 0-based). + * @param[in] parser The ArgumentParser to query. + * @param[in] pos The position of the argument (unsigned, 0-based). + * + * @return unsigned The number of values for the argument with the given position.
*/ -/** -.Function.ArgumentParser#getArgumentValueCount -..class:Class.ArgumentParser -..summary:Returns the number of values stored in the specified option. -..cat:Miscellaneous -..signature:getArgumentValueCount(parser, argumentPosition) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.argumentPosition:The index of the argument in the argument list. -..returns: The number of values stored for the specified argument. -..include:seqan/arg_parse.h -*/ - inline unsigned getArgumentValueCount(ArgumentParser const & me, unsigned argumentPosition) { SEQAN_CHECK(me.argumentList.size() > argumentPosition, @@ -951,32 +773,16 @@ inline unsigned getArgumentValueCount(ArgumentParser const & me, unsigned argume * * @signature bool getArgumentValue(dest, parser, pos[, no]); * - * @param dest The variable to write the result to (the type is a template parameter and the value type of the - * argument must be convertible in the type of dest for the retrieval to work, also see result - * value). - * @param parser The ArgumentParser to get the value from. - * @param pos The position of the argument to get the value of. - * @param no Optional position for multi-value arguments (unsigned, defaults to 0). + * @param[in] dest The variable to write the result to (the type is a template parameter and the value type of the + * argument must be convertible in the type of dest for the retrieval to work, also see + * result value). + * @param[in] parser The ArgumentParser to get the value from. + * @param[in] pos The position of the argument to get the value of. + * @param[in] no Optional position for multi-value arguments (unsigned, defaults to 0). * * @return bool true if the retrieval was successful, false otherwise. */ -/** -.Function.ArgumentParser#getArgumentValue -..class:Class.ArgumentParser -..summary:Retrieves the value of an argument given by its position. 
-..cat:Miscellaneous -..signature:getArgumentValue(value, parser, argumentPosition[, argNo]) -..param.value:The variable where the resulting value should be stored. -...remarks:The type of $value$ must be compatible the option type. -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.argumentPosition:The index of the argument in the argument list. -..param.argNo:If the argument is a list, the $argNo$-th list element is returned. -..returns: $true$ if the requested argument is set and has the requested type, $false$ otherwise. -..include:seqan/arg_parse.h -*/ - template inline bool getArgumentValue(TValue & value, ArgumentParser const & me, @@ -1008,30 +814,27 @@ inline bool getArgumentValue(TValue & value, * * @signature std::string argumentFileExtension(parser, pos[, argNo]); * - * Can be overridden by --arg-${pos}-file-ext. - * - * @param parser The ArgumentParser to get the value from. - * @param pos The position of the argument to query (unsigned). - * @param argNo Optional position for multi-value options (unsigned, defaults to 0). + * @param[in] parser The ArgumentParser to get the value from. + * @param[in] pos The position of the argument to query (unsigned). + * @param[in] argNo Optional position for multi-value options (unsigned, defaults to 0). * * @return std::string The extension of the argument if any. + * + * @see ArgumentParser#getOptionFileExtension + * + * @section Overriding File Extensions on the Command Line + * + * For each argument with type INPUT_FILE and OUTPUT_FILE, an option with the index + * arg-${idx}-file-ext is automatically added to the ArgumentParser (where ${idx} is the index + * of the original option). The extension can be overridden by specifying the argument. 
Thus, the user of + * the program could give the value "file.ext" to the parameter "0" and override the extension on the + * command line to "ext2" as follows: + * + * @code{.console} + * # program_name file.ext --arg-0-file-ext ext2 + * @endcode */ -/** -.Function.ArgumentParser#getArgumentFileExtension -..class:Class.ArgumentParser -..summary:Returns the extension of a file argument. -..cat:Miscellaneous -..signature:std::string argumentFileExtension(parser, argPos[, argNo]); -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParsre -..param.argPos:The position of the argument. -...type:nolink:$unsigned$ -..param.argNo:An optional index for multi-value arguments. -...type:nolink:$unsigned$ -..returns:A $std::string$ with the extension. Empty if no extension or empty value. -*/ - inline std::string getArgumentFileExtension(ArgumentParser const & me, unsigned argumentPosition, unsigned argNo = 0) @@ -1055,25 +858,12 @@ inline std::string getArgumentFileExtension(ArgumentParser const & me, * * @signature TVector getOptionValues(parser, name); * - * @param parser The ArgumentParser to query. - * @param name The short or long name of the option to get (std::string). + * @param[in] parser The ArgumentParser to query. + * @param[in] name The short or long name of the option to get (std::string). * * @return TVector The resulting values (std::vector<std::string>). */ -/** -.Function.ArgumentParser#getOptionValues -..class:Class.ArgumentParser -..summary:Returns all values of an option given on the command line. -..cat:Miscellaneous -..signature:getOptionValues(parser, optionIdentifier) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionIdentifier:A std::string that is either the short or long name of the option. -..returns: A $String$ of option values. 
-..include:seqan/arg_parse.h -*/ - inline std::vector const & getOptionValues(ArgumentParser const & me, std::string const & name) { @@ -1092,25 +882,12 @@ inline std::vector const & getOptionValues(ArgumentParser const & m * * @signature TVector getArgumentValues(parser, pos); * - * @param parser The ArgumentParser to query. - * @param pos The position of the argument (unsigned, 0-based). + * @param[in] parser The ArgumentParser to query. + * @param[in] pos The position of the argument (unsigned, 0-based). * * @return TVector The resulting values (std::vector<std::string>). */ -/** -.Function.ArgumentParser#getArgumentValues -..class:Class.ArgumentParser -..summary:Returns all values of an option given on the command line. -..cat:Miscellaneous -..signature:getArgumentValues(parser, argumentPosition) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.argumentPosition:The index of the argument in the argument list. -..returns: A $String$ of argument values. -..include:seqan/arg_parse.h -*/ - inline std::vector const & getArgumentValues(ArgumentParser const & me, unsigned argumentPosition) { @@ -1131,23 +908,11 @@ inline std::vector const & getArgumentValues(ArgumentParser const & * * @signature void setDefaultValue(parser, name, v); * - * @param parser The ArgumentParser to set the default value to. - * @param name The short or long name of the argument (std::string). - * @param v The value to set (template parameter, must be streamable into a std::stringstream). + * @param[in] parser The ArgumentParser to set the default value to. + * @param[in] name The short or long name of the argument (std::string). + * @param[in] v The value to set (template parameter, must be streamable into a std::stringstream). */ -/** -.Function.ArgumentParser#setDefaultValue -..class:Class.ArgumentParser -..summary:Set default value of an option of an ArgumentParser. 
-..signature:setDefaultValue(parser, optionName, value) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionName:The identifier of the command line option. -..param.value:The new default value. -..include:seqan/arg_parse.h -*/ - template inline void setDefaultValue(ArgumentParser & me, std::string const & name, @@ -1168,23 +933,11 @@ inline void setDefaultValue(ArgumentParser & me, * * @signature void addDefaultValue(parser, name, v); * - * @param parser The ArgumentParser to append the default value to. - * @param name The short or long name of the argument (std::string). - * @param v The value to append (template parameter, must be streamable into a std::stringstream). + * @param[in,out] parser The ArgumentParser to append the default value to. + * @param[in] name The short or long name of the argument (std::string). + * @param[in] v The value to append (template parameter, must be streamable into a std::stringstream). */ -/** -.Function.ArgumentParser#addDefaultValue -..class:Class.ArgumentParser -..summary:Add to the default values of an option of an ArgumentParser. -..signature:addDefaultValue(parser, optionName, value) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionName:The identifier of the command line option. -..param.value:The new default value. -..include:seqan/arg_parse.h -*/ - template inline void addDefaultValue(ArgumentParser & me, std::string const & name, @@ -1206,30 +959,16 @@ inline void addDefaultValue(ArgumentParser & me, * @signature void setMinValue(parser, name, v); * @signature void setMinValue(parser, pos, v); * - * @param parser The ArgumentParser to set the minimal value for. - * @param name The name of the option to set the minimal value for (std::string). - * @param pos The position of the argument to set the minimal value for (unsigned, 0-based). - * @param v The minimal value to set (std::string). 
+ * @param[in,out] parser The ArgumentParser to set the minimal value for. + * @param[in] name The name of the option to set the minimal value for (std::string). + * @param[in] pos The position of the argument to set the minimal value for (unsigned, 0-based). + * @param[in] v The minimal value to set (std::string). * * @section Remarks * * The option/argument must have an integer or double type. */ -/** -.Function.ArgumentParser#setMinValue -..class:Class.ArgumentParser -..summary:Set smallest allowed value for an option or argument of an ArgumentParser. -..signature:setMinValue(parser,optionName,minValue) -..signature:setMinValue(parser,argumentPosition,minValue) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionName:The identifier of the command line option. -..param.argumentPosition:The index of the argument in the argument list. -..param.minValue:A std::string containing a string representation of the minimum value of the @Class.ArgParseOption@. -..include:seqan/arg_parse.h -*/ - inline void setMinValue(ArgumentParser & me, std::string const & name, std::string const & _minValue) @@ -1260,30 +999,16 @@ inline void setMinValue(ArgumentParser & me, * @signature void setMaxValue(parser, name, v); * @signature void setMaxValue(parser, pos, v); * - * @param parser The ArgumentParser to set the maximal value for. - * @param name The name of the option to set the maximal value for (std::string). - * @param pos The position of the argument to set the maximal value for (unsigned, 0-based). - * @param v The maximal value to set (std::string). + * @param[in,out] parser The ArgumentParser to set the maximal value for. + * @param[in] name The name of the option to set the maximal value for (std::string). + * @param[in] pos The position of the argument to set the maximal value for (unsigned, 0-based). + * @param[in] v The maximal value to set (std::string). 
* * @section Remarks * * The option/argument must have an integer or double type. */ -/** -.Function.ArgumentParser#setMaxValue -..class:Class.ArgumentParser -..summary:Set largest allowed value for an option or argument of an ArgumentParser. -..signature:setMaxValue(parser,optionName,maxValue) -..signature:setMaxValue(parser,argumentPosition,minValue) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionName:The identifier of the command line option. -..param.argumentPosition:The index of the argument in the argument list. -..param.maxValue:A std::string containing a string representation of the maximum value of the @Class.ArgParseOption@. -..include:seqan/arg_parse.h -*/ - inline void setMaxValue(ArgumentParser & me, std::string const & name, std::string const & _maxValue) @@ -1314,28 +1039,13 @@ inline void setMaxValue(ArgumentParser & me, * @signature void setValidValues(parser, name, values); * @signature void setValidValues(parser, pos, values); * - * @param parser The ArgumentParser to set the default values to. - * @param name The name of the option (std::string). - * @param pos The position of the argument (unsigned, 0-based). - * @param values The values to set. Either a std::string with the values as space-separated list - * or a std::vector<std::string> with the values. + * @param[in,out] parser The ArgumentParser to set the default values to. + * @param[in] name The name of the option (std::string). + * @param[in] pos The position of the argument (unsigned, 0-based). + * @param[in] values The values to set. Either a std::string with the values as space-separated list + * or a std::vector<std::string> with the values. */ -/** -.Function.ArgumentParser#setValidValues -..class:Class.ArgumentParser -..summary:Set valid values for an argument or option of an ArgumentParser. 
-..signature:setValidValues(parser,optionName,values) -..signature:setValidValues(parser,argumentPosition,values) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionName:The identifier of the command line option. -..param.argumentPosition:The index of the argument in the argument list. -..param.values:A $std::string$ containing all valid entries for the option. -Alternatively you can pass a string containing all values separated by spaces. -..include:seqan/arg_parse.h -*/ - inline void setValidValues(ArgumentParser & me, std::string const & name, std::vector const & values) @@ -1388,26 +1098,12 @@ inline void setValidValues(ArgumentParser & me, * @signature void setHelpText(parser, name, text); * @signature void setHelpText(parser, pos, text); * - * @param parser The ArgumentParser object. - * @param name The name of the option to set the help text for (std::string). - * @param pos The position of the argument to set the help text for. - * @param text The string to use for the help text (std::string). + * @param[in,out] parser The ArgumentParser object. + * @param[in] name The name of the option to set the help text for (std::string). + * @param[in] pos The position of the argument to set the help text for. + * @param[in] text The string to use for the help text (std::string). */ -/** -.Function.ArgumentParser#setHelpText -..class:Class.ArgumentParser -..summary:Set help text of argument parser. -..signature:setHelpText(parser,optionName,text) -..signature:setHelpText(parser,argumentPosition,text) -..param.parser:The @Class.ArgumentParser@ object. -...type:Class.ArgumentParser -..param.optionName:The identifier of the command line option. -..param.argumentPosition:The index of the argument in the argument list. -..param.text:A $std::string$ describing the option or argument. 
-..include:seqan/arg_parse.h -*/ - inline void setHelpText(ArgumentParser & me, std::string const & name, std::string const & text) @@ -1427,11 +1123,11 @@ inline void setHelpText(ArgumentParser & me, } // ---------------------------------------------------------------------------- -// Function getFileFormatExtensions() +// Function getFileExtensions() // ---------------------------------------------------------------------------- /*! - * @fn ArgumentParser#getFileFormatExtensions + * @fn ArgumentParser#getFileExtensions * @headerfile * @brief Returns file format extension given a format tag. * @@ -1439,40 +1135,23 @@ inline void setHelpText(ArgumentParser & me, * @signature TVector getFormatExtension(tagList); * @signature TVector getFormatExtension(tagSelector); * - * @param tag A single file foramt, e.g. Fastq(). - * @param tagList A list of file format (@link TagList @endlink). - * @param tagSelector A file format selector (@link TagSelector @endlink). + * @param[in] tag A single file format, e.g. Fastq(). + * @param[in] tagList A list of file formats (@link TagList @endlink). + * @param[in] tagSelector A file format selector (@link TagSelector @endlink). * * @return TVector A std::vector<std::string> with the allowed file format extensions. */ -/** -.Function.ArgumentParser#getFileFormatExtensions -..class:Class.ArgumentParser -..summary:Returns file format extensions given a format tag. -..signature:getFileFormatExtensions(formatTag) -..signature:getFileFormatExtensions(formatTagList) -..signature:getFileFormatExtensions(formatTagSelector) -..param.format:A single file format, e.g. @Tag.File Format.tag.Fastq@ or @Tag.Sam@. -...type:Tag.Tag -..param.formatTagList:A list of file formats. -...type:Tag.TagList -..param.formatTagSelector:A file format selector. -...type:Class.TagSelector -..returns:A $std::vector$ of all extensions supported by a single format or all formats of a list or selector. 
-..include:seqan/arg_parse.h -*/ - template inline std::vector -getFileFormatExtensions(T const formatTag) +getFileExtensions(T const formatTag) { std::vector extensions; - _getFileFormatExtensions(extensions, formatTag); + _getFileExtensions(extensions, formatTag); return extensions; } } // namespace seqan -#endif // SEQAN_CORE_INCLUDE_ARG_PARSE_ARGUMENT_PARSER_H_ +#endif // SEQAN_INCLUDE_ARG_PARSE_ARGUMENT_PARSER_H_ diff --git a/seqan/arg_parse/tool_doc.h b/seqan/arg_parse/tool_doc.h index 13447a8..902beb9 100644 --- a/seqan/arg_parse/tool_doc.h +++ b/seqan/arg_parse/tool_doc.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,13 +32,14 @@ // Author: Manuel Holtgrewe // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_ARG_PARSE_TOOL_DOC_H_ -#define SEQAN_CORE_INCLUDE_ARG_PARSE_TOOL_DOC_H_ +#ifndef SEQAN_INCLUDE_ARG_PARSE_TOOL_DOC_H_ +#define SEQAN_INCLUDE_ARG_PARSE_TOOL_DOC_H_ #include -#include +#include #include +#include namespace seqan { @@ -359,7 +360,7 @@ class TextToolDocPrinter_ : { std::ostream_iterator out(stream); stream << '\n' << _toText("\\fB"); - std::transform(begin(section._title), end(section._title), out, toupper); + std::transform(begin(section._title), end(section._title), out, static_cast < int(*)(int) > (toupper)); stream << _toText("\\fP") << '\n'; } @@ -559,26 +560,27 @@ class TextToolDocPrinter_ : /*! * @class ToolDoc + * @implements AssignableConcept * @headerfile * @brief Container for string documentation on a command line tool. 
- * + * * @signature class ToolDoc; - * + * * @section Remarks - * + * * This class is generally not used directly by the user but through @link ArgumentParser @endlink. It allows to store * and represent all information related to a command line tool that would normally go into a man page. It can be * printed to STL streams in different formats, currently plain text, HTML and man pages are supported. - * + * * You can also use basic formatting in text. This formatting is tailored to the usage on the command line. Use * \fB to start bold font, \fI to start italic font and \fP to use the previous font (of * course, use correct escaping of the backslash in C strings, so use "\\fB", "\\fI", and * "\\fP" in your code. - * + * * @section Examples - * + * * The following shows a brief example of how to use @link ToolDoc @endlink. - * + * * @code{.cpp} * ToolDoc doc; * setName(doc, "RazerS"); @@ -587,13 +589,13 @@ class TextToolDocPrinter_ : * setVersion(doc, "1.0"); * setCategory(doc, "Read Mapping"); * setManTitle(doc, "SeqAn Apps Reference Manual"); - * + * * addSection(doc, "Synopsis"); * addText(doc, "\\fBrazers\\fP [\\fIOPTIONS\\fP] \\fIREFERENCE\\fP \\fIREADS\\fP", false); * addText(doc, * "\\fBrazers\\fP [\\fIOPTIONS\\fP] \\fIREFERENCE\\fP \\fILEFT_READS\\fP " * "\\fIRIGHT_READS\\fP", false); - * + * * addSection(doc, "Description"); * addText(doc, * "RazerS is a read mapper with controllable, sensitivity. This " @@ -602,14 +604,14 @@ class TextToolDocPrinter_ : * "performance."); * addText(doc, * "What's special about RazerS is that you can control the sensitivity."); - * + * * addSection(doc, "Options"); * addSubSection(doc, "Main Options"); * addListItem(doc, "\\fB-id\\fP, \\fB--indels\\fP", * "Enable mapping with indels enabled."); * addListItem(doc, "\\fB-i\\fP, \\fB--identity\\fP \\fIIDENTITY\\fP", * "Set minimal identity of matches to find."); - * + * * print(std::cout, doc, "text"); * @endcode * @@ -621,63 +623,10 @@ class TextToolDocPrinter_ : /*! 
* @fn ToolDoc::ToolDoc * @brief Constructor - * + * * @signature ToolDoc::ToolDoc() */ -/** -.Class.ToolDoc -..cat:Miscellaneous -..summary:Container for string documentation on a command line tool. -..signature:ToolDoc -..remarks: -This class is generally not used directly by the user but through @Class.ArgumentParser@. -It allows to store and represent all information related to a command line tool that would normally go into a man page. -It can be printed to STL streams in different formats, currently plain text, HTML and man pages are supported. -..remarks: -You can also use basic formatting in text. This formatting is tailored to the usage on the command line. -Use $\fB$ to start bold font, $\fI$ to start italic font and $\fP$ to use the previous font (of course, use correct escaping of the backslash in C strings, so use $"\\fB"$, $"\\fI"$, and $"\\fP"$ in your code. -..example.text:The following shows a brief example of how to use @Class.ToolDoc@. -..example.code: -ToolDoc doc; -setName(doc, "RazerS"); -setShortDescription(doc, "Read mapping with controllable sensitivity."); -setDate(doc, "04 March 2012"); -setVersion(doc, "1.0"); -setCategory(doc, "Read Mapping"); -setManTitle(doc, "SeqAn Apps Reference Manual"); - -addSection(doc, "Synopsis"); -addText(doc, "\\fBrazers\\fP [\\fIOPTIONS\\fP] \\fIREFERENCE\\fP \\fIREADS\\fP", false); -addText(doc, - "\\fBrazers\\fP [\\fIOPTIONS\\fP] \\fIREFERENCE\\fP \\fILEFT_READS\\fP " - "\\fIRIGHT_READS\\fP", false); - -addSection(doc, "Description"); -addText(doc, - "RazerS is a read mapper with controllable, sensitivity. 
This " - "means that you can find all read matches in the reference sequence " - "and optionally, you can trade lower sensitivity for better " - "performance."); -addText(doc, - "What's special about RazerS is that you can control the sensitivity."); - -addSection(doc, "Options"); -addSubSection(doc, "Main Options"); -addListItem(doc, "\\fB-id\\fP, \\fB--indels\\fP", - "Enable mapping with indels enabled."); -addListItem(doc, "\\fB-i\\fP, \\fB--identity\\fP \\fIIDENTITY\\fP", - "Set minimal identity of matches to find."); - -print(std::cout, doc, "text"); -..include:seqan/arg_parse/tool_doc.h - -.Memfunc.ToolDoc#ToolDoc -..summary:constructor -..class:Class.ToolDoc -..signature:ToolDoc() -*/ - class ToolDoc { public: @@ -685,6 +634,9 @@ class ToolDoc CharString _shortDescription; CharString _date; CharString _version; + CharString _shortCopyright; + CharString _longCopyright; + CharString _citation; CharString _manTitle; CharString _category; unsigned _manSection; @@ -698,7 +650,8 @@ class ToolDoc ToolDoc(ToolDoc const & toolDoc) : _name(toolDoc._name), _shortDescription(toolDoc._shortDescription), - _date(toolDoc._date), _version(toolDoc._version), _manTitle(toolDoc._manTitle), + _date(toolDoc._date), _version(toolDoc._version), _shortCopyright(toolDoc._shortCopyright), + _longCopyright(toolDoc._longCopyright), _citation(toolDoc._citation), _manTitle(toolDoc._manTitle), _category(toolDoc._category), _manSection(1) { append(*this, toolDoc); @@ -764,27 +717,13 @@ class ToolDoc * @fn ToolDoc#append * @headerfile * @brief Append two @link ToolDoc @endlink objects. - * + * * @signature void append(a, b); - * - * @param a This object is updated - * @param b This object is appended to b. + * + * @param[in,out] a This object is updated + * @param[in] b This object is appended to a. */ -/** -.Function.ToolDoc#append -..summary:Append two @Class.ToolDoc@ objects. -..cat:Miscellaneous -..signature:append(a, b) -..class:Class.ToolDoc -..param.a:This object is updated. 
-...type:Class.ToolDoc -..param.b:This object is appended to $b$. -...type:Class.ToolDoc -..returns:$void$ -..include:seqan/arg_parse/tool_doc.h -*/ - inline void append(ToolDoc & a, ToolDoc const & b) { for (unsigned i = 0; i < length(b._entries); ++i) @@ -822,24 +761,10 @@ inline void append(ToolDoc & a, ToolDoc const & b) * * @signature void setName(toolDoc, name); * - * @param toolDoc The ToolDoc object to the set the name for. - * @param name The name of the tool (@link CharString @endlink). + * @param[in,out] toolDoc The ToolDoc object to the set the name for. + * @param[in] name The name of the tool (@link CharString @endlink). */ -/** -.Function.ToolDoc#setName -..summary:Set tool name for @Class.ToolDoc@ object. -..cat:Miscellaneous -..signature:setName(doc, name) -..class:Class.ToolDoc -..param.doc:Tool documentation object to set the name of. -...type:Class.ToolDoc -..param.name:Name to set. -...type:Shortcut.CharString -..returns:$void$ -..include:seqan/arg_parse/tool_doc.h -*/ - inline void setName(ToolDoc & doc, CharString const & name) { doc._name = name; @@ -856,24 +781,11 @@ inline void setName(ToolDoc & doc, CharString const & name) * * @signature CharString getName(toolDoc); * - * @param toolDoc The ToolDoc object to the get the name for. + * @param[in] toolDoc The ToolDoc object to the get the name for. * * @return CharString Resulting name (@link CharString @endlink). */ -/** -.Function.ToolDoc#getName -..summary:Get tool name of @Class.ToolDoc@ object. -..cat:Miscellaneous -..signature:getName(doc) -..class:Class.ToolDoc -..param.doc:Tool documentation object to get the tool name of. -...type:Class.ToolDoc -..returns:Tool name of documentation object. 
-...type:Shortcut.CharString -..include:seqan/arg_parse/tool_doc.h -*/ - inline CharString const & getName(ToolDoc const & doc) { return doc._name; @@ -890,24 +802,10 @@ inline CharString const & getName(ToolDoc const & doc) * * @signature void setName(toolDoc, name); * - * @param toolDoc The ToolDoc object to the set the name for. - * @param name The name of the tool (@link CharString @endlink). + * @param[in,out] toolDoc The ToolDoc object to the set the name for. + * @param[in] name The name of the tool (@link CharString @endlink). */ -/** -.Function.ToolDoc#setCategory -..summary:Set tool category for @Class.ToolDoc@ object. -..cat:Miscellaneous -..signature:setCategory(doc, category) -..class:Class.ToolDoc -..param.doc:Tool documentation object to set the name of. -...type:Class.ToolDoc -..param.category:Category to set. -...type:Shortcut.CharString -..returns:$void$ -..include:seqan/arg_parse/tool_doc.h -*/ - inline void setCategory(ToolDoc & doc, CharString const & category) { doc._category = category; @@ -924,24 +822,11 @@ inline void setCategory(ToolDoc & doc, CharString const & category) * * @signature CharString getCategory(toolDoc); * - * @param toolDoc The ToolDoc object to the get the category for. + * @param[in] toolDoc The ToolDoc object to the get the category for. * * @return CharString Resulting category (@link CharString @endlink). */ -/** -.Function.ToolDoc#getCategory -..summary:Get tool category of @Class.ToolDoc@ object. -..cat:Miscellaneous -..signature:getCategory(doc) -..class:Class.ToolDoc -..param.doc:Tool documentation object to get the tool category of. -...type:Class.ToolDoc -..returns:Tool category of documentation object. 
-...type:Shortcut.CharString -..include:seqan/arg_parse/tool_doc.h -*/ - inline CharString const & getCategory(ToolDoc const & doc) { return doc._category; @@ -958,24 +843,10 @@ inline CharString const & getCategory(ToolDoc const & doc) * * @signature void setShortDescription(toolDoc, text); * - * @param toolDoc The ToolDoc object to the set the short description for. - * @param text The short description of the tool (@link CharString @endlink). + * @param[in,out] toolDoc The ToolDoc object to the set the short description for. + * @param[in] text The short description of the tool (@link CharString @endlink). */ -/** -.Function.ToolDoc#setShortDescription -..summary:Set short description for @Class.ToolDoc@ object. -..cat:Miscellaneous -..signature:setShortDescriptioin(doc, description) -..class:Class.ToolDoc -..param.doc:Tool documentation object to set the short description of. -...type:Class.ToolDoc -..param.description:Short description to set. -...type:Shortcut.CharString -..returns:$void$ -..include:seqan/arg_parse/tool_doc.h -*/ - inline void setShortDescription(ToolDoc & doc, CharString const & shortDescription) { doc._shortDescription = shortDescription; @@ -992,24 +863,11 @@ inline void setShortDescription(ToolDoc & doc, CharString const & shortDescripti * * @signature CharString getShortDescription(toolDoc); * - * @param toolDoc The ToolDoc object to the get the short description for. + * @param[in] toolDoc The ToolDoc object to the get the short description for. * * @return CharString Resulting short description (@link CharString @endlink). */ -/** -.Function.ToolDoc#getShortDescription -..summary:Get short description of @Class.ToolDoc@ object. -..cat:Miscellaneous -..signature:setName(doc) -..class:Class.ToolDoc -..param.doc:Tool documentation object to set the short description of. -...type:Class.ToolDoc -..returns:Tool description of documentation object. 
-...type:Shortcut.CharString -..include:seqan/arg_parse/tool_doc.h -*/ - inline CharString const & getShortDescription(ToolDoc const & doc) { return doc._shortDescription; @@ -1026,24 +884,10 @@ inline CharString const & getShortDescription(ToolDoc const & doc) * * @signature void setName(toolDoc, str); * - * @param toolDoc The ToolDoc object to the set the date string for. - * @param str The date string of the tool (@link CharString @endlink). + * @param[in,out] toolDoc The ToolDoc object to the set the date string for. + * @param[in] str The date string of the tool (@link CharString @endlink). */ -/** -.Function.ToolDoc#setDate -..cat:Miscellaneous -..summary:Set date string for @Class.ToolDoc@ object. -..signature:setDate(doc, date) -..class:Class.ToolDoc -..param.doc:Tool documentation object to set the date string to. -...type:Class.ToolDoc -..param.date:Date string to set. -...type:Shortcut.CharString -..returns:$void$ -..include:seqan/arg_parse/tool_doc.h -*/ - inline void setDate(ToolDoc & doc, CharString const & date) { doc._date = date; @@ -1060,24 +904,11 @@ inline void setDate(ToolDoc & doc, CharString const & date) * * @signature CharString getDate(toolDoc); * - * @param toolDoc The ToolDoc object to the get the date from. + * @param[in] toolDoc The ToolDoc object to the get the date from. * * @return CharString Resulting date string (@link CharString @endlink). */ -/** -.Function.ToolDoc#getDate -..cat:Miscellaneous -..summary:Get date string from @Class.ToolDoc@ object. -..signature:getDate(doc) -..class:Class.ToolDoc -..param.doc:Tool documentation object to get the date string of. -...type:Class.ToolDoc -..returns:Date string. -...type:Shortcut.CharString -..include:seqan/arg_parse/tool_doc.h -*/ - inline CharString const & getDate(ToolDoc const & doc) { return doc._date; @@ -1092,26 +923,12 @@ inline CharString const & getDate(ToolDoc const & doc) * @headerfile * @brief Set the tool version string. 
* - * @signature void setName(toolDoc, str); + * @signature void setVersion(toolDoc, str); * - * @param toolDoc The ToolDoc object to the set the version string for. - * @param str The version string of the tool (@link CharString @endlink). + * @param[in,out] toolDoc The ToolDoc object to the set the version string for. + * @param[in] str The version string of the tool (@link CharString @endlink). */ -/** -.Function.ToolDoc#setVersion -..cat:Miscellaneous -..summary:Set version string for @Class.ToolDoc@ object. -..signature:setVersion(doc, version) -..class:Class.ToolDoc -..param.doc:Tool documentation object to set the version string to. -...type:Class.ToolDoc -..param.version:Version string to set. -...type:Shortcut.CharString -..returns:$void$ -..include:seqan/arg_parse/tool_doc.h -*/ - inline void setVersion(ToolDoc & doc, CharString const & version) { doc._version = version; @@ -1128,29 +945,140 @@ inline void setVersion(ToolDoc & doc, CharString const & version) * * @signature CharString getVersion(toolDoc); * - * @param toolDoc The ToolDoc object to the get the version string. + * @param[in] toolDoc The ToolDoc object to the get the version string. * * @return CharString Resulting version string (@link CharString @endlink). */ -/** -.Function.ToolDoc#getVersion -..cat:Miscellaneous -..summary:Get version string from @Class.ToolDoc@ object. -..class:Class.ToolDoc -..signature:CharString getVersion(doc) -..param.doc:Tool documentation object to get the version string of. -...type:Class.ToolDoc -..returns:Date string. -...type:Shortcut.CharString -..include:seqan/arg_parse/tool_doc.h -*/ - inline CharString const & getVersion(ToolDoc const & doc) { return doc._version; } +// -------------------------------------------------------------------------- +// Function setShortCopyright() ToolDoc +// -------------------------------------------------------------------------- + +/*! 
+ * @fn ToolDoc#setShortCopyright + * @headerfile + * @brief Set the tool short copyright string. + * + * @signature void setShortCopyright(toolDoc, str); + * + * @param[in,out] toolDoc The ToolDoc object to the set the short copyright string for. + * @param[in] str The short copyright string of the tool (@link CharString @endlink). + */ + +inline void setShortCopyright(ToolDoc & doc, CharString const & shortCopyright) +{ + doc._shortCopyright = shortCopyright; +} + +// -------------------------------------------------------------------------- +// Function getShortCopyright() ToolDoc +// -------------------------------------------------------------------------- + +/*! + * @fn ToolDoc#getShortCopyright + * @headerfile + * @brief Get the tool short copyright string. + * + * @signature CharString getShortCopyright(toolDoc); + * + * @param[in] toolDoc The ToolDoc object to the get the short copyright string. + * + * @return CharString Resulting short copyright string (@link CharString @endlink). + */ + +inline CharString const & getShortCopyright(ToolDoc const & doc) +{ + return doc._shortCopyright; +} + +// -------------------------------------------------------------------------- +// Function setLongCopyright() ToolDoc +// -------------------------------------------------------------------------- + +/*! + * @fn ToolDoc#setLongCopyright + * @headerfile + * @brief Set the tool long copyright string. + * + * @signature void setLongCopyright(toolDoc, str); + * + * @param[in,out] toolDoc The ToolDoc object to the set the long copyright string for. + * @param[in] str The long copyright string of the tool (@link CharString @endlink). + */ + +inline void setLongCopyright(ToolDoc & doc, CharString const & longCopyright) +{ + doc._longCopyright = longCopyright; +} + +// -------------------------------------------------------------------------- +// Function getLongCopyright() ToolDoc +// -------------------------------------------------------------------------- + +/*! 
+ * @fn ToolDoc#getLongCopyright + * @headerfile + * @brief Get the tool long copyright string. + * + * @signature CharString getLongCopyright(toolDoc); + * + * @param[in] toolDoc The ToolDoc object to the get the long copyright string. + * + * @return CharString Resulting long copyright string (@link CharString @endlink). + */ + +inline CharString const & getLongCopyright(ToolDoc const & doc) +{ + return doc._longCopyright; +} + +// -------------------------------------------------------------------------- +// Function setCitation() ToolDoc +// -------------------------------------------------------------------------- + +/*! + * @fn ToolDoc#setCitation + * @headerfile + * @brief Set the tool citation string. + * + * @signature void setCitation(toolDoc, str); + * + * @param[in,out] toolDoc The ToolDoc object to the set the citation string for. + * @param[in] str The citation string of the tool (@link CharString @endlink). + */ + +inline void setCitation(ToolDoc & doc, CharString const & citation) +{ + doc._citation = citation; +} + +// -------------------------------------------------------------------------- +// Function getCitation() ToolDoc +// -------------------------------------------------------------------------- + +/*! + * @fn ToolDoc#getCitation + * @headerfile + * @brief Get the tool citation string. + * + * @signature CharString getCitation(toolDoc); + * + * @param[in] toolDoc The ToolDoc object to the get the citation string. + * + * @return CharString Resulting citation string (@link CharString @endlink). 
+ */ + +inline CharString const & getCitation(ToolDoc const & doc) +{ + return doc._citation; +} + + // -------------------------------------------------------------------------- // Function setManTitle() ToolDoc // -------------------------------------------------------------------------- @@ -1162,24 +1090,10 @@ inline CharString const & getVersion(ToolDoc const & doc) * * @signature void setTitle(toolDoc, title); * - * @param toolDoc The ToolDoc object to the set the title for. - * @param title The title of the tool (@link CharString @endlink). + * @param[in,out] toolDoc The ToolDoc object to the set the title for. + * @param[in] title The title of the tool (@link CharString @endlink). */ -/** -.Function.ToolDoc#setManTitle -..cat:Miscellaneous -..summary:Set version string for @Class.ToolDoc@ object. -..signature:setManTitle(doc, title) -..class:Class.ToolDoc -..param.doc:Tool documentation object to set the man title to. -...type:Class.ToolDoc -..param.title:Title string to set. -...type:Shortcut.CharString -..returns:$void$ -..include:seqan/arg_parse/tool_doc.h -*/ - inline void setManTitle(ToolDoc & doc, CharString const & title) { doc._manTitle = title; @@ -1196,31 +1110,18 @@ inline void setManTitle(ToolDoc & doc, CharString const & title) * * @signature CharString getManTitle(toolDoc); * - * @param toolDoc The ToolDoc object to the get the man page title. + * @param[in] toolDoc The ToolDoc object to the get the man page title. * * @return CharString Resulting man page title (@link CharString @endlink). */ -/** -.Function.ToolDoc#getManTitle -..cat:Miscellaneous -..summary:Get man title from @Class.ToolDoc@ object. -..signature:getManTitle(doc) -..class:Class.ToolDoc -..param.doc:Tool documentation object to get the man title of. -...type:Class.ToolDoc -..returns:Man title. 
-...type:Shortcut.CharString -..include:seqan/arg_parse/tool_doc.h -*/ - inline CharString const & getManTitle(ToolDoc & doc) { return doc._manTitle; } // -------------------------------------------------------------------------- -// Function addSection() ToolDoc +// Function addSection()ToolDoc // -------------------------------------------------------------------------- /*! @@ -1230,24 +1131,10 @@ inline CharString const & getManTitle(ToolDoc & doc) * * @signature void addSection(toolDoc, title); * - * @param toolDoc The ToolDoc object to add a section for. - * @param title The section title (@link CharString @endlink). + * @param[in,out] toolDoc The ToolDoc object to add a section for. + * @param[in] title The section title (@link CharString @endlink). */ -/** -.Function.ToolDoc#addSection -..cat:Miscellaneous -..summary:Add section to @Class.ToolDoc@ object. -..signature:addSection(doc, title) -..class:Class.ToolDoc -..param.doc:Tool documentation object to add section to. -...type:Class.ToolDoc -..param.title:Section title. -...type:Shortcut.CharString -..returns:$void$ -..include:seqan/arg_parse/tool_doc.h -*/ - inline void addSection(ToolDoc & doc, CharString const & title) { appendValue(doc._entries, new ToolDocSection_(title)); @@ -1264,24 +1151,10 @@ inline void addSection(ToolDoc & doc, CharString const & title) * * @signature void addSubSection(toolDoc, title); * - * @param toolDoc The ToolDoc object to add a subsection for. - * @param title The subsection title (@link CharString @endlink). + * @param[in,out] toolDoc The ToolDoc object to add a subsection for. + * @param[in] title The subsection title (@link CharString @endlink). */ -/** -.Function.ToolDoc#addSubSection -..cat:Miscellaneous -..summary:Add subsection to @Class.ToolDoc@ object. -..signature:addSubSection(doc, title) -..class:Class.ToolDoc -..param.doc:Tool documentation object to add subsection to. -...type:Class.ToolDoc -..param.title:Subsection title. 
-...type:Shortcut.CharString -..returns:$void$ -..include:seqan/arg_parse/tool_doc.h -*/ - inline void addSubSection(ToolDoc & doc, CharString const & title) { appendValue(doc._entries, new ToolDocSubSection_(title)); @@ -1298,29 +1171,11 @@ inline void addSubSection(ToolDoc & doc, CharString const & title) * * @signature void addText(toolDoc, text[, isParagraph]); * - * @param toolDoc The ToolDoc to add the text to. - * @param text The text to add (@link CharString @endlink). - * @param isParagraph Whether to insert as paragraph or just a line (only one line break if not a paragraph). + * @param[in,out] toolDoc The ToolDoc to add the text to. + * @param[in] text The text to add (@link CharString @endlink). + * @param[in] isParagraph Whether to insert as paragraph or just a line (only one line break if not a paragraph). */ -/** -.Function.ToolDoc#addText -..cat:Miscellaneous -..summary:Add text line/paragraph to @Class.ToolDoc@. -..signature:addText(doc, text, [isParagraph]) -..class:Class.ToolDoc -..param.doc:Tool documentation object to text to. -...type:Class.ToolDoc -..param.text:Text to add. -...type:Shortcut.CharString -..param.isParagraph:Whether to insert as paragraph or just a line (only line break in the last case). -...type:nolink:$bool$ -..returns:$void$ -..remarks:See @Class.ToolDoc@ for information on formatting of text. -..see:Class.ToolDoc -..include:seqan/arg_parse/tool_doc.h -*/ - inline void addText(ToolDoc & doc, CharString const & text, bool isParagraph) { appendValue(doc._entries, new ToolDocLine_(text, isParagraph)); @@ -1342,29 +1197,11 @@ inline void addText(ToolDoc & doc, CharString const & text) * * @signature void addListItem(toolDoc, key, value); * - * @param toolDoc The ToolDoc object to add the list item to. - * @param key The key for the list (@link CharString @endlink). - * @param value The value for the list (@link CharString @endlink). + * @param[in,out] toolDoc The ToolDoc object to add the list item to. 
+ * @param[in] key The key for the list (@link CharString @endlink). + * @param[in] value The value for the list (@link CharString @endlink). */ -/** -.Function.ToolDoc#addListItem -..cat:Miscellaneous -..summary:Add list item to @Class.ToolDoc@ object. -..signature:addListItem(doc, key, value) -..class:Class.ToolDoc -..param.doc:Tool documentation object to add subsection to. -...type:Class.ToolDoc -..param.key:List item key. -...type:Shortcut.CharString -..param.key:List item value. -...type:Shortcut.CharString -..returns:$void$ -..remarks:You can add formatting to both $key$ and $value$. See @Class.ToolDoc@ for information on formatting of text. -..see:Class.ToolDoc -..include:seqan/arg_parse/tool_doc.h -*/ - inline void addListItem(ToolDoc & doc, CharString const & key, CharString const & value) { appendValue(doc._entries, new ToolDocListItem_(key, value)); @@ -1381,27 +1218,11 @@ inline void addListItem(ToolDoc & doc, CharString const & key, CharString const * * @signature void print(stream, toolDoc, format); * - * @param stream The std::ostream to write to. - * @param toolDoc The ToolDoc to print. - * @param format The format, one of {"html", "man", "txt"}. + * @param[in,out] stream The std::ostream to write to. + * @param[in] toolDoc The ToolDoc to print. + * @param[in] format The format, one of {"html", "man", "txt"}. */ -/** -.Function.ToolDoc#print -..cat:Miscellaneous -..summary:Print @Class.ToolDoc@ object in a given format. -..signature:print(stream, doc, format) -..class:Class.ToolDoc -..param.stream:List item key. -...type:nolink:$std::ostream$ -..param.doc:Tool documentation object to print. -...type:Class.ToolDoc -..param.format:Format to print in. One of "html", "man", "txt". 
-...type:Shortcut.CharString -..returns:$void$ -..include:seqan/arg_parse/tool_doc.h -*/ - inline void print(std::ostream & stream, ToolDoc const & doc, CharString const & format) { doc.print(stream, format); @@ -1418,21 +1239,9 @@ inline void print(std::ostream & stream, ToolDoc const & doc, CharString const & * * @signature void clearEntries(toolDoc); * - * @param toolDoc The ToolDoc object to clear entries from. + * @param[in,out] toolDoc The ToolDoc object to clear entries from. */ -/** -.Function.ToolDoc#clearEntries -..cat:Miscellaneous -..summary:Clear entries from @Class.ToolDoc@ object. -..signature:clearEntries(doc) -..class:Class.ToolDoc -..param.doc:Tool documentation object to clear. -...type:Class.ToolDoc -..returns:$void$ -..include:seqan/arg_parse/tool_doc.h -*/ - inline void clearEntries(ToolDoc & doc) { typedef Iterator, Rooted>::Type TIter; @@ -1525,8 +1334,33 @@ void HtmlToolDocPrinter_::print(std::ostream & stream, ToolDoc const & doc) // Print version and date. stream << "

Version

\n" - << "

Last update: " << _toHtml(doc._date) << ", " << doc._name - << " version: " << doc._version << "

\n"; + << "Last update: " << _toHtml(doc._date) << "
\n" + << doc._name << " version: " << doc._version << "
\n" + << "SeqAn version: " << SEQAN_VERSION_MAJOR << '.' << SEQAN_VERSION_MINOR << '.' + << SEQAN_VERSION_PATCH; + if (SEQAN_VERSION_PRE_RELEASE != 0) + stream << "-pre" << SEQAN_VERSION_PRE_RELEASE; + stream << "
\n"; + + // Print legal stuff + if ((!empty(doc._shortCopyright)) || (!empty(doc._longCopyright)) || (!empty(doc._citation))) + { + stream << "

Legal

\n"; + + if (!empty(doc._shortCopyright)) + stream << doc._name << " Copyright: " + << doc._shortCopyright << "
\n"; + + stream << "SeqAn Copyright: 2006-2015 Knut Reinert, FU-Berlin; released under the 3-clause BSDL.
\n"; + + if (!empty(doc._citation)) + stream << "In your academic works please cite: " << doc._citation << "
\n"; + else + stream << ""; + + if (!empty(doc._longCopyright)) + stream << "For full copyright and/or warranty information see --copyright.\n"; + } // Print HTML boilerplate footer. stream << ""; @@ -1598,9 +1432,43 @@ void TextToolDocPrinter_::print(std::ostream & stream, ToolDoc const & doc) // Print version and date. stream << "\n" << _toText("\\fB") << "VERSION" << _toText("\\fP") << "\n"; std::fill_n(out, _layout.leftPadding, ' '); - stream << doc._name << " version: " << doc._version << "\n"; + stream << _toText("\\fB") << "Last update: " << _toText("\\fP") << doc._date << "\n"; + std::fill_n(out, _layout.leftPadding, ' '); + stream << _toText("\\fB") << doc._name << " version: " << _toText("\\fP") << doc._version << "\n"; std::fill_n(out, _layout.leftPadding, ' '); - stream << "Last update " << doc._date << "\n"; + stream << _toText("\\fB") << "SeqAn version: " << _toText("\\fP") << SEQAN_VERSION_MAJOR << '.' + << SEQAN_VERSION_MINOR << '.' << SEQAN_VERSION_PATCH; + if (SEQAN_VERSION_PRE_RELEASE != 0) + stream << "-pre" << SEQAN_VERSION_PRE_RELEASE; + stream << "\n"; + + // Print legal stuff + if ((!empty(doc._shortCopyright)) || (!empty(doc._longCopyright)) || (!empty(doc._citation))) + { + stream << "\n" << _toText("\\fB") << "LEGAL" << _toText("\\fP") << "\n"; + + if (!empty(doc._shortCopyright)) + { + std::fill_n(out, _layout.leftPadding, ' '); + stream << _toText("\\fB") << doc._name << " Copyright: " + << _toText("\\fP") << doc._shortCopyright << "\n"; + } + std::fill_n(out, _layout.leftPadding, ' '); + stream << _toText("\\fB") << "SeqAn Copyright: " << _toText("\\fP") + << "2006-2015 Knut Reinert, FU-Berlin; released under the 3-clause BSDL.\n"; + if (!empty(doc._citation)) + { + std::fill_n(out, _layout.leftPadding, ' '); + stream << _toText("\\fB") << "In your academic works please cite: " << _toText("\\fP") + << doc._citation << "\n"; + } + if (!empty(doc._longCopyright)) + { + std::fill_n(out, _layout.leftPadding, ' '); + stream << "For full 
copyright and/or warranty information see " << _toText("\\fB") + << "--copyright" << _toText("\\fP") << ".\n"; + } + } } inline @@ -1610,9 +1478,9 @@ void ManToolDocPrinter_::print(std::ostream & stream, ToolDoc const & doc) // Print .TH line. stream << ".TH "; - std::transform(begin(doc._name), end(doc._name), out, toupper); + std::transform(begin(doc._name), end(doc._name), out, static_cast < int(*)(int) > (toupper)); stream << " " << doc._manSection << " \"" << doc._date << "\" \""; - std::transform(begin(doc._name), end(doc._name), out, tolower); + std::transform(begin(doc._name), end(doc._name), out, static_cast < int(*)(int) > (tolower)); stream << " " << doc._version << "\" \"" << doc._manTitle << "\"\n"; // Print NAME section. @@ -1639,7 +1507,7 @@ void ManToolDocPrinter_::print(std::ostream & stream, ToolDoc const & doc) { ToolDocSection_ const * sec = static_cast(entry); stream << ".SH "; - std::transform(begin(sec->_title), end(sec->_title), out, toupper); + std::transform(begin(sec->_title), end(sec->_title), out, static_cast < int(*)(int) > (toupper)); stream << "\n"; isFirstInSection = true; } @@ -1668,8 +1536,25 @@ void ManToolDocPrinter_::print(std::ostream & stream, ToolDoc const & doc) break; } } + + // Print legal stuff + if ((!empty(doc._shortCopyright)) || (!empty(doc._longCopyright)) || (!empty(doc._citation))) + { + stream << ".SH LEGAL\n"; + + if (!empty(doc._shortCopyright)) + stream << "\\fB" << doc._name << " Copyright:\\fR " << doc._shortCopyright << "\n.br\n"; + + stream << "\\fBSeqAn Copyright:\\fR 2006-2015 Knut Reinert, FU-Berlin; released under the 3-clause BSDL.\n.br\n"; + + if (!empty(doc._citation)) + stream << "\\fBIn your academic works please cite:\\fR " << doc._citation << "\n.br\n"; + + if (!empty(doc._longCopyright)) + stream << "For full copyright and/or warranty information see \\fB--copyright\\fR.\n"; + } } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_MISC_TOOL_DOC_H_ +#endif // #ifndef 
SEQAN_INCLUDE_MISC_TOOL_DOC_H_ diff --git a/seqan/arg_parse/xml_support.h b/seqan/arg_parse/xml_support.h index 14db0ce..a5120ff 100644 --- a/seqan/arg_parse/xml_support.h +++ b/seqan/arg_parse/xml_support.h @@ -1,7 +1,7 @@ // ========================================================================== -// xml_support.h +// SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Stephan Aiche // ========================================================================== -#ifndef CORE_INCLUDE_SEQAN_ARG_PARSE_XML_SUPPORT_H_ -#define CORE_INCLUDE_SEQAN_ARG_PARSE_XML_SUPPORT_H_ +#ifndef INCLUDE_SEQAN_ARG_PARSE_XML_SUPPORT_H_ +#define INCLUDE_SEQAN_ARG_PARSE_XML_SUPPORT_H_ namespace seqan { @@ -66,15 +66,6 @@ namespace seqan { * < -> < * > -> > */ -/** -.Function.xmlEscape: -..summary:Replaces invalid XML characters in the given sequence with their valid XML equivalent. -..cat:Miscellaneous -..signature:xmlEscape(sequence) -..param.sequence:The sequence to escape. -..returns:An escaped version of the given string. 
-..include:seqan/arg_parse/xml_support.h -*/ template TSequence xmlEscape(TSequence const & original) { @@ -94,11 +85,11 @@ TSequence xmlEscape(TSequence const & original) else if (value(ch) == '>') append(escaped, ">"); else - append(escaped, *ch); + appendValue(escaped, *ch); } return escaped; } } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_ARG_PARSE_XML_SUPPORT_H_ +#endif // #ifndef INCLUDE_SEQAN_ARG_PARSE_XML_SUPPORT_H_ diff --git a/seqan/bam_io.h b/seqan/bam_io.h index 66ee7d1..cb4071a 100644 --- a/seqan/bam_io.h +++ b/seqan/bam_io.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -30,12 +30,13 @@ // // ========================================================================== // Author: Manuel Holtgrewe +// Author: David Weese // ========================================================================== // Facade header for module bam_io. // ========================================================================== -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_H_ +#define INCLUDE_SEQAN_BAM_IO_H_ // =========================================================================== // Prerequisites. @@ -46,7 +47,7 @@ #include #include #include -#include +#include // =========================================================================== // Data Structures & Conversion. @@ -65,45 +66,36 @@ #include #include - -// BAM I/O is only available when ZLIB is available (and thus the BGz Stream). 
-#if SEQAN_HAS_ZLIB #include #include -#endif // #if SEQAN_HAS_ZLIB // =========================================================================== -// BAM Index Related. +// Easy BAM / SAM I/O. // =========================================================================== -// BAM indices are only available when ZLIB is available. -#if SEQAN_HAS_ZLIB -#include -#include -#endif // #if SEQAN_HAS_ZLIB +//#include // =========================================================================== // Utility Routines. // =========================================================================== -#include +#include + +// Not included by default, requires C++11 +// #include // =========================================================================== -// Easy BAM / SAM I/O. +// BAM Index Related. // =========================================================================== -#include -#if SEQAN_HAS_ZLIB -#include -#endif // #if SEQAN_HAS_ZLIB -#include +#include + +// BAM indices are only available when ZLIB is available. +// #if SEQAN_HAS_ZLIB +// #include +// #endif // #if SEQAN_HAS_ZLIB*/ + -#include -#if SEQAN_HAS_ZLIB -#include -#endif // #if SEQAN_HAS_ZLIB -#include -#include -#endif // CORE_INCLUDE_SEQAN_BAM_IO_H_ +#endif // INCLUDE_SEQAN_BAM_IO_H_ diff --git a/seqan/bam_io/INFO b/seqan/bam_io/INFO deleted file mode 100644 index 16e6f47..0000000 --- a/seqan/bam_io/INFO +++ /dev/null @@ -1,12 +0,0 @@ -Name: seqan-bam_io -Author: Manuel Holtgrewe -Maintainer: Manuel Holtgrewe -License: BSD 3-clause -Copyright: 2006-2013, FU Berlin -Status: beta -Description: BAM and SAM record I/O code. - This module contains code for streaming through SAM and BAM files. - . - It provides data structures for representing alignments, code to work with - tags and the necessary routines for reading data from and writing data to - streams. 
diff --git a/seqan/bam_io/bam_alignment_record.h b/seqan/bam_io/bam_alignment_record.h index fd64316..d7f9537 100644 --- a/seqan/bam_io/bam_alignment_record.h +++ b/seqan/bam_io/bam_alignment_record.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,10 @@ // The class BamAlignmentRecord, flag checking methods, flag constants. // ========================================================================== -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_RECORD_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_BAM_RECORD_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_BAM_RECORD_H_ +#define INCLUDE_SEQAN_BAM_IO_BAM_RECORD_H_ + +#include namespace seqan { @@ -57,63 +59,43 @@ inline void clear(BamAlignmentRecord & record); * * @signature enum BamFlags; * - * @var BamFlags BAM_FLAG_MULTIPLE = 0x0001; + * @val BamFlags BAM_FLAG_MULTIPLE = 0x0001; * @brief Template has multiple fragments in sequencing. * - * @var BamFlags BAM_FLAG_ALL_PROPER = 0x0002; + * @val BamFlags BAM_FLAG_ALL_PROPER = 0x0002; * @brief All fragments in the template are properly mapped. * - * @var BamFlags BAM_FLAG_UNMAPPED = 0x0004; + * @val BamFlags BAM_FLAG_UNMAPPED = 0x0004; * @brief This fragment is unmapped. * - * @var BamFlags BAM_FLAG_NEXT_UNMAPPED = 0x0008; + * @val BamFlags BAM_FLAG_NEXT_UNMAPPED = 0x0008; * @brief Next fragment in template is unmapped. * - * @var BamFlags BAM_FLAG_RC = 0x0010; + * @val BamFlags BAM_FLAG_RC = 0x0010; * @brief Fragment is reverse-complemented. * - * @var BamFlags BAM_FLAG_NEXT_RC = 0x0020; + * @val BamFlags BAM_FLAG_NEXT_RC = 0x0020; * @brief Next fragment in template is reverse-complemented. 
* - * @var BamFlags BAM_FLAG_FIRST = 0x0040; + * @val BamFlags BAM_FLAG_FIRST = 0x0040; * @brief This fragment is the first one in its template. * - * @var BamFlags BAM_FLAG_LAST = 0x0080; + * @val BamFlags BAM_FLAG_LAST = 0x0080; * @brief This fragment is the last one in its template (second in case of paired sequencing). * - * @var BamFlags BAM_FLAG_SECONDARY = 0x0100; + * @val BamFlags BAM_FLAG_SECONDARY = 0x0100; * @brief Secondary alignment. * - * @var BamFlags BAM_FLAG_QC_NO_PASS = 0x0200; + * @val BamFlags BAM_FLAG_QC_NO_PASS = 0x0200; * @brief Does not pass quality controls. * - * @var BamFlags BAM_FLAG_DUPLICATE = 0x0400; + * @val BamFlags BAM_FLAG_DUPLICATE = 0x0400; * @brief PCR or optical duplicate. * * @var BamFlags BAM_FLAG_SUPPLEMENTARY = 0x0800; * @brief Supplementary alignment. */ -/** -.Enum.BamFlags -..cat:BAM I/O -..signature:BamFlags -..summary:Shortcuts to the bitmask flags for BAM/SAM files. -..value.BAM_FLAG_MULTIPLE:$0x0001$ Template has multiple fragments in sequencing. -..value.BAM_FLAG_ALL_PROPER:$0x0002$ All fragments have been aligned properly. -..value.BAM_FLAG_UNMAPPED:$0x0004$ This fragment could not be mapped. -..value.BAM_FLAG_NEXT_UNMAPPED:$0x0008$ Next fragment is unmapped. -..value.BAM_FLAG_RC:$0x0010$ This fragment is reverse-complemented. -..value.BAM_FLAG_NEXT_RC:$0x0020$ Next fragment is reverse-complemented. -..value.BAM_FLAG_FIRST:$0x0040$ This fragment is the first one in its template. -..value.BAM_FLAG_LAST:$0x0080$ This fragment is the last one in its template. -..value.BAM_FLAG_SECONDARY:$0x0100$ This is a secondary alignment. -..value.BAM_FLAG_QC_NO_PASS:$0x0200$ Does not pass quality controls. -..value.BAM_FLAG_DUPLICATE:$0x0400$ PCR or optical duplicate. -..remarks:Also see the SAM standard on these flags for more explanation. 
-..include:seqan/bam_io.h -*/ - enum BamFlags { BAM_FLAG_MULTIPLE = 0x0001, @@ -130,24 +112,56 @@ enum BamFlags BAM_FLAG_SUPPLEMENTARY = 0x0800 }; +template +struct BamTypeChar +{ + enum + { + VALUE = + (IsSameType::VALUE)? 'A': + (IsSameType::VALUE)? 'c': + (IsSameType::VALUE)? 'C': + (IsSameType::VALUE)? 's': + (IsSameType::VALUE)? 'S': + (IsSameType::VALUE)? 'i': + (IsSameType::VALUE)? 'I': + (IsSameType::VALUE)? 'f': +// (IsSameType::VALUE)? 'd': + (IsSequence::VALUE)? 'Z': + '?' + }; +}; + +// List of primitive BAM types (ordered by expected usage frequency) +typedef TagList > > > > > > > BamTagTypes; + +// ---------------------------------------------------------------------------- +// Class BamAlignmentRecord +// ---------------------------------------------------------------------------- + /*! * @class BamAlignmentRecord * @headerfile - * @brief Represent a record from a BAM or SAM file. - * + * @implements FormattedFileRecordConcept * @signature class BamAlignmentRecord; + * @brief Represent a record from a BAM or SAM file. * * @section Remarks * * While also used to represent SAM records, the type is called BamAlignmentRecord since the data directly * reflects a BAM records (0-based positions, identify references by id, and tags are stored in BAM format. - */ - -/*! - * @fn BamAlignmentRecord::BamAlignmentRecord - * @brief Default constructor. * - * @signature BamAlignmentRecord::BamAlignmentRecord(); + * @see BamFileIn + * @see BamFileOut */ /*! @@ -160,13 +174,13 @@ enum BamFlags * @var __uint32 BamAlignmentRecord::INVALID_LEN * @brief Static member with invalid/sentinel reference ids (0 as in BAM/SAM). * - * @var CharString BamAlignmentRecord::qName + * @var CharString BamAlignmentRecord::qName; * @brief The query/read name. * * Note that the reads of a template all of the same query name and are differentiated by their position * and the BAM_FLAG_FIRST/BAM_FLAG_LAST flag values. 
* - * @var __uint16 BamAlignmentRecord::flag + * @var __uint16 BamAlignmentRecord::flag; * @brief The flag of this mapping. * * See @link BamFlags @endlink for flag constants and also see the hasFlag*() functions. @@ -211,131 +225,40 @@ enum BamFlags * @brief Raw BAM tag string, use @link BamTagsDict @endlink for comfortable access. */ -/** -.Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Represent a record from a BAM/SAM file. -..remarks:While also used to represent SAM records, called $BamAlignmentRecord$ since the data directly reflects a BAM record (0-based positions, identify references by ids, not names, tags stored in BAM format.) -..include:seqan/bam_io.h -..see:Enum.BamFlags - -.Memfunc.BamAlignmentRecord#BamAlignmentRecord -..class:Class.BamAlignmentRecord -..summary:Constructor. -..signature:BamAlignmentRecord() -..remarks:Only the default constructor is provided. - -.Memvar.BamAlignmentRecord#INVALID_POS -..class:Class.BamAlignmentRecord -..summary:Static member with invalid/sentinel position value (-1). -..type:nolink:$__uint32$ - -.Memvar.BamAlignmentRecord#INVALID_REFID -..class:Class.BamAlignmentRecord -..summary:Static member with invalid/sentinel reference id (-1). -..type:nolink:$__int32$ - -.Memvar.BamAlignmentRecord#INVALID_LEN -..class:Class.BamAlignmentRecord -..summary:Static member with invalid/sentinel position value (0 as in BAM/SAM). -..type:nolink:$__int32$ - -.Memvar.BamAlignmentRecord#qName -..class:Class.BamAlignmentRecord -..summary:The read/query name. -..type:Shortcut.CharString - -.Memvar.BamAlignmentRecord#flag -..class:Class.BamAlignmentRecord -..summary:The flag of this mapping, see @Enum.BamFlags@ for flag constants and the $hasFlag*$ functions. -..type:nolink:$__uint16$ - -.Memvar.BamAlignmentRecord#rID -..class:Class.BamAlignmentRecord -..summary:ID of reference for this fragment mapping (0-based, $INVALID_REFID$ for '*' in SAM). 
-..type:nolink:$__int32$ - -.Memvar.BamAlignmentRecord#beginPos -..class:Class.BamAlignmentRecord -..summary:The position of this fragment mapping (0-based, $INVALID_POS$ for '*' in SAM). -..type:nolink:$__int32$ - -.Memvar.BamAlignmentRecord#mapQ -..class:Class.BamAlignmentRecord -..summary:The mapping quality (255 for '*'). -..type:nolink:$__uint8$ - -.Memvar.BamAlignmentRecord#bin -..class:Class.BamAlignmentRecord -..summary:The bin of the alignment, automatically computed when writing BAM. -..type:nolink:$__uint16$ - -.Memvar.BamAlignmentRecord#cigar -..class:Class.BamAlignmentRecord -..summary:The CIGAR string as string of @Class.CigarElement@ objects (empty for '*'). -..type:nolink:$String >$ - -.Memvar.BamAlignmentRecord#rNextId -..class:Class.BamAlignmentRecord -..summary:ID of reference for next fragment mapping (0-based, $INVALID_REFID$ for '*') -..type:nolink:$__int32$ - -.Memvar.BamAlignmentRecord#pNext -..class:Class.BamAlignmentRecord -..summary:Position of next fragment mapping (0-based, $INVALID_POS$ for '*') -..type:nolink:$__uint32$ - -.Memvar.BamAlignmentRecord#tLen -..class:Class.BamAlignmentRecord -..summary:The inferred template size ($INVALID_LEN$ for '*') -..type:nolink:$__int32$ - -.Memvar.BamAlignmentRecord#seq -..class:Class.BamAlignmentRecord -..summary:The sequence string (empty for '*'). -..type:Shortcut.CharString - -.Memvar.BamAlignmentRecord#qual -..class:Class.BamAlignmentRecord -..summary:String with Phred scores (as in SAM file, empty for '*'). -..type:Shortcut.CharString - -.Memvar.BamAlignmentRecord#tags -..class:Class.BamAlignmentRecord -..summary:Raw BAM tag string, use @Class.BamTagsDict@ for comfortable access. -..type:Shortcut.CharString -*/ - -class BamAlignmentRecord +struct BamAlignmentRecordCore { -public: - static __int32 const INVALID_POS = -1; - static __int32 const INVALID_REFID = -1; // TODO(holtgrew): Rename to ...REF_ID. 
- static __int32 const INVALID_LEN = 0; - static __uint32 const INVALID_QID = 4294967295u; // TODO(holtgrew): Undocumented as of yet. + __int32 rID; + __int32 beginPos; + mutable __uint32 _l_qname:8; + __uint32 mapQ:8; + mutable __uint32 bin:16; + mutable __uint32 _n_cigar:16; + __uint32 flag:16; + mutable __int32 _l_qseq; // _l_qname, _n_cigar and _l_qseq for internal usage + __int32 rNextId; + __int32 pNext; + __int32 tLen; +}; +class BamAlignmentRecord : public BamAlignmentRecordCore +{ +public: __uint32 _qID; // TODO(holtgrew): Undocumented as of yet. - CharString qName; - __uint16 flag; - __int32 rID; - __int32 beginPos; - __uint8 mapQ; - __uint16 bin; String > cigar; - __int32 rNextId; - __int32 pNext; - __int32 tLen; - CharString seq; + CharString qName; + IupacString seq; CharString qual; CharString tags; // raw tags in BAM format + CharString _buffer; // reusable internal buffer (used for I/O) + + static __int32 const INVALID_POS = -1; + static __int32 const INVALID_REFID = -1; // TODO(holtgrew): Rename to ...REF_ID. + static __int32 const INVALID_LEN = 0; + static __uint32 const INVALID_QID = 4294967295u; // TODO(holtgrew): Undocumented as of yet. BamAlignmentRecord() : _qID(MaxValue::VALUE) { clear(*this); } }; -// ============================================================================ -// Metafunctions -// ============================================================================ - // ============================================================================ // Functions // ============================================================================ @@ -350,18 +273,16 @@ class BamAlignmentRecord * * @signature void clear(record); * - * @param record The BamAlignmentRecord to clear. + * @param[in,out] record The BamAlignmentRecord to clear. * * Clears all strings and resets it to default initialization state. 
*/ -///.Function.clear.param.object.type:Class.BamAlignmentRecord -///.Function.clear.class:Class.BamAlignmentRecord - inline void clear(BamAlignmentRecord & record) { clear(record.qName); + record.flag = 0; record._qID = MaxValue<__uint32>::VALUE; record.rID = BamAlignmentRecord::INVALID_REFID; record.beginPos = BamAlignmentRecord::INVALID_POS; @@ -387,39 +308,25 @@ clear(BamAlignmentRecord & record) * * @signature bool hasFlagMultiple(record); * - * @param record The BamAlignmentRecord to query. + * @param[in] record The BamAlignmentRecord to query. * * @return bool true if the flag is set, false otherwise. + * + * @see BamFlags */ -/** -.Function.hasFlagMultiple -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Return true if a @Class.BamAlignmentRecord@ has the "multiple" flag set. -..signature:hasFlagMultiple(record) -..param.record:The record to query. -...type:Class.BamAlignmentRecord -..returns:$bool$, indicating the flag's status. -..include:seqan/bam_io.h -..see:Function.hasFlagAllProper -..see:Function.hasFlagUnmapped -..see:Function.hasFlagNextUnmapped -..see:Function.hasFlagRC -..see:Function.hasFlagNextRC -..see:Function.hasFlagFirst -..see:Function.hasFlagLast -..see:Function.hasFlagSecondary -..see:Function.hasFlagQCNoPass -..see:Function.hasFlagDuplicate -*/ - inline bool hasFlagMultiple(BamAlignmentRecord const & record) { return (record.flag & BAM_FLAG_MULTIPLE) == BAM_FLAG_MULTIPLE; } +inline void +toggleFlagMultiple(BamAlignmentRecord & record) +{ + record.flag ^= BAM_FLAG_MULTIPLE; +} + // ---------------------------------------------------------------------------- // Function hasFlagAllProper() // ---------------------------------------------------------------------------- @@ -431,39 +338,25 @@ hasFlagMultiple(BamAlignmentRecord const & record) * * @signature bool hasFlagAllProper(record); * - * @param record The BamAlignmentRecord to query. + * @param[in] record The BamAlignmentRecord to query. 
* * @return bool true if the flag is set, false otherwise. + * + * @see BamFlags */ -/** -.Function.hasFlagAllProper -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Return true if a @Class.BamAlignmentRecord@ has the "all properly aligned" flag set. -..signature:hasFlagAllProper(record) -..param.record:The record to query. -...type:Class.BamAlignmentRecord -..returns:$bool$, indicating the flag's status. -..include:seqan/bam_io.h -..see:Function.hasFlagMultiple -..see:Function.hasFlagUnmapped -..see:Function.hasFlagNextUnmapped -..see:Function.hasFlagRC -..see:Function.hasFlagNextRC -..see:Function.hasFlagFirst -..see:Function.hasFlagLast -..see:Function.hasFlagSecondary -..see:Function.hasFlagQCNoPass -..see:Function.hasFlagDuplicate -*/ - inline bool hasFlagAllProper(BamAlignmentRecord const & record) { return (record.flag & BAM_FLAG_ALL_PROPER) == BAM_FLAG_ALL_PROPER; } +inline void +toggleFlagAllProper(BamAlignmentRecord & record) +{ + record.flag ^= BAM_FLAG_ALL_PROPER; +} + // ---------------------------------------------------------------------------- // Function hasFlagUnmapped() // ---------------------------------------------------------------------------- @@ -475,39 +368,26 @@ hasFlagAllProper(BamAlignmentRecord const & record) * * @signature bool hasFlagUnmapped(record); * - * @param record The BamAlignmentRecord to query. + * @param[in] record The BamAlignmentRecord to query. * * @return bool true if the flag is set, false otherwise. + * + * @see BamFlags */ -/** -.Function.hasFlagUnmapped -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Return true if a @Class.BamAlignmentRecord@ has the "fragment unmapped" flag set. -..signature:hasFlagUnmapped(record) -..param.record:The record to query. -...type:Class.BamAlignmentRecord -..returns:$bool$, indicating the flag's status. 
-..include:seqan/bam_io.h -..see:Function.hasFlagMultiple -..see:Function.hasFlagAllProper -..see:Function.hasFlagNextUnmapped -..see:Function.hasFlagRC -..see:Function.hasFlagNextRC -..see:Function.hasFlagFirst -..see:Function.hasFlagLast -..see:Function.hasFlagSecondary -..see:Function.hasFlagQCNoPass -..see:Function.hasFlagDuplicate -*/ - inline bool hasFlagUnmapped(BamAlignmentRecord const & record) { return (record.flag & BAM_FLAG_UNMAPPED) == BAM_FLAG_UNMAPPED; } +inline void +toggleFlagUnmapped(BamAlignmentRecord & record) +{ + record.flag ^= BAM_FLAG_UNMAPPED; +} + + // ---------------------------------------------------------------------------- // Function hasFlagNextUnmapped() // ---------------------------------------------------------------------------- @@ -519,39 +399,25 @@ hasFlagUnmapped(BamAlignmentRecord const & record) * * @signature bool hasFlagNextUnmapped(record); * - * @param record The BamAlignmentRecord to query. + * @param[in] record The BamAlignmentRecord to query. * * @return bool true if the flag is set, false otherwise. + * + * @see BamFlags */ -/** -.Function.hasFlagNextUnmapped -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Return true if a @Class.BamAlignmentRecord@ has the "next fragment unmapped" flag set. -..signature:hasFlagNextUnmapped(record) -..param.record:The record to query. -...type:Class.BamAlignmentRecord -..returns:$bool$, indicating the flag's status. 
-..include:seqan/bam_io.h -..see:Function.hasFlagMultiple -..see:Function.hasFlagAllProper -..see:Function.hasFlagUnmapped -..see:Function.hasFlagRC -..see:Function.hasFlagNextRC -..see:Function.hasFlagFirst -..see:Function.hasFlagLast -..see:Function.hasFlagSecondary -..see:Function.hasFlagQCNoPass -..see:Function.hasFlagDuplicate -*/ - inline bool hasFlagNextUnmapped(BamAlignmentRecord const & record) { return (record.flag & BAM_FLAG_NEXT_UNMAPPED) == BAM_FLAG_NEXT_UNMAPPED; } +inline void +toggleFlagNextUnmapped(BamAlignmentRecord & record) +{ + record.flag ^= BAM_FLAG_NEXT_UNMAPPED; +} + // ---------------------------------------------------------------------------- // Function hasFlagRC() // ---------------------------------------------------------------------------- @@ -563,39 +429,25 @@ hasFlagNextUnmapped(BamAlignmentRecord const & record) * * @signature bool hasFlagRC(record); * - * @param record The BamAlignmentRecord to query. + * @param[in] record The BamAlignmentRecord to query. * * @return bool true if the flag is set, false otherwise. + * + * @see BamFlags */ -/** -.Function.hasFlagRC -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..cat:BAM I/O -..summary:Return true if a @Class.BamAlignmentRecord@ has the "reverse-complemented" flag set. -..signature:hasFlagRC(record) -..param.record:The record to query. -...type:Class.BamAlignmentRecord -..returns:$bool$, indicating the flag's status. 
-..include:seqan/bam_io.h -..see:Function.hasFlagMultiple -..see:Function.hasFlagAllProper -..see:Function.hasFlagUnmapped -..see:Function.hasFlagNextUnmapped -..see:Function.hasFlagNextRC -..see:Function.hasFlagFirst -..see:Function.hasFlagLast -..see:Function.hasFlagSecondary -..see:Function.hasFlagQCNoPass -..see:Function.hasFlagDuplicate -*/ inline bool hasFlagRC(BamAlignmentRecord const & record) { return (record.flag & BAM_FLAG_RC) == BAM_FLAG_RC; } +inline void +toggleFlagRC(BamAlignmentRecord & record) +{ + record.flag ^= BAM_FLAG_RC; +} + // ---------------------------------------------------------------------------- // Function hasFlagNextRC() // ---------------------------------------------------------------------------- @@ -607,39 +459,25 @@ hasFlagRC(BamAlignmentRecord const & record) * * @signature bool hasFlagNextRC(record); * - * @param record The BamAlignmentRecord to query. + * @param[in] record The BamAlignmentRecord to query. * * @return bool true if the flag is set, false otherwise. + * + * @see BamFlags */ -/** -.Function.hasFlagNextRC -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Return true if a @Class.BamAlignmentRecord@ has the "next fragment reverse-complemented" flag set. -..signature:hasFlagNextRC(record) -..param.record:The record to query. -...type:Class.BamAlignmentRecord -..returns:$bool$, indicating the flag's status. 
-..include:seqan/bam_io.h -..see:Function.hasFlagMultiple -..see:Function.hasFlagAllProper -..see:Function.hasFlagUnmapped -..see:Function.hasFlagNextUnmapped -..see:Function.hasFlagRC -..see:Function.hasFlagFirst -..see:Function.hasFlagLast -..see:Function.hasFlagSecondary -..see:Function.hasFlagQCNoPass -..see:Function.hasFlagDuplicate -*/ - inline bool hasFlagNextRC(BamAlignmentRecord const & record) { return (record.flag & BAM_FLAG_NEXT_RC) == BAM_FLAG_NEXT_RC; } +inline void +toggleFlagNextRC(BamAlignmentRecord & record) +{ + record.flag ^= BAM_FLAG_NEXT_RC; +} + // ---------------------------------------------------------------------------- // Function hasFlagFirst() // ---------------------------------------------------------------------------- @@ -651,39 +489,25 @@ hasFlagNextRC(BamAlignmentRecord const & record) * * @signature bool hasFlagFirst(record); * - * @param record The BamAlignmentRecord to query. + * @param[in] record The BamAlignmentRecord to query. * * @return bool true if the flag is set, false otherwise. + * + * @see BamFlags */ -/** -.Function.hasFlagFirst -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Return true if a @Class.BamAlignmentRecord@ has the "first fragment of template" flag set. -..signature:hasFlagFirst(record) -..param.record:The record to query. -...type:Class.BamAlignmentRecord -..returns:$bool$, indicating the flag's status. 
-..include:seqan/bam_io.h -..see:Function.hasFlagMultiple -..see:Function.hasFlagAllProper -..see:Function.hasFlagUnmapped -..see:Function.hasFlagNextUnmapped -..see:Function.hasFlagRC -..see:Function.hasFlagNextRC -..see:Function.hasFlagLast -..see:Function.hasFlagSecondary -..see:Function.hasFlagQCNoPass -..see:Function.hasFlagDuplicate -*/ - inline bool hasFlagFirst(BamAlignmentRecord const & record) { return (record.flag & BAM_FLAG_FIRST) == BAM_FLAG_FIRST; } +inline void +toggleFlagFirst(BamAlignmentRecord & record) +{ + record.flag ^= BAM_FLAG_FIRST; +} + // ---------------------------------------------------------------------------- // Function hasFlagLast() // ---------------------------------------------------------------------------- @@ -695,39 +519,25 @@ hasFlagFirst(BamAlignmentRecord const & record) * * @signature bool hasFlagLast(record); * - * @param record The BamAlignmentRecord to query. + * @param[in] record The BamAlignmentRecord to query. * * @return bool true if the flag is set, false otherwise. + * + * @see BamFlags */ -/** -.Function.hasFlagLast -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Return true if a @Class.BamAlignmentRecord@ has the "last fragment of template" flag set. -..signature:hasFlagLast(record) -..param.record:The record to query. -...type:Class.BamAlignmentRecord -..returns:$bool$, indicating the flag's status. 
-..include:seqan/bam_io.h -..see:Function.hasFlagMultiple -..see:Function.hasFlagAllProper -..see:Function.hasFlagUnmapped -..see:Function.hasFlagNextUnmapped -..see:Function.hasFlagRC -..see:Function.hasFlagNextRC -..see:Function.hasFlagFirst -..see:Function.hasFlagSecondary -..see:Function.hasFlagQCNoPass -..see:Function.hasFlagDuplicate -*/ - inline bool hasFlagLast(BamAlignmentRecord const & record) { return (record.flag & BAM_FLAG_LAST) == BAM_FLAG_LAST; } +inline void +toggleFlagLast(BamAlignmentRecord & record) +{ + record.flag ^= BAM_FLAG_LAST; +} + // ---------------------------------------------------------------------------- // Function hasFlagSecondary() // ---------------------------------------------------------------------------- @@ -739,39 +549,25 @@ hasFlagLast(BamAlignmentRecord const & record) * * @signature bool hasFlagSecondary(record); * - * @param record The BamAlignmentRecord to query. + * @param[in] record The BamAlignmentRecord to query. * * @return bool true if the flag is set, false otherwise. + * + * @see BamFlags */ -/** -.Function.hasFlagSecondary -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Return true if a @Class.BamAlignmentRecord@ has the "secondary alignment" flag set. -..signature:hasFlagSecondary(record) -..param.record:The record to query. -...type:Class.BamAlignmentRecord -..returns:$bool$, indicating the flag's status. 
-..include:seqan/bam_io.h -..see:Function.hasFlagMultiple -..see:Function.hasFlagAllProper -..see:Function.hasFlagUnmapped -..see:Function.hasFlagNextUnmapped -..see:Function.hasFlagRC -..see:Function.hasFlagNextRC -..see:Function.hasFlagFirst -..see:Function.hasFlagLast -..see:Function.hasFlagQCNoPass -..see:Function.hasFlagDuplicate -*/ - inline bool hasFlagSecondary(BamAlignmentRecord const & record) { return (record.flag & BAM_FLAG_SECONDARY) == BAM_FLAG_SECONDARY; } +inline void +toggleFlagSecondary(BamAlignmentRecord & record) +{ + record.flag ^= BAM_FLAG_SECONDARY; +} + // ---------------------------------------------------------------------------- // Function hasFlagQCNoPass() // ---------------------------------------------------------------------------- @@ -783,39 +579,25 @@ hasFlagSecondary(BamAlignmentRecord const & record) * * @signature bool hasFlagQCNoPass(record); * - * @param record The BamAlignmentRecord to query. + * @param[in] record The BamAlignmentRecord to query. * * @return bool true if the flag is set, false otherwise. + * + * @see BamFlags */ -/** -.Function.hasFlagQCNoPass -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Return true if a @Class.BamAlignmentRecord@ has the "does not pass quality controls" flag set. -..signature:hasFlagQCNoPass(record) -..param.record:The record to query. -...type:Class.BamAlignmentRecord -..returns:$bool$, indicating the flag's status. 
-..include:seqan/bam_io.h -..see:Function.hasFlagMultiple -..see:Function.hasFlagAllProper -..see:Function.hasFlagUnmapped -..see:Function.hasFlagNextUnmapped -..see:Function.hasFlagRC -..see:Function.hasFlagNextRC -..see:Function.hasFlagFirst -..see:Function.hasFlagLast -..see:Function.hasFlagSecondary -..see:Function.hasFlagDuplicate -*/ - inline bool hasFlagQCNoPass(BamAlignmentRecord const & record) { return (record.flag & BAM_FLAG_QC_NO_PASS) == BAM_FLAG_QC_NO_PASS; } +inline void +toggleFlagQCNoPass(BamAlignmentRecord & record) +{ + record.flag ^= BAM_FLAG_QC_NO_PASS; +} + // ---------------------------------------------------------------------------- // Function hasFlagDuplicate() // ---------------------------------------------------------------------------- @@ -827,39 +609,26 @@ hasFlagQCNoPass(BamAlignmentRecord const & record) * * @signature bool hasFlagDuplicate(record); * - * @param record The BamAlignmentRecord to query. + * @param[in] record The BamAlignmentRecord to query. * * @return bool true if the flag is set, false otherwise. + * + * @see BamFlags */ -/** -.Function.hasFlagDuplicate -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Return true if a @Class.BamAlignmentRecord@ has the "PCR or optical duplicate" flag set. -..signature:hasFlagDuplicate(record) -..param.record:The record to query. -...type:Class.BamAlignmentRecord -..returns:$bool$, indicating the flag's status. 
-..include:seqan/bam_io.h -..see:Function.hasFlagMultiple -..see:Function.hasFlagAllProper -..see:Function.hasFlagUnmapped -..see:Function.hasFlagNextUnmapped -..see:Function.hasFlagRC -..see:Function.hasFlagNextRC -..see:Function.hasFlagFirst -..see:Function.hasFlagLast -..see:Function.hasFlagSecondary -..see:Function.hasFlagQCNoPass -*/ - inline bool hasFlagDuplicate(BamAlignmentRecord const & record) { return (record.flag & BAM_FLAG_DUPLICATE) == BAM_FLAG_DUPLICATE; } +inline void +toggleFlagDuplicate(BamAlignmentRecord & record) +{ + record.flag ^= BAM_FLAG_DUPLICATE; +} + + // ---------------------------------------------------------------------------- // Function hasFlagSupplementary() // ---------------------------------------------------------------------------- @@ -882,6 +651,12 @@ hasFlagSupplementary(BamAlignmentRecord const & record) return (record.flag & BAM_FLAG_SUPPLEMENTARY) == BAM_FLAG_SUPPLEMENTARY; } +inline void +toggleFlagSupplementary(BamAlignmentRecord & record) +{ + record.flag ^= BAM_FLAG_SUPPLEMENTARY; +} + // ---------------------------------------------------------------------------- // Function getAlignmentLengthInRef() // ---------------------------------------------------------------------------- @@ -893,31 +668,21 @@ hasFlagSupplementary(BamAlignmentRecord const & record) * * @signature unsigned getAlignmentLengthInRef(record); * - * @param record The BamAlignmentRecord to compute length for. + * @param[in] record The BamAlignmentRecord to compute length for. * * @return unsigned The alignment length. + * + * @see BamFlags */ -/** -.Function.getAlignmentLengthInRef -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Returns length of @Class.BamAlignmentRecord@'s projection in reference. -..signature:getAlignmentLengthInRef(record) -..param.record:The record to query. -...type:Class.BamAlignmentRecord -..returns:$unsigned$, the alignment length in the reference. 
-..include:seqan/bam_io.h -*/ - inline unsigned getAlignmentLengthInRef(BamAlignmentRecord const & record) { unsigned l = 0; - _getLengthInRef(record.cigar, l); + _getLengthInRef(l, record.cigar); return l; } } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_RECORD_H_ +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_BAM_RECORD_H_ diff --git a/seqan/bam_io/bam_alignment_record_util.h b/seqan/bam_io/bam_alignment_record_util.h index bb9dd44..f0fc724 100644 --- a/seqan/bam_io/bam_alignment_record_util.h +++ b/seqan/bam_io/bam_alignment_record_util.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,26 +32,64 @@ // Author: Manuel Holtgrewe // ========================================================================== -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_ALIGNMENT_RECORD_UTIL_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_BAM_ALIGNMENT_RECORD_UTIL_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_BAM_ALIGNMENT_RECORD_UTIL_H_ +#define INCLUDE_SEQAN_BAM_IO_BAM_ALIGNMENT_RECORD_UTIL_H_ namespace seqan { // ============================================================================ -// Forwards +// Functions // ============================================================================ -// ============================================================================ -// Tags, Classes, Enums -// ============================================================================ +// ---------------------------------------------------------------------------- +// Function getContigName() +// ---------------------------------------------------------------------------- -// 
============================================================================ -// Metafunctions -// ============================================================================ +/*! + * @fn BamAlignmentRecord#getContigName + * @brief Return the name of the reference contig of a @link BamAlignmentRecord @endlink. + * + * @signature TNameString getContigName(record, file); + * + * @param[in] record The @link BamAlignmentRecord @endlink to query. + * @param[in] file The @link BamFileIn @endlink or @link BamFileOut @endlink where the record belongs to. + * + * @return TNameString The name of the reference contig. TNameString is the @link Value @endlink type of the NameStore. + * The NameStore type can be determined using the @link Member @endlink metafunction + * for the @link BamIOContext @endlink in conjunction with the @link BamIOContextMemberTag#NameStoreMember @endlink tag. + */ -// ============================================================================ -// Functions -// ============================================================================ +template +inline typename Value, Dependent<> >::Type, NameStoreMember>::Type>::Type const & +getContigName(BamAlignmentRecord const & record, FormattedFile const & file) +{ + return contigNames(context(file))[record.rID]; +} + +// ---------------------------------------------------------------------------- +// Function getContigLength() +// ---------------------------------------------------------------------------- + +/*! + * @fn BamAlignmentRecord#getContigLength + * @brief Return the length of the reference contig of a @link BamAlignmentRecord @endlink. + * + * @signature TLength getContigLength(record, file); + * + * @param[in] record The @link BamAlignmentRecord @endlink to query. + * @param[in] file The @link BamFileIn @endlink or @link BamFileOut @endlink where the record belongs to. + * + * @return TLength The length of the reference contig. TLength is the @link Value @endlink type of the LengthStore. 
+ * The LengthStore type can be determined using the @link Member @endlink metafunction + * for the @link BamIOContext @endlink in conjunction with the @link BamIOContextMemberTag#LengthStoreMember @endlink tag. + */ + +template +inline typename Value, Dependent<> >::Type, LengthStoreMember>::Type>::Type +getContigLength(BamAlignmentRecord const & record, FormattedFile const & file) +{ + return contigLengths(context(file))[record.rID]; +} // ---------------------------------------------------------------------------- // Function getClippedPos() @@ -98,7 +136,7 @@ getClippedPos(unsigned & posBegin, unsigned & posEnd, BamAlignmentRecord const & // Returns clipped infix of seq member. -inline Infix::Type +inline Infix::Type clippedSeqInfix(BamAlignmentRecord & record) { unsigned begPos = 0, endPos = 0; @@ -106,7 +144,7 @@ clippedSeqInfix(BamAlignmentRecord & record) return infix(record.seq, begPos, endPos); } -inline Infix::Type +inline Infix::Type clippedSeqInfix(BamAlignmentRecord const & record) { unsigned begPos = 0, endPos = 0; @@ -158,6 +196,8 @@ inline unsigned countPaddings(String > const & cigarString) // Function bamRecordToAlignment() // ---------------------------------------------------------------------------- +// TODO(holtgrew): Should rather be a global function? + /*! * @fn BamAlignmentRecord#bamRecordToAlignment * @headerfile @@ -165,9 +205,9 @@ inline unsigned countPaddings(String > const & cigarString) * * @signature void bamRecordToAlignment(align, reference, record); * - * @param align The @link Align @endlink object to create the alignment object in. - * @param reference The string with the reference that record lies on. - * @param record The @link BamAlignmentRecord @endlink to construct alignment from. + * @param[out] align The @link Align @endlink object to create the alignment object in. + * @param[in] reference The string with the reference that record lies on. 
+ * @param[in] record The @link BamAlignmentRecord @endlink to construct alignment from. * * The function will resize align to have two rows. The part of the reference that the read from * record aligns to will be copied to the first row and the sequence from record will be copied to the second @@ -190,29 +230,6 @@ inline unsigned countPaddings(String > const & cigarString) // TODO(holtgrew): Convert into full example. -/** -.Function.bamRecordToAlignment -..class:Class.BamAlignmentRecord -..cat:BAM I/O -..summary:Convert @Class.BamAlignmentRecord@ to an @Class.Align@ object. -..signature:bamRecordToAlignment(align, reference, record) -..param.align:The alignment to create. -...type:Class.Align -..param.reference:String of Dna, Dna5, ... characters. -...type:Class.String -..param.record:The alignment record to convert. -...type:Class.BamAlignmentRecord -..returns:$void$ -..include:seqan/bam_io.h -..example.code: -StringSet references; -BamAlignment record; -// Read references and record. 
-Align align; -if (record.rID != BamAlignmentRecord::INVALID_REFID) - bamRecordToAlignment(align, references[record.refId], record); - */ - template void bamRecordToAlignment(Align & result, TReference & reference, BamAlignmentRecord & record) @@ -225,12 +242,12 @@ bamRecordToAlignment(Align & result, TReference & reference, Bam setSource(row(result, 0), reference); setClippedEndPosition(row(result, 0), record.beginPos + len); setClippedBeginPosition(row(result, 0), record.beginPos); - cigarToGapAnchorContig(record.cigar, row(result, 0)); + cigarToGapAnchorContig(row(result, 0), record.cigar); assignSource(row(result, 1), record.seq); - cigarToGapAnchorRead(record.cigar, row(result, 1)); + cigarToGapAnchorRead(row(result, 1), record.cigar); } } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_ALIGNMENT_RECORD_UTIL_H_ +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_BAM_ALIGNMENT_RECORD_UTIL_H_ diff --git a/seqan/bam_io/bam_file.h b/seqan/bam_io/bam_file.h new file mode 100644 index 0000000..d262943 --- /dev/null +++ b/seqan/bam_io/bam_file.h @@ -0,0 +1,382 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: David Weese +// ========================================================================== +// Class for reading/writing files in SAM or BAM format. +// ========================================================================== + +#ifndef SEQAN_BAM_IO_BAM_FILE_H_ +#define SEQAN_BAM_IO_BAM_FILE_H_ + +namespace seqan { + +// ============================================================================ +// Typedefs +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Type BamFileIn +// ---------------------------------------------------------------------------- + +/*! + * @class BamFileIn + * @signature typedef FormattedFile BamFileIn; + * @extends FormattedFileIn + * @headerfile + * @brief Class for reading SAM and BAM files. 
+ * + * @see BamHeader + * @see BamAlignmentRecord + * + * @section Example + * + * Access SAM or BAM files. + * + * @include demos/tutorial/sam_and_bam_io/solution1.cpp + * + * The output is as follows: + * + * @include demos/tutorial/sam_and_bam_io/example.sam + */ + +typedef FormattedFile BamFileIn; + +// ---------------------------------------------------------------------------- +// Type BamFileOut +// ---------------------------------------------------------------------------- + +/*! + * @class BamFileOut + * @signature typedef FormattedFile BamFileOut; + * @extends FormattedFileOut + * @headerfile + * @brief Class for writing SAM and BAM files. + * + * @see BamHeader + * @see BamAlignmentRecord + * + * @section Example + * + * Access SAM or BAM files. + * + * @include demos/tutorial/sam_and_bam_io/solution1.cpp + * + * The output is as follows: + * + * @include demos/tutorial/sam_and_bam_io/example.sam + */ + +typedef FormattedFile BamFileOut; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Metafunction FormattedFileContext +// ---------------------------------------------------------------------------- + +template +struct FormattedFileContext, TStorageSpec> +{ + typedef StringSet TNameStore; + typedef NameStoreCache TNameStoreCache; + typedef BamIOContext Type; +}; + +// ---------------------------------------------------------------------------- +// Metafunction FileFormats +// ---------------------------------------------------------------------------- + +template +struct FileFormat > +{ +#if SEQAN_HAS_ZLIB + typedef TagSelector< + TagList > + > Type; +#else + typedef Sam Type; +#endif +}; + +// -------------------------------------------------------------------------- +// Function _mapBamFormatToCompressionFormat() +// 
-------------------------------------------------------------------------- + +inline BgzfFile +_mapFileFormatToCompressionFormat(Bam) +{ + return BgzfFile(); +} + +// ---------------------------------------------------------------------------- +// Function readHeader(); BamHeader +// ---------------------------------------------------------------------------- + +// support for dynamically chosen file formats +template +inline void +readHeader(BamHeader & /* header */, + BamIOContext & /* context */, + TForwardIter & /* iter */, + TagSelector<> const & /* format */) +{ + SEQAN_FAIL("BamFileIn: File format not specified."); +} + +template +inline void +readHeader(BamHeader & header, + BamIOContext & context, + TForwardIter & iter, + TagSelector const & format) +{ + typedef typename TTagList::Type TFormat; + + if (isEqual(format, TFormat())) + readHeader(header, context, iter, TFormat()); + else + readHeader(header, context, iter, static_cast::Base const &>(format)); +} + +// convient BamFile variant +template +inline void +readHeader(BamHeader & header, FormattedFile & file) +{ + readHeader(header, context(file), file.iter, file.format); +} + +// ---------------------------------------------------------------------------- +// Function readRecord(); BamAlignmentRecord +// ---------------------------------------------------------------------------- + +// support for dynamically chosen file formats +template +inline void +_readBamRecord(TBuffer & /* rawRecord */, TForwardIter & /* iter */, TagSelector<> const & /* format */) +{ + SEQAN_FAIL("BamFileIn: File format not specified."); +} + +template +inline void +_readBamRecord(TBuffer & rawRecord, TForwardIter & iter, TagSelector const & format) +{ + typedef typename TTagList::Type TFormat; + + if (isEqual(format, TFormat())) + _readBamRecord(rawRecord, iter, TFormat()); + else + _readBamRecord(rawRecord, iter, static_cast::Base const &>(format)); +} + +// 
---------------------------------------------------------------------------- +// Function readRecord(); BamAlignmentRecord +// ---------------------------------------------------------------------------- + +// support for dynamically chosen file formats +template +inline void +readRecord(BamAlignmentRecord & /* record */, + BamIOContext & /* context */, + TForwardIter & /* iter */, + TagSelector<> const & /* format */) +{ + SEQAN_FAIL("BamFileIn: File format not specified."); +} + +template +inline void +readRecord(BamAlignmentRecord & record, + BamIOContext & context, + TForwardIter & iter, + TagSelector const & format) +{ + typedef typename TTagList::Type TFormat; + + if (isEqual(format, TFormat())) + readRecord(record, context, iter, TFormat()); + else + readRecord(record, context, iter, static_cast::Base const &>(format)); +} + +// convient BamFile variant +template +inline void +readRecord(BamAlignmentRecord & record, FormattedFile & file) +{ + readRecord(record, context(file), file.iter, file.format); +} + +template +inline SEQAN_FUNC_ENABLE_IF(And::Type, BamAlignmentRecord>, + IsInteger >, TSize) +readRecords(TRecords & records, FormattedFile & file, TSize maxRecords) +{ + String & buffers = context(file).buffers; + if (static_cast(length(buffers)) < maxRecords) + resize(buffers, maxRecords, Exact()); + if (static_cast(length(records)) < maxRecords) + resize(records, maxRecords, Exact()); + + + TSize numRecords = 0; + for (; numRecords < maxRecords && !atEnd(file.iter); ++numRecords) + _readBamRecord(buffers[numRecords], file.iter, file.format); + +// SEQAN_OMP_PRAGMA(parallel for) + for (int i = 0; i < (int)numRecords; ++i) + { + CharIterator bufIter = begin(buffers[i]); + readRecord(records[i], context(file), bufIter, file.format); + } + return numRecords; +} + +// ---------------------------------------------------------------------------- +// Function writeHeader(); BamHeader +// 
---------------------------------------------------------------------------- + +// support for dynamically chosen file formats +template +inline void +write(TTarget & /* target */, + BamHeader const & /* header */, + BamIOContext & /* context */, + TagSelector<> const & /* format */) +{ + SEQAN_FAIL("BamFileOut: File format not specified."); +} + +template +inline void +write(TTarget & target, + BamHeader const & header, + BamIOContext & context, + TagSelector const & format) +{ + typedef typename TTagList::Type TFormat; + + if (isEqual(format, TFormat())) + write(target, header, context, TFormat()); + else + write(target, header, context, static_cast::Base const &>(format)); +} + +// convient BamFile variant +template +inline void +writeHeader(FormattedFile & file, BamHeader const & header) +{ + write(file.iter, header, context(file), file.format); +} + +// ---------------------------------------------------------------------------- +// Function writeRecord(); BamAlignmentRecord +// ---------------------------------------------------------------------------- + +// support for dynamically chosen file formats +template +inline void +write(TTarget & /* target */, + BamAlignmentRecord const & /* record */, + BamIOContext & /* context */, + TagSelector<> const & /* format */) +{ + SEQAN_FAIL("BamFileOut: File format not specified."); +} + +template +inline void +write(TTarget & target, + BamAlignmentRecord const & record, + BamIOContext & context, + TagSelector const & format) +{ + typedef typename TTagList::Type TFormat; + + if (isEqual(format, TFormat())) + write(target, record, context, TFormat()); + else + write(target, record, context, static_cast::Base const &>(format)); +} + +template +inline void +writeRecord(FormattedFile & file, BamAlignmentRecord const & record) +{ + write(file.iter, record, context(file), file.format); +} + +template +inline SEQAN_FUNC_ENABLE_IF(IsSameType::Type, BamAlignmentRecord>, void) +writeRecords(FormattedFile & file, TRecords const 
& records) +{ + String & buffers = context(file).buffers; + if (length(buffers) < length(records)) + resize(buffers, length(records)); + + SEQAN_OMP_PRAGMA(parallel for) + for (int i = 0; i < (int)length(records); ++i) + { + clear(buffers[i]); + write(buffers[i], records[i], context(file), file.format); + } + for (int i = 0; i < (int)length(records); ++i) + write(file.iter, buffers[i]); +} + +// ---------------------------------------------------------------------------- +// Function getFileExtensions() +// ---------------------------------------------------------------------------- +// NOTE(h-2): this is overloaded so we get Bgzf in addition to other +// compressions which is crucial for Bam + +template +static std::vector +getFileExtensions(FormattedFile const &) +{ + std::vector extensions; + _getCompressionExtensions(extensions, + typename FormattedFile::TFileFormats(), + CompressedFileTypes(), + false); + return extensions; +} + +} // namespace seqan + +#endif // SEQAN_BAM_IO_BAM_FILE_H_ diff --git a/seqan/bam_io/bam_header_record.h b/seqan/bam_io/bam_header_record.h index 466fdf3..0eeeb42 100644 --- a/seqan/bam_io/bam_header_record.h +++ b/seqan/bam_io/bam_header_record.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,19 +35,19 @@ // in headers. 
// ========================================================================== -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_HEADER_RECORD_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_BAM_HEADER_RECORD_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_BAM_HEADER_RECORD_H_ +#define INCLUDE_SEQAN_BAM_IO_BAM_HEADER_RECORD_H_ namespace seqan { -// ============================================================================ -// Forwards -// ============================================================================ - // ============================================================================ // Tags, Classes, Enums // ============================================================================ +// ---------------------------------------------------------------------------- +// Enum BamHeaderRecordType +// ---------------------------------------------------------------------------- + /*! * @enum BamHeaderRecordType * @headerfile @@ -55,35 +55,22 @@ namespace seqan { * * @signature enum BamHeaderRecordType; * - * @var BamHeaderRecordType BAM_HEADER_FIRST = 0; + * @val BamHeaderRecordType BAM_HEADER_FIRST = 0; * @brief Is the first header (HD). * - * @var BamHeaderRecordType BAM_HEADER_REFERENCE = 1; + * @val BamHeaderRecordType BAM_HEADER_REFERENCE = 1; * @brief Is a reference (SQ) header. * - * @var BamHeaderRecordType BAM_HEADER_READ_GROUP = 2; + * @val BamHeaderRecordType BAM_HEADER_READ_GROUP = 2; * @brief Is a read group (RG) header. * - * @var BamHeaderRecordType BAM_HEADER_PROGRAM = 3; + * @val BamHeaderRecordType BAM_HEADER_PROGRAM = 3; * @brief Is a program (PG) header. * - * @var BamHeaderRecordType BAM_HEADER_COMMENT = 4; + * @val BamHeaderRecordType BAM_HEADER_COMMENT = 4; * @brief Is a comment (CO) header. */ -/** -.Enum.BamHeaderRecordType -..cat:BAM I/O -..summary:Enumeration for the header record type. 
-..signature:BamHeaderRecordType -..value.BAM_HEADER_FIRST:@Class.BamHeaderRecord@ is of type $@HD$ -..value.BAM_HEADER_REFERENCE:@Class.BamHeaderRecord@ is of type $@SQ$ -..value.BAM_HEADER_READ_GROUP:@Class.BamHeaderRecord@ is of type $@RG$ -..value.BAM_HEADER_PROGRAM:@Class.BamHeaderRecord@ is of type $@PG$ -..value.BAM_HEADER_COMMENT:@Class.BamHeaderRecord@ is of type $@CO$ -..include:seqan/bam_io.h -*/ - enum BamHeaderRecordType { BAM_HEADER_FIRST = 0, @@ -93,6 +80,10 @@ enum BamHeaderRecordType BAM_HEADER_COMMENT = 4 }; +// ---------------------------------------------------------------------------- +// Enum BamSortOrder +// ---------------------------------------------------------------------------- + /*! * @enum BamSortOrder * @headerfile @@ -100,31 +91,19 @@ enum BamHeaderRecordType * * @signature enum BamSortOrder; * - * @var BamSortOrder BAM_SORT_UNKNOWN = 0; + * @val BamSortOrder BAM_SORT_UNKNOWN = 0; * @brief BAM file sort order is unknown. * - * @var BamSortOrder BAM_SORT_UNSORTED = 1; + * @val BamSortOrder BAM_SORT_UNSORTED = 1; * @brief BAM file is unsorted. * - * @var BamSortOrder BAM_SORT_QUERYNAME = 2; + * @val BamSortOrder BAM_SORT_QUERYNAME = 2; * @brief BAM file is sorted by query name; * - * @var BamSortOrder BAM_SORT_COORDINATE = 3; + * @val BamSortOrder BAM_SORT_COORDINATE = 3; * @brief BAM file is sorted by coordinate. */ -/** -.Enum.BamSortOrder -..cat:BAM I/O -..summary:Enumeration for the header record type. -..signature:BamSortOrder -..value.BAM_SORT_UNKNOWN:BAM file sort order is unknown. -..value.BAM_SORT_UNSORTED:BAM file is unsorted. -..value.BAM_SORT_QUERYNAME:BAM file is sorted by query name. -..value.BAM_SORT_COORDINATE:BAM file is sorted by coordinates. 
-..include:seqan/bam_io.h -*/ - enum BamSortOrder { BAM_SORT_UNKNOWN = 0, @@ -133,6 +112,10 @@ enum BamSortOrder BAM_SORT_COORDINATE = 3 }; +// ---------------------------------------------------------------------------- +// Class BamHeaderRecord +// ---------------------------------------------------------------------------- + /*! * @class BamHeaderRecord * @headerfile @@ -150,167 +133,79 @@ enum BamSortOrder * @brief Constructor. * @signature BamHeaderRecord::BamRecord(); * - * @section Remarks - * * Only the default constructor is provided. */ /*! * @typedef BamHeaderRecord::TTagName - * @brief Type of the tag keys. + * @brief Type of the tag keys (@link CharString @endlink). * @signature BamHeaderRecord::TTagName; * * @typedef BamHeaderRecord::TTagValue - * @brief Type of the tag values. + * @brief Type of the tag values (@link CharString @endlink). * @signature BamHeaderRecord::TTagValue; * * @typedef BamHeaderRecord::TTag - * @brief Type of the tag keys. + * @brief Type of the tag keys (@link Pair @endlink of TTagName and TTagValue). * @signature BamHeaderRecord::TTag; * * @typedef BamHeaderRecord::TTags - * @brief Type of the string of @link Pair Pairs @endlink. + * @brief Type of the tags string (@link AllocString @endlink of TTag). * @signature BamHeaderRecord::TTags; * - * @var BamHeaderRecordType BamHeaderRecord::type + * @var BamHeaderRecordType BamHeaderRecord::type; * @brief Type of the record. * - * @var TRecordString BamHeaderRecord::tags + * @var TRecordString BamHeaderRecord::tags; * @brief The header record's tags, of type @link BamHeaderRecord::TTags @endlink. */ -/** -.Class.BamHeaderRecord -..cat:BAM I/O -..summary:Represents a header entry in a SAM file or the header section of the BAM header. -..signature:BamHeaderRecord -..remarks:Comment records are stored with one tag where the key is empty and the value is the comment. 
-..include:seqan/bam_io.h - -.Memfunc.BamHeaderRecord#BamHeaderRecord -..class:Class.BamHeaderRecord -..signature:BamHeaderRecord() -..summary:Constructor. -..remarks:Only the default constructor is provided. - -.Typedef.BamHeaderRecord#TTagName -..class:Class.BamHeaderRecord -..summary:Type of the tag keys. - -.Typedef.BamHeaderRecord#TTagValue -..class:Class.BamHeaderRecord -..summary:Type of the tag values. - -.Typedef.BamHeaderRecord#TTag -..class:Class.BamHeaderRecord -..summary:@Class.Pair@ to use for storing tags. - -.Typedef.BamHeaderRecord#TTags -..class:Class.BamHeaderRecord -..summary:Type of the string of tag @Class.Pair|Pairs@. - -.Memvar.BamHeaderRecord#type -..summary:Type of the record. -..class:Class.BamHeaderRecord -..type:Enum.BamHeaderRecordType - -.Memvar.BamHeaderRecord#tags -..summary:The header record's tags. -..class:Class.BamHeaderRecord -..type:Typedef.BamHeaderRecord#TTags -*/ - class BamHeaderRecord { public: - typedef CharString TTagName; - typedef CharString TTagValue; - typedef Pair TTag; - typedef String TTags; + typedef CharString TTagName; + typedef CharString TTagValue; + typedef Pair TTag; + typedef String TTags; BamHeaderRecordType type; - String > tags; + TTags tags; - BamHeaderRecord() {} + BamHeaderRecord() : type(BAM_HEADER_FIRST) {} }; // ---------------------------------------------------------------------------- -// Function std::swap() +// Class BamHeader // ---------------------------------------------------------------------------- -inline void -swap(BamHeaderRecord &a, BamHeaderRecord &b) -{ - std::swap(a.type, b.type); - swap(a.tags, b.tags); -} - /*! * @class BamHeader * @headerfile - * @brief Stores the information of the BAM header. - * - * @signature class BamHeader; - * - * @fn BamHeader::BamHeader - * @brief Constructor. - * - * @signature BamHeader::BamHeader(); - * - * Only the the default constructor is provided. 
- * - * @var TSequenceInfos BamHeader::sequenceInfos - * @brief String of (seqId, length) with reference name / length information. - * - * TSequenceInfos is a String of @link Pair @endlink objects with entries @link CharString @endlink and - * __int32. + * @implements FormattedFileHeaderConcept + * @signature typedef String BamHeader; + * @brief Represent the information of the BAM header. * - * @var TBamHeaderRecords BamHeader::records - * @brief String of @link BamHeaderRecord BamHeaderRecords @endlink. + * @see BamFileIn + * @see BamFileOut */ -/** -.Class.BamHeader -..cat:BAM I/O -..summary:Stores the information of the BAM header. -..signature:BamHeader -..see:Class.BamHeaderRecord -..include:seqan/bam_io.h - -.Memfunc.BamHeader#BamHeader -..class:Class.BamHeader -..signature:BamHeader() -..summary:Constructor. -..remarks:Only the default constructor is provided. - -.Memvar.BamHeader#sequenceInfos -..class:Class.BamHeader -..summary:String of $(seqid, length)$ with reference name / length information. -..type:nolink:$String >$ - -.Memvar.BamHeader#records -..class:Class.BamHeader -..summary:String of @Class.BamHeaderRecord|BamHeaderRecords@. 
-..type:nolink:$String$ -*/ - -class BamHeader -{ -public: - typedef Pair TSequenceInfo; - - String > sequenceInfos; - String records; -}; - -// ============================================================================ -// Metafunctions -// ============================================================================ +typedef String BamHeader; // ============================================================================ // Functions // ============================================================================ +// ---------------------------------------------------------------------------- +// Function std::swap() +// ---------------------------------------------------------------------------- + +inline void +swap(BamHeaderRecord &a, BamHeaderRecord &b) +{ + std::swap(a.type, b.type); + swap(a.tags, b.tags); +} + // ---------------------------------------------------------------------------- // Function clear() // ---------------------------------------------------------------------------- @@ -319,13 +214,11 @@ class BamHeader * @fn BamHeaderRecord::clear * @brief Clear BamHeaderRecord object. * - * @signature void(record); + * @signature void clear(record); * - * @param record The record to clear. + * @param[in,out] record The record to clear. */ -///.Function.clear.param.object.type:Class.BamHeaderRecord - inline void clear(BamHeaderRecord & record) { @@ -336,24 +229,6 @@ clear(BamHeaderRecord & record) // Function findTagKey() // ---------------------------------------------------------------------------- -/** -.Function.BamHeaderRecord#findTagKey -..cat:BAM I/O -..summary:Find a tag's key of a @Class.BamHeaderRecord@. -..signature:findTagKey(idx, key, record) -..param.idx:The index of the found key is stored here. -...type:nolink:$unsigned$ -..param.key:The name of the tag key whose position is to be stored in $idx$. -...type:Shortcut.CharString -..param.record:The record to query. 
-...type:Class.BamHeaderRecord -..returns:$bool$, indicating whether the key could be found. -..include:seqan/bam_io.h -..example.code: -unsigned myIdx = 0; -bool keyFound = findTagKey(myIdx, "SN", record); -*/ - template inline bool findTagKey(unsigned & idx, TKeyName const & key, BamHeaderRecord const & record) @@ -381,9 +256,9 @@ findTagKey(unsigned & idx, TKeyName const & key, BamHeaderRecord const & record) * @signature bool getTagValue(tagValue, idx, record); * @signature bool getTagValue(tagValue, key, record); * - * @param tagValue The @link CharString @endlink to write the tag value to. - * @param idx An integer with the index of the tag in the header record. - * @param key A two-letter sequence with the key of the tag in the header record. + * @param[out] tagValue The @link CharString @endlink to write the tag value to. + * @param[in] idx An integer with the index of the tag in the header record. + * @param[in] key A two-letter sequence with the key of the tag in the header record. * * @return bool true in case the value could be retrieved, false otherwise. * @@ -395,28 +270,6 @@ findTagKey(unsigned & idx, TKeyName const & key, BamHeaderRecord const & record) * @endcode */ -/** -.Function.BamHeaderRecord#getTagValue -..cat:BAM I/O -..summary:Return tag value from a @Class.BamHeaderRecord@ or @Class.BamTagsDict@. -..signature:getTagValue(tagValue, idx, record) -..signature:getTagValue(tagValue, key, record) -..param.tagValue:The tag's value is stored here. -...type:Shortcut.CharString -..param.idx:The index of the tag whose value is to be retrieved. -...type:nolink:$unsigned$ -..param.key:The name of tag whose value is to be retrieved. -...type:Shortcut.CharString -..param.record:The record to query. -...type:Class.BamHeaderRecord -..returns:$bool$, indicating whether the value could be retrieved, always $true$ if $idx$ is given. 
-..include:seqan/bam_io.h -..example.code: -CharString tagValue; -bool keyFound = getTagValue(tagValue, "SN", record); -..see:Function.BamHeaderRecord#findTagKey -*/ - template SEQAN_FUNC_ENABLE_IF( IsInteger, @@ -448,33 +301,13 @@ inline getTagValue(CharString & value, TKeyName const & key, BamHeaderRecord con * @signature void setTagValue(idx, value, record); * @signature void setTagValue(key, value, record); * - * @param idx The index of the tag in the header record to set the value for. - * @param key The name of the tag (two-letter sequence) to set. - * @param record The header record to set the value for. + * @param[in,out] idx The index of the tag in the header record to set the value for. + * @param[in] key The name of the tag (two-letter sequence) to set. + * @param[in] record The header record to set the value for. */ // TODO(holtgrew): Parameter order! -/** -.Function.BamHeaderRecord#setTagValue -..cat:BAM I/O -..summary:Set tag value of a @Class.BamHeaderRecord@. -..signature:setTagValue(idx, tagValue, record) -..signature:setTagValue(key, tagValue, record) -..param.idx:The index of the tag whose value should be set. -...type:nolink:$unsigned$ -..param.key:The name of tag whose value should be set. -...type:Shortcut.CharString -..param.tagValue:The new tag value. -...type:Shortcut.CharString -..param.record:The record to query. 
-...type:Class.BamHeaderRecord -..include:seqan/bam_io.h -..example.code: -setTagValue("SN", "chr1", record); -..see:Function.BamHeaderRecord#findTagKey -*/ - template SEQAN_FUNC_ENABLE_IF( IsInteger, @@ -515,11 +348,9 @@ searchRecord(unsigned & recordIdx, BamHeaderRecordType recordType, unsigned startIdx) { - for (recordIdx = startIdx; recordIdx < length(header.records); ++recordIdx) - { - if (header.records[recordIdx].type == recordType) + for (recordIdx = startIdx; recordIdx < length(header); ++recordIdx) + if (header[recordIdx].type == recordType) return true; - } return false; } @@ -555,22 +386,20 @@ removeDuplicates(BamHeader & header) BamHeaderRecordTypeLess less; BamHeaderRecordEqual pred; - std::stable_sort(begin(header.records, Standard()), end(header.records, Standard()), less); + std::stable_sort(begin(header, Standard()), end(header, Standard()), less); - for (size_t uniqueBegin = 0, uniqueEnd = 1; uniqueEnd < length(header.records);) + for (size_t uniqueBegin = 0, uniqueEnd = 1; uniqueEnd < length(header);) { - if (less(header.records[uniqueBegin], header.records[uniqueEnd])) + if (less(header[uniqueBegin], header[uniqueEnd])) uniqueBegin = uniqueEnd; size_t j; for (j = uniqueBegin; j < uniqueEnd; ++j) - { - if (pred(header.records[j], header.records[uniqueEnd])) + if (pred(header[j], header[uniqueEnd])) { - erase(header.records, uniqueEnd); + erase(header, uniqueEnd); break; } - } if (j == uniqueEnd) ++uniqueEnd; @@ -583,7 +412,7 @@ getSortOrder(BamHeader const & header) CharString soString; for (unsigned recIdx = 0; searchRecord(recIdx, header, BAM_HEADER_FIRST, recIdx); ++recIdx) { - if (getTagValue(soString, "SO", header.records[recIdx])) + if (getTagValue(soString, "SO", header[recIdx])) { if (soString == "unsorted") return BAM_SORT_UNSORTED; @@ -601,48 +430,54 @@ getSortOrder(BamHeader const & header) inline void setSortOrder(BamHeader & header, BamSortOrder sortOrder) { - for (unsigned recIdx = 0; searchRecord(recIdx, header, BAM_HEADER_FIRST, 
recIdx); ++recIdx) + char const * soString; + switch (sortOrder) { - unsigned idx = 0; - if (findTagKey(idx, "SO", header.records[recIdx])) - { - CharString soString; - switch (sortOrder) - { - case BAM_SORT_UNSORTED: - soString = "unsorted"; - break; + case BAM_SORT_UNSORTED: + soString = "unsorted"; + break; - case BAM_SORT_QUERYNAME: - soString = "queryname"; - break; + case BAM_SORT_QUERYNAME: + soString = "queryname"; + break; - case BAM_SORT_COORDINATE: - soString = "coordinate"; - break; + case BAM_SORT_COORDINATE: + soString = "coordinate"; + break; - default: - soString = "unknown"; - } - setTagValue(idx, soString, header.records[recIdx]); - } + default: + soString = "unknown"; } -} - -// ---------------------------------------------------------------------------- -// Function clear() -// ---------------------------------------------------------------------------- -///.Function.clear.param.object.type:Class.BamHeader + bool notFound = true; + for (unsigned recIdx = 0; searchRecord(recIdx, header, BAM_HEADER_FIRST, recIdx); ++recIdx) + { + unsigned idx = 0; + if (findTagKey(idx, "SO", header[recIdx])) + { + notFound = false; + setTagValue(idx, soString, header[recIdx]); + } + } -inline void -clear(BamHeader & header) -{ - clear(header.sequenceInfos); - clear(header.records); + if (notFound) + { + unsigned recIdx = 0; + if (searchRecord(recIdx, header, BAM_HEADER_FIRST)) + { + setTagValue("SO", soString, header[recIdx]); + } + else + { + BamHeaderRecord rec; + rec.type = BAM_HEADER_FIRST; + setTagValue("VN", "1.4", rec); + setTagValue("SO", soString, rec); + insert(header, 0, rec); + } + } } - } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_HEADER_RECORD_H_ +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_BAM_HEADER_RECORD_H_ diff --git a/seqan/bam_io/bam_index_bai.h b/seqan/bam_io/bam_index_bai.h index da9a27f..ea60fd5 100644 --- a/seqan/bam_io/bam_index_bai.h +++ b/seqan/bam_io/bam_index_bai.h @@ -1,7 +1,7 @@ // 
========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -58,19 +58,36 @@ THE SOFTWARE. */ -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_INDEX_BAI_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_BAM_INDEX_BAI_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_BAM_INDEX_BAI_H_ +#define INCLUDE_SEQAN_BAM_IO_BAM_INDEX_BAI_H_ namespace seqan { -// ============================================================================ -// Forwards -// ============================================================================ - // ============================================================================ // Tags, Classes, Enums // ============================================================================ +// ---------------------------------------------------------------------------- +// Class BamIndex +// ---------------------------------------------------------------------------- + +/*! + * @class BamIndex + * @headerfile + * + * @brief Access to BAM indices. + * + * @signature template + * class BamIndex; + * + * This is an abstract class; don't use it itself but its specializations. + * + * @see BamFileIn + */ + +template +class BamIndex; + // ---------------------------------------------------------------------------- // Tag Bai // ---------------------------------------------------------------------------- @@ -95,8 +112,9 @@ struct BaiBamIndexBinData_ /*! * @class BaiBamIndex + * @headerfile * @extends BamIndex - * @brief Access to BAI (samtools-style) + * @brief Access to BAI (samtools-style). * * @signature template <> * class BamIndex; @@ -113,21 +131,6 @@ struct BaiBamIndexBinData_ * Only the default constructor is provided. 
*/ -/** -.Spec.BAI BamIndex -..cat:BAM I/O -..general:Class.BamIndex -..summary:Access to BAI (samtools-style) Indices. -..signature:BamIndex -..include:seqan/bam_io.h - -.Memfunc.BAI BamIndex#BamIndex -..class:Spec.BAI BamIndex -..signature:BamIndex() -..summary:Constructor. -..remarks:Only the default constructor is provided. -*/ - template <> class BamIndex { @@ -147,10 +150,6 @@ class BamIndex {} }; -// ============================================================================ -// Metafunctions -// ============================================================================ - // ============================================================================ // Functions // ============================================================================ @@ -159,59 +158,57 @@ class BamIndex // Function jumpToRegion() // ---------------------------------------------------------------------------- -/** -.Function.BamIndex#jumpToRegion -..class:Class.BamIndex -..cat:BAM I/O -..signature:jumpToRegion(bgzfStream, hasAlignments, bamIOContext, refId, pos, posEnd, bamIndex) -..summary:Seek in BAM BGZF stream using an index. -..remark:Note that because of the structure of BAI indices, you cannot simply jump to a position and you have to jump to region. -..param.bgzfStream:The BGZF Stream to seek in. -...type:Spec.BGZF Stream -..param.refId:Reference ID to seek to. -...type:nolink:$__int32$ -..param.hasAlignments:Set to $true$ iff there are alignments at this position. -...type:nolink:$bool$ -..param.bamIOContext:Context to use for loading alignments. -...type:Class.BamIOContext -..param.pos:Zero-based begin position in the reference. -...type:nolink:$__int32$ -..param.pos:Zero-based (exclusive, C-style) end position in the reference. -...type:nolink:$__int32$ -..param.bamIndex:The index to use. -...type:Class.BamIndex -..returns:$bool$ indicating success. -..remarks:This function may fail if the refId/pos is invalid. -..include:seqan/bam_io.h -*/ +/*! 
+ * @fn BamFileIn#jumpToRegion + * @brief Seek in BamFileIn using an index. + * + * You provide a region [pos, posEnd) on the reference refID that you want to jump to and the function + * jumps to the first alignment in this region, if any. + * + * @signature bool jumpToRegion(bamFileIn, hasAlignments, refID, pos, posEnd, index); + * + * @param[in,out] bamFileIn The @link BamFileIn @endlink to jump with. + * @param[out] hasAlignments A bool that is set true if the region [pos, posEnd) has any + * alignments. + * @param[in] refID The reference id to jump to (__int32). + * @param[in] pos The begin of the region to jump to (__int32). + * @param[in] posEnd The end of the region to jump to (__int32). + * @param[in] index The @link BamIndex @endlink to use for the jumping. + * + * @return bool true if seeking was successful, false if not. + * + * @section Remarks + * + * This function fails if refID/pos are invalid. + */ static inline void _baiReg2bins(String<__uint16> & list, __uint32 beg, __uint32 end) { - unsigned k; - if (beg >= end) return; - if (end >= 1u<<29) end = 1u<<29; - --end; - appendValue(list, 0); - for (k = 1 + (beg>>26); k <= 1 + (end>>26); ++k) appendValue(list, k); - for (k = 9 + (beg>>23); k <= 9 + (end>>23); ++k) appendValue(list, k); - for (k = 73 + (beg>>20); k <= 73 + (end>>20); ++k) appendValue(list, k); - for (k = 585 + (beg>>17); k <= 585 + (end>>17); ++k) appendValue(list, k); + unsigned k; + if (beg >= end) return; + if (end >= 1u<<29) end = 1u<<29; + --end; + appendValue(list, 0); + for (k = 1 + (beg>>26); k <= 1 + (end>>26); ++k) appendValue(list, k); + for (k = 9 + (beg>>23); k <= 9 + (end>>23); ++k) appendValue(list, k); + for (k = 73 + (beg>>20); k <= 73 + (end>>20); ++k) appendValue(list, k); + for (k = 585 + (beg>>17); k <= 585 + (end>>17); ++k) appendValue(list, k); for (k = 4681 + (beg>>14); k <= 4681 + (end>>14); ++k) appendValue(list, k); } -// TODO(holtgrew): Switch order of hasAlignments and stream, stream is state. 
- -template +template inline bool -jumpToRegion(Stream & stream, +jumpToRegion(FormattedFile & bamFile, bool & hasAlignments, - BamIOContext /*const*/ & bamIOContext, __int32 refId, __int32 pos, __int32 posEnd, BamIndex const & index) { + if (!isEqual(format(bamFile), Bam())) + return false; + hasAlignments = false; if (refId < 0) return false; // Cannot seek to invalid reference. @@ -287,7 +284,7 @@ jumpToRegion(Stream & stream, offsetCandidates.insert(it2->i1); } - // Search through candidate offsets, find smallest with a fitting alignment. + // Search through candidate offsets, find rightmost possible. // // Note that it is not necessarily the first. // @@ -296,30 +293,28 @@ jumpToRegion(Stream & stream, BamAlignmentRecord record; for (TOffsetCandidateIter candIt = offsetCandidates.begin(); candIt != offsetCandidates.end(); ++candIt) { - if (streamSeek(stream, *candIt, SEEK_SET) != 0) - return false; // Error while seeking. - if (readRecord(record, bamIOContext, stream, Bam()) != 0) - return false; // Error while reading. + setPosition(bamFile, *candIt); + + readRecord(record, bamFile); // std::cerr << "record.beginPos == " << record.beginPos << "\n"; // __int32 endPos = record.beginPos + getAlignmentLengthInRef(record); if (record.rID != refId) continue; // Wrong contig. - if (record.beginPos >= posEnd) - continue; // Cannot overlap with [pos, posEnd). + if (!hasAlignments || record.beginPos <= pos) + { + // Found a valid alignment. + hasAlignments = true; + offset = *candIt; + } - // Found an alignment. - hasAlignments = true; - offset = *candIt; - // std::cerr << "offset == " << offset << "\n"; - break; + if (record.beginPos >= posEnd) + break; // Cannot find overlapping any more. } if (offset != MaxValue<__uint64>::VALUE) - { - if (streamSeek(stream, offset, SEEK_SET) != 0) - return false; // Error while seeking. - } + setPosition(bamFile, offset); + // Finding no overlapping alignment is not an error, hasAlignments is false. 
return true; } @@ -328,30 +323,25 @@ jumpToRegion(Stream & stream, // Function jumpToOrphans() // ---------------------------------------------------------------------------- -/** -.Function.BamIndex#jumpToOrphans -..class:Class.BamIndex -..cat:BAM I/O -..signature:jumpToOrphans(bgzfStream, bamIOContext, bamIndex) -..summary:Seek to orphans block in BAM BGZF stream using an index. -..param.bgzfStream:The BGZF Stream to seek in. -...type:Spec.BGZF Stream -..param.bamIOContext:Context to use for loading alignments. -...type:Class.BamIOContext -..param.bamIndex:The index to use. -...type:Class.BamIndex -..returns:$bool$ indicating success. -..include:seqan/bam_io.h -*/ - -// TODO(holtgrew): Parameter order, see jumpToRegion()! +/*! + * @fn BamFileIn#jumpToOrphans + * @brief Seek to orphans block in BamFileIn using an index. + * + * @signature bool jumpToOrphans(bamFileIn, hasAlignments, index); + * + * @param[in,out] bamFileIn The @link BamFileIn @endlink object to jump with. + * @param[out] hasAlignments A bool that is set to true if there are any orphans. + * @param[in] index The @link BamIndex @endlink to use for jumping. + */ -template -bool jumpToOrphans(Stream & stream, +template +bool jumpToOrphans(FormattedFile & bamFile, bool & hasAlignments, - BamIOContext /*const*/ & bamIOContext, BamIndex const & index) { + if (!isEqual(format(bamFile), Bam())) + return false; + hasAlignments = false; // Search linear indices for the largest entry of all references. @@ -369,15 +359,12 @@ bool jumpToOrphans(Stream & stream, BamAlignmentRecord record; __uint64 offset = MaxValue<__uint64>::VALUE; __uint64 result = 0; - int res = streamSeek(stream, aliOffset, SEEK_SET); - if (res != 0) + if (!setPosition(bamFile, aliOffset)) return false; // Error while seeking. - while (!atEnd(stream)) + while (!atEnd(bamFile)) { - result = streamTell(stream); - res = readRecord(record, bamIOContext, stream, Bam()); - if (res != 0) - return false; // Error while reading. 
+ result = position(bamFile); + readRecord(record, bamFile); if (record.rID == -1) { // Found alignment. @@ -390,8 +377,7 @@ bool jumpToOrphans(Stream & stream, // Jump back to the first alignment. if (offset != MaxValue<__uint64>::VALUE) { - int res = streamSeek(stream, offset, SEEK_SET); - if (res != 0) + if (!setPosition(bamFile, offset)) return false; // Error while seeking. } @@ -403,17 +389,15 @@ bool jumpToOrphans(Stream & stream, // Function getUnalignedCount() // ---------------------------------------------------------------------------- -/** -.Function.BamIndex#getUnalignedCount -..class:Class.BamIndex -..cat:BAM I/O -..signature:getUnalignedCount(index) -..summary:Query index for number of unaligned reads. -..param.index:Index to query. -...type:Class.BamIndex -..returns:$__uint64$ with number of unaligned reads. -..include:seqan/bam_io.h -*/ +/*! + * @fn BamIndex#getUnalignedCount + * @brief Query index for number of unaligned reads. + * + * @signature __uint64 getUnalignedCount(index); + * + * @param[in] index Index to query. + * @return __uint64 The number of unaligned reads. + */ inline __uint64 getUnalignedCount(BamIndex const & index) @@ -422,43 +406,40 @@ getUnalignedCount(BamIndex const & index) } // ---------------------------------------------------------------------------- -// Function read() +// Function open() // ---------------------------------------------------------------------------- -/** -.Function.BamIndex#read -..class:Class.BamIndex -..cat:BAM I/O -..signature:read(index, filename) -..summary:Load a BAM index from a given file name. -..param.index:Target data structure. -...type:Class.BamIndex -..param.filename:Path to file to load. -...type:nolink:$char const *$ -..returns:$int$ status code, $0$ indicating success. -..include:seqan/bam_io.h +/*! + * @fn BamIndex#open + * @brief Load a BAM index from a given file name. + * @signature bool open(index, filename); + + * @param[in,out] index Target data structure. 
+ * @param[in] filename Path to file to load. Types: char const * + * + * @return bool Returns true on success, false otherwise. */ -inline int -read(BamIndex & index, char const * filename) +inline bool +open(BamIndex & index, char const * filename) { std::fstream fin(filename, std::ios::binary | std::ios::in); if (!fin.good()) - return 1; // Could not open file. + return false; // Could not open file. // Read magic number. CharString buffer; resize(buffer, 4); fin.read(&buffer[0], 4); if (!fin.good()) - return 1; + return false; if (buffer != "BAI\1") - return 1; // Magic number is wrong. + return false; // Magic number is wrong. __int32 nRef = 0; fin.read(reinterpret_cast(&nRef), 4); if (!fin.good()) - return 1; + return false; resize(index._linearIndices, nRef); resize(index._binIndices, nRef); @@ -469,7 +450,7 @@ read(BamIndex & index, char const * filename) __int32 nBin = 0; fin.read(reinterpret_cast(&nBin), 4); if (!fin.good()) - return 1; + return false; index._binIndices[i].clear(); BaiBamIndexBinData_ data; for (int j = 0; j < nBin; ++j) // For each bin. 
@@ -479,12 +460,12 @@ read(BamIndex & index, char const * filename) __uint32 bin = 0; fin.read(reinterpret_cast(&bin), 4); if (!fin.good()) - return 1; + return false; __int32 nChunk = 0; fin.read(reinterpret_cast(&nChunk), 4); if (!fin.good()) - return 1; + return false; reserve(data.chunkBegEnds, nChunk); for (int k = 0; k < nChunk; ++k) // For each chunk; { @@ -493,7 +474,7 @@ read(BamIndex & index, char const * filename) fin.read(reinterpret_cast(&chunkBeg), 8); fin.read(reinterpret_cast(&chunkEnd), 8); if (!fin.good()) - return 1; + return false; appendValue(data.chunkBegEnds, Pair<__uint64>(chunkBeg, chunkEnd)); } @@ -505,7 +486,7 @@ read(BamIndex & index, char const * filename) __int32 nIntv = 0; fin.read(reinterpret_cast(&nIntv), 4); if (!fin.good()) - return 1; + return false; clear(index._linearIndices[i]); reserve(index._linearIndices[i], nIntv); for (int j = 0; j < nIntv; ++j) @@ -513,13 +494,13 @@ read(BamIndex & index, char const * filename) __uint64 ioffset = 0; fin.read(reinterpret_cast(&ioffset), 8); if (!fin.good()) - return 1; + return false; appendValue(index._linearIndices[i], ioffset); } } if (!fin.good()) - return 1; + return false; // Read (optional) number of alignments without coordinate. __uint64 nNoCoord = 0; @@ -531,41 +512,37 @@ read(BamIndex & index, char const * filename) } index._unalignedCount = nNoCoord; - return 0; + return true; } // TODO(holtgrew): This is only here because of the read() function with TSequence in old file.h. 
-inline int -read(BamIndex & index, char * filename) +inline bool +open(BamIndex & index, char * filename) { - return read(index, static_cast(filename)); + return open(index, static_cast(filename)); } -// ---------------------------------------------------------------------------- -// Function buildIndex() -// ---------------------------------------------------------------------------- +// --------------------------------------------------------------------------- +// Function save() +// --------------------------------------------------------------------------- -/*DISABLED -.Function.BamIndex#buildIndex -..class:Class.BamIndex -..cat:BAM I/O -..signature:buildIndex(index, filename) -..summary:Build index for BAM file with given filename. -..remarks:This will create an index file named $filename + ".bai"$. -..param.index:Target data structure. -...type:Class.BamIndex -..param.filename:Path to BAM file to load. -...type:nolink:$char const *$ -..returns:$bool$ indicating success. -..include:seqan/bam_io.h +/*! + * @fn BamIndex#save + * @brief Save a BamIndex object. + * + * @signature bool save(baiIndex, baiFileName); + * + * @param[in] baiIndex The BamIndex to write out. + * @param[in] baiFileName The name of the BAI file to write to. + * + * @return bool true on success, false otherwise. */ -inline int _writeIndex(BamIndex const & index, char const * filename) +inline bool save(BamIndex const & index, char const * baiFilename) { - std::cerr << "WRITE INDEX TO " << filename << std::endl; - // Open output stream. - std::ofstream out(filename, std::ios::binary | std::ios::out); + // Open output file. + std::ofstream out(baiFilename, std::ios::binary | std::ios::out); SEQAN_ASSERT_EQ(length(index._binIndices), length(index._linearIndices)); @@ -604,7 +581,7 @@ inline int _writeIndex(BamIndex const & index, char const * filename) } // Write out linear index. 
- __int32 numIntervals = length(index._linearIndices); + __int32 numIntervals = length(linearIndex); out.write(reinterpret_cast(&numIntervals), 4); typedef Iterator const, Rooted>::Type TLinearIndexIter; for (TLinearIndexIter it = begin(linearIndex, Rooted()); !atEnd(it); goNext(it)) @@ -612,13 +589,14 @@ inline int _writeIndex(BamIndex const & index, char const * filename) } // Write the number of unaligned reads if set. - std::cerr << "UNALIGNED\t" << index._unalignedCount << std::endl; + //std::cerr << "UNALIGNED\t" << index._unalignedCount << std::endl; if (index._unalignedCount != maxValue<__uint64>()) out.write(reinterpret_cast(&index._unalignedCount), 8); - return !out.good(); // 1 on error, 0 on success. + return out.good(); // false on error, true on success. } + inline void _baiAddAlignmentChunkToBin(BamIndex & index, __uint32 currBin, __uint32 currOffset, @@ -642,34 +620,40 @@ inline void _baiAddAlignmentChunkToBin(BamIndex & index, } } -inline bool -buildIndex(BamIndex & index, char const * filename) +// --------------------------------------------------------------------------- +// Function build() +// --------------------------------------------------------------------------- +// TODO(dadi): uncomment when BamIndex.build index is fixed. DOX commented out +/* + * @fn BamIndex#build + * @brief Create a BamIndex from BAM file. + * + * @signature bool build(baiIndex, bamFileName); + * + * @param[out] baiIndex The BamIndex to build into. + * @param[in] bamFileName Path to the BAM file to build an index for. Type: char const *. + * + * @return bool true on success, false otherwise. + */ +/* +inline bool build(BamIndex & index, char const * bamFilename) { - SEQAN_FAIL("This does not work ye!"); + // SEQAN_FAIL("This does not work yet!"); index._unalignedCount = 0; clear(index._binIndices); clear(index._linearIndices); // Open BAM file for reading. 
- Stream bamStream; - if (!open(bamStream, filename, "r")) + BamFileIn bamFile; + if (!open(bamFile, bamFilename)) return false; // Could not open BAM file. - // Initialize BamIOContext. - typedef StringSet TNameStore; - typedef NameStoreCache TNameStoreCache; - - TNameStore refNameStore; - TNameStoreCache refNameStoreCache(refNameStore); - BamIOContext bamIOContext(refNameStore, refNameStoreCache); - // Read BAM header. BamHeader header; - int res = readRecord(header, bamIOContext, bamStream, Bam()); - if (res != 0) - return false; // Could not read BAM header. - __uint32 numRefSeqs = length(header.sequenceInfos); + readHeader(header, bamFile); + + __uint32 numRefSeqs = length(contigNames(context(bamFile))); // Scan over BAM file and create index. BamAlignmentRecord record; @@ -677,16 +661,14 @@ buildIndex(BamIndex & index, char const * filename) __uint32 prevBin = maxValue<__uint32>(); __int32 currRefId = BamAlignmentRecord::INVALID_REFID; __int32 prevRefId = BamAlignmentRecord::INVALID_REFID; - __uint64 currOffset = streamTell(bamStream); + __uint64 currOffset = position(bamFile); __uint64 prevOffset = currOffset; __int32 prevPos = minValue<__int32>(); - while (!atEnd(bamStream)) + while (!atEnd(bamFile)) { // Load next record. - res = readRecord(record, bamIOContext, bamStream, Bam()); - if (res != 0) - return false; + readRecord(record, bamFile); // Check ordering. if (prevRefId == record.rID && prevPos > record.beginPos) @@ -770,21 +752,21 @@ buildIndex(BamIndex & index, char const * filename) } // Make sure that the current file pointer is beyond prevOffset. - if (streamTell(bamStream) <= static_cast<__int64>(prevOffset)) + if (position(bamFile) <= static_cast<__int64>(prevOffset)) return false; // Calculating offsets failed. // Update prevOffset and prevPos. - prevOffset = streamTell(bamStream); + prevOffset = position(bamFile); prevPos = record.beginPos; } // Count remaining unaligned records. 
- while (!streamEof(bamStream)) + while (!atEnd(bamFile)) { SEQAN_ASSERT_GT(index._unalignedCount, 0u); - res = readRecord(record, bamIOContext, bamStream, Bam()); - if (res != 0 || record.rID >= 0) + readRecord(record, bamFile); + if (record.rID >= 0) return false; // Could not read record. index._unalignedCount += 1; @@ -803,16 +785,11 @@ buildIndex(BamIndex & index, char const * filename) } // Merge small bins if possible. - SEQAN_FAIL("TODO: Merge bins!"); - - // Write out index. - CharString baiFilename(filename); - append(baiFilename, ".bai"); - res = _writeIndex(index, toCString(baiFilename)); - - return (res == 0); + // SEQAN_FAIL("TODO: Merge bins!"); + return true; } +*/ } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_INDEX_BAI_H_ +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_BAM_INDEX_BAI_H_ diff --git a/seqan/bam_io/bam_io_context.h b/seqan/bam_io/bam_io_context.h index a5a07c5..17c50af 100644 --- a/seqan/bam_io/bam_io_context.h +++ b/seqan/bam_io/bam_io_context.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,17 +34,67 @@ // Class BamIOContext, accessor functions. 
// ========================================================================== -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_IO_CONTEXT_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_BAM_IO_CONTEXT_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_BAM_IO_CONTEXT_H_ +#define INCLUDE_SEQAN_BAM_IO_BAM_IO_CONTEXT_H_ namespace seqan { // ============================================================================ -// Forwards +// Tags // ============================================================================ +// ---------------------------------------------------------------------------- +// Tag NameStoreMember +// ---------------------------------------------------------------------------- + +/*! + * @defgroup BamIOContextMemberTag BamIOContext Member Tags + * @brief Defines standard tags used to get the type of the members of the @link BamIOContext @endlink using the @link Member @endlink metafunction. + */ + +/*! + * @tag BamIOContextMemberTag#NameStoreMember + * @brief Tag used to get the type for the NameStore. + * @headerfile + * + * @signature typedef Tag NameStoreMember; + */ + +struct NameStoreMember_; +typedef Tag NameStoreMember; + +// ---------------------------------------------------------------------------- +// Tag NameStoreCacheMember +// ---------------------------------------------------------------------------- + +/*! + * @tag BamIOContextMemberTag#NameStoreCacheMember + * @brief Tag used to get the type for the NameStoreCache. + * @headerfile + * + * @signature typedef Tag NameStoreCacheMember; + */ + +struct NameStoreCacheMember_; +typedef Tag NameStoreCacheMember; + +// ---------------------------------------------------------------------------- +// Tag LengthStoreMember +// ---------------------------------------------------------------------------- + +/*! + * @tag BamIOContextMemberTag#LengthStoreMember + * @brief Tag used to get the type for the LengthStore. 
+ * @headerfile + * + * @signature typedef Tag LengthStoreMember; + */ + +struct LengthStoreMember_; +typedef Tag LengthStoreMember; + // ============================================================================ -// Tags, Classes, Enums +// Classes // ============================================================================ /*! @@ -52,20 +102,22 @@ namespace seqan { * @headerfile * @brief The I/O context to use for BAM I/O. * - * @signature template > + * @signature template * class BamIOContext; * - * @tparam TNameStore The name store class. - * @tparam TNameStoreCache The name store cache class. Defaults to @link NameStoreCache @endlink <TNameStore>l;. + * @tparam TNameStore The type used to represent the names. + * @tparam TNameStoreCache The type used to cache the names. Defaults to @link NameStoreCache @endlink <TNameStore>l;. + * + * BamIOContext objects store the names of (and provide a cache for) reference contig names. * * @section Examples * * Creating a @link BamIOContext @endlink for a raw @link StringSet @endlink of @link CharString @endlink. * * @code{.cpp} - * StringSet nameStore; - * NameStoreCache > nameStoreCache(nameStore); - * BamIOContext > bamIOContext(nameStore, nameStoreCache); + * StringSet contigNames; + * NameStoreCache > contigNamesCache(contigNames); + * BamIOContext > bamIOContext(contigNames, contigNamesCache); * // ... * @endcode * @@ -88,84 +140,60 @@ namespace seqan { * @brief Constructor. * * @signature BamIOContext::BamIOContext(); + * @signature BamIOContext::BamIOContext(contigNameStore, contigNamesStoreCache); * - * @section Remarks - * - * Only the default constructor is provided. - */ - -/*! - * @typedef BamIOContext::TNameStore - * - * @brief The name store class. - - * @signature typedef (...) BamIOContext::TNameStore; - */ - -/*! - * @typedef BamIOContext::TNameStoreCache - * @brief The name store cache class. - * - * @signature typedef (...) 
BamIOContext::TNameStoreCache; + * Default constructor or construction with references to sequence and sample names. */ -/** -.Class.BamIOContext -..cat:BAM I/O -..signature:BamIOContext -..summary:The I/O context to use for BAM I/O. -..param.TNameStore:The name store class. -..param.TNameStoreCache:The name store cache class. -...default:@Class.NameStoreCache@ -..include:bam_io.h -..example.text:Creating a @Class.BamIOContext@ for a raw @Class.StringSet@ of @Shortcut.CharString@. -..example.code: -StringSet nameStore; -NameStoreCache > nameStoreCache(nameStore); -BamIOContext > bamIOContext(nameStore, nameStoreCache); -// ... -..example.text:Using a @Class.BamIOContext@ with a @Class.FragmentStore@. -..example.code: -typedef FragmentStore<>::TContigNameStore TNameStore; -typedef NameStoreCache TNameStoreCache; -FragmentStore<> store; -// Optionally, do something with store. -typedef BamIOContext TBamIOContext; -TBamIOContext bamIOContext(store.contigNameStore, store.contigNameStoreCache); -// ... - -.Memfunc.BamIOContext#BamIOContext -..class:Class.BamIOContext -..signature:BamIOContext() -..summary:Constructor. -..remarks:Only the default constructor is provided. - -.Typedef.BamIOContext#TNameStore -..class:Class.BamIOContext -..summary:The name store class. - -.Typedef.BamIOContext#TNameStoreCache -..class:Class.BamIOContext -..summary:The name store cache class. 
-*/ - -template > +template , + typename TNameStoreCache_ = NameStoreCache, + typename TStorageSpec = void> class BamIOContext { public: - typedef TNameStore_ TNameStore; - typedef TNameStoreCache_ TNameStoreCache; + typedef typename Member::Type TNameStore; + typedef typename Member::Type TNameStoreCache; + typedef typename Member::Type TLengthStore; + + typedef typename If, + Dependent<>, TStorageSpec>::Type TNSStorageSpec; + typedef typename If, + Owner<>, TStorageSpec>::Type TSLStorageSpec; - TNameStore * _nameStore; - TNameStoreCache * _nameStoreCache; - CharString buffer; - String translateFile2GlobalRefId; + typedef typename StorageSwitch::Type TNameStoreMember; + typedef typename StorageSwitch::Type TNameStoreCacheMember; + typedef typename StorageSwitch::Type TLengthStoreMember; - BamIOContext() : _nameStore(0), _nameStoreCache(0) + TNameStoreMember _contigNames; + TNameStoreCacheMember _contigNamesCache; + TLengthStoreMember _contigLengths; + CharString buffer; + String buffers; + String translateFile2GlobalRefId; + + BamIOContext() : + _contigNames(TNameStoreMember()), + _contigNamesCache(ifSwitch(typename IsPointer::Type(), + (TNameStoreCache*)NULL, + _contigNames)), + _contigLengths(TLengthStoreMember()) + {} + + BamIOContext(TNameStore & contigNames_, TNameStoreCache & contigNamesCache_) : + _contigNames(_referenceCast::Type>(contigNames_)), + _contigNamesCache(ifSwitch(typename IsPointer::Type(), + &contigNamesCache_, + _contigNames)), + _contigLengths(TLengthStoreMember()) {} - BamIOContext(TNameStore & nameStore, TNameStoreCache & nameStoreCache) : - _nameStore(&nameStore), _nameStoreCache(&nameStoreCache) + template + BamIOContext(BamIOContext & other) : + _contigNames(_referenceCast::Type>(contigNames(other))), + _contigNamesCache(ifSwitch(typename IsPointer::Type(), + &contigNamesCache(other), + _contigNames)), + _contigLengths(_referenceCast::Type>(contigLengths(other))) {} }; @@ -173,100 +201,153 @@ class BamIOContext // Metafunctions // 
============================================================================ +// ---------------------------------------------------------------------------- +// Metafunction NameStore +// ---------------------------------------------------------------------------- + +template +struct Member, + NameStoreMember> +{ + typedef TNameStore Type; +}; + +// ---------------------------------------------------------------------------- +// Metafunction NameStoreCache +// ---------------------------------------------------------------------------- + +template +struct Member, + NameStoreCacheMember> +{ + typedef TNameStoreCache Type; +}; + +// ---------------------------------------------------------------------------- +// Metafunction LengthStore +// ---------------------------------------------------------------------------- + +template +struct Member, + LengthStoreMember> +{ + typedef String<__int32> Type; +}; + // ============================================================================ // Functions // ============================================================================ // ---------------------------------------------------------------------------- -// Function nameStore() +// Function contigNames() // ---------------------------------------------------------------------------- /*! - * @fn BamIOContext#nameStoreCache - * @brief Return reference to name store cache from @link BamIOContext @endlink. + * @fn BamIOContext#contigNames + * @brief Return reference to contig names from @link BamIOContext @endlink. * - * @signature TNameStoreRef nameStoreCache(context); + * @signature TNameStoreRef contigNames(context); * - * @param context The @link BamIOContext @endlink to query. + * @param[in] context The @link BamIOContext @endlink to query. * - * @return TNameStoreRef A reference to the TNameStore of the context. + * @return TNameStoreRef A reference to the TNameStore of the context. 
*/ -/** -.Function.BamIOContext#nameStore -..class:Class.BamIOContext -..cat:BAM I/O -..summary:Return reference to name store from @Class.BamIOContext@. -..signature:nameStore(context) -..param.context:The @Class.BamIOContext@ to query. -...type:Class.BamIOContext -..see:Typedef.BamIOContext#TNameStore -..include:seqan/bam_io.h -*/ - -// TODO(holtgrew): Rename to referenceNameStore -template -TNameStore & -nameStore(BamIOContext & context) +template +inline TNameStore & +contigNames(BamIOContext & context) { - SEQAN_ASSERT(context._nameStore != 0); - return *context._nameStore; + return _referenceCast(context._contigNames); +} + +template +inline TNameStore const & +contigNames(BamIOContext const & context) +{ + return _referenceCast(context._contigNames); } template -TNameStore const & -nameStore(BamIOContext const & context) +inline void +setContigNames(BamIOContext > & context, TNameStore & contigNames) { - SEQAN_ASSERT(context._nameStore != 0); - return *context._nameStore; + context._contigNames = &contigNames; } // ---------------------------------------------------------------------------- -// Function nameStoreCache() +// Function contigLengths() // ---------------------------------------------------------------------------- -/* - * @fn BamIOContext#nameStore - * @headerfile - * @brief Return reference to name store from @link BamIOContext @endlink. +/*! + * @fn BamIOContext#contigLengths + * @brief Return reference to contig lengths from @link BamIOContext @endlink. * - * @signature TNameStoreCacheRef nameStore(context); + * @signature TLengthStoreRef contigLengths(context); * - * @param context The @link BamIOContext @endlink to query. + * @param[in] context The @link BamIOContext @endlink to query. * - * @return TNameStoreCacheRef A reference to the TNameStoreCache of the context. + * @return TLengthStoreRef A reference to the TLengthStore of the context. 
*/ -/** -.Function.BamIOContext#nameStoreCache -..class:Class.BamIOContext -..cat:BAM I/O -..summary:Return reference to name store cache from @Class.BamIOContext@. -..signature:nameStoreCache(context) -..param.context:The @Class.BamIOContext@ to query. -...type:Class.BamIOContext -..see:Typedef.BamIOContext#TNameStoreCache -..include:seqan/bam_io.h -..see:Function.BamIOContext#nameStore -*/ - -// TODO(holtgrew): Rename to referenceNameStoreCache -template -TNameStoreCache & -nameStoreCache(BamIOContext & context) +template +inline typename BamIOContext::TLengthStore & +contigLengths(BamIOContext & context) +{ + typedef typename BamIOContext::TLengthStore TLengthStore; + return _referenceCast(context._contigLengths); +} + +template +inline typename BamIOContext::TLengthStore const & +contigLengths(BamIOContext const & context) +{ + typedef typename BamIOContext::TLengthStore TLengthStore; + return _referenceCast(context._contigLengths); +} + +template +inline void +setContigLengths(BamIOContext > & context, TLengthStore & contigLengths) +{ + context._contigLengths = &contigLengths; +} + +// ---------------------------------------------------------------------------- +// Function contigNamesCache() +// ---------------------------------------------------------------------------- + +/*! + * @fn BamIOContext#contigNamesCache + * @brief Return reference to contig names cache from @link BamIOContext @endlink. + * + * @signature TNameStoreCacheRef contigNamesCache(context); + * + * @param[in] context The @link BamIOContext @endlink to query. + * + * @return TNameStoreCacheRef A reference to the TNameStoreCache of the context. 
+ */ + +template +inline TNameStoreCache & +contigNamesCache(BamIOContext & context) +{ + return _referenceCast(context._contigNamesCache); +} + +template +inline TNameStoreCache const & +contigNamesCache(BamIOContext const & context) { - SEQAN_ASSERT(context._nameStoreCache != 0); - return *context._nameStoreCache; + return _referenceCast(context._contigNamesCache); } template -TNameStoreCache const & -nameStoreCache(BamIOContext const & context) +inline void +setContigNamesCache(BamIOContext > & context, TNameStoreCache & contigNamesCache) { - SEQAN_ASSERT(context._nameStoreCache != 0); - return *context._nameStoreCache; + context._contigNamesCache = &contigNamesCache; } } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_IO_CONTEXT_H_ +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_BAM_IO_CONTEXT_H_ diff --git a/seqan/bam_io/bam_sam_conversion.h b/seqan/bam_io/bam_sam_conversion.h index 289abab..bd206ac 100644 --- a/seqan/bam_io/bam_sam_conversion.h +++ b/seqan/bam_io/bam_sam_conversion.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -30,12 +30,13 @@ // // ========================================================================== // Author: Manuel Holtgrewe +// Author: David Weese // ========================================================================== // Code to convert between SAM and BAM format tags (textual <-> binary). 
// ========================================================================== -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_SAM_CONVERSION_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_BAM_SAM_CONVERSION_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_BAM_SAM_CONVERSION_H_ +#define INCLUDE_SEQAN_BAM_IO_BAM_SAM_CONVERSION_H_ namespace seqan { @@ -59,245 +60,112 @@ namespace seqan { // Function assignTagsSamToBam() // ---------------------------------------------------------------------------- -template -void _assignTagsSamToBamOneTag(TTarget & target, TRecordReader & reader, CharString & buffer) +template +struct AppendTagsSamToBamOneTagHelper_ { - SEQAN_ASSERT_NOT(atEnd(reader)); - int res = readNChars(target, reader, 2); // Read tag name. - (void)res; // If run without assertions. - SEQAN_ASSERT_EQ(res, 0); - SEQAN_ASSERT_NOT(atEnd(reader)); + TTarget ⌖ + TBuffer buffer; + char typeC; + + AppendTagsSamToBamOneTagHelper_(TTarget &target, TBuffer buffer, char typeC): + target(target), + buffer(buffer), + typeC(typeC) + {} + + template + bool operator() (Type) + { + if (BamTypeChar::VALUE != typeC) + return false; + + appendRawPod(target, lexicalCast(buffer)); + return true; + } +}; + +template +void _appendTagsSamToBamOneTag(TTarget & target, TForwardIter & iter, CharString & buffer) +{ + write(target, iter, 2); clear(buffer); - res = readNChars(buffer, reader, 3); // Read '::'. 
- SEQAN_ASSERT_EQ(res, 0); + write(buffer, iter, 3); SEQAN_ASSERT_EQ(buffer[0], ':'); SEQAN_ASSERT_EQ(buffer[2], ':'); - char t = buffer[1]; - appendValue(target, t); - SEQAN_ASSERT_NOT(atEnd(reader)); + char typeC = buffer[1]; + appendValue(target, typeC); - switch (t) + switch (typeC) { - case 'A': - clear(buffer); - res = readNChars(target, reader, 1); - SEQAN_ASSERT_EQ(res, 0); - break; - case 'i': - { - clear(buffer); - res = readUntilTabOrLineBreak(buffer, reader); - SEQAN_ASSERT(res == 0 || res == EOF_BEFORE_SUCCESS); - __int32 x = 0; - bool b = lexicalCast2<__int32>(x, buffer); - (void)b; - SEQAN_ASSERT(b); - char const * ptr = reinterpret_cast(&x); - for (int i = 0; i < 4; ++i, ++ptr) - appendValue(target, *ptr); - } - break; - case 'f': - { - clear(buffer); - res = readUntilTabOrLineBreak(buffer, reader); - SEQAN_ASSERT(res == 0 || res == EOF_BEFORE_SUCCESS); - float x = 0; - bool b = lexicalCast2(x, buffer); - (void)b; - SEQAN_ASSERT(b); - char const * ptr = reinterpret_cast(&x); - for (int i = 0; i < 4; ++i, ++ptr) - appendValue(target, *ptr); - } - break; - case 'H': - case 'Z': - { + case 'Z': + case 'H': + // BAM string // TODO(holtgrew): Could test on even length in case of 'H'. - res = readUntilTabOrLineBreak(target, reader); - SEQAN_ASSERT(res == 0 || res == EOF_BEFORE_SUCCESS); + readUntil(target, iter, OrFunctor()); appendValue(target, '\0'); - } - break; - case 'B': + break; + + case 'B': { - CharString buffer2; // TODO(holtgrew): Also give from outside. + // BAM array // Read type. - clear(buffer); - res = readNChars(buffer, reader, 1); - SEQAN_ASSERT_EQ(res, 0); - char t2 = back(buffer); - appendValue(target, t2); + readOne(typeC, iter); + appendValue(target, typeC); // Read array contents. clear(buffer); - res = readUntilTabOrLineBreak(buffer, reader); - SEQAN_ASSERT(res == 0 || res == EOF_BEFORE_SUCCESS); - typename Iterator::Type it, it2; - // Search first non-comma position. 
- it = begin(buffer, Rooted()); - for (;!atEnd(it) && *it == ','; ++it) - continue; - // Count number of entries. - __int32 nEntries = !atEnd(it); // At least one if array not empty. - for (it2 = it; !atEnd(it2); ++it2) - nEntries += (*it2 == ','); - // Write out array length to result. - char const * ptr = reinterpret_cast(&nEntries); - for (int i = 0; i < 4; ++i, ++ptr) - appendValue(target, *ptr); - - // Now, write out the arrays, depending on the entry type. - // TODO(holtgrew): Whee, this could be a bit more compact... - switch (t2) + readUntil(buffer, iter, OrFunctor()); + + size_t len = length(buffer); + + // Count number of entries (== number of commas after type character). + __uint32 nEntries = 0; + for (size_t i = 0; i != len; ++i) + if (buffer[i] == ',') + ++nEntries; + + // Write out array length. + appendRawPod(target, (__uint32)nEntries); + + // Write out array values. + size_t startPos = 1; + for (unsigned i = 0; i < nEntries; ++i) { - case 'c': - for (int i = 0; i < nEntries; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - clear(buffer2); - for (; !atEnd(it) && *it != ',' && *it != '\t'; goNext(it)) - appendValue(buffer2, *it); - __int16 x = 0; // short to avoid textual interpretation in lexicalCast<> below. - bool b = lexicalCast2<__int16>(x, buffer2); - (void)b; - SEQAN_ASSERT(b); - appendValue(target, static_cast<__int8>(x)); - if (!atEnd(it) && *it == ',') - goNext(it); // Skip ','. - else - break; // End of field or end of string. - } - break; - case 'C': - for (int i = 0; i < nEntries; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - clear(buffer2); - for (; !atEnd(it) && *it != ',' && *it != '\t'; goNext(it)) - appendValue(buffer2, *it); - __uint16 x = 0; // short to avoid textual interpretation in lexicalCast<> below. - bool b = lexicalCast2<__uint16>(x, buffer2); - (void)b; - SEQAN_ASSERT(b); - appendValue(target, static_cast<__int8>(x)); - if (!atEnd(it) && *it == ',') - goNext(it); // Skip ','. - else - break; // End of field or end of string. 
- } - break; - case 's': - for (int i = 0; i < nEntries; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - clear(buffer2); - for (; !atEnd(it) && *it != ',' && *it != '\t'; goNext(it)) - appendValue(buffer2, *it); - __int16 x = 0; - bool b = lexicalCast2<__int16>(x, buffer2); - (void)b; - SEQAN_ASSERT(b); - char const * ptr = reinterpret_cast(&x); // write out byte-wise - for (int i = 0; i < 2; ++i, ++ptr) - appendValue(target, *ptr); - if (!atEnd(it) && *it == ',') - goNext(it); // Skip ','. - else - break; // End of field or end of string. - } - break; - case 'S': - for (int i = 0; i < nEntries; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - clear(buffer2); - for (; !atEnd(it) && *it != ',' && *it != '\t'; goNext(it)) - appendValue(buffer2, *it); - __uint16 x = 0; - bool b = lexicalCast2<__uint16>(x, buffer2); - (void)b; - SEQAN_ASSERT(b); - char const * ptr = reinterpret_cast(&x); // write out byte-wise - for (int i = 0; i < 2; ++i, ++ptr) - appendValue(target, *ptr); - if (!atEnd(it) && *it == ',') - goNext(it); // Skip ','. - else - break; // End of field or end of string. - } - break; - case 'i': - for (int i = 0; i < nEntries; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - clear(buffer2); - for (; !atEnd(it) && *it != ',' && *it != '\t'; goNext(it)) - appendValue(buffer2, *it); - __int32 x = 0; - bool b = lexicalCast2<__int32>(x, buffer2); - (void)b; - SEQAN_ASSERT(b); - char const * ptr = reinterpret_cast(&x); // write out byte-wise - for (int i = 0; i < 4; ++i, ++ptr) - appendValue(target, *ptr); - if (!atEnd(it) && *it == ',') - goNext(it); // Skip ','. - else - break; // End of field or end of string. 
- } - break; - case 'I': - for (int i = 0; i < nEntries; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - clear(buffer2); - for (; !atEnd(it) && *it != ',' && *it != '\t'; goNext(it)) - appendValue(buffer2, *it); - __uint32 x = 0; - bool b = lexicalCast2<__uint32>(x, buffer2); - (void)b; - SEQAN_ASSERT(b); - char const * ptr = reinterpret_cast(&x); // write out byte-wise - for (int i = 0; i < 4; ++i, ++ptr) - appendValue(target, *ptr); - if (!atEnd(it) && *it == ',') - goNext(it); // Skip ','. - else - break; // End of field or end of string. - } - break; - case 'f': - for (int i = 0; i < nEntries; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - clear(buffer2); - for (; !atEnd(it) && *it != ',' && *it != '\t'; goNext(it)) - appendValue(buffer2, *it); - float x = 0; - bool b = lexicalCast2(x, buffer2); - (void)b; - SEQAN_ASSERT(b); - char const * ptr = reinterpret_cast(&x); // write out byte-wise - for (int i = 0; i < 4; ++i, ++ptr) - appendValue(target, *ptr); - if (!atEnd(it) && *it == ',') - goNext(it); // Skip ','. - else - break; // End of field or end of string. 
- } - break; - default: - SEQAN_FAIL("Invalid array type: %c!", t2); + SEQAN_ASSERT_LT(startPos, len); + + // search end of current entry + size_t endPos = startPos; + for (; endPos < len; ++endPos) + if (buffer[endPos] == ',') + { + buffer[endPos] = '\0'; + break; + } + + AppendTagsSamToBamOneTagHelper_ func(target, + toCString(buffer) + startPos, + typeC); + if (!tagApply(func, BamTagTypes())) + SEQAN_ASSERT_FAIL("Invalid tag type: %c!", typeC); + + startPos = endPos + 1; } + break; + } + + default: + { + // BAM simple value + clear(buffer); + readUntil(buffer, iter, OrFunctor()); + + AppendTagsSamToBamOneTagHelper_ func(target, buffer, typeC); + if (!tagApply(func, BamTagTypes())) + SEQAN_ASSERT_FAIL("Invalid tag type: %c!", typeC); } - break; - default: - SEQAN_ASSERT_FAIL("Invalid tag type: %c!", t); } } @@ -308,323 +176,152 @@ void _assignTagsSamToBamOneTag(TTarget & target, TRecordReader & reader, CharStr * * @signature void assignTagsBamToSam(bamTags, samTags); * - * @param bamTags[out] A sequence of char (e.g. @link CharString @endlink) for the target BAM tags. - * @param samTags[in] A sequence of char (e.g. @link CharString @endlink) for the source SAM tags. + * @param[out] bamTags A sequence of char (e.g. @link CharString @endlink) for the target BAM tags. + * @param[in] samTags A sequence of char (e.g. @link CharString @endlink) for the source SAM tags. * * @see assignTagsBamToSam */ -/** -.Function.assignTagsSamToBam -..cat:BAM I/O -..summary:Assign tags in SAM format to tags in BAM format. -..signature:assignTagsSamToBam(bamTags, samTags) -..param.bamTags:Destination BAM tags. -...type:Shortcut.CharString -..param.samTags:Source SAM tags. -...type:Shortcut.CharString -..returns:$void$ -..include:seqan/bam_io.h -*/ - template -void assignTagsSamToBam(TTarget & target, TSource & source) +void appendTagsSamToBam(TTarget & target, TSource const & source) { // Handle case of empty source sequence. 
if (empty(source)) - clear(target); - - typedef typename Iterator::Type TSourceIter; - TSourceIter it = begin(source, Standard()); - TSourceIter itEnd = end(source, Standard()); + return; - typedef Stream > TStream; - typedef RecordReader > TRecordReader; - - TStream stream(it, itEnd); - TRecordReader reader(stream); + typedef typename Iterator::Type TSourceIter; + TSourceIter it = begin(source, Rooted()); CharString buffer; - while (!atEnd(reader)) + while (!atEnd(it)) { - if (value(reader) == '\t') - goNext(reader); - SEQAN_ASSERT_NOT(atEnd(reader)); + if (value(it) == '\t') + skipOne(it); - _assignTagsSamToBamOneTag(target, reader, buffer); + _appendTagsSamToBamOneTag(target, it, buffer); } } +template +void assignTagsSamToBam(TTarget & target, TSource const & source) +{ + clear(target); + appendTagsSamToBam(target, source); +} + // ---------------------------------------------------------------------------- // Function assignTagsBamToSam() // ---------------------------------------------------------------------------- template -void _assignTagsBamToSamOneTag(TTarget & target, TSourceIter & it) +struct AssignTagsBamToSamOneTagHelper_ +{ + TTarget ⌖ + TSourceIter ⁢ + char typeC; + + AssignTagsBamToSamOneTagHelper_(TTarget &target, TSourceIter &it, char typeC): + target(target), + it(it), + typeC(typeC) + {} + + template + bool operator() (Type) + { + if (BamTypeChar::VALUE != typeC) + return false; + + appendNumber(target, reinterpret_cast(*it)); + it += sizeof(Type); + return true; + } + + bool operator() (char) + { + if (BamTypeChar::VALUE != typeC) + return false; + + writeValue(target, getValue(it)); + ++it; + return true; + } +}; + +template +void _appendTagsBamToSamOneTag(TTarget & target, TSourceIter & it) { // Copy tag name. SEQAN_ASSERT_NOT(atEnd(it)); - appendValue(target, *it++); + writeValue(target, *it++); SEQAN_ASSERT_NOT(atEnd(it)); - appendValue(target, *it++); - unsigned char t = *it; + writeValue(target, *it++); // Add ':'. 
- appendValue(target, ':'); + writeValue(target, ':'); - // Add type. - SEQAN_ASSERT_NOT(atEnd(it)); - if (*it == 'c' || *it == 'C' || *it == 's' || *it == 'S' || *it == 'i' || *it == 'I') - appendValue(target, 'i'); + char typeC = *it++; + char c = FunctorLowcase()(typeC); + + // The only integer type supported is a 32bit signed int (SAM Format Spec, 28 Feb 2014, Section 1.5) + // This sucks as this projection is not identically reversible + if (c == 'c' || c == 's' || c == 'i') + writeValue(target, 'i'); else - appendValue(target, *it); - ++it; + writeValue(target, typeC); // Add ':'. - appendValue(target, ':'); + writeValue(target, ':'); - // Convert the payload, depending on the field's type. - - switch (t) + switch (typeC) { - case 'A': - appendValue(target, *it++); - break; - case 'c': - { + case 'Z': + case 'H': + // BAM string SEQAN_ASSERT_NOT(atEnd(it)); - __int8 x = *it++; - char buffer[4]; - snprintf(buffer, 4, "%d", x); - append(target, buffer); - } - break; - case 'C': - { - SEQAN_ASSERT_NOT(atEnd(it)); - char buffer[4]; - __uint8 x = *it++; - snprintf(buffer, 4, "%u", x); - append(target, buffer); - } - break; - case 's': - { - __int16 x = 0; - char * ptr = reinterpret_cast(&x); - for (int i = 0; i < 2; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - *ptr++ = *it++; - } - char buffer[32]; - snprintf(buffer, 32, "%d", x); - append(target, buffer); - } - break; - case 'S': - { - __uint16 x = 0; - char * ptr = reinterpret_cast(&x); - for (int i = 0; i < 2; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - *ptr++ = *it++; - } - char buffer[32]; - snprintf(buffer, 32, "%u", x); - append(target, buffer); - } - break; - case 'i': - { - int x = 0; - char * ptr = reinterpret_cast(&x); - for (int i = 0; i < 4; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - *ptr++ = *it++; - } - char buffer[32]; - snprintf(buffer, 32, "%d", x); - append(target, buffer); - } - break; - case 'I': - { - unsigned x = 0; - char * ptr = reinterpret_cast(&x); - for (int i = 0; i < 4; ++i) - { - 
SEQAN_ASSERT_NOT(atEnd(it)); - *ptr++ = *it++; - } - char buffer[32]; - snprintf(buffer, 32, "%u", x); - append(target, buffer); - } - break; - case 'f': - { - float x = 0; - char * ptr = reinterpret_cast(&x); - for (int i = 0; i < 4; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - *ptr++ = *it++; - } - char buffer[32]; - snprintf(buffer, 32, "%g", x); - append(target, buffer); - } - break; - case 'Z': - { - while (*it != '\0') - { - SEQAN_ASSERT_NOT(atEnd(it)); - appendValue(target, *it++); - } - SEQAN_ASSERT_NOT(atEnd(it)); - it++; - } - break; - case 'H': - { while (*it != '\0') { + writeValue(target, *it); + ++it; SEQAN_ASSERT_NOT(atEnd(it)); - appendValue(target, *it++); } - SEQAN_ASSERT_NOT(atEnd(it)); - it++; - } - break; - case 'B': + ++it; + break; + + case 'B': { - // Read type. - char t2 = *it++; - appendValue(target, t2); + // BAM array + typeC = *it++; + writeValue(target, typeC); + AssignTagsBamToSamOneTagHelper_ func(target, it, typeC); + // Read array length. - __int32 x = 0; - char * ptr = reinterpret_cast(&x); - for (int i = 0; i < 4; ++i) + union { + char raw[4]; + unsigned len; + } tmp; + for (unsigned i = 0; i < 4; ++i) { SEQAN_ASSERT_NOT(atEnd(it)); - *ptr++ = *it++; + tmp.raw[i] = *it++; } - // Depending on t2, read array. - // TODO(holtgrew): Whee, this could be a bit more compact... 
- switch (t2) + for (unsigned i = 0; i < tmp.len; ++i) { - case 'c': - for (__int32 i = 0; i < x; ++i) - { - appendValue(target, ','); - __int8 y = *it++; - char buffer[32]; - snprintf(buffer, 32, "%d", y); - append(target, buffer); - } - break; - case 'C': - for (__int32 i = 0; i < x; ++i) - { - appendValue(target, ','); - __uint8 y = *it++; - char buffer[32]; - snprintf(buffer, 32, "%u", y); - append(target, buffer); - } - break; - case 's': - for (__int32 i = 0; i < x; ++i) - { - appendValue(target, ','); - __int16 y = 0; - char * ptr = reinterpret_cast(&y); - for (int i = 0; i < 2; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - *ptr++ = *it++; - } - char buffer[32]; - snprintf(buffer, 32, "%d", y); - append(target, buffer); - } - break; - case 'S': - for (__int32 i = 0; i < x; ++i) - { - appendValue(target, ','); - __uint16 y = 0; - char * ptr = reinterpret_cast(&y); - for (int i = 0; i < 2; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - *ptr++ = *it++; - } - char buffer[32]; - snprintf(buffer, 32, "%d", y); - append(target, buffer); - } - break; - case 'i': - for (__int32 i = 0; i < x; ++i) - { - appendValue(target, ','); - int y = 0; - char * ptr = reinterpret_cast(&y); - for (int i = 0; i < 4; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - *ptr++ = *it++; - } - char buffer[32]; - snprintf(buffer, 32, "%d", y); - append(target, buffer); - } - break; - case 'I': - for (__int32 i = 0; i < x; ++i) - { - appendValue(target, ','); - unsigned y = 0; - char * ptr = reinterpret_cast(&y); - for (int i = 0; i < 4; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - *ptr++ = *it++; - } - char buffer[32]; - snprintf(buffer, 32, "%u", y); - append(target, buffer); - } - break; - case 'f': - for (__int32 i = 0; i < x; ++i) - { - appendValue(target, ','); - float y = 0; - char * ptr = reinterpret_cast(&y); - for (int i = 0; i < 4; ++i) - { - SEQAN_ASSERT_NOT(atEnd(it)); - *ptr++ = *it++; - } - char buffer[32]; - snprintf(buffer, 32, "%g", y); - append(target, buffer); - } - break; - default: - 
SEQAN_FAIL("Invalid array type: %c!", t2); + writeValue(target, ','); + if (!tagApply(func, BamTagTypes())) + SEQAN_ASSERT_FAIL("Invalid tag type: %c!", typeC); } + break; + } + + default: + { + // BAM simple value + AssignTagsBamToSamOneTagHelper_ func(target, it, typeC); + if (!tagApply(func, BamTagTypes())) + SEQAN_ASSERT_FAIL("Invalid tag type: %c!", typeC); } - break; - default: - SEQAN_ASSERT_FAIL("Invalid tag type: %c!", t); } } @@ -635,48 +332,38 @@ void _assignTagsBamToSamOneTag(TTarget & target, TSourceIter & it) * * @signature void assignTagsBamToSam(samTags, bamTags); * - * @param samTags[out] A sequence of char (e.g. @link CharString @endlink) for the target SAM tags. - * @param bamTags[in] A sequence of char (e.g. @link CharString @endlink) for the source BAM tags. + * @param[out] samTags A sequence of char (e.g. @link CharString @endlink) for the target SAM tags. + * @param[in] bamTags A sequence of char (e.g. @link CharString @endlink) for the source BAM tags. * * @see assignTagsSamToBam */ -/** -.Function.assignTagsBamToSam -..cat:BAM I/O -..summary:Assign tags in BAM format to tags in SAM format. -..signature:assignTagsSamToBam(bamTags, samTags) -..param.samTags:Destination SAM tags. -...type:Shortcut.CharString -..param.bamTags:Source BAM tags. -...type:Shortcut.CharString -..returns:$void$ -..include:seqan/bam_io.h -..see:Function.assignTagsSamToBam -*/ - template -void assignTagsBamToSam(TTarget & target, TSource const & source) +inline void +appendTagsBamToSam(TTarget & target, TSource const & source) { - // Handle case of empty source sequence. 
if (empty(source)) - clear(target); - - clear(target); + return; typedef typename Iterator::Type TSourceIter; TSourceIter it = begin(source, Rooted()); - bool first = true; - while (!atEnd(it)) + while (true) { - if (!first) - appendValue(target, '\t'); - first = false; - _assignTagsBamToSamOneTag(target, it); + _appendTagsBamToSamOneTag(target, it); + if (atEnd(it)) + return; + writeValue(target, '\t'); } } +template +void assignTagsBamToSam(TTarget & target, TSource const & source) +{ + clear(target); + appendTagsBamToSam(target, source); +} + } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_SAM_CONVERSION_H_ +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_BAM_SAM_CONVERSION_H_ diff --git a/seqan/bam_io/bam_scanner_cache.h b/seqan/bam_io/bam_scanner_cache.h new file mode 100644 index 0000000..85775e1 --- /dev/null +++ b/seqan/bam_io/bam_scanner_cache.h @@ -0,0 +1,318 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: David Weese +// ========================================================================== + +#ifndef INCLUDE_SEQAN_BAM_IO_BAM_SCANNER_CACHE_H_ +#define INCLUDE_SEQAN_BAM_IO_BAM_SCANNER_CACHE_H_ + +#ifdef SEQAN_CXX11_STANDARD +#include +#include +#else +#include +#include +#endif + +namespace seqan { + +#ifdef SEQAN_CXX11_STANDARD +using namespace std; +#else +using namespace std::tr1; +#endif + +// ============================================================================ +// Forwards +// ============================================================================ + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +struct BamScannerCacheKey_ +{ + __int32 rID; + __int32 beginPos; + __uint64 qnameHash; + + bool operator== (BamScannerCacheKey_ const &other) const + { + return rID == other.rID && beginPos == other.beginPos && qnameHash == other.qnameHash; + } +}; + +struct BamScannerCacheSearchKey_ +{ + typedef __uint16 TFlag; + + BamScannerCacheKey_ 
cacheKey; + TFlag flags; + TFlag flagsMask; +}; + +struct BamScannerCacheHash_ : + std::unary_function +{ + size_t operator()(BamScannerCacheKey_ const &v) const + { + return std::hash<__int32>()(v.rID) ^ std::hash<__int32>()(v.beginPos) ^ std::hash<__uint64>()(v.qnameHash); + } +}; + +class BamScannerCache +{ +public: + // The Key is a pair of (genomic pos, name) where genomic pos is a pair of (rId, pos). + typedef String TRecords; + typedef Size::Type TRecordId; + typedef BamScannerCacheKey_ TKey; + + // A mapping from the key type to the BamAlignmentRecord at this position. + typedef unordered_multimap TMap; + typedef TMap::const_iterator TMapIter; + + TRecords records; + String unusedIds; + TMap map; + BamAlignmentRecord tmpRecord; + + static const TRecordId INVALID_ID = (TRecordId)-1; +}; + + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ============================================================================ +// Functions +// ============================================================================ + +template +__uint64 _suffixHash(TSequence const &sequence) +{ + typedef typename Iterator::Type TIter; + typedef typename Value::Type TValue; + typedef typename Size::Type TSize; + + const __uint64 ALPH_SIZE = ValueSize::VALUE; + const unsigned MAX_LEN = LogN<~(ALPH_SIZE - 1) / ALPH_SIZE, ALPH_SIZE>::VALUE + 1; + + TSize len = length(sequence); + TIter itEnd = end(sequence, Standard()); + + if (len > 2 && (sequence[len - 2] == ':' || sequence[len - 2] == '/')) + { + len -= 2; + itEnd -= 2; + } + + if (len > MAX_LEN) + len = MAX_LEN; + + TIter it = itEnd - len; + + __uint64 hash = 0; + for (; it != itEnd; ++it) + hash = hash * ALPH_SIZE + ordValue(*it); + return hash; +} + +inline void +insertRecord(BamScannerCache &cache, seqan::BamAlignmentRecord const &record) +{ + int _id; + if (empty(cache.unusedIds)) + { + _id = 
length(cache.records); + appendValue(cache.records, record); + } + else + { + _id = back(cache.unusedIds); + eraseBack(cache.unusedIds); + cache.records[_id] = record; + } + + BamScannerCache::TKey key = { record.rID, record.beginPos, _suffixHash(record.qName) }; + cache.map.insert(std::make_pair(key, _id)); +} + +template +inline bool +_qNamesEqual(TQName1 const &name1, TQName2 const &name2) +{ + unsigned len1 = length(name1); + unsigned len2 = length(name2); + + if (len1 != len2) + return false; + + if (len1 > 2 && len2 > 2) + { + if (name1[len1 - 2] == ':' || name1[len1 - 2] == '/') + return prefix(name1, len1 - 1) == prefix(name2, len2 - 1); + } + return name1 == name2; +} + + +// search a certain segment that might refer to lastRecord +// if recursively search for a intermediate segments that might refer to lastRecord +inline bool +_recursivelyFindSegmentGraph( + String &records, + BamScannerCacheSearchKey_ &searchKey, + unsigned segmentNo, + BamScannerCache &cache) +{ + typedef BamScannerCache::TMapIter TMapIter; + typedef BamScannerCacheSearchKey_::TFlag TFlag; + + // search for segment using contigId, position + std::pair range = cache.map.equal_range(searchKey.cacheKey); + for (TMapIter iter = range.first; iter != range.second;) + { + TMapIter it = iter; + ++iter; + BamAlignmentRecord &record = cache.records[it->second]; + + // ... additionally check flags and qName + if ((record.flag & searchKey.flagsMask) == searchKey.flags && + _qNamesEqual(record.qName, records[0].qName)) + { + if (record.rNextId == records[0].rID && record.pNext == records[0].beginPos) + { + resize(records, segmentNo + 2); + records[segmentNo + 1] = records[0]; + records[segmentNo] = record; + appendValue(cache.unusedIds, it->second); + cache.map.erase(it); + return true; + } + } + } + + for (; range.first != range.second; ++range.first) + { + BamAlignmentRecord &record = cache.records[range.first->second]; + + // ... 
additionally check flags and qName + if ((record.flag & searchKey.flagsMask) == searchKey.flags && + _qNamesEqual(record.qName, records[0].qName)) + { + BamScannerCacheSearchKey_ newSearchKey = { + { record.rNextId, record.pNext, searchKey.cacheKey.qnameHash }, + static_cast((record.flag & BAM_FLAG_MULTIPLE) | ((record.flag & BAM_FLAG_NEXT_RC) >> 1)), + BAM_FLAG_MULTIPLE | BAM_FLAG_RC + }; + if (_recursivelyFindSegmentGraph(records, newSearchKey, segmentNo + 1, cache)) + { + records[segmentNo] = record; + appendValue(cache.unusedIds, range.first->second); + cache.map.erase(range.first); + return true; + } + } + } + + return false; +} + + +inline void +readMultiRecords(String &records, BamFileIn &bamFile, BamScannerCache &cache) +{ + typedef BamScannerCacheSearchKey_::TFlag TFlag; + + if (empty(records)) + resize(records, 1); + + while (!atEnd(bamFile)) + { + // read next record + BamAlignmentRecord &record = records[0]; + readRecord(record, bamFile); + + // is this a single-end read or single alignment? 
+ if (!hasFlagMultiple(record) || + record.rID == BamAlignmentRecord::INVALID_REFID || + record.beginPos == BamAlignmentRecord::INVALID_POS || + record.rNextId == BamAlignmentRecord::INVALID_REFID || + record.pNext == BamAlignmentRecord::INVALID_POS) + { + resize(records, 1); + return; + } + + if (record.rID < record.rNextId || (record.rID == record.rNextId && record.beginPos < record.pNext)) + { + // store record to retrieve it later + insertRecord(cache, record); + continue; + } + + // search mates in case of multiple templates + BamScannerCacheSearchKey_ searchKey = { + { record.rNextId, record.pNext, _suffixHash(record.qName) }, + static_cast((record.flag & BAM_FLAG_MULTIPLE) | ((record.flag & BAM_FLAG_NEXT_RC) >> 1)), + BAM_FLAG_MULTIPLE | BAM_FLAG_RC + }; + if (_recursivelyFindSegmentGraph(records, searchKey, 0, cache)) + { + // order paired-end reads by first and second segment in the template +// if (length(records) == 2) +// { +// if (hasFlagLast(records[0])) +// std::swap(records[0], records[1]); +// } + return; + } + else + { + // we could get here if both mates align to same position and we are the first + // hence, insert our record to be retrieved by the second. + if (records[0].beginPos != records[0].pNext) + { + std::cerr << "WARNING: Mate could not be found for:\n"; + write(std::cerr, records[0], context(bamFile), seqan::Sam()); + } + insertRecord(cache, records[0]); + } + } + clear(records); +} + +} // namespace seqan; + +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_BAM_SCANNER_CACHE_H_ diff --git a/seqan/bam_io/bam_stream.h b/seqan/bam_io/bam_stream.h deleted file mode 100644 index a73fb68..0000000 --- a/seqan/bam_io/bam_stream.h +++ /dev/null @@ -1,851 +0,0 @@ -// ========================================================================== -// SeqAn - The Library for Sequence Analysis -// ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin -// All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of Knut Reinert or the FU Berlin nor the names of -// its contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE -// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH -// DAMAGE. -// -// ========================================================================== -// Author: Manuel Holtgrewe -// ========================================================================== -// Simple-to-use I/O for SAM and BAM files. -// -// The actual implementation is done using the class hierarchies rooted in -// XamReader and XamWriter. 
We use virtual functions and native C++ -// inheritance here because the implementations requires some kind of dynamic -// lookup, and using the built-in inheritance model is probably fast. Also, -// this API is on a very high level/layer and thus some loss in performance is -// OK. -// ========================================================================== - -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_STREAM_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_BAM_STREAM_H_ - -#include -#include - -#include - -// TODO(holtgrew): Replace std::fstream by MMap String? -// TODO(holtgrew): Implement BAI support? - -namespace seqan { - -// ============================================================================ -// Forwards -// ============================================================================ - -// ============================================================================ -// Tags, Classes, Enums -// ============================================================================ - -/*! - * @class BamStream - * @headerfile - * @brief Class that provides an easy to use interface for reading and writing SAM and BAM files. - * - * @signature class BamStream; - * - * @section Example - * - * Read and write SAM or BAM files. - * - * @include demos/bam_io/bam_stream.cpp - * - * The output is as follows: - * - * @include demos/bam_io/bam_stream.cpp.stdout - */ - -/*! - * @fn BamStream::BamStream - * @brief Constructor - * - * @signature BamStream::BamStream([fileName[, mode[, format]]]); - * - * @param[in] fileName The path to the SAM or BAM file to load, char const *. - * @param[in] mode The open mode, of type @link BamStream::OperationMode @endlink, defaults to READ. - * @param[in] format The format, of type @link BamStream::Format @endlink, defaults to AUTO. - */ - -/*! - * @var THeader BamStream::header - * @brief The @link BamHeader @endlink of the @link BamStream @endlink object. - * - * SAM and BAM files have a header. 
When writing SAM or BAM files, you have to fill this member before writing @link - * BamAlignmentRecord @endlinks. Upon writing the first record, the header will be written out. - * - * When reading BAM files, the header will be read upon opening the file. When reading SAM files, any header will be - * read upon opening the file. - * - * Note that there is a special case when reading SAM records: If there is no header, or records refer to reference - * sequences that are previously unknown when reading SAM then a new entry is added to @link BamHeader::sequenceInfos - * @endlink. - */ - -/*! - * @var TBamIOContext BamStream::bamIOContext - * - * @brief The @link BamIOContext @endlink object to use for reading and writing @link BamAlignmentRecord @endlinks. - * - * When reading, the bamIOContext will be updated automatically. When reading SAM, new reference sequences can - * be introduced "on the fly" when a new sequence appears. When writing, the bamIOContext is automatically - * filled/reset when the first record is written. - */ - -/** -.Class.BamStream -..cat:BAM I/O -..summary:Class that provides an easy to use interface for reading and writing SAM and BAM files. -..signature:BamStream -..example:Read and write SAM or BAM files. -..example.file:demos/bam_io/bam_stream.cpp -..example.text:The output is as follows: -..example.output: -@HD VN:1.3 SO:coordinate -@SQ SN:ref LN:45 -@SQ SN:ref2 LN:40 -r001 163 ref 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112 -r002 0 ref 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA * -r003 0 ref 9 30 5H6M * 0 0 AGCTAA * -r004 0 ref 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC * -r003 16 ref 29 30 6H5M * 0 0 TAGGC * -r001 83 ref 37 30 9M = 7 -39 CAGCGCCAT * -..include:seqan/bam_io.h - -.Memfunc.BamStream#BamStream: -..class:Class.BamStream -..description:See documentation of @Class.BamStream@ for more information. 
-..summary:Constructor -..signature:BamStream() -..signature:BamStream(fileName[, mode[, format]]) -..param.fileName:Path to the file to open. -...type:nolink:$char const *$ -..param.mode:The mode to use for opening the file (read/write). Optional -...default:@Enum.BamStream\colon\colonOperationMode.value.READ@ -...type:Enum.BamStream\colon\colonOperationMode -..param.format:Use this to enforce opening the file in the given format. Autodetected from file name or content if not specified. Optional. -...type:Enum.BamStream\colon\colonFormat -...default:@Enum.BamStream\colon\colonFormat.value.AUTO@ - -.Memvar.BamStream#header: -..class:Class.BamStream -..type:Class.BamHeader -..summary:The @Class.BamHeader@ of the @Class.BamStream@ object. -..description: -SAM and BAM files have a header. -When writing SAM or BAM files, you have to fill this member before writing @Class.BamAlignmentRecord@s. -Upon writing the first record, the header will be written out. -..description: -When reading BAM files, the header will be read upon opening the file. -When reading SAM files, any header will be read upon opening the file. -..description: -Note that there is a special case when reading SAM records: -If there is no header, or records refer to reference sequences that are previously unknown when reading SAM then a new entry is added to @Memvar.BamHeader#sequenceInfos@.\ - -.Memvar.BamStream#bamIOContext: -..class:Class.BamStream -..summary:The @Class.BamIOContext@ object to use for reading and writing @Class.BamAlignmentRecord@s. -..description: -When reading, the $bamIOContext$ will be updated automatically. -When reading SAM, new reference sequences can be introduced "on the fly" when a new sequence appears. -When writing, the $bamIOContext$ is automatically filled/reset when the first record is written. - -.Enum.BamStream\colon\colonOperationMode: -..cat:BAM I/O -..summary:Select the operation mode of a @Class.BamStream@. -..value.READ:Open stream for reading. 
-..value.WRITE:Open stream for writing. -..include:seqan/bam_io.h - -.Enum.BamStream\colon\colonFormat: -..cat:BAM I/O -..summary:Select the format to use for reading/writing. -..value.AUTO:Auto-detect format from file content on reading and from the file name on writing. If Auto-detection fails, SAM is used. -..value.SAM:Force reading/writing of SAM. -..value.BAM:Force reading/writing of BAM. -..include:seqan/bam_io.h -*/ - -/*! - * @enum BamStream::OperationMode - * @brief Selects teh operation mode of a @link BamStream @endlink. - * @see BamStream - * - * @signature enum BamStream::OperationMode; - * - * @var BamStream::OperationMode BamStream::READ; - * @brief Enum value for reading. - * - * @var BamStream::OperationMode BamStream::WRITE; - * @brief Enum value for writing. - */ - -/*! - * @enum BamStream::Format - * @brief Select the format to use for reading/writing. - * - * @signature enum BamStream::Format; - * - * @var BamStream::Format BamStream::AUTO; - * @brief Auto-detect the format from file content on reading and from the file name on writing. If auto-detection - * fails, SAM is used. - * - * @var BamStream::Format BamStream::SAM; - * @brief Force reading/writing of SAM. - * - * @var BamStream::Format BamStream::BAM; - * @brief Force reading/writing of BAM. - */ - -class BamStream -{ -public: - - // Enum for selecting read/write mode. - enum OperationMode - { - READ, - WRITE - }; - - // Enum for selecting format. AUTO is only used as the default, after opening, only SAM and BAM are used. - enum Format - { - AUTO, - SAM, - BAM - }; - - // Name of the BAM file. - CharString _filename; - // The open mode. - OperationMode _mode; - // The format. - Format _format; - // Whether or not the header was written out. - bool _headerWritten; - - // Indicates whether stream is at end when reading. - bool _atEnd; - // Indicates whether there was an error when reading or writing. - bool _isGood; - - // The BAM Header record. 
- BamHeader header; - // The BAM I/O Context and its elements. - StringSet _nameStore; - NameStoreCache > _nameStoreCache; - BamIOContext > bamIOContext; - - // The actual implementation of writing SAM or BAM. - std::SEQAN_AUTO_PTR_NAME _writer; - // The actual implementation of reading SAM or BAM. - std::SEQAN_AUTO_PTR_NAME _reader; - - // Constructors. - - BamStream() : - _mode(READ), _format(AUTO), _headerWritten(false), _atEnd(false), _isGood(true), - _nameStoreCache(_nameStore), bamIOContext(_nameStore, _nameStoreCache) - {} - - BamStream(char const * filename, OperationMode mode = READ, Format format = AUTO); - - // Write header if necessary. - inline int _writeHeader() - { - if (this->_headerWritten) - return 0; - - // Rewrite name store and cache. - clear(_nameStore); - for (unsigned i = 0; i < length(header.sequenceInfos); ++i) - appendValue(_nameStore, header.sequenceInfos[i].i1); - refresh(_nameStoreCache); - - // Write out header. - this->_headerWritten = true; - return this->_writer->writeHeader(header, bamIOContext); - } -}; - -// ============================================================================ -// Metafunctions -// ============================================================================ - -// ============================================================================ -// Functions -// ============================================================================ - -// ---------------------------------------------------------------------------- -// Member Function BamStream::BamStream() -// ---------------------------------------------------------------------------- - -// Forward declaration is here since it refers to enum member type. 
-inline int open(BamStream & bamIO, - char const * filename, - BamStream::OperationMode mode, - BamStream::Format format); - -inline BamStream::BamStream(char const * filename, OperationMode mode, Format format) : - _filename(filename), _mode(mode), _format(format), _headerWritten(false), _atEnd(false), _isGood(true), - _nameStoreCache(_nameStore), bamIOContext(_nameStore, _nameStoreCache) -{ - open(*this, filename, _mode, _format); -} - -// ---------------------------------------------------------------------------- -// Function open() -// ---------------------------------------------------------------------------- - -/*! - * @fn BamStream#open - * @brief Open a @link BamStream @endlink object for reading/writing. - * - * @signature int open(bamIO, fileName[, mode[, format]]); - * - * @param[in,out] bamIO The @link BamStream @endlink object to open. Types: BamStream - * @param[in] fileName The path to the file to open, char const *. - * @param[in] mode The mode to open the file in, optional, of type @link BamStream::OperationMode @endlink, - * defaults to BamStream::READ. - * @param[in] format The format to use, inferred from file contents (reading) or file name (writing) by default. - * the path to the file to open, of type @link BamStream::Format @endlink, defaults to - * AUTO. - * - * @return int A status code, 0 on success, a value != 0 on errors. - */ - -/** -.Function.BamStream#open -..class:Class.BamStream -..summary:Open a @Class.BamStream@ object for reading/writing. -..signature:open(bamIO, fileName[, mode[, format]]) -..param.bamIO:The @Class.BamStream@ object to open. -...type:Class.BamStream -..param.fileName:The path to the file to open. -...type:Shortcut.CharString -..param.mode:The mode to open the file in. Optional. -...default:$BamStream::READ$ -...type:nolink:$BamStream::OperationMode$. -..param.format:The format to use, inferred from file contents (reading) or file name (writing) by default. 
-...default:$BamStream::AUTO$ -...type:nolink:$BamStream::Format$. -..param.format:The path to the file to open. -...type:Shortcut.CharString -..returns:An $int$ status code: $0$ on success, $1$ on errors. -..include:seqan/bam_io.h -*/ - -inline int open(BamStream & bamIO, - char const * fileName, - BamStream::OperationMode mode = BamStream::READ, - BamStream::Format format = BamStream::AUTO) -{ - bamIO._filename = fileName; - bamIO._isGood = true; - - // Guess format if necessary. - if (format == BamStream::AUTO) - { - if (mode == BamStream::READ) - { - format = BamStream::SAM; // SAM is default. - - // Look whether the file is in BAM format. - std::fstream inStream(toCString(fileName), std::ios_base::binary | std::ios_base::in); - if (!inStream.good()) - { - bamIO._isGood = false; - return 1; // Error opening the file. - } - char buffer[3]; - inStream.read(&buffer[0], 3); - if (buffer[0] == '\x1F' && buffer[1] == '\x8B' && buffer[2] == '\x08') - format = BamStream::BAM; - } - else // mode == WRITE - { - format = BamStream::SAM; // SAM is default. - if (endsWith(fileName, ".bam")) - format = BamStream::BAM; - } - } - -#if !SEQAN_HAS_ZLIB - // Guard against opening BAM files without zlib. - if (format == BamStream::BAM) - { - std::cerr << "ERROR: Trying to open BAM file and ZLIB is not available!\n"; - bamIO._isGood = false; - return 1; - } -#endif // #if !SEQAN_HAS_ZLIB - - if (mode == BamStream::READ) - { - if (format == BamStream::SAM) - bamIO._reader.reset(new SamReader_()); -#if SEQAN_HAS_ZLIB - // The branch above is always taken if zlib is not available, there already is a check above. 
- else - bamIO._reader.reset(new BamReader_()); -#endif // #if !SEQAN_HAS_ZLIB - if (bamIO._reader->open(fileName) != 0) - { - bamIO._isGood = false; - return 1; - } - } - else // (format == BamStream::WRITE) - { - if (format == BamStream::SAM) - bamIO._writer.reset(new SamWriter_()); -#if SEQAN_HAS_ZLIB - // The branch above is always taken if zlib is not available, there already is a check above. - else - bamIO._writer.reset(new BamWriter_()); -#endif // #if !SEQAN_HAS_ZLIB - if (bamIO._writer->open(fileName) != 0) - { - bamIO._isGood = false; - return 1; - } - } - - bamIO._mode = mode; - bamIO._format = format; - - // Read header. - if (bamIO._isGood && bamIO._mode == BamStream::READ) - { - clear(bamIO.header); - if (bamIO._reader->readHeader(bamIO.header, bamIO.bamIOContext) != 0) - { - bamIO._isGood = false; - return 1; - } - } - - return 0; -} - -// ---------------------------------------------------------------------------- -// Function reset() -// ---------------------------------------------------------------------------- - -/* - * @fn BamStream#reset - * @brief Reset @link BamStream @endlink object to status after construction. - * - * @signature void reset(stream); - * - * @param stream The @link BamStream @endlink object to reset. - * - * @return int A status code, 0 on success, != 0 on error. - */ - -/** -.Function.BamStream#reset -..class:Class.BamStream -..summary:Reset @Class.BamStream@ object to status after construction. -..signature:reset(bamIO) -..param.bamIO:The @Class.BamStream@ object to reset. -...type:Class.BamStream -..returns:$int$, a status code ($0$ for success, non-$0$ for error). -..include:seqan/bam_io.h -*/ - -inline int reset(BamStream & bamIO) -{ - return open(bamIO, toCString(bamIO._filename), bamIO._mode, bamIO._format); -} - -// ---------------------------------------------------------------------------- -// Function flush() -// ---------------------------------------------------------------------------- - -/*! 
- * @fn BamStream#flush - * @brief Flush output when writing. - * - * @signature int flush(stream); - * - * @param stream The @link BamStream @endlink object to flush. - * - * @return int A status code, 0 on success, != 0 on errors. - * - * @section Remarks - * - * This will write out the header if no record has been written out yet. - */ - -/** -.Function.BamStream#flush -..class:Class.BamStream -..summary:Flush output when writing. -..description:This will write out the header if no record has been written out yet. -..signature:flush(bamIO) -..param.bamIO:The @Class.BamStream@ object to flush. -...type:Class.BamStream -..returns:$int$ with an error code. -..include:seqan/bam_io.h -*/ - -inline int flush(BamStream & bamIO) -{ - if (bamIO._mode == BamStream::WRITE) - { - bamIO._writeHeader(); - return bamIO._writer->flush(); - } - return 0; -} - -// ---------------------------------------------------------------------------- -// Function close() -// ---------------------------------------------------------------------------- - -/*! - * @fn BamStream#close - * @brief Close BamStream object's underlying file. - * - * @signature int close(stream); - * - * @param stream[in,out] The @link BamStream @endlink object to close. - * - * @return int A status code, 0 on success, != 0 on error. - */ - -/** -.Function.BamStream#close -..class:Class.BamStream -..summary:Close BamStream object's underlying file. -..signature:close(bamIO) -..param.bamIO:The @Class.BamStream@ object to close -...type:Class.BamStream -..returns:$int$ with an error code. -..include:seqan/bam_io.h -*/ - -inline int close(BamStream & bamIO) -{ - if (bamIO._mode == BamStream::WRITE) - { - bamIO._writeHeader(); - return bamIO._writer->close(); - } - else - { - return bamIO._reader->close(); - } -} - -// ---------------------------------------------------------------------------- -// Function atEnd() -// ---------------------------------------------------------------------------- - -/*! 
- * @fn BamStream#atEnd - * @brief Check whether a @link BamStream @endlink object is at end when reading. - * - * @signature bool atEnd(stream); - * - * @param[in] stream The @link BamStream @endlink object to query. - * - * @return bool true in case of the stream being at the end, false otherwise. - * - * @section Remarks - * - * The stream will only be guaranteed at the end after trying to read after the last character. - */ - -/** -.Function.BamStream#atEnd -..class:Class.BamStream -..summary:Check whether a @Class.BamStream@ object is at end when reading. -..signature:atEnd(bamIO) -..param.bamIO:The @Class.BamStream@ object to query. -...type:Class.BamStream -..returns:$bool$, indicating whether the object is at the end of the file. -..include:seqan/bam_io.h -*/ - -inline bool atEnd(BamStream const & bamIO) -{ - SEQAN_ASSERT_EQ_MSG(bamIO._mode, BamStream::READ, "You can only call atEnd() when opened the file for reading."); - return bamIO._reader->atEnd(); -} - -inline bool atEnd(BamStream & bamIO) -{ - SEQAN_ASSERT_EQ_MSG(bamIO._mode, BamStream::READ, "You can only call atEnd() when opened the file for reading."); - return bamIO._reader->atEnd(); -} - -// ---------------------------------------------------------------------------- -// Function isGood() -// ---------------------------------------------------------------------------- - -/*! - * @fn BamStream#isGood - * @brief Check whether the @link BamStream @endlink object has is in the failure state. - * - * @signature bool isGood(stream); - * - * @param stream The @link BamStream @endlink object to query. - * - * @return bool true if the stream is not in an error state and false otherwise. - */ - -/** -.Function.BamStream#isGood -..class:Class.BamStream -..summary:Check whether the @Class.BamStream@ object has is in the failure state. -..signature:isGood(bamIO) -..param.bamIO:The @Class.BamStream@ object to query. -...type:Class.BamStream -..returns:$bool$, indicating whether there was no error or not. 
-..include:seqan/bam_io.h -*/ - -inline bool isGood(BamStream const & bamIO) -{ - return bamIO._isGood; -} - -// ---------------------------------------------------------------------------- -// Function readRecord() -// ---------------------------------------------------------------------------- - -/*! - * @fn BamStream#readRecord - * @brief Read one @link BamAlignmentRecord @endlink from a @link BamStream @endlink. - * - * @signature int readRecord(record, stream); - * - * @param[out] record The @link BamAlignmentRecord @endlink to read the next alignment record into. Of type - * @link BamAlignmentRecord @endlink. - * @param[in,out] stream The @link BamStream @endlink object to read from. - * - * @return int A status code, 0 on success. - */ - -/** -.Function.BamStream#readRecord -..class:Class.BamStream -..summary:Read one @Class.BamAlignmentRecord@ from a @Class.BamStream@. -..signature:readRecord(record, bamIO) -..param.record:The @Class.BamAlignmentRecord@ to read the next alignment record into. -...class:Class.BamAlignmentRecord -..param.bamIO:The @Class.BamStream@ object to read from. -...type:Class.BamStream -..returns:An $int$ status code: $0$ on success, non-$0$ on failure. -..include:seqan/bam_io.h -*/ - -inline int readRecord(BamAlignmentRecord & record, BamStream & bamIO) -{ - int res = bamIO._reader->readRecord(record, bamIO.bamIOContext); - bamIO._isGood = bamIO._isGood && (res == 0); - return res; -} - -// ---------------------------------------------------------------------------- -// Function writeRecord() -// ---------------------------------------------------------------------------- - -/*! - * @fn BamStream#writeRecord - * @brief Write one @link BamAlignmentRecord @endlink to a @link BamStream @endlink. - * - * @signature int writeRecord(stream, record); - * - * @param[in,out] bamIO The @link BamStream @endlink object to write to. - * @param[in] record The @link BamAlignmentRecord @endlink to write out. 
- * - * @return int A status code, 0 on success. - */ - -/** -.Function.BamStream#writeRecord -..class:Class.BamStream -..summary:Write one @Class.BamAlignmentRecord@ to a @Class.BamStream@. -..signature:writeRecord(bamIO, record) -..param.record:The @Class.BamAlignmentRecord@ to write out. -...class:Class.BamAlignmentRecord -..param.bamIO:The @Class.BamStream@ object to write to. -...type:Class.BamStream -..returns:An $int$ status code: $0$ on success, non-$0$ on failure. -..include:seqan/bam_io.h -*/ - -inline int writeRecord(BamStream & bamIO, BamAlignmentRecord const & record) -{ - bamIO._writeHeader(); // Does nothing if head already written out. - - int res = bamIO._writer->writeRecord(record, bamIO.bamIOContext); - bamIO._isGood = bamIO._isGood && (res == 0); - return res; -} - -// ---------------------------------------------------------------------------- -// Function fileSize() -// ---------------------------------------------------------------------------- - -/*! - * @fn BamStream#fileSize - * @brief Returns the size of the file in bytes as stored on the disk. - * - * @signature __int64 fileSize(stream); - * - * @param[in] stream The @link BamStream @endlink to query. - * - * @return __int64 The size of the file on the disk. - * - * @section Remarks - * - * This only works when reading. - */ - -// Returns size of file in bytes as stored on the disk. - -inline __int64 fileSize(BamStream const & bamIO) -{ - if (bamIO._mode == BamStream::WRITE) - return 0; - return bamIO._reader->fileSize(); -} - -// ---------------------------------------------------------------------------- -// Function positionInFile() -// ---------------------------------------------------------------------------- - -/*! - * @fn BamStream#positionInFile - * @brief Approximate byte position in file, to be used for progress display, not for seeking. 
- * - * @signature __int64 positionInFile(stream); - * - * @param[in] stream The @link BamStream @endlink to query for its position in the file. - * - * @return __int64 The position in the file. - * - * @section Remarks - * - * This function returns the "approximate" position in the file and only works when the file is opened in BAM format. - * It is meant for progress display in connection with @link BamStream#fileSize @endlink and not for jumping within the - * file. The position is approximate in the sense that it points between the block boundaries of the BGZ file. - */ - -// TODO(holtgrew): Review this functionality, extend, fix. - -// Returns "approximate" byte position in file. To be used for progress display, not for seeking. For this, we have to -// implement streamTell() and streamSeek() for BamStream. This works for BAM only at the moment. - -inline __int64 positionInFile(BamStream const & bamIO) -{ - if (bamIO._mode == BamStream::WRITE) - return 0; - return bamIO._reader->positionInFile(); -} - -// ---------------------------------------------------------------------------- -// Function jumpToRegion() -// ---------------------------------------------------------------------------- - -/*! - * @fn BamStream#jumpToRegion - * @brief Seek in BamStream using an index. - * - * You provide a region [pos, posEnd) on the reference refID that you want to jump to and the function - * jumps to the first alignment in this region, if any. - * - * @signature bool jumpToRegion(stream, hasAlignments, bamIOContext, refID, pos, posEnd, index); - * - * @param[in,out] stream The @link BamStream @endlink to jump with. - * @param[out] hasAlignments A bool that is set true if the region [pos, posEnd) has any - * alignments. - * @param[in] refID The reference id to jump to (__int32). - * @param[in] pos The begin of the region to jump to. - * @param[in] posEnd The end of the region to jump to. - * @param[in] index The @link BamIndex @endlink to use for the jumping. 
- * - * @return bool true if seeking was successful, false if not. - * - * @section Remarks - * - * This function fails if refID/pos are invalid. - * - * @see BamIndex#jumpToRegion - */ - -#if SEQAN_HAS_ZLIB -inline bool jumpToRegion(BamStream & bamIO, bool & hasAlignments, __int32 refId, __int32 pos, __int32 posEnd, BamIndex const & index) -{ - if (bamIO._format != BamStream::BAM) - return false; // Can only jump in BAM files. - if (bamIO._mode != BamStream::READ) - return false; // Can only jump when reading. - - BamReader_ * s = static_cast(bamIO._reader.get()); - return s->jumpToRegion(hasAlignments, refId, pos, posEnd, index, bamIO.bamIOContext); -} -#endif // #if SEQAN_HAS_ZLIB - -// ---------------------------------------------------------------------------- -// Function jumpToOrphans() -// ---------------------------------------------------------------------------- - -/*! - * @fn BamStream#jumpToOrphans - * @brief Seek to orphans block in BamStream using an index. - * - * @signature bool jumpToOrphans(stream, hasAlignments, index); - * - * @param[in,out] stream The @link BgzfStream @endlink object to jump with. - * @param[out] hasAlignments A bool that is set to true if there are any orphans. - * @param[in] index The index to use for jumping. - * - * @see BamIndex#jumpToOrphans - */ - -#if SEQAN_HAS_ZLIB -inline bool jumpToOrphans(BamStream & bamIO, BamIndex const & index) -{ - if (bamIO._format != BamStream::BAM) - return false; // Can only jump in BAM files. - if (bamIO._mode != BamStream::READ) - return false; // Can only jump when reading. 
- - BamReader_ * s = static_cast(bamIO._reader.get()); - return s->jumpToOrphans(index, bamIO.bamIOContext); -} -#endif // #if SEQAN_HAS_ZLIB - -} // namespace seqan; - -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_STREAM_H_ diff --git a/seqan/bam_io/bam_tags_dict.h b/seqan/bam_io/bam_tags_dict.h index 32f0c51..05de73c 100644 --- a/seqan/bam_io/bam_tags_dict.h +++ b/seqan/bam_io/bam_tags_dict.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -30,12 +30,13 @@ // // ========================================================================== // Author: Manuel Holtgrewe +// Author: David Weese // ========================================================================== // Code for read/write access to BAM tag dicts. 
// ========================================================================== -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_TAGS_DICT_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_BAM_TAGS_DICT_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_BAM_TAGS_DICT_H_ +#define INCLUDE_SEQAN_BAM_IO_BAM_TAGS_DICT_H_ namespace seqan { @@ -43,6 +44,43 @@ namespace seqan { // Forwards // ============================================================================ +class BamTagsDict; + +inline bool hasIndex(BamTagsDict const & bamTags); +inline void buildIndex(BamTagsDict const & bamTags); + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Metafunction Host +// ---------------------------------------------------------------------------- + +template <> +struct Host +{ + typedef CharString Type; +}; + +template <> +struct Host +{ + typedef CharString const Type; +}; + +template <> +struct Size +{ + typedef unsigned Type; +}; + +template <> +struct Position +{ + typedef unsigned Type; +}; + // ============================================================================ // Tags, Classes, Enums // ============================================================================ @@ -56,33 +94,11 @@ namespace seqan { * * @section Example * - * @code{.cpp} - * CharString samStr = "AA:Z:value1\tAB:Z:value2\tAC:i:30"; - * CharString bamStr; - * assignSamToBam(bamStr, samStr); - * BamTagsDict tags(bamStr); - * std::cerr << length(tags) << std::endl; // #=> "3" - * for (unsigned i = 0; i < length(tags); ++i) - * { - * std::cerr << getTagKey(tags, i) << " -> " << getTagValue(tags, i) << std::endl; - * if (getTagValue(tags, i)[0] == 'i') // is 32 bit integer - * { - * __int32 x = 0; - * bool res = extractTagValue(x, tags, i); - * SEQAN_ASSERT_MSG(res, "Not a valid integer at pos %u!", i); - * std::cerr << " " << x 
<< std::endl; - * } - * } - * @endcode + * @include demos/dox/bam_io/bam_tags_dict.cpp * * Output is: * - * @code{.cpp} - * "AA -> Zvalue1" - * "AB -> Zvalue2" - * "AC -> i" - * -> " 30" - * @endcode + * @include demos/dox/bam_io/bam_tags_dict.cpp.stdout * * @see getBamTypeSize * @see getBamTypeChar @@ -95,72 +111,28 @@ namespace seqan { * @signature BamTagsDict::BamTagsDict(); */ -/** -.Class.BamTagsDict -..cat:BAM I/O -..cat:Fragment Store -..signature:BamTagsDict -..summary:Indexes start positions of BAM tags in a @Shortcut.CharString@ and provides a dict-like API. -..example.code: -CharString samStr = "AA:Z:value1\tAB:Z:value2\tAC:i:30"; -CharString bamStr; -assignSamToBam(bamStr, samStr); -BamTagsDict tags(bamStr); -std::cerr << length(tags) << std::endl; // #=> "3" -for (unsigned i = 0; i < length(tags); ++i) -{ - std::cerr << getTagKey(tags, i) << " -> " << getTagValue(tags, i) << std::endl; - if (getTagValue(tags, i)[0] == 'i') // is 32 bit integer - { - __int32 x = 0; - bool res = extractTagValue(x, tags, i); - SEQAN_ASSERT_MSG(res, "Not a valid integer at pos %u!", i); - std::cerr << " " << x << std::endl; - } -} -// #=> "AA -> Zvalue1" -// #=> "AB -> Zvalue2" -// #-> "AC -> i" -# #-> " 30" -..include:seqan/bam_io.h - -.Memfunc.BamTagsDict#BamTagsDict -..class:Class.BamTagsDict -..signature:BamTagsDict() -..summary:Constructor. -..remarks:Only the default constructor is provided. 
-*/ - class BamTagsDict { public: - Holder _host; - String _positions; + typedef Host::Type TBamTagsSequence; + typedef Position::Type TPos; + + Holder _host; + mutable String _positions; BamTagsDict() {} explicit - BamTagsDict(CharString & tags) : _host(tags) {} -}; - -// ============================================================================ -// Metafunctions -// ============================================================================ - -// ---------------------------------------------------------------------------- -// Metafunction Host -// ---------------------------------------------------------------------------- + BamTagsDict(TBamTagsSequence & tags) : _host(tags) {} -template <> -struct Host -{ - typedef CharString Type; -}; - -template <> -struct Host -{ - typedef CharString const Type; + template + inline Infix::Type>::Type + operator[] (TPos pos) const + { + if (!hasIndex(*this)) + buildIndex(*this); + return infix(host(*this), _positions[pos], _positions[pos + 1]); + } }; // ============================================================================ @@ -193,35 +165,15 @@ host(BamTagsDict const & bamTags) * * @signature bool hasIndex(dict); * - * @param dict The @link BamTagsDict @endlink to query. + * @param[in] dict The @link BamTagsDict @endlink to query. * - * @return bool true if dict has an index and false otherwise. + * @return bool true if dict has an index and false otherwise. */ -/** -.Function.BamTagsDict#hasIndex -..class:Class.BamTagsDict -..cat:Fragment Store -..summary:Return $true$ if @Class.BamTagsDict@ has an index. -..signature:hasIndex(bamTags) -..param.bamTags:SAM Tags to query -...type:Class.BamTagsDict -..returns:$bool$ -..include: -*/ - -// TODO(holtgrew): Remove non-const variante. 
- inline bool hasIndex(BamTagsDict const & bamTags) { - return length(bamTags._positions) != 0u; -} - -inline bool -hasIndex(BamTagsDict & bamTags) -{ - return hasIndex(const_cast(bamTags)); + return !empty(bamTags._positions) || empty(host(bamTags)); } // ---------------------------------------------------------------------------- @@ -230,13 +182,14 @@ hasIndex(BamTagsDict & bamTags) /*! * @fn getBamTypeSize + * @headerfile * @brief Return size of the type identified by a type char. * * @signature int getBamTypeSize(c); * - * @param c A char that identifies a type. + * @param[in] c A char that identifies a type. * - * @return int The size of the type in bytes, -1 vor variable-sized types, -2 for invalid paramters. + * @return int The size of the type in bytes, -1 for variable-length types, -2 for invalid paramters. * * @see BamTagsDict * @see getBamTypeChar @@ -245,41 +198,44 @@ hasIndex(BamTagsDict & bamTags) // Return sizeof() of the type identified with the given char. Returns -2 if not // valid, -1 if of variable length. -/** -.Function.getBamTypeSize -..class:Class.BamTagsDict -..cat:BAM I/O -..signature:getBamTypeSize(c) -..summary:Return size of the type identified by $c$. -..param.c:The BAM type identifier -..returns:$int$ with the $sizeof()$ of the type, -1 for variable sized types, -2 for invalid parameters. 
-..include:seqan/bam_io.h -*/ +struct GetBamTypeSizeHelper_ +{ + int &resultSize; + char typeC; + + GetBamTypeSizeHelper_(int &resultSize, char typeC) : + resultSize(resultSize), + typeC(typeC) + {} + + template + bool operator() (Type) const + { + if (BamTypeChar::VALUE != typeC) + return false; + + resultSize = sizeof(Type); + return true; + } +}; + inline int getBamTypeSize(char c) { - switch (c) + switch (toUpperValue(c)) { - case 'A': - return 1; - case 'f': - return 4; case 'Z': case 'H': case 'B': return -1; - case 'c': - case 'C': - return 1; - case 's': - case 'S': - return 2; - case 'i': - case 'I': - return 4; + + default: + int result = -2; + GetBamTypeSizeHelper_ func(result, c); + tagApply(func, BamTagTypes()); + return result; } - return -2; } // ---------------------------------------------------------------------------- @@ -295,87 +251,106 @@ getBamTypeSize(char c) * @param[in,out] bamTags The BamTagsDict object to build the index for. */ -/** -.Function.BamTagsDict#buildIndex -..class:Class.BamTagsDict -..cat:Fragment Store -..summary:Build index for a @Class.BamTagsDict@ object. -..signature:buildIndex(bamTags) -..param.bamTags:SAM Tags to build index for. -...type:Class.BamTagsDict -..returns:$void$ -..include: -*/ - inline void -buildIndex(BamTagsDict & bamTags) +buildIndex(BamTagsDict const & bamTags) { - typedef Host::Type TCharString; - typedef Iterator::Type TCharStringIter; + typedef Host::Type TTagString; + typedef Iterator::Type TIter; clear(bamTags._positions); - if (empty(value(bamTags._host))) + if (empty(host(bamTags))) return; // Done. appendValue(bamTags._positions, 0); - for (TCharStringIter it = begin(host(bamTags)); !atEnd(it);) + TIter itBegin = begin(host(bamTags), Standard()); + TIter itEnd = end(host(bamTags), Standard()); + for (TIter it = itBegin; it != itEnd; ) { + SEQAN_ASSERT(it < itEnd); + // skip tag name (e.g. "NM") it += 2; - char c = *it; + + // get tag type (e.g. 
'I') + char c = *(it++); if (c == 'H' || c == 'Z') { - while (!atEnd(it) && *it != '\0') + // skip string and its end-of-string marker + while (*it != '\0') + { ++it; + SEQAN_ASSERT(it != itEnd); + } ++it; } else if (c == 'B') { - ++it; - c = *it; - ++it; - __uint32 len = 0; - memcpy(&len, &*it, 4); - it += 4; - it += len * getBamTypeSize(c); + // skip array of PODs + c = *(it++); + union { + char raw[4]; + __uint32 len; + } tmp; + arrayCopyForward(it, it + 4, tmp.raw); + it += 4 + tmp.len * getBamTypeSize(c); } else { - ++it; + // skip POD type (e.g. byte, int) it += getBamTypeSize(c); } - - appendValue(bamTags._positions, position(it)); + appendValue(bamTags._positions, it - itBegin); } // if (!empty(value(bamTags._host))) // appendValue(bamTags._positions, length(host(bamTags)) + 1); // +1 since there is not tab at the end } // ---------------------------------------------------------------------------- -// Function setHost() +// Function _dataHost() // ---------------------------------------------------------------------------- -inline Holder & +inline Holder::Type> & _dataHost(BamTagsDict & bamTags) { return bamTags._host; } +inline Holder::Type> const & +_dataHost(BamTagsDict const & bamTags) +{ + return bamTags._host; +} + +// ---------------------------------------------------------------------------- +// Function setHost() +// ---------------------------------------------------------------------------- + +#ifdef SEQAN_CXX11_STANDARD +template +inline void +setHost(BamTagsDict & me, THost && host_) +{ + setValue(_dataHost(me), std::forward(host_)); + clear(me._positions); +} +#else +template inline void -setHost(BamTagsDict & me, CharString & host_) +setHost(BamTagsDict & me, THost & host_) { SEQAN_CHECKPOINT; - setValue(_dataHost(me), host_); + setValue(_dataHost(me), host_); clear(me._positions); } +template inline void -setHost(BamTagsDict & me, CharString const & host_) +setHost(BamTagsDict & me, THost const & host_) { SEQAN_CHECKPOINT; - 
setValue(_dataHost(me), host_); + setValue(_dataHost(me), host_); clear(me._positions); } - +#endif // SEQAN_CXX11_STANDARD // ---------------------------------------------------------------------------- // Function length() // ---------------------------------------------------------------------------- @@ -386,20 +361,19 @@ setHost(BamTagsDict & me, CharString const & host_) * * @signature unsigned length(tagsDict); * - * @param tagsDict The BamTagsDict object to query for its length. + * @param[in] tagsDict The BamTagsDict object to query for its length. * - * @return unsigned The number of entries in the BamTagsDict. + * @return TSize The number of entries in the BamTagsDict. TSize is the result of + * Size<BamTagsDict>::Type. */ -///.Function.length.param.object.type:Class.BamTagsDict - -inline unsigned +inline Size::Type length(BamTagsDict const & tags) { - if (empty(value(tags._host))) + if (empty(host(tags))) return 0; if (!hasIndex(tags)) - buildIndex(const_cast(tags)); + buildIndex(tags); return length(tags._positions) - 1; } @@ -411,31 +385,19 @@ length(BamTagsDict const & tags) * @fn BamTagsDict#getTagType * @brief Returns the tag type char for an entry of a BamTagsDict. * - * @signature char getTagType(tags, idx); + * @signature char getTagType(tags, id); * * @param[in] tags The BamTagsDict to query. - * @param[in] idx The position for which to retrieve the type. + * @param[in] id The id of the tag for which to determine the type. See @link BamTagsDict#findTagKey @endlink. + * + * @return char A char that identifies the tag type. */ -/** -.Function.BamTagsDict#getTagType -..class:Class.BamTagsDict -..cat:BAM I/O -..signature:getTagType(tagsDict, idx) -..summary:Get key of a tag by index. -..param.tagsDict:The @Class.BamTagsDict@ to retrieve data from. -..param.idx:Index of the tag whose key to retrieve. -..returns:$char$, the SAM/BAM identifier of the type. 
-..include:seqan/bam_io.h -*/ - -template +template inline char -getTagType(BamTagsDict & tags, TPos idx) +getTagType(BamTagsDict const & tags, TId id) { - if (!hasIndex(tags)) - buildIndex(tags); - return host(tags)[tags._positions[idx] + 2]; + return tags[id][2]; } // ---------------------------------------------------------------------------- @@ -446,42 +408,19 @@ getTagType(BamTagsDict & tags, TPos idx) * @fn BamTagsDict#getTagKey * @brief Return key of a tag by index. * - * @signature TKey getTagKey(tagsDict, idx); + * @signature TKey getTagKey(tagsDict, id); * * @param[in] tagsDict The BamTagsDict to query. - * @param[in] idx The index of the dict entry. + * @param[in] id The index of the dict entry. * * @return TKey An infix of a @link CharString @endlink. Will be a two-character char sequence. */ -/** -.Function.BamTagsDict#getTagKey -..class:Class.BamTagsDict -..cat:BAM I/O -..signature:getTagKey(tagsDict, idx) -..summary:Return key of a tag by index. -..param.tagsDict:The @Class.BamTagsDict@ to retrieve data from. -...type:Class.BamTagsDict -..param.idx:Index of the tag whose key to retrieve. -..returns:Infix of the underlying string. -..remarks:See @Class.BamTagsDict@ for an example. -..include:seqan/bam_io.h -*/ - -template -inline Infix::Type>::Type -getTagKey(BamTagsDict & tags, TPos idx) -{ - if (!hasIndex(tags)) - buildIndex(tags); - return infix(host(tags), tags._positions[idx], tags._positions[idx] + 2); -} - -template +template inline Infix::Type>::Type -getTagKey(BamTagsDict const & tags, TPos idx) +getTagKey(BamTagsDict const & tags, TId id) { - return getTagKey(const_cast(tags), idx); + return prefix(tags[id], 2); } // ---------------------------------------------------------------------------- @@ -492,252 +431,123 @@ getTagKey(BamTagsDict const & tags, TPos idx) * @fn BamTagsDict#findTagKey * @brief Find a tag by its key for a @link BamTagsDict @endlink object. 
* - * @signature bool findTagKey(idx, tagsDict, name); + * @signature bool findTagKey(id, tagsDict, key); * - * @param[out] idx The index of the tag is stored here (unsigned). + * @param[out] id The id of the found tag. * @param[in] tagsDict The BamTagsDict to query. - * @param[in] name The key to query for: @link CharString @endlink. + * @param[in] key The key to query for: @link CharString @endlink. * - * @return bool true if the key could be found and false otherwise. + * @return bool true if the key could be found and false otherwise. */ -/** -.Function.BamTagsDict#findTagKey -..summary:Find a tag by its key for a @Class.BamTagsDict@ object. -..class:Class.BamTagsDict -..signature:findTagKey(idx, tagsDict, name) -..param.idx:Index of the tag with the given key. -...type:nolink:$unsigned$ -..param.tagsDict:The @Class.BamTagsDict@ to retrieve data from. -..param.name:Name of the key to find. -...type:Shortcut.CharString -..returns:$bool$, indicating whether such a key could be found. -..include:seqan/bam_io.h -*/ - +template inline bool -findTagKey(unsigned & idx, BamTagsDict & tags, CharString const & name) +findTagKey(TId & id, BamTagsDict const & tags, TKey const & key) { - for (idx = 0; idx < length(tags); ++idx) - if (getTagKey(tags, idx) == name) + for (id = 0; id < (TId)length(tags); ++id) + if (getTagKey(tags, id) == key) return true; return false; } -inline bool -findTagKey(unsigned & idx, BamTagsDict const & tags, CharString const & name) -{ - return findTagKey(idx, const_cast(tags), name); -} - // ---------------------------------------------------------------------------- -// Function getTagValue() +// Function extractTagValue() // ---------------------------------------------------------------------------- /*! - * @fn BamTagsDict#getTagValue - * @brief The value of a tag by its key. + * @fn BamTagsDict#extractTagValue + * @brief Extract and cast "atomic" value from tags string with index id. 
+ * + * @signature bool extractTagValue(dest, tags, id) * - * @signature CharString getTagValue(tagsDict, idx); + * @param[out] dest The variable to write the value to.The value is first copied in a variable of the type indicated in + * the BAM file. Then it is cast into the type of dest. * - * @param[in] tagsDict The tags dict to query. - * @param[in] idx The index of the entry to query for its value. + * @param[in] tags The BamTagsDict object to query. + * @param[in] id The id of the tag to extract the value from. See @link BamTagsDict#findTagKey @endlink. * - * @return CharString the raw tags data. + * @return bool true if the value could be extracted. * * @section Remarks * - * Note that you will get <type char> + payload in the case of @link BamTagsDict @endlink. + * The function only works for atomic types such as int, not for char* or arrays. * - * See documentation of @link BamTagsDict @endlink for an example. + * See @link BamTagsDict @endlink for an example. */ - -/** -.Function.BamTagsDict#getTagValue -..class:Class.BamTagsDict -..cat:BAM I/O -..summary:Return the value of a tag by its index in the @Class.BamTagsDict@. -..signature:getTagValue(tagsDict, idx) -..param.tagsDict:The @Class.BamTagsDict@ to retrieve data from. -...type:Class.BamTagsDict -..param.idx:Index of the tag whose value to retrieve. -..returns:@Shortcut.CharString@ with the raw tags data. -..remarks:Note that you will get $ + payload$ in case of @Class.BamTagsDict@. -..remarks:See @Class.BamTagsDict@ for an example. -..include:seqan/bam_io.h -*/ - -template -inline CharString -getTagValue(BamTagsDict & tags, TIdx idx) +template +struct ExtractTagValueHelper_ { - if (!hasIndex(tags)) - buildIndex(tags); - - // TODO(holtgrew): Can't we use positions to speed this up? 
+ TResultType &result; + TIter rawIter; + char typeC; + + ExtractTagValueHelper_(TResultType &result, char typeC, TIter rawIter) : + result(result), + rawIter(rawIter), + typeC(typeC) + {} + + template + bool operator() (Type) const + { + if (BamTypeChar::VALUE != typeC) + return false; - typedef typename Position::Type TPos; - TPos beginPos = tags._positions[idx] + 2; - TPos endPos = beginPos + 1; + union { + char raw[sizeof(Type)]; + Type i; + } tmp; - char theType = getTagType(tags, idx); - if (theType == 'Z' || theType == 'H') - { - typedef typename Iterator::Type TIterator; - TIterator it = begin(host(tags), Rooted()) + beginPos + 1; - for (; !atEnd(it) && *it != '\0'; goNext(it)) - endPos += 1; - endPos += 1; - } - else if (theType == 'B') - { - __uint32 len = 0; - memcpy(&len, &host(tags)[tags._positions[idx]] + 4, 4); - char c = host(tags)[tags._positions[idx] + 3]; - int typeSize = getBamTypeSize(c); - SEQAN_ASSERT_GT(typeSize, 0); - endPos += 5 + len * typeSize; - } - else - { - endPos += getBamTypeSize(theType); + arrayCopyForward(rawIter, rawIter + sizeof(Type), tmp.raw); + result = static_cast(tmp.i); + return true; } +}; - return infix(host(tags), beginPos, endPos); -} - -template -inline CharString //Infix::Type>::Type -getTagValue(BamTagsDict const & tags, TPos idx) +template +SEQAN_FUNC_ENABLE_IF(Is >, bool) +extractTagValue(TResultValue & val, BamTagsDict const & tags, TId id) { - return getValue(const_cast(tags), idx); -} + typedef Infix::Type>::Type TInfix; + typedef Iterator::Type TIter; -// ---------------------------------------------------------------------------- -// Function extractTagValue() -// ---------------------------------------------------------------------------- + TInfix inf = tags[id]; + if (length(inf) < 4 || inf[2] == 'Z') + return false; -/*! - * @fn BamTagsDict#extractTagValuej - * @brief Extract and cast "atomic" value from tags string with index idx. 
- * - * @signature bool extractTagValue(dest, tags, idx) - * - * @param[out] dest The variable to write the value to.The value is first copied in a variable of the type indicated in - * the BAM file. Then it is cast into the type of dest. - * - * @param[in] tags The BamTagsDict object to query. - * @param[in] idx The integer index in the dict to use. - * - * @return bool true if the value could be extracted. - * - * @section Remarks - * - * The function only works for atomic types such as int, not for char* or arrays. - * - * See @link BamTagsDict @endlink for an example. - */ + TIter it = begin(inf, Standard()) + 2; + char typeC = getValue(it++); + ExtractTagValueHelper_ func(val, typeC, it); -/** -.Function.BamTagsDict#extractTagValue -..class:Class.BamTagsDict -..cat:BAM I/O -..signature:extractTagValue(dest, tags, idx) -..summary:Extract and cast "atomic" value from tags string with index $idx$. -..param.dest:The variable to write the value to. -...remarks:The value is first copied in a variable of the type indicated in the BAM file. Then it is cast into the type of $dest$. -..param.tags:@Class.BamTagsDict@ object. -...type:Class.BamTagsDict -..params.idx:Index of the tag in the tag list. -..returns:$bool$, indicating the success. -..remarks:The function only works for atomic types such as $int$, not for $char*$ or arrays. -..remarks:See @Class.BamTagsDict@ for an example. 
-..see:Function.BamTagsDict#getTagValue -..include:seqan/bam_io.h -*/ - -template -inline bool -extractTagValue(TDest & dest, BamTagsDict & tags, TIdx idx) + return tagApply(func, BamTagTypes()); +} + +template +SEQAN_FUNC_ENABLE_IF(IsSequence, bool) +extractTagValue(TResultValue & val, BamTagsDict const & tags, TId id) { - if (!hasIndex(tags)) - buildIndex(tags); + typedef Infix::Type>::Type TInfix; - char typeC = host(tags)[tags._positions[idx] + 2]; - if (typeC == 'A') - { - char x = 0; - char * ptr = reinterpret_cast(&x); - memcpy(ptr, &host(tags)[tags._positions[idx] + 3], 1); - dest = static_cast(x); - } - else if (typeC == 'c') - { - __int8 x = 0; - char * ptr = reinterpret_cast(&x); - memcpy(ptr, &host(tags)[tags._positions[idx] + 3], 1); - dest = static_cast(x); - } - else if (typeC == 'C') - { - __uint8 x = 0; - char * ptr = reinterpret_cast(&x); - memcpy(ptr, &host(tags)[tags._positions[idx] + 3], 1); - dest = static_cast(x); - } - else if (typeC == 's') - { - __int16 x = 0; - char * ptr = reinterpret_cast(&x); - memcpy(ptr, &host(tags)[tags._positions[idx] + 3], 2); - dest = static_cast(x); - } - else if (typeC == 'S') - { - __uint16 x = 0; - char * ptr = reinterpret_cast(&x); - memcpy(ptr, &host(tags)[tags._positions[idx] + 3], 2); - dest = static_cast(x); - } - else if (typeC == 'i') - { - __int32 x = 0; - char * ptr = reinterpret_cast(&x); - memcpy(ptr, &host(tags)[tags._positions[idx] + 3], 4); - dest = static_cast(x); - } - else if (typeC == 'I') - { - __uint32 x = 0; - char * ptr = reinterpret_cast(&x); - memcpy(ptr, &host(tags)[tags._positions[idx] + 3], 4); - dest = static_cast(x); - } - else if (typeC == 'f') - { - float x = 0; - char * ptr = reinterpret_cast(&x); - memcpy(ptr, &host(tags)[tags._positions[idx] + 3], 4); - dest = static_cast(x); - } - else // variable sized type or invald - { + TInfix inf = tags[id]; + if (length(inf) < 4 || inf[2] != 'Z') return false; - } + + val = infix(inf, 3, length(inf) - 1); return true; } - // 
---------------------------------------------------------------------------- // Function getBamTypeChar() // ---------------------------------------------------------------------------- /*! * @fn getBamTypeChar + * @headerfile * @brief Return char identifying the type of the argument type. * * @signature char getBamTypeChar(); + * @signature BamTypeChar::VALUE * * @tparam T The type to query for its type char. * @@ -758,43 +568,14 @@ extractTagValue(TDest & dest, BamTagsDict & tags, TIdx idx) * @see getBamTypeSize */ -/** -.Function.getBamTypeChar -..class:Class.BamTagsDict -..cat:BAM I/O -..summary:Return char identifying the type of the atomic argument. -..signature:getBamTypeChar() -..param.T:The type to get the BAM char for. -..returns:$char$ describing the BAM type. One of $ACcSsIifZ$. -..remarks:Note that this function is defined for the $__int16$, $__uint16$ etc. but not for the types $short$, $int$ etc. An exception are 8-bit characters/char, where it is defined for $__int8$, $__uint8$, and $char$ unless $char$ is equal to one of the other two types. This is important when used in @Function.BamTagsDict#setTagValue@ etc. since BAM gives type chars for printable characters, signed 8-bit numbers and unsigned 8-bit numbers. -..remarks:If $__int8$ and $__uint8$ are not identical to $char$, we can make this decision from the type, otherwise we cannot and we will give the integer types a higher precedence. -..remarks:In your programs, this should not make any difference, only the written SAM/BAM will differ. 
-..include:seqan/bam_io.h -*/ +template +struct BamTypeChar : + BamTypeChar {}; template -inline char getBamTypeChar() +inline char getBamTypeChar(T const &) { - if (IsSameType::Type::VALUE) - return 'C'; - if (IsSameType::Type::VALUE) - return 'c'; - if (IsSameType::Type::VALUE) - return 'A'; - if (IsSameType::Type::VALUE) - return 's'; - if (IsSameType::Type::VALUE) - return 'S'; - if (IsSameType::Type::VALUE) - return 'i'; - if (IsSameType::Type::VALUE) - return 'I'; - if (IsSameType::Type::VALUE) - return 'f'; - if (IsSameType::Type::VALUE || IsSameType::Type::VALUE) - return 'Z'; - else - return '\0'; + return BamTypeChar::VALUE; } // ---------------------------------------------------------------------------- @@ -806,22 +587,20 @@ inline char getBamTypeChar() /*! * @fn BamTagsDict#setTagValue * - * @headerfile seqan/bam_io.h + * @headerfile * * @brief Set the value of a tag through a @link BamTagsDict @endlink. * * @signature bool setTagValue(tags, key, val[, typeC]); * * @param[in,out] tags The BamTagsDict to modify. - * @param[in] key The key of the tag.Must be a string of length 2. Types: CharString - * @param[in] val The value to set the the tag to. - - * @param[in] typeC BAM type char to use.For portability (so the generated files are the same on all platforms), use + * @param[in] key The key of the tag. Must be a sequence of length 2. + * @param[in] val The value to set the tag to. + * @param[in] typeC BAM type char to use. For portability (so the generated files are the same on all platforms), use * a signed/unsigned qualified type for val or give typeC. Also see the remarks * for @link getBamTypeChar @endlink. Types: getBamTypeChar@. - * - * @return bool true on success, false on failure. This function can fail if the key is not a valid tag id (e.g. does + * @return bool true on success, false on failure. This function can fail if the key is not a valid tag id (e.g. 
does * not have length 2) or if the type of val is not an atomic value or a string (anything but * char *, char const *, a character, integer or float type is invalid). * @@ -870,100 +649,73 @@ inline char getBamTypeChar() * @see getBamTypeChar */ -/** -.Function.BamTagsDict#setTagValue -..class:Class.BamTagsDict -..cat:BAM I/O -..summary:Set the value of a tag through a @Class.BamTagsDict@. -..signature:setTagValue(tags, key, val[, typeC]) -..param.tags:The dict to modify. -...type:Class.BamTagsDict -..param.key:The key of the tag. -...type:Shortcut.CharString -...remarks:Must be a string of length 2. -..param.val:The value to set the the tag to. -..param.typeC:BAM type char to use. -...type:nolink:By default, the type is inflected using @Function.getBamTypeChar@. -...remarks:For portability (so the generated files are the same on all platforms), use a signed/unsigned qualified type for $val$ or give $typeC$. Also see the remarks for @Function.getBamTypeChar@. -..returns:$bool$ indicating the success. This function can fail if the key is not a valid tag id (e.g. does not have length 2) or if the type of $val$ is not an atomic value or a string (anything but $char *$, $char const *$, a character, integer or float type is invalid). -..see:Function.getBamTypeChar -..remarks:Note that $setTagValue$ does not cast the type, so $typeC$ only influences the type character written out but $val$ is written out in binary without modification. -..include:seqan/bam_io.h -..example.text:An example setting some atomic tag values. -..example.code: -CharString rawTagsText; -BamTagsDict tags(rawTagsText); -setTagValue(tags, "XA", 9); // int -setTagValue(tags, "XB", 9u); // unsigned int -setTagValue(tags, "XC", 'X'); // char -..example.text:If $char$ is equal to $__int8$ or $__uint8$ then the last line produces an entry with type 'c' or 'C'. 
To make sure that the type char 'A' (for "printable character") is written to the file, give it explicitely: -..example.code: -setTagValue(tags, "XC", 'X', 'A'); // Overrwrite XC, enforce type 'printable character'. -..example.text:Note that on most systems $int$s have a width of 32 bytes, but the C++ standard leaves this open. For all types but characters, you should not give an explicit type char but use one of the types with explicit width and signed/unsigned qualifier such as $__int32$, $__uint32$ etc. -..example.code: -// The following is not recommended since the type of $x$ is not "unsigned 32 bit int." -__int32 x = -1; -setTagValue(tags, "XB", x, 'I'); -// Instead, explicitely use an unsigned type if you need one. Note that your compiler -// might warn you about assigning -1 to an unsigned variable so you know that you are -// probably doing something unintended. -__uint32 y = -1; -setTagValue(tags, "XB", y); - -// Do not do this! -setTagValue(tags, "XA", 9, 'f'); // BOGUS since 9 is not a floating point number. -*/ +template +struct ToBamTagValueHelper_ +{ + TBamValueSequence &result; + TValue val; + char typeC; + + ToBamTagValueHelper_(TBamValueSequence &result, char typeC, TValue val) : + result(result), + val(val), + typeC(typeC) + {} + + template + bool operator() (Type) const + { + if (BamTypeChar::VALUE != typeC) + return false; + + union { + char raw[sizeof(Type)]; + Type i; + } tmp; + + tmp.i = static_cast(val); + append(result, toRange(&tmp.raw[0], &tmp.raw[sizeof(Type)])); + return true; + } +}; // Convert "atomic" value to BAM tag. Return whether val was atomic. 
-template -bool _toBamTagValue(CharString & result, T const & val, char const typeC) +template +SEQAN_FUNC_ENABLE_IF(Is >, bool) +_toBamTagValue(TBamValueSequence & result, TValue const & val, char typeC) { + if (typeC == 'Z') + return false; + appendValue(result, typeC); + ToBamTagValueHelper_ func(result, typeC, val); + if (tagApply(func, BamTagTypes())) + return true; - if (typeC == 'A' || typeC == 'c' || typeC == 'C') - { - resize(result, length(result) + 1); - char * dst = reinterpret_cast(&result[0]) + length(result) - 1; - char const * src = reinterpret_cast(&val); - memcpy(dst, src, 1); - } - else if (typeC == 's' || typeC == 'S') - { - resize(result, length(result) + 2); - char * dst = reinterpret_cast(&result[0]) + length(result) - 2; - char const * src = reinterpret_cast(&val); - memcpy(dst, src, 2); - } - else if (typeC == 'i' || typeC == 'I' || typeC == 'f') - { - resize(result, length(result) + 4); - char * dst = reinterpret_cast(&result[0]) + length(result) - 4; - char const * src = reinterpret_cast(&val); - memcpy(dst, src, 4); - } - else if (typeC == 'Z') - { - unsigned oldSize = length(result); - unsigned valLen = length(val) + 1; - resize(result, length(result) + valLen); - char * dst = reinterpret_cast(&result[0] + oldSize); - char const * src = reinterpret_cast(val); - memcpy(dst, src, valLen); - *(dst + valLen - 1) = '\0'; - } - else // non-string and variable sized type or invald - { + resize(result, length(result) - 1); + return false; +} + +template +SEQAN_FUNC_ENABLE_IF(IsSequence, bool) +_toBamTagValue(TBamValueSequence & result, TValue const & val, char typeC) +{ + if (typeC != 'Z') return false; - } + + appendValue(result, typeC); + append(result, val); + appendValue(result, '\0'); return true; } + // Sets an atomic value in a BamTagsDict. -// Returns true successful, can fail if val not atomic or key is not a valid tag id (2 chars). +// Returns true successful, can fail if val not atomic or key is not a valid tag id (2 chars). 
-template +template inline bool -setTagValue(BamTagsDict & tags, CharString const & key, T const & val, char const typeC) +setTagValue(BamTagsDict & tags, TKey const & key, TValue const & val, char typeC) { if (!hasIndex(tags)) buildIndex(tags); @@ -971,37 +723,97 @@ setTagValue(BamTagsDict & tags, CharString const & key, T const & val, char cons // Build value to insert/append. if (length(key) != 2u) return false; - CharString bamTagVal; - // append(bamTagVal, key); - if (!_toBamTagValue(bamTagVal, val, typeC)) - return false; - unsigned idx = 0; - if (findTagKey(idx, tags, key)) + Position::Type id = 0; + if (findTagKey(id, tags, key)) { - // TODO(holtgrew): Speed this up with positions? - CharString tmp; - tmp = getTagValue(tags, idx); - replace(host(tags), tags._positions[idx] + 2, tags._positions[idx] + 2 + length(tmp), bamTagVal); + CharString bamTagVal; + if (!_toBamTagValue(bamTagVal, val, typeC)) + return false; + + replace(host(tags), tags._positions[id] + 2, tags._positions[id + 1], bamTagVal); + clear(tags._positions); } else { append(host(tags), key); - append(host(tags), bamTagVal); + if (!_toBamTagValue(host(tags), val, typeC)) + { + resize(host(tags), length(host(tags)) - length(key)); + return false; + } + appendValue(tags._positions, length(host(tags))); } - // Remove index and return success. - clear(tags._positions); // Also necessary when appending? return true; } -template +template inline bool -setTagValue(BamTagsDict & tags, CharString const & key, T const & val) +setTagValue(BamTagsDict & tags, TKey const & key, TValue const & val) { - return setTagValue(tags, key, val, getBamTypeChar()); + return setTagValue(tags, key, val, BamTypeChar::VALUE); } +/*! + * @fn BamTagsDict#appendTagValue + * + * @headerfile + * + * @brief Append a tag/value pair to a @link BamTagsDict @endlink. + * + * @signature bool appendTagValue(tags, key, val[, typeC]); + * + * @param[in,out] tags The BamTagsDict to modify. + * @param[in] key The key of the tag. 
Must be a sequence of length 2. + * @param[in] val The value to set the tag to. + * @param[in] typeC BAM type char to use. For portability (so the generated files are the same on all platforms), use + * a signed/unsigned qualified type for val or give typeC. Also see the remarks + * for @link getBamTypeChar @endlink. Types: getBamTypeChar@. + * + * @return bool true on success, false on failure. This function can fail if the key is not a valid tag id (e.g. does + * not have length 2) or if the type of val is not an atomic value or a string (anything but + * char *, char const *, a character, integer or float type is invalid). + * + * @section Remarks + * + * @link BamTagsDict#setTagValue @endlink behaves like appendTagValue if key was not part of tags + * before. However, in this case appendTagValue is faster. + */ + +template +inline bool +appendTagValue(TSequence & tags, TKey const & key, TValue const & val, char typeC) +{ + // Build value to insert/append. + if (length(key) != 2u) + return false; + + append(tags, key); + return _toBamTagValue(tags, val, typeC); +} + +template +inline bool +appendTagValue(BamTagsDict & tags, TKey const & key, TValue const & val, char typeC) +{ + if (appendTagValue(host(tags), key, val, typeC)) + { + appendValue(tags._positions, length(host(tags))); + return true; + } + return false; +} + + +template +inline bool +appendTagValue(TDictOrString & tags, TKey const & key, TValue const & val) +{ + return appendTagValue(tags, key, val, BamTypeChar::VALUE); +} + + // ---------------------------------------------------------------------------- // Function eraseTag() // ---------------------------------------------------------------------------- @@ -1013,46 +825,45 @@ setTagValue(BamTagsDict & tags, CharString const & key, T const & val) * @signature bool eraseTag(tagsDict, key); * * @param[in,out] tagsDict The BamTagsDict to erase the tag from. - * @param[in] key The key of the tag to ersae, of type @link CharString @endlink. 
+ * @param[in] key The key of the tag to erase. * - * @return bool true if the tag was present for erasing, false if not. + * @return bool true if the tag could be erased, false if the key wasn't present. */ -/** -.Function.BamTagsDict#eraseTag -..class:Class.BamTagsDict -..summary:Erase tag from @Class.BamTagsDict@. -..cat:BAM I/O -..signature:eraseTag(tagsDict, key) -..param.tags:The dict to erase from. -...type:Class.BamTagsDict -..param.key:The key of the entry to remove. -...type:Shortcut.CharString -..returns:$bool$, indicating whether the key was present. -..include:seqan/bam_io.h - */ - -inline bool -eraseTag(BamTagsDict & tags, CharString const & key) +template +inline SEQAN_FUNC_DISABLE_IF(Is >, bool) +eraseTag(BamTagsDict & tags, TKey const & key) { if (!hasIndex(tags)) buildIndex(tags); - unsigned idx = 0; - if (!findTagKey(idx, tags, key)) + Position::Type id = 0; + if (!findTagKey(id, tags, key)) return false; - // TODO(holtgrew): Speed this up with positions? - CharString tmp; - tmp = getTagValue(tags, idx); - erase(host(tags), tags._positions[idx], tags._positions[idx + 1]); + erase(host(tags), tags._positions[id], tags._positions[id + 1]); + clear(tags._positions); + return true; +} - // TODO(weese): is this really working and tested? Should _positions be updated as well? - // Why is tmp not used? 
+template +inline SEQAN_FUNC_ENABLE_IF(Is >, bool) +eraseTag(BamTagsDict & tags, TId const & id) +{ + typedef typename Iterator, Standard>::Type TIter; + if (!hasIndex(tags)) + buildIndex(tags); + typename BamTagsDict::TPos delta = tags._positions[id + 1] - tags._positions[id]; + erase(host(tags), tags._positions[id], tags._positions[id + 1]); + erase(tags._positions, id); + TIter it = begin(tags._positions, Standard()) + id; + TIter itEnd = end(tags._positions, Standard()); + for (; it != itEnd; ++it) + *it -= delta; return true; } } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_BAM_TAGS_DICT_H_ +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_BAM_TAGS_DICT_H_ diff --git a/seqan/bam_io/cigar.h b/seqan/bam_io/cigar.h old mode 100755 new mode 100644 index d5b8c20..c5bb34a --- a/seqan/bam_io/cigar.h +++ b/seqan/bam_io/cigar.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: David Weese // ========================================================================== -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_CIGAR_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_CIGAR_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_CIGAR_H_ +#define INCLUDE_SEQAN_BAM_IO_CIGAR_H_ namespace seqan { @@ -54,7 +54,7 @@ namespace seqan { * @headerfile * @brief One entry of a CIGAR string. * - * @signature template + * @signature template <[typename TOperation[, typename TCount]]> * class CigarElement; * * @tparam TOperation Type to use for storing operations, defaults to char. @@ -78,50 +78,17 @@ namespace seqan { */ /*! 
- * @var TCount CigarElement::count + * @var TCount CigarElement::count; * * @brief The number of operations. */ /*! - * @var TOperation CigarElement::operation + * @var TOperation CigarElement::operation; * * @brief The described operation. */ -/** -.Class.CigarElement -..cat:Fragment Store -..summary:One entry of a CIGAR string. -..signature:CigarElement -..param.TOperation:Type to use for storing operations. -...default:nolink:$char$ -..param.TCount:Type to use for storing counts. -...default:nolink:$unsigned$ -..include:seqan/store.h - -.Memfunc.CigarElement#CigarElement -..class:Class.CigarElement -..summary:Constructor -..signature:CigarElement() -..signature:CigarElement(operation, count) -..param.operation:The operation to use. -...type:nolink:$TOperation$, typically $char$. -..param.count:The operation count. -...type:nolink:$Count$, typically $unsigned$. -..remarks:The default constructor initialized both @Memvar.CigarElement#operation@ and @Memvar.CigarElement#count@ with $0$. - -.Memvar.CigarElement#operation -..class:Class.CigarElement -..summary:The described operation. -..type:nolink:$TOperation$ - -.Memvar.CigarElement#count -..class:Class.CigarElement -..summary:The number of operations. 
-..type:nolink:$TCount$ -*/ - template struct CigarElement { @@ -142,6 +109,12 @@ struct CigarElement // Metafunctions // ============================================================================ +template +struct Size > +{ + typedef TCount Type; +}; + // ============================================================================ // Functions // ============================================================================ @@ -150,21 +123,21 @@ template inline bool operator>(CigarElement const & lhs, CigarElement const & rhs) { - return lhs.operation > rhs.operation || (lhs.operation == rhs.operation && lhs.count > rhs.count); + return lhs.operation > rhs.operation || (lhs.operation == rhs.operation && (lhs.count) > (rhs.count)); } template inline bool operator<(CigarElement const & lhs, CigarElement const & rhs) { - return lhs.operation < rhs.operation || (lhs.operation == rhs.operation && lhs.count < rhs.count); + return lhs.operation < rhs.operation || (lhs.operation == rhs.operation && (lhs.count) < (rhs.count)); } template inline bool operator==(CigarElement const & lhs, CigarElement const & rhs) { - return lhs.operation == rhs.operation && lhs.count == rhs.count; + return lhs.operation == rhs.operation && (lhs.count) == (rhs.count); } // ---------------------------------------------------------------------------- @@ -197,28 +170,51 @@ template < typename TMDString, typename TGaps1, typename TGaps2> -inline void +inline unsigned getMDString( TMDString &md, - TGaps1 &gaps1, - TGaps2 &gaps2) + TGaps1 &gaps1, // typically reference + TGaps2 &gaps2) // typically read { typedef typename Value::Type TMDChar; - typename Iterator::Type it1 = begin(gaps1); - typename Iterator::Type it2 = begin(gaps2); - char op, lastOp = ' '; - unsigned numOps = 0; + typedef typename Value::Type>::Type TVal1; + typedef typename Value::Type>::Type TVal2; + + typename Iterator::Type it1 = begin(gaps1); + typename Iterator::Type it2 = begin(gaps2); + char op, lastOp = ' '; + unsigned 
numOps = 0; + unsigned errors = 0; clear(md); for (; !atEnd(it1) && !atEnd(it2); goNext(it1), goNext(it2)) { - if (isGap(it1)) continue; + if (isGap(it1)) + { + if (!isGap(it2)) + ++errors; + continue; // insertion to the reference (gaps1) +// op = 'I'; // ignore insertions completely + } if (isGap(it2)) { - op = 'D'; + ++errors; +// if (op == 'I') // ignore paddings +// continue; + op = 'D'; // deletion from the reference (gaps1) } else - op = (*it1 == *it2)? 'M': 'R'; + { + if ((TVal1)*it1 == (TVal2)*it2) + { + op = 'M'; + } + else + { + op = 'R'; + ++errors; + } + } // append match run if (lastOp != op) @@ -230,21 +226,21 @@ getMDString( append(md, num.str()); } numOps = 0; - lastOp = op; } // append deleted/replaced reference character if (op != 'M') { - // add ^ from non-deletion to deletion + // add ^ for deleted reference bases (from non-deletion to deletion) if (op == 'D' && lastOp != 'D') appendValue(md, '^'); - // add 0 from deletion to replacement - if (op == 'R' && lastOp == 'D') + // add 0 for each replaced base that doesn't follow a match (for samtools/BWA compatibility) + else if (op == 'R' && lastOp != 'M') appendValue(md, '0'); appendValue(md, convert(*it1)); } + lastOp = op; ++numOps; } SEQAN_ASSERT_EQ(atEnd(it1), atEnd(it2)); @@ -254,6 +250,7 @@ getMDString( num << numOps; append(md, num.str()); } + return errors; } // ---------------------------------------------------------------------------- @@ -268,42 +265,46 @@ template < inline void getCigarString( TCigar &cigar, - TGaps1 &gaps1, - TGaps2 &gaps2, + TGaps1 &gaps1, // typically reference + TGaps2 &gaps2, // typically read TThresh splicedGapThresh) { - typename Iterator::Type it1 = begin(gaps1); - typename Iterator::Type it2 = begin(gaps2); - clear(cigar); - char op, lastOp = ' '; - unsigned numOps = 0; - - // std::cout << "gaps1\t" << gaps1 << std::endl; - // std::cout << "gaps2\t" << gaps2 << "\t" << clippedBeginPosition(gaps2) << std::endl; - for (; !atEnd(it1) && !atEnd(it2); goNext(it1), 
goNext(it2)) - { - if (isGap(it1)) - { - if (isGap(it2)) - op = 'P'; - else if (isClipped(it2)) - op = '?'; - else - op = 'I'; - } - else if (isClipped(it1)) - { - op = '?'; - } - else - { - if (isGap(it2)) - op = 'D'; - else if (isClipped(it2)) - op = 'S'; - else - op = 'M'; - } + typename Iterator::Type it1 = begin(gaps1); + typename Iterator::Type it2 = begin(gaps2); +// typedef typename Value::Type>::Type TVal1; +// typedef typename Value::Type>::Type TVal2; + + clear(cigar); + char op, lastOp = ' '; + unsigned numOps = 0; + + // std::cout << "gaps1\t" << gaps1 << std::endl; + // std::cout << "gaps2\t" << gaps2 << "\t" << clippedBeginPosition(gaps2) << std::endl; + for (; !atEnd(it1) && !atEnd(it2); goNext(it1), goNext(it2)) + { + if (isGap(it1)) + { + if (isGap(it2)) + op = 'P'; + else if (isClipped(it2)) + op = '?'; + else + op = 'I'; + } + else if (isClipped(it1)) + { + op = '?'; + } + else + { + if (isGap(it2)) + op = 'D'; + else if (isClipped(it2)) + op = 'S'; + else + op = 'M'; +// op = ((TVal1)*it1 == (TVal2)*it2)? 
'=': 'X'; + } // append CIGAR operation if (lastOp != op) @@ -324,16 +325,16 @@ getCigarString( } // if (atEnd(it1) != atEnd(it2)) // std::cerr << "Invalid pairwise alignment:" << std::endl << gaps1 << std::endl << gaps2 << std::endl; - SEQAN_CHECK(atEnd(it1) == atEnd(it2), "Cannot get CIGAR from invalid pairwise alignment!"); - if (lastOp == 'D' && numOps >= (unsigned)splicedGapThresh) - lastOp = 'N'; - if (numOps > 0) - { - std::stringstream num; - num << numOps; - append(cigar, num.str()); - appendValue(cigar, lastOp); - } + SEQAN_CHECK(atEnd(it1) == atEnd(it2), "Cannot get CIGAR from invalid pairwise alignment!"); + if (lastOp == 'D' && numOps >= (unsigned)splicedGapThresh) + lastOp = 'N'; + if (numOps > 0) + { + std::stringstream num; + num << numOps; + append(cigar, num.str()); + appendValue(cigar, lastOp); + } } template < @@ -343,8 +344,8 @@ template < inline void getCigarString( TCigar &cigar, - TGaps1 &gaps1, - TGaps2 &gaps2) + TGaps1 &gaps1, // typically reference + TGaps2 &gaps2) // typically read { return getCigarString(cigar, gaps1, gaps2, 20); } @@ -363,11 +364,14 @@ getCigarString( TGaps2 &gaps2, TThresh splicedGapThresh) { - typename Iterator::Type it1 = begin(gaps1); - typename Iterator::Type it2 = begin(gaps2); - clear(cigar); - char op = '?', lastOp = ' '; - unsigned numOps = 0; + typename Iterator::Type it1 = begin(gaps1); + typename Iterator::Type it2 = begin(gaps2); +// typedef typename Value::Type>::Type TVal1; +// typedef typename Value::Type>::Type TVal2; + + clear(cigar); + char op = '?', lastOp = ' '; + unsigned numOps = 0; // std::cout << gaps1 << std::endl; // std::cout << gaps2 << std::endl; @@ -393,6 +397,7 @@ getCigarString( else if (isClipped(it2)) op = 'S'; else +// op = ((TVal1)*it1 == (TVal2)*it2)? 
'=': 'X'; op = 'M'; } if (lastOp != op) @@ -419,38 +424,129 @@ getCigarString( template < typename TCigar, typename TMDString, typename TContig, typename TReadSeq, - typename TAlignedRead, typename TErrors, typename TAlignFunctor> + typename TAlignedRead, typename TErrors > inline void alignAndGetCigarString( - TCigar &cigar, TMDString &md, TContig &contig, TReadSeq &readSeq, - TAlignedRead &alignedRead, TErrors &errors, TAlignFunctor const & functor) + TCigar &cigar, TMDString &md, TContig &, TReadSeq &, + TAlignedRead &, TErrors &, Nothing const &) { - typedef Align TAlign; + cigar = "*"; + clear(md); +} - TAlign align; - resize(rows(align), 2); +struct BamAlignFunctorEditDistance +{ + typedef String > TGapAnchors; - if (alignedRead.beginPos <= alignedRead.endPos) - assignSource(row(align, 0), infix(contig.seq, alignedRead.beginPos, alignedRead.endPos)); - else - assignSource(row(align, 0), infix(contig.seq, alignedRead.endPos, alignedRead.beginPos)); + TGapAnchors contigAnchors, readAnchors; + + template + inline int + align(TGaps1 &gaps1, TGaps2 &gaps2, TErrors maxErrors) + { + return -globalAlignment( + gaps1, gaps2, + Score(), + -(int)maxErrors, (int)maxErrors + ); + } +}; + +struct BamAlignFunctorSemiGlobalGotoh +{ + typedef String > TGapAnchors; + + Score score; + TGapAnchors contigAnchors, readAnchors; + + BamAlignFunctorSemiGlobalGotoh(Score score_) : + score(score_) + {} + + template + inline int + align(TGaps1 &gaps1, TGaps2 &gaps2, TErrors maxErrors) + { + return globalAlignment( + gaps1, gaps2, score, + AlignConfig(), + -(int)maxErrors, (int)maxErrors, + Gotoh() + ) / scoreMismatch(score); + } +}; + +struct BamAlignFunctorDefault +{ +}; - assignSource(row(align, 1), readSeq); +template < + typename TCigar, typename TMDString, typename TContigInfix, typename TReadSeq, + typename TAlignedRead, typename TErrors, typename TAlignFunctor> +inline void +_alignAndGetCigarString( + TCigar &cigar, TMDString &md, TContigInfix const &contigInfix, TReadSeq const 
&fwdReadSeq, + TAlignedRead &, TErrors &errors, TAlignFunctor &functor) +{ + typedef Gaps > TContigGaps; + typedef Gaps > TReadGaps; + + clear(functor.contigAnchors); + clear(functor.readAnchors); + + TContigGaps contigGaps(contigInfix, functor.contigAnchors); + TReadGaps readGaps(fwdReadSeq, functor.readAnchors); + + // if there is already an alignment between contigInfix and fwdReadSeq with 0 or 1 error then + // we don't to realign as it contains no gaps + if (!(errors == 0 || (errors == 1 && length(contigInfix) == length(fwdReadSeq)))) + errors = functor.align(contigGaps, readGaps, errors); + + getCigarString(cigar, contigGaps, readGaps); + TErrors mdErrors = getMDString(md, contigGaps, readGaps); + + ignoreUnusedVariableWarning(mdErrors); + SEQAN_ASSERT_EQ(errors, mdErrors); +} + +template < + typename TCigar, typename TMDString, typename TContig, typename TReadSeq, + typename TAlignedRead, typename TErrors, typename TAlignFunctor> +inline void +alignAndGetCigarString( + TCigar &cigar, TMDString &md, TContig const &contig, TReadSeq const &readSeq, + TAlignedRead &alignedRead, TErrors &errors, TAlignFunctor &functor) +{ + typedef typename TContig::TContigSeq TContigSeq; + typedef typename Infix::Type TContigInfix; - if (!(errors == 0 || (errors == 1 && length(readSeq) == length(source(row(align, 0)))))) - errors = functor.align(align); + TContigInfix contigInfix; - getCigarString(cigar, row(align, 0), row(align, 1)); - getMDString(md, row(align, 0), row(align, 1)); + if (alignedRead.beginPos <= alignedRead.endPos) + { + contigInfix = infix(contig.seq, alignedRead.beginPos, alignedRead.endPos); + _alignAndGetCigarString(cigar, md, contigInfix, readSeq, alignedRead, errors, functor); + } + else + { + contigInfix = infix(contig.seq, alignedRead.endPos, alignedRead.beginPos); + _alignAndGetCigarString(cigar, md, contigInfix, reverseComplementString(readSeq), alignedRead, errors, functor); + } } -template +template < + typename TCigar, typename TMDString, typename 
TContig, typename TReadSeq, + typename TAlignedRead, typename TErrors> inline void -alignAndGetCigarString(TCigar &cigar, TMDString &md, TContig &contig, TReadSeq &readSeq, TAlignedRead &alignedRead, TErrors &, Nothing const &) +alignAndGetCigarString( + TCigar &cigar, TMDString &md, TContig const &contig, TReadSeq const &readSeq, + TAlignedRead &alignedRead, TErrors &errors, BamAlignFunctorDefault &) { - typedef typename TContig::TContigSeq TContigSeq; - typedef Gaps > TContigGaps; - typedef Gaps > TReadGaps; + typedef typename TContig::TContigSeq TContigSeq; + typedef Gaps > TContigGaps; + typedef typename ReverseComplementString::Type TRefCompReadSeq; + typedef Gaps > TReadGaps; + typedef Gaps > TRCReadGaps; TContigGaps contigGaps(contig.seq, contig.gaps); @@ -458,21 +554,22 @@ alignAndGetCigarString(TCigar &cigar, TMDString &md, TContig &contig, TReadSeq & { setClippedBeginPosition(contigGaps, alignedRead.beginPos); setClippedEndPosition(contigGaps, alignedRead.endPos); - } else + + TReadGaps readGaps(readSeq, alignedRead.gaps); + + getCigarString(cigar, contigGaps, readGaps); + errors = getMDString(md, contigGaps, readGaps); + } + else { setClippedBeginPosition(contigGaps, alignedRead.endPos); setClippedEndPosition(contigGaps, alignedRead.beginPos); - } - TReadGaps readGaps(readSeq, alignedRead.gaps); - // TContigGaps contigGaps2(contig.seq, contig.gaps); - // if (i == 4) - // printf("It's it!\n"); - // std::cerr << "read gaps: " << readGaps << std::endl; - // std::cerr << "contig gaps:" << contigGaps << std::endl; + TRCReadGaps readGaps(reverseComplementString(readSeq), alignedRead.gaps); - getCigarString(cigar, contigGaps, readGaps); - getMDString(md, contigGaps, readGaps); + getCigarString(cigar, contigGaps, readGaps); + errors = getMDString(md, contigGaps, readGaps); + } } // ---------------------------------------------------------------------------- @@ -480,9 +577,9 @@ alignAndGetCigarString(TCigar &cigar, TMDString &md, TContig &contig, TReadSeq & // 
---------------------------------------------------------------------------- template -inline void _getClippedLength(TCigarString const & cigar, TNum & sum) +inline void _getClippedLength(TNum & sum, TCigarString const & cigar) { - typedef typename Iterator::Type TCigarIter; + typedef typename Iterator::Type TCigarIter; TCigarIter it = begin(cigar, Standard()); TCigarIter itEnd = end(cigar, Standard()); @@ -498,7 +595,7 @@ inline void _getClippedLength(TCigarString const & cigar, TNum & sum) // ---------------------------------------------------------------------------- template -inline void _getLengthInRef(TCigarString const & cigar, TNum & sum) +inline void _getLengthInRef(TNum & sum, TCigarString const & cigar) { typedef typename Iterator::Type TCigarIter; @@ -511,13 +608,32 @@ inline void _getLengthInRef(TCigarString const & cigar, TNum & sum) sum += getValue(it).count; } +// ---------------------------------------------------------------------------- +// _getQueryLength() +// ---------------------------------------------------------------------------- + +template +inline typename Size::Type>::Type +_getQueryLength(TCigarString const & cigar) +{ + typedef typename Iterator::Type TCigarIter; + typedef typename Size::Type>::Type TSize; + TCigarIter it = begin(cigar, Standard()); + TCigarIter itEnd = end(cigar, Standard()); + + TSize len = 0; + for (; it != itEnd; ++it) + if (getValue(it).operation != 'D' && getValue(it).operation != 'H' && getValue(it).operation != 'N' && getValue(it).operation != 'P') + len += getValue(it).count; + return len; +} + // ---------------------------------------------------------------------------- // cigarToGapAnchorRead() // ---------------------------------------------------------------------------- -template -inline unsigned -cigarToGapAnchorRead(TCigarString const & cigar, TGaps & gaps) +template +unsigned cigarToGapAnchorRead(TGaps & gaps, TCigarString const & cigar) { typename Iterator::Type it = begin(gaps); bool atBegin = 
true; @@ -547,8 +663,7 @@ cigarToGapAnchorRead(TCigarString const & cigar, TGaps & gaps) // ---------------------------------------------------------------------------- template -inline unsigned -cigarToGapAnchorContig(TCigarString const & cigar, TGaps & gaps) +unsigned cigarToGapAnchorContig(TGaps & gaps, TCigarString const & cigar) { typename Iterator::Type it = begin(gaps); bool atBegin = true; @@ -575,4 +690,4 @@ cigarToGapAnchorContig(TCigarString const & cigar, TGaps & gaps) } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_CIGAR_H_ +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_CIGAR_H_ diff --git a/seqan/bam_io/read_bam.h b/seqan/bam_io/read_bam.h index ba55fd7..3377d6c 100644 --- a/seqan/bam_io/read_bam.h +++ b/seqan/bam_io/read_bam.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,56 +34,48 @@ // Code for reading Bam. // ========================================================================== -// TODO(holtgrew): Indexing. - -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_READ_BAM_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_READ_BAM_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_READ_BAM_H_ +#define INCLUDE_SEQAN_BAM_IO_READ_BAM_H_ namespace seqan { -// ============================================================================ -// Forwards -// ============================================================================ - // ============================================================================ // Tags, Classes, Enums // ============================================================================ /*! - * @defgroup SamBamIO SAM/BAM I/O - * @brief Tags for identifying SAM/BAM format. - */ - -/*! 
- * @tag SamBamIO#Bam + * @tag FileFormats#Bam * @brief Identify the BAM format. * - * @tag SamBamIO#Sam + * @tag FileFormats#Sam * @brief Identify the SAM format. */ -/** -.Tag.Bam -..cat:BAM I/O -..signature:Bam -..summary:Tag for identifying the BAM format. -..include:seqan/bam_io.h -..see:Tag.Sam -*/ - struct Bam_; typedef Tag Bam; + +template +struct FileExtensions +{ + static char const * VALUE[1]; // default is one extension +}; + template -struct FileFormatExtensions +char const * FileExtensions::VALUE[1] = { - static char const * VALUE[1]; + ".bam" // default output extension }; + template -char const * FileFormatExtensions::VALUE[1] = { - ".bam" }; +struct MagicHeader +{ + static unsigned char const VALUE[4]; +}; +template +unsigned char const MagicHeader::VALUE[4] = { 'B', 'A', 'M', '\1' }; // BAM's magic header // ============================================================================ // Metafunctions @@ -94,92 +86,46 @@ char const * FileFormatExtensions::VALUE[1] = { // ============================================================================ // ---------------------------------------------------------------------------- -// Function readRecord() BamHeader +// Function readHeader() BamHeader // ---------------------------------------------------------------------------- -/*! - * @fn SamBamIO#readRecord - * @brief Read a record from a SAM/BAM file. - * - * @signature int readRecord(record, context, stream, tag); - * @signature int readRecord(header, context, stream, tag); - * - * @param[out] record The @link BamAlignmentRecord @endlink object to read the information into. - * @param[out] header The @link BamHeader @endlink object to read the header information into. - * @param[in,out] context The BamIOContext object to use. - * @param[in,out] stream The @link StreamConcept Stream @endlink to read from. - * @param[in] tag The format tag, one of Sam and Bam. - * - * @return int A status code, 0 on success, != 0 on failure. 
- */ - -/** -.Function.readRecord -..signature:readRecord(headerRecord, context, stream, tag) -..param.header:@Class.BamHeader@ to read information into. -...type:Class.BamHeader -..param.context:The context to use for reading. -...type:Class.BamIOContext -..param.stream:The stream to read from (for BAM). -...remarks:BAM data can be read from any stream. For the proper decompression (from compressed BAM, the default) use @Spec.BGZF Stream@. -...type:Concept.StreamConcept -..param.tag:Format to read @Class.BamHeader@ from. -...type:Tag.Sam -...type:Tag.Bam -..include:seqan/bam_io.h -*/ - -template -int readRecord(BamHeader & header, - BamIOContext & context, - TStream & stream, - Bam const & /*tag*/) +template +inline void +readHeader(BamHeader & header, + BamIOContext & context, + TForwardIter & iter, + Bam const & /*tag*/) { - int res = 0; - + clear(header); // Read BAM magic string. - char magic[5] = "\0\0\0\0"; - res = streamReadBlock(&magic[0], stream, 4); - if (res != 4) - return 1; // EOF or error while reading. - if (strcmp(magic, "BAM\1") != 0) - return 1; // Magic was wrong. + String > magic; + read(magic, iter, 4); + if (magic != "BAM\1") + SEQAN_THROW(ParseError("Not in BAM format.")); // Read header text, including null padding. __int32 lText; - res = streamReadBlock(reinterpret_cast(&lText), stream, 4); - if (res != 4) - return 1; // Error reading the length of the header text. + readRawPod(lText, iter); + CharString samHeader; - resize(samHeader, lText); - res = streamReadBlock(&front(samHeader), stream, lText); + write(samHeader, iter, lText); + // Truncate to first position of '\0'. - typedef Iterator::Type TIter; - TIter it = begin(samHeader, Standard()); - for (; it != end(samHeader); ++it) - if (*it == '\0') - break; - resize(samHeader, it - begin(samHeader, Standard())); + cropAfterFirst(samHeader, EqualsChar<'\0'>()); // Parse out header records. 
- typedef Stream > THeaderStream; - THeaderStream headerStream(&samHeader[0], &samHeader[0] + length(samHeader)); - RecordReader > headerReader(headerStream); BamHeaderRecord headerRecord; - while (!atEnd(headerReader)) + Iterator::Type it = begin(samHeader); + while (!atEnd(it)) { clear(headerRecord); - res = readRecord(headerRecord, context, headerReader, Sam()); - if (res != 0) - return 1; // Error reading embedded SAM header. - appendValue(header.records, headerRecord); + readRecord(headerRecord, context, it, Sam()); + appendValue(header, headerRecord); } // Read # reference sequences. __int32 nRef; - res = streamReadBlock(reinterpret_cast(&nRef), stream, 4); - if (res != 4) - return 1; // Error reading the number of sequences. + readRawPod(nRef, iter); CharString name; clear(context.translateFile2GlobalRefId); @@ -189,219 +135,145 @@ int readRecord(BamHeader & header, { // Read length of the reference name. __int32 nName; - res = streamReadBlock(reinterpret_cast(&nName), stream, 4); - if (res != 4) - return 1; // Error reading the number of sequences. - // Read name of the reference sequence; - resize(name, nName); - res = streamReadBlock(&front(name), stream, nName); - if (res != nName) - return 1; // Error reading the number of sequences. + readRawPod(nName, iter); + clear(name); + write(name, iter, nName); resize(name, nName - 1); // Read length of the reference sequence. __int32 lRef; - res = streamReadBlock(reinterpret_cast(&lRef), stream, 4); - if (res != 4) - return 1; // Error reading the number of sequences. - - // Store sequence info. - typedef typename BamHeader::TSequenceInfo TSequenceInfo; - appendValue(header.sequenceInfos, TSequenceInfo(name, lRef)); - // Append contig name to name store, if not known already. 
- typename Size::Type globalRId = 0; - if (!getIdByName(nameStore(context), name, globalRId, nameStoreCache(context))) - { - globalRId = length(nameStore(context)); - appendName(nameStore(context), name, nameStoreCache(context)); - } - context.translateFile2GlobalRefId[i] = globalRId; + readRawPod(lRef, iter); + + // Add entry to name store and sequenceInfos if necessary. + // Compute translation from local ids (used in the BAM file) to corresponding ids in the name store + size_t globalRefId = nameToId(contigNamesCache(context), name); + context.translateFile2GlobalRefId[i] = globalRefId; + if (length(contigLengths(context)) <= globalRefId) + resize(contigLengths(context), globalRefId + 1, 0); + contigLengths(context)[globalRefId] = lRef; } - - return 0; } // ---------------------------------------------------------------------------- // Function readRecord() BamAlignmentRecord // ---------------------------------------------------------------------------- -/** -.Function.readRecord -..signature:readRecord(alignmentRecord, context, stream, tag) -..param.alignmentRecord.type:Class.BamAlignmentRecord -*/ - -template -int readRecord(BamAlignmentRecord & record, - BamIOContext & context, - TStream & stream, - Bam const & /*tag*/) +template +inline __int32 +_readBamRecordWithoutSize(TBuffer & rawRecord, TForwardIter & iter) { - int res = 0; - (void)context; // Only used for assertions. - - // Read size of the remaining block. - __int32 remainingBytes = 0; - res = streamReadBlock(reinterpret_cast(&remainingBytes), stream, 4); - if (res != 4) - return 1; // Error reading the number of sequences. - - // Reference sequence id. - SEQAN_ASSERT_GT(remainingBytes, 4); - record.rID = 0; - res = streamReadBlock(reinterpret_cast(&record.rID), stream, 4); - if (res != 4) - return res; - SEQAN_ASSERT_GEQ(record.rID, -1); - - // Translate file local rID into a global rID that is compatible with the context nameStore. 
+ __int32 recordLen = 0; + readRawPod(recordLen, iter); + + // fail, if we read "BAM\1" (did you miss to call readRecord(header, bamFile) first?) + if (recordLen == 0x014D4142) + SEQAN_THROW(ParseError("Unexpected BAM header encountered.")); + + clear(rawRecord); + write(rawRecord, iter, (size_t)recordLen); + return recordLen; +} + +template +inline void +_readBamRecord(TBuffer & rawRecord, TForwardIter & iter, Bam) +{ + __int32 recordLen = 0; + readRawPod(recordLen, iter); + + // fail, if we read "BAM\1" (did you miss to call readRecord(header, bamFile) first?) + if (recordLen == 0x014D4142) + SEQAN_THROW(ParseError("Unexpected BAM header encountered.")); + + clear(rawRecord); + appendRawPod(rawRecord, recordLen); + write(rawRecord, iter, (size_t)recordLen); +} + +template +inline void +readRecord(BamAlignmentRecord & record, + BamIOContext & context, + TForwardIter & iter, + Bam const & /* tag */) +{ + typedef typename Iterator::Type TCharIter; + typedef typename Iterator >, Standard>::Type SEQAN_RESTRICT TCigarIter; + typedef typename Iterator::Type SEQAN_RESTRICT TSeqIter; + typedef typename Iterator::Type SEQAN_RESTRICT TQualIter; + + // Read size and data of the remaining block in one chunk (fastest). + __int32 remainingBytes = _readBamRecordWithoutSize(context.buffer, iter); + TCharIter it = begin(context.buffer, Standard()); + + // BamAlignmentRecordCore. + arrayCopyForward(it, it + sizeof(BamAlignmentRecordCore), reinterpret_cast(&record)); + it += sizeof(BamAlignmentRecordCore); + + remainingBytes -= sizeof(BamAlignmentRecordCore) + record._l_qname + + record._n_cigar * 4 + (record._l_qseq + 1) / 2 + record._l_qseq; + SEQAN_ASSERT_GEQ(remainingBytes, 0); + + // Translate file local rID into a global rID that is compatible with the context contigNames. 
if (record.rID >= 0 && !empty(context.translateFile2GlobalRefId)) record.rID = context.translateFile2GlobalRefId[record.rID]; - if (record.rID >= 0) - SEQAN_ASSERT_LT(static_cast<__uint64>(record.rID), length(nameStore(context))); - remainingBytes -= 4; - - // 0-based position. - SEQAN_ASSERT_GT(remainingBytes, 4); - record.beginPos = 0; - res = streamReadBlock(reinterpret_cast(&record.beginPos), stream, 4); - if (res != 4) - return res; - remainingBytes -= 4; - - // Bin, mapping quality, read name length. - SEQAN_ASSERT_GT(remainingBytes, 4); - __uint32 binMqNl = 0; - res = streamReadBlock(reinterpret_cast(&binMqNl), stream, 4); - if (res != 4) - return res; - remainingBytes -= 4; - record.bin = binMqNl >> 16; - record.mapQ = (binMqNl >> 8) & 0x000000ff; - __uint16 lReadName = binMqNl & 0x000000ff; - - // flag, cigar string length. - SEQAN_ASSERT_GT(remainingBytes, 4); - __uint32 flagNc = 0; - res = streamReadBlock(reinterpret_cast(&flagNc), stream, 4); - if (res != 4) - return res; - remainingBytes -= 4; - record.flag = flagNc >> 16; - __uint16 nCigarOp = flagNc & 0x0000FFFF; - - // sequence length. - SEQAN_ASSERT_GT(remainingBytes, 4); - __int32 lSeq = 0; - res = streamReadBlock(reinterpret_cast(&lSeq), stream, 4); - if (res != 4) - return res; - remainingBytes -= 4; - - // reference id of the next fragment. - SEQAN_ASSERT_GT(remainingBytes, 4); - record.rNextId = 0; - res = streamReadBlock(reinterpret_cast(&record.rNextId), stream, 4); - if (res != 4) - return res; - remainingBytes -= 4; - - // 0-based position of the next fragment. - SEQAN_ASSERT_GT(remainingBytes, 4); - res = streamReadBlock(reinterpret_cast(&record.pNext), stream, 4); - if (res != 4) - return res; - remainingBytes -= 4; - - // template length. - SEQAN_ASSERT_GT(remainingBytes, 4); - res = streamReadBlock(reinterpret_cast(&record.tLen), stream, 4); - if (res != 4) - return res; - remainingBytes -= 4; - - // read name. 
- SEQAN_ASSERT_GT(remainingBytes, lReadName); - resize(record.qName, lReadName); - res = streamReadBlock(reinterpret_cast(&record.qName[0]), stream, lReadName); - if (res != lReadName) - return res; - resize(record.qName, lReadName - 1); - remainingBytes -= lReadName; + SEQAN_ASSERT_LT(static_cast<__uint64>(record.rID), length(contigNames(context))); + + // ... the same for rNextId + if (record.rNextId >= 0 && !empty(context.translateFile2GlobalRefId)) + record.rNextId = context.translateFile2GlobalRefId[record.rNextId]; + if (record.rNextId >= 0) + SEQAN_ASSERT_LT(static_cast<__uint64>(record.rNextId), length(contigNames(context))); + + // query name. + resize(record.qName, record._l_qname - 1, Exact()); + arrayCopyForward(it, it + record._l_qname - 1, begin(record.qName, Standard())); + it += record._l_qname; // cigar string. - SEQAN_ASSERT_GT(remainingBytes, nCigarOp * 4); - resize(record.cigar, nCigarOp, Exact()); - static char const * CIGAR_MAPPING = "MIDNSHP="; - typedef typename Iterator >, Rooted>::Type TCigarIter; - for (TCigarIter it = begin(record.cigar, Rooted()); !atEnd(it); goNext(it)) + resize(record.cigar, record._n_cigar, Exact()); + static char const * CIGAR_MAPPING = "MIDNSHP=X*******"; + TCigarIter cigEnd = end(record.cigar, Standard()); + for (TCigarIter cig = begin(record.cigar, Standard()); cig != cigEnd; ++cig) { - __uint32 ui = 0; - res = streamReadBlock(reinterpret_cast(&ui), stream, 4); - if (res != 4) - return res; - it->operation = CIGAR_MAPPING[ui & 0x0007]; - it->count = ui >> 4; + unsigned opAndCnt; + readRawPod(opAndCnt, it); + SEQAN_ASSERT_LEQ(opAndCnt & 15, 8u); + cig->operation = CIGAR_MAPPING[opAndCnt & 15]; + cig->count = opAndCnt >> 4; } - remainingBytes -= nCigarOp * 4; - - // sequence, 4-bit encoded "=ACMGRSVTWYHKDBN". 
- SEQAN_ASSERT_GT(remainingBytes, (lSeq + 2) / 2); - resize(record.seq, lSeq + 1, Exact()); - static char const * SEQ_MAPPING = "=ACMGRSVTWYHKDBN"; - typedef typename Iterator::Type TSeqIter; + // query sequence. + resize(record.seq, record._l_qseq, Exact()); + TSeqIter sit = begin(record.seq, Standard()); + TSeqIter sitEnd = sit + (record._l_qseq & ~1); + while (sit != sitEnd) { - // Note: Yes, we need separate index i and iterator. The iterator allows the fast iteration and i is for - // book-keeping since we potentially create too long seq records. - TSeqIter it = begin(record.seq, Rooted()); - for (__int32 i = 0; i < lSeq; i += 2) - { - __uint8 ui; - res = streamReadChar(reinterpret_cast(ui), stream); - if (res != 0) - return res; - *it++ = SEQ_MAPPING[ui >> 4]; - *it++ = SEQ_MAPPING[ui & 0x0f]; - } + unsigned char ui = getValue(it); + ++it; + assignValue(sit, Iupac(ui >> 4)); + ++sit; + assignValue(sit, Iupac(ui & 0x0f)); + ++sit; } - resize(record.seq, lSeq); // Possibly trim last, overlap base. - remainingBytes -= (lSeq + 1) / 2; + if (record._l_qseq & 1) + *sit++ = Iupac((__uint8)*it++ >> 4); // phred quality - SEQAN_ASSERT_GEQ(remainingBytes, lSeq); - resize(record.qual, lSeq, Exact()); - if (lSeq > 0) - { - res = streamReadBlock(&(record.qual[0]), stream, lSeq); - if (res != lSeq) - return res; - } + resize(record.qual, record._l_qseq, Exact()); // If qual is a sequence of 0xff (heuristic same as samtools: Only look at first byte) then we clear it, to get the // representation of '*'; - if (!empty(record.qual) && record.qual[0] == '\xFF') + TQualIter qitEnd = end(record.qual, Standard()); + for (TQualIter qit = begin(record.qual, Standard()); qit != qitEnd;) + *qit++ = '!' 
+ *it++; + if (!empty(record.qual) && record.qual[0] == '\xff') clear(record.qual); - typedef typename Iterator::Type TQualIter; - for (TQualIter it = begin(record.qual, Rooted()); !atEnd(it); goNext(it)) - *it += '!'; - remainingBytes -= lSeq; // tags - if (remainingBytes > 0) - { - resize(record.tags, remainingBytes); - res = streamReadBlock(&record.tags[0], stream, remainingBytes); - if (res != remainingBytes) - return 1; - } - else - { - clear(record.tags); - } - - return 0; + resize(record.tags, remainingBytes, Exact()); + arrayCopyForward(it, it + remainingBytes, begin(record.tags, Standard())); } } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_READ_BAM_H_ +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_READ_BAM_H_ diff --git a/seqan/bam_io/read_sam.h b/seqan/bam_io/read_sam.h index c822a8e..acf91d1 100644 --- a/seqan/bam_io/read_sam.h +++ b/seqan/bam_io/read_sam.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,30 +34,41 @@ // Code for reading SAM. 
// ========================================================================== -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_READ_SAM_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_READ_SAM_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_READ_SAM_H_ +#define INCLUDE_SEQAN_BAM_IO_READ_SAM_H_ namespace seqan { -// ============================================================================ -// Forwards -// ============================================================================ - // ============================================================================ // Tags, Classes, Enums // ============================================================================ -/** -.Tag.Sam -..cat:BAM I/O -..signature:Sam -..summary:Tag for identifying the SAM format. -..include:seqan/bam_io.h -..see:Tag.Bam -*/ - struct Sam_; -typedef Tag const Sam; +typedef Tag Sam; + + +template +struct FileExtensions +{ + static char const * VALUE[1]; // default is one extension +}; + +template +char const * FileExtensions::VALUE[1] = +{ + ".sam" // default output extension +}; + + +template +struct MagicHeader +{ + static unsigned char const * VALUE; +}; + +template +unsigned char const * MagicHeader::VALUE = NULL; // SAM has no magic header + enum SamTokenizeErrors_ { @@ -70,10 +81,6 @@ typedef Tag SamHeader; struct SamAlignment_; typedef Tag SamAlignment; -// ============================================================================ -// Metafunctions -// ============================================================================ - // ============================================================================ // Functions // ============================================================================ @@ -82,90 +89,61 @@ typedef Tag SamAlignment; // Function nextIs() SamHeader // ---------------------------------------------------------------------------- -template -inline bool nextIs(RecordReader & reader, SamHeader const & /*tag*/) -{ - if (atEnd(reader)) - return false; - return value(reader) == '@'; -} - -// 
---------------------------------------------------------------------------- -// Function nextIs() SamAlignment -// ---------------------------------------------------------------------------- - -template -inline bool nextIs(RecordReader & reader, SamAlignment const & /*tag*/) +template +inline bool nextIs(TForwardIter & iter, SamHeader const & /*tag*/) { - if (atEnd(reader)) + if (atEnd(iter)) return false; - return value(reader) != '@'; -} - -// ---------------------------------------------------------------------------- -// Function skipRecord() SamHeader -// ---------------------------------------------------------------------------- - -template -inline int skipRecord(RecordReader & reader, - SamHeader const & tag) -{ - if (atEnd(reader)) - return EOF_BEFORE_SUCCESS; - if (!nextIs(reader, tag)) - return SAM_INVALID_RECORD; - int res = skipLine(reader); - if (res == 0 || res == EOF_BEFORE_SUCCESS) - return 0; - else - return res; + return value(iter) == '@'; } -// ---------------------------------------------------------------------------- -// Function skipRecord() SamAlignment -// ---------------------------------------------------------------------------- - -template -inline int skipRecord(RecordReader & reader, - SamAlignment const & tag) -{ - if (atEnd(reader)) - return EOF_BEFORE_SUCCESS; - if (!nextIs(reader, tag)) - return SAM_INVALID_RECORD; - int res = skipLine(reader); - if (res == 0 || res == EOF_BEFORE_SUCCESS) - return 0; - else - return res; -} +//// ---------------------------------------------------------------------------- +//// Function skipRecord() SamHeader +//// ---------------------------------------------------------------------------- +// +//template +//inline void skipRecord(TForwardIter & iter, +// SamHeader const & /*tag*/) +//{ +// skipOne(iter, EqualsChar<'@'>()); +// skipLine(iter); +// return 0; +//} +// +//// ---------------------------------------------------------------------------- +//// Function skipRecord() SamAlignment +//// 
---------------------------------------------------------------------------- +// +//template +//inline void skipRecord(TForwardIter & iter, +// SamAlignment const & /*tag*/) +//{ +// skipOne(iter, EqualsChar<'@'>()); +// skipLine(iter); +// return 0; +//} // ---------------------------------------------------------------------------- // Function readRecord() BamHeaderRecord // ---------------------------------------------------------------------------- -template -int readRecord(BamHeaderRecord & record, - BamIOContext & context, - RecordReader > & reader, - Sam const & /*tag*/) +template +inline void +readRecord(BamHeaderRecord & record, + BamIOContext & context, + TForwardIter & iter, + Sam const & /*tag*/) { clear(record); // Make sure the first character is '@'. - char c = value(reader); - if (c != '@') - return SAM_INVALID_RECORD; - if (goNext(reader)) - return SAM_INVALID_RECORD; + skipOne(iter, EqualsChar<'@'>()); // Read the header tag. - char c1 = value(reader); - if (goNext(reader)) - return SAM_INVALID_RECORD; - char c2 = value(reader); - if (goNext(reader)) - return SAM_INVALID_RECORD; + char c1, c2; + readOne(c1, iter); + readOne(c2, iter); + // Determine header type. 
if (c1 == 'H' && c2 == 'D') record.type = BAM_HEADER_FIRST; @@ -178,333 +156,245 @@ int readRecord(BamHeaderRecord & record, else if (c1 == 'C' && c2 == 'O') record.type = BAM_HEADER_COMMENT; else - return SAM_INVALID_RECORD; + SEQAN_THROW(ParseError("Unknown SAM header type!")); + + CharString &buffer = context.buffer; if (record.type == BAM_HEADER_COMMENT) { - int res = skipChar(reader, '\t'); - if (res != 0) - return res; - CharString &buffer = context.buffer; + skipOne(iter, IsTab()); + + appendValue(record.tags, Pair()); + clear(buffer); - res = readLine(buffer, reader); - if (res != 0) - return res; - appendValue(record.tags, Pair(CharString(), buffer)); - return 0; // done, do not skip line a second time below + readLine(buffer, iter); + assign(back(record.tags).i2, buffer, Exact()); } else { // Read the rest of the line into the tag field of record. - int res = 0; - CharString key, val; - while (!atEnd(reader) && value(reader) == '\t') + while (!atEnd(iter) && value(iter) == '\t') { - clear(key); - clear(val); - - res = skipChar(reader, '\t'); - if (res != 0) - return res; - res = readUntilChar(key, reader, ':'); - if (res != 0) - return res; - if (goNext(reader)) - return SAM_INVALID_RECORD; - res = readUntilOneOf(val, reader, '\t', '\r', '\n'); - if (res != 0 && res != EOF_BEFORE_SUCCESS) - return res; - - appendValue(record.tags, Pair(key, val)); + skipOne(iter, IsTab()); + + appendValue(record.tags, Pair()); + + clear(buffer); + readUntil(buffer, iter, EqualsChar<':'>()); + assign(back(record.tags).i1, buffer, Exact()); + + skipOne(iter, EqualsChar<':'>()); + + clear(buffer); + readUntil(buffer, iter, OrFunctor()); + assign(back(record.tags).i2, buffer, Exact()); } + // Skip remaining line break + skipLine(iter); } - - // Skip remaining line break. 
- int res = skipLine(reader); - if (res != 0 && res != EOF_BEFORE_SUCCESS) - return res; - return 0; } // ---------------------------------------------------------------------------- // Function readRecord() BamHeader // ---------------------------------------------------------------------------- -/** -.Function.readRecord -..signature:readRecord(headerRecord, context, recordReader, tag) -..param.recordReader:The RecordReader to read from. -...type:Class.RecordReader -...remarks:Use for SAM. -*/ - -template -int readRecord(BamHeader & header, - BamIOContext & context, - RecordReader > & reader, - Sam const & tag) +template +inline void +readHeader(BamHeader & header, + BamIOContext & context, + TForwardIter & iter, + Sam const & tag) { - typedef typename BamHeader::TSequenceInfo TSequenceInfo; - BamHeaderRecord record; - while (nextIs(reader, SamHeader())) + while (nextIs(iter, SamHeader())) { clear(record); - int res = readRecord(record, context, reader, tag); - if (res != 0) - return res; - appendValue(header.records, record); + readRecord(record, context, iter, tag); + appendValue(header, record); // Get sequence information from @SQ header. if (record.type == BAM_HEADER_REFERENCE) { - CharString sn = "unknown"; - unsigned ln = 0; + CharString name; + unsigned lRef = 0; for (unsigned i = 0; i < length(record.tags); ++i) { if (record.tags[i].i1 == "SN") - { - sn = record.tags[i].i2; - } + name = record.tags[i].i2; else if (record.tags[i].i1 == "LN") - { - if (!lexicalCast2(ln, record.tags[i].i2)) - ln = 0; - } + lexicalCast(lRef, record.tags[i].i2); } - // Add name to name store cache if necessary. 
- unsigned contigId = 0; - if (!getIdByName(nameStore(context), sn, contigId, nameStoreCache(context))) - { - contigId = length(nameStore(context)); - appendName(nameStore(context), sn, nameStoreCache(context)); - } - - if (length(header.sequenceInfos) <= contigId) - resize(header.sequenceInfos, contigId + 1); - header.sequenceInfos[contigId] = TSequenceInfo(sn, ln); + // Add entry to name store and sequenceInfos if necessary. + size_t globalRefId = nameToId(contigNamesCache(context), name); + if (length(contigLengths(context)) <= globalRefId) + resize(contigLengths(context), globalRefId + 1, 0); + contigLengths(context)[globalRefId] = lRef; } } +} + +// ---------------------------------------------------------------------------- +// Function _readBamRecord() +// ---------------------------------------------------------------------------- - return 0; +template +inline void +_readBamRecord(TBuffer & rawRecord, TForwardIter & iter, Sam) +{ + clear(rawRecord); + readLine(rawRecord, iter); } // ---------------------------------------------------------------------------- // Function readRecord() BamAlignmentRecord // ---------------------------------------------------------------------------- -/** -.Function.readRecord -..signature:readRecord(alignmentRecord, context, recordReader, tag) -*/ - -template -int readRecord(BamAlignmentRecord & record, - BamIOContext & context, - RecordReader > & reader, - Sam const & /*tag*/) +template +inline void +readRecord(BamAlignmentRecord & record, + BamIOContext & context, + TForwardIter & iter, + Sam const & /*tag*/) { - clear(record); - CharString &buffer = context.buffer; + // fail, if we read "@" (did you miss to call readRecord(header, bamFile) first?) 
+ if (nextIs(iter, SamHeader())) + SEQAN_THROW(ParseError("Unexpected SAM header encountered.")); -#define SEQAN_SKIP_TAB \ - do \ - { \ - res = skipChar(reader, '\t'); \ - if (res != 0) \ - return res; \ - } \ - while (false) + OrFunctor, ParseError, Sam> > nextEntry; - int res = 0; + clear(record); + CharString &buffer = context.buffer; // QNAME - res = readUntilTabOrLineBreak(record.qName, reader); - if (res != 0) - return res; - SEQAN_SKIP_TAB; + readUntil(record.qName, iter, nextEntry); + skipOne(iter, IsTab()); // FLAG // TODO(holtgrew): Interpret hex and char as c-samtools -X does? clear(buffer); - res = readDigits(buffer, reader); - if (res != 0) - return res; + readUntil(buffer, iter, nextEntry); record.flag = lexicalCast<__uint16>(buffer); - SEQAN_SKIP_TAB; + skipOne(iter, IsTab()); // RNAME clear(buffer); - res = readUntilTabOrLineBreak(buffer, reader); - if (res != 0) - return res; + readUntil(buffer, iter, nextEntry); if (buffer == "*") - { record.rID = BamAlignmentRecord::INVALID_REFID; - } - else if (!getIdByName(nameStore(context), buffer, record.rID, nameStoreCache(context))) - { - record.rID = length(nameStore(context)); - appendName(nameStore(context), buffer, nameStoreCache(context)); - } - SEQAN_SKIP_TAB; + else + record.rID = nameToId(contigNamesCache(context), buffer); + skipOne(iter, IsTab()); // POS clear(buffer); - res = readUntilChar(buffer, reader, '\t'); - if (res != 0) - return res; - if (buffer == "*") - record.beginPos = BamAlignmentRecord::INVALID_POS; - else - record.beginPos = lexicalCast<__uint32>(buffer) - 1; - SEQAN_SKIP_TAB; + SEQAN_ASSERT_EQ((__int32)0 - 1, (__int32)BamAlignmentRecord::INVALID_POS); + readUntil(buffer, iter, nextEntry); + record.beginPos = (__int32)lexicalCast<__uint32>(buffer) - 1; + skipOne(iter, IsTab()); // MAPQ clear(buffer); - if (value(reader) == '*') + if (value(iter) == '*') { record.mapQ = 255; - goNext(reader); + skipOne(iter); } else { - res = readDigits(buffer, reader); - if (res != 0) - return 
res; + readUntil(buffer, iter, nextEntry); record.mapQ = lexicalCast<__uint16>(buffer); } - SEQAN_SKIP_TAB; + skipOne(iter, IsTab()); // CIGAR CigarElement<> element; - if (atEnd(reader)) - return EOF_BEFORE_SUCCESS; - if (value(reader) == '*') - { - goNext(reader); - } + if (value(iter) == '*') + skipOne(iter); else { do { clear(buffer); - res = readDigits(buffer, reader); - if (res != 0) - return res; + readUntil(buffer, iter, OrFunctor, ParseError, Sam> >()); element.count = lexicalCast<__uint32>(buffer); - element.operation = value(reader); - if (goNext(reader)) - return EOF_BEFORE_SUCCESS; + element.operation = value(iter); + skipOne(iter); appendValue(record.cigar, element); - } while (value(reader) != '\t'); + } while (value(iter) != '\t'); } - SEQAN_SKIP_TAB; + skipOne(iter, IsTab()); // RNEXT clear(buffer); - res = readUntilChar(buffer, reader, '\t'); - if (res != 0) - return res; + readUntil(buffer, iter, nextEntry); if (buffer == "*") - { record.rNextId = BamAlignmentRecord::INVALID_REFID; - } else if (buffer == "=") - { record.rNextId = record.rID; - } - else if (!getIdByName(nameStore(context), buffer, record.rNextId, nameStoreCache(context))) - { - record.rNextId = length(nameStore(context)); - appendName(nameStore(context), buffer, nameStoreCache(context)); - } - SEQAN_SKIP_TAB; + else + record.rNextId = nameToId(contigNamesCache(context), buffer); + skipOne(iter, IsTab()); // PNEXT - if (atEnd(reader)) - return EOF_BEFORE_SUCCESS; - if (value(reader) == '*') + if (value(iter) == '*') { record.pNext = BamAlignmentRecord::INVALID_POS; - goNext(reader); + skipOne(iter); } else { clear(buffer); - res = readDigits(buffer, reader); - if (res != 0) - return res; - record.pNext = lexicalCast<__uint32>(buffer) - 1; + readUntil(buffer, iter, nextEntry); + record.pNext = (__int32)lexicalCast<__uint32>(buffer) - 1; } - SEQAN_SKIP_TAB; + skipOne(iter, IsTab()); // TLEN - if (atEnd(reader)) - return EOF_BEFORE_SUCCESS; - if (value(reader) == '*') + if 
(value(iter) == '*') { record.tLen = MaxValue<__int32>::VALUE; - goNext(reader); + skipOne(iter); } else { clear(buffer); - if (value(reader) == '-') - { - appendValue(buffer, value(reader)); - if (goNext(reader)) - return SAM_INVALID_RECORD; - } - res = readDigits(buffer, reader); - if (res != 0) - return res; + readUntil(buffer, iter, nextEntry); record.tLen = lexicalCast<__int32>(buffer); } - SEQAN_SKIP_TAB; + skipOne(iter, IsTab()); // SEQ - res = readUntilTabOrLineBreak(record.seq, reader); - if (res != 0) - return res; + readUntil(record.seq, iter, nextEntry); // Handle case of missing sequence: Clear seq string as documented. if (record.seq == "*") clear(record.seq); - SEQAN_SKIP_TAB; + skipOne(iter, IsTab()); // QUAL - res = readUntilTabOrLineBreak(record.qual, reader); - if (res == EOF_BEFORE_SUCCESS) // The record ends on EOF. - return 0; - if (res != 0) - return res; + readUntil(record.qual, iter, OrFunctor()); + // Handle case of missing quality: Clear qual string as documented. if (record.qual == "*") clear(record.qual); // The following list of tags is optional. A line break or EOF could also follow. 
- if (atEnd(reader)) - return 0; - if (value(reader) != '\t') + if (atEnd(iter)) + return; + if (value(iter) != '\t') { - res = skipLine(reader); - return res; + skipLine(iter); + return; } - SEQAN_SKIP_TAB; + skipOne(iter, IsTab()); // TAGS clear(buffer); - res = readLine(buffer, reader); - if (res != 0 && res != EOF_BEFORE_SUCCESS) - return res; - assignTagsSamToBam(record.tags, buffer); - - return 0; - -#undef SEQAN_SKIP_TAB + readLine(buffer, iter); + appendTagsSamToBam(record.tags, buffer); } } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_READ_SAM_H_ +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_READ_SAM_H_ + diff --git a/seqan/bam_io/write_bam.h b/seqan/bam_io/write_bam.h index 4f6c5c7..3fd7ca1 100644 --- a/seqan/bam_io/write_bam.h +++ b/seqan/bam_io/write_bam.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -30,30 +30,16 @@ // // ========================================================================== // Author: Manuel Holtgrewe +// Author: David Weese // ========================================================================== // Code for writing BAM. // ========================================================================== -// TODO(holtgrew): Add buffer to context? -// TODO(holtgrew): Rename to writeRecord from write2! Go over deprecated alias! 
- -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_WRITE_BAM_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_WRITE_BAM_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_WRITE_BAM_H_ +#define INCLUDE_SEQAN_BAM_IO_WRITE_BAM_H_ namespace seqan { -// ============================================================================ -// Forwards -// ============================================================================ - -// ============================================================================ -// Tags, Classes, Enums -// ============================================================================ - -// ============================================================================ -// Metafunctions -// ============================================================================ - // ============================================================================ // Functions // ============================================================================ @@ -62,82 +48,48 @@ namespace seqan { // Function writeRecord() BamHeader // ---------------------------------------------------------------------------- -/*! - * @fn SamBamIO#write2 - * @brief Write a record to a SAM/BAM file. - * - * @signature int writeRecord(stream, record, context, tag); - * @signature int writeRecord(stream, header, context, tag); - * - * @param[in,out] stream The @link StreamConcept Stream @endlink to write to. - * @param[out] record The @link BamAlignmentRecord @endlink object to write out. - * @param[out] header The @link BamHeader @endlink object to write out. - * @param[in,out] context The BamIOContext object to use. - * @param[in] tag The format tag, one of Sam and Bam. - * - * @return int A status code, 0 on success, != 0 on failure. - */ - - -template -int write2(TStream & stream, +template +void write(TTarget & target, BamHeader const & header, - BamIOContext const & context, + BamIOContext & context, Bam const & /*tag*/) { - int res = streamWriteBlock(stream, "BAM\1", 4); - if (res != 4) - return 1; // Could not write magic. 
+ write(target, "BAM\1"); + clear(context.buffer); // Create text of header. - CharString headerBuffer; - for (unsigned i = 0; i < length(header.records); ++i) - { - res = write2(headerBuffer, header.records[i], context, Sam()); - if (res != 0) - return 1; // Error writing header to buffer. - } + for (unsigned i = 0; i < length(header); ++i) + write(context.buffer, header[i], context, Sam()); + // Note that we do not write out a null-character to terminate the header. This would be valid by the SAM standard // but the samtools do not expect this and write out the '\0' when converting from BAM to SAM. - // appendValue(headerBuffer, '\0'); + // appendValue(context.buffer, '\0'); // Write text header. - __int32 lText = length(headerBuffer); - res = streamWriteBlock(stream, reinterpret_cast(&lText), 4); - if (res != 4) - return 1; // Error writing l_text. - - res = streamWriteBlock(stream, &headerBuffer[0], lText); + appendRawPod(target, (__int32)length(context.buffer)); + write(target, context.buffer); // Write references. - __int32 nRef = _max(length(header.sequenceInfos), length(nameStore(context))); - res = streamWriteBlock(stream, reinterpret_cast(&nRef), 4); - if (res != 4) - return 1; // Error writing n_ref; + __int32 nRef = _max(length(contigNames(context)), length(contigLengths(context))); + appendRawPod(target, nRef); - for (unsigned i = 0; i < length(header.sequenceInfos); ++i) + for (__int32 i = 0; i < nRef; ++i) { - __int32 lName = length(header.sequenceInfos[i].i1) + 1; - res = streamWriteBlock(stream, reinterpret_cast(&lName), 4); - if (res != 4) - return 1; // Error writing l_name; - - res = streamWriteBlock(stream, &header.sequenceInfos[i].i1[0], lName - 1); - if (res != lName - 1) - return 1; // Error writing name; - - char const n = '\0'; - res = streamWriteBlock(stream, &n, 1); - if (res != 1) - return 1; // Error writing trailing '\0'. 
- - __int32 lRef = header.sequenceInfos[i].i2; - res = streamWriteBlock(stream, reinterpret_cast(&lRef), 4); - if (res != 4) - return 1; // Error writing l_ref; + if (i < (__int32)length(contigNames(context))) + { + appendRawPod(target, (__int32)(length(contigNames(context)[i]) + 1)); + write(target, contigNames(context)[i]); + } + else + { + appendRawPod(target, (__int32)1); + } + writeValue(target, '\0'); + __int32 lRef = 0; + if (i < (__int32)length(contigLengths(context))) + lRef = contigLengths(context)[i]; + appendRawPod(target, lRef); } - - return 0; } // ---------------------------------------------------------------------------- @@ -165,55 +117,44 @@ static inline int _reg2Bin(uint32_t beg, uint32_t end) return 0; } -template -int write2(TStream & stream, - BamAlignmentRecord const & record, - BamIOContext const & /*context*/, - Bam const & /*tag*/) +inline __uint32 +updateLengths(BamAlignmentRecord const & record) { - CharString buffer; - - // First, write record to buffer. + // update internal lengths. 
+ record._l_qname = length(record.qName) + 1; + record._n_cigar = length(record.cigar); + record._l_qseq = length(record.seq); + + return sizeof(BamAlignmentRecordCore) + record._l_qname + + record._n_cigar * 4 + (record._l_qseq + 1) / 2 + record._l_qseq + + length(record.tags); +} - // refID - streamWriteBlock(buffer, reinterpret_cast(&record.rID), 4); - // pos - streamWriteBlock(buffer, reinterpret_cast(&record.beginPos), 4); +template +inline void +_writeBamRecord(TTarget & target, + BamAlignmentRecord const & record, + Bam const & /*tag*/) +{ + typedef typename Iterator > const, Standard>::Type SEQAN_RESTRICT TCigarIter; + typedef typename Iterator::Type SEQAN_RESTRICT TSeqIter; + typedef typename Iterator::Type SEQAN_RESTRICT TQualIter; // bin_mq_nl - SEQAN_ASSERT_LT(length(record.qName) + 1u, 255u); - __uint8 lReadName = length(record.qName) + 1; unsigned l = 0; - _getLengthInRef(record.cigar, l); - __uint32 bin = _reg2Bin(record.beginPos, record.beginPos + l); - __uint32 binMqNl = (bin << 16) | (record.mapQ << 8) | lReadName; - streamWriteBlock(buffer, reinterpret_cast(&binMqNl), 4); - - // flag_nc - __uint16 nCigarOp = length(record.cigar); - __uint32 flagNc = (record.flag << 16) | nCigarOp; - streamWriteBlock(buffer, reinterpret_cast(&flagNc), 4); - - // l_seq - __int32 lSeq = length(record.seq); - streamWriteBlock(buffer, reinterpret_cast(&lSeq), 4); + _getLengthInRef(l, record.cigar); + record.bin =_reg2Bin(record.beginPos, record.beginPos + std::max(1u, l)); - // next_refID - streamWriteBlock(buffer, reinterpret_cast(&record.rNextId), 4); - - // next_pos - streamWriteBlock(buffer, reinterpret_cast(&record.pNext), 4); - - // tlen - streamWriteBlock(buffer, reinterpret_cast(&record.tLen), 4); + // Write fixed-size BamAlignmentRecordCore. 
+ appendRawPod(target, (BamAlignmentRecordCore &)record); // read_name - streamWriteBlock(buffer, reinterpret_cast(&record.qName[0]), lReadName - 1); - streamWriteChar(buffer, '\0'); + write(target, record.qName); + writeValue(target, '\0'); // cigar - static __uint8 const MAP[256] = + static unsigned char const MAP[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -232,72 +173,91 @@ int write2(TStream & stream, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - for (unsigned i = 0; i < length(record.cigar); ++i) - { - __uint32 x = record.cigar[i].count; - x <<= 4; - x |= MAP[static_cast(record.cigar[i].operation)]; - streamWriteBlock(buffer, reinterpret_cast(&x), 4); - } + TCigarIter citEnd = end(record.cigar, Standard()); + for (TCigarIter cit = begin(record.cigar, Standard()); cit != citEnd; ++cit) + appendRawPod(target, ((__uint32)cit->count << 4) | MAP[(unsigned char)cit->operation]); // seq - static __uint8 const MAP2[256] = - { - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, - 15, 1, 14, 2, 13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, - 15, 15, 5, 6, 8, 15, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 - }; - __uint8 c = 0; - for (int i = 0; i < lSeq; ++i) + TSeqIter sit = begin(record.seq, Standard()); + TSeqIter sitEnd = sit + (record._l_qseq & ~1); + while (sit != sitEnd) { - c <<= 4; - c &= 0xf0; - c |= MAP2[static_cast(record.seq[i])]; - if (i % 2 == 1) - streamWriteChar(buffer, c); - } - if (lSeq % 2 == 1) - { - c <<= 4; - c &= 0xf0; - streamWriteChar(buffer, c); + unsigned char x = (ordValue(getValue(sit++)) << 4); + writeValue(target, x | ordValue(getValue(sit++))); } + if (record._l_qseq & 1) + writeValue(target, ordValue(getValue(sit++)) << 4); // qual - if (empty(record.qual)) + SEQAN_ASSERT_LEQ(length(record.qual), length(record.seq)); + TQualIter qit = begin(record.qual, Standard()); + TQualIter qitEnd = end(record.qual, Standard()); + TQualIter qitVirtEnd = qit + record._l_qseq; + while (qit != qitEnd) + writeValue(target, *qit++ - '!'); + for (; qit != qitVirtEnd; ++qit) + writeValue(target, '\xff'); // fill with zero qualities + + // tags + write(target, record.tags); +} + +template +inline void +_writeBamRecordWrapper(TTarget & target, + BamAlignmentRecord const & record, + Nothing & /* range */, + __uint32 size, + Bam const & tag) +{ + appendRawPod(target, size); + _writeBamRecord(target, record, tag); +} + +template +inline void +_writeBamRecordWrapper(TTarget & target, + BamAlignmentRecord const & record, + Range & range, + __uint32 size, + Bam const & tag) +{ + if (SEQAN_LIKELY(size + 4 <= length(range))) { - for (unsigned i = 0; i < length(record.qual); ++i) - streamWriteChar(buffer, static_cast(0xff)); + appendRawPod(range.begin, size); + _writeBamRecord(range.begin, record, tag); + advanceChunk(target, size + 4); } else { - for (unsigned i = 0; i < length(record.qual); ++i) - streamWriteChar(buffer, static_cast(record.qual[i] - '!')); + appendRawPod(target, size); + _writeBamRecord(target, record, tag); } +} - // tags - if 
(length(record.tags) > 0u) - streamWriteBlock(buffer, reinterpret_cast(&record.tags[0]), length(record.tags)); +template +void write(TTarget & target, + BamAlignmentRecord const & record, + BamIOContext & context, + Bam const & tag) +{ + // Check for valid IO Context. + SEQAN_ASSERT_LT_MSG(record.rID, static_cast<__int32>(length(contigNames(context))), "BAM IO Assertion: Unknown REF ID!"); + SEQAN_ASSERT_LT_MSG(record.rNextId, static_cast<__int32>(length(contigNames(context))), "BAM IO Assertion: Unknown NEXT REF ID!"); + ignoreUnusedVariableWarning(context); + + // Update internal lengths + __uint32 size = updateLengths(record); + + // Reserve chunk memory + reserveChunk(target, 4 + size, Output()); - // buffer to stream - __uint32 blockSize = length(buffer); - streamWriteBlock(stream, reinterpret_cast(&blockSize), 4); - return streamWriteBlock(stream, &buffer[0], blockSize) != blockSize; + // Write length and record + typename Chunk::Type ochunk; + getChunk(ochunk, target, Output()); + _writeBamRecordWrapper(target, record, ochunk, size, tag); } } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_WRITE_BAM_H_ +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_WRITE_BAM_H_ diff --git a/seqan/bam_io/write_sam.h b/seqan/bam_io/write_sam.h index dfc2a74..bb101e1 100644 --- a/seqan/bam_io/write_sam.h +++ b/seqan/bam_io/write_sam.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,254 +34,178 @@ // Code for writing SAM. 
// ========================================================================== -#ifndef CORE_INCLUDE_SEQAN_BAM_IO_WRITE_SAM_H_ -#define CORE_INCLUDE_SEQAN_BAM_IO_WRITE_SAM_H_ +#ifndef INCLUDE_SEQAN_BAM_IO_WRITE_SAM_H_ +#define INCLUDE_SEQAN_BAM_IO_WRITE_SAM_H_ namespace seqan { -// ============================================================================ -// Forwards -// ============================================================================ - -// ============================================================================ -// Tags, Classes, Enums -// ============================================================================ - -// ============================================================================ -// Metafunctions -// ============================================================================ - // ============================================================================ // Functions // ============================================================================ // ---------------------------------------------------------------------------- -// Function write2() BamHeaderRecord +// Function write() BamHeaderRecord // ---------------------------------------------------------------------------- -template -int write2(TStream & stream, - BamHeaderRecord const & header, - BamIOContext const & /*context*/, - Sam const & /*tag*/) +template +inline void write(TTarget & target, + BamHeaderRecord const & header, + BamIOContext const & /*context*/, + Sam const & /*tag*/) { char const * headerTypes[] = {"@HD", "@SQ", "@RG", "@PG", "@CO"}; - streamPut(stream, headerTypes[header.type]); - if (header.type == BAM_HEADER_COMMENT) + write(target, headerTypes[header.type]); + + if (header.type == BAM_HEADER_COMMENT && !empty(header.tags)) { - streamPut(stream, '\t'); - streamPut(stream, header.tags[0].i2); + writeValue(target, '\t'); + write(target, header.tags[0].i2); } else { for (unsigned i = 0; i < length(header.tags); ++i) { - streamPut(stream, '\t'); - 
streamPut(stream, header.tags[i].i1); - streamPut(stream, ':'); - streamPut(stream, header.tags[i].i2); + writeValue(target, '\t'); + write(target, header.tags[i].i1); + writeValue(target, ':'); + write(target, header.tags[i].i2); } } - int res = streamPut(stream, '\n'); - if (res != 0) - return res; - - return 0; + writeValue(target, '\n'); } // ---------------------------------------------------------------------------- -// Function write2() BamHeader +// Function write() BamHeader // ---------------------------------------------------------------------------- -template -int write2(TStream & stream, - BamHeader const & header, - BamIOContext const & context, - Sam const & tag) +template +inline void write(TTarget & target, + BamHeader const & header, + BamIOContext const & context, + Sam const & tag) { - std::set writtenSeqInfos; + String writtenSeqInfos; + resize(writtenSeqInfos, length(contigNames(context)), false); - for (unsigned i = 0; i < length(header.records); ++i) + size_t globalRefId = 0; + for (unsigned i = 0; i < length(header); ++i) { - BamHeaderRecord const & record = header.records[i]; + BamHeaderRecord const & record = header[i]; if (record.type == BAM_HEADER_REFERENCE) - { - for (unsigned i = 0; i < length(record.tags); ++i) - { - if (record.tags[i].i1 == "SN") + for (unsigned j = 0; j < length(record.tags); ++j) + if (record.tags[j].i1 == "SN") { - writtenSeqInfos.insert(record.tags[i].i2); + if (getIdByName(globalRefId, contigNamesCache(context), record.tags[j].i2)) + writtenSeqInfos[globalRefId] = true; break; } - } - } - int res = write2(stream, record, context, tag); - if (res != 0) - return res; + write(target, record, context, tag); } // Write missing @SQ header records. 
- for (unsigned i = 0; i < length(header.sequenceInfos); ++i) + SEQAN_ASSERT_LEQ(length(contigLengths(context)), length(contigNames(context))); + for (unsigned i = 0; i < length(contigLengths(context)); ++i) { - if (writtenSeqInfos.find(header.sequenceInfos[i].i1) != writtenSeqInfos.end()) + if (writtenSeqInfos[i]) continue; - int res = streamPut(stream, "@SQ\tSN:"); - if (res != 0) - return res; - - res = streamPut(stream, header.sequenceInfos[i].i1); - if (res != 0) - return res; - - res = streamPut(stream, "\tLN:"); - if (res != 0) - return res; - - res = streamPut(stream, header.sequenceInfos[i].i2); - if (res != 0) - return res; - - res = streamPut(stream, '\n'); - if (res != 0) - return res; + write(target, "@SQ\tSN:"); + write(target, contigNames(context)[i]); + write(target, "\tLN:"); + appendNumber(target, contigLengths(context)[i]); + writeValue(target, '\n'); } - - return 0; } // ---------------------------------------------------------------------------- -// Function write2() BamAlignmentRecord +// Function write() BamAlignmentRecord // ---------------------------------------------------------------------------- -template -int write2(TStream & stream, - BamAlignmentRecord const & record, - BamIOContext const & context, - Sam const & /*tag*/) +template +inline void write(TTarget & target, + BamAlignmentRecord const & record, + BamIOContext const & context, + Sam const & /*tag*/) { - int res = 0; - -#define SEQAN_PUT_TAB \ - do { \ - res = streamPut(stream, '\t'); \ - if (res != 0) \ - return res; \ - } \ - while (false) - - res = streamPut(stream, record.qName); - if (res != 0) - return res; - - SEQAN_PUT_TAB; + // Check for valid IO Context. 
+ SEQAN_ASSERT_LT_MSG(record.rID, static_cast<__int32>(length(contigNames(context))), "SAM IO Assertion: Unknown REF ID!"); + SEQAN_ASSERT_LT_MSG(record.rNextId, static_cast<__int32>(length(contigNames(context))), "SAM IO Assertion: Unknown NEXT REF ID!"); - res = streamPut(stream, record.flag); - if (res != 0) - return res; + write(target, record.qName); + writeValue(target, '\t'); - SEQAN_PUT_TAB; + appendNumber(target, record.flag); + writeValue(target, '\t'); if (record.rID == BamAlignmentRecord::INVALID_REFID) - res = streamPut(stream, '*'); + writeValue(target, '*'); else - res = streamPut(stream, nameStore(context)[record.rID]); - if (res != 0) - return res; + write(target, contigNames(context)[record.rID]); - SEQAN_PUT_TAB; + writeValue(target, '\t'); - res = streamPut(stream, record.beginPos + 1); - if (res != 0) - return res; + SEQAN_ASSERT_EQ((__int32)BamAlignmentRecord::INVALID_POS + 1, (__int32)0); + appendNumber(target, record.beginPos + 1); - SEQAN_PUT_TAB; + writeValue(target, '\t'); - res = streamPut(stream, static_cast<__uint16>(record.mapQ)); - if (res != 0) - return res; - - SEQAN_PUT_TAB; + appendNumber(target, static_cast<__uint16>(record.mapQ)); + writeValue(target, '\t'); if (empty(record.cigar)) - { - res = streamPut(stream, '*'); - if (res != 0) - return res; - } + writeValue(target, '*'); else - { for (unsigned i = 0; i < length(record.cigar); ++i) { - res = streamPut(stream, record.cigar[i].count); - if (res != 0) - return res; - - res = streamPut(stream, record.cigar[i].operation); - if (res != 0) - return res; + appendNumber(target, record.cigar[i].count); + writeValue(target, record.cigar[i].operation); } - } - SEQAN_PUT_TAB; + writeValue(target, '\t'); if (record.rNextId == BamAlignmentRecord::INVALID_REFID) - res = streamPut(stream, '*'); + writeValue(target, '*'); else if (record.rID == record.rNextId) - res = streamPut(stream, '='); + writeValue(target, '='); else - res = streamPut(stream, nameStore(context)[record.rNextId]); - if 
(res != 0) - return res; + write(target, contigNames(context)[record.rNextId]); - SEQAN_PUT_TAB; + writeValue(target, '\t'); - res = streamPut(stream, record.pNext + 1); - if (res != 0) - return res; + appendNumber(target, record.pNext + 1); - SEQAN_PUT_TAB; + writeValue(target, '\t'); if (record.tLen == BamAlignmentRecord::INVALID_LEN) - res = streamPut(stream, '0'); + writeValue(target, '0'); else - res = streamPut(stream, record.tLen); - if (res != 0) - return res; + appendNumber(target, record.tLen); - SEQAN_PUT_TAB; + writeValue(target, '\t'); if (empty(record.seq)) - res = streamPut(stream, '*'); // Case of empty seq string / "*". + writeValue(target, '*'); // Case of empty seq string / "*". else - res = streamPut(stream, record.seq); - if (res != 0) - return res; + write(target, record.seq); - SEQAN_PUT_TAB; + writeValue(target, '\t'); if (empty(record.qual)) // Case of empty quality string / "*". - res = streamPut(stream, '*'); + writeValue(target, '*'); else - res = streamPut(stream, record.qual); - if (res != 0) - return res; + write(target, record.qual); if (!empty(record.tags)) { - SEQAN_PUT_TAB; - CharString buffer; - assignTagsBamToSam(buffer, record.tags); - streamPut(stream, buffer); + writeValue(target, '\t'); + appendTagsBamToSam(target, record.tags); } - return streamPut(stream, '\n'); - -#undef SEQAN_PUT_TAB + writeValue(target, '\n'); } } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BAM_IO_WRITE_SAM_H_ +#endif // #ifndef INCLUDE_SEQAN_BAM_IO_WRITE_SAM_H_ diff --git a/seqan/basic.h b/seqan/basic.h index 9580594..4409c8b 100644 --- a/seqan/basic.h +++ b/seqan/basic.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Umbrella header for the basic module. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_H_ // -------------------------------------------------------------------------- // Prerequisites @@ -48,15 +48,18 @@ // Code for debugging and testing (assertions, test system) and profiling. #include -// Basic exceptions. -#include - // C++ Metaprogramming Support Code, generally independent of SeqAn. #include +// Basic functors. +#include + // Fundamental meta and global functions. This is what makes SeqAn SeqAn. #include +// Basic exceptions. +#include + // More advanced debug system constructs. // TODO(holtgrew): Move into basic_debug subsystem, some stuff from metaprogramming and fundamental required, those should not depend on debug system. #include @@ -64,11 +67,8 @@ // SeqAn Concept Checking Library (ported from Boost). #include -// Alphabet concept and biological implementations. -#include - -// Aggregate data types (pairs, triples, tuples). -#include +// Container concept and supporting code. +#include // Memory allocation code. #include @@ -85,13 +85,28 @@ // Iterator concept and implementation. #include +// Functions to efficiently move data between containers/iterators/streams +#include + +// Alphabet concept and biological implementations. +#include + // Proxy class and supporting code. #include -// Container concept and supporting code. -#include +// Aggregate data types (pairs, triples, tuples). +#include // Remaining code with cyclic dependencies. #include -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_H_ +// Basic views functions and metafunctions. +#include + +// Basic device metafunctions. 
+#include + +// Basic SIMD vector (not included by default on purpose) +//#include + +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_H_ diff --git a/seqan/basic/INFO b/seqan/basic/INFO deleted file mode 100644 index 6506f01..0000000 --- a/seqan/basic/INFO +++ /dev/null @@ -1,14 +0,0 @@ -Name: seqan-basic -Author: Andreas Gogol-Doering -Author: Tobias Rausch -Author: Anne-Katrin Emde -Author: David Weese -Author: Manuel Holtgrewe -Maintainer: Manuel Holtgrewe -License: BSD 3-clause -Copyright: 2006-2013, FU Berlin -Status: mature -Description: SeqAn core module with fundamental infrastructure. - The basic module contains the infrastructure for testing, allocators, the SeqAn - types and concepts, iterators, the Holder classes, aggregats (Pairs, Triples, - Tuples, ...) and other fundamentals. diff --git a/seqan/basic/aggregate_concept.h b/seqan/basic/aggregate_concept.h index 15cfa1b..206530a 100644 --- a/seqan/basic/aggregate_concept.h +++ b/seqan/basic/aggregate_concept.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,8 +32,8 @@ // Author: Manuel Holtgrewe // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_AGGREGATE_CONCEPT_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_AGGREGATE_CONCEPT_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_AGGREGATE_CONCEPT_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_AGGREGATE_CONCEPT_H_ namespace seqan { @@ -47,13 +47,11 @@ namespace seqan { /*! * @concept AggregateConcept - * + * * @brief Aggregate types contain a fixed number of fixed-size values (pairs, triples, tuples). 
- * - * @section Remarks - * + * * Stream output operators are not shown in the function list below, but required. - * + * * Comparison operators are not shown in the function list below, but required. */ @@ -63,26 +61,12 @@ namespace seqan { * * @signature TStream AggregateConcept::operator<<(stream, aggregate); * - * @param stream The std::ostream to write to. - * @param aggregate The aggregate type to write to the stream. + * @param[in,out] stream The std::ostream to write to. + * @param[in] aggregate The aggregate type to write to the stream. * * @return TStream Reference to stream after writing aggregate to it. */ -/** -.Concept.AggregateConcept -..summary:Aggregate types contain a fixed number of fixed-size values. -..remarks:Stream output operators are not shown in the function list below, but required. -..remarks:Comparison operators are not shown in the function list below, but required. - -.Function.clear.concept:Concept.AggregateConcept -.Function.value.concept:Concept.AggregateConcept -.Function.assignValue.concept:Concept.AggregateConcept - -.Metafunction.LENGTH.concept:Concept.AggregateConcept -.Metafunction.Value.concept:Concept.AggregateConcept - */ - /*! * @defgroup AggregateTags Aggregate Tags * @brief Tags to use in aggregate (e.g. Pair, Triple, and Tuple) types. @@ -96,14 +80,6 @@ namespace seqan { * @signature typedef Tag Pack; */ -/** -.Tag.Pack -..cat:Aggregates -..summary:Tag to mark a packed specialization that disables address alignment for members. -..signature:Pack -..include:seqan/basic.h - */ - struct Pack_; typedef Tag Pack; @@ -122,18 +98,6 @@ typedef Tag Pack; * BITSIZE2 The number of bits for the second entry. */ -/** -.Tag.BitPacked -..cat:Aggregates -..summary:Tag to mark a bit-packed specialization that avoids to waste bits. -..signature:BitPacked -..param.BITSIZE1:Number of bits used for first element. -...type:nolink:$unsigned$ -..param.BITSIZE2:Number of bits used for second element. 
-...type:nolink:$unsigned$ -..include:seqan/basic.h - */ - template struct BitPacked; @@ -153,16 +117,6 @@ struct BitPacked; * @return Type The resulting packed type. */ -/** -.Metafunction.MakePacked -..cat:Aggregates -..summary:Return the corresponding packed type of a type. -..signature:MakePacked -..param.TAggregate:An aggregate type. -..returns:The corresponding packed aggregate. -..include:seqan/basic.h - */ - template struct MakePacked { @@ -175,4 +129,4 @@ struct MakePacked } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_AGGREGATE_CONCEPT_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_AGGREGATE_CONCEPT_H_ diff --git a/seqan/basic/allocator_chunkpool.h b/seqan/basic/allocator_chunkpool.h index b40a1ff..1308a61 100644 --- a/seqan/basic/allocator_chunkpool.h +++ b/seqan/basic/allocator_chunkpool.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ // size. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALLOCATOR_CHUNKPOOL_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALLOCATOR_CHUNKPOOL_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALLOCATOR_CHUNKPOOL_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_ALLOCATOR_CHUNKPOOL_H_ #include @@ -50,31 +50,9 @@ namespace seqan { // Tags, Classes, Enums // ============================================================================ -/** -.Spec.Chunk Pool Allocator: -..cat:Allocators -..general:Class.Allocator -..summary:Allocator that pools one or more consecutive memory blocks of a specific size. 
-..signature:Allocator< ChunkPool > -..param.SIZE:Size of memory blocks that are pooled. -...value:An unsigned integer with $SIZE >= sizeof(void *)$. -..param.MAX_COUNT:Maximum number of consecutive memory blocks that are pooled. -...default:26 -...remarks:Longer "chunks" are allocated and deallocated without pooling. -..param.ParentAllocator:An allocator that is by the pool allocator used to allocate memory. -...default:@Spec.Simple Allocator@ -...note:The multi pool allocator only supports @Function.clear@ if this function is also implemented for $ParentAllocator$. -..remarks:A pool allocator allocates several memory blocks at once. -Freed blocks are not immediately deallocated but recycled in subsequential allocations. -This way, the number of calls to the heap manager is reduced, and that speeds up memory management. -...text:Note that memory blocks of size different than $SIZE$, $2*SIZE$, $3*SIZE$, ..., $MAX_COUNT * SIZE$ -are not pooled but immediately allocated and deallocated using $ParentAllocator$. -..include:seqan/basic.h -*/ - template < - size_t SIZE, - size_t MAX_COUNT = 26, + size_t SIZE, + size_t MAX_COUNT = 26, typename TParentAllocator = Allocator > > struct ChunkPool; @@ -101,7 +79,7 @@ struct Allocator > Allocator() { SEQAN_CHECKPOINT; - ::std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); + std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); data_current_end = data_current_free = 0; //dont need to initialize data_current_begin } @@ -109,7 +87,7 @@ struct Allocator > Allocator(size_t reserve_item_count) { SEQAN_CHECKPOINT; - ::std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); + std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); size_t storage_size = (reserve_item_count * SIZE > STORAGE_SIZE_MIN) ? 
reserve_item_count * SIZE : STORAGE_SIZE_MIN; allocate( parentAllocator( *this ), data_current_begin, storage_size ); @@ -120,7 +98,7 @@ struct Allocator > Allocator(TParentAllocator & parent_alloc) { SEQAN_CHECKPOINT; - ::std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); + std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); data_current_end = data_current_free = 0; //dont need to initialize data_current_begin @@ -130,7 +108,7 @@ struct Allocator > Allocator(size_t reserve_item_count, TParentAllocator & parent_alloc) { SEQAN_CHECKPOINT; - ::std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); + std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); setValue(data_parent_allocator, parent_alloc); @@ -143,7 +121,7 @@ struct Allocator > //Dummy copy Allocator(Allocator const &) { - ::std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); + std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); data_current_end = data_current_free = 0; //dont need to initialize data_current_begin } @@ -190,7 +168,7 @@ void clear(Allocator > & me) { SEQAN_CHECKPOINT; - ::std::memset(me.data_recycled_blocks, 0, sizeof(me.data_recycled_blocks)); + std::memset(me.data_recycled_blocks, 0, sizeof(me.data_recycled_blocks)); me.data_current_end = me.data_current_free = 0; clear(parentAllocator(me)); @@ -202,7 +180,7 @@ clear(Allocator > & me) template inline void -allocate(Allocator > & me, +allocate(Allocator > & me, TValue * & data, TSize count, Tag const tag_) @@ -273,4 +251,4 @@ deallocate(Allocator > & me, } // namespace seqan -#endif // SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALLOCATOR_CHUNKPOOL_H_ +#endif // SEQAN_INCLUDE_SEQAN_BASIC_ALLOCATOR_CHUNKPOOL_H_ diff --git a/seqan/basic/allocator_interface.h b/seqan/basic/allocator_interface.h index 851fdef..c9917bd 100644 --- a/seqan/basic/allocator_interface.h +++ b/seqan/basic/allocator_interface.h @@ -1,7 +1,7 @@ // 
========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,8 @@ // TODO(holtgrew): Perform some benchmarks and use a better malloc, e.g. tcmalloc and see whether our allocator infrastructure is worth keeping around. // TODO(holtgrew): Rename to allocator_base.h? -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALLOCATOR_INTERFACE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALLOCATOR_INTERFACE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALLOCATOR_INTERFACE_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_ALLOCATOR_INTERFACE_H_ namespace seqan { @@ -55,34 +55,22 @@ template struct Holder; // ============================================================================ /*! - * @defgroup AllocatorUsageTags Allocator Usage + * @defgroup AllocatorUsageTags Allocator Usage Tags * @brief The purpose of an allocated memory block. - * + * * @tag AllocatorUsageTags#TagAllocateUnspecified * @headerfile * @brief Not specified. - * + * * @tag AllocatorUsageTags#TagAllocateTemp * @headerfile * @brief Temporary memory. - * + * * @tag AllocatorUsageTags#TagAllocateStorage * @headerfile * @brief Memory for storing container content. */ -/** -.Tag.Allocator Usage: -..cat:Memory -..summary:The purpose of an allocated memory block. -..tag.TagAllocateUnspecified:Not specified. -..tag.TagAllocateTemp:Temporary memory. -..tag.TagAllocateStorage:Memory for storing container content. -..see:Function.allocate -..see:Function.deallocate -..include:seqan/basic.h -*/ - // TODO(holtgrew): ANY use/difference? struct AllocateUnspecified_; @@ -98,14 +86,14 @@ typedef Tag TagAllocateStorage; * @class Allocator * @headerfile * @brief Manager for allocated memory. 
- * + * * @signature template * class Allocator; - * + * * @tparam TSpec The specializing type. - * + * * @section Remarks - * + * * There are two reasons for using non-trivial allocators: * *
    @@ -118,32 +106,9 @@ typedef Tag TagAllocateStorage; *
*/ -/** -.Class.Allocator: -..cat:Basic -..summary:Manager for allocated memory. -..signature:Allocator -..param.TSpec:The specializing type. -...metafunction:Metafunction.Spec -..include:basic.h -..remarks:There are two reasons for using non-trivial allocators: -...text:1. Allocators support the function @Function.Allocator#clear@ for a fast deallocation of all -allocated memory blocks. -...text:2. Some allocators are faster in allocating an deallocating memory. -Pool allocators like e.g. @Spec.Single Pool Allocator@ or @Spec.Multi Pool Allocator@ -speed up @Function.allocate@, @Function.deallocate@, and @Function.Allocator#clear@ for -pooled memory blocks. -..include:seqan/basic.h -*/ - template struct Allocator; -///.Function.allocate.param.object.type:Class.Allocator -///.Function.allocate.class:Class.Allocator -///.Function.deallocate.param.object.type:Class.Allocator -///.Function.deallocate.class:Class.Allocator - // ============================================================================ // Metafunctions // ============================================================================ @@ -168,9 +133,9 @@ struct Spec > * @fn Allocator#allocate * @headerfile * @brief Allocates memory from heap. - * + * * @signature void allocate(allocator, data, count[, usageTag]); - * + * * @param[in] count Number of items that could be stored in the allocated memory. The type of the allocated * items is given by the type of data. * @param[in] usageTag A tag the specifies the purpose for the allocated memory. Values: @@ -179,58 +144,23 @@ struct Spec > * memory. Objects of all types can be used as allocators. If no special behavior is * implemented, default functions allocation/deallocation are applied that uses standard * new and delete operators. Types: Allocator - * + * * @section Remarks - * + * * The function allocates at least count*sizeof(data) bytes. The allocated memory is large enough to hold * count objects of type T, where T * is type of data. 
- * + * * These objects are not constructed by allocate. - * + * * Use e.g. one of the functions @link valueConstruct @endlink, @link arrayConstruct @endlink, @link arrayConstructCopy * @endlink or @link arrayFill @endlink to construct the objects. A new operator which is part of the C++ * standard (defined in <new>) can also be used to construct objects at a given memory address. - * + * * @section Remarks - * + * * All allocated memory blocks should be deallocated by the corresponding function @link Allocator#deallocate @endlink. */ -/** -.Function.allocate -..class:Class.Allocator -..cat:Memory -..summary:Allocates memory from heap. -..signature:allocate(object, data, count [, usage_tag]) -..param.object:Allocator object. -...remarks:$object$ is conceptually the "owner" of the allocated memory. - Objects of all types can be used as allocators. If no special behavior is implemented, - default functions allocation/deallocation are applied that uses standard - $new$ and $delete$ operators. -..param.count:Number of items that could be stored in the allocated memory. -...text:The type of the allocated items is given by the type of $data$. -..param.usage_tag:A tag the specifies the purpose for the allocated memory. -...value:@Tag.Allocator Usage@ -..returns.param.data:Pointer to allocated memory. -...remarks:The value of this pointer is overwritten by the function. -..remarks: -...text:The function allocates at least $count*sizeof(data)$ bytes. - The allocated memory is large enough - to hold $count$ objects of type $T$, where $T *$ is type of $data$. -...note:These objects are not constructed by $allocate$. -...text:Use e.g. one of the functions @Function.valueConstruct@, @Function.arrayConstruct@, @Function.arrayConstructCopy@ or @Function.arrayFill@ -to construct the objects. -A $new$ operator which is part of the C++ standard (defined in $$) - can also be used to construct objects at a given memory address. 
-..note:All allocated memory blocks should be deallocated by the corresponding function @Function.deallocate@. -..see:Function.deallocate -..see:Function.valueConstruct -..see:Function.arrayFill -..see:Function.arrayConstruct -..see:Function.arrayConstructCopy -..include:seqan/basic.h -*/ - template inline void allocate(T const & me, @@ -251,7 +181,7 @@ allocate(T & me, template inline void -allocate(T const &, +allocate(T const &, TValue * & data, TSize count, Tag const &) @@ -278,7 +208,7 @@ allocate(T const &, template inline void -allocate(T &, +allocate(T &, TValue * & data, TSize count, Tag const &) @@ -311,71 +241,41 @@ allocate(T &, * @fn Allocator#deallocate * @headerfile * @brief Deallocates memory. - * + * * @signature void deallocate(object, data, count[, usageTag]) - * - * @param count Number of items that could be stored in the allocated memory. - * @param usageTag A tag the specifies the purpose for the allocated memory. - * Values: @link AllocatorUsageTags @endlink. - * @param object Allocator object.object is conceptually the "owner" of the allocated memory. Objects of - * all types can be used as allocators. If no special behavior is implemented, default - * functions allocation/deallocation are applied that uses standard new and delete - * operators. Types: Allocator - * @param data Pointer to allocated memory that was allocated by allocate. - * - * @section Remarks - * + * + * @param[in,out] object Allocator object.object is conceptually the "owner" of the allocated memory. + * Objects of all types can be used as allocators. If no special behavior is implemented, + * default functions allocation/deallocation are applied that uses standard new + * and delete operators. Types: Allocator + * @param[out] data Pointer to allocated memory that was allocated by allocate. + * @param[in] count Number of items that could be stored in the allocated memory. + * @param[in] usageTag A tag the specifies the purpose for the allocated memory. 
+ * Values: @link AllocatorUsageTags @endlink. + * * The values for object, count and usageTag should be the same that was used when * allocate was called. The value of data should be the same that was returned by allocate. - * + * * deallocate does not destruct objects. - * + * * Use e.g. one of the functions @link valueDestruct @endlink or @link arrayDestruct @endlink to destruct the objects. * delete and delete [] operators which are part of the C++ standard (defined in <new>) * can also be used to destruct objects at a given memory address. */ -/** -.Function.deallocate -..class:Class.Allocator -..cat:Memory -..summary:Deallocates memory. -..signature:deallocate(object, data, count [, usage_tag]) -..param.object:Allocator object. -...remarks:$object$ is conceptually the "owner" of the allocated memory. - Objects of all types can be used as allocators. If no special behavior is implemented, - default functions allocation/deallocation are applied that uses standard - $new$ and $delete$ operators. -..param.data:Pointer to allocated memory that was allocated by $allocate$. -..param.count:Number of items that could be stored in the allocated memory. -..param.usage_tag:A tag the specifies the purpose for the allocated memory. -...value:@Tag.Allocator Usage@ -..remarks: -...text:The values for $object$, $count$ and $usage_tag$ should be the same that was -used when $allocate$ was called. The value of $data$ should be the same that was -returned by $allocate$. -...note:$deallocate$ does not destruct objects. -...text:Use e.g. one of the functions @Function.valueDestruct@ or @Function.arrayDestruct@ to destruct the objects. -$delete$ and $delete []$ operators which are part of the C++ standard (defined in $$) - can also be used to destruct objects at a given memory address. 
-..see:Function.valueDestruct -..see:Function.arrayDestruct -..include:seqan/basic.h -*/ - template -inline void -deallocate(T const & me, - TValue * data, +inline void +deallocate(T const & me, + TValue * data, TSize const count) { deallocate(me, data, count, TagAllocateUnspecified()); } template -inline void -deallocate(T & me, - TValue * data, +inline void +deallocate(T & me, + TValue * data, TSize const count) { deallocate(me, data, count, TagAllocateUnspecified()); @@ -386,7 +286,7 @@ inline void deallocate( T const & /*me*/, TValue * data, -#ifdef SEQAN_PROFILE +#ifdef SEQAN_PROFILE TSize count, #else TSize, @@ -407,11 +307,11 @@ deallocate( } template -inline void +inline void deallocate( T & /*me*/, TValue * data, -#ifdef SEQAN_PROFILE +#ifdef SEQAN_PROFILE TSize count, #else TSize, @@ -433,4 +333,4 @@ deallocate( } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALLOCATOR_INTERFACE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALLOCATOR_INTERFACE_H_ diff --git a/seqan/basic/allocator_multipool.h b/seqan/basic/allocator_multipool.h index 7fdbb39..36b17c8 100644 --- a/seqan/basic/allocator_multipool.h +++ b/seqan/basic/allocator_multipool.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -51,41 +51,23 @@ namespace seqan { /*! * @class MultiPoolAllocator + * @extends Allocator * @headerfile * @brief Allocator that pools memory blocks. * - * @signature template + * @signature template * class Allocator >; * - * @tparam TParentAllocator Th eparent allocator. + * @tparam TParentAllocator The parent allocator. 
* @tparam BLOCKING_LIMIT The maximum size for memory blocks to be pooled (default is 256). * - * Freed blocks are not immediately deallocated but recycled in subsequential allocations. This way, th enumber of + * Freed blocks are not immediately deallocated but recycled in subsequential allocations. This way, the number of * calls to the heap manager is reduced and that might speed up memory management. * * Note that memory block larger than BLOCKING_LIMIT are not pooled but immediately allocated and deallocated * using ParentAllocator. */ -/** -.Spec.Multi Pool Allocator: -..cat:Allocators -..general:Class.Allocator -..summary:Allocator that pools memory blocks. -..signature:Allocator MultiPool > -..param.ParentAllocator:An allocator that is by the pool allocator used to allocate memory. -...default:@Spec.Simple Allocator@ -...note:The multi pool allocator only supports @Function.clear@ if this function is also implemented for $ParentAllocator$. -..remarks:A pool allocator allocates several memory blocks at once. -..param.BLOCKING_LIMIT:The maximum size for memory blocks to be pooled. -...default:256 -Freed blocks are not immediately deallocated but recycled in subsequential allocations. -This way, the number of calls to the heap manager is reduced, and that speeds up memory management. -...text:Note that memory blocks larger than $BLOCKING_LIMIT$ are not pooled -but immediately allocated and deallocated using $ParentAllocator$. -..include:seqan/basic.h -*/ - template >, unsigned int BLOCKING_LIMIT = 0x100> struct MultiPool; @@ -111,18 +93,18 @@ struct Allocator > { SEQAN_CHECKPOINT; // TODO(holtrew): Why not SeqAn's memset? or use using? 
- ::std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); - ::std::memset(data_current_begin, 0, sizeof(data_current_begin)); - ::std::memset(data_current_free, 0, sizeof(data_current_free)); + std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); + std::memset(data_current_begin, 0, sizeof(data_current_begin)); + std::memset(data_current_free, 0, sizeof(data_current_free)); } Allocator(TParentAllocator & parent_alloc) { SEQAN_CHECKPOINT; // TODO(holtrew): Why not SeqAn's memset? or use using? - ::std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); - ::std::memset(data_current_begin, 0, sizeof(data_current_begin)); - ::std::memset(data_current_free, 0, sizeof(data_current_free)); + std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); + std::memset(data_current_begin, 0, sizeof(data_current_begin)); + std::memset(data_current_free, 0, sizeof(data_current_free)); setValue(data_parent_allocator, parent_alloc); } @@ -131,9 +113,9 @@ struct Allocator > Allocator(Allocator const &) { // TODO(holtrew): Why not SeqAn's memset? or use using? 
- ::std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); - ::std::memset(data_current_begin, 0, sizeof(data_current_begin)); - ::std::memset(data_current_free, 0, sizeof(data_current_free)); + std::memset(data_recycled_blocks, 0, sizeof(data_recycled_blocks)); + std::memset(data_current_begin, 0, sizeof(data_current_begin)); + std::memset(data_current_free, 0, sizeof(data_current_free)); } inline Allocator & @@ -179,9 +161,9 @@ void clear(Allocator > & me) { SEQAN_CHECKPOINT; - ::std::memset(me.data_recycled_blocks, 0, sizeof(me.data_recycled_blocks)); - ::std::memset(me.data_current_begin, 0, sizeof(me.data_current_begin)); - ::std::memset(me.data_current_free, 0, sizeof(me.data_current_free)); + std::memset(me.data_recycled_blocks, 0, sizeof(me.data_recycled_blocks)); + std::memset(me.data_current_begin, 0, sizeof(me.data_current_begin)); + std::memset(me.data_current_free, 0, sizeof(me.data_current_free)); clear(parentAllocator(me)); } @@ -216,7 +198,7 @@ _allocatorBlockNumber(Allocator > &, template inline void -allocate(Allocator > & me, +allocate(Allocator > & me, TValue * & data, TSize count, Tag const & tag_) @@ -259,9 +241,9 @@ allocate(Allocator > & me, // ---------------------------------------------------------------------------- template -inline void +inline void deallocate(Allocator > & me, - TValue * data, + TValue * data, TSize count, Tag const tag_) { diff --git a/seqan/basic/allocator_simple.h b/seqan/basic/allocator_simple.h index 52937bd..b60aec7 100644 --- a/seqan/basic/allocator_simple.h +++ b/seqan/basic/allocator_simple.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // General purpose allocator. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALLOCATOR_SIMPLE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALLOCATOR_SIMPLE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALLOCATOR_SIMPLE_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_ALLOCATOR_SIMPLE_H_ #include @@ -51,6 +51,7 @@ namespace seqan { /*! * @class SimpleAllocator + * @extends Allocator * @headerfile * @brief General purpose allocator. * @@ -59,25 +60,10 @@ namespace seqan { * * @tparam TParentAllocator An allocator that is used by the simple allocator to allocate memory. * - * @section Remarks - * * The tag Default used as TparentAllocator means that the default implementations of allocate and * deallocate are used. */ -/** -.Spec.Simple Allocator: -..cat:Allocators -..general:Class.Allocator -..summary:General purpose allocator. -..signature:Allocator< SimpleAlloc > -..param.ParentAllocator:An allocator that is by the simple allocator used to allocate memory. -...default:@Tag.Default@ -...remarks:@Tag.Default@ used as allocator means that the default implementations -of @Function.allocate@ and @Function.deallocate@ are used. -..include:seqan/basic.h -*/ - template struct SimpleAlloc; @@ -168,21 +154,6 @@ parentAllocator(Allocator > & me) */ // TODO(holtgrew): Using #-functions messes up search results. -/** -.Function.Allocator#clear -..class:Class.Allocator -..cat:Memory -..summary:Deallocates all memory blocks. -..signature:clear(allocator) -..param.allocator:Allocator object. -...type:Class.Allocator -..remarks:This function deallocates all memory blocks -that was allocated using @Function.allocate@ for $allocator$. -The memory is not pooled but directly passed back to the heap manager. 
-..see:Function.allocate -..see:Function.deallocate -..include:seqan/basic.h -*/ template void clear(Allocator > & me) @@ -204,7 +175,7 @@ clear(Allocator > & me) template inline void -allocate(Allocator > & me, +allocate(Allocator > & me, TValue * & data, TSize count, Tag const &) @@ -240,9 +211,9 @@ allocate(Allocator > & me, // ---------------------------------------------------------------------------- template -inline void +inline void deallocate(Allocator > & me, - TValue * data, + TValue * data, TSize, Tag const &) { @@ -272,4 +243,4 @@ deallocate(Allocator > & me, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALLOCATOR_SIMPLE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALLOCATOR_SIMPLE_H_ diff --git a/seqan/basic/allocator_singlepool.h b/seqan/basic/allocator_singlepool.h index 1d5b244..da290d4 100644 --- a/seqan/basic/allocator_singlepool.h +++ b/seqan/basic/allocator_singlepool.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -62,8 +62,6 @@ namespace seqan { * @tparam SIZE The size of the blocks. * @tparam TParentAllocator The parent allocator to use. * - * @section Remarks - * * A pool allocator allocates several memory blocks at once. Freed blocks are not immediately deallocated but * recycled in subsequential allocations. This way, the number of calls to the heap manager is reduced, and that * might speed up memory management. @@ -73,25 +71,6 @@ namespace seqan { * than a few KB is not advised. */ -/** -.Spec.Single Pool Allocator: -..cat:Allocators -..general:Class.Allocator -..summary:Allocator that pools memory blocks of specific size. 
-..signature:Allocator< SinglePool > -..param.SIZE:Size of memory blocks that are pooled. -..param.ParentAllocator:An allocator that is by the pool allocator used to allocate memory. -...default:@Spec.Simple Allocator@ -...note:The single pool allocator only supports @Function.clear@ if this function is also implemented for $ParentAllocator$. -..remarks:A pool allocator allocates several memory blocks at once. -Freed blocks are not immediately deallocated but recycled in subsequential allocations. -This way, the number of calls to the heap manager is reduced, and that speeds up memory management. -...text:The single pool allocator only pools memory blocks of size at most $SIZE$. -Blocks of other sizes are allocated and deallocated using an allocator of type $ParentAllocator$. -...text:Using the single pool allocator for blocksizes larger than some KB is not advised. -..include:seqan/basic.h -*/ - template struct SinglePool; @@ -113,16 +92,11 @@ struct Allocator > char * data_current_free; Holder data_parent_allocator; - Allocator() - { - data_recycled_blocks = data_current_end = data_current_free = 0; - //dont need to initialize data_current_begin - } + Allocator() : data_recycled_blocks(), data_current_begin(), data_current_end(), data_current_free() + {} - Allocator(size_t reserve_item_count) + Allocator(size_t reserve_item_count) : data_recycled_blocks() { - data_recycled_blocks = 0; - size_t storage_size = std::max(reserve_item_count * SIZE_PER_ITEM, STORAGE_SIZE_MIN); allocate(parentAllocator(*this), data_current_begin, storage_size); data_current_end = data_current_begin + storage_size; @@ -149,11 +123,12 @@ struct Allocator > data_current_free = data_current_begin; } - //Dummy copy - Allocator(Allocator const &) + // Dummy copy + Allocator(Allocator const &) : + data_recycled_blocks(), data_current_begin(), data_current_end(), + data_current_free() { data_recycled_blocks = data_current_end = data_current_free = 0; - //dont need to initialize 
data_current_begin } inline Allocator & @@ -206,7 +181,7 @@ clear(Allocator > & me) template inline void -allocate(Allocator > & me, +allocate(Allocator > & me, TValue * & data, TSize count, Tag const tag_) @@ -249,9 +224,9 @@ allocate(Allocator > & me, // ---------------------------------------------------------------------------- template -inline void +inline void deallocate(Allocator > & me, - TValue * data, + TValue * data, TSize count, Tag const tag_) { diff --git a/seqan/basic/allocator_to_std.h b/seqan/basic/allocator_to_std.h index d065c97..2a755e8 100644 --- a/seqan/basic/allocator_to_std.h +++ b/seqan/basic/allocator_to_std.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,8 @@ // TODO(holtgrew): Rename STD to STL? // TODO(holtgrew): Rename to allocator_to_stl.h, remove basic_ prefix of all other allocator headers. -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALLOCATOR_TO_STD_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALLOCATOR_TO_STD_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALLOCATOR_TO_STD_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_ALLOCATOR_TO_STD_H_ namespace seqan { @@ -54,55 +54,28 @@ namespace seqan { * @class ToStdAllocator * @headerfile * @brief Emulates standard conform allocator. - * + * * @signature template * class ToStdAllocator; - * + * * @tparam TValue Type of allocated items. - * @tparam THost Type of the host allocator object.This object is used to call @link allocate @endlink and - * @link deallocate @endlink. 
- * - * @section Remarks - * + * @tparam THost Type of the host allocator object.This object is used to call @link Allocator#allocate @endlink and + * @link Allocator#deallocate @endlink. + * * The member functions allocate and deallocate of ToStdAllocator call the (globale) * functions @link Allocator#allocate @endlink and @link Allocator#deallocate @endlink, respectively. The globale * functions get an allocator object as their first arguments. This allocator object is not the ToStdAllocator * object itself, but the host object that was given to the constructor. - * + * * @fn ToStdAllocator#ToStdAllocator - * * @brief Constructor - * + * * @signature ToStdAllocator::ToStdAllocator(host); - * - * @param host The host object that is used as allocator for @link Allocator#allocate @endlink and @link - * Allocator#deallocate @endlink. + * + * @param[in] host The host object that is used as allocator for @link Allocator#allocate @endlink and @link + * Allocator#deallocate @endlink. */ -/** -.Class.ToStdAllocator: -..summary:Emulates standard conform allocator. -..signature:ToStdAllocator -..param.THost:Type of the host allocator object. -...text:This object is used to call @Function.allocate@ and @Function.deallocate@. -..param.TValue:Type of allocated items. -..remarks:The member functions $allocate$ and $deallocate$ of $ToStdAllocator$ call -the (globale) functions @Function.allocate@ and @Function.deallocate@, respectively. The globale functions -get an allocator object as their first arguments. This allocator object is not the $ToStdAllocator$ object itself, -but the host object that was given to the constructor. -..cat:Basic -..remarks: -..see:Function.allocate -..see:Function.deallocate -..include:seqan/basic.h - -.Memfunc.ToStdAllocator: -..summary:Constructor -..signature:ToStdAllocator(host) -..class:Class.ToStdAllocator -..param.host:The host object that is used as allocator for @Function.allocate@ and @Function.deallocate@. 
-*/ - template struct ToStdAllocator { @@ -213,26 +186,14 @@ struct StdAllocator * @fn ToStdAllocator#host * @headerfile * @brief The object a given object depends on. - * + * * @signature THost host(allocator); - * - * @param allocator The allocator to query. - * + * + * @param[in] allocator The allocator to query. + * * @return THost The host object. */ -/** -.Function.host -..class:Class.ToStdAllocator -..summary:The object a given object depends on. -..cat:Dependent Objects -..signature:host(object) -..param.object:An object. -...type:Class.ToStdAllocator -..returns:The host object. -..include:seqan/basic.h -*/ - template THost & host(ToStdAllocator & me) @@ -242,4 +203,4 @@ host(ToStdAllocator & me) } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALLOCATOR_TO_STD_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALLOCATOR_TO_STD_H_ diff --git a/seqan/basic/alphabet_adapt_builtins.h b/seqan/basic/alphabet_adapt_builtins.h index b22c783..fba40f7 100644 --- a/seqan/basic/alphabet_adapt_builtins.h +++ b/seqan/basic/alphabet_adapt_builtins.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -36,8 +36,8 @@ // concepts they are in. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_BASIC_ALPHABET_ADAPT_BUILTINS_H_ -#define SEQAN_CORE_INCLUDE_BASIC_ALPHABET_ADAPT_BUILTINS_H_ +#ifndef SEQAN_INCLUDE_BASIC_ALPHABET_ADAPT_BUILTINS_H_ +#define SEQAN_INCLUDE_BASIC_ALPHABET_ADAPT_BUILTINS_H_ #include @@ -82,36 +82,22 @@ struct BitsPerValue /*! 
* @mfn IsCharType * @headerfile - * + * * @brief Return whether the argument is char, wchar_t, char const, or wchar_t * const. - * + * * @signature IsCharType::Type; * @signature IsCharType::VALUE; - * + * * @tparam T Type to check type of. - * - * @section Remarks - * + * * This metafunction is used to enable and disable templated adaptions of arrays to sequences for builtin character * types only. - * + * * The return value is True/true for char, wchar_t, char const, and * wchar_t const. */ -/** -.Metafunction.IsCharType -..cat:Alphabets -..summary:Return whether the argument is $char$, $wchar_t$, $char const$, or $wchar_t const$. -..signature:IsCharType::Type -..signature:IsCharType::VALUE -..param.T:Type to check type of. -..remarks:This metafunction is used to enable and disable templated adaptions of arrays to sequences for builtin character types only. -..remarks:The return value is $True$/$true$ for $char$, $wchar_t$, $char const$, and $wchar_t const$. -..include:seqan/sequence.h -*/ - template struct IsCharType; @@ -196,7 +182,7 @@ inline long double const & supremumValueImpl(long double *) { #ifdef PLATFORM_WINDOWS - static long double const _value = ::std::numeric_limits::infinity( ); + static long double const _value = std::numeric_limits::infinity( ); #else static long double const _value = 1.7976931348623157e+308; #endif @@ -207,7 +193,7 @@ inline double const & supremumValueImpl(double *) { #ifdef PLATFORM_WINDOWS - static double const _value = ::std::numeric_limits::infinity( ); + static double const _value = std::numeric_limits::infinity( ); #else static double const _value = 1.7976931348623157e+308; #endif @@ -217,7 +203,7 @@ inline float const & supremumValueImpl(float *) { #ifdef PLATFORM_WINDOWS - static float const _value = ::std::numeric_limits::infinity( ); + static float const _value = std::numeric_limits::infinity( ); #else static float const _value = 3.40282347e+38F; #endif @@ -240,7 +226,7 @@ inline float const & infimumValueImpl(float 
*) { #ifdef PLATFORM_WINDOWS - static float const _value = -::std::numeric_limits::infinity( ); + static float const _value = -std::numeric_limits::infinity( ); #else static float const _value = -3.40282347e+38F; #endif @@ -251,7 +237,7 @@ inline double const & infimumValueImpl(double *) { #ifdef PLATFORM_WINDOWS - static double const _value = -::std::numeric_limits::infinity( ); + static double const _value = -std::numeric_limits::infinity( ); #else static double const _value = -1.7976931348623157e+308; #endif @@ -262,7 +248,7 @@ inline long double const & infimumValueImpl(long double *) { #ifdef PLATFORM_WINDOWS - static long double const _value = -::std::numeric_limits::infinity( ); + static long double const _value = -std::numeric_limits::infinity( ); #else static long double const _value = -1.7976931348623157e+308; #endif @@ -271,4 +257,4 @@ infimumValueImpl(long double *) } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_BASIC_ALPHABET_ADAPT_BUILTINS_H_ +#endif // #ifndef SEQAN_INCLUDE_BASIC_ALPHABET_ADAPT_BUILTINS_H_ diff --git a/seqan/basic/alphabet_bio.h b/seqan/basic/alphabet_bio.h index b333b30..c6ca89b 100644 --- a/seqan/basic/alphabet_bio.h +++ b/seqan/basic/alphabet_bio.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,8 @@ // TODO(holtgrew): Where is the documentation, shouldnt it be here? 
-#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_BIO_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_BIO_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_BIO_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_BIO_H_ namespace seqan { @@ -98,4 +98,4 @@ unknownValue() } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_BIO_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_BIO_H_ diff --git a/seqan/basic/alphabet_concept.h b/seqan/basic/alphabet_concept.h index 459ee88..10c22b4 100644 --- a/seqan/basic/alphabet_concept.h +++ b/seqan/basic/alphabet_concept.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,8 @@ // SEQAN_NO_GENERATED_FORWARDS -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_CONCEPT_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_CONCEPT_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_CONCEPT_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_CONCEPT_H_ namespace seqan { @@ -55,11 +55,11 @@ namespace seqan { * @brief Natural container value. * * @signature concept AlphabetConcept; - * + * * @section Examples - * + * * Valid expressions (v is of type T): - * + * * @code{.cpp} * unsigned bpv = BitsPerValue::VALUE; * @endcode @@ -69,37 +69,12 @@ namespace seqan { * @mfn AlphabetConcept#BitsPerValue * @headerfile * @brief Number of bits needed to store a value. - * + * * @signature BitsPerValue::VALUE - * + * * @tparam T A class. 
- * - * @return TReturn - */ - -/** -.Concept.AlphabetConcept -..cat:Alphabets -..baseconcept:Concept.AssignableConcept -..baseconcept:Concept.DefaultConstructibleConcept -..baseconcept:Concept.CopyConstructibleConcept -..summary:Natural container value. -..include:seqan/basic.h -..example.text:Valid expressions ($v$ is of type $T$): -..example.code: -unsigned bpv = BitsPerValue::VALUE; - -.Metafunction.BitsPerValue -..concept:Concept.AlphabetConcept -..cat:Basic -..summary:Number of bits needed to store a value. -..signature:BitsPerValue::VALUE -..param.T:A class. -...type:Concept.AlphabetConcept -..returns.param.VALUE:Number of bits needed to store $T$. -...default:$sizeof * 8$ -..see:Metafunction.ValueSize -..include:seqan/basic.h + * + * @return VALUE The number of bits needed to store a value. */ // Forwards for Metafunctions and Functions. @@ -109,7 +84,7 @@ template struct BitsPerValue; SEQAN_CONCEPT_REFINE(AlphabetConcept, (TValue), (Assignable)(DefaultConstructible)(CopyConstructible)) { typedef typename BitsPerValue::Type TBitsPerValue; - + TValue val, val2; SEQAN_CONCEPT_USAGE(AlphabetConcept) @@ -121,7 +96,7 @@ SEQAN_CONCEPT_REFINE(AlphabetConcept, (TValue), (Assignable)(DefaultConstructibl // swap(val, val2); TBitsPerValue b = BitsPerValue::VALUE; - + ignoreUnusedVariableWarning(b); } }; @@ -135,7 +110,7 @@ SEQAN_CONCEPT_REFINE(AlphabetConcept, (TValue), (Assignable)(DefaultConstructibl * @extends AlphabetConcept * @extends ComparableConcept * @headerfile - * + * * @brief Totally strict ordered alphabet. * * @signature concept OrderedAlphabetConcept; @@ -156,13 +131,13 @@ SEQAN_CONCEPT_REFINE(AlphabetConcept, (TValue), (Assignable)(DefaultConstructibl * @mfn OrderedAlphabetConcept#MaxValue * @headerfile * @brief Supremum for a given type. - * + * * @signature MaxValue::VALUE * * @tparam T An ordered type. - * + * * @return VALUE The largest value that T can represent. 
- * + * * @see OrderedAlphabetConcept#maxValue */ @@ -170,59 +145,54 @@ SEQAN_CONCEPT_REFINE(AlphabetConcept, (TValue), (Assignable)(DefaultConstructibl * @mfn OrderedAlphabetConcept#MinValue * @headerfile * @brief Infimum for a given type. - * + * * @signature MinValue::VALUE * * @tparam T An ordered type. - * + * * @return VALUE The smallest value that T can represent. - * + * * @see OrderedAlphabetConcept#minValue */ /*! * @fn OrderedAlphabetConcept#supremumValueImpl * @brief Implements maxValue. - * + * * @signature T supremumValueImpl(valuePointerTag); - * + * * @param[in] valuePointerTag A pointer that is used as a tag to specify the value type. The pointer needs not to point * to a valid object, so it is possible to use a null pointer here. - * + * * @return T A value inf that holds: inf >= i for all values i. - * - * @section Remarks - * + * * This function implements OrderedAlphabetConcept#maxValue. It is recommended to use OrderedAlphabetConcept#maxValue * rather than supremumValueImpl. - * + * * @section Status * * Deprecated, will be removed in favour of OrderedAlphabetConcept#MaxValue. - * + * * @see OrderedAlphabetConcept#maxValue */ /*! * @fn OrderedAlphabetConcept#maxValue * @brief Supremum for a given type. - * - * @signature template - * T maxValue(); - * + * + * @signature T maxValue(); + * * @tparam T The type to get the max value of. * * @return T A value inf that holds: inf >= i for all values i of type T. - * - * @section Remarks - * + * * The function is implemented in supremumValueImpl. Do not specialize maxValue, specialize supremumValueImpl * instead! - * + * * @section Status * * Deprecated, will be removed in favour of MaxValue. - * + * * @see OrderedAlphabetConcept#supremumValueImpl * @see OrderedAlphabetConcept#minValue * @see OrderedAlphabetConcept#MaxValue @@ -231,148 +201,45 @@ SEQAN_CONCEPT_REFINE(AlphabetConcept, (TValue), (Assignable)(DefaultConstructibl /*! 
* @fn OrderedAlphabetConcept#infimumValueImpl * @brief Implements minValue. - * + * * @signature T infimumValueImpl(valuePointerTag); - * + * * @param[in] valuePointerTag A pointer that is used as a tag to specify the value type. The pointer needs not to point * to a valid object, so it is possible to use a null pointer here. - * + * * @return T A value inf that holds: inf <= i for all values i. - * - * @section Remarks - * + * * This function implements minValue. It is recommended to use minValue rather than infimumValueImpl. * * @section Status * * Deprecated, will be removed in favour of MinValue. - * + * * @see OrderedAlphabetConcept#minValue */ /*! * @fn OrderedAlphabetConcept#minValue * @brief Infimum for a given type. - * - * @signature template - * T minValue(); - * + * + * @signature T minValue(); + * * @tparam T An ordered type. - * + * * @return T A value inf that holds: inf <= i for all values i of type T. - * - * @section Remarks - * + * * The function is implemented in infimumValueImpl. Do not specialize minValue, specialize infimumValueImpl * instead! * * @section Status * * Deprecated, will be removed in favour of MinValue. - * + * * @see OrderedAlphabetConcept#infimumValueImpl * @see OrderedAlphabetConcept#maxValue * @see OrderedAlphabetConcept#MinValue */ -/** -.Concept.OrderedAlphabetConcept -..concept:Concept.OrderedAlphabetConcept -..cat:Alphabets -..summary:Totally strict ordered alphabet. -..baseconcept:Concept.AlphabetConcept -..baseconcept:Concept.ComparableConcept -..include:seqan/basic.h - -.Function.operator<.concept:Concept.OrderedAlphabetConcept - -.Metafunction.MaxValue -..concept:Concept.OrderedAlphabetConcept -..cat:Miscellaneous -..summary:Supremum for a given type. -..signature:MaxValue::VALUE -..param.T:An ordered type. -...type:Concept.OrderedAlphabetConcept -..returns.param.VALUE:A value $sup$ for which holds: $sup >= i$ for all values $i$ of type $T$. 
-..remarks:Note tat -..see:Function.maxValue -..include:seqan/basic.h - -.Metafunction.MinValue -..concept:Concept.OrderedAlphabetConcept -..cat:Miscellaneous -..summary:Infimum for a given type. -..signature:MinValue::VALUE -..param.T:An ordered type. -...type:Concept.OrderedAlphabetConcept -..returns.param.VALUE:A value $inf$ for which holds: $inf <= i$ for all values $i$ of type $T$. -..remarks:Note tat -..see:Function.minValue -..include:seqan/basic.h - -.Function.supremumValueImpl -..concept:Concept.OrderedAlphabetConcept -..hidefromindex -..cat:Alphabets -..summary:Implements @Function.maxValue@. -..signature:supremumValueImpl(value_pointer_tag) -..param.value_pointer_tag:A pointer that is used as a tag to specify the value type. -...type:Concept.OrderedAlphabetConcept -...remarks:The pointer needs not to point to a valid object, so it is possible to use a null pointer here. -..returns:A value $inf$ that holds: $inf >= i$ for all values $i$. -..remarks.text:This function implements @Function.maxValue@. -It is recommended to use @Function.maxValue@ rather than $supremumValueImpl$. -..status:deprecated, will be removed in favour of @Metafunction.MaxValue@ -..include:seqan/basic.h - -.Function.maxValue -..concept:Concept.OrderedAlphabetConcept -..cat:Alphabets -..summary:Supremum for a given type. -..signature:maxValue() -..param.T:An ordered type. -...type:Concept.OrderedAlphabetConcept -..returns:A value $inf$ that holds: $inf >= i$ for all values $i$ of type $T$. -..remarks: -The function is implemented in @Function.supremumValueImpl@. -Do not specialize $maxValue$, specialize @Function.supremumValueImpl@ instead! -..see:Function.supremumValueImpl -..status:deprecated, will be removed in favour of @Metafunction.MaxValue@ -..include:seqan/basic.h - -// TODO(holtgrew): Rename to minValueImpl? - -.Function.infimumValueImpl -..concept:Concept.OrderedAlphabetConcept -..hidefromindex -..cat:Alphabets -..summary:Implements @Function.minValue@. 
-..signature:infimumValueImpl(value_pointer_tag) -..param.value_pointer_tag:A pointer that is used as a tag to specify the value type. -...type:Concept.OrderedAlphabetConcept -...remarks:The pointer needs not to point to a valid object, so it is possible to use a null pointer here. -..returns:A value $inf$ that holds: $inf <= i$ for all values $i$. -..remarks.text:This function implements @Function.minValue@. -It is recommended to use @Function.minValue@ rather than $infimumValueImpl$. -..status:deprecated, will be removed in favour of @Metafunction.MinValue@ -..include:seqan/basic.h - -.Function.minValue -..concept:Concept.OrderedAlphabetConcept -..cat:Alphabets -..summary:Infimum for a given type. -..signature:minValue() -..param.T:An ordered type. -...type:Concept.OrderedAlphabetConcept -..returns:A value $inf$ that holds: $inf <= i$ for all values $i$ of type $T$. -..remarks.text:The function is implemented in @Function.infimumValueImpl@. -Do not specialize $minValue$, specialize @Function.infimumValueImpl@ instead! -..see:Function.infimumValueImpl -..see:Function.maxValue -..status:deprecated, will be removed in favour of @Metafunction.MinValue@ -..include:seqan/basic.h -*/ - // Forwards for Metafunctions and Functions. template struct MinValue; template struct MaxValue; @@ -418,27 +285,27 @@ SEQAN_CONCEPT_REFINE(OrderedAlphabetConcept, (TValue), (AlphabetConcept)(Compara /*! * @mfn FiniteOrderedAlphabetConcept#ValueSize * @brief Number of different values a value type object can have. - * - * @signature ValueSize::VALUE - * - * @tparam T A class. - * - * @return VALUE The number of different values the value can have. - * - * @section Remarks - * + * + * @signature ValueSize::Type; + * @signature ValueSize::VALUE; + * + * @tparam T A type to query for its value size. + * + * @return VALUE The number of different values a value of type T can have. The type is Type. + * @return Type The type of the result VALUE. 
+ * * This function is only defined for integral types like unsigned, int, or Dna. For floating point * numbers and the 64 bit types __int64 and __uint64, it returns 0 since there is no standard * compliant way to return the number of values for these types. - * + * * Note that you cannot get pointers or references to ValueSize<T>::VALUE in your program. You can use - * @link FiniteOrderedAlphabetConcept#valueSize @endlink in your programs without problems, though. When you get problems in your tests, use the - * "unary plus" workaround from the examples section. - * + * @link FiniteOrderedAlphabetConcept#valueSize @endlink in your programs without problems, though. When you get + * problems in your tests, use the "unary plus" workaround from the examples section. + * * @section Examples - * + * * The temporary assignment workaround. - * + * * @code{.cpp} * SEQAN_ASSERT_EQ(ValueSize::VALUE, 2u); // Linker error. * SEQAN_ASSERT_EQ(+ValueSize::VALUE, 2u); // OK @@ -448,17 +315,15 @@ SEQAN_CONCEPT_REFINE(OrderedAlphabetConcept, (TValue), (AlphabetConcept)(Compara /*! * @fn FiniteOrderedAlphabetConcept#ordValue - * @headerfile seqan/sequence.h + * @headerfile * @brief Maps an alphabet 1-to-1 to the interval [0..ValueSize). - * + * * @signature T ordValue(value); - * - * @param value Arbitrary character value. Types: SimpleType - * + * + * @param[in] value Arbitrary character value. Types: SimpleType + * * @return T An unsigned value (result of Size<typeof(value)> between 0 and ValueSize of the type of value. - * - * @section Remarks - * + * * This function first converts value to its unsigned value type and after that to an unsigned int. You can't * use (unsigned int)c for a character c as on some systems char is signed and a -1 * would be mapped to 0xffffffff instead of 0x000000ff. @@ -467,72 +332,16 @@ SEQAN_CONCEPT_REFINE(OrderedAlphabetConcept, (TValue), (AlphabetConcept)(Compara /*! * @fn FiniteOrderedAlphabetConcept#valueSize * @brief Returns size of an alphabet. 
- * - * @signature template - * T valueSize(); - * - * @tparam T Type to query for value size. - * - * @return T Number of values in type T. - * + * + * @signature T1 valueSize(); + * + * @tparam T2 Type to query for value size. + * + * @return T1 Number of values in type T2. + * * @see FiniteOrderedAlphabetConcept#ValueSize */ -/** -.Concept.FiniteOrderedAlphabetConcept -..cat:Alphabets -..summary:An type that is of finite domain and totally ordered and thus has a minimum and maximum value. -..baseconcept:Concept.OrderedAlphabetConcept -..include:seqan/basic.h - -.Function.ordValue -..concept:Concept.FiniteOrderedAlphabetConcept -..summary:Maps an alphabet 1-to-1 to the interval [0..ValueSize). -..cat:Alphabets -..signature:ordValue(value) -..param.value:Arbitrary character value. -...type:Class.SimpleType -..returns:An unsigned value (result of @Metafunction.Size@$$ between 0 and @Metafunction.ValueSize@ of the type of value. -..note:This function first converts value to its unsigned value type and after that to an $unsigned int$. -You can't use $(unsigned int)c$ for a character $c$ as on some systems $char$ is signed and a $-1$ would be mapped to $0xffffffff$ instead of $0x000000ff$. -..include:seqan/sequence.h - -.Function.valueSize -..concept:Concept.FiniteOrderedAlphabetConcept -..cat:Alphabets -..summary:Returns size of an alphabet. -..signature:valueSize() -..param.T:Type to query for value size. -..returns:Number of values in type $T$. -...type:Metafunction.ValueSize -..include:seqan/basic.h -..see:Metafunction.ValueSize - -.Metafunction.ValueSize: -..concept:Concept.FiniteOrderedAlphabetConcept -..cat:Basic -..summary:Number of different values a value type object can have. -..signature:ValueSize::VALUE -..param.T:A class. -...type:Concept.FiniteOrderedAlphabetConcept -..returns.param.VALUE:Value size of $T$. -..remarks: -This function is only defined for integral types like $unsigned$, $int$, or @Spec.Dna@. 
-For floating point numbers and the 64 bit types $__int64$ and $__uint64$, it returns 0 since there is no standard compliant way to return the number of values for these types. -..remarks: -Note that you cannot get pointers or references to $ValueSize::VALUE$ in your program. -You can use @Function.valueSize@ in your programs without problems, though. -When you get problems in your tests, use the "unary plus" workaround from the examples section. -..example.text:The temporary assignment workaround. -..example.code: -SEQAN_ASSERT_EQ(ValueSize::VALUE, 2u); // Linker error. -SEQAN_ASSERT_EQ(+ValueSize::VALUE, 2u); // OK -SEQAN_ASSERT_EQ(valueSize(), 2u); // OK -..see:Function.valueSize -..see:Metafunction.Value -..include:seqan/basic.h - */ - // Forwards for Metafunctions and Functions. template struct ValueSize; template typename ValueSize::Type valueSize(); @@ -557,7 +366,7 @@ SEQAN_CONCEPT_REFINE(FiniteOrderedAlphabetConcept, (TValue), (OrderedAlphabetCon // alphabet must be non-empty SEQAN_STATIC_ASSERT_MSG(static_cast(0) < ValueSize::VALUE, "Alphabet size be greater than zero."); - + // convert integer to alphabet value val = 0; val = size; @@ -576,85 +385,42 @@ SEQAN_CONCEPT_REFINE(FiniteOrderedAlphabetConcept, (TValue), (OrderedAlphabetCon * @concept AlphabetWithGapsConcept * @extends AlphabetConcept * @headerfile - * + * * @brief An alphabet that includes a specific gap character. */ /*! * @fn AlphabetWithGapsConcept#gapValueImpl * @brief Implements gapValue. - * + * * @signature T gapValueImpl(valuePointerTag); - * - * @param valuePointerTag A pointer that is used as a tag to specify the value type. The pointer needs not to point - * to a valid object, so it is possible to use a null pointer here. - * + * + * @param[in] valuePointerTag A pointer that is used as a tag to specify the value type. The pointer needs not to + * point to a valid object, so it is possible to use a null pointer here. + * * @return T A gap character. 
- * - * @section Remarks - * + * * This function implements gapValue. It is recommended to use gapValue rather than gapValueImpl. - * + * * @see AlphabetWithGapsConcept#gapValue */ /*! * @fn AlphabetWithGapsConcept#gapValue * @brief Return the "gap" value from an alphabet. - * - * @signature template - * T gapValue(); - * + * + * @signature T gapValue(); + * * @tparam T The alphabet type to query the gap value from. - * + * * @return T The gap character. - * - * @section Remarks - * + * * The function is implemented in gapValueImpl. Do not specialize gapValue, specialize link gapValueImpl * instead! - * + * * @see AlphabetWithGapsConcept#gapValueImpl */ -/** -.Concept.AlphabetWithGapsConcept -..cat:Alphabets -..baseconcept:Concept.AlphabetConcept -..summary:An alphabet that includes a specific gap character. -..include:seqan/basic.h - -.Function.gapValue -..concept:Concept.AlphabetWithGapsConcept -..cat:Alphabets -..cat:Alignments -..summary:Return the "gap" value from an alphabet. -..signature:gapValue() -..param.T:The alphabet type to query the "gap" value from. -...type:Concept.AlphabetWithGapsConcept -..returns:The gap character. -..remarks.text:The function is implemented in @Function.gapValueImpl@. -Do not specialize $gapValue$, specialize @Function.gapValueImpl@ instead! -..see:Function.gapValueImpl -..include:seqan/basic.h - -.Function.gapValueImpl -..concept:Concept.AlphabetWithGapsConcept -..hidefromindex -..cat:Alphabets -..cat:Alignments -..summary:Implements @Function.gapValue@. -..signature:gapValueImpl(valuePointerTag) -..param.valuePointerTag:A pointer that is used as a tag to specify the value type. -....type:Concept.AlphabetWithGapsConcept -...remarks:The pointer needs not to point to a valid object, so it is possible to use a null pointer here. -..returns:A gap character. -..see:Function.gapValue -..remarks.text:This function implements @Function.gapValue@. -It is recommended to use @Function.gapValue@ rather than $gapValueImpl$. 
-..include:seqan/basic.h -*/ - // Forwards for Metafunctions and Functions. template T gapValue(); template T gapValueImpl(T *); @@ -675,75 +441,40 @@ SEQAN_CONCEPT_REFINE(AlphabetWithGapsConcept, (TValue), (AlphabetConcept)) * @concept AlphabetWithUnknownValueConcept * @extends AlphabetConcept * @headerfile - * + * * @brief An alphabet which includes a specific "unknown" character. */ /*! * @fn AlphabetWithUnknownValueConcept#unknownValue - * + * * @brief Return the "unknown" value from an alphabet. - * - * @signature template - * T unknownValue(); - * + * + * @signature T unknownValue(); + * * @tparam T The alphabet type to query the unknown value from. - * + * * @return TReturn The "unknown" value. - * + * * @see AlphabetWithUnknownValueConcept#unknownValueImpl */ /*! * @fn AlphabetWithUnknownValueConcept#unknownValueImpl * @brief Implements unknownValue. - * + * * @signature T gapValueImpl(valuePointerTag) - * - * @param valuePointerTag A pointer that is used as a tag to specify the value type. The pointer needs not to point - * to a valid object, so it is possible to use a null pointer here. - * + * + * @param[in] valuePointerTag A pointer that is used as a tag to specify the value type. The pointer needs not to + * point to a valid object, so it is possible to use a null pointer here. + * * @return TReturn A "unknown" character. - * - * @section Remarks - * + * * This function implements unknownValue. It is recommended to use gapValue rather than gapValueImpl. - * + * * @see AlphabetWithUnknownValueConcept#unknownValue */ -/** -.Concept.AlphabetWithUnknownValueConcept -..cat:Alphabets -..baseconcept:Concept.AlphabetConcept -..summary:An alphabet which includes a specific "unknown" character. -..include:seqan/basic.h - -.Function.unknownValueImpl -..concept:Concept.AlphabetWithUnknownValueConcept -..hidefromindex -..cat:Alphabets -..summary:Implements @Function.unknownValue@. 
-..signature:gapValueImpl(valuePointerTag) -..param.valuePointerTag:A pointer that is used as a tag to specify the value type. -...type:Concept.AlphabetWithUnknownValueConcept -...remarks:The pointer needs not to point to a valid object, so it is possible to use a null pointer here. -..returns:A "unknown" character. -..see:Function.unknownValue -..remarks.text:This function implements @Function.unknownValue@. -It is recommended to use @Function.gapValue@ rather than $gapValueImpl$. -..include:seqan/basic.h - -.Function.unknownValue -..concept:Concept.AlphabetWithUnknownValueConcept -..cat:Alphabets -..summary:Return the "unknown" value from an alphabet. -..signature:unknownValue() -..param.T:The alphabet type to query the "unknown" value from. -...type:Concept.AlphabetWithUnknownValueConcept -..returns:The "unknown" value. - */ - // Forwards for Metafunctions and Functions. template T unknownValue(); template T unknownValueImpl(T *); @@ -764,7 +495,7 @@ SEQAN_CONCEPT_REFINE(AlphabetWithUnknownValueConcept, (TValue), (AlphabetConcept * @concept AlphabetWithQualitiesConcept * @extends AlphabetConcept * @headerfile - * + * * @brief An alphabet where qualities can be attached to the characters. */ @@ -772,12 +503,12 @@ SEQAN_CONCEPT_REFINE(AlphabetWithUnknownValueConcept, (TValue), (AlphabetConcept * @mfn AlphabetWithQualitiesConcept#HasQualities * @headerfile * @brief Return whether the given type stores qualities besides the alphabet. - * + * * @signature HasQualities::VALUE; * @signature HasQualities::Type; - * + * * @tparam TAlphabet The alphabe to query. - * + * * @return VALUE true or false * @return Type True or False */ @@ -785,11 +516,11 @@ SEQAN_CONCEPT_REFINE(AlphabetWithUnknownValueConcept, (TValue), (AlphabetConcept /*! * @mfn AlphabetWithQualitiesConcept#QualityValueSize * @brief Return the number of quality values in characters from alphabet with qualities. 
- * + * * @signature QualityValueSize::VALUE; - * + * * @tparam TAlphabet The alphabet to query for its value size. - * + * * @return VALUE The cardinality of the set of qualities. */ @@ -798,13 +529,13 @@ SEQAN_CONCEPT_REFINE(AlphabetWithUnknownValueConcept, (TValue), (AlphabetConcept * @brief Returns the quality of a character from an alphabet with integrated quality, e.g. the quality associated with * a specified element from a sequence. * @signature int getQualityValue(c); - * + * * @param[in] c Character to retrieve the quality from. - * + * * @return int Quality value of c. The quality value is an int value between 0 and 62 (inclusive). - * + * * @section Examples - * + * * @code{.cpp} * String seq = "TATA"; * // Assign quality value to first 'T' in sequence seq @@ -822,89 +553,21 @@ SEQAN_CONCEPT_REFINE(AlphabetWithUnknownValueConcept, (TValue), (AlphabetConcept * @fn AlphabetWithQualitiesConcept#assignQualityValue * @brief Assigns quality to a character from an alphabet with integrated quality, e.g. to a specified element from a * sequence. - * + * * @signature void assignQualityValue(c, q); - * + * * @param[out] c Target character to assign quality to. * @param[in] q Quality to assign to the character. The quality value is an integral value between 0 and 62 * (inclusive). - * - * @section Remarks - * + * * If q is a char then '!' is subtracted from q. This is useful for ASCII encoded * PHRED scores. - * + * * @see AlphabetWithQualitiesConcept#getQualityValue * @see convertQuality * @see assignQualities */ -/** -.Concept.AlphabetWithQualitiesConcept -..cat:Alphabets -..baseconcept:Concept.AlphabetConcept -..summary:An alphabet where qualities can be attached to the characters. -..include:seqan/basic.h - -.Metafunction.QualityValueSize -..concept:Concept.AlphabetWithQualitiesConcept -..cat:Alphabets -..summary:Return the number of quality values in characters from alphabet with qualities. 
-..signature:QualityValueSize::VALUE -..param.TAlphabet:The alphabet to query for its value size. -...type:Concept.AlphabetWithQualitiesConcept -..returns:The cardinality of the set of qualities. -..include:seqan/basic.h - -.Metafunction.HasQualities -..concept:Concept.AlphabetWithQualitiesConcept -..cat:Alphabets -..summary:Return whether the given type stores qualities besides the alphabet. -..signature:HasQualities::VALUE -..signature:HasQualities::Type -..param.TAlphabet:The alphabe to query. -..returns:$true$, $false$, $True$, or $False$. -..include:seqan/basic.h - -.Function.getQualityValue -..concept:Concept.AlphabetWithQualitiesConcept -..cat:Alphabets -..signature:getQualityValue(c) -..summary:Returns the quality of a character from an alphabet with integrated quality, e.g. the quality associated with a specified element from a sequence. -..param.c:Character to retrieve the quality from. -...type:Concept.AlphabetWithQualitiesConcept -..returns:Quality value of $c$. -...type:nolink:int -...remarks:The quality value is an integral value between 0 and 62 (inclusive). -..see:Function.assignQualityValue -..see:Function.convertQuality -..include:seqan/basic.h -..example.code: - String seq = "TATA"; - // Assign quality value to first 'T' in sequence seq - assignQualityValue(seq[0], 35); - // Print quality value of first 'T', and default quality value of first 'A' - std::cout << getQualityValue(seq[0]) << std::endl; // Defined as 35 - std::cout << getQualityValue(seq[1]) << std::endl; // Default value 60 - - -.Function.assignQualityValue -..concept:Concept.AlphabetWithQualitiesConcept -..cat:Alphabets -..signature:assignQualityValue(c, q) -..summary:Assigns quality to a character from an alphabet with integrated quality, e.g. to a specified element from a sequence. -..param.c:Target character to assign quality to. -...type:Concept.AlphabetWithQualitiesConcept -..param.q:Quality to assign to the character. 
-...type:nolink:int -...type:nolink:char -...remarks:The quality value is an integral value between 0 and 62 (inclusive). -..remarks:If $q$ is a $char$ then $'!'$ is subtracted from $q$. This is useful for ASCII encoded PHRED scores. -..see:Function.getQualityValue -..see:Function.convertQuality -..include:seqan/basic.h - */ - // TODO(holtgrew): What about different quality types? Guess scaling? Look at how other packages do this. SEQAN_CONCEPT_REFINE(AlphabetWithQualitiesConcept, (TValue), (AlphabetConcept)) @@ -920,4 +583,4 @@ SEQAN_CONCEPT_REFINE(AlphabetWithQualitiesConcept, (TValue), (AlphabetConcept)) } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_CONCEPT_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_CONCEPT_H_ diff --git a/seqan/basic/alphabet_math.h b/seqan/basic/alphabet_math.h index 61736a2..ec9d28c 100644 --- a/seqan/basic/alphabet_math.h +++ b/seqan/basic/alphabet_math.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -272,6 +272,32 @@ minValue(T /*tag*/) return infimumValueImpl(_tag); } +// ---------------------------------------------------------------------------- +// Function isNegative() +// ---------------------------------------------------------------------------- + +// to remove '... 
< 0 is always false' warning +template +inline bool +isNegative(T, False) +{ + return false; +} + +template +inline bool +isNegative(T t, True) +{ + return t < 0; +} + +template +inline bool +isNegative(T t) +{ + return isNegative(t, typename IsSameType::Type>::Type()); +} + } // namespace seqan #endif // #ifndef SEQAN_BASIC_ALPHABET_MATH_H_ diff --git a/seqan/basic/alphabet_profile.h b/seqan/basic/alphabet_profile.h index cebb8d2..74103c5 100644 --- a/seqan/basic/alphabet_profile.h +++ b/seqan/basic/alphabet_profile.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,10 +34,10 @@ // Profile alphabet character code. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_PROFILE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_PROFILE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_PROFILE_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_PROFILE_H_ -#include +#include namespace seqan { @@ -57,14 +57,14 @@ struct IteratorProxy; /*! * @class ProfileChar - * - * @headerfile seqan/basic.h - * + * + * @headerfile + * * @brief Alphabet type for profiles over another alphabet. - * + * * @signature template * class ProfileChar; - * + * * @tparam TValue The underlying alphabet type. * @tparam TCount The type to use for counting, default: unsigned int. * @tparam TSpec Specialization tag, default: void @@ -72,27 +72,10 @@ struct IteratorProxy; /*! * @var VariableType ProfileChar::count[] - * + * * @brief Array of ValueSize elements, giving counts in profile. 
*/ -/** -.Class.ProfileChar -..summary:Alphabet type for profiles over another alphabet. -..cat:Alphabets -..signature:ProfileChar -..param.TValue:The underlying alphabet type. -..param.TCount:The type to use for counting. -...default:nolink:$unsigned int$ -..param.TSpec:Specialization tag. -...default:nolink:$void$ -..include:seqan/basic.h - -.Memvar.ProfileChar#count[] -..class:Class.ProfileChar -..summary:Array of @Metafunction.ValueSize@ elements, giving counts in profile. - */ - template class ProfileChar; @@ -163,7 +146,7 @@ class ProfileChar operator char() { - typename Size::Type maxIndex = _getMaxIndex(*this); + typename Size::Type maxIndex = getMaxIndex(*this); return (maxIndex == ValueSize::VALUE - 1) ? gapValue() : (char) TValue(maxIndex); } }; @@ -179,17 +162,14 @@ class ProfileChar /*! * @mfn ProfileChar#ValueSize * @brief Number of different values a value type object can have. - * + * * @signature ValueSize::VALUE; - * + * * @tparam T The type to query. - * + * * @return VALUE Number of different values T can have. */ -///.Metafunction.ValueSize.param.T.type:Class.ProfileChar -///.Metafunction.ValueSize.class:Class.ProfileChar - template struct ValueSize > { @@ -204,36 +184,21 @@ struct ValueSize > /*! * @mfn ProfileChar#SourceValue * @brief Returns underlying value for ProfileChar. - * + * * @signature SourceValue::Type - * + * * @tparam T Type to query. - * + * * @return Type The type of the underlying character. - * + * * @section Examples - * + * * @code{.cpp} * typedef ProfileChar TProfileChar; * typedef SourceValue::Type TType; // Is Dna. * @endcode */ -/** -.Metafunction.SourceValue -..class:Class.ProfileChar -..cat:Alphabets -..summary:Returns underlying value for @Class.ProfileChar@. -..signature:SourceValue::Type -..param.T:Type to query. -...type:Class.ProfileChar -..returns:The type of the underlying character. -..example.code: -typedef ProfileChar TProfileChar; -typedef SourceValue::Type TType; // Is Dna. 
-..include:seqan/basic.h -*/ - template struct SourceValue; @@ -293,13 +258,17 @@ operator!=(ProfileChar const & lhs, // Function empty() // ---------------------------------------------------------------------------- -// TODO(holtgrew): Document. - -// Check if there are only gaps. - +/*! + * @fn ProfileChar#empty + * @brief Check whether there are only gaps in the representation of the ProfileChar. + * + * @signature bool empty(c); + * + * @param c ProfileChar to query. + * @return bool Whether or not the ProfileChar only contains gaps. + */ template -inline bool -empty(ProfileChar const & source) +bool empty(ProfileChar const & source) { typedef typename ValueSize const>::Type TSize; @@ -310,12 +279,22 @@ empty(ProfileChar const & source) } // ---------------------------------------------------------------------------- -// Helper Function _getMaxIndex() +// Helper Function getMaxIndex() // ---------------------------------------------------------------------------- +/*! + * @fn ProfileChar#getMaxIndex + * @brief Return number of dominating entry in ProfileChar. + * + * @signature TSize getMaxIndex(c); + * + * @param[in] c ProfileChar to query for its dominating entry. + * @return TSize index (with the @link FiniteOrderedAlphabetConcept#ordValue @endlink) of the dominating character + * in c + */ template -inline typename Size const>::Type -_getMaxIndex(ProfileChar const & source) +typename Size const>::Type +getMaxIndex(ProfileChar const & source) { typedef ProfileChar TProfileChar; typedef typename Size::Type TSize; @@ -333,12 +312,20 @@ _getMaxIndex(ProfileChar const & source } // ---------------------------------------------------------------------------- -// Helper Function _totalCount() +// Helper Function totalCount() // ---------------------------------------------------------------------------- +/*! + * @fn ProfileChar#totalCount + * @brief Return sum of counts in ProfileChar. 
+ * + * @signature TCount totalCount(c); + * + * @param[in] c ProfileChar to query. + * @return TCount Total number of characters represented by c. + */ template -inline TSourceCount -_totalCount(ProfileChar const & source) +TSourceCount totalCount(ProfileChar const & source) { typedef ProfileChar TProfileChar; typedef typename Size::Type TSize; @@ -360,7 +347,7 @@ inline void assign(SimpleType & target, ProfileChar const & source) { - target.value = _getMaxIndex(source); + target.value = getMaxIndex(source); } // ---------------------------------------------------------------------------- @@ -372,7 +359,7 @@ inline typename Convert const &, ProfileChar const & source) { - return (_getMaxIndex(source) == ValueSize::VALUE) ? convertImpl(Convert(), '-') : convertImpl(Convert(), TSourceValue(_getMaxIndex(source))); + return (getMaxIndex(source) == ValueSize::VALUE) ? convertImpl(Convert(), '-') : convertImpl(Convert(), TSourceValue(getMaxIndex(source))); } // ---------------------------------------------------------------------------- @@ -392,4 +379,4 @@ operator<<(TStream & os, ProfileChar const & rhs) } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_PROFILE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_PROFILE_H_ diff --git a/seqan/basic/alphabet_qualities.h b/seqan/basic/alphabet_qualities.h index e84a635..6112dd5 100644 --- a/seqan/basic/alphabet_qualities.h +++ b/seqan/basic/alphabet_qualities.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ // Definitions for piggybacking qualities in free bits of bytes. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_QUALITIES_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_QUALITIES_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_QUALITIES_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_QUALITIES_H_ // TODO(holtgrew): Should the documentation be here? @@ -76,8 +76,8 @@ struct QualityValueSize : QualityValueSize template struct HasQualities { - enum { VALUE = false }; typedef False Type; + static const bool VALUE = false; }; // ============================================================================ @@ -105,39 +105,25 @@ struct HasQualities /*! * @fn convertQuality * @headerfile - * + * * @brief Convert an integer quality value into its ASCII representation for FASTQ (Phred scale). - * + * * @signature void convertQuality(c, q); - * + * * @param[in] q Value of the quality to convert. The quality value is an integral value between 0 and 62 * (inclusive), int. * @param[out] c Character to store the quality in, char. - * + * * @see AlphabetWithQualitiesConcept#getQualityValue * @see AlphabetWithQualitiesConcept#assignQualityValue */ -/** -.Function.convertQuality -..cat:Alphabets -..signature:convertQuality(c, q) -..summary:Convert an integer quality value into its ASCII representation for FASTQ (Phred scale). -..param.c:Character to store the quality in. -...type:nolink:$char$ -..param.q:Value of the quality to convert. -...remarks:The quality value is an integral value between 0 and 62 (inclusive). -...type:nolink:$int$ -..see:Function.getQualityValue -..include:seqan/basic.h - */ - -inline -void convertQuality(char & c, int q) +inline +void convertQuality(char & c, int q) { c = '!' 
+ char(q); } } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_QUALITIES_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_QUALITIES_H_ diff --git a/seqan/basic/alphabet_residue.h b/seqan/basic/alphabet_residue.h index 4aa1ac2..c05d254 100644 --- a/seqan/basic/alphabet_residue.h +++ b/seqan/basic/alphabet_residue.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -45,8 +45,8 @@ // TODO(holtgrew): Add RnaQ and Rna5Q? Can we create a tag/type for Dna and Rna that is then differentiated with one additional tag? -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_H_ namespace seqan { @@ -76,40 +76,20 @@ template struct BaseAlphabet; * @extends SimpleType * @headerfile * @brief Alphabet for DNA. - * + * * @signature typedef SimpleType Dna; - * - * @section Remarks - * + * * The ValueSize of Dna is 4. The nucleotides are enumerated this way: 'A' = 0, 'C' = 1, 'G' = 2, 'T' = * 3. - * + * * Objects of type Dna can be converted to various other types and vice versa. An object that has a value not * in {'A', 'C', 'G', 'T'} is converted to 'A'. - * + * * @see Dna5 * @see DnaString * @see DnaIterator */ -/** -.Spec.Dna -..cat:Alphabets -..summary:Alphabet for DNA. -..general:Class.SimpleType -..signature:Dna -..remarks: -...text:The @Metafunction.ValueSize@ of $Dna$ is 4. -The nucleotides are enumerated this way: $'A' = 0, 'C' = 1, 'G' = 2, 'T' = 3$. 
-...text:Objects of type $Dna$ can be converted to various other types and vice versa. -An object that has a value not in ${'A', 'C', 'G', 'T'}$ is converted to $'A'$. -...text:$Dna$ is typedef for $SimpleType$, while $Dna_$ is a helper -specialization tag class. -..see:Metafunction.ValueSize -..see:Spec.Dna5 -..include:seqan/basic.h -*/ - struct Dna_ {}; typedef SimpleType Dna; @@ -136,40 +116,20 @@ struct BitsPerValue< Dna > * @extends SimpleType * @headerfile * @brief Alphabet for DNA including 'N' character. - * + * * @signature typedef SimpleType Dna5; - * - * @section Remarks - * + * * The @link FiniteOrderedAlphabetConcept#ValueSize @endlink of Dna5 is 5. The nucleotides are enumerated this * way: 'A' = 0, 'C' = 1, 'G' = 2, 'T' = 3. The 'N' character ("unkown nucleotide") is encoded by 4. - * + * * Objects of type Dna5 can be converted to various other types and vice versa. An object that has a value not * in {'A', 'C', 'G', 'T'} is converted to 'N'. - * + * * @see Dna5Iterator * @see Dna5String * @see Dna */ -/** -.Spec.Dna5: -..cat:Alphabets -..summary:Alphabet for DNA including 'N' character. -..general:Class.SimpleType -..signature:Dna5 -..remarks: -...text:The @Metafunction.ValueSize@ of $Dna5$ is 5. -The nucleotides are enumerated this way: $'A' = 0, 'C' = 1, 'G' = 2, 'T' = 3$. -The 'N' character ("unkown nucleotide") is encoded by 4. -...text:Objects of type $Dna5$ can be converted to various other types and vice versa. -An object that has a value not in ${'A', 'C', 'G', 'T'}$ is converted to $'N'$. -...text:$Dna5$ is typedef for $SimpleType$, while $Dna5_$ is a helper -specialization tag class. -..see:Metafunction.ValueSize -..include:seqan/basic.h -*/ - struct Dna5_ {}; typedef SimpleType Dna5; @@ -204,39 +164,23 @@ unknownValueImpl(Dna5 *) * @headerfile * @implements AlphabetWithQualitiesConcept * @brief Alphabet for DNA plus PHRED quality. - * + * * @signature typedef SimpleType DnaQ; - * - * @section Remarks - * + * * The ValueSize of DnaQ is 4. 
The nucleotides are enumerated this way: 'A' = 0, 'C' = 1, 'G' = 2, 'T' = * 3. - * + * * Objects of type DnaQ can be converted to various other types and vice versa. - * + * * Note that the default quality value is set to 60. - * + * * @see Dna5Q * @see Dna */ -/** -.Spec.DnaQ: -..implements:Concept.AlphabetWithQualitiesConcept -..cat:Alphabets -..summary:Alphabet for DNA plus PHRED quality. -..general:Class.SimpleType -..signature:DnaQ -..remarks: -...text:The @Metafunction.ValueSize@ of $DnaQ$ is 4. -The nucleotides are enumerated this way: $'A' = 0, 'C' = 1, 'G' = 2, 'T' = 3$. -...text:Objects of type $DnaQ$ can be converted to various other types and vice versa. -...text:$DnaQ$ is typedef for $SimpleType$, while $DnaQ_$ is a helper -specialization tag class. -...text:Note that the default quality value is set to 60. -..see:Metafunction.ValueSize -..see:Spec.Dna5Q -*/ +#ifndef SEQAN_DEFAULT_QUALITY +#define SEQAN_DEFAULT_QUALITY 40 +#endif struct DnaQ_ {}; typedef SimpleType DnaQ; @@ -260,8 +204,8 @@ template <> struct BitsPerValue template <> struct HasQualities { - enum { VALUE = true }; typedef True Type; + static const bool VALUE = true; }; template <> @@ -276,17 +220,17 @@ struct QualityValueSize enum { VALUE = 63 }; // 64 - 1 (N) }; -///.Function.getQualityValue.param.c.type:Spec.DnaQ -///.Function.getQualityValue.class:Spec.DnaQ +template +inline int getQualityValue(TValue const &) +{ + return 0; +} -inline int getQualityValue(DnaQ const & c) +inline int getQualityValue(DnaQ const & c) { return c.value >> 2; } -///.Function.assignQualityValue.param.c.type:Spec.DnaQ -///.Function.assignQualityValue.class:Spec.DnaQ - inline void assignQualityValue(DnaQ & c, int q) { @@ -323,40 +267,20 @@ void assignQualityValue(char & q, DnaQ c) * @headerfile * @implements AlphabetWithQualitiesConcept * @brief Alphabet for DNA plus PHRED quality including 'N' character. 
- * + * * @signature typedef SimpleType Dna5Q; - * - * @section Remarks - * + * * The ValueSize of Dna5Q is 5. The nucleotides are enumerated this way: 'A' = 0, 'C' = 1, 'G' = 2, 'T' = * 3. The 'N' character ("unknown nucleotide") is encoded by 4. - * + * * Objects of type Dna5 can be converted to various other types and vice versa. - * - * Note that the default quality value is set to 60. - * + * + * Note that the default quality value is set to 40. + * * @see Dna5 * @see DnaQ */ -/** -.Spec.Dna5Q -..implements:Concept.AlphabetWithQualitiesConcept -..cat:Alphabets -..summary:Alphabet for DNA plus PHRED quality including 'N' character. -..general:Class.SimpleType -..signature:Dna5Q -..remarks: -...text:The @Metafunction.ValueSize@ of $Dna5Q$ is 5. -The nucleotides are enumerated this way: $'A' = 0, 'C' = 1, 'G' = 2, 'T' = 3$. -The 'N' character ("unkown nucleotide") is encoded by 4. -...text:Objects of type $Dna5$ can be converted to various other types and vice versa. -...text:$Dna5Q$ is typedef for $SimpleType$, while $Dna5Q_$ is a helper -specialization tag class. -...text:Note that the default quality value is set to 60. -..see:Metafunction.ValueSize -*/ - struct Dna5Q_ {}; typedef SimpleType Dna5Q; @@ -381,8 +305,8 @@ template <> struct BitsPerValue template <> struct HasQualities { - enum { VALUE = true }; typedef True Type; + static const bool VALUE = true; }; template <> @@ -404,10 +328,7 @@ unknownValueImpl(Dna5Q *) return _result; } -///.Function.getQualityValue.param.c.type:Spec.Dna5Q -///.Function.getQualityValue.class.Spec.Dna5Q - -inline int getQualityValue(Dna5Q const &c) +inline int getQualityValue(Dna5Q const &c) { // We use a lookup table to extract the qualities from DNA5Q. 
The lookup // table based code is equivalent to the following line: @@ -433,9 +354,6 @@ inline int getQualityValue(Dna5Q const &c) return table[c.value]; } -///.Function.assignQualityValue.param.c.type:Spec.Dna5Q -///.Function.assignQualityValue.class:Spec.Dna5Q - inline void assignQualityValue(Dna5Q &c, int q) { @@ -446,8 +364,8 @@ void assignQualityValue(Dna5Q &c, int q) c.value = (c.value & 3) | (q << 2); } -inline -void assignQualityValue(Dna5Q &c, char q) +inline +void assignQualityValue(Dna5Q &c, char q) { int q1 = static_cast(q - '!'); if (q1 < 0) q1 = 0; @@ -456,7 +374,7 @@ void assignQualityValue(Dna5Q &c, char q) assignQualityValue(c, q1); } -inline +inline void assignQualityValue(char & q, Dna5Q c) { q = '!' + getQualityValue(c); @@ -471,42 +389,22 @@ void assignQualityValue(char & q, Dna5Q c) * @extends SimpleType * @headerfile * @brief Alphabet for RNA. - * + * * @signature typedef SimpleType Rna; - * - * @section Remarks - * + * * The ValueSize of Rna is 4. The nucleotides are enumerated this way: 'A' = 0, 'C' = 1, 'G' = 2, 'U' = * 3. - * + * * Objects of type Rna can be converted to various other types and vice versa. An object that has a value not * in {'A', 'C', 'G', 'U'} is converted to 'A'. - * + * * Rna is typedef for SimpleType, while Rna_ is a helper specialization tag class. - * + * * @see Rna5 * @see RnaString * @see RnaIterator */ -/** -.Spec.Rna: -..cat:Alphabets -..summary:Alphabet for RNA. -..general:Class.SimpleType -..signature:Rna -..remarks: -...text:The @Metafunction.ValueSize@ of $Rna$ is 4. -The nucleotides are enumerated this way: $'A' = 0, 'C' = 1, 'G' = 2, 'U' = 3$. -...text:Objects of type $Rna$ can be converted to various other types and vice versa. -An object that has a value not in ${'A', 'C', 'G', 'U'}$ is converted to $'A'$. -...text:$Rna$ is typedef for $SimpleType$, while $Rna_$ is a helper -specialization tag class. 
-..see:Metafunction.ValueSize -..see:Spec.Rna5 -..include:seqan/basic.h -*/ - struct Rna_ {}; typedef SimpleType Rna; @@ -533,40 +431,20 @@ struct BitsPerValue * @extends SimpleType * @headerfile * @brief Alphabet for RNA including 'N' character. - * + * * @signature typedef SimpleType Rna5; - * - * @section Remarks - * + * * The ValueSize of Rna5 is 5. The nucleotides are enumerated this way: 'A' = 0, 'C' = 1, 'G' = 2, 'U' = * 3. The 'N' character ("unkown nucleotide") is encoded by 4. - * + * * Objects of type Rna5 can be converted to various other types and vice versa. An object that has a value not * in {'A', 'C', 'G', 'U'} is converted to 'N'. - * + * * @see Rna5Iterator * @see Rna5String * @see Rna */ -/** -.Spec.Rna5: -..cat:Alphabets -..summary:Alphabet for RNA including 'N' character. -..general:Class.SimpleType -..signature:Rna5 -..remarks: -...text:The @Metafunction.ValueSize@ of $Rna5$ is 5. -The nucleotides are enumerated this way: $'A' = 0, 'C' = 1, 'G' = 2, 'U' = 3$. -The 'N' character ("unkown nucleotide") is encoded by 4. -...text:Objects of type $Rna5$ can be converted to various other types and vice versa. -An object that has a value not in ${'A', 'C', 'G', 'U'}$ is converted to $'N'$. -...text:$Rna5$ is typedef for $SimpleType$, while $Rna5_$ is a helper -specialization tag class. -..see:Metafunction.ValueSize -..include:seqan/basic.h -*/ - struct Rna5_ {}; typedef SimpleType Rna5; @@ -601,14 +479,12 @@ unknownValueImpl(Rna5 *) * @extends SimpleType * @headerfile * @brief Iupac code for DNA. - * + * * @signature typedef SimpleType Iupac; - * - * @section Remarks - * + * * The ValueSize of Iupac is 16. The nucleotides are enumerated from 0 to 19 in this order: 'U'=0, 'T', 'A', * 'W', 'C', 'Y', 'M', 'H', 'G', 'K', 'R', 'D', 'S', 'B', 'V', 'N'=15. - * + * * Objects of type Iupac can be converted to various other types and vice versa. Unknown values are converted * to 'N'. 
* @@ -616,24 +492,6 @@ unknownValueImpl(Rna5 *) * @see IupacIterator */ -/** -.Spec.Iupac: -..cat:Alphabets -..summary:Iupac code for DNA. -..general:Class.SimpleType -..signature:Iupac -..remarks: -...text:The @Metafunction.ValueSize@ of $Iupac$ is 16. -The nucleotides are enumerated from 0 to 19 in this order: -'U'=0, 'T', 'A', 'W', 'C', 'Y', 'M', 'H', 'G', 'K', 'R', 'D', 'S', 'B', 'V', 'N'=15. -...text:Objects of type $Iupac$ can be converted to various other types and vice versa. -Unkown values are converted to $'N'$. -...text:$Iupac$ is typedef for $SimpleType$, while $Iupac_$ is a helper -specialization tag class. -..see:Metafunction.ValueSize -..include:seqan/basic.h -*/ - struct Iupac_ {}; typedef SimpleType Iupac; @@ -666,54 +524,35 @@ unknownValueImpl(Iupac *) * @headerfile * @brief IUPAC code for amino acids. * @signature typedef SingleType AminoAcid; - * - * @section Remarks - * - * The ValueSize of AminoAcid is 24. - * - * The amino acids are enumerated from 0 to 15 in this order: - * - * 'A'=0, 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'=19. - * - * The remaining 4 symbols are: - * - * 'B'=20 (Aspartic Acid, Asparagine), 'Z'=21 (Glutamic Acid, Glutamine), 'X'=22 (unknown), '*'=23 (terminator) - * + * + * The ValueSize of AminoAcid is 27. + * + * The amino acid symbols are as follows, i.e. they are sorted alphabetically + * up until the last two symbols: + * + * 'A' = 0, 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'Y', 'Z', 'X'. '*' + * + * Of these 'B' is a wildcard for (Aspartic Acid, Asparagine), + * 'J' for (Leucine, Isoleucine), 'Z' for (Glutamic Acid, Glutamine) and + * 'X' for "any amino acid". + * + * 'O' refers to the rare Pyrrolysine, 'U' refers to the rare Selenocysteine and '*' to the terminator tRNA. + * * Objects of type AminoAcid can be converted to char and vice versa. Unknown values are converted to * 'X'. 
- * + * * @see FiniteOrderedAlphabetConcept#ValueSize * @see PeptideIterator * @see Peptide */ -/** -.Spec.AminoAcid: -..cat:Alphabets -..summary:Iupac code for amino acids. -..general:Class.SimpleType -..signature:AminoAcid -..remarks: -...text:The @Metafunction.ValueSize@ of $AminoAcid$ is 24. -...text:The amino acids are enumerated from 0 to 15 in this order: -...text:'A'=0, 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'=19. -...text:The remaining 4 symbols are: -...text: 'B'=20 (Aspartic Acid, Asparagine), 'Z'=21 (Glutamic Acid, Glutamine), 'X'=22 (unknown), '*'=23 (terminator) -...text:Objects of type $AminoAcid$ can be converted to $char$ and vice versa. -Unkown values are converted to $'X'$. -...text:$AminoAcid$ is typedef for $SimpleType$, while $AminoAcid_$ is a helper -specialization tag class. -..see:Metafunction.ValueSize -..include:seqan/basic.h -*/ - struct AminoAcid_ {}; typedef SimpleType AminoAcid; template <> struct ValueSize { typedef __uint8 Type; - static const Type VALUE = 24; + static const Type VALUE = 27; }; template <> struct BitsPerValue @@ -737,40 +576,27 @@ unknownValueImpl(AminoAcid *) * @class Finite * @extends SimpleType * @headerfile - * + * * @brief A finite alphabet of a fixed size. - * + * * @signature template * class SimpleType >; - * + * * @tparam TValue The type that is use to store the values. * @tparam SIZE The ValueSize of the alphabet. */ -/** -.Spec.Finite: -..cat:Alphabets -..summary:A finite alphabet of a fixed size. -..general:Class.SimpleType -..signature:SimpleType > -..param.TValue:The type that is use to store the values. -...default:$char$ -..param.SIZE:The @Metafunction.ValueSize@ of the alphabet. 
-..see:Metafunction.ValueSize -..include:seqan/basic.h -*/ - template struct Finite; -template +template struct ValueSize > > { - typedef __uint8 Type; + typedef unsigned Type; static const Type VALUE = SIZE; }; -template +template struct BitsPerValue > > { typedef __uint8 Type; @@ -785,38 +611,38 @@ struct BitsPerValue > > // char // ---------------------------------------------------------------------------- -inline void assign(char & c_target, +inline void assign(char & c_target, Dna const & source) { - c_target = TranslateTableDna5ToAscii_<>::VALUE[source.value]; + c_target = TranslateTableDna5ToChar_<>::VALUE[source.value]; } -inline void assign(char & c_target, +inline void assign(char & c_target, Dna5 const & source) { - c_target = TranslateTableDna5ToAscii_<>::VALUE[source.value]; + c_target = TranslateTableDna5ToChar_<>::VALUE[source.value]; } -inline void assign(char& target, +inline void assign(char & c_target, Rna const & source) { - target = TranslateTableRna5ToAscii_<>::VALUE[source.value]; + c_target = TranslateTableRna5ToChar_<>::VALUE[source.value]; } -inline void assign(char& target, +inline void assign(char & c_target, Rna5 const & source) { - target = TranslateTableRna5ToAscii_<>::VALUE[source.value]; + c_target = TranslateTableRna5ToChar_<>::VALUE[source.value]; } inline void assign(char & c_target, Iupac const & source) { - c_target = TranslateTableIupacToAscii_<>::VALUE[source.value]; + c_target = TranslateTableIupacToChar_<>::VALUE[source.value]; } inline void assign(char & c_target, AminoAcid const & source) { - c_target = TranslateTableAAToAscii_<>::VALUE[source.value]; + c_target = TranslateTableAAToChar_<>::VALUE[source.value]; } // ---------------------------------------------------------------------------- @@ -824,7 +650,7 @@ inline void assign(char & c_target, AminoAcid const & source) // ---------------------------------------------------------------------------- template <> -struct CompareType +struct CompareTypeImpl { typedef Dna 
Type; }; @@ -835,7 +661,7 @@ inline void assign(Dna & target, __uint8 c_source) } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; @@ -843,29 +669,14 @@ struct CompareType inline void assign(Dna & target, char c_source) { #ifdef __CUDA_ARCH__ - target.value = _translateAsciiToDna((unsigned char)c_source); -#else - target.value = TranslateTableAsciiToDna_<>::VALUE[(unsigned char) c_source]; -#endif -} - -template <> -struct CompareType -{ - typedef Dna Type; -}; - -inline void assign(Dna & target, Unicode c_source) -{ -#ifdef __CUDA_ARCH__ - target.value = _translateAsciiToDna((unsigned char)c_source); + target.value = _translateCharToDna((unsigned char)c_source); #else - target.value = TranslateTableAsciiToDna_<>::VALUE[(unsigned char) c_source]; + target.value = TranslateTableCharToDna_<>::VALUE[(unsigned char) c_source]; #endif } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; @@ -876,7 +687,7 @@ inline void assign(Dna & target, Dna5 const & c_source) } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; @@ -891,7 +702,7 @@ inline void assign(Dna & target, Iupac const & source) // ---------------------------------------------------------------------------- template <> -struct CompareType +struct CompareTypeImpl { typedef Dna5 Type; }; @@ -902,29 +713,22 @@ inline void assign(Dna5 & target, __uint8 c_source) } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna5 Type; }; inline void assign(Dna5 & target, char c_source) { - target.value = TranslateTableAsciiToDna5_<>::VALUE[(unsigned char) c_source]; -} - -template <> -struct CompareType -{ - typedef Dna5 Type; -}; - -inline void assign(Dna5 & target, Unicode c_source) -{ - target.value = TranslateTableAsciiToDna5_<>::VALUE[(unsigned char) c_source]; +#ifdef __CUDA_ARCH__ + target.value = _translateCharToDna5((unsigned char)c_source); +#else + target.value = TranslateTableCharToDna5_<>::VALUE[(unsigned char) 
c_source]; +#endif } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna5 Type; }; @@ -935,7 +739,7 @@ inline void assign(Dna5 & target, Iupac const & source) } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; @@ -950,40 +754,29 @@ inline void assign(Dna5 & target, Dna const & c_source) // ---------------------------------------------------------------------------- template <> -struct CompareType +struct CompareTypeImpl { typedef Rna Type; }; inline void assign(Rna & target, __uint8 c_source) { - target.value = TranslateTableByteToRna_<>::VALUE[c_source]; + target.value = TranslateTableByteToDna_<>::VALUE[c_source]; } template <> -struct CompareType +struct CompareTypeImpl { typedef Rna Type; }; inline void assign(Rna & target, char c_source) { - target.value = TranslateTableAsciiToRna_<>::VALUE[(unsigned char)c_source]; -} - -template <> -struct CompareType -{ - typedef Rna Type; -}; - -inline void assign(Rna & target, Unicode c_source) -{ - target.value = TranslateTableAsciiToRna_<>::VALUE[(unsigned char) c_source]; + target.value = TranslateTableCharToDna_<>::VALUE[(unsigned char)c_source]; } template <> -struct CompareType +struct CompareTypeImpl { typedef Rna Type; }; @@ -998,40 +791,29 @@ inline void assign(Rna & target, Rna5 const & c_source) // --------------------------------------------------------------------------- template <> -struct CompareType +struct CompareTypeImpl { typedef Rna5 Type; }; inline void assign(Rna5 & target, __uint8 c_source) { - target.value = TranslateTableByteToRna5_<>::VALUE[c_source]; + target.value = TranslateTableByteToDna5_<>::VALUE[c_source]; } template <> -struct CompareType +struct CompareTypeImpl { typedef Rna5 Type; }; inline void assign(Rna5 & target, char c_source) { - target.value = TranslateTableAsciiToRna5_<>::VALUE[(unsigned char)c_source]; + target.value = TranslateTableCharToDna5_<>::VALUE[(unsigned char)c_source]; } template <> -struct CompareType -{ - typedef Rna5 
Type; -}; - -inline void assign(Rna5 & target, Unicode c_source) -{ - target.value = TranslateTableAsciiToRna5_<>::VALUE[(unsigned char) c_source]; -} - -template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; @@ -1046,7 +828,7 @@ inline void assign(Rna5 & target, Rna const & c_source) // --------------------------------------------------------------------------- template <> -struct CompareType +struct CompareTypeImpl { typedef Iupac Type; }; @@ -1057,25 +839,14 @@ inline void assign(Iupac & target, __uint8 c_source) } template <> -struct CompareType +struct CompareTypeImpl { typedef Iupac Type; }; inline void assign(Iupac & target, char c_source) { - target.value = TranslateTableAsciiToIupac_<>::VALUE[(unsigned char) c_source]; -} - -template <> -struct CompareType -{ - typedef Iupac Type; -}; - -inline void assign(Iupac & target, Unicode c_source) -{ - target.value = TranslateTableAsciiToIupac_<>::VALUE[(unsigned char) c_source]; + target.value = TranslateTableCharToIupac_<>::VALUE[(unsigned char) c_source]; } inline void assign(Iupac & target, Dna const & source) @@ -1093,7 +864,7 @@ inline void assign(Iupac & target, Dna5 const & source) // --------------------------------------------------------------------------- template <> -struct CompareType +struct CompareTypeImpl { typedef AminoAcid Type; }; @@ -1104,25 +875,14 @@ inline void assign(AminoAcid & target, __uint8 c_source) } template <> -struct CompareType +struct CompareTypeImpl { typedef AminoAcid Type; }; inline void assign(AminoAcid & target, char c_source) { - target.value = TranslateTableAsciiToAA_<>::VALUE[(unsigned char) c_source]; -} - -template <> -struct CompareType -{ - typedef AminoAcid Type; -}; - -inline void assign(AminoAcid & target, Unicode c_source) -{ - target.value = TranslateTableAsciiToAA_<>::VALUE[(unsigned char) c_source]; + target.value = TranslateTableCharToAA_<>::VALUE[(unsigned char) c_source]; } // 
--------------------------------------------------------------------------- @@ -1130,36 +890,36 @@ inline void assign(AminoAcid & target, Unicode c_source) // --------------------------------------------------------------------------- // template -// struct CompareType, SimpleType > +// struct CompareTypeImpl, SimpleType > // { // typedef SimpleType Type; // }; -// +// // template -// struct CompareType, SimpleType > +// struct CompareTypeImpl, SimpleType > // { // typedef SimpleType Type; // }; template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; inline void assign(DnaQ & target, Dna const & source) { - target.value = source.value | (60 << 2); + target.value = source.value | (SEQAN_DEFAULT_QUALITY << 2); } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; @@ -1170,9 +930,9 @@ inline void assign(Dna & target, DnaQ const & source) } template <> -struct CompareType +struct CompareTypeImpl { - typedef Dna Type; + typedef Iupac Type; }; inline void assign(DnaQ & target, Iupac const & source) @@ -1181,71 +941,71 @@ inline void assign(DnaQ & target, Iupac const & source) } template <> -struct CompareType +struct CompareTypeImpl { - typedef Dna Type; + typedef Iupac Type; }; -inline void assign(DnaQ & target, Dna5 const & source) +inline void assign(Iupac & target, DnaQ const & source) { assign(target, (Dna) source); } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; -inline void assign(DnaQ & target, __uint8 c_source) +inline void assign(DnaQ & target, Dna5 const & source) { - assign(target, (Dna) c_source); + assign(target, (Dna) source); } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; -inline void assign(DnaQ & target, char c_source) +inline void assign(DnaQ & target, __uint8 c_source) { assign(target, (Dna) c_source); } template <> -struct CompareType +struct CompareTypeImpl 
{ typedef Dna Type; }; -inline void assign(DnaQ & target, Unicode c_source) +inline void assign(DnaQ & target, char c_source) { assign(target, (Dna) c_source); } -inline void +inline void assign(DnaQ & target, DnaQ const & source) { target.value = source.value; } template -inline void +inline void assign(DnaQ & target, TSource const & source) { target.value = (Dna)source; } -inline void -assign(__int64 & c_target, +inline void +assign(__int64 & c_target, DnaQ & source) { c_target = Dna(source); } -inline void -assign(__int64 & c_target, +inline void +assign(__int64 & c_target, DnaQ const & source) { c_target = Dna(source); @@ -1253,15 +1013,15 @@ assign(__int64 & c_target, // __uint64 -inline void -assign(__uint64 & c_target, +inline void +assign(__uint64 & c_target, DnaQ & source) { c_target = Dna(source); } -inline void -assign(__uint64 & c_target, +inline void +assign(__uint64 & c_target, DnaQ const & source) { c_target = Dna(source); @@ -1269,15 +1029,15 @@ assign(__uint64 & c_target, // int -inline void -assign(int & c_target, +inline void +assign(int & c_target, DnaQ & source) { c_target = Dna(source); } -inline void -assign(int & c_target, +inline void +assign(int & c_target, DnaQ const & source) { c_target = Dna(source); @@ -1285,15 +1045,15 @@ assign(int & c_target, // unsigned int -inline void -assign(unsigned int & c_target, +inline void +assign(unsigned int & c_target, DnaQ & source) { c_target = Dna(source); } -inline void -assign(unsigned int & c_target, +inline void +assign(unsigned int & c_target, DnaQ const & source) { c_target = Dna(source); @@ -1301,15 +1061,15 @@ assign(unsigned int & c_target, // short -inline void -assign(short & c_target, +inline void +assign(short & c_target, DnaQ & source) { c_target = Dna(source); } -inline void -assign(short & c_target, +inline void +assign(short & c_target, DnaQ const & source) { c_target = Dna(source); @@ -1317,15 +1077,15 @@ assign(short & c_target, // unsigned short -inline void -assign(unsigned short 
& c_target, +inline void +assign(unsigned short & c_target, DnaQ & source) { c_target = Dna(source); } -inline void -assign(unsigned short & c_target, +inline void +assign(unsigned short & c_target, DnaQ const & source) { c_target = Dna(source); @@ -1333,15 +1093,15 @@ assign(unsigned short & c_target, // char -inline void -assign(char & c_target, +inline void +assign(char & c_target, DnaQ & source) { c_target = Dna(source); } -inline void -assign(char & c_target, +inline void +assign(char & c_target, DnaQ const & source) { c_target = Dna(source); @@ -1349,15 +1109,15 @@ assign(char & c_target, // signed char -inline void -assign(signed char & c_target, +inline void +assign(signed char & c_target, DnaQ & source) { c_target = Dna(source); } -inline void -assign(signed char & c_target, +inline void +assign(signed char & c_target, DnaQ const & source) { c_target = Dna(source); @@ -1365,15 +1125,15 @@ assign(signed char & c_target, // unsigned char -inline void -assign(unsigned char & c_target, +inline void +assign(unsigned char & c_target, DnaQ & source) { c_target = Dna(source); } -inline void -assign(unsigned char & c_target, +inline void +assign(unsigned char & c_target, DnaQ const & source) { c_target = Dna(source); @@ -1384,26 +1144,26 @@ assign(unsigned char & c_target, // --------------------------------------------------------------------------- // template -// struct CompareType, SimpleType > +// struct CompareTypeImpl, SimpleType > // { // typedef SimpleType Type; // }; -// +// // template -// struct CompareType, SimpleType > +// struct CompareTypeImpl, SimpleType > // { // typedef SimpleType Type; // }; template <> -struct CompareType +struct CompareTypeImpl { typedef Dna5 Type; }; template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; @@ -1437,7 +1197,7 @@ inline void assign(DnaQ & target, Dna5Q const & source) } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; @@ -1449,7 +1209,7 @@ inline void 
assign(Dna5Q & target, DnaQ const & source) template <> -struct CompareType +struct CompareTypeImpl { typedef Dna5 Type; }; @@ -1482,7 +1242,7 @@ inline void assign(Dna5 & target, Dna5Q const & source) } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna5 Type; }; @@ -1493,16 +1253,17 @@ inline void assign(Dna5Q & target, Dna5 const & source) // We perform the conversion from DNA5 with qualities to DNA5 by a simple // table lookup. The lookup below is equivalent to the following line: // - // target.value = (source.value == 4)? Dna5QValueN_ : source.value | (60 << 2); + // target.value = (source.value == 4)? Dna5QValueN_ : source.value | (40 << 2); static const unsigned table[] = { - (60 << 2) + 0, (60 << 2) + 1, (60 << 2) + 2, (60 << 2) + 3, Dna5QValueN_ + (SEQAN_DEFAULT_QUALITY << 2) + 0, (SEQAN_DEFAULT_QUALITY << 2) + 1, + (SEQAN_DEFAULT_QUALITY << 2) + 2, (SEQAN_DEFAULT_QUALITY << 2) + 3, Dna5QValueN_ }; target.value = table[source.value]; } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; @@ -1513,7 +1274,7 @@ inline void assign(Dna5Q & target, Dna const & source) } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna Type; }; @@ -1524,7 +1285,7 @@ inline void assign(Dna & target, Dna5Q const & source) } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna5 Type; }; @@ -1535,7 +1296,7 @@ inline void assign(Dna5 & target, DnaQ const & source) } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna5 Type; }; @@ -1546,7 +1307,7 @@ inline void assign(Dna5Q & target, __uint8 c_source) } template <> -struct CompareType +struct CompareTypeImpl { typedef Dna5 Type; }; @@ -1557,35 +1318,35 @@ inline void assign(Dna5Q & target, char c_source) } template <> -struct CompareType +struct CompareTypeImpl { - typedef Dna5 Type; + typedef Iupac Type; }; -inline void assign(Dna5Q & target, Unicode c_source) +inline void assign(Dna5Q & target, Iupac const & source) { - 
assign(target, (Dna5)c_source); + assign(target, (Dna5)source); } template <> -struct CompareType +struct CompareTypeImpl { - typedef Dna5 Type; + typedef Iupac Type; }; -inline void assign(Dna5Q & target, Iupac const & source) +inline void assign(Iupac & target, Dna5Q const & source) { assign(target, (Dna5)source); } -inline void +inline void assign(Dna5Q & target, Dna5Q const & source) { target.value = source.value; } template -inline void +inline void assign(Dna5Q & target, TSource const & source) { assign(target, (Dna5)source); @@ -1593,15 +1354,15 @@ assign(Dna5Q & target, TSource const & source) // __int64 -inline void -assign(__int64 & c_target, +inline void +assign(__int64 & c_target, Dna5Q & source) { c_target = Dna5(source); } -inline void -assign(__int64 & c_target, +inline void +assign(__int64 & c_target, Dna5Q const & source) { c_target = Dna5(source); @@ -1609,15 +1370,15 @@ assign(__int64 & c_target, // __uint64 -inline void -assign(__uint64 & c_target, +inline void +assign(__uint64 & c_target, Dna5Q & source) { c_target = Dna5(source); } -inline void -assign(__uint64 & c_target, +inline void +assign(__uint64 & c_target, Dna5Q const & source) { c_target = Dna5(source); @@ -1625,15 +1386,15 @@ assign(__uint64 & c_target, // int -inline void -assign(int & c_target, +inline void +assign(int & c_target, Dna5Q & source) { c_target = Dna5(source); } -inline void -assign(int & c_target, +inline void +assign(int & c_target, Dna5Q const & source) { c_target = Dna5(source); @@ -1641,15 +1402,15 @@ assign(int & c_target, // unsigned int -inline void -assign(unsigned int & c_target, +inline void +assign(unsigned int & c_target, Dna5Q & source) { c_target = Dna5(source); } -inline void -assign(unsigned int & c_target, +inline void +assign(unsigned int & c_target, Dna5Q const & source) { c_target = Dna5(source); @@ -1658,15 +1419,15 @@ assign(unsigned int & c_target, //short -inline void -assign(short & c_target, +inline void +assign(short & c_target, Dna5Q & 
source) { c_target = Dna5(source); } -inline void -assign(short & c_target, +inline void +assign(short & c_target, Dna5Q const & source) { c_target = Dna5(source); @@ -1674,15 +1435,15 @@ assign(short & c_target, //unsigned short -inline void -assign(unsigned short & c_target, +inline void +assign(unsigned short & c_target, Dna5Q & source) { c_target = Dna5(source); } -inline void -assign(unsigned short & c_target, +inline void +assign(unsigned short & c_target, Dna5Q const & source) { c_target = Dna5(source); @@ -1690,15 +1451,15 @@ assign(unsigned short & c_target, // char -inline void -assign(char & c_target, +inline void +assign(char & c_target, Dna5Q & source) { c_target = Dna5(source); } -inline void -assign(char & c_target, +inline void +assign(char & c_target, Dna5Q const & source) { c_target = Dna5(source); @@ -1706,15 +1467,15 @@ assign(char & c_target, // signed char -inline void -assign(signed char & c_target, +inline void +assign(signed char & c_target, Dna5Q & source) { c_target = Dna5(source); } -inline void -assign(signed char & c_target, +inline void +assign(signed char & c_target, Dna5Q const & source) { c_target = Dna5(source); @@ -1722,15 +1483,15 @@ assign(signed char & c_target, // unsigned char -inline void -assign(unsigned char & c_target, +inline void +assign(unsigned char & c_target, Dna5Q & source) { c_target = Dna5(source); } -inline void -assign(unsigned char & c_target, +inline void +assign(unsigned char & c_target, Dna5Q const & source) { c_target = Dna5(source); @@ -1738,4 +1499,4 @@ assign(unsigned char & c_target, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_H_ diff --git a/seqan/basic/alphabet_residue_funcs.h b/seqan/basic/alphabet_residue_funcs.h index 847437c..12fa686 100644 --- a/seqan/basic/alphabet_residue_funcs.h +++ b/seqan/basic/alphabet_residue_funcs.h @@ -32,8 +32,8 @@ // Author: Enrico Siragusa // 
========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_FUNCS_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_FUNCS_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_FUNCS_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_FUNCS_H_ namespace seqan { @@ -44,7 +44,7 @@ namespace seqan { template SEQAN_HOST_DEVICE inline unsigned char -_translateAsciiToDna(unsigned char c) +_translateCharToDna(unsigned char c) { switch (toUpperValue(c)) { @@ -58,6 +58,30 @@ _translateAsciiToDna(unsigned char c) return 0; }; } + +// -------------------------------------------------------------------------- +// Dna5 +// -------------------------------------------------------------------------- + +template +SEQAN_HOST_DEVICE inline unsigned char +_translateCharToDna5(unsigned char c) +{ + switch (toUpperValue(c)) + { + case 'A': + return 0; + case 'C': + return 1; + case 'G': + return 2; + case 'T': + return 3; + default: + return 4; + }; +} + } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_FUNCS_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_FUNCS_H_ diff --git a/seqan/basic/alphabet_residue_tabs.h b/seqan/basic/alphabet_residue_tabs.h index e30fd43..7805607 100644 --- a/seqan/basic/alphabet_residue_tabs.h +++ b/seqan/basic/alphabet_residue_tabs.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,8 @@ // Rna5. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_TABS_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_TABS_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_TABS_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_TABS_H_ namespace seqan { @@ -48,13 +48,13 @@ namespace seqan { // -------------------------------------------------------------------------- template -struct TranslateTableDna5ToAscii_ +struct TranslateTableDna5ToChar_ { static char const VALUE[5]; }; template -char const TranslateTableDna5ToAscii_::VALUE[5] = {'A', 'C', 'G', 'T', 'N'}; +char const TranslateTableDna5ToChar_::VALUE[5] = {'A', 'C', 'G', 'T', 'N'}; template @@ -64,16 +64,16 @@ struct TranslateTableDna5ToIupac_ }; template -char const TranslateTableDna5ToIupac_::VALUE[5] = {0x02, 0x04, 0x08, 0x01, 0x0f}; +char const TranslateTableDna5ToIupac_::VALUE[5] = {0x01, 0x02, 0x04, 0x08, 0x0f}; template -struct TranslateTableAsciiToDna_ +struct TranslateTableCharToDna_ { static char const VALUE[256]; }; template -char const TranslateTableAsciiToDna_::VALUE[256] = +char const TranslateTableCharToDna_::VALUE[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //0 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //1 @@ -84,13 +84,13 @@ char const TranslateTableAsciiToDna_::VALUE[256] = // , A, B, C, D, E, D, G, H, I, J, K, L, M, N, O, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //5 -// P, Q, R, S, T, U, V, W, X, Y, Z, , , , , +// P, Q, R, S, T, U, V, W, X, Y, Z, , , , , 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, //6 // , a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //7 -// p, q, r, s, t, u, v, w, x, y, z, , , , , +// p, q, r, s, t, u, v, w, x, y, z, , , , , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //8 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //9 @@ -104,13 +104,13 @@ char const 
TranslateTableAsciiToDna_::VALUE[256] = template -struct TranslateTableAsciiToDna5_ +struct TranslateTableCharToDna5_ { static char const VALUE[256]; }; template -char const TranslateTableAsciiToDna5_::VALUE[256] = +char const TranslateTableCharToDna5_::VALUE[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //0 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //1 @@ -121,13 +121,13 @@ char const TranslateTableAsciiToDna5_::VALUE[256] = // , A, B, C, D, E, D, G, H, I, J, K, L, M, N, O, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //5 -// P, Q, R, S, T, U, V, W, X, Y, Z, , , , , +// P, Q, R, S, T, U, V, W, X, Y, Z, , , , , 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, //6 // , a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, 4, 4, 4, 4, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //7 -// p, q, r, s, t, u, v, w, x, y, z, , , , , +// p, q, r, s, t, u, v, w, x, y, z, , , , , 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //8 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //9 @@ -146,7 +146,7 @@ struct TranslateTableByteToDna_ }; template -char const TranslateTableByteToDna_::VALUE[256] = +char const TranslateTableByteToDna_::VALUE[256] = { 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //0 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //1 @@ -198,168 +198,44 @@ char const TranslateTableByteToDna5_::VALUE[256] = // -------------------------------------------------------------------------- template -struct TranslateTableRna5ToAscii_ +struct TranslateTableRna5ToChar_ { static char const VALUE[5]; }; template -char const TranslateTableRna5ToAscii_::VALUE[5] = {'A', 'C', 'G', 'U', 'N'}; +char const TranslateTableRna5ToChar_::VALUE[5] = {'A', 'C', 'G', 'U', 'N'}; -template -struct TranslateTableAsciiToRna_ -{ - static char const VALUE[256]; -}; - -template -char const TranslateTableAsciiToRna_::VALUE[256] = -{ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //0 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //1 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //2 - 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //3 - - 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, //4 -// , A, B, C, D, E, D, G, H, I, J, K, L, M, N, O, - - 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //5 -// P, Q, R, S, T, U, V, W, X, Y, Z, , , , , - - 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, //6 -// , a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, - - 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //7 -// p, q, r, s, t, u, v, w, x, y, z, , , , , - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //8 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //9 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //10 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //11 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //12 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //13 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //14 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 //15 -}; - -template -struct TranslateTableAsciiToRna5_ -{ - static char const VALUE[256]; -}; -template -char const TranslateTableAsciiToRna5_::VALUE[256] = -{ - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //0 - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //1 - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //2 - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //3 - - 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, //4 -// , A, B, C, D, E, D, G, H, I, J, K, L, M, N, O, - - 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //5 -// P, Q, R, S, T, U, V, W, X, Y, Z, , , , , - - 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, //6 -// , a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, - - 4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //7 -// p, q, r, s, t, u, v, w, x, y, z, , , , , - - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //8 - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //9 - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //10 - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //11 - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //12 - 4, 4, 4, 4, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, //13 - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //14 - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 //15 -}; - - -template -struct TranslateTableByteToRna_ -{ - static char const VALUE[256]; -}; - -template -char const TranslateTableByteToRna_::VALUE[256] = -{ - 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //0 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //1 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //2 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //3 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //4 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //5 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //6 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //7 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //8 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //9 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //10 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //11 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //12 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //13 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //14 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 //15 -}; - -template -struct TranslateTableByteToRna5_ -{ - static char const VALUE[256]; -}; - -template -char const TranslateTableByteToRna5_::VALUE[256] = -{ - 0, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //0 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //1 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //2 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //3 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //4 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //5 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //6 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //7 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //8 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //9 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //10 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //11 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, //12 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //13 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //14 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 //15 -}; +// other tables identical to Dna(5) // -------------------------------------------------------------------------- // Iupac // -------------------------------------------------------------------------- template -struct TranslateTableIupacToAscii_ +struct TranslateTableIupacToChar_ { static char const VALUE[16]; }; template -char const TranslateTableIupacToAscii_::VALUE[16] = -{ - 'U', //0000=0 - 'T', //0001=1 //T=1: change between U and T is just inc/dec - 'A', //0010=2 - 'W', //0011=3 TA - 'C', //0100=4 - 'Y', //0101=5 TC (pyrimidine) - 'M', //0110=6 AC - 'H', //0111=7 not-G - 'G', //1000=8 - 'K', //1001=9 TG - 'R', //1010=A AG (purine) - 'D', //1011=B not-C - 'S', //1100=C CG - 'B', //1101=D non-A - 'V', //1110=E non-T +char const TranslateTableIupacToChar_::VALUE[16] = +{ //TGCA + 'U', //0000=0 = or U + 'A', //0001=1 + 'C', //0010=2 + 'M', //0011=3 AC + 'G', //0100=4 + 'R', //0101=5 AG (purine) + 'S', //0110=6 CG + 'V', //0111=7 non-T + 'T', //1000=8 + 'W', //1001=9 TA + 'Y', //1010=A TC (pyrimidine) + 'H', //1011=B not-G + 'K', //1100=C TG + 'D', //1101=D not-C + 'B', //1110=E non-A 'N' //1111=F any }; @@ -370,24 +246,24 @@ struct TranslateTableIupacToDna_ }; template -char const TranslateTableIupacToDna_::VALUE[16] = -{ - 3, //'U' - 3, //'T' - 0, //'A' - 0, //'W' = TA - 1, //'C' - 1, //'Y' = TC - 0, //'M' = AC - 0, //'H' = not-G - 2, //'G' - 2, //'K' = TG - 0, //'R' = AG - 0, //'D' = not-C - 1, //'S' = CG - 1, //'B' = non-A - 0, //'V' = non-T - 0 //'N' = any +char const TranslateTableIupacToDna_::VALUE[16] = +{ //TGCA + 3, //0000=0 = or U + 0, //0001=1 + 1, //0010=2 + 0, //0011=3 AC + 2, //0100=4 + 0, //0101=5 AG (purine) + 1, //0110=6 CG + 0, //0111=7 non-T + 3, //1000=8 + 0, //1001=9 TA + 1, //1010=A TC (pyrimidine) + 0, //1011=B not-G + 2, //1100=C TG + 0, //1101=D 
not-C + 1, //1110=E non-A + 0 //1111=F any }; template @@ -397,60 +273,56 @@ struct TranslateTableIupacToDna5_ }; template -char const TranslateTableIupacToDna5_::VALUE[16] = -{ - 3, //'U' - 3, //'T' - 0, //'A' - 4, //'W' = TA - 1, //'C' - 4, //'Y' = TC - 4, //'M' = AC - 4, //'H' = not-G - 2, //'G' - 4, //'K' = TG - 4, //'R' = AG - 4, //'D' = not-C - 4, //'S' = CG - 4, //'B' = non-A - 4, //'V' = non-T - 4 //'N' = any +char const TranslateTableIupacToDna5_::VALUE[16] = +{ //TGCA + 3, //0000=0 = or U + 0, //0001=1 + 1, //0010=2 + 4, //0011=3 AC + 2, //0100=4 + 4, //0101=5 AG (purine) + 4, //0110=6 CG + 4, //0111=7 non-T + 3, //1000=8 + 4, //1001=9 TA + 4, //1010=A TC (pyrimidine) + 4, //1011=B not-G + 4, //1100=C TG + 4, //1101=D not-C + 4, //1110=E non-A + 4 //1111=F any }; template -struct TranslateTableAsciiToIupac_ +struct TranslateTableCharToIupac_ { static char const VALUE[256]; }; template -char const TranslateTableAsciiToIupac_::VALUE[256] = -{ - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //0 - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //1 - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //2 - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //3 - - 15, 2, 13, 4, 11, 15, 15, 8, 7, 15, 15, 9, 15, 6, 15, 15, //4 - // , A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, - - 15, 15, 10, 12, 1, 0, 14, 3, 15, 5, 15, 15, 15, 15, 15, 15, //5 - // P, Q, R, S, T, U, V, W, X, Y, Z, , , , , - - 15, 2, 13, 4, 11, 15, 15, 8, 7, 15, 15, 9, 15, 6, 15, 15, //6 - // , a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, - - 15, 15, 10, 12, 1, 0, 14, 3, 15, 5, 15, 15, 15, 15, 15, 15, //7 - // p, q, r, s, t, u, v, w, x, y, z, , , , , - - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //8 - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //9 - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //10 - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //11 - 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //12 - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //13 - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //14 - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 //15 +char const TranslateTableCharToIupac_::VALUE[256] = +{ + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + // = + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 15, 15, + // A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, + 15, 1, 14, 2, 13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, + // Q, R, S, T, U, V, W, X, Y, Z + 15, 15, 5, 6, 8, 0, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, + // a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, + 15, 1, 14, 2, 13, 15, 15, 4, 11, 15, 15, 12, 15, 3, 15, 15, + // q, r, s, t, u, v, w, x, y, z + 15, 15, 5, 6, 8, 0, 7, 9, 15, 10, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 }; template @@ -460,9 +332,9 @@ struct TranslateTableByteToIupac_ }; template -char const TranslateTableByteToIupac_::VALUE[256] = +char const TranslateTableByteToIupac_::VALUE[256] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //0 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //0 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //1 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //2 15, 15, 15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //3 @@ -485,73 +357,76 @@ char const TranslateTableByteToIupac_::VALUE[256] = // -------------------------------------------------------------------------- template -struct TranslateTableAAToAscii_ +struct TranslateTableAAToChar_ { - static char const VALUE[24]; + static char const VALUE[27]; }; template -char const TranslateTableAAToAscii_::VALUE[24] = -{ - 'A', // 0 Ala Alanine - 'R', // 1 Arg Arginine - 'N', // 2 Asn Asparagine - 'D', // 3 Asp Aspartic Acid - 'C', // 4 Cys Cystine - 'Q', // 5 Gln Glutamine - 'E', // 6 Glu Glutamic Acid - 'G', // 7 Gly Glycine - 'H', // 8 His Histidine - 'I', // 9 Ile Isoleucine - 'L', //10 Leu Leucine - 'K', //11 Lys Lysine - 'M', //12 Met Methionine - 'F', //13 Phe Phenylalanine - 'P', //14 Pro Proline - 'S', //15 Ser Serine - 'T', //16 Thr Threonine - 'W', //17 Trp Tryptophan - 'Y', //18 Tyr Tyrosine - 'V', //19 Val Valine - 'B', //20 Aspartic Acid, Asparagine - 'Z', //21 Glutamic Acid, Glutamine - 'X', //22 Unknown - '*' //23 Terminator +char const TranslateTableAAToChar_::VALUE[27] = +{ + 'A', // Ala Alanine + 'B', // Aspartic Acid, Asparagine + 'C', // Cys Cystine + 'D', // Asp Aspartic Acid + 'E', // Glu Glutamic Acid + 'F', // Phe Phenylalanine + 'G', // Gly Glycine + 'H', // His Histidine + 'I', // Ile Isoleucine + 'J', // Leucine, Isoleucine + 'K', // Lys Lysine + 'L', // Leu Leucine + 'M', // Met Methionine + 'N', // Asn Asparagine + 'O', // Pyl Pyrrolysine + 'P', // Pro Proline + 'Q', // Gln Glutamine + 'R', // Arg Arginine + 'S', // Ser Serine + 'T', // Thr Threonine + 'U', // Selenocystein + 'V', // Val Valine + 'W', // Trp Tryptophan + 'Y', // Tyr Tyrosine + 'Z', // Glutamic Acid, Glutamine + 'X', // Unknown + '*' // Terminator }; template -struct TranslateTableAsciiToAA_ +struct TranslateTableCharToAA_ { static char const VALUE[256]; }; template -char const TranslateTableAsciiToAA_::VALUE[256] = -{ - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //0 
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //1 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 22, 22, 22, 22, 22, //2 -// * - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //3 - 22, 0, 20, 4, 3, 6, 13, 7, 8, 9, 22, 11, 10, 12, 2, 22, //4 +char const TranslateTableCharToAA_::VALUE[256] = +{ + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //0 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //1 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 25, 25, 25, 25, 25, //2 +// * + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //3 + 25, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, //4 // , A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, - 14, 5, 1, 15, 16, 22, 19, 17, 22, 18, 21, 22, 22, 22, 22, 22, //5 + 15, 16, 17, 18, 19, 20, 21, 22, 25, 23, 24, 25, 25, 25, 25, 25, //5 // P, Q, R, S, T, U, V, W, X, Y, Z, , , , , , - 22, 0, 20, 4, 3, 6, 13, 7, 8, 9, 22, 11, 10, 12, 2, 22, //6 + 25, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, //6 // , a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, - 14, 5, 1, 15, 16, 22, 19, 17, 22, 18, 21, 22, 22, 22, 22, 22, //7 + 15, 16, 17, 18, 19, 20, 21, 22, 25, 23, 24, 25, 25, 25, 25, //7 // p, q, r, s, t, u, v, w, x, y, z, , , , , , - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //8 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //9 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //10 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //11 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //12 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //13 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //14 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22 //15 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //8 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //9 + 25, 25, 25, 25, 25, 25, 25, 
25, 25, 25, 25, 25, 25, 25, 25, 25, //10 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //11 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //12 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //13 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //14 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25 //15 }; template @@ -561,26 +436,26 @@ struct TranslateTableByteToAA_ }; template -char const TranslateTableByteToAA_::VALUE[256] = +char const TranslateTableByteToAA_::VALUE[256] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, //0 - 16, 17, 18, 19, 20, 21, 22, 23, 22, 22, 22, 22, 22, 22, 22, 22, //1 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //2 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //3 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //4 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //5 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //6 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //7 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //8 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //9 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //10 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //11 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //12 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //13 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, //14 - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22 //15 + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 25, 25, 25, 25, 25, 25, //1 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //2 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //3 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //4 + 25, 25, 25, 25, 25, 25, 25, 
25, 25, 25, 25, 25, 25, 25, 25, 25, //5 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //6 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //7 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //8 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //9 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //10 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //11 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //12 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //13 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, //14 + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25 //15 }; } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_TABS_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_RESIDUE_TABS_H_ diff --git a/seqan/basic/alphabet_simple_type.h b/seqan/basic/alphabet_simple_type.h index 2d392c6..535ea52 100644 --- a/seqan/basic/alphabet_simple_type.h +++ b/seqan/basic/alphabet_simple_type.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ // The SimpleType alphabet type is the base class for all residue types. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_SIMPLE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_SIMPLE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_SIMPLE_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_SIMPLE_H_ namespace seqan { @@ -56,58 +56,75 @@ namespace seqan { * @class SimpleType * @implements FiniteOrderedAlphabetConcept * @headerfile - * + * * @brief Implementation for "simple" types. - * + * * @signature template * class SimpleType; - * + * * @tparam TSpec Specialization tag. * @tparam TValue Type that stores the values of an instance. TValue must be a simple type. - * - * @section Remarks - * + * * A "simple type" is a C++ type that can be constructed without constructor, destructed without destructor and copied * without copy constructor or assignment operator. All basic types (like char, int or * float) are simple. Pointers, references and arrays of simple types are simple. POD types ("plain old data * types"), that are - simplified spoken - C++-types that already existed in C, are simple too. - * + * * Arrays of simple types can be copied very fast by memory manipulation routines, but the default implementation of * functions like arrayCopyForward and arrayCopy are not optimized for simple types this way. But for classes derived * from SimpleType, optimized variants of array manipulation functions are applied. - * + * * Note that simple types need not to be derived or specialized from SimpleType, but it could be convenient to * do so. - * + * * @see IsSimple */ -/** -.Class.SimpleType: -..cat:Basic -..implements:Concept.FiniteOrderedAlphabetConcept -..summary:Implementation for "simple" types. -..signature:SimpleType -..param.TValue:Type that stores the values of an instance. -...remarks:TValue must be a simple type. -...metafunction:Metafunction.Value -..param.TSpec:Specialization tag. 
-...metafunction:Metafunction.Spec -..remarks: -...text:A "simple type" is a C++ type that can be constructed without constructor, -destructed without destructor and copied without copy constructor or assignment operator. -All basic types (like $char$, $int$ or $float$) are simple. Pointers, references and arrays of -simple types are simple. -POD types ("plain old data types"), that are - simplified spoken - C++-types that already existed in C, -are simple too. -...text:Arrays of simple types can be copied very fast by memory manipulation routines, -but the default implementation of functions like @Function.arrayCopyForward@ and @Function.arrayCopy@ -are not optimized for simple types this way. -But for classes derived from $SimpleType$, optimized variants of array manipulation functions are applied. -...text:Note that simple types need not to be derived or specialized from $SimpleType$, but -it could be convenient to do so. -..include:seqan/basic.h -*/ +/*! + * @var TValue SimpleType::value + * @brief The internal value storage of the SimpleType object. + * + * @important Do not modify this value directly. SimpleType implements conversion operators for all numeric types, so + * you can simply assign and cast the SimpleType to all built-in numeric types. + * + * Stores the value of the SimpleType, is of type TValue. Valid values are from 0 to @link + * FiniteOrderedAlphabetConcept#ValueSize @endlink minus one. + */ + +/*! + * @fn SimpleType::SimpleType + * @brief Constructor of SimpleType type. + * + * @signature SimpleType::SimpleType(); + * @signature SimpleType::SimpleType(other); + * @signature template SimpleType::SimpleType(param); + * + * @param[in] other Other SimpleType to copy construct with. + * @param[in] param Any value of type TParam that can be assigned to the SimpleType object. + * + * The default constructor initializes the SimpleType object with the value 0 and the copy constructor copies + * over the value of the SimpleType. 
+ * + * When constructing with a param that is not a SimpleType then param is assigned to the SimpleType + * object, using @link AssignableConcept#assign assign @endlink. This function can be overloaded differently for each + * type. You can expect the following behaviour for all SimpleType objects, however: + * + *
    + *
  • If param is a builtin integer then this value is directly assigned to the @link SimpleType::value value + * @endlink member of the SimpleType object. Note that this can allow an invalid assignment, e.g., when assigning + * 42 to a @link Dna @endlink object.
  • + *
  • If param is a char then this character value is converted to the appropriate value and written + * to the @link SimpleType::value value @endlink member. For example, assigning 'A' or 'a' to + * a SimpleType object assigns 0 to the @link SimpleType::value value @endlink member.
  • + *
+ * + * @section Example + * + * The following example shows construction of a @link Dna @endlink (specialization of SimpleType) object with from + * char and integer values. + * + * @snippet demos/dox/basic/simple_type_construction.cpp simple type construction and assignment + */ #ifdef PLATFORM_WINDOWS #pragma pack(push,1) @@ -150,16 +167,16 @@ class SimpleType // ------------------------------------------------------------------------ SEQAN_HOST_DEVICE - SimpleType & operator=(SimpleType const & other) - { + SimpleType & operator=(SimpleType const & other) + { assign(*this, other); return *this; } template SEQAN_HOST_DEVICE inline SimpleType & - operator=(T const & other) - { + operator=(T const & other) + { assign(*this, other); return *this; } @@ -263,22 +280,30 @@ class SimpleType // Metafunction IsSimple // ---------------------------------------------------------------------------- -///.Metafunction.IsSimple.param.T.type:Class.SimpleType - template struct IsSimple > { typedef True Type; }; +// ---------------------------------------------------------------------------- +// Concept Convertible +// ---------------------------------------------------------------------------- + +template +struct Is< Convertible, TSource> > : + Is< FundamentalConcept > {}; + +template +struct Is< Convertible > > : + Is< FundamentalConcept > {}; + // ---------------------------------------------------------------------------- // Metafunction Value // ---------------------------------------------------------------------------- // TODO(holtgrew): Rename? SimpleType is no container! 
-///.Metafunction.Value.param.T.type:Class.SimpleType - template struct Value > { @@ -335,8 +360,6 @@ supremumValueImpl(SimpleType *) // Metafunction Spec // ---------------------------------------------------------------------------- -///.Metafunction.Spec.param.T.type:Class.SimpleType - template struct Spec > { @@ -359,7 +382,7 @@ struct Spec const> // TODO(holtgrew): Is some of the code below redundant, can we lose some copy and paste here? template -struct CompareType, TRight> +struct CompareTypeImpl, TRight> { typedef TRight Type; }; @@ -392,7 +415,7 @@ convertImpl(Convert const, template SEQAN_HOST_DEVICE inline TStream & -operator<<(TStream & stream, +operator<<(TStream & stream, SimpleType const & data) { stream << convert(data); @@ -405,7 +428,7 @@ operator<<(TStream & stream, template SEQAN_HOST_DEVICE inline TStream & -operator>>(TStream & stream, +operator>>(TStream & stream, SimpleType & data) { char c; @@ -418,13 +441,9 @@ operator>>(TStream & stream, // Function assign() // ---------------------------------------------------------------------------- -///.Function.assign.param.target.type:Class.SimpleType -///.Function.assign.param.target.type:Class.SimpleType -///.Function.assign.class:Class.SimpleType - template SEQAN_HOST_DEVICE inline void -assign(SimpleType & target, +assign(SimpleType & target, SimpleType & source) { target.value = source.value; @@ -432,15 +451,15 @@ assign(SimpleType & target, template SEQAN_HOST_DEVICE inline void -assign(SimpleType & target, +assign(SimpleType & target, SimpleType const & source) { target.value = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(SimpleType & target, +SEQAN_HOST_DEVICE inline void +assign(SimpleType & target, TSource & source) { target.value = source; @@ -448,27 +467,27 @@ assign(SimpleType & target, template SEQAN_HOST_DEVICE inline void -assign(SimpleType & target, +assign(SimpleType & target, TSource const & source) { target.value = source; } -// Assign Proxy to SimpleType +// 
Assign Proxy to SimpleType // NOTE(doering): Diese Funktionen wurden noetig wegen eines seltsamen VC++-Verhaltens // TODO(holtgrew): Still necessary with dropped 2003 support? template -SEQAN_HOST_DEVICE inline void -assign(SimpleType & target, +SEQAN_HOST_DEVICE inline void +assign(SimpleType & target, Proxy & source) { target.value = getValue(source); } template -SEQAN_HOST_DEVICE inline void -assign(SimpleType & target, +SEQAN_HOST_DEVICE inline void +assign(SimpleType & target, Proxy const & source) { target.value = getValue(source); @@ -478,144 +497,144 @@ assign(SimpleType & target, // NOTE(doering): It is not possible to write a single function here since "assign" must be specialized for the first argument at the first place template -SEQAN_HOST_DEVICE inline void -assign(__int64 & c_target, +SEQAN_HOST_DEVICE inline void +assign(__int64 & c_target, SimpleType & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(__int64 & c_target, +SEQAN_HOST_DEVICE inline void +assign(__int64 & c_target, SimpleType const & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(__uint64 & c_target, +SEQAN_HOST_DEVICE inline void +assign(__uint64 & c_target, SimpleType & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(__uint64 & c_target, +SEQAN_HOST_DEVICE inline void +assign(__uint64 & c_target, SimpleType const & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(int & c_target, +SEQAN_HOST_DEVICE inline void +assign(int & c_target, SimpleType & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(int & c_target, +SEQAN_HOST_DEVICE inline void +assign(int & c_target, SimpleType const & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(unsigned int & c_target, +SEQAN_HOST_DEVICE inline void +assign(unsigned int & c_target, SimpleType & source) { c_target = source.value; } 
template -SEQAN_HOST_DEVICE inline void -assign(unsigned int & c_target, +SEQAN_HOST_DEVICE inline void +assign(unsigned int & c_target, SimpleType const & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(short & c_target, +SEQAN_HOST_DEVICE inline void +assign(short & c_target, SimpleType & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(short & c_target, +SEQAN_HOST_DEVICE inline void +assign(short & c_target, SimpleType const & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(unsigned short & c_target, +SEQAN_HOST_DEVICE inline void +assign(unsigned short & c_target, SimpleType & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(unsigned short & c_target, +SEQAN_HOST_DEVICE inline void +assign(unsigned short & c_target, SimpleType const & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(char & c_target, +SEQAN_HOST_DEVICE inline void +assign(char & c_target, SimpleType & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(char & c_target, +SEQAN_HOST_DEVICE inline void +assign(char & c_target, SimpleType const & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(signed char & c_target, +SEQAN_HOST_DEVICE inline void +assign(signed char & c_target, SimpleType & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(signed char & c_target, +SEQAN_HOST_DEVICE inline void +assign(signed char & c_target, SimpleType const & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(unsigned char & c_target, +SEQAN_HOST_DEVICE inline void +assign(unsigned char & c_target, SimpleType & source) { c_target = source.value; } template -SEQAN_HOST_DEVICE inline void -assign(unsigned char & c_target, +SEQAN_HOST_DEVICE inline void +assign(unsigned char & c_target, SimpleType 
const & source) { c_target = source.value; @@ -627,7 +646,7 @@ assign(unsigned char & c_target, template SEQAN_HOST_DEVICE inline bool -operator==(SimpleType const & left_, +operator==(SimpleType const & left_, TRight const & right_) { typedef SimpleType TLeft; @@ -637,7 +656,7 @@ operator==(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator==(TLeft const & left_, +operator==(TLeft const & left_, SimpleType const & right_) { typedef SimpleType TRight; @@ -647,7 +666,7 @@ operator==(TLeft const & left_, template SEQAN_HOST_DEVICE inline bool -operator==(SimpleType const & left_, +operator==(SimpleType const & left_, SimpleType const & right_) { typedef SimpleType TLeft; @@ -658,7 +677,7 @@ operator==(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator==(SimpleType const & left_, +operator==(SimpleType const & left_, SimpleType const & right_) { return convert(left_) == convert(right_); @@ -666,7 +685,7 @@ operator==(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator==(Proxy const & left_, +operator==(Proxy const & left_, SimpleType const & right_) { typedef Proxy TLeft; @@ -692,7 +711,7 @@ operator==(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator!=(SimpleType const & left_, +operator!=(SimpleType const & left_, TRight const & right_) { typedef SimpleType TLeft; @@ -702,7 +721,7 @@ operator!=(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator!=(TLeft const & left_, +operator!=(TLeft const & left_, SimpleType const & right_) { typedef SimpleType TRight; @@ -712,7 +731,7 @@ operator!=(TLeft const & left_, template SEQAN_HOST_DEVICE inline bool -operator!=(SimpleType const & left_, +operator!=(SimpleType const & left_, SimpleType const & right_) { typedef SimpleType TLeft; @@ -723,7 +742,7 @@ operator!=(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator!=(SimpleType const & left_, +operator!=(SimpleType const & left_, 
SimpleType const & right_) { return convert(left_) != convert(right_); @@ -731,7 +750,7 @@ operator!=(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator!=(Proxy const & left_, +operator!=(Proxy const & left_, SimpleType const & right_) { typedef Proxy TLeft; @@ -757,7 +776,7 @@ operator!=(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator<(SimpleType const & left_, +operator<(SimpleType const & left_, TRight const & right_) { typedef SimpleType TLeft; @@ -767,7 +786,7 @@ operator<(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator<(TLeft const & left_, +operator<(TLeft const & left_, SimpleType const & right_) { typedef SimpleType TRight; @@ -777,7 +796,7 @@ operator<(TLeft const & left_, template SEQAN_HOST_DEVICE inline bool -operator<(SimpleType const & left_, +operator<(SimpleType const & left_, SimpleType const & right_) { typedef SimpleType TLeft; @@ -788,7 +807,7 @@ operator<(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator<(SimpleType const & left_, +operator<(SimpleType const & left_, SimpleType const & right_) { return convert(left_) < convert(right_); @@ -796,7 +815,7 @@ operator<(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator<(Proxy const & left_, +operator<(Proxy const & left_, SimpleType const & right_) { typedef Proxy TLeft; @@ -822,7 +841,7 @@ operator<(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator<=(SimpleType const & left_, +operator<=(SimpleType const & left_, TRight const & right_) { typedef SimpleType TLeft; @@ -832,7 +851,7 @@ operator<=(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator<=(TLeft const & left_, +operator<=(TLeft const & left_, SimpleType const & right_) { typedef SimpleType TRight; @@ -842,7 +861,7 @@ operator<=(TLeft const & left_, template SEQAN_HOST_DEVICE inline bool -operator<=(SimpleType const & left_, +operator<=(SimpleType const & left_, 
SimpleType const & right_) { typedef SimpleType TLeft; @@ -853,7 +872,7 @@ operator<=(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator<=(SimpleType const & left_, +operator<=(SimpleType const & left_, SimpleType const & right_) { return convert(left_) <= convert(right_); @@ -861,7 +880,7 @@ operator<=(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator<=(Proxy const & left_, +operator<=(Proxy const & left_, SimpleType const & right_) { typedef Proxy TLeft; @@ -886,7 +905,7 @@ operator<=(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator>(SimpleType const & left_, +operator>(SimpleType const & left_, TRight const & right_) { typedef SimpleType TLeft; @@ -896,7 +915,7 @@ operator>(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator>(TLeft const & left_, +operator>(TLeft const & left_, SimpleType const & right_) { typedef SimpleType TRight; @@ -906,7 +925,7 @@ operator>(TLeft const & left_, template SEQAN_HOST_DEVICE inline bool -operator>(SimpleType const & left_, +operator>(SimpleType const & left_, SimpleType const & right_) { typedef SimpleType TLeft; @@ -917,7 +936,7 @@ operator>(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator>(SimpleType const & left_, +operator>(SimpleType const & left_, SimpleType const & right_) { return convert(left_) > convert(right_); @@ -925,7 +944,7 @@ operator>(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator>(Proxy const & left_, +operator>(Proxy const & left_, SimpleType const & right_) { typedef Proxy TLeft; @@ -951,7 +970,7 @@ operator>(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator>=(SimpleType const & left_, +operator>=(SimpleType const & left_, TRight const & right_) { typedef SimpleType TLeft; @@ -961,7 +980,7 @@ operator>=(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator>=(TLeft const & left_, +operator>=(TLeft const & left_, 
SimpleType const & right_) { typedef SimpleType TRight; @@ -971,7 +990,7 @@ operator>=(TLeft const & left_, template SEQAN_HOST_DEVICE inline bool -operator>=(SimpleType const & left_, +operator>=(SimpleType const & left_, SimpleType const & right_) { typedef SimpleType TLeft; @@ -982,7 +1001,7 @@ operator>=(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator>=(SimpleType const & left_, +operator>=(SimpleType const & left_, SimpleType const & right_) { return convert(left_) >= convert(right_); @@ -990,7 +1009,7 @@ operator>=(SimpleType const & left_, template SEQAN_HOST_DEVICE inline bool -operator>=(Proxy const & left_, +operator>=(Proxy const & left_, SimpleType const & right_) { typedef Proxy TLeft; @@ -1078,4 +1097,4 @@ ordValue(SimpleType const & c) } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_SIMPLE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_SIMPLE_H_ diff --git a/seqan/basic/alphabet_storage.h b/seqan/basic/alphabet_storage.h index a2e7ba9..aa4fc48 100644 --- a/seqan/basic/alphabet_storage.h +++ b/seqan/basic/alphabet_storage.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -36,8 +36,8 @@ // construction type (simple, non-simple) and storage size. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_STORAGE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_STORAGE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_STORAGE_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_STORAGE_H_ #include @@ -54,11 +54,6 @@ class SimpleType; // Tags, Classes, Enums // ============================================================================ -// TODO(holtgrew): Remove Ascii and Unicode alias. Also see #849. -typedef char Ascii; -//typedef unsigned char Byte; // TODO(holtgrew): Disabling, remove together with Ascii and Unicode with #849 -typedef wchar_t Unicode; - // ============================================================================ // Metafunctions // ============================================================================ @@ -131,9 +126,9 @@ struct ValueSize // The internal value size is used for alphabets with piggyback qualities, // for example Dna5Q. Here, the public value size is 5 but the internal -// value size is 256. +// value size is 256. -template +template struct InternalValueSize_ : public ValueSize {}; @@ -146,36 +141,21 @@ struct InternalValueSize_ * @mfn BytesPerValue * @headerfile * @brief Number of bytes needed to store a value. - * + * * @signature BytesPerValue::VALUE - * + * * @tparam T The type to query. - * - * @return VALUE The number of bytes to store on T object. * - * @section Remarks + * @return VALUE The number of bytes to store on T object. * * By default, this function returns ceil(BitsPerValue<T>::VALUE). For built-in types, this is the same * as sizeof(T). - * + * * @see FiniteOrderedAlphabetConcept#ValueSize * @see AlphabetConcept#BitsPerValue * @see IntegralForValue */ -/** -.Metafunction.BytesPerValue: -..cat:Basic -..summary:Number of bytes needed to store a value. -..signature:BytesPerValue::VALUE -..param.T:A class. -..returns.param.VALUE:Number of bytes needed to store $T$. 
-...default:$BitsPerValue / 8$, rounded up. For built-in types, this is the same as $sizeof(T)$. -..see:Metafunction.ValueSize -..see:Metafunction.BitsPerValue -..include:seqan/basic.h -*/ - template struct BytesPerValue { @@ -190,15 +170,13 @@ struct BytesPerValue * @mfn IntegralForValue * @headerfile * @brief Returns an itegral type that provides sufficient space to store a value. - * - * @signature IntegralForValue::Type - * + * + * @signature IntegralForValue::Type; + * * @tparam T The type to query. - * + * * @return Type An integral type. - * - * @section Remarks - * + * * The type is the smallest unsigned integral type that has a size of at least BytesPerValue bytes. * * @@ -227,37 +205,15 @@ struct BytesPerValue * * *
__int64
- * + * * Note that the returned integral type cannot store T values, if T takes more than 8 bytes, since * there exists no integral type that provides sufficient space to store types of this size. - * + * * @see FiniteOrderedAlphabetConcept#ValueSize * @see AlphabetConcept#BitsPerValue * @see BytesPerValue */ -/** -.Metafunction.IntegralForValue: -..cat:Basic -..summary:Returns an itegral type that provides sufficient space to store a value. -..signature:IntegralForValue::Type -..param.T:A class. -..returns.param.Type:An integral type that can store $T$ values. -..remarks:The type is the smallest unsigned integral type that has a size of at least @Metafunction.BytesPerValue@ bytes. -...tableheader:bytes|integral type -...table:1|$unsigned char$ -...table:2|$unsigned short$ -...table:3|$unsigned int$ -...table:4|$unsigned int$ -...table:5 and above|$__int64$ -..remarks:Note that the returned integral type cannot store $T$ values, if $T$ takes more than 8 bytes, - since there exists no integral type that provides sufficient space to store types of this size. -..see:Metafunction.ValueSize -..see:Metafunction.BitsPerValue -..see:Metafunction.BytesPerValue -..include:seqan/basic.h -*/ - template struct IntegralForValueImpl_ { @@ -307,7 +263,7 @@ template SEQAN_HOST_DEVICE inline typename ValueSize::Type ordValue(TValue const & c) { - return convert(static_cast::Type const &>(c)); + return convert(static_cast::Type const &>(c)); } // The internal ord value is used for alphabets with piggyback qualities. 
@@ -316,7 +272,7 @@ template SEQAN_HOST_DEVICE inline typename ValueSize::Type _internalOrdValue(TValue const & c) { - return ordValue(c); + return ordValue(c); } // ---------------------------------------------------------------------------- @@ -332,4 +288,4 @@ valueSize() } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ALPHABET_STORAGE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_ALPHABET_STORAGE_H_ diff --git a/seqan/basic/array_construct_destruct.h b/seqan/basic/array_construct_destruct.h index 3e983f2..5e192bc 100644 --- a/seqan/basic/array_construct_destruct.h +++ b/seqan/basic/array_construct_destruct.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -41,8 +41,8 @@ #include -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ARRAY_CONSTRUCT_DESTRUCT_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_ARRAY_CONSTRUCT_DESTRUCT_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_ARRAY_CONSTRUCT_DESTRUCT_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_ARRAY_CONSTRUCT_DESTRUCT_H_ namespace seqan { @@ -68,38 +68,20 @@ namespace seqan { * @mfn IsSimple * @headerfile * @brief Tests type to be simple. - * + * * @signature IsSimple::Type; - * + * * @tparam T Type that is tested. - * + * * @return Type Either True or False, depending on T being a "POD" type. - * - * @section Remarks - * + * * A simple type is a type that does not need constructors to be created, a destructor to be destroyed, and copy * assignment operators or copy constructors to be copied. All POD ("plain old data") types are simple, but some * non-POD types could be simple too, e.g. some specializations of SimpleType. 
- * + * * @see SimpleType */ -/** -.Metafunction.IsSimple -..cat:Basic -..summary:Tests type to be simple. -..signature:IsSimple::Type -..param.T:Type that is tested. -..returns.param.Type:@Tag.Logical Values.True@, if $T$ is a simple type, @Tag.Logical Values.False@ otherwise. -...default:@Tag.Logical Values.False@ -..remarks:A simple type is a type that does not need constructors to be created, -a destructor to be destroyed, and copy assignment operators or copy constructors -to be copied. All POD ("plain old data") types are simple, but some -non-POD types could be simple too, e.g. some specializations of @Class.SimpleType@. -..see:Class.SimpleType -..include:seqan/basic.h -*/ - template struct IsSimple_ { @@ -149,9 +131,6 @@ struct IsSimple : public IsSimple {}; // TODO(holtgrew): This should probably to into sequence module along with this header. -///.Metafunction.Value.param.T.type:Adaption.char array -///.Metafunction.Value.class:Adaption.char array - template struct Value { @@ -187,9 +166,6 @@ struct Value // TODO(holtgrew): This should probably to into sequence module along with this header. -///.Metafunction.Reference.param.T.type:Adaption.char array -///.Metafunction.Reference.class:Adaption.char array - template struct Reference { @@ -216,7 +192,6 @@ template SEQAN_HOST_DEVICE inline T & value(T * me) { - SEQAN_CHECKPOINT; return *me; } @@ -230,7 +205,6 @@ template SEQAN_HOST_DEVICE inline T & getValue(T * me) { - SEQAN_CHECKPOINT; return value(me); } @@ -246,36 +220,20 @@ getValue(T * me) * @fn valueConstruct * @headerfile * @brief Constructs an object at specified position. - * + * * @signature void valueConstruct(iterator [, param [, move_tag]]); - * - * @param iterator Pointer or iterator to position where the object should be constructed. - * @param param Parameter that is forwarded to constructor. - * @param moveTag Instance of the move tag. 
If the tag is specified, it is forwarded to the constructor, so the - * constructed object must support move construction. - * - * @section Remarks - * + * + * @param[in,out] iterator Pointer or iterator to position where the object should be constructed. + * @param[in] param Parameter that is forwarded to constructor. + * @param[in] moveTag Instance of the move tag. If the tag is specified, it is forwarded to the constructor, + * so the constructed object must support move construction. + * * The type of the destructed object is the value type of iterator. */ -/** -.Function.valueConstruct -..cat:Content Manipulation -..summary:Constructs an object at specified position. -..signature:valueConstruct(iterator [, param [, move_tag] ]) -..param.iterator:Pointer or iterator to position where the object should be constructed. -..param.param:Parameter that is forwarded to constructor. (optional) -..param.move_tag:Instance of the @Tag.Move Switch.move switch tag@. (optional) -...remarks:If the @Tag.Move Switch.move switch tag@ is specified, it is forwarded to the constructor, -so the constructed object must support move construction. -..remarks:The type of the destructed object is the @Metafunction.Value.value type@ of $iterator$. -..include:seqan/basic.h -*/ - // Helper code for constructing values behind iterators that do not return // proxies from their value() functions but references. 
-struct ValueConstructor_ +struct ValueConstructor_ { template static inline void @@ -289,7 +247,32 @@ struct ValueConstructor_ template static inline void construct(TIterator it, - TParam const & param_) + TParam SEQAN_FORWARD_CARG param_) + { + typedef typename Value::Type TValue; + typedef typename RemoveConst::Type TNonConstValue; + new( (void*) & value(it) ) TNonConstValue(SEQAN_FORWARD(TParam, param_)); + } + +#ifndef SEQAN_CXX11_STANDARD + template + static inline void + construct(TIterator it, + TParam & param_, + Move const & tag, + True) + { + typedef typename Value::Type TValue; + typedef typename RemoveConst::Type TNonConstValue; + new( (void*) & value(it) ) TNonConstValue(param_, tag); + } + + template + static inline void + construct(TIterator it, + TParam & param_, + Move const &, + False) { typedef typename Value::Type TValue; typedef typename RemoveConst::Type TNonConstValue; @@ -304,31 +287,33 @@ struct ValueConstructor_ { typedef typename Value::Type TValue; typedef typename RemoveConst::Type TNonConstValue; - new( (void*) & value(it) ) TNonConstValue(param_, tag); + construct(it, param_, tag, typename HasMoveConstructor::Type()); } +#endif }; // Helper code for constructing values behind iterators that return proxies // from their value() function. // // TODO(holtgrew): These implementations are empty and to be overwritten. Should we have dynamic/static asserstions here? 
-struct ValueConstructorProxy_ +struct ValueConstructorProxy_ { template static inline void construct(TIterator) {} template - static inline void construct(TIterator, TParam const &) {} + static inline void construct(TIterator, TParam SEQAN_FORWARD_CARG) {} +#ifndef SEQAN_CXX11_STANDARD template static inline void construct(TIterator, TParam &, Move const & ) {} +#endif }; template inline void valueConstruct(TIterator it) { - SEQAN_CHECKPOINT; typedef typename IfC< IsSameType< typename Value::Type &, @@ -346,9 +331,8 @@ valueConstruct(TIterator it) template inline void valueConstruct(TIterator it, - TParam const & param_) + TParam SEQAN_FORWARD_CARG param_) { - SEQAN_CHECKPOINT; typedef typename IfC< IsSameType< typename Value::Type &, @@ -360,16 +344,16 @@ valueConstruct(TIterator it, ValueConstructorProxy_ // false, types differ -> value() returns a proxy >::Type TConstructor; - TConstructor::construct(it, param_); + TConstructor::construct(it, SEQAN_FORWARD(TParam, param_)); } +#ifndef SEQAN_CXX11_STANDARD template inline void valueConstruct(TIterator it, TParam & param_, Move const & tag) { - SEQAN_CHECKPOINT; typedef typename IfC< IsSameType< typename Value::Type &, @@ -383,6 +367,7 @@ valueConstruct(TIterator it, TConstructor::construct(it, param_, tag); } +#endif // ---------------------------------------------------------------------------- // Function valueDestruct() using iterators @@ -390,7 +375,7 @@ valueConstruct(TIterator it, // Helper code for destructing values behind iterators that do not return // proxies from their value() function but references. -struct ValueDestructor_ +struct ValueDestructor_ { template static inline void @@ -411,7 +396,7 @@ struct ValueDestructor_ // from their value() function. // // TODO(holtgrew): These implementations are empty and to be overwritten. Should we have dynamic/static asserstions here? 
-struct ValueDestructorProxy_ +struct ValueDestructorProxy_ { template static inline void destruct(TIterator) {} @@ -420,33 +405,19 @@ struct ValueDestructorProxy_ /*! * @fn valueDestruct * @headerfile - * @brief Destoys an object at specified position. - * + * @brief Destroys an object at specified position. + * * @signature void valueDestruct(iterator); - * - * @param iterator Pointer or iterator to position where the object should be destructed. - * - * @section Remarks - * + * + * @param[in,out] iterator Pointer or iterator to position where the object should be destructed. + * * The type of the constructed object is the value type of iterator. */ -/** -.Function.valueDestruct -..cat:Content Manipulation -..summary:Destoys an object at specified position. -..signature:valueDestruct(iterator) -..param.iterator:Pointer or iterator to position where the object should be destructed. -..remarks:The type of the constructed object is the @Metafunction.Value.value type@ of $iterator$. -..see:Function.valueConstruct -..include:seqan/basic.h -*/ - template inline void valueDestruct(TIterator it) { - SEQAN_CHECKPOINT; typedef typename IfC< IsSameType< typename Value::Type &, @@ -469,47 +440,24 @@ valueDestruct(TIterator it) * @fn arrayConstruct * @headerfile * @brief Construct objects in a given memory buffer. - * + * * @signature void arrayConstruct(begin, end[, value]); - * - * @param begin Iterator to the begin of the range that is to be constructed. - * @param end Iterator behind the end of the range. - * @param value Argument that is forwarded to the constructor. An appropriate constructor is required. If - * value is not specified, the default constructor is used. - * - * @section Remarks - * + * + * @param[in] begin Iterator to the begin of the range that is to be constructed. + * @param[in] end Iterator behind the end of the range. + * @param[in] value Argument that is forwarded to the constructor. An appropriate constructor is required. 
If + * value is not specified, the default constructor is used. + * * The type of the constructed Objects is the value type of begin and end. */ -/** -.Function.arrayConstruct -..cat:Array Handling -..summary:Construct objects in a given memory buffer. -..signature:arrayConstruct(begin, end [, value]) -..param.begin:Iterator to the begin of the range that is to be constructed. -..param.end:Iterator behind the end of the range. -..param.value:Argument that is forwarded to the constructor. (optional) -...text:An appropriate constructor is required. -If $value$ is not specified, the default constructor is used. -..remarks:The type of the constructed Objects is the @Metafunction.Value.value type@ -of $begin$ and $end$. -..see:Function.arrayDestruct -..see:Function.arrayConstructCopy -..see:Function.arrayFill -..see:Class.SimpleType -..see:Function.valueConstruct -..include:seqan/basic.h -*/ - // NOTE(holtgrew): Of course, it does not make sense to declare this in a move version! template -inline void -_arrayConstructDefault(TIterator1 begin_, +inline void +_arrayConstructDefault(TIterator1 begin_, TIterator2 end_) { - SEQAN_CHECKPOINT; while (begin_ != end_) { valueConstruct(begin_); @@ -518,21 +466,19 @@ _arrayConstructDefault(TIterator1 begin_, } template -inline void -arrayConstruct(TIterator1 begin_, +inline void +arrayConstruct(TIterator1 begin_, TIterator2 end_) { - SEQAN_CHECKPOINT; _arrayConstructDefault(begin_, end_); } template -inline void -_arrayConstructDefault(TIterator1 begin_, - TIterator2 end_, +inline void +_arrayConstructDefault(TIterator1 begin_, + TIterator2 end_, TParam const & param_) { - SEQAN_CHECKPOINT; while (begin_ != end_) { valueConstruct(begin_, param_); @@ -541,12 +487,11 @@ _arrayConstructDefault(TIterator1 begin_, } template -inline void -arrayConstruct(TIterator1 begin_, - TIterator2 end_, +inline void +arrayConstruct(TIterator1 begin_, + TIterator2 end_, TParam const & param_) { - SEQAN_CHECKPOINT; _arrayConstructDefault(begin_, 
end_, param_); } @@ -558,46 +503,25 @@ arrayConstruct(TIterator1 begin_, * @fn arrayConstructCopy * @headerfile * @brief Copy constructs an array of objects into in a given memory buffer. - * + * * @signature void arrayConstructCopy(sourceBegin, sourceEnd, target); - * - * @param sourceBegin Iterator to the first element of the source range. - * @param sourceEnd Iterator behind the last element of the source range. sourceEnd should have the same - * type as sourceBegin. - * @param target Pointer to the memory block the new objects will be constructed in. The type of target - * specifies the type of the constructed objects: If T* is the type of target, then - * the function constructs objects of type T. The memory buffer should be large enough to - * store sourceEnd - sourceBegin objects. An appropriate (copy-) constructor that - * constructs an target objects given a source object is required. + * + * @param[in] sourceBegin Iterator to the first element of the source range. + * @param[in] sourceEnd Iterator behind the last element of the source range. sourceEnd should have the same + * type as sourceBegin. + * @param[in] target Pointer to the memory block the new objects will be constructed in. The type of target + * specifies the type of the constructed objects: If T* is the type of target, then + * the function constructs objects of type T. The memory buffer should be large enough to + * store sourceEnd - sourceBegin objects. An appropriate (copy-) constructor that + * constructs an target objects given a source object is required. */ -/** -.Function.arrayConstructCopy -..cat:Array Handling -..summary:Copy constructs an array of objects into in a given memory buffer. -..signature:arrayConstructCopy(source_begin, source_end, target) -..param.source_begin:Iterator to the first element of the source range. -..param.source_end:Iterator behind the last element of the source range. -...text:$source_end$ should have the same type as $source_begin$. 
-..param.target:Pointer to the memory block the new objects will be constructed in. -...text:The type of $target$ specifies the type of the constructed objects: -If $T*$ is the type of $target$, then the function constructs objects of type $T$. -...text:The memory buffer should be large enough to store $source_end$ - $source_begin$ objects. -An appropriate (copy-) constructor that constructs an target objects given a source object is required. -..see:Function.arrayDestruct -..see:Function.arrayCopyForward -..see:Function.arrayCopy -..see:Function.valueConstruct -..include:seqan/basic.h -*/ - template -inline void -_arrayConstructCopyDefault(TSource1 source_begin, - TSource2 source_end, +inline void +_arrayConstructCopyDefault(TSource1 source_begin, + TSource2 source_end, TTarget target_begin) { - SEQAN_CHECKPOINT; while (source_begin != source_end) { // NOTE(holtgrew): getValue() is used here since value() could return @@ -609,12 +533,11 @@ _arrayConstructCopyDefault(TSource1 source_begin, } template -inline void -arrayConstructCopy(TSource1 source_begin, - TSource2 source_end, +inline void +arrayConstructCopy(TSource1 source_begin, + TSource2 source_end, TTarget target_begin) { - SEQAN_CHECKPOINT; _arrayConstructCopyDefault(source_begin, source_end, target_begin); } @@ -626,66 +549,47 @@ arrayConstructCopy(TSource1 source_begin, * @fn arrayConstructMove * @headerfile * @brief Move constructs an array of objects into in a given memory buffer. - * + * * @signature void arrayConstructMove(sourceBegin, sourceEnd, target); - * - * @param sourceEnd Iterator behind the last element of the source range. sourceEnd should have the same - * type as sourceBegin. - * @param sourceBegin Iterator to the first element of the source range. - * @param target Pointer to the memory block the new objects will be constructed in. The type of target - * specifies the type of the constructed objects: If T* is the type of target, then - * the function constructs objects of type T. 
The memory buffer should be large enough to - * store sourceEnd - sourceBegin objects. An appropriate move constructor that - * constructs an target objects given a source object is required. + * + * @param[in] sourceEnd Iterator behind the last element of the source range. sourceEnd should have the same + * type as sourceBegin. + * @param[in] sourceBegin Iterator to the first element of the source range. + * @param[in] target Pointer to the memory block the new objects will be constructed in. The type of target + * specifies the type of the constructed objects: If T* is the type of target, then + * the function constructs objects of type T. The memory buffer should be large enough to + * store sourceEnd - sourceBegin objects. An appropriate move constructor that + * constructs an target objects given a source object is required. */ -/** -.Function.arrayConstructMove -..cat:Array Handling -..summary:Move constructs an array of objects into in a given memory buffer. -..signature:arrayConstructMove(source_begin, source_end, target) -..param.source_begin:Iterator to the first element of the source range. -..param.source_end:Iterator behind the last element of the source range. -...text:$source_end$ should have the same type as $source_begin$. -..param.target:Pointer to the memory block the new objects will be constructed in. -...text:The type of $target$ specifies the type of the constructed objects: -If $T*$ is the type of $target$, then the function constructs objects of type $T$. -...text:The memory buffer should be large enough to store $source_end$ - $source_begin$ objects. -An appropriate move constructor that constructs an target objects given a source object is required. 
-..see:Function.arrayDestruct -..see:Function.arrayConstructCopy -..see:Function.arrayMoveForward -..see:Function.arrayMove -..see:Function.valueConstruct -..include:seqan/basic.h -*/ - template -inline void -_arrayConstructMoveDefault(TSource1 source_begin, - TSource2 source_end, +inline void +_arrayConstructMoveDefault(TSource1 source_begin, + TSource2 source_end, TTarget target_begin) { - SEQAN_CHECKPOINT; while (source_begin < source_end) { // NOTE(holtgrew): Using value() here, used to be getValue() but // cannot move from const reference or proxy. // valueConstruct(target_begin, value(source_begin), Move()); // TODO(holtgrew): We need a "has move constructor" metafunction to switch between move/copy constructing before we can use the line here. - valueConstruct(target_begin, value(source_begin)); +#ifdef SEQAN_CXX11_STANDARD + valueConstruct(target_begin, std::move(*source_begin)); +#else + valueConstruct(target_begin, value(source_begin), Move()); +#endif ++source_begin; ++target_begin; } } template -inline void -arrayConstructMove(TSource1 source_begin, - TSource2 source_end, +inline void +arrayConstructMove(TSource1 source_begin, + TSource2 source_end, TTarget target_begin) { - SEQAN_CHECKPOINT; _arrayConstructMoveDefault(source_begin, source_end, target_begin); } @@ -697,36 +601,20 @@ arrayConstructMove(TSource1 source_begin, * @fn arrayDestruct * @headerfile * @brief Destroys an array of objects. - * + * * @signature void arrayDestruct(begin, end); - * - * @param begin Iterator to the begin of the range that is to be destructed. - * @param end Iterator behind the end of the range. - * - * @section Remarks - * + * + * @param[in] begin Iterator to the begin of the range that is to be destructed. + * @param[in] end Iterator behind the end of the range. + * * This function does not deallocates the memory. */ -/** -.Function.arrayDestruct -..cat:Array Handling -..summary:Destroys an array of objects. 
-..signature:arrayDestruct(begin, end) -..param.begin:Iterator to the begin of the range that is to be destructed. -..param.end:Iterator behind the end of the range. -..remarks:This function does not deallocates the memory. -..see:Class.SimpleType -..see:Function.valueDestruct -..include:seqan/basic.h -*/ - template -inline void -_arrayDestructDefault(TIterator1 begin_, +inline void +_arrayDestructDefault(TIterator1 begin_, TIterator2 end_) { - SEQAN_CHECKPOINT; while (begin_ != end_) { valueDestruct(begin_); @@ -735,11 +623,10 @@ _arrayDestructDefault(TIterator1 begin_, } template -inline void -arrayDestruct(TIterator1 begin_, +inline void +arrayDestruct(TIterator1 begin_, TIterator2 end_) { - SEQAN_CHECKPOINT; _arrayDestructDefault(begin_, end_); } @@ -753,49 +640,32 @@ arrayDestruct(TIterator1 begin_, * @fn arrayFill * @headerfile * @brief Assigns one object to each element of a range. - * + * * @signature void arrayFill(begin, end, value[, parallelTag]); - * - * @param begin Iterator to the begin of the range that is to be filled. - * @param end Iterator behind the end of the range. - * @param value Argument that is assigned to all count objects in array. - * @param parallelTag Tag to enable/disable parallelism. Types: Serial, Parallel - * - * @section Remarks - * + * + * @param[in] begin Iterator to the begin of the range that is to be filled. + * @param[in] end Iterator behind the end of the range. + * @param[in] value Argument that is assigned to all count objects in array. + * @param[in] parallelTag Tag to enable/disable parallelism. Types: Serial, Parallel + * * All objects target_begin[0] to target_begin[count-1] are set to value. */ -/** -.Function.arrayFill -..cat:Array Handling -..summary:Assigns one object to each element of a range. -..signature:arrayFill(begin, end, value) -..param.begin:Iterator to the begin of the range that is to be filled. -..param.end:Iterator behind the end of the range. 
-..param.value:Argument that is assigned to all $count$ objects in $array$. -..remarks:All objects $target_begin[0]$ to $target_begin[count-1]$ are set to $value$. -..see:Function.arrayCopy -..see:Function.arrayCopyForward -..include:seqan/basic.h -*/ - // TODO(holtgrew): Redirects to fill_n. What are the exact semantics here? Do the array elements have to be initialized already? fill_n uses assignment, not copy construction! template -inline void +inline void arrayFill(TIterator begin_, TIterator end_, TValue const & value) { - SEQAN_CHECKPOINT; - ::std::fill_n(begin_, end_ - begin_, value); + std::fill_n(begin_, end_ - begin_, value); } template -inline void +inline void arrayFill(TIterator begin_, - TIterator end_, + TIterator end_, TValue const & value, Serial) { @@ -810,59 +680,38 @@ arrayFill(TIterator begin_, * @fn arrayCopyForward * @headerfile * @brief Copies a range of objects into another range of objects starting from the first element. - * + * * @signature void arrayCopyForward(sourceBegin, sourceEnd, target); - * - * @param sourceEnd Iterator behind the last element of the source array. sourceEnd must have the same type - * as sourceBegin. - * @param sourceBegin Iterator to the first element of the source array. - * @param target Iterator to the first element of the target array. The target capacity should be at least as - * long as the source range. - * - * @section Remarks - * + * + * @param[in] sourceEnd Iterator behind the last element of the source array. sourceEnd must have the same type + * as sourceBegin. + * @param[in] sourceBegin Iterator to the first element of the source array. + * @param[in] target Iterator to the first element of the target array. The target capacity should be at least as + * long as the source range. + * * Be careful if source and target range overlap, because in this case some source elements could be accidently * overwritten before they are copied. 
- * + * * If there is no need for the source elements to persist, consider to use arrayMoveForward instead to improve * performance. */ -/** -.Function.arrayCopyForward -..cat:Array Handling -..summary:Copies a range of objects into another range of objects starting from the first element. -..signature:arrayCopyForward(source_begin, source_end, target) -..param.source_begin:Iterator to the first element of the source array. -..param.source_end:Iterator behind the last element of the source array. -...text:$source_end$ must have the same type as $source_begin$. -..param.target:Iterator to the first element of the target array. -...text:The target capacity should be at least as long as the source range. -..remarks.note:Be careful if source and target range overlap, because in this case some source elements could be accidently overwritten before they are copied. -..remarks:If there is no need for the source elements to persist, consider to use -@Function.arrayMoveForward@ instead to improve performance. 
-..see:Class.SimpleType -..include:seqan/basic.h -*/ - template -inline void -_arrayCopyForwardDefault(TSource1 source_begin, - TSource2 source_end, +inline void +_arrayCopyForwardDefault(TSource1 source_begin, + TSource2 source_end, TTarget target_begin) { - SEQAN_CHECKPOINT; - ::std::copy(source_begin, source_end, target_begin); + std::copy(source_begin, source_end, target_begin); } template -inline void -arrayCopyForward(TSource1 source_begin, - TSource2 source_end, +inline void +arrayCopyForward(TSource1 source_begin, + TSource2 source_end, TTarget target_begin) { - SEQAN_CHECKPOINT; - _arrayCopyForwardDefault(source_begin, source_end, target_begin); + _arrayCopyForwardDefault(source_begin, source_end, target_begin); } // ---------------------------------------------------------------------------- @@ -873,68 +722,42 @@ arrayCopyForward(TSource1 source_begin, * @fn arrayCopyBackward * @headerfile * @brief Copies a range of objects into another range of objects starting from the last element. - * + * * @signature void arrayCopyBackward(source_begin, source_end, target); - * - * @param sourceBegin Iterator to the first element of the source array. - * @param sourceEnd Iterator behind the last element of the source array. sourceEnd must have the same type - * as source_begin. - * @param target Iterator to the first element of the target array. The target capacity should be at least as - * long as the source range. - * - * @section Remarks - * + * + * @param[in] sourceBegin Iterator to the first element of the source array. + * @param[in] sourceEnd Iterator behind the last element of the source array. sourceEnd must have the same type + * as source_begin. + * @param[in] target Iterator to the first element of the target array. The target capacity should be at least as + * long as the source range. + * * Be careful if source and target range overlap, because in this case some source elements could be accidently * overwritten before they are moved. 
- * + * * If source and target do not overlap, consider to use the function arrayCopyForward instead that is faster in some * cases. - * + * * If there is no need for the source elements to persist, consider to use arrayMoveBackward instead to improve * performance. - * - * The semantic of this function's argument target differ from the arguments of ::std::copy_backward. + * + * The semantic of this function's argument target differ from the arguments of std::copy_backward. */ -/** -.Function.arrayCopyBackward -..cat:Array Handling -..summary:Copies a range of objects into another range of objects starting from the last element. -..signature:arrayCopyBackward(source_begin, source_end, target) -..param.source_begin:Iterator to the first element of the source array. -..param.source_end:Iterator behind the last element of the source array. -...text:$source_end$ must have the same type as $source_begin$. -..param.target:Iterator to the first element of the target array. -...text:The target capacity should be at least as long as the source range. -..remarks.note:Be careful if source and target range overlap, because in this case - some source elements could be accidently overwritten before they are moved. -..remarks.text:If source and target do not overlap, consider to use the function -@Function.arrayCopyForward@ instead that is faster in some cases. -..remarks:If there is no need for the source elements to persist, consider to use -@Function.arrayMoveBackward@ instead to improve performance. -..remarks.note:The semantic of this function's argument $target$ differ from the arguments of $::std::copy_backward$. 
-..see:Function.arrayCopyForward -..see:Class.SimpleType -..include:seqan/basic.h -*/ - template -inline void -_arrayCopyBackwardDefault(TSource1 source_begin, +inline void +_arrayCopyBackwardDefault(TSource1 source_begin, TSource2 source_end, TTarget target_begin) { - SEQAN_CHECKPOINT; - ::std::copy_backward(source_begin, source_end, target_begin + (source_end - source_begin)); + std::copy_backward(source_begin, source_end, target_begin + (source_end - source_begin)); } template -inline void -arrayCopyBackward(TSource1 source_begin, - TSource2 source_end, +inline void +arrayCopyBackward(TSource1 source_begin, + TSource2 source_end, TTarget target_begin) { - SEQAN_CHECKPOINT; _arrayCopyBackwardDefault(source_begin, source_end, target_begin); } @@ -944,53 +767,28 @@ arrayCopyBackward(TSource1 source_begin, /*! * @fn arrayCopy - * - * @headerfile seqan/basic.h - * + * + * @headerfile + * * @brief Copies a range of objects into another range of objects. - * + * * @signature void arrayCopy(sourceBegin, sourceEnd, target); - * - * @param sourceEnd Iterator behind the last element of the source range. sourceEnd must have the same type - * as sourceBegin. - * @param sourceBegin Iterator to the first element of the source range. - * @param target Iterator to the first element of the target range.The target capacity should be at least as long - * as the source range. - * - * @section Remarks - * + * + * @param[in] sourceEnd Iterator behind the last element of the source range. sourceEnd must have the same type + * as sourceBegin. + * @param[in] sourceBegin Iterator to the first element of the source range. + * @param[in] target Iterator to the first element of the target range.The target capacity should be at least as long + * as the source range. + * * If source and target range do not overlap, consider to use arrayCopyForward instead to improve performance. 
- * + * * If there is no need for the source elements to persist, consider to use arrayMoveForward instead to improve * performance. */ -/** -.Function.arrayCopy -..cat:Array Handling -..summary:Copies a range of objects into another range of objects. -..signature:arrayCopy(source_begin, source_end, target) -..param.source_begin:Iterator to the first element of the source range. -..param.source_end:Iterator behind the last element of the source range. -...text:$source_end$ must have the same type as $source_begin$. -..param.target:Iterator to the first element of the target range. -...text:The target capacity should be at least as long as the source range. -..remarks.text:If source and target range do not overlap, consider to use - @Function.arrayCopyForward@ instead to improve performance. -..remarks:If there is no need for the source elements to persist, consider to use - @Function.arrayMoveForward@ instead to improve performance. -..DISABLED.remarks.note:Be careful if source and target range overlap and the size of the - source elements differ from the size of target elements, because in this case - some source elements could be accidently overwritten before they are moved. -..see:Function.arrayCopyForward -..see:Function.arrayCopyBackward -..see:Class.SimpleType -..include:seqan/basic.h -*/ - template -inline void arrayCopy(TSource1 source_begin, - TSource2 source_end, +inline void arrayCopy(TSource1 source_begin, + TSource2 source_end, TTarget target_begin) { if (target_begin <= source_begin) @@ -1007,66 +805,47 @@ inline void arrayCopy(TSource1 source_begin, * @fn arrayMoveForward * @headerfile * @brief Moves a range of objects into another range of objects starting from the first element. - * + * * @signature void arrayMoveForward(sourceBegin, sourceEnd, target); - * - * @param sourceEnd Iterator behind the last element of the source array. sourceEnd must have the same type - * as sourceBegin. 
- * @param sourceBegin Iterator to the first element of the source array. - * @param target Iterator to the first element of the target array. The target capacity should be at least as - * long as the source range. - * - * @section Remarks - * + * + * @param[in] sourceEnd Iterator behind the last element of the source array. sourceEnd must have the same type + * as sourceBegin. + * @param[in] sourceBegin Iterator to the first element of the source array. + * @param[in] target Iterator to the first element of the target array. The target capacity should be at least as + * long as the source range. + * * The function possibly clears (but does not destroy) the source elements. If source elements must persist, consider * to use arrayCopyForward instead. - * + * * Be careful if source and target range overlap, because in this case some source elements could be accidently * overwritten before they are moved. */ -/** -.Function.arrayMoveForward -..cat:Array Handling -..summary:Moves a range of objects into another range of objects starting from the first element. -..signature:arrayMoveForward(source_begin, source_end, target) -..param.source_begin:Iterator to the first element of the source array. -..param.source_end:Iterator behind the last element of the source array. -...text:$source_end$ must have the same type as $source_begin$. -..param.target:Iterator to the first element of the target array. -...text:The target capacity should be at least as long as the source range. -..remarks:The function possibly clears (but does not destroy) the source elements. - If source elements must persist, consider to use @Function.arrayCopyForward@ instead. -..remarks.note:Be careful if source and target range overlap, because in this case - some source elements could be accidently overwritten before they are moved. 
-..see:Function.arrayCopyForward -..see:Class.SimpleType -..include:seqan/basic.h -*/ - template -inline void -_arrayMoveForwardDefault(TSource1 source_begin, - TSource2 source_end, +inline void +_arrayMoveForwardDefault(TSource1 source_begin, + TSource2 source_end, TTarget target_begin) { - SEQAN_CHECKPOINT; +#ifdef SEQAN_CXX11_STANDARD + std::move(source_begin, source_end, target_begin); +#else while (source_begin != source_end) { move(*target_begin, *source_begin); ++source_begin; ++target_begin; } +#endif } template -inline void -arrayMoveForward(TSource1 source_begin, - TSource2 source_end, +inline void +arrayMoveForward(TSource1 source_begin, + TSource2 source_end, TTarget target_begin) { - SEQAN_CHECKPOINT; - _arrayMoveForwardDefault(source_begin, source_end, target_begin); + _arrayMoveForwardDefault(source_begin, source_end, target_begin); } // ---------------------------------------------------------------------------- @@ -1077,74 +856,51 @@ arrayMoveForward(TSource1 source_begin, * @fn arrayMoveBackward * @headerfile * @brief Moves a range of objects into another range of objects starting from the last element. - * + * * @signature void arrayMoveBackward(sourceBegin, sourceEnd, target); - * - * @param sourceEnd Iterator behind the last element of the source array. sourceEnd must have the same type - * as sourceBegin. - * @param sourceBegin Iterator to the first element of the source array. - * @param target Iterator to the first element of the target array.The target capacity should be at least as long - * as the source range. - * - * @section Remarks - * + * + * @param[in] sourceEnd Iterator behind the last element of the source array. sourceEnd must have the same type + * as sourceBegin. + * @param[in] sourceBegin Iterator to the first element of the source array. + * @param[in] target Iterator to the first element of the target array.The target capacity should be at least as long + * as the source range. 
+ * * The function possibly clears (but does not destroy) the source elements. If source elements must persist, consider * to use arrayCopyBackward instead. - * + * * Be careful if source and target range overlap, because in this case some source elements could be accidently * overwritten before they are moved. - * + * * If source and target do not overlap, consider to use the function arrayMoveForward instead that is faster in some * cases. - * - * The semantic of this function's argument target differ from the arguments of ::std::copy_backward. + * + * The semantic of this function's argument target differ from the arguments of std::copy_backward. */ -/** -.Function.arrayMoveBackward -..cat:Array Handling -..summary:Moves a range of objects into another range of objects starting from the last element. -..signature:arrayMoveBackward(source_begin, source_end, target) -..param.source_begin:Iterator to the first element of the source array. -..param.source_end:Iterator behind the last element of the source array. -...text:$source_end$ must have the same type as $source_begin$. -..param.target:Iterator to the first element of the target array. -...text:The target capacity should be at least as long as the source range. -..remarks:The function possibly clears (but does not destroy) the source elements. - If source elements must persist, consider to use @Function.arrayCopyBackward@ instead. -..remarks.note:Be careful if source and target range overlap, because in this case - some source elements could be accidently overwritten before they are moved. -..remarks.text:If source and target do not overlap, consider to use the function -@Function.arrayMoveForward@ instead that is faster in some cases. -..remarks.note:The semantic of this function's argument $target$ differ from the arguments of $::std::copy_backward$. 
-..see:Function.arrayMoveForward -..see:Function.arrayCopyBackward -..see:Class.SimpleType -..include:seqan/basic.h -*/ - template -inline void -_arrayMoveBackwardDefault(TSource1 source_begin, - TSource2 source_end, +inline void +_arrayMoveBackwardDefault(TSource1 source_begin, + TSource2 source_end, TTarget target_begin) { - SEQAN_CHECKPOINT; +#ifdef SEQAN_CXX11_STANDARD + std::move_backward(source_begin, source_end, target_begin + (source_end - source_begin)); +#else target_begin += (source_end - source_begin); while (source_end != source_begin) { --source_end; --target_begin; move(*target_begin, *source_end); } +#endif } template -inline void -arrayMoveBackward(TSource1 source_begin, - TSource2 source_end, +inline void +arrayMoveBackward(TSource1 source_begin, + TSource2 source_end, TTarget target_begin) { - SEQAN_CHECKPOINT; _arrayMoveBackwardDefault(source_begin, source_end, target_begin); } @@ -1158,52 +914,24 @@ arrayMoveBackward(TSource1 source_begin, * @brief Moves a range of objects into another range of objects. * * @signature void arrayMove(sourceBegin, sourceEnd, target); - * - * @param sourceBegin Iterator to the first element of the source range. - * @param sourceEnd Iterator behind the last element of the source range. sourceEnd must have the same type - * as sourceBegin. - * @param target Iterator to the first element of the target range. The target capacity should be at least as - * long as the source range. - * - * @section Remarks - * + * + * @param[in] sourceBegin Iterator to the first element of the source range. + * @param[in] sourceEnd Iterator behind the last element of the source range. sourceEnd must have the same type + * as sourceBegin. + * @param[in] target Iterator to the first element of the target range. The target capacity should be at least as + * long as the source range. + * * The function possibly clears (but does not destroy) the source elements. If source elements must persist, consider * to use arrayCopy instead. 
- * + * * If source and target range do not overlap, consider to use arrayMoveForward instead to improve performance. - * + * * Don't confuse this function with the standard move function that resembles arrayCopy. */ -/** -.Function.arrayMove -..cat:Array Handling -..summary:Moves a range of objects into another range of objects. -..signature:arrayMove(source_begin, source_end, target) -..param.source_begin:Iterator to the first element of the source range. -..param.source_end:Iterator behind the last element of the source range. -...text:$source_end$ must have the same type as $source_begin$. -..param.target:Iterator to the first element of the target range. -...text:The target capacity should be at least as long as the source range. -..remarks:The function possibly clears (but does not destroy) the source elements. - If source elements must persist, consider to use @Function.arrayCopy@ instead. -..remarks.text:If source and target range do not overlap, consider to use - @Function.arrayMoveForward@ instead to improve performance. -..DISABLED.remarks.note:Be careful if source and target range overlap and the size of the - source elements differ from the size of target elements, because in this case - some source elements could be accidently overwritten before they are moved. -..remarks.note:Don't confuse this function with the standard $move$ function that -resembles @Function.arrayCopy@. -..see:Function.arrayMoveForward -..see:Function.arrayMoveBackward -..see:Function.arrayCopy -..see:Class.SimpleType -..include:seqan/basic.h -*/ - template -inline void -arrayMove(TSource1 source_begin, +inline void +arrayMove(TSource1 source_begin, TSource2 source_end, TTarget target_begin) { @@ -1221,55 +949,30 @@ arrayMove(TSource1 source_begin, * @fn arrayClearSpace * @headerfile * @brief Destroys the begin of an array and keeps the rest. 
- * + * * @signature void arrayClearSpace(arrBegin, arrLength, keepFrom, moveTo); - * - * @param arrBegin Pointer to the first element of the array. - * @param keepFrom Offset of the first object that will be kept. - * @param arrLength Length of the array. - * @param moveTo Offset the first kept object will get at the end of the function. - * - * @section Remarks - * + * + * @param[in] arrBegin Pointer to the first element of the array. + * @param[in] keepFrom Offset of the first object that will be kept. + * @param[in] arrLength Length of the array. + * @param[in] moveTo Offset the first kept object will get at the end of the function. + * * The objects arr[keep_from] to arr[arr_length-1] are moved to the area beginning at positions * move_to. All objects in arr[0] to arr[keep_from-1] are destroyed. After this function, the * first move_to positions of the array are free and dont contain objects. - * + * * The array must have at least enough space to store arr_length + move_to - keep_from objects. - * + * * The objects from arr[0] to arr[array_length-1] have to be initialized/constructed, arrays beyond * arr[array_length-1] are assumed not to be constructed. If this assumption is violated, memory might leak. */ -/** -.Function.arrayClearSpace -..cat:Array Handling -..summary:Destroys the begin of an array and keeps the rest. -..signature:arrayClearSpace(arr_begin, arr_length, keep_from, move_to) -..param.arr_begin:Pointer to the first element of the array. -..param.arr_length:Length of the array. -..param.keep_from:Offset of the first object that will be kept. -..param.move_to:Offset the first kept object will get at the end of the function. -..remarks.text:The objects $arr[keep_from]$ to $arr[arr_length-1]$ -are moved to the area beginning at positions $move_to$. -All objects in $arr[0]$ to $arr[keep_from-1]$ are destroyed. -After this function, the first $move_to$ positions of the array -are free and dont contain objects. 
-..remarks.text:The array must have at least enough space to store $arr_length + move_to - keep_from$ objects. -..remarks.text:The objects from $arr[0]$ to $arr[array_length-1]$ have to be initialized/constructed, arrays beyond $arr[array_length-1]$ are assumed not to be constructed. If this assumption is violated, memory might leak. -..see:Function.arrayCopy -..see:Function.arrayDestruct -..see:Function.arrayCopyForward -..see:Class.SimpleType -..include:seqan/basic.h -*/ - // TODO(holtgrew): The feature that the range [0, array_begin) is deleted is used nowhere. Can this be removed to simplify behaviour? template -void _arrayClearSpaceDefault(TIterator array_begin, - size_t array_length, - size_t keep_from, +void _arrayClearSpaceDefault(TIterator array_begin, + size_t array_length, + size_t keep_from, size_t move_to) { if (keep_from == array_length) { @@ -1289,20 +992,17 @@ void _arrayClearSpaceDefault(TIterator array_begin, if (array_length > move_to) { // Case 2a: Moving right of array_length, i.e. we can move a part // of the objects and have to move-construct the rest. - SEQAN_CHECKPOINT; size_t middle = array_length - (move_to - keep_from); arrayConstructMove(array_begin + middle, array_begin + array_length, array_begin + array_length); arrayMove(array_begin + keep_from, array_begin + middle, array_begin + move_to); arrayDestruct(array_begin, array_begin + move_to); } else { // Case 2b: We have to move-construct all target objects. - SEQAN_CHECKPOINT; arrayConstructMove(array_begin + keep_from, array_begin + array_length, array_begin + move_to); arrayDestruct(array_begin, array_begin + array_length); } } else { // Case 3: Move to the left. 
- SEQAN_CHECKPOINT; arrayMove(array_begin + keep_from, array_begin + array_length, array_begin + move_to); arrayDestruct(array_begin, array_begin + move_to); arrayDestruct(array_begin + array_length - (keep_from - move_to), array_begin + array_length); @@ -1310,9 +1010,9 @@ void _arrayClearSpaceDefault(TIterator array_begin, } template -void arrayClearSpace(TIterator array_begin, - size_t array_length, - size_t keep_from, +void arrayClearSpace(TIterator array_begin, + size_t array_length, + size_t keep_from, size_t move_to) { _arrayClearSpaceDefault(array_begin, array_length, keep_from, move_to); @@ -1323,63 +1023,57 @@ void arrayClearSpace(TIterator array_begin, // ---------------------------------------------------------------------------- template -inline void -_arrayConstructPointer(TIterator, +inline void +_arrayConstructPointer(TIterator, TIterator, True) { - SEQAN_CHECKPOINT; //nothing to do } template -inline void -_arrayConstructPointer(TIterator begin_, +inline void +_arrayConstructPointer(TIterator begin_, TIterator end_, False) { - SEQAN_CHECKPOINT; _arrayConstructDefault(begin_, end_); } template -inline void -arrayConstruct(TValue * begin_, +inline void +arrayConstruct(TValue * begin_, TValue * end_) { - SEQAN_CHECKPOINT; _arrayConstructPointer(begin_, end_, typename IsSimple::Type() ); } template -inline void +inline void _arrayConstructPointer(TValue * begin_, TValue * end_, TParam const & param_, True) { - SEQAN_CHECKPOINT; arrayFill(begin_, end_, static_cast(param_)); } template -inline void +inline void _arrayConstructPointer(TValue * begin_, TValue * end_, TParam const & param_, False) { - SEQAN_CHECKPOINT; _arrayConstructDefault(begin_, end_, param_); } template -inline void -arrayConstruct(TValue * begin_, - TValue * end_, +inline void +arrayConstruct(TValue * begin_, + TValue * end_, TParam const & param_) { - SEQAN_CHECKPOINT; _arrayConstructPointer(begin_, end_, param_, typename IsSimple::Type()); } @@ -1388,44 +1082,40 @@ 
arrayConstruct(TValue * begin_, // ---------------------------------------------------------------------------- template -inline void -_arrayConstructCopyPointer(TValueSource * source_begin, - TValueSource * source_end, +inline void +_arrayConstructCopyPointer(TValueSource * source_begin, + TValueSource * source_end, TValueTarget * target_begin, True) { - SEQAN_CHECKPOINT; arrayCopyForward(source_begin, source_end, target_begin); } template -inline void -_arrayConstructCopyPointer(TValueSource * source_begin, - TValueSource * source_end, +inline void +_arrayConstructCopyPointer(TValueSource * source_begin, + TValueSource * source_end, TValueTarget const* target_begin, True) { - SEQAN_CHECKPOINT; arrayCopyForward(source_begin, source_end, const_cast(target_begin)); } template -inline void -_arrayConstructCopyPointer(TValueSource * source_begin, - TValueSource * source_end, +inline void +_arrayConstructCopyPointer(TValueSource * source_begin, + TValueSource * source_end, TValueTarget * target_begin, False) { - SEQAN_CHECKPOINT; _arrayConstructCopyDefault(source_begin, source_end, target_begin); } template -inline void -arrayConstructCopy(TValueSource * source_begin, - TValueSource * source_end, +inline void +arrayConstructCopy(TValueSource * source_begin, + TValueSource * source_end, TValueTarget * target_begin) { - SEQAN_CHECKPOINT; _arrayConstructCopyPointer(source_begin, source_end, target_begin, typename IsSimple::Type() ); } @@ -1434,34 +1124,31 @@ arrayConstructCopy(TValueSource * source_begin, // ---------------------------------------------------------------------------- template -inline void -_arrayConstructMovePointer(TValue * source_begin, - TValue * source_end, +inline void +_arrayConstructMovePointer(TValue * source_begin, + TValue * source_end, TValue * target_begin, True) { - SEQAN_CHECKPOINT; arrayMoveForward(source_begin, source_end, target_begin); } template -inline void -_arrayConstructMovePointer(TValue * source_begin, - TValue * source_end, 
+inline void +_arrayConstructMovePointer(TValue * source_begin, + TValue * source_end, TValue * target_begin, False) { - SEQAN_CHECKPOINT; _arrayConstructMoveDefault(source_begin, source_end, target_begin); } template -inline void -arrayConstructMove(TValue * source_begin, - TValue * source_end, +inline void +arrayConstructMove(TValue * source_begin, + TValue * source_end, TValue * target_begin) { - SEQAN_CHECKPOINT; _arrayConstructMovePointer(source_begin, source_end, target_begin, typename IsSimple::Type() ); } @@ -1470,31 +1157,28 @@ arrayConstructMove(TValue * source_begin, // ---------------------------------------------------------------------------- template -inline void -_arrayDestructPointer(TValue * /*begin_*/, +inline void +_arrayDestructPointer(TValue * /*begin_*/, TValue * /*end_*/, True) { - SEQAN_CHECKPOINT; //do nothing } template -inline void -_arrayDestructPointer(TValue * begin_, +inline void +_arrayDestructPointer(TValue * begin_, TValue * end_, False) { - SEQAN_CHECKPOINT; _arrayDestructDefault(begin_, end_); } template -inline void -arrayDestruct(TValue * begin_, +inline void +arrayDestruct(TValue * begin_, TValue * end_) { - SEQAN_CHECKPOINT; _arrayDestructPointer(begin_, end_, typename IsSimple::Type() ); } @@ -1511,66 +1195,60 @@ arrayDestruct(TValue * begin_, // ---------------------------------------------------------------------------- template -inline void -_arrayCopyForwardPointer(TValue * source_begin, - TValue * source_end, +inline void +_arrayCopyForwardPointer(TValue * source_begin, + TValue * source_end, TValue * target_begin, True) { - SEQAN_CHECKPOINT; - ::std::memmove(target_begin, source_begin, (source_end - source_begin) * sizeof(TValue)); + std::memmove(target_begin, source_begin, (source_end - source_begin) * sizeof(TValue)); } template -inline void -_arrayCopyForwardPointer(TValue * source_begin, - TValue * source_end, +inline void +_arrayCopyForwardPointer(TValue * source_begin, + TValue * source_end, TValue * 
target_begin, False) { - SEQAN_CHECKPOINT; _arrayCopyForwardDefault(source_begin, source_end, target_begin); } template -inline void -arrayCopyForward(TValue * source_begin, - TValue * source_end, +inline void +arrayCopyForward(TValue * source_begin, + TValue * source_end, TValue * target_begin) { - SEQAN_CHECKPOINT; _arrayCopyForwardPointer(source_begin, source_end, target_begin, typename IsSimple::Type() ); } template -inline void -_arrayCopyBackwardPointer(TValue * source_begin, - TValue * source_end, +inline void +_arrayCopyBackwardPointer(TValue * source_begin, + TValue * source_end, TValue * target_begin, True) { - SEQAN_CHECKPOINT; - ::std::memmove(target_begin, source_begin, (source_end - source_begin) * sizeof(TValue)); + std::memmove(target_begin, source_begin, (source_end - source_begin) * sizeof(TValue)); } template -inline void -_arrayCopyBackwardPointer(TValue * source_begin, - TValue * source_end, +inline void +_arrayCopyBackwardPointer(TValue * source_begin, + TValue * source_end, TValue * target_begin, False) { - SEQAN_CHECKPOINT; - _arrayCopyBackwardDefault(source_begin, source_end, target_begin); + _arrayCopyBackwardDefault(source_begin, source_end, target_begin); } template -inline void -arrayCopyBackward(TValue * source_begin, - TValue * source_end, +inline void +arrayCopyBackward(TValue * source_begin, + TValue * source_end, TValue * target_begin) { - SEQAN_CHECKPOINT; _arrayCopyBackwardPointer(source_begin, source_end, target_begin, typename IsSimple::Type() ); } @@ -1579,65 +1257,59 @@ arrayCopyBackward(TValue * source_begin, // ---------------------------------------------------------------------------- template -inline void -_arrayMoveForwardPointer(TValue * source_begin, - TValue * source_end, +inline void +_arrayMoveForwardPointer(TValue * source_begin, + TValue * source_end, TValue * target_begin, True) { - SEQAN_CHECKPOINT; - ::std::memmove(target_begin, source_begin, (source_end - source_begin) * sizeof(TValue)); + 
std::memmove(target_begin, source_begin, (source_end - source_begin) * sizeof(TValue)); } template -inline void -_arrayMoveForwardPointer(TValue * source_begin, - TValue * source_end, +inline void +_arrayMoveForwardPointer(TValue * source_begin, + TValue * source_end, TValue * target_begin, False) { - SEQAN_CHECKPOINT; _arrayMoveForwardDefault(source_begin, source_end, target_begin); } template -inline void -arrayMoveForward(TValue * source_begin, - TValue * source_end, +inline void +arrayMoveForward(TValue * source_begin, + TValue * source_end, TValue * target_begin) { - SEQAN_CHECKPOINT; _arrayMoveForwardPointer(source_begin, source_end, target_begin, typename IsSimple::Type() ); } template -inline void -_arrayMoveBackwardPointer(TValue * source_begin, - TValue * source_end, +inline void +_arrayMoveBackwardPointer(TValue * source_begin, + TValue * source_end, TValue * target_begin, True) { - SEQAN_CHECKPOINT; - ::std::memmove(target_begin, source_begin, (source_end - source_begin) * sizeof(TValue)); + std::memmove(target_begin, source_begin, (source_end - source_begin) * sizeof(TValue)); } template -inline void -_arrayMoveBackwardPointer(TValue * source_begin, - TValue * source_end, +inline void +_arrayMoveBackwardPointer(TValue * source_begin, + TValue * source_end, TValue * target_begin, False) { - SEQAN_CHECKPOINT; - _arrayMoveBackwardDefault(source_begin, source_end, target_begin); + _arrayMoveBackwardDefault(source_begin, source_end, target_begin); } template -inline void -arrayMoveBackward(TValue * source_begin, - TValue * source_end, +inline void +arrayMoveBackward(TValue * source_begin, + TValue * source_end, TValue * target_begin) { - SEQAN_CHECKPOINT; _arrayMoveBackwardPointer(source_begin, source_end, target_begin, typename IsSimple::Type() ); } @@ -1647,25 +1319,24 @@ arrayMoveBackward(TValue * source_begin, // clearSpace() on simple type using pointers. 
template -inline void -_arrayClearSpacePointer(TValue * array_begin, - size_t array_length, - size_t keep_from, +inline void +_arrayClearSpacePointer(TValue * array_begin, + size_t array_length, + size_t keep_from, size_t move_to, True const & /*isSimple*/) { if (keep_from == move_to) return; - SEQAN_CHECKPOINT; // TODO(holtgrew): arrayCopy is more appropriate here since we are dealing with the IsSimple case. arrayMove(array_begin + keep_from, array_begin + array_length, array_begin + move_to); } // clearSpace() on non-simple type using pointers. template -inline void -_arrayClearSpacePointer(TValue * array_begin, - size_t array_length, - size_t keep_from, +inline void +_arrayClearSpacePointer(TValue * array_begin, + size_t array_length, + size_t keep_from, size_t move_to, False const & /*isSimple*/) { @@ -1673,9 +1344,9 @@ _arrayClearSpacePointer(TValue * array_begin, } template -void arrayClearSpace(TValue * array_begin, - size_t array_length, - size_t keep_from, +void arrayClearSpace(TValue * array_begin, + size_t array_length, + size_t keep_from, size_t move_to) { _arrayClearSpacePointer(array_begin, array_length, keep_from, move_to, typename IsSimple::Type()); @@ -1684,4 +1355,4 @@ void arrayClearSpace(TValue * array_begin, } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_ARRAY_CONSTRUCT_DESTRUCT_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_ARRAY_CONSTRUCT_DESTRUCT_H_ diff --git a/seqan/basic/basic_aggregate.h b/seqan/basic/basic_aggregate.h index 2945299..18bc52a 100644 --- a/seqan/basic/basic_aggregate.h +++ b/seqan/basic/basic_aggregate.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Facade header for basic/aggregate submodule. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_AGGREGATE_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_AGGREGATE_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_AGGREGATE_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_AGGREGATE_H_ // -------------------------------------------------------------------------- // Prerequisites @@ -59,4 +59,4 @@ #include #include -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_AGGREGATE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_AGGREGATE_H_ diff --git a/seqan/basic/basic_allocator.h b/seqan/basic/basic_allocator.h index 70c238f..d632f69 100644 --- a/seqan/basic/basic_allocator.h +++ b/seqan/basic/basic_allocator.h @@ -1,7 +1,7 @@ // ========================================================================== // basic_allocator.h // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Facade header for basic/allocator submodule. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALLOCATOR_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALLOCATOR_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALLOCATOR_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALLOCATOR_H_ // -------------------------------------------------------------------------- // Dependencies @@ -61,4 +61,4 @@ // Adaption from SeqAn allocator to STL allocator. 
#include -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALLOCATOR_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALLOCATOR_H_ diff --git a/seqan/basic/basic_alphabet.h b/seqan/basic/basic_alphabet.h index f52a467..4da25b0 100644 --- a/seqan/basic/basic_alphabet.h +++ b/seqan/basic/basic_alphabet.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Facade header for the basic/alphabet sub module. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_H_ // -------------------------------------------------------------------------- // Dependencies @@ -95,4 +95,4 @@ // The profile character implementation. #include -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ALPHABET_H_ diff --git a/seqan/basic/basic_concept.h b/seqan/basic/basic_concept.h index c841da3..a89db30 100644 --- a/seqan/basic/basic_concept.h +++ b/seqan/basic/basic_concept.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -29,6 +29,7 @@ // DAMAGE. // // ========================================================================== +// Author: David Weese // Author: Manuel Holtgrewe // ========================================================================== // Facade header for sub module basic_concept. @@ -38,8 +39,8 @@ // Boost concepts and ConceptC++ concepts). // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_CONCEPT_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_CONCEPT_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_CONCEPT_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_CONCEPT_H_ // -------------------------------------------------------------------------- // Prerequisites @@ -63,4 +64,4 @@ // Fundamental concepts such as Assignable. #include -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_CONCEPT_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_CONCEPT_H_ diff --git a/seqan/basic/basic_container.h b/seqan/basic/basic_container.h index e8a4b06..53a2e58 100644 --- a/seqan/basic/basic_container.h +++ b/seqan/basic/basic_container.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Facade header for the basic_container submodule. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_CONTAINER_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_CONTAINER_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_CONTAINER_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_CONTAINER_H_ // -------------------------------------------------------------------------- // Dependencies @@ -50,7 +50,10 @@ // Sub Module Headers // -------------------------------------------------------------------------- -// The container concept. +// PropertyMap concept. +#include + +// Container concept. #include -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_CONTAINER_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_CONTAINER_H_ diff --git a/seqan/basic/basic_debug.h b/seqan/basic/basic_debug.h index abe12ab..b16d926 100644 --- a/seqan/basic/basic_debug.h +++ b/seqan/basic/basic_debug.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ // assert macros double for test checks. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_DEBUG_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_DEBUG_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_DEBUG_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_DEBUG_H_ // -------------------------------------------------------------------------- // Prerequisites @@ -57,4 +57,4 @@ // Code for profiling. 
#include -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_DEBUG_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_DEBUG_H_ diff --git a/seqan/basic/basic_device.h b/seqan/basic/basic_device.h new file mode 100644 index 0000000..c20f412 --- /dev/null +++ b/seqan/basic/basic_device.h @@ -0,0 +1,152 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2013 NVIDIA Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA Corporation nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Enrico Siragusa +// ========================================================================== + +#ifndef SEQAN_BASIC_DEVICE_H +#define SEQAN_BASIC_DEVICE_H + +namespace seqan { + +// ============================================================================ +// Tags +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Execution space tags +// ---------------------------------------------------------------------------- + +struct ExecHost_; +struct ExecDevice_; + +typedef Tag ExecHost; +typedef Tag ExecDevice; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Metafunction Device +// ---------------------------------------------------------------------------- + +/*! + * @mfn Device + * @headerfile + * @brief Converts a given type into one that lives on a device. + * + * @signature Device::Type; + * @tparam TObject The type to be converted into a device type. + * @return Type The resulting device type. + * + * This metafunction is used to convert host containers into device containers. 
+ * + * @see View + */ + +template +struct Device +{ + typedef TObject Type; +}; + +template +struct Device +{ + typedef typename Device::Type const Type; +}; + +// ---------------------------------------------------------------------------- +// Metafunction IsDevice +// ---------------------------------------------------------------------------- + +/*! + * @mfn IsDevice + * @headerfile + * @brief Tests if a given type is a device type. + * + * @signature IsDevice::Type; + * @tparam TObject The type to be tested for being a device type. + * @return Type @link LogicalValuesTags#True @endlink or @link LogicalValuesTags#False @endlink. + * + * @see Device + */ + +template +struct IsDevice : public False {}; + +template +struct IsDevice : public IsDevice {}; + +// ---------------------------------------------------------------------------- +// Metafunction IfDevice +// ---------------------------------------------------------------------------- + +template +struct IfDevice +{ + typedef typename If, T1, T2>::Type Type; +}; + +// ---------------------------------------------------------------------------- +// Metafunction ExecSpace +// ---------------------------------------------------------------------------- + +template +struct ExecSpace +{ + typedef typename If, ExecDevice, ExecHost>::Type Type; +}; + +// ---------------------------------------------------------------------------- +// Metafunction ExecSpec +// ---------------------------------------------------------------------------- + +template +struct ExecSpec +{ + typedef typename IfDevice, TSpec>::Type Type; +}; + +// ---------------------------------------------------------------------------- +// Metafunction CtaSize +// ---------------------------------------------------------------------------- + +template +struct CtaSize +{ + static const unsigned VALUE = 256; +}; + +} // namespace seqan + +#endif // #ifndef SEQAN_BASIC_DEVICE_H diff --git a/seqan/basic/basic_exception.h b/seqan/basic/basic_exception.h 
index 8d84db9..4e84e41 100644 --- a/seqan/basic/basic_exception.h +++ b/seqan/basic/basic_exception.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -45,38 +45,57 @@ #include #include +#ifdef PLATFORM_GCC +#include +#endif + +namespace seqan { + +// ============================================================================ +// Forwards +// ============================================================================ + +template +struct Tag; + +//struct Nothing_; +//typedef Tag Nothing; + // ============================================================================ // Macros // ============================================================================ /*! - * @macro SEQAN_EXCEPTIONS + * @defgroup ExceptionHandling SeqAn Exception Handling + * @brief Macros supporting exception handling on various platforms. + */ + +/*! + * @macro ExceptionHandling#SEQAN_EXCEPTIONS * @headerfile * @brief Determines whether exceptions are enabled or not. - * - * @signature SEQAN_EXCEPTIONS * - * @see SEQAN_TRY - * @see SEQAN_CATCH - * @see SEQAN_THROW + * @signature #define SEQAN_EXCEPTIONS + * + * @see ExceptionHandling#SEQAN_TRY + * @see ExceptionHandling#SEQAN_CATCH + * @see ExceptionHandling#SEQAN_THROW * @see Exception */ #define SEQAN_EXCEPTIONS __EXCEPTIONS /*! - * @macro SEQAN_TRY + * @macro ExceptionHandling#SEQAN_TRY * @headerfile * @brief Replaces the C++ try keyword. - * + * * @signature SEQAN_TRY {} SEQAN_CATCH() {} * - * @section Remarks - * - * When exceptions are disabled, i.e. SEQAN_EXCEPTIONS is set to false, the code inside the try block is always executed". 
- * - * @see SEQAN_CATCH - * @see SEQAN_THROW + * When exceptions are disabled, i.e. SEQAN_EXCEPTIONS is set to false, the code inside the try block is always executed. + * + * @see ExceptionHandling#SEQAN_CATCH + * @see ExceptionHandling#SEQAN_THROW * @see Exception * * @section Examples @@ -96,44 +115,40 @@ */ /*! - * @macro SEQAN_CATCH + * @macro ExceptionHandling#SEQAN_CATCH * @headerfile * @brief Replaces the C++ catch keyword. * * @signature SEQAN_TRY {} SEQAN_CATCH() {} * - * @section Remarks + * When exceptions are disabled, i.e. SEQAN_EXCEPTIONS is set to false, the code inside the catch block is never executed. * - * When exceptions are disabled, i.e. SEQAN_EXCEPTIONS is set to false, the code inside the catch block is never executed". - * - * @see SEQAN_TRY - * @see SEQAN_THROW + * @see ExceptionHandling#SEQAN_TRY + * @see ExceptionHandling#SEQAN_THROW * @see Exception * * @section Examples * - * See @link SEQAN_TRY @endlink for a full example. + * See @link ExceptionHandling#SEQAN_TRY @endlink for a full example. */ /*! - * @macro SEQAN_THROW + * @macro ExceptionHandling#SEQAN_THROW * @headerfile * @brief Replaces the C++ throw keyword. * * @signature SEQAN_THROW(Exception); * - * @section Remarks - * - * When exceptions are disabled, i.e. SEQAN_EXCEPTIONS is set to false, the macro turns into SEQAN_FAIL". + * When exceptions are disabled, i.e. ExceptionHandling#SEQAN_EXCEPTIONS is set to false, the macro turns into SEQAN_FAIL. * - * @see SEQAN_TRY - * @see SEQAN_CATCH - * @see SEQAN_FAIL + * @see ExceptionHandling#SEQAN_TRY + * @see ExceptionHandling#SEQAN_CATCH + * @see AssertMacros#SEQAN_FAIL * @see Exception * * @section Examples * - * See @link SEQAN_TRY @endlink for a full example. + * See @link ExceptionHandling#SEQAN_TRY @endlink for a full example. 
*/ #ifdef SEQAN_EXCEPTIONS @@ -153,69 +168,180 @@ #endif // #ifdef SEQAN_EXCEPTIONS -namespace seqan { - // ============================================================================ -// Classes +// Exceptions // ============================================================================ // ---------------------------------------------------------------------------- -// Class Exception +// Basic Exception // ---------------------------------------------------------------------------- /*! * @class Exception * @headerfile * @brief Generic SeqAn exception. - * @signature Exception; + * @signature typedef std::exception Exception; + * + * @fn Exception::Exception + * @brief Constructor. + * + * @signature Exception::Exception(msg); + * @param[in] msg The message as a std::string. */ typedef std::exception Exception; // ---------------------------------------------------------------------------- -// Class BadAlloc +// Exception BadAlloc // ---------------------------------------------------------------------------- /*! * @class BadAlloc * @headerfile - * @brief Bad memory allocation exception. - * @signature BadAlloc; + * @brief Bad memory allocation exception. + * @signature typedef std::bad_alloc BadAlloc; + * + * @fn BadAlloc::BadAlloc + * @brief Constructor. + * + * @signature BadAlloc::BadAlloc(msg); + * @param[in] msg The message as a std::string. */ typedef std::bad_alloc BadAlloc; // ---------------------------------------------------------------------------- -// Classes Bad* +// Exception BadCast +// ---------------------------------------------------------------------------- + +/*! + * @class BadCast + * @headerfile + * @brief Bad cast exception. + * @signature typedef std::bad_cast BadCast; + * + * @fn BadCast::BadCast + * @brief Constructor. + * + * @signature BadCast::BadCast(msg); + * @param[in] msg The message as a std::string. 
+ */ + +typedef std::bad_cast BadCast; + +// ---------------------------------------------------------------------------- +// Exceptions Bad* // ---------------------------------------------------------------------------- // NOTE(esiragusa): These exceptions can be introduced as long as we need them. //typedef std::bad_exception BadException; -//typedef std::bad_cast BadCast; //typedef std::bad_typeid BadTypeId; //typedef std::bad_function_call BadFunctionCall; //typedef std::bad_weak_ptr BadWeakPtr; // ---------------------------------------------------------------------------- -// Class RuntimeError +// Exception RuntimeError // ---------------------------------------------------------------------------- /*! * @class RuntimeError * @headerfile * @brief Runtime error exception. - * @signature RuntimeError("Message"); + * @signature typedef std::runtime_error RuntimeError; + * + * + * @fn RuntimeError::RuntimeError + * @brief Constructor. + * + * @signature RuntimeError::RuntimeError(msg); + * @param[in] msg The message as a std::string. */ typedef std::runtime_error RuntimeError; // ---------------------------------------------------------------------------- -// Class LogicError +// Exception LogicError // ---------------------------------------------------------------------------- // NOTE(esiragusa): Always prefer SEQAN_ASSERT to logic error exceptions. 
//typedef std::logic_error LogicError; +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Metafunction ExceptionMessage +// ---------------------------------------------------------------------------- + +template +struct ExceptionMessage +{ + static const std::string VALUE; +}; + +template +const std::string ExceptionMessage::VALUE; + +// ---------------------------------------------------------------------------- +// Function getExceptionMessage() +// ---------------------------------------------------------------------------- + +template +inline std::string const & +getExceptionMessage(TFunctor const &, TContext const &) +{ + return ExceptionMessage::VALUE; +} + +// ============================================================================ +// Functors +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Functor AssertFunctor +// ---------------------------------------------------------------------------- + +template +struct AssertFunctor +{ + TFunctor func; + + AssertFunctor() {} + + AssertFunctor(TFunctor & func) : + func(func) + {} + + std::string escapeChar(unsigned char val) + { + if (val <= '\r') + { + static const char * const escapeCodes[14] = { + "\\0", "\\1", "\\2", "\\3", "\\4", "\\5", "\\6", "\\a", + "\\b", "\\t", "\\n", "\\v", "\\f", "\\r" }; + return std::string(escapeCodes[val]); + } + else if (' ' <= val && val < 128u) + return std::string() + (char)val; + else + { + char buffer[6]; // 5 + 1, e.g. 
"\0xff" + trailing zero + sprintf(buffer, "\\%#2x", (unsigned)val); + return std::string(buffer); + } + } + + template + bool operator() (TValue const & val) + { + if (SEQAN_UNLIKELY(!func(val))) + throw TException(std::string("Unexpected character '") + escapeChar(val) + "' found. " + + getExceptionMessage(func, TContext())); + return RETURN_VALUE; + } +}; + // ============================================================================ // Functions // ============================================================================ @@ -224,8 +350,9 @@ typedef std::runtime_error RuntimeError; // Function globalExceptionHandler() // ---------------------------------------------------------------------------- -#ifdef SEQAN_EXCEPTIONS -static void globalExceptionHandler() +#if defined(SEQAN_EXCEPTIONS) && defined(SEQAN_GLOBAL_EXCEPTION_HANDLER) +// Declare global exception handler. +inline static void globalExceptionHandler() { SEQAN_TRY { @@ -233,13 +360,18 @@ static void globalExceptionHandler() } SEQAN_CATCH(Exception & e) { - SEQAN_FAIL("Uncaught exception of type %s: %s", typeid(e).name(), e.what()); + SEQAN_FAIL("Uncaught exception of type %s: %s", toCString(Demangler(e)), e.what()); + } + SEQAN_CATCH(...) + { + SEQAN_FAIL("Uncaught exception of unknown type.\n"); } } // Install global exception handler. 
-static const std::terminate_handler _globalExceptionHandler = std::set_terminate(globalExceptionHandler); -#endif +static const std::terminate_handler SEQAN_UNUSED _globalExceptionHandler = std::set_terminate(globalExceptionHandler); + +#endif // #if defined(SEQAN_EXCEPTIONS) && defined(SEQAN_GLOBAL_EXCEPTION_HANDLER) } // namespace seqan diff --git a/seqan/basic/basic_functors.h b/seqan/basic/basic_functors.h new file mode 100644 index 0000000..076127d --- /dev/null +++ b/seqan/basic/basic_functors.h @@ -0,0 +1,214 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: David Weese +// Author: Enrico Siragusa +// ========================================================================== + +#ifndef SEQAN_BASIC_FUNCTORS_H_ +#define SEQAN_BASIC_FUNCTORS_H_ + +namespace seqan { + +// ============================================================================ +// Functors +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Functor OrFunctor +// ---------------------------------------------------------------------------- + +template +struct OrFunctor +{ + TFunctor1 func1; + TFunctor2 func2; + + OrFunctor() + {} + + OrFunctor(TFunctor1 const &func1, TFunctor2 const &func2): + func1(func1), func2(func2) + {} + + template + bool operator() (TValue const & val) + { + return func1(val) || func2(val); + } + + template + bool operator() (TValue const & val) const + { + return func1(val) || func2(val); + } +}; + +// ---------------------------------------------------------------------------- +// Functor AndFunctor +// ---------------------------------------------------------------------------- + +template +struct AndFunctor +{ + TFunctor1 func1; + TFunctor2 func2; + + AndFunctor() + {} + + AndFunctor(TFunctor1 const &func1, TFunctor2 const &func2): + func1(func1), func2(func2) + {} + + template + bool 
operator() (TValue const & val) + { + return func1(val) && func2(val); + } + + template + bool operator() (TValue const & val) const + { + return func1(val) && func2(val); + } +}; + +// ---------------------------------------------------------------------------- +// Functor NotFunctor +// ---------------------------------------------------------------------------- + +template +struct NotFunctor +{ + TFunctor func; + + NotFunctor() + {} + + NotFunctor(TFunctor const &func): + func(func) + {} + + template + bool operator() (TValue const & val) + { + return !func(val); + } + + + template + bool operator() (TValue const & val) const + { + return !func(val); + } +}; + +// ---------------------------------------------------------------------------- +// Functor CountDownFunctor +// ---------------------------------------------------------------------------- + +template +struct CountDownFunctor +{ + __uint64 remaining; + TFunctor func; + + CountDownFunctor(__uint64 remaining = REMAINING): + remaining(remaining) + {} + + CountDownFunctor(__uint64 remaining, TFunctor const &func): + remaining(remaining), + func(func) + {} + + template + bool operator() (TValue const & val) + { + if (remaining == 0) + return true; + if (func(val)) + --remaining; + return false; + } + + operator bool() + { + return remaining == 0; + } +}; + +// ---------------------------------------------------------------------------- +// Functor CountFunctor +// ---------------------------------------------------------------------------- + +template +struct CountFunctor +{ + __uint64 count; + TFunctor func; + + CountFunctor() : count(0) + {} + + CountFunctor(TFunctor const & func) : count(0), func(func) + {} + + template + bool operator() (TValue const & val) + { + if (func(val)) + ++count; + return false; + } + + operator __uint64() const + { + return count; + } +}; + +template +inline void clear(CountFunctor &func) +{ + func.count = 0; +} + +template +inline __uint64 & value(CountFunctor &func) +{ + 
return func.count; +} + +} // namespace seqan + +#endif // SEQAN_BASIC_FUNCTORS_H_ diff --git a/seqan/basic/basic_fundamental.h b/seqan/basic/basic_fundamental.h index 2296de3..ecfbda1 100644 --- a/seqan/basic/basic_fundamental.h +++ b/seqan/basic/basic_fundamental.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,8 @@ // prototypes for common metafunctions like Value<>, functions assign() etc. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_FUNDAMENTAL_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_FUNDAMENTAL_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_FUNDAMENTAL_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_FUNDAMENTAL_H_ // -------------------------------------------------------------------------- // Prerequisites @@ -65,6 +65,9 @@ // Basic tag-related code. #include +// Functions and metafunctions to use contiguous chunks of memory +#include + // Definition of assign(), set(), move(). #include @@ -82,4 +85,4 @@ // Hosted type. 
#include -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_HOSTED_TYPE_INTERFACE_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_FUNDAMENTAL_H_ diff --git a/seqan/basic/basic_iterator.h b/seqan/basic/basic_iterator.h index ca6e41f..2de7c10 100644 --- a/seqan/basic/basic_iterator.h +++ b/seqan/basic/basic_iterator.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Facade header for basic_iterator submodule. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ITERATOR_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ITERATOR_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ITERATOR_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ITERATOR_H_ // -------------------------------------------------------------------------- // Dependencies @@ -61,12 +61,24 @@ // Iterator concept. #include -// Iterator for adapting iterators to Rooted Iterators. -#include +// PropertyMap concept. +#include + +// Container concept. +#include + +// Counting iterator. +#include // Positional iterator. #include +// Makes a container out of begin/end iterators +#include + +// Iterator for adapting iterators to Rooted Iterators. +#include + // Adaption between STL iterators and SeqAn iterators. 
#include @@ -74,4 +86,4 @@ #include #include -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_ITERATOR_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_ITERATOR_H_ diff --git a/seqan/basic/basic_math.h b/seqan/basic/basic_math.h index 30ae4df..8b8097d 100644 --- a/seqan/basic/basic_math.h +++ b/seqan/basic/basic_math.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Facade header for the basic/math sub module. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_MATH_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_MATH_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_MATH_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_MATH_H_ // -------------------------------------------------------------------------- // Dependencies @@ -54,5 +54,5 @@ #include #include -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_MATH_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_MATH_H_ diff --git a/seqan/basic/basic_metaprogramming.h b/seqan/basic/basic_metaprogramming.h index b43cee5..44b6337 100644 --- a/seqan/basic/basic_metaprogramming.h +++ b/seqan/basic/basic_metaprogramming.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -34,11 +34,15 @@ // Facade header for the basic/metaprogramming submodule. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_METAPROGRAMMING_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_METAPROGRAMMING_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_METAPROGRAMMING_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_METAPROGRAMMING_H_ #include +#ifdef SEQAN_CXX11_STANDARD +#include +#endif + #include // Metaprogramming logical operations. @@ -53,7 +57,10 @@ // Metaprogramming for querying and modifying types. #include +// Metaprogramming for type algebra. +#include + // Metaprogramming for conditional enabling/disabling of code. #include -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_METAPROGRAMMING_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_METAPROGRAMMING_H_ diff --git a/seqan/basic/basic_parallelism.h b/seqan/basic/basic_parallelism.h index db7761a..11ef6ac 100644 --- a/seqan/basic/basic_parallelism.h +++ b/seqan/basic/basic_parallelism.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,8 @@ // It mainly defines the macro SEQAN_ENABLE_PARALLELISM. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_PARALLELISM_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_PARALLELISM_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_PARALLELISM_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_PARALLELISM_H_ /*! 
* @macro SEQAN_ENABLE_PARALLELISM @@ -65,24 +65,6 @@ * @endcode */ -/** -.Macro.SEQAN_ENABLE_PARALLELISM -..summary:Indicates whether parallelism is enabled with value 0/1. -..cat:Parallelism -..signature:SEQAN_ENABLE_PARALLELISM -..remarks:By default, set to 1 if $_OPENMP$ is defined and set to 0 otherwise. -..example:If you want to change this value, you have to define this value before including any SeqAn header. -...code:#define SEQAN_ENABLE_PARALLELISM 0 // ALWAYS switch off parallelism! - -#include - -int main(int argc, char ** argv) -{ - return 0; -} -..include:seqan/basic.h - */ - #if !defined(SEQAN_ENABLE_PARALLELISM) #if defined(_OPENMP) #define SEQAN_ENABLE_PARALLELISM 1 @@ -91,4 +73,4 @@ int main(int argc, char ** argv) #endif // defined(_OPENMP) #endif // !defined(SEQAN_ENABLE_PARALLELISM) -#endif // SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_PARALLELISM_H_ +#endif // SEQAN_INCLUDE_SEQAN_BASIC_BASIC_PARALLELISM_H_ diff --git a/seqan/basic/basic_proxy.h b/seqan/basic/basic_proxy.h index eda6402..ee6fd97 100644 --- a/seqan/basic/basic_proxy.h +++ b/seqan/basic/basic_proxy.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Proxy definition. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_PROXY_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_PROXY_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_PROXY_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_PROXY_H_ // -------------------------------------------------------------------------- // Dependencies @@ -56,4 +56,4 @@ // assignValue() and setValue()). 
#include -#endif // SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_PROXY_H_ +#endif // SEQAN_INCLUDE_SEQAN_BASIC_BASIC_PROXY_H_ diff --git a/seqan/basic/basic_simd_vector.h b/seqan/basic/basic_simd_vector.h new file mode 100644 index 0000000..ee28655 --- /dev/null +++ b/seqan/basic/basic_simd_vector.h @@ -0,0 +1,662 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: David Weese +// ========================================================================== +// generic SIMD interface for SSE4/AVX +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_SIMD_VECTOR_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_SIMD_VECTOR_H_ + +#ifdef __SSE4_1__ + #include +#else +// SSE4.1 or greater required + #warning "SSE4.1 instruction set not enabled" +#endif + + +namespace seqan { + +// ============================================================================ +// Useful Macros +// ============================================================================ + +#define SEQAN_DEFINE_SIMD_VECTOR_GETVALUE_(TSimdVector) \ +template \ +inline typename Value::Type \ +getValue(TSimdVector &vector, TPosition pos) \ +{ \ +/* \ + typedef typename Value::Type TValue; \ + TValue val = (reinterpret_cast(&vector))[pos]; \ + return val; \ +*/ \ + return vector[pos]; \ +} + + +#define SEQAN_DEFINE_SIMD_VECTOR_VALUE_(TSimdVector) \ +template \ +inline typename Value::Type \ +value(TSimdVector &vector, TPosition pos) \ +{ \ + return getValue(vector, pos); \ +} + +#define SEQAN_DEFINE_SIMD_VECTOR_ASSIGNVALUE_(TSimdVector) \ +template \ +inline void \ +assignValue(TSimdVector &vector, TPosition pos, TValue2 value) \ +{ \ +/* \ + typedef typename Value::Type TValue; \ + 
(reinterpret_cast(&vector))[pos] = value; \ +*/ \ + vector[pos] = value; \ +} + + +// ============================================================================ +// Tags, Classes, Enums +// ============================================================================ + +// define a concept and its models +// they allow us to define generic vector functions +SEQAN_CONCEPT(SimdVectorConcept, (T)) {}; + +#if defined(__AVX2__) +#define SEQAN_SIZEOF_MAX_VECTOR 32 +#elif defined(__SSE3__) +#define SEQAN_SIZEOF_MAX_VECTOR 16 +#else +#define SEQAN_SIZEOF_MAX_VECTOR 8 +#endif + +// a metafunction returning the biggest supported SIMD vector +template +struct SimdVector; + +// internal struct to specialize for vector parameters (SIZEOF=sizeof(TVector), LENGTH=LENGTH::VALUE) +template +struct SimdParams_ {}; + +// internal struct to specialize for matrix parameters +template +struct SimdMatrixParams_ +{ +}; + + +#define SEQAN_DEFINE_SIMD_VECTOR_(TSimdVector, TValue, SIZEOF_VECTOR) \ + typedef TValue TSimdVector __attribute__ ((__vector_size__ (SIZEOF_VECTOR))); \ + template <> struct SimdVector { typedef TSimdVector Type; }; \ + template <> struct Value { typedef TValue Type; }; \ + template <> struct LENGTH { enum { VALUE = SIZEOF_VECTOR / sizeof(TValue) }; }; \ + template <> struct Value: public Value {}; \ + template <> struct LENGTH: public LENGTH {}; \ + SEQAN_DEFINE_SIMD_VECTOR_GETVALUE_(TSimdVector const) \ + SEQAN_DEFINE_SIMD_VECTOR_VALUE_(TSimdVector) \ + SEQAN_DEFINE_SIMD_VECTOR_VALUE_(TSimdVector const) \ + SEQAN_DEFINE_SIMD_VECTOR_ASSIGNVALUE_(TSimdVector) \ + template <> \ + SEQAN_CONCEPT_IMPL((TSimdVector), (SimdVectorConcept)); \ + template <> \ + SEQAN_CONCEPT_IMPL((TSimdVector const), (SimdVectorConcept)); + +#ifdef __AVX__ +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32Char, char, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32SChar, signed char, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector32UChar, unsigned char, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16Short, short, 32) 
+SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16UShort, unsigned short, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8Int, int, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8UInt, unsigned int, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4Int64, __int64, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4UInt64, __uint64, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8Float, float, 32) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4Double, double, 32) +#endif + +#ifdef __SSE3__ +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8Char, char, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8SChar, signed char, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8UChar, unsigned char, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4Short, short, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4UShort, unsigned short, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2Int, int, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2UInt, unsigned int, 8) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2Float, float, 8) + +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16Char, char, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16SChar, signed char, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector16UChar, unsigned char, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8Short, short, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector8UShort, unsigned short, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4Int, int, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4UInt, unsigned int, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2Int64, __int64, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2UInt64, __uint64, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector4Float, float, 16) +SEQAN_DEFINE_SIMD_VECTOR_(SimdVector2Double, double, 16) +#endif + +// ============================================================================ +// Functions +// ============================================================================ + +// -------------------------------------------------------------------------- +// AVX/AVX2 wrappers +// -------------------------------------------------------------------------- + +#ifdef __AVX__ + +template +inline void _fillVector(TSimdVector 
&vector, TValue x, SimdParams_<32, 32>) { reinterpret_cast<__m256i&>(vector) = _mm256_set1_epi8(x); } +template +inline void _fillVector(TSimdVector &vector, TValue x, SimdParams_<32, 16>) { reinterpret_cast<__m256i&>(vector) = _mm256_set1_epi16(x); } +template +inline void _fillVector(TSimdVector &vector, TValue x, SimdParams_<32, 8>) { reinterpret_cast<__m256i&>(vector) = _mm256_set1_epi32(x); } +template +inline void _fillVector(TSimdVector &vector, TValue x, SimdParams_<32, 4>) { reinterpret_cast<__m256i&>(vector) = _mm256_set1_epi64x(x); } +template +inline void _fillVector(TSimdVector &vector, float x, SimdParams_<32, 8>) { reinterpret_cast<__m256i&>(vector) = _mm256_set1_ps(x); } +template +inline void _fillVector(TSimdVector &vector, double x, SimdParams_<32, 4>) { reinterpret_cast<__m256i&>(vector) = _mm256_set1_pd(x); } + +template +inline void _clearVector(TSimdVector &vector, SimdParams_<32, L>) { reinterpret_cast<__m256i&>(vector) = _mm256_setzero_si256(); } +template +inline void _clearVector(TSimdVector &vector, SimdParams_<32, 8>) { reinterpret_cast<__m256&>(vector) = _mm256_setzero_ps(); } +template +inline void _clearVector(TSimdVector &vector, SimdParams_<32, 4>) { reinterpret_cast<__m256d&>(vector) = _mm256_setzero_pd(); } + +#ifdef __AVX2__ + +template +inline TSimdVector _blend(TSimdVector const &a, TSimdVector const &b, TSimdVector const &mask, SimdParams_<32, L>) +{ + return reinterpret_cast(_mm256_blendv_epi8( + reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(mask))); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const &vector, TSimdVector2 const &indices, SimdParams_<32, 32>, SimdParams_<32, 32>) +{ + return reinterpret_cast(_mm256_shuffle_epi8( + reinterpret_cast(vector), + reinterpret_cast(indices))); +} +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const &vector, TSimdVector2 const &indices, SimdParams_<32, 16>, SimdParams_<16, 16>) +{ + // copy 2nd 64bit word to 3rd, compute 2*idx + 
__m256i idx = _mm256_slli_epi16(_mm256_permute4x64_epi64(_mm256_castsi128_si256(reinterpret_cast(indices)), 0x50), 1); + // interleave with 2*idx+1 and call shuffle + return reinterpret_cast(_mm256_shuffle_epi8( + reinterpret_cast(vector), + _mm256_unpacklo_epi8(idx, _mm256_add_epi8(idx, _mm256_set1_epi8(1))))); +} + +template +inline TSimdVector _shiftRightLogical(TSimdVector const &vector, const int imm, SimdParams_<32, 32>) +{ + return reinterpret_cast(_mm256_srli_epi16(reinterpret_cast(vector), imm) & _mm256_set1_epi8(0xff >> imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const &vector, const int imm, SimdParams_<32, 16>) +{ + return reinterpret_cast(_mm256_srli_epi16(reinterpret_cast(vector), imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const &vector, const int imm, SimdParams_<32, 8>) +{ + return reinterpret_cast(_mm256_srli_epi32(reinterpret_cast(vector), imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const &vector, const int imm, SimdParams_<32, 4>) +{ + return reinterpret_cast(_mm256_srli_epi64(reinterpret_cast(vector), imm)); +} + +// emulate missing _mm256_unpacklo_epi128/_mm256_unpackhi_epi128 instructions +inline __m256i _mm256_unpacklo_epi128(__m256i const &a, __m256i const &b) +{ + return _mm256_permute2x128_si256(a, b, 0x20); +// return _mm256_inserti128_si256(a, _mm256_extracti128_si256(b, 0), 1); +} + +inline __m256i _mm256_unpackhi_epi128(__m256i const &a, __m256i const &b) +{ + return _mm256_permute2x128_si256(a, b, 0x31); +// return _mm256_inserti128_si256(b, _mm256_extracti128_si256(a, 1), 0); +} + +template +inline void +_transposeMatrix(TSimdVector matrix[], SimdMatrixParams_<32, 32, 8>) +{ + // we need a look-up table to reverse the lowest 4 bits + // in order to place the permute the transposed rows + static const unsigned char bitRev[] = { 0, 8, 4,12, 2,10, 6,14, 1, 9, 5,13, 3,11, 7,15, + 16,24,20,28,18,26,22,30,17,25,21,29,19,27,23,31}; + + // transpose a 32x32 
byte matrix + __m256i tmp1[32]; + for (int i = 0; i < 16; ++i) + { + tmp1[i] = _mm256_unpacklo_epi8(reinterpret_cast(matrix[2*i]), reinterpret_cast(matrix[2*i+1])); + tmp1[i+16] = _mm256_unpackhi_epi8(reinterpret_cast(matrix[2*i]), reinterpret_cast(matrix[2*i+1])); + } + __m256i tmp2[32]; + for (int i = 0; i < 16; ++i) + { + tmp2[i] = _mm256_unpacklo_epi16(tmp1[2*i], tmp1[2*i+1]); + tmp2[i+16] = _mm256_unpackhi_epi16(tmp1[2*i], tmp1[2*i+1]); + } + for (int i = 0; i < 16; ++i) + { + tmp1[i] = _mm256_unpacklo_epi32(tmp2[2*i], tmp2[2*i+1]); + tmp1[i+16] = _mm256_unpackhi_epi32(tmp2[2*i], tmp2[2*i+1]); + } + for (int i = 0; i < 16; ++i) + { + tmp2[i] = _mm256_unpacklo_epi64(tmp1[2*i], tmp1[2*i+1]); + tmp2[i+16] = _mm256_unpackhi_epi64(tmp1[2*i], tmp1[2*i+1]); + } + for (int i = 0; i < 16; ++i) + { + matrix[bitRev[i]] = reinterpret_cast(_mm256_unpacklo_epi128(tmp2[2*i],tmp2[2*i+1])); + matrix[bitRev[i+16]] = reinterpret_cast(_mm256_unpackhi_epi128(tmp2[2*i],tmp2[2*i+1])); + } +} + +#else // #ifdef __AVX2__ +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const &vector, TSimdVector2 const &indices, SimdParams_<32, 32>, SimdParams_<32, 32>) +{ + return reinterpret_cast(_mm256_permute2f128_si256( + _mm256_castsi128_si256 (_mm_shuffle_epi8( + _mm256_castsi256_si128(reinterpret_cast(vector)), + _mm256_castsi256_si128(reinterpret_cast(indices)))), + _mm256_castsi128_si256 (_mm_shuffle_epi8( + _mm256_castsi256_si128(reinterpret_cast(vector)), + _mm256_extractf128_si256(reinterpret_cast(indices), 1))), + 0x20)); +} + +inline SimdVector32Char shiftRightLogical(SimdVector32Char const &vector, const int imm) +{ + return reinterpret_cast(_mm256_permute2f128_si256( + _mm256_castsi128_si256 (_mm_srli_epi16( + _mm256_castsi256_si128(reinterpret_cast(vector)), + imm)), + _mm256_castsi128_si256 (_mm_srli_epi16( + _mm256_extractf128_si256(reinterpret_cast(vector), 1), + imm)), + 0x20) & _mm256_set1_epi8(0xff >> imm)); +} + +#endif // #ifdef __AVX2__ + + +template 
+SEQAN_FUNC_ENABLE_IF( + Is >, + int) +inline _testAllZeros(TSimdVector const &vector, TSimdVector const &mask, SimdParams_<32>) +{ +#ifdef __AVX2__ + return _mm256_testz_si256(vector, mask); +#else // #ifdef __AVX2__ + return + _mm_testz_si128(_mm256_castsi256_si128(vector), _mm256_castsi256_si128(mask)) & + _mm_testz_si128(_mm256_extractf128_si256(vector, 1), _mm256_extractf128_si256(mask, 1)); +#endif // #ifdef __AVX2__ +} + +template +inline int _testAllOnes(TSimdVector const &vector, SimdParams_<32>) +{ + __m256i vec = reinterpret_cast(vector); +#ifdef __AVX2__ + return _mm256_testc_si256(vec, _mm256_cmpeq_epi32(vec, vec)); +#else // #ifdef __AVX2__ + return + _mm_test_all_ones(_mm256_castsi256_si128(vec)) & + _mm_test_all_ones(_mm256_extractf128_si256(vec, 1)); +#endif // #ifdef __AVX2__ +} + +#endif // #ifdef __AVX__ + + +// -------------------------------------------------------------------------- +// SSE3 wrappers +// -------------------------------------------------------------------------- + +#ifdef __SSE3__ + +template +inline void _fillVector(TSimdVector &vector, TValue x, SimdParams_<16, 16>) { reinterpret_cast<__m128i&>(vector) = _mm_set1_epi8(x); } +template +inline void _fillVector(TSimdVector &vector, TValue x, SimdParams_<16, 8>) { reinterpret_cast<__m128i&>(vector) = _mm_set1_epi16(x); } +template +inline void _fillVector(TSimdVector &vector, TValue x, SimdParams_<16, 4>) { reinterpret_cast<__m128i&>(vector) = _mm_set1_epi32(x); } +template +inline void _fillVector(TSimdVector &vector, TValue x, SimdParams_<16, 2>) { reinterpret_cast<__m128i&>(vector) = _mm_set1_epi64x(x); } +template +inline void _fillVector(TSimdVector &vector, float x, SimdParams_<16, 4>) { reinterpret_cast<__m128i&>(vector) = _mm_set1_ps(x); } +template +inline void _fillVector(TSimdVector &vector, double x, SimdParams_<16, 2>) { reinterpret_cast<__m128i&>(vector) = _mm_set1_pd(x); } + +template +inline void _clearVector(TSimdVector &vector, SimdParams_<16, L>) { 
reinterpret_cast<__m128i&>(vector) = _mm_setzero_si128(); } +template +inline void _clearVector(TSimdVector &vector, SimdParams_<16, 4>) { reinterpret_cast<__m128&>(vector) = _mm_setzero_ps(); } +template +inline void _clearVector(TSimdVector &vector, SimdParams_<16, 2>) { reinterpret_cast<__m128d&>(vector) = _mm_setzero_pd(); } + + +template +inline TSimdVector _blend(TSimdVector const &a, TSimdVector const &b, TSimdVector const &mask, SimdParams_<16, L>) +{ + return reinterpret_cast(_mm_blendv_epi8( + reinterpret_cast(a), + reinterpret_cast(b), + reinterpret_cast(mask))); +} + +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const &vector, TSimdVector2 const &indices, SimdParams_<16, 16>, SimdParams_<16, 16>) +{ + return reinterpret_cast(_mm_shuffle_epi8( + reinterpret_cast(vector), + reinterpret_cast(indices))); +} +template +inline TSimdVector1 +_shuffleVector(TSimdVector1 const &vector, TSimdVector2 const &indices, SimdParams_<16, 8>, SimdParams_<8, 8>) +{ + __m128i idx = _mm_slli_epi16(_mm_cvtsi64_si128(reinterpret_cast(indices)), 1); + return reinterpret_cast(_mm_shuffle_epi8( + reinterpret_cast(vector), + _mm_unpacklo_epi8(idx, _mm_add_epi8(idx, _mm_set1_epi8(1))))); +} + +template +inline TSimdVector _shiftRightLogical(TSimdVector const &vector, const int imm, SimdParams_<16, 16>) +{ + return reinterpret_cast(_mm_srli_epi16(reinterpret_cast(vector), imm) & _mm_set1_epi8(0xff >> imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const &vector, const int imm, SimdParams_<16, 8>) +{ + return reinterpret_cast(_mm_srli_epi16(reinterpret_cast(vector), imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const &vector, const int imm, SimdParams_<16, 4>) +{ + return reinterpret_cast(_mm_srli_epi32(reinterpret_cast(vector), imm)); +} +template +inline TSimdVector _shiftRightLogical(TSimdVector const &vector, const int imm, SimdParams_<16, 2>) +{ + return reinterpret_cast(_mm_srli_epi64(reinterpret_cast(vector), 
imm)); +} + + + +template +inline void +_transposeMatrix(TSimdVector matrix[], SimdMatrixParams_<8, 8, 8>) +{ + // we need a look-up table to reverse the lowest 4 bits + // in order to place the permute the transposed rows + static const unsigned char bitRev[] = {0,4,2,6,1,5,3,7}; + + // transpose a 8x8 byte matrix + __m64 tmp1[8]; + for (int i = 0; i < 4; ++i) + { + tmp1[i] = _mm_unpacklo_pi8(reinterpret_cast(matrix[2*i]), reinterpret_cast(matrix[2*i+1])); + tmp1[i+4] = _mm_unpackhi_pi8(reinterpret_cast(matrix[2*i]), reinterpret_cast(matrix[2*i+1])); + } + __m64 tmp2[8]; + for (int i = 0; i < 4; ++i) + { + tmp2[i] = _mm_unpacklo_pi16(tmp1[2*i], tmp1[2*i+1]); + tmp2[i+4] = _mm_unpackhi_pi16(tmp1[2*i], tmp1[2*i+1]); + } + for (int i = 0; i < 4; ++i) + { + matrix[bitRev[i]] = reinterpret_cast(_mm_unpacklo_pi32(tmp2[2*i], tmp2[2*i+1])); + matrix[bitRev[i+4]] = reinterpret_cast(_mm_unpackhi_pi32(tmp2[2*i], tmp2[2*i+1])); + } +} + +template +inline void +_transposeMatrix(TSimdVector matrix[], SimdMatrixParams_<16, 16, 8>) +{ + // we need a look-up table to reverse the lowest 4 bits + // in order to place the permute the transposed rows + static const unsigned char bitRev[] = {0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15}; + + // transpose a 16x16 byte matrix + // + // matrix = + // A0 A1 A2 ... Ae Af + // B0 B1 B2 ... Be Bf + // ... + // P0 P1 P2 ... Pe Pf + __m128i tmp1[16]; + for (int i = 0; i < 8; ++i) + { + tmp1[i] = _mm_unpacklo_epi8(reinterpret_cast(matrix[2*i]), reinterpret_cast(matrix[2*i+1])); + tmp1[i+8] = _mm_unpackhi_epi8(reinterpret_cast(matrix[2*i]), reinterpret_cast(matrix[2*i+1])); + } + // tmp1[0] = A0 B0 A1 B1 ... A7 B7 + // tmp1[1] = C0 D0 C1 D1 ... C7 D7 + // ... + // tmp1[7] = O0 P0 O1 P1 ... O7 P7 + // tmp1[8] = A8 B8 A9 B9 ... Af Bf + // ... + // tmp1[15] = O8 P8 O9 P9 ... 
Of Pf + __m128i tmp2[16]; + for (int i = 0; i < 8; ++i) + { + tmp2[i] = _mm_unpacklo_epi16(tmp1[2*i], tmp1[2*i+1]); + tmp2[i+8] = _mm_unpackhi_epi16(tmp1[2*i], tmp1[2*i+1]); + } + // tmp2[0] = A0 B0 C0 D0 ... A3 B3 C3 D3 + // tmp2[1] = E0 F0 G0 H0 ... E3 F3 G3 H3 + // ... + // tmp2[3] = M0 N0 O0 P0 ... M3 N3 O3 P3 + // tmp2[4] = A8 B8 C8 D8 ... Ab Bb Cb Db + // ... + // tmp2[7] = M8 N8 O8 P8 ... Mb Nb Ob Pb + // tmp2[8] = A4 B4 C4 D4 ... A7 B7 C7 D7 + // .. + // tmp2[12] = Ac Bc Cc Dc ... Af Bf Cf Df + // ... + // tmp2[15] = Mc Nc Oc Pc ... Mf Nf Of Pf + for (int i = 0; i < 8; ++i) + { + tmp1[i] = _mm_unpacklo_epi32(tmp2[2*i], tmp2[2*i+1]); + tmp1[i+8] = _mm_unpackhi_epi32(tmp2[2*i], tmp2[2*i+1]); + } + // tmp1[0] = A0 B0 .... H0 A1 B1 .... H1 + // tmp1[1] = I0 J0 .... P0 I1 J1 .... P1 + // ... + // tmp1[4] = A0 B0 .... H0 A1 B1 .... H1 + // tmp1[1] = I0 J0 .... P0 I1 J1 .... P1 + for (int i = 0; i < 8; ++i) + { + matrix[bitRev[i]] = reinterpret_cast(_mm_unpacklo_epi64(tmp1[2*i], tmp1[2*i+1])); + matrix[bitRev[i+8]] = reinterpret_cast(_mm_unpackhi_epi64(tmp1[2*i], tmp1[2*i+1])); + } +} + +#ifdef __SSE4_1__ +template +SEQAN_FUNC_ENABLE_IF( + Is >, + int) +inline _testAllZeros(TSimdVector const &vector, TSimdVector const &mask, SimdParams_<16>) +{ + return _mm_testz_si128(vector, mask); +} + +template +inline int _testAllOnes(TSimdVector const &vector, SimdParams_<16>) +{ + return _mm_test_all_ones(reinterpret_cast(vector)); +} + + +#endif // #ifdef __SSE3__ +//#endif // #ifdef __AVX__ +#endif + + +template +SEQAN_FUNC_ENABLE_IF( + Is >, + int) +inline testAllZeros(TSimdVector const &vector, TSimdVector const &mask) +{ + return _testAllZeros(vector, mask, SimdParams_()); +} + +template +SEQAN_FUNC_ENABLE_IF( + Is >, + int) +inline testAllZeros(TSimdVector const &vector) +{ + return _testAllZeros(vector, vector, SimdParams_()); +} + +template +inline int _testAllOnes(TSimdVector const &vector, True) +{ + return _testAllOnes(vector, SimdParams_()); +} + +template 
+SEQAN_FUNC_ENABLE_IF( + Is >, + void) +inline transpose(TSimdVector matrix[ROWS]) +{ + typedef typename Value::Type TValue; + _transposeMatrix(matrix, SimdMatrixParams_< + ROWS, + LENGTH::VALUE, + BitsPerValue::VALUE>()); +} + + +template +SEQAN_FUNC_ENABLE_IF( + Is >, + void) +inline clearVector(TSimdVector &vector) +{ + _clearVector(vector, SimdParams_::VALUE>()); +} + + +template +SEQAN_FUNC_ENABLE_IF( + Is >, + void) +inline fillVector(TSimdVector &vector, TValue x) +{ + _fillVector(vector, x, SimdParams_::VALUE>()); +} + +template +SEQAN_FUNC_ENABLE_IF( + Is >, + TSimdVector) +inline blend(TSimdVector const &a, TSimdVector const &b, TSimdVector const &mask) +{ + return _blend(a, b, mask, SimdParams_::VALUE>()); +} + +template +SEQAN_FUNC_ENABLE_IF( + Is >, + TSimdVector1) +inline shuffleVector(TSimdVector1 const &vector, TSimdVector2 const &indices) +{ + return _shuffleVector( + vector, + indices, + SimdParams_::VALUE>(), + SimdParams_::VALUE>()); +} + +template +SEQAN_FUNC_ENABLE_IF( + Is >, + TSimdVector) +inline shiftRightLogical(TSimdVector const &vector, const int imm) +{ + return _shiftRightLogical(vector, imm, SimdParams_::VALUE>()); +} + + +template +SEQAN_FUNC_ENABLE_IF( + Is >, + std::ostream &) +inline print(std::ostream &stream, TSimdVector const &vector) +{ + stream << '<'; + for (int i = 0; i < LENGTH::VALUE; ++i) + stream << '\t' << (unsigned)vector[i]; + stream << "\t>"; + return stream; +} + +} // namespace seqan + +#endif // SEQAN_INCLUDE_SEQAN_BASIC_SIMD_VECTOR_H_ diff --git a/seqan/basic/basic_smart_pointer.h b/seqan/basic/basic_smart_pointer.h index bc020e1..9c3ba38 100644 --- a/seqan/basic/basic_smart_pointer.h +++ b/seqan/basic/basic_smart_pointer.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 
2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // Facade header for the basic/smart_pointer sub module. // ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_SMART_POINTER_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_SMART_POINTER_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_SMART_POINTER_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_SMART_POINTER_H_ // -------------------------------------------------------------------------- // Dependencies @@ -56,4 +56,4 @@ #include -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BASIC_SMART_POINTER_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_SMART_POINTER_H_ diff --git a/seqan/basic/basic_stream.h b/seqan/basic/basic_stream.h new file mode 100644 index 0000000..ac25b8a --- /dev/null +++ b/seqan/basic/basic_stream.h @@ -0,0 +1,1303 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of Knut Reinert or the FU Berlin nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: David Weese +// ========================================================================== +// Basic definitions for the stream module. +// ========================================================================== + +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_STREAM_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BASIC_STREAM_H_ + +namespace seqan { + +// ============================================================================ +// Forwards +// ============================================================================ + +template +struct StreamIterator; + +template +inline void writeValue(std::basic_ostream &ostream, TValue2 val); + +template +inline void writeValue(std::ostreambuf_iterator &iter, TValue2 val); + +template +inline void writeValue(Iter > &iter, TValue val); + +template +inline bool atEnd(std::istreambuf_iterator const &it); + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename Size::Type) +length(TContainer const & me); + +template +inline void +appendValue(TContainer SEQAN_FORWARD_ARG me, TSource SEQAN_FORWARD_CARG source); + +/*! 
+ * @macro SEQAN_HAS_ZLIB + * @headerfile + * @brief Defined as 0 or 1, depending on zlib being available. + * + * @signature #define SEQAN_HAS_ZLIB 0 // or 1 + */ + +/*! + * @macro SEQAN_HAS_BZIP2 + * @headerfile + * @brief Defined as 0 or 1, depending on bzlib being available. + * + * @signature #define SEQAN_HAS_BZIP2 0 // or 1 + */ + +// ============================================================================ +// Tags +// ============================================================================ + +// ============================================================================ +// Concepts +// ============================================================================ + +// -------------------------------------------------------------------------- +// Concept StreamConcept +// -------------------------------------------------------------------------- + +/*! + * @concept StreamConcept + * @headerfile + * + * @brief Base concept for streams. + * + * @signature concept StreamConcept; + */ + +/*! + * @mfn StreamConcept#Value + * @brief Metafunction for retrieving the value type of a stream. + * + * @signature Value::Type; + * + * @tparam TStream The stream type to query for its value type. + * @return Type The resulting value type. + */ + +/*! + * @mfn StreamConcept#Size + * @brief Metafunction for retrieving the size type of a stream. + * + * @signature Size::Type; + * + * @tparam TStream The stream type to query for its size type. + * @return Type The resulting size type. + */ + +/*! + * @mfn StreamConcept#Position + * @brief Metafunction for retrieving the position type of a stream. + * + * @signature Position::Type; + * + * @tparam TStream The stream type to query for its position type. + * @return Type The resulting position type. + */ + +/*! + * @fn StreamConcept#position + * @brief Return current stream position. + * + * @signature TPosition position(stream); + * + * @param[in] stream The stream to query.
+ * @return TPosition Current position in stream, see @link StreamConcept#Position Position @endlink. + */ + +/*! + * @fn StreamConcept#setPosition + * @brief Set stream position. + * + * @signature void setPosition(stream, pos); + * + * @param[in,out] stream The stream to update. + * @param[in] pos The position to set. + */ + +/*! + * @fn StreamConcept#atEnd + * @brief Return whether stream is at the end. + * + * @signature bool atEnd(stream); + * + * @param[in] stream The stream to check. + * @return bool true if the stream is at EOF, false otherwise. + */ + +SEQAN_CONCEPT(StreamConcept, (TStream)) +{}; + +// -------------------------------------------------------------------------- +// Concept InputStreamConcept +// -------------------------------------------------------------------------- + +/*! + * @concept InputStreamConcept Input StreamConcept + * @extends StreamConcept + * @headerfile + * + * @signature concept InputStreamConcept : StreamConcept; + * + * @brief Concept for input streams (for reading). + */ + +SEQAN_CONCEPT_REFINE(InputStreamConcept, (TStream), (StreamConcept)) +{ + typedef typename Value::Type TValue; + typedef typename Size::Type TSize; + typedef typename Position::Type TPosition; + + SEQAN_CONCEPT_ASSERT((SignedIntegerConcept)); + + SEQAN_CONCEPT_USAGE(InputStreamConcept) + {} +}; + +// -------------------------------------------------------------------------- +// Concept OutputStreamConcept +// -------------------------------------------------------------------------- + +/*! + * @concept OutputStreamConcept Output StreamConcept + * @extends StreamConcept + * @headerfile + * + * @signature concept OutputStreamConcept : StreamConcept; + * + * @brief Concept for output streams (for writing).
+ */ + +SEQAN_CONCEPT_REFINE(OutputStreamConcept, (TStream), (StreamConcept)) +{ + typedef typename Value::Type TValue; + typedef typename Size::Type TSize; + typedef typename Position::Type TPosition; + + SEQAN_CONCEPT_ASSERT((SignedIntegerConcept)); + + SEQAN_CONCEPT_USAGE(OutputStreamConcept) + {} +}; + +// -------------------------------------------------------------------------- +// Concept BidirectionalStreamConcept +// -------------------------------------------------------------------------- + +/*! + * @concept BidirectionalStreamConcept Bidirectional StreamConcept + * @extends StreamConcept + * @headerfile + * + * @signature concept BidirectionalStreamConcept : StreamConcept; + * + * @brief Concept for bidirectional streams (both for reading and writing). + */ + +SEQAN_CONCEPT_REFINE(BidirectionalStreamConcept, (TStream), (InputStreamConcept)(OutputStreamConcept)) +{}; + +// ============================================================================ +// Classes +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Struct FormattedNumber +// ---------------------------------------------------------------------------- + +/*! + * @class FormattedNumber + * @implements NumberConcept + * @headerfile + * + * @brief Helper class for storing a numeric value together with a + * printf format string. + * + * @signature template + * struct FormattedNumber; + * + * @tparam TValue The numeric value type. + */ + +template +struct FormattedNumber +{ + char const * format; + TValue value; + + /*! + * @fn FormattedNumber::FormattedNumber + * @brief Constructor. + * + * @signature FormattedNumber::FormattedNumber(format, value); + * + * @param[in] format A char const * for the format string. + * @param[in] value The TValue to store. + * + * The constructed FormattedNumber object stores the format pointer "as is".
This means that you are + * responsible for keeping this pointer valid until the object is deconstructed. Passing in a C string literal + * (as in FormattedNumber<double>("%.2f", 1.234) is fine. + */ + + FormattedNumber(char const * format, TValue const & value) : + format(format), value(value) + {} + + operator TValue() const + { + return value; + } +}; + +template +struct Is< NumberConcept< FormattedNumber > > : + Is< NumberConcept > {}; + +// ============================================================================ +// Exceptions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Exception ParseError +// ---------------------------------------------------------------------------- + +/*! + * @class ParseError + * @extends RuntimeError + * @headerfile + * + * @brief Exception class for parser errors. + * + * @signature struct ParseError : RuntimeError; + */ + +struct ParseError : RuntimeError +{ + /*! + * @fn ParseError::ParseError + * @headerfile + * + * @brief Constructor. + * + * @signature ParseError::ParseError(message); + * + * @param[in] message The error message to use, std::string or char const * . + */ + + template + ParseError(TString const & message) : + RuntimeError(message) + {} +}; + +// ---------------------------------------------------------------------------- +// Exception UnexpectedEnd +// ---------------------------------------------------------------------------- + +/*! + * @class UnexpectedEnd + * @extends ParseError + * @headerfile + * + * @brief Exception class for "unexpected end of input" errors. + * + * @signature struct UnexpectedEnd : ParseError; + */ + +struct UnexpectedEnd : ParseError +{ + /*! + * @fn UnexpectedEnd::UnexpectedEnd + * @headerfile + * + * @brief Default constructor, makes the object use a default message. 
+ * + * @signature UnexpectedEnd::UnexpectedEnd(); + */ + + UnexpectedEnd() : + ParseError("Unexpected end of input.") + {} +}; + +// ---------------------------------------------------------------------------- +// Exception EmptyFieldError +// ---------------------------------------------------------------------------- + +/*! + * @class EmptyFieldError + * @extends ParseError + * @headerfile + * + * @brief Exception class for "empty field" errors. + * + * @signature struct EmptyFieldError : ParseError; + */ + +struct EmptyFieldError : ParseError +{ + /*! + * @fn EmptyFieldError::EmptyFieldError + * @headerfile + * + * @brief Construct the exception with fieldName + " field was empty.". + * + * @signature EmptyFieldError::EmptyFieldError(fieldName); + * + * @param[in] fieldName The field name to use for the message, std::string. + */ + + EmptyFieldError(std::string fieldName): + ParseError(fieldName + " field was empty.") + {} +}; + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Metafunction DirectionIterator +// ---------------------------------------------------------------------------- + +/*! + * @mfn StreamConcept#DirectionIterator + * @brief Return the direction iterator for the given direction. + * + * @signature DirectionIterator::Type; + * + * @tparam TStream The stream to query for its direction iterator. + * @return Type The resulting direction iterator. + */ + +/*! + * @mfn ContainerConcept#DirectionIterator + * @brief Return the direction iterator for the given direction. + * + * @signature DirectionIterator::Type; + * + * @tparam TContainer The container to query for its direction iterator. + * @return Type The resulting direction iterator. 
+ */ + +template +struct DirectionIterator : + If >, + Iter >, + typename Iterator::Type> +{}; + +// -------------------------------------------------------------------------- +// Metafunction BasicStream +// -------------------------------------------------------------------------- + +/*! + * @mfn BasicStream + * @headerfile + * @brief Return the stream type to read or write values. + * + * @signature BasicStream::Type; + * + * @tparam TValue The value type of the stream. + * @tparam TDirection The direction of the stream, one of the @link DirectionTags @endlink. + * @tparam TTraits The traits to use for the values, defaults to std::char_traits<TValue>. + * + */ + +template > +struct BasicStream : + If< + IsSameType, + std::basic_istream, + typename If< + IsSameType, + std::basic_ostream, + std::basic_iostream + >::Type + > +{}; + +// -------------------------------------------------------------------------- +// Metafunction IosOpenMode +// -------------------------------------------------------------------------- + +/*! + * @mfn IosOpenMode + * @headerfile + * @brief Return the std::ios open mode for a direction. + * + * @signature IosOpenMode::Type; + * + * @tparam TDirection The direction to query for the open mode, one of the @link DirectionTags @endlink. + * @tparam TDummy Implementation detail, defaults to void and is ignored. + * @return Type The resulting open mode of type const int. 
+ */ + +template +struct IosOpenMode; + + +template +struct IosOpenMode +{ + static const int VALUE; +}; + +template +struct IosOpenMode +{ + static const int VALUE; +}; + +template +struct IosOpenMode +{ + static const int VALUE; +}; + +template +const int IosOpenMode::VALUE = std::ios::in | std::ios::binary; + +template +const int IosOpenMode::VALUE = std::ios::out | std::ios::binary; + +template +const int IosOpenMode::VALUE = std::ios::in | std::ios::out | std::ios::binary; + + +// -------------------------------------------------------------------------- +// Metafunction MagicHeader +// -------------------------------------------------------------------------- + +/*! + * @mfn MagicHeader + * @headerfile + * @brief Returns the magic header for a file format tag. + * + * The magic header is used for recognizing files from the first few bytes. + * + * @signature MagicHeader::VALUE; + * + * @tparam TTag The file format tag to use for the query. + * @tparam TDummy Implementation detail, defaults to void and is ignored. + * @return VALUE The magic header string, of type char const *. + * + * This metafunction must be implemented in the modules implementing the file I/O. The metafunction is predefined when + * TTag is @link Nothing @endlink. In this case, VALUE is NULL. + */ + +template +struct MagicHeader; + +template +struct MagicHeader +{ + static char const * VALUE; +}; + +template +char const * MagicHeader::VALUE = NULL; + +// -------------------------------------------------------------------------- +// Metafunction FileExtensions +// -------------------------------------------------------------------------- + +/*! + * @mfn FileExtensions + * @headerfile + * @brief Returns an array of file format extension strings for a file format tag. + * + * @signature FileExtensions::VALUE; + * + * @tparam TTag The file format tag to use for the query. + * @tparam TDummy Implementation detail, defaults to void and is ignored. 
+ * @return VALUE The array of file format extension, of type char const *[]. + * + * This metafunction must be implemented in the modules implementing the file I/O. The metafunction is predefined when + * TTag is @link Nothing @endlink. In this case, VALUE is {""}. + */ + +template +struct FileExtensions; + +template +struct FileExtensions +{ + static char const * VALUE[1]; +}; + +template +char const * FileExtensions::VALUE[1] = +{ + "" // default output extension +}; + +// ---------------------------------------------------------------------------- +// Metafunction IntegerFormatString_ +// ---------------------------------------------------------------------------- +// Return the format string for numbers. + +template +struct IntegerFormatString_; + + +template +struct IntegerFormatString_ : + IntegerFormatString_ {}; + + +template +struct IntegerFormatString_ +{ + static const char VALUE[]; + typedef short Type; +}; +template +const char IntegerFormatString_::VALUE[] = "%hi"; + + +template +struct IntegerFormatString_ +{ + static const char VALUE[]; + typedef unsigned short Type; +}; +template +const char IntegerFormatString_::VALUE[] = "%hu"; + + +template +struct IntegerFormatString_ +{ + static const char VALUE[]; + typedef int Type; +}; +template +const char IntegerFormatString_::VALUE[] = "%i"; + + +template +struct IntegerFormatString_ +{ + static const char VALUE[]; + typedef unsigned Type; +}; +template +const char IntegerFormatString_::VALUE[] = "%u"; + + +// helper for the case: typedef long __int64; +template +struct LongFormatString_; + +template +struct LongFormatString_ +{ + static const char VALUE[]; + typedef long Type; +}; +template +const char LongFormatString_::VALUE[] = "%li"; + +template +struct LongFormatString_ +{ + static const char VALUE[]; + typedef unsigned long Type; +}; +template +const char LongFormatString_::VALUE[] = "%lu"; + +// helper for the case: typedef long long __int64; +template +struct Int64FormatString_; + +template 
+struct Int64FormatString_ +{ + static const char VALUE[]; + typedef __int64 Type; +}; +template +const char Int64FormatString_::VALUE[] = "%lli"; + +template +struct Int64FormatString_ +{ + static const char VALUE[]; + typedef __uint64 Type; +}; +template +const char Int64FormatString_::VALUE[] = "%llu"; + + +template +struct IntegerFormatString_ : + If, + LongFormatString_, + Int64FormatString_ >::Type {}; + + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function writeValue() [ContainerConcept] +// ---------------------------------------------------------------------------- + +/*! + * @fn ContainerConcept#writeValue + * @brief Write a value at the end of a container. + * + * @signature void writeValue(container, val); + * + * @param[in,out] container to append to. + * @param[in] val The value to append. + * + * @see ContainerConcept#appendValue + */ + +// resizable containers +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +writeValue(TSequence &cont, TValue val) +{ + appendValue(cont, val); +} + +// ---------------------------------------------------------------------------- +// Function writeValue() [Range] +// ---------------------------------------------------------------------------- + +/*! + * @fn Range#writeValue + * @brief Write a value to a @link Range @endlink. + * + * @signature void writeValue(range, val); + * + * val will be assigned to the first element of the range. Then, the beginning of the range will be advanced + * by one. + * + * @param[in,out] range to append to. + * @param[in] val The value to append. 
+ */ + +// Range +template +inline void +writeValue(Range &range, TValue val) +{ + assignValue(range.begin, val); + ++range.begin; +} + +// ---------------------------------------------------------------------------- +// Function writeValue() [Iter] +// ---------------------------------------------------------------------------- + +/*! + * @fn OutputIteratorConcept#writeValue + * @brief Write a single value to a container by dereferencing its iterator. + * + * @signature void writeValue(iter, val); + * + * @param[in,out] iter The iterator to use for dereferenced writing. + * @param[in] val The value to write into the container. + * + * If the host of iter is a @link ContainerConcept @endlink then container is resized to make space for the + * item. + */ + +// resizable containers +template +inline SEQAN_FUNC_ENABLE_IF(Is >, void) +writeValue(Iter & iter, TValue val) +{ + typedef Iter TIter; + + TSequence &cont = container(iter); + typename Position::Type pos = position(iter); + typename Size::Type len = length(cont); + + if (pos < len) + { + assignValue(iter, val); + ++iter; + } + else + { + /* Iterator is past the end: grow the container to exactly pos elements so that the appended value lands at index pos (pos - 1 here stored it one slot too early). */ + if (pos > len) + resize(cont, pos); + appendValue(cont, val); + setPosition(iter, pos + 1); + } +} + +// non-resizable containers +template +inline SEQAN_FUNC_DISABLE_IF(Is >, void) +writeValue(Iter & iter, TValue val) +{ + SEQAN_ASSERT_LT(position(iter), length(container(iter))); + + assignValue(iter, val); + ++iter; +} + +// ---------------------------------------------------------------------------- +// Function writeValue() [pointer] +// ---------------------------------------------------------------------------- + +///*! +// * @fn ContainerConcept#writeValue +// * @brief Write a value by dereferencing a pointer and incrementing its position by one. 
+// * +// * @signature void writeValue(pointer, val); +// * +// * @param[in,out] iter The pointer to dereference, usually a char *. +// * @param[in] val The value to write to the dereferenced pointer. 
+// * +// * This function is equivalent to *iter++ = val. +// */ + +template +inline void +writeValue(TTargetValue * & iter, TValue val) +{ + *iter++ = val; +} + +// ---------------------------------------------------------------------------- +// Function _write(); Element-wise +// ---------------------------------------------------------------------------- + +template +inline void _write(TTarget &target, TFwdIterator &iter, TSize n, TIChunk, TOChunk) +{ + for (; n > (TSize)0; --n, ++iter) + writeValue(target, getValue(iter)); +} + +// ---------------------------------------------------------------------------- +// Function _write(); Chunked +// ---------------------------------------------------------------------------- + +template +inline void _write(TTarget &target, TFwdIterator &iter, TSize n, Range *, Range *) +{ + typedef Nothing* TNoChunking; + typedef typename Size::Type TTargetSize; + + Range ichunk; + Range ochunk; + + while (n != (TSize)0) + { + getChunk(ichunk, iter, Input()); + getChunk(ochunk, target, Output()); + + TTargetSize minChunkSize = std::min((TTargetSize)length(ichunk), (TTargetSize)length(ochunk)); + + if (SEQAN_UNLIKELY(minChunkSize == 0u)) + { + reserveChunk(target, n, Output()); + reserveChunk(iter, n, Input()); + getChunk(ochunk, target, Output()); + getChunk(ichunk, iter, Input()); + minChunkSize = std::min((TTargetSize)length(ichunk), (TTargetSize)length(ochunk)); + if (SEQAN_UNLIKELY(minChunkSize == 0u)) + { + _write(target, iter, n, TNoChunking(), TNoChunking()); + return; + } + } + + if (minChunkSize > (TTargetSize)n) + minChunkSize = (TTargetSize)n; + + arrayCopyForward(ichunk.begin, ichunk.begin + minChunkSize, ochunk.begin); + + iter += minChunkSize; // advance input iterator + advanceChunk(target, minChunkSize); + n -= minChunkSize; + } +} + +// chunked, target is pointer (e.g. 
readRawPod) +template +inline SEQAN_FUNC_DISABLE_IF(IsSameType::Type, Nothing>, void) +write(TOValue *ptr, TFwdIterator &iter, TSize n) +{ + typedef Nothing* TNoChunking; + typedef typename Size::Type TSourceSize; + typedef typename Chunk::Type TIChunk; + + TIChunk ichunk; + + while (n != (TSize)0) + { + getChunk(ichunk, iter, Input()); + TSourceSize chunkSize = length(ichunk); + + if (SEQAN_UNLIKELY(chunkSize == 0u)) + { + reserveChunk(iter, n, Input()); + getChunk(ichunk, iter, Input()); + /* Assign to the loop-level chunkSize; re-declaring it here shadowed it and left the outer value at 0, so the freshly reserved chunk was skipped for this iteration. */ + chunkSize = length(ichunk); + if (SEQAN_UNLIKELY(chunkSize == 0u)) + { + _write(ptr, iter, n, TNoChunking(), TNoChunking()); + return; + } + } + + if (chunkSize > (TSourceSize)n) + chunkSize = (TSourceSize)n; + + arrayCopyForward(ichunk.begin, ichunk.begin + chunkSize, ptr); + + iter += chunkSize; // advance input iterator + ptr += chunkSize; + n -= chunkSize; + } +} + +// non-chunked fallback +template +inline SEQAN_FUNC_ENABLE_IF(And< IsSameType::Type, Nothing>, + Is::Type, TIValue> > >, void) +write(TTarget &target, TIValue *ptr, TSize n) +{ + _write(target, ptr, n, Nothing(), Nothing()); +} + +// ostream shortcut, source is pointer (e.g. readRawPod) +template +inline SEQAN_FUNC_ENABLE_IF(Is< OutputStreamConcept >, void) +write(TTarget &target, const char *ptr, TSize n) +{ + target.write(ptr, n); +} + +// ostream shortcut, source is pointer (e.g. readRawPod) +template +inline SEQAN_FUNC_ENABLE_IF(Is< OutputStreamConcept >, void) +write(TTarget &target, char *ptr, TSize n) +{ + target.write(ptr, n); +} + +// chunked, source is pointer (e.g. 
readRawPod) +template +inline SEQAN_FUNC_ENABLE_IF(And< Not::Type, Nothing> >, + Is::Type, TIValue> > >, void) +write(TTarget &target, TIValue *ptr, TSize n) +{ + typedef Nothing* TNoChunking; + typedef typename Size::Type TTargetSize; + typedef typename Chunk::Type TOChunk; + + TOChunk ochunk; + + while (n != (TSize)0) + { + getChunk(ochunk, target, Output()); + TTargetSize chunkSize = length(ochunk); + + if (SEQAN_UNLIKELY(chunkSize == 0u)) + { + reserveChunk(target, n, Output()); + getChunk(ochunk, target, Output()); + chunkSize = length(ochunk); + if (SEQAN_UNLIKELY(chunkSize == 0u)) + { + _write(target, ptr, n, TNoChunking(), TNoChunking()); + return; + } + } + + if (chunkSize > (TTargetSize)n) + chunkSize = (TTargetSize)n; + + arrayCopyForward(ptr, ptr + chunkSize, ochunk.begin); + + ptr += chunkSize; // advance input iterator + advanceChunk(target, chunkSize); + n -= chunkSize; + } +} + +template +inline SEQAN_FUNC_ENABLE_IF(And< Is >, + Is > >, void) +write(TOValue * &optr, TIValue *iptr, TSize n) +{ + std::memcpy(optr, iptr, n); + optr += n; +} + +template +inline SEQAN_FUNC_ENABLE_IF(And< Is >, + Is > >, void) +write(TOValue * optr, TIValue * &iptr, TSize n) +{ + std::memcpy(optr, iptr, n); + iptr += n; +} + +// ---------------------------------------------------------------------------- +// Function write(TValue *) +// ---------------------------------------------------------------------------- +// NOTE(esiragusa): should it be defined for Streams and Containers? 
+ +//template +//inline SEQAN_FUNC_ENABLE_IF(Or >, Is > >, void) +//write(TTarget &target, TValue *ptr, TSize n) +//{ +// typedef Range TRange; +// typedef typename Iterator::Type TIterator; +// typedef typename Chunk::Type* TIChunk; +// typedef typename Chunk::Type* TOChunk; +// +// TRange range(ptr, ptr + n); +// TIterator iter = begin(range, Rooted()); +// _write(target, iter, n, TIChunk(), TOChunk()); +//} + +// ---------------------------------------------------------------------------- +// Function write(Iterator) +// ---------------------------------------------------------------------------- + +/*! + * @fn ContainerConcept#write + * @brief Write to a container. + * + * @signature void write(container, iter, n); + * + * @param[in,out] container The container to append to. + * @param[in,out] iter The @link ForwardIteratorConcept forward iterator @endlink to take the values from. + * @param[in] n Number of elements to write from iter. + * + * This function reads n values from iter and appends them to the back of container. + */ + +//TODO(singer): Enable this! 
+template +//inline SEQAN_FUNC_ENABLE_IF(Or >, Is > >, void) +inline SEQAN_FUNC_ENABLE_IF(And< Is >, + Is::Type, + typename Value::Type> > >, void) +write(TTarget &target, TFwdIterator &iter, TSize n) +{ + typedef typename Chunk::Type* TIChunk; + typedef typename Chunk::Type* TOChunk; + + _write(target, iter, n, TIChunk(), TOChunk()); +} + +// write for more complex values (defer to write of iterator value) +// used for Strings of Pairs +template +//inline SEQAN_FUNC_ENABLE_IF(Or >, Is > >, void) +inline SEQAN_FUNC_ENABLE_IF(And< + Is >, + Not< Is::Type, + typename Value::Type> > > >, void) +write(TTarget &target, TFwdIterator &iter, TSize n) +{ + for (; n > (TSize)0; --n, ++iter) + { + write(target, *iter); + writeValue(target, ' '); + } +} + +// ---------------------------------------------------------------------------- +// Function write(TContainer) but not container of container +// ---------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(And< Not >, + And< Is >, + Not::Type> > > > >, void) +write(TTarget &target, TContainer &cont) +{ + typename DirectionIterator::Type iter = directionIterator(cont, Input()); + write(target, iter, length(cont)); +} + +template +inline SEQAN_FUNC_ENABLE_IF(And< IsContiguous, + And< Is >, + Not::Type> > > > >, void) +write(TTarget &target, TContainer &cont) +{ + typename Iterator::Type iter = begin(cont, Standard()); + write(target, iter, length(cont)); +} + +template +inline SEQAN_FUNC_ENABLE_IF(And< Not >, + And< Is >, + Not::Type> > > > >, void) +write(TTarget &target, TContainer const &cont) +{ + typename DirectionIterator::Type iter = directionIterator(cont, Input()); + write(target, iter, length(cont)); +} + +template +inline SEQAN_FUNC_ENABLE_IF(And< IsContiguous, + And< Is >, + Not::Type> > > > >, void) +write(TTarget &target, TContainer const &cont) +{ + typename Iterator::Type iter = begin(cont, Standard()); + write(target, iter, length(cont)); +} + +template 
+inline void +write(TTarget &target, TValue * ptr) +{ + write(target, ptr, length(ptr)); +} + +// ---------------------------------------------------------------------------- +// Function appendNumber() +// ---------------------------------------------------------------------------- +// Generic version for integers. + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, typename Size::Type) +appendNumber(TTarget & target, TInteger i) +{ + typedef IntegerFormatString_ >::Type, + sizeof(TInteger)> TInt; + + // 1 byte has at most 3 decimal digits (plus 2 for '-' and the NULL character) + char buffer[sizeof(TInteger) * 3 + 2]; + size_t len = snprintf(buffer, sizeof(buffer), + TInt::VALUE, static_cast(i)); + char *bufPtr = buffer; + write(target, bufPtr, len); + return len; +} + +// ---------------------------------------------------------------------------- +// Function appendNumber(bool) +// ---------------------------------------------------------------------------- + +template +inline typename Size::Type +appendNumber(TTarget & target, bool source) +{ + writeValue(target, '0' + source); + return 1; +} + +// ---------------------------------------------------------------------------- +// Function appendNumber(float) +// ---------------------------------------------------------------------------- + +template +inline typename Size::Type +appendNumber(TTarget & target, float source) +{ + char buffer[32]; + size_t len = snprintf(buffer, sizeof(buffer), "%g", source); + write(target, (char *)buffer, len); + return len; +} + +// ---------------------------------------------------------------------------- +// Function appendNumber(double) +// ---------------------------------------------------------------------------- + +template +inline typename Size::Type +appendNumber(TTarget & target, double source) +{ + char buffer[32]; + size_t len = snprintf(buffer, sizeof(buffer), "%g", source); + write(target, (char *)buffer, len); + return len; +} + +// 
---------------------------------------------------------------------------- +// Function appendNumber(double) +// ---------------------------------------------------------------------------- + +template +inline typename Size::Type +appendNumber(TTarget & target, FormattedNumber const & source) +{ + char buffer[100]; + size_t len = snprintf(buffer, sizeof(buffer), source.format, source.value); + write(target, (char *)buffer, len); + return len; +} + +template +inline FormattedNumber +formattedNumber(const char *format, TValue const & val) +{ + return FormattedNumber(format, val); +} + +// ---------------------------------------------------------------------------- +// Function appendRawPod() +// ---------------------------------------------------------------------------- + +template +inline void +appendRawPod(TTarget & target, TValue const & val) +{ + write(target, (unsigned char*)&val, sizeof(TValue)); +} + +template +inline void +appendRawPod(TTargetValue * &ptr, TValue const & val) +{ + *reinterpret_cast(ptr)++ = val; +} + +// ---------------------------------------------------------------------------- +// Function write(TNumber); write fundamental type +// ---------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(And< Is::Type, TValue> >, + Is > >, void) +write(TTarget &target, TValue &number) +{ + if (sizeof(TValue) == 1) + writeValue(target, number); // write chars as chars + else + appendNumber(target, number); +} + +template +inline SEQAN_FUNC_ENABLE_IF(And< Is::Type, TValue> >, + Is > >, void) +write(TTarget &target, TValue const &number) +{ + if (sizeof(TValue) == 1) + writeValue(target, number); // write chars as chars + else + appendNumber(target, number); +} + +// ---------------------------------------------------------------------------- +// Function write(TNumber); write non-fundamental, convertible type +// ---------------------------------------------------------------------------- + 
+template +inline SEQAN_FUNC_ENABLE_IF(And< Is::Type, TValue> >, + Not > > >, void) +write(TTarget &target, TValue &number) +{ + writeValue(target, number); +} + +template +inline SEQAN_FUNC_ENABLE_IF(And< Is::Type, TValue const> >, + Not > > >, void) +write(TTarget &target, TValue const &number) +{ + writeValue(target, number); +} + +// ---------------------------------------------------------------------------- +// Function read(Iterator) +// ---------------------------------------------------------------------------- + +template +inline SEQAN_FUNC_ENABLE_IF(Is >, TSize) +read(TTarget &target, TFwdIterator &iter, TSize n) +{ + TSize i; + for (i = 0; !atEnd(iter) && i < n; ++i, ++iter) + writeValue(target, value(iter)); + return i; +} + +// ---------------------------------------------------------------------------- +// Function read(TContainer) +// ---------------------------------------------------------------------------- + +template +inline typename Size::Type +read(TTarget &target, TContainer &cont) +{ + typename DirectionIterator::Type iter = directionIterator(cont, Input()); + return read(target, iter, length(cont)); +} + +// ---------------------------------------------------------------------------- +// operator<< +// ---------------------------------------------------------------------------- + +template +inline TStream & +operator<<(TStream & target, + Range const & source) +{ + typename DirectionIterator::Type it = directionIterator(target, Output()); + write(it, source); + return target; +} + +template +inline TStream & +operator<<(TStream & target, + FormattedNumber const & source) +{ + typename DirectionIterator::Type it = directionIterator(target, Output()); + write(it, source); + return target; +} + +} // namespace seqan + +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BASIC_STREAM_H_ diff --git a/seqan/basic/basic_tangle.h b/seqan/basic/basic_tangle.h index 393a7a9..e6b2b0d 100644 --- a/seqan/basic/basic_tangle.h +++ 
b/seqan/basic/basic_tangle.h 
@@ -1,7 +1,7 @@ // ========================================================================== // basic_tangle.h // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -34,8 +34,8 @@ // TODO(holtgrew): This header contains code that does not clearly belongs somewhere else. -#ifndef CORE_INCLUDE_SEQAN_BASIC_BASIC_TANGLE_H_ -#define CORE_INCLUDE_SEQAN_BASIC_BASIC_TANGLE_H_ +#ifndef INCLUDE_SEQAN_BASIC_BASIC_TANGLE_H_ +#define INCLUDE_SEQAN_BASIC_BASIC_TANGLE_H_ namespace seqan { @@ -75,10 +75,8 @@ struct Iterator const, Standard> // TODO(holtgrew): Is this part of some adaption? -///.Metafunction.Key.param.T.type:Class.Pair - template -struct Key > +struct Key > { typedef TKey Type; }; @@ -89,10 +87,8 @@ struct Key > // TODO(holtgrew): Is this part of some adaption? -///.Metafunction.Cargo.param.T.type:Class.Pair - template -struct Cargo > +struct Cargo > { typedef TCargo Type; }; @@ -110,49 +106,31 @@ struct Cargo > * @fn assignQualities * @headerfile * @brief Assign quality values between strings. - * + * * @signature void assignQualities(target, source); - * + * * @param[out] target Target string, can be a String of DnaQ or Dna5Q characters. * @param[in] source Source string. Can be a String of int or char. - * + * * @section Remarks - * + * * The target is resized to the length of source. This function calls assignQualityValue for all entries of * target and source, look at the documentation of assignQualityValue on how the values of * source are interpreted. - * + * * Note that qualities are expected to be in PHRED scale. - * + * * @see AlphabetWithQualitiesConcept#assignQualityValue */ -/** -.Function.assignQualities -..cat:Alphabets -..summary:Assign quality values between strings. 
-..signature:assignQualities(target, source) -..param.target:Target string -...type:nolink:@Class.String@ of any alphabet with qualities, e.g. @Spec.DnaQ@, @Spec.Dna5Q@ -..param.source:Source string. -...type:nolink:@Class.String@ of $int$ or $char$. -..remarks: -The target is resized to the length of source. -This function calls @Function.assignQualityValue@ for all entries of $target$ and $source$, look at the documentation of @Function.assignQualityValue@ on how the values of $source$ are interpreted. -..remarks: -Note that qualities are expected to be in PHRED scale. -..see:Function.assignQualityValue -..include:seqan/basic.h -*/ - -// TODO(holtgrew): Uncomment, place somewhere that knows both iterators and assignQualityValue, maybe in module sequence? template -void assignQualities(TDest &dst, TSource const &src) +inline void +_assignQualities(TDest &dst, TSource const &src, True) { typedef typename Iterator::Type TDestIter; typedef typename Iterator::Type TSourceIter; - if (length(dst) < length(src)) + if (length(dst) < length(src)) resize(dst, length(src)); TDestIter itDst = begin(dst, Standard()); @@ -162,6 +140,20 @@ void assignQualities(TDest &dst, TSource const &src) assignQualityValue(*itDst, *itSrc); } +template +inline void +_assignQualities(TDest &, TSource const &, False) +{} + +// TODO(holtgrew): Uncomment, place somewhere that knows both iterators and assignQualityValue, maybe in module sequence? 
+template +inline void +assignQualities(TDest &dst, TSource const &src) +{ + typedef typename Value::Type TValue; + _assignQualities(dst, src, typename Or, IsSameType >::Type()); +} + template inline T unknownValueImpl(T *) @@ -172,4 +164,4 @@ unknownValueImpl(T *) } // namespace seqan -#endif // #ifndef CORE_INCLUDE_SEQAN_BASIC_BASIC_TANGLE_H_ +#endif // #ifndef INCLUDE_SEQAN_BASIC_BASIC_TANGLE_H_ diff --git a/seqan/basic/basic_type.h b/seqan/basic/basic_type.h index b23f7f5..995a516 100644 --- a/seqan/basic/basic_type.h +++ b/seqan/basic/basic_type.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // Copyright (c) 2013 NVIDIA Corporation // All rights reserved. // @@ -158,30 +158,18 @@ struct Host /*! * @mfn Cargo + * @headerfile * @brief Type of additional data stored in an object. - * - * @signature Cargo::Type + * + * @signature Cargo::Type; * * @tparam T Type for which the cargo tpye is queried. * * @return Type The cargo type of T. * - * @section Remarks - * * The definition of Cargo allows the addition of user-specified data into existing data structures. */ -/** -.Metafunction.Cargo: -..cat:Basic -..summary:Type of additional data stored in an object. -..signature:Cargo::Type -..param.T:Type for which the cargo tyoe is determined. -..returns.param.Type:Cargo of $T$. -..remarks:The definition of Cargo allows the addition of user specific data to existing data structures. -..include:seqan/basic.h -*/ - // TODO(holtgrew): Should this have a default implementation? template @@ -207,20 +195,6 @@ struct Cargo { * @return Type The resulting vertex descriptor type. */ -/** -.Metafunction.VertexDescriptor: -..cat:Graph -..summary:Type of an object that represents a vertex descriptor. 
-..signature:VertexDescriptor::Type -..param.T:Type T must be a graph. All graphs currently use ids as vertex descriptors. -..returns.param.Type:VertexDescriptor type. -..remarks.text:The vertex descriptor is a unique handle to a vertex in a graph. -It is used in various graph functions, e.g., to add edges, to create OutEdge Iterators or to remove a vertex. -It is also used to attach properties to vertices. -..example.code:VertexDescriptor >::Type vD; //vD is a vertex descriptor -..include:seqan/basic.h -*/ - // TODO(holtgrew): Should this have a default implementation? For all graphs -- OK but for all types? template @@ -245,19 +219,6 @@ struct VertexDescriptor: * * @return Type The resulting identifier type. */ - -/** -.Metafunction.Id: -..cat:Graph -..summary:Type of an object that represents an id. -..signature:Id::Type -..param.T:Type for which a suitable id type is determined. -..returns.param.Type:Id type. -..remarks.text:The id type of a container is the type that is used to uniquely identify its elements. -In most cases this type is unsigned int. -..example.code:Id >::Type id; //id has type unsigned int -..include:seqan/basic.h -*/ // TODO(holtgrew): Should this have a default implementation? @@ -283,17 +244,6 @@ struct Id : Id {}; * @return Type The key type. */ -/** -.Metafunction.Key: -..cat:Graph -..summary:Key type of a key to cargo mapping. -..signature:Key::Type -..param.T:Type for which a key type is determined. -..returns.param.Type:Key type. -...default:The type $T$ itself. -..include:seqan/basic.h -*/ - // TODO(holtgrew): Should this have a default implementation? template< typename T > @@ -308,17 +258,8 @@ struct Key: //____________________________________________________________________________ -/*VERALTET -.Metafunction.Object: -..summary:Object type of a key to object mapping. -..signature:Object::Type -..param.T:Type for which a object type is determined. -..returns.param.Type:Object type. 
-..include:seqan/basic.h -*/ - template -struct Object; +struct Object; template struct Object: @@ -330,11 +271,6 @@ struct Object: // TODO(holtgrew): Move to alignments? // TODO(holtgrew): Is this default implementation what we want? -/** -.Metafunction.Source -..cat:Alignments -*/ - template < typename TSpec = void > struct Source { @@ -362,32 +298,12 @@ struct Source: * @return Type Either True or False. True if the object can efficiently be copied. */ -/** -.Metafunction.IsLightWeight: -..cat:Metafunctions -..summary:Determines whether an object can efficiently be passed by copy. -..signature:IsLightWeight::Type -..param.T:A type. -..returns.param.Type:@Tag.Logical Values.tag.True@ if the object is light-weight and can efficiently be copied, e.g. @Class.Segment@, otherwise @Tag.Logical Values.tag.False@. -*/ - template struct IsLightWeight: False {}; //____________________________________________________________________________ -/** -.Internal.Parameter_: -..cat:Metafunctions -..summary:Type for function parameters and return values. -..signature:Parameter_::Type -..param.T:A type. -..returns.param.Type:The parameter type for arguments of type $T$. -...text:If $T$ is a pointer or array type, then $Parameter_::Type$ is $T$, -otherwise $Parameter_::Type$ is $T &$. -*/ - // TODO(holtgrew): Really required? template @@ -396,30 +312,35 @@ struct Parameter_ typedef T & Type; }; +template +struct Parameter_ +{ + typedef T const & Type; +}; + template struct Parameter_ { typedef T * Type; }; +template +struct Parameter_ +{ + typedef T const * Type; +}; + template struct Parameter_ { typedef T * Type; }; -/** -.Internal._toParameter: -..cat:Functions -..summary:Transforms pointers to parameter types. -..signature:_toParameter(pointer) -..param.pointer:A pointer. -..param.T:A Type. -...text:$object$ is transformed into the parameter type of $T$ that is given by @Internal.Parameter_@. -...note:This type must be explicitely specified. 
-..returns:To $TParameter$ transformed $object$. -..see:Internal.Parameter_ -*/ +template +struct Parameter_ +{ + typedef T const * Type; +}; // TODO(holtgrew): Really required? @@ -446,56 +367,6 @@ SEQAN_HOST_DEVICE inline _toParameter(T const & _object) //____________________________________________________________________________ -/** -.Internal.ConstParameter_: -..cat:Metafunctions -..summary:Type for constant function parameters and return values. -..signature:ConstParameter_::Type -..param.T:A type. -..returns.param.Type:The const parameter type for arguments of type $T$. -...text:If $T$ is a pointer or array type, then $Parameter_::Type$ is a pointer to a const array, -otherwise $Parameter_::Type$ is $T const &$. -..see:Internal.Parameter_ -*/ - -// TODO(holtgrew): Really required? - -template -struct ConstParameter_ -{ - typedef T const & Type; -}; - -template -struct ConstParameter_: - public ConstParameter_ {}; - -template -struct ConstParameter_ -{ - typedef T const * Type; -}; - -template -struct ConstParameter_ -{ - typedef T const * Type; -}; - -template -struct ConstParameter_ -{ - typedef T const * Type; -}; - -template -struct ConstParameter_ -{ - typedef T const * Type; -}; - -//____________________________________________________________________________ - /*! * @mfn Member * @headerfile @@ -506,14 +377,12 @@ struct ConstParameter_ * @tparam TSpec A tag to identify the object's member. * @return Type The resulting object's member type. * - * @section Remarks - * - * This metafunction is used to control the type of a member of a given object. It works analogously to @link Index#Fibre @endlink. + * This metafunction is used to control the type of a member of a given object. It works analogously to @link Fibre @endlink. * For instance, it is used to change the relationship between two objects from aggregation to composition and vice versa. 
* - * @see Index#Fibre + * @see Fibre */ - + template struct Member; @@ -523,19 +392,6 @@ struct Member : //____________________________________________________________________________ -/** -.Internal.Pointer_: -..cat:Metafunctions -..summary:The associated pointer type. -..signature:Pointer_::Type -..param.T:A type. -..returns.param.Type:A pointer type for $T$. -...text:if $T$ is already a pointer type, then $Pointer_::Type$ is $T$, -otherwise $Pointer_::Type$ is $T *$. -..see:Internal.Parameter_ -..see:Internal._toParameter -*/ - // TODO(holtgrew): Really required? template @@ -574,17 +430,6 @@ struct NonConstPointer_ typedef T * Type; }; -/** -.Internal._toPointer: -..cat:Functions -..summary:Transforms types into pointers. -..signature:_toPointer(object) -..param.object:An object. -..returns:$object$, transformed to a pointer. -...text:The type of the returned pointer is given by @Internal.Pointer_@. -..see:Internal.Pointer_ -*/ - // TODO(holtgrew): Really required? template @@ -611,25 +456,32 @@ SEQAN_CHECKPOINT } // -------------------------------------------------------------------------- -// Function _dereference() +// Function _referenceCast() // -------------------------------------------------------------------------- // explicitly give desired dereferenced type as first argument, // e.g. 
_dereference(int*) or _dereference &>(Segment<..> &) template inline T -_dereference(typename RemoveReference::Type & ptr) +_referenceCast(typename RemoveReference::Type & ptr) { return ptr; } template -inline T -_dereference(typename RemoveReference::Type * ptr) +inline SEQAN_FUNC_DISABLE_IF(IsSameType::Type>, T) +_referenceCast(typename RemoveReference::Type * ptr) { return *ptr; } +template +inline SEQAN_FUNC_DISABLE_IF(IsSameType::Type>, T) +_referenceCast(typename RemovePointer::Type & ptr) +{ + return &ptr; +} + //____________________________________________________________________________ @@ -640,21 +492,10 @@ _dereference(typename RemoveReference::Type * ptr) * @signature LENGTH::VALUE; * * @tparam T The type to query for its length. - * + * * @return VALUE The length of T. */ -/** -.Metafunction.LENGTH: -..cat:Basic -..summary:Number of elements in a fixed-size container. -..signature:LENGTH::VALUE -..param.T:Type for which the number of elements is determined. -..returns.param.VALUE:Number of elements. -..remarks.text:The default return value is 1 for dynamic-size containers. -..include:seqan/basic.h -*/ - // SEQREV: elements-are-containers should probably not have a default implementation // TODO(holtgrew): Rather switch to static const unsigned VALUE = ? @@ -673,26 +514,13 @@ struct LENGTH: /*! * @mfn WEIGHT * @brief Number of relevant positions in a shape. - * + * * @signature WEIGHT::VALUE; * * @tparam T The Shape type to query. * @return VALUE The number of relevant positions in a shape. */ -/** -.Metafunction.WEIGHT: -..cat:Index -..summary:Number of relevant positions in a shape. -..signature:WEIGHT::Type -..param.T:Shape type for which the number of relevant positions is determined. -...type:Class.Shape -..returns.param.VALUE:Number of relevant positions. -..remarks.text:The default return value is the result of the @Metafunction.LENGTH@ function. -For gapped shapes this is the number of '1's. 
-..include:seqan/basic.h -*/ - // TODO(holtgrew): Should probably go to wherever shapes are defined. template diff --git a/seqan/basic/basic_view.h b/seqan/basic/basic_view.h new file mode 100644 index 0000000..debdcfc --- /dev/null +++ b/seqan/basic/basic_view.h @@ -0,0 +1,190 @@ +// ========================================================================== +// SeqAn - The Library for Sequence Analysis +// ========================================================================== +// Copyright (c) 2013 NVIDIA Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA Corporation nor the names of +// its contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +// DAMAGE. +// +// ========================================================================== +// Author: Enrico Siragusa +// ========================================================================== + +#ifndef SEQAN_BASIC_VIEW_H +#define SEQAN_BASIC_VIEW_H + +namespace seqan { + +// ============================================================================ +// Metafunctions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Metafunction View +// ---------------------------------------------------------------------------- + +/*! + * @mfn View + * @headerfile + * @brief Converts a given type into its view type. + * + * @signature View::Type; + * + * @tparam TObject The type to be converted into a view type. + * @return Type The resulting view type. + * + * This metafunction is used to convert device containers into views of device containers. + * Subsequently, the view of a device container can be safely passed to and used in device space. + * On the host, a view of a @link String @endlink is equivalent to an @link SegmentableConcept#Infix @endlink of the complete string. + * @link RemoveView @endlink is the inverse of this metafunction. 
+ * + * @see RemoveView + * @see Device + * @see SegmentableConcept#Infix + */ + +template +struct View +{ + typedef TObject Type; +}; + +template +struct View +{ + typedef typename View::Type const Type; +}; + +// ---------------------------------------------------------------------------- +// Metafunction RemoveView +// ---------------------------------------------------------------------------- + +/*! + * @mfn RemoveView + * @headerfile + * @brief Converts a given view type into its original type. + * + * @signature RemoveView::Type; + * + * @tparam TObject The view type to be converted into its original type. + * @return Type The resulting original type. + * + * @link View @endlink is the inverse of this metafunction. + * + * @see View + */ + +template +struct RemoveView +{ + typedef TObject Type; +}; + +template +struct RemoveView +{ + typedef typename RemoveView::Type const Type; +}; + +// ---------------------------------------------------------------------------- +// Metafunction IsView +// ---------------------------------------------------------------------------- + +/*! + * @mfn IsView + * @headerfile + * @brief Tests if a given type is a view type. + * + * @signature IsView::Type; + * + * @tparam TObject The type to be tested for being a view type. + * @return Type @link LogicalValuesTags#True @endlink or @link LogicalValuesTags#False @endlink. 
+ * + * @see View + * @see RemoveView + */ + +template +struct IsView : public False {}; + +template +struct IsView : public IsView {}; + +// ---------------------------------------------------------------------------- +// Metafunction IfView +// ---------------------------------------------------------------------------- + +template +struct IfView +{ + typedef typename If, T1, T2>::Type Type; +}; + +// ============================================================================ +// Functions +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Function view() +// ---------------------------------------------------------------------------- + +/*! + * @fn TView view + * @headerfile + * @brief Returns the view of a given object. + * + * @signature TView view(object); + * + * @param[in] object A generic object. + * @return TView The @link View @endlink type of the given object. 
+ * + * @see View + * @see IsView + */ + +template +inline typename View::Type +view(TObject & object) +{ + return typename View::Type(object); +} + +template +inline typename View::Type +view(TObject const & object) +{ + return typename View::Type(object); +} + +template +inline typename View::Type +view(TObject * object) +{ + return typename View::Type(value(object)); +} + +} // namespace seqan + +#endif // #ifndef SEQAN_BASIC_VIEW_H diff --git a/seqan/basic/boost_preprocessor_subset.h b/seqan/basic/boost_preprocessor_subset.h index bef6d2b..7e0df98 100644 --- a/seqan/basic/boost_preprocessor_subset.h +++ b/seqan/basic/boost_preprocessor_subset.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -39,8 +39,8 @@ // probably a bad idea to repeat this. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_SEQAN_PREPROCESSOR_SUBSET_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_SEQAN_PREPROCESSOR_SUBSET_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_SEQAN_PREPROCESSOR_SUBSET_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_SEQAN_PREPROCESSOR_SUBSET_H_ // -------------------------------------------------------------------------- // ==> boost/preprocessor/tuple/rem.hpp <== @@ -5112,7 +5112,7 @@ // Copyright (c) 2001-2003 John Maddock // Copyright (c) 2001 Darin Adler // Copyright (c) 2001 Peter Dimov -// Copyright (c) 2002 Bill Kempf +// Copyright (c) 2002 Bill Kempf // Copyright (c) 2002 Jens Maurer // Copyright (c) 2002-2003 David Abrahams // Copyright (c) 2003 Gennaro Prota @@ -5135,4 +5135,4 @@ #define SEQAN_DO_JOIN2( X, Y ) X##Y -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_SEQAN_PREPROCESSOR_SUBSET_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_SEQAN_PREPROCESSOR_SUBSET_H_ diff --git a/seqan/basic/builtin_functions.h b/seqan/basic/builtin_functions.h index 55f849f..e3652d3 100644 --- a/seqan/basic/builtin_functions.h +++ b/seqan/basic/builtin_functions.h @@ -1,7 +1,7 @@ // ========================================================================== // builtin_functions.h // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -37,8 +37,8 @@ // are overridden for some classes. 
// ========================================================================== -#ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BUILTIN_FUNCTIONS_H_ -#define SEQAN_CORE_INCLUDE_SEQAN_BASIC_BUILTIN_FUNCTIONS_H_ +#ifndef SEQAN_INCLUDE_SEQAN_BASIC_BUILTIN_FUNCTIONS_H_ +#define SEQAN_INCLUDE_SEQAN_BASIC_BUILTIN_FUNCTIONS_H_ namespace seqan { @@ -58,212 +58,6 @@ namespace seqan { // Functions // ============================================================================ -/** -.Function.operator= -..cat:C++ built-in -..summary:C++ built-in Assignment operator. - -.Function.operator+ -..cat:C++ built-in -..summary:C++ built-in addition operator. - -.Function.operator+ (unary) -..cat:C++ built-in -..summary:C++ built-in unary plus (integer promotion) operator. - -.Function.operator- -..cat:C++ built-in -..summary:C++ built-in subtraction operator. - -.Function.operator- (unary) -..cat:C++ built-in -..summary:C++ built-in unary minus (additive inverse) operator. - -.Function.operator* -..cat:C++ built-in -..summary:C++ built-in multiplication operator. - -.Function.operator/ -..cat:C++ built-in -..summary:C++ built-in division operator. - -.Function.operator% -..cat:C++ built-in -..summary:C++ built-in modulo operator. - -.Function.operator++ (prefix) -..cat:C++ built-in -..summary:C++ built-in prefix increment operator. - -.Function.operator++ (suffix) -..cat:C++ built-in -..summary:C++ built-in suffix increment operator. - -.Function.operator-- (prefix) -..cat:C++ built-in -..summary:C++ built-in prefix decrement operator. - -.Function.operator-- (suffix) -..cat:C++ built-in -..summary:C++ built-in suffix decrement operator. - -.Function.operator== -..cat:C++ built-in -..summary:C++ built-in equal comparison operator. - -.Function.operator!= -..cat:C++ built-in -..summary:C++ built-in inequal comparison operator. - -.Function.operator> -..cat:C++ built-in -..summary:C++ built-in greater-than comparison operator. 
- -.Function.operator< -..cat:C++ built-in -..summary:C++ built-in less-than comparison operator. - -.Function.operator>= -..cat:C++ built-in -..summary:C++ built-in greather-than-or-equal comparison operator. - -.Function.operator<= -..cat:C++ built-in -..summary:C++ built-in less-than-or-equal comparison operator. - -.Function.operator! -..cat:C++ built-in -..summary:C++ built-in logical negation operator. - -.Function.operator&& -..cat:C++ built-in -..summary:C++ built-in logical AND operator. - -.DISABLED.Function.operator\pipe\pipe|operator|| -..cat:C++ built-in -..summary:C++ built-in logical OR operator. - -.Function.operator~ -..cat:C++ built-in -..summary:C++ built-in bitwise NOT operator. - -.Function.operator& -..cat:C++ built-in -..summary:C++ built-in bitwise AND operator. - -.DISABLED.Function.operator\pipe|operator\pipe -..cat:C++ built-in -..summary:C++ built-in bitwise OR operator. - -.Function.operator^ -..cat:C++ built-in -..summary:C++ built-in bitwise XOR operator. - -.Function.operator<< -..cat:C++ built-in -..summary:C++ built-in bitwise left shift operator. - -.Function.operator<< (Stream) -..cat:C++ built-in -..summary:C++ built-in bitwise put-to/stream insertion operator. - -.Function.operator>> -..cat:C++ built-in -..summary:C++ built-in bitwise right shift operator. - -.Function.operator>> (Stream) -..cat:C++ built-in -..summary:C++ built-in bitwise get-from/stream extraction operator. - -.Function.operator+= -..cat:C++ built-in -..summary:C++ built-in addition assignment operator. - -.Function.operator-= -..cat:C++ built-in -..summary:C++ built-in subtraction assignment operator. - -.Function.operator*= -..cat:C++ built-in -..summary:C++ built-in multiplication assignment operator. - -.Function.operator/= -..cat:C++ built-in -..summary:C++ built-in division assignment operator. - -.Function.operator%= -..cat:C++ built-in -..summary:C++ built-in modulo assignment operator. 
- -.Function.operator&= -..cat:C++ built-in -..summary:C++ built-in bitwise AND assignment operator. - -.DISABLED.Function.operator\pipe=|operator|= -..cat:C++ built-in -..summary:C++ built-in bitwise OR assignment operator. - -.Function.operator^= -..cat:C++ built-in -..summary:C++ built-in bitwise XOR assignment operator. - -.Function.operator<<= -..cat:C++ built-in -..summary:C++ built-in bitwise left shift assignment operator. - -.Function.operator>>= -..cat:C++ built-in -..summary:C++ built-in bitwise right shift assignment operator. - -.Function.operator[] -..cat:C++ built-in -..summary:C++ built-in array subscript operator. - -.Function.operator* (indirection) -..cat:C++ built-in -..summary:C++ built-in indirection/object-pointed-to-by operator. - -.Function.operator& (reference) -..cat:C++ built-in -..summary:C++ built-in reference/address-of operator. - -.Function.operator-> -..cat:C++ built-in -..summary:C++ built-in structure dereference operator. - -.Function.operator->* -..cat:C++ built-in -..summary:C++ built-in member-pointed-to-by-b-of-object-pointed-to-by-a operator. - -.Function.operator() -..cat:C++ built-in -..summary:C++ built-in function call operator. - -.Function.operator, -..cat:C++ built-in -..summary:C++ built-in comma operator. - -.Function.cast operator -..cat:C++ built-in -..summary:C++ built-in cast operator. - -.Function.operator new -..cat:C++ built-in -..summary:C++ built-in allocation operator. - -.Function.operator new[] -..cat:C++ built-in -..summary:C++ built-in array allocation operator operator. - -.Function.operator delete -..cat:C++ built-in -..summary:C++ built-in deallocation operator. - -.Function.operator delete[] -..cat:C++ built-in -..summary:C++ built-in array deallocation operator. 
- */ - } // namespace seqan -#endif // #ifndef SEQAN_CORE_INCLUDE_SEQAN_BASIC_BUILTIN_FUNCTIONS_H_ +#endif // #ifndef SEQAN_INCLUDE_SEQAN_BASIC_BUILTIN_FUNCTIONS_H_ diff --git a/seqan/basic/concept_checking.h b/seqan/basic/concept_checking.h index 52c4c55..d7512fa 100644 --- a/seqan/basic/concept_checking.h +++ b/seqan/basic/concept_checking.h @@ -1,7 +1,7 @@ // ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== -// Copyright (c) 2006-2013, Knut Reinert, FU Berlin +// Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -29,6 +29,7 @@ // DAMAGE. // // ========================================================================== +// Author: David Weese // Author: Manuel Holtgrewe // ========================================================================== // A minimal subset of the Boost Concept Checking Library. A lot of the code @@ -40,8 +41,8 @@ // SEQAN_NO_GENERATED_FORWARDS -#ifndef CORE_INCLUDE_SEQAN_BASIC_CONCEPT_CHECKING_H_ -#define CORE_INCLUDE_SEQAN_BASIC_CONCEPT_CHECKING_H_ +#ifndef INCLUDE_SEQAN_BASIC_CONCEPT_CHECKING_H_ +#define INCLUDE_SEQAN_BASIC_CONCEPT_CHECKING_H_ namespace seqan { @@ -50,8 +51,8 @@ namespace seqan { // --------------------------------------------------------------------------- // (C) Copyright John Maddock 2000. -// Use, modification and distribution are subject to the -// Boost Software License, Version 1.0. (See accompanying file +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // See http://www.boost.org/libs/static_assert for documentation. 
@@ -72,15 +73,6 @@ namespace seqan { # define SEQAN_STATIC_ASSERT_BOOL_CAST(x) (bool)(x) //#endif -// -// If the compiler warns about unused typedefs then enable this: -// -#if defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 7))) -# define SEQAN_STATIC_ASSERT_UNUSED_ATTRIBUTE __attribute__((unused)) -#else -# define SEQAN_STATIC_ASSERT_UNUSED_ATTRIBUTE -#endif - #ifdef SEQAN_CXX11_STANDARD # define SEQAN_STATIC_ASSERT( B ) static_assert(B, #B) #else @@ -133,7 +125,7 @@ template struct static_assert_test{}; SEQAN_JOIN(seqan_static_assert_typedef_, __COUNTER__) /* #elif defined(SEQAN_INTEL_CXX_VERSION) || defined(SEQAN_SA_GCC_WORKAROUND) -// agurt 15/sep/02: a special care is needed to force Intel C++ issue an error +// agurt 15/sep/02: a special care is needed to force Intel C++ issue an error // instead of warning in case of failure # define SEQAN_STATIC_ASSERT( B ) \ typedef char SEQAN_JOIN(seqan_static_assert_typedef_, __LINE__) \ @@ -159,7 +151,7 @@ template struct static_assert_test{}; #define SEQAN_STATIC_ASSERT( B ) \ typedef static_assert_test<\ sizeof(STATIC_ASSERTION_FAILURE< SEQAN_STATIC_ASSERT_BOOL_CAST( B ) >)>\ - SEQAN_JOIN(seqan_static_assert_typedef_, __LINE__) SEQAN_STATIC_ASSERT_UNUSED_ATTRIBUTE + SEQAN_JOIN(seqan_static_assert_typedef_, __LINE__) SEQAN_UNUSED #endif /* #else @@ -185,7 +177,7 @@ struct unaryfunptr_arg_type; template struct unaryfunptr_arg_type { - typedef Arg type; + typedef Arg type; }; template <> @@ -212,7 +204,8 @@ template struct concept_check_; template void concept_check_failed() { - ((Model*)0)->~Model(); + Model *p = static_cast(NULL); + p->~Model(); } template @@ -228,9 +221,9 @@ struct concept_check_ {}; # define SEQAN_CONCEPT_ASSERT_FN( ModelFnPtr ) \ - typedef ::seqan::detail::instantiate< \ - &::seqan::requirement_::failed> \ - SEQAN_PP_CAT(seqan_concept_check,__LINE__) SEQAN_STATIC_ASSERT_UNUSED_ATTRIBUTE + typedef seqan::detail::instantiate< \ + &seqan::requirement_::failed> \ + 
SEQAN_PP_CAT(seqan_concept_check,__LINE__) SEQAN_UNUSED // --------------------------------------------------------------------------- // ==> boost/concept/assert.hpp <== @@ -241,30 +234,41 @@ struct concept_check_ // http://www.boost.org/LICENSE_1_0.txt). /*! - * @macro SEQAN_CONCEPT_ASSERT + * @defgroup ConceptChecking Concept Checking + * @brief Macros for the concept checking system in SeqAn. + * + * SeqAn's concept checking system is copied from Boost. The license for the library is as follows: + * + * @code{.cpp} + * // Copyright David Abrahams 2006. Distributed under the Boost Software + * // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at + * // http://www.boost.org/LICENSE_1_0.txt). + * @endcode + */ + +/*! + * @macro ConceptChecking#SEQAN_CONCEPT_ASSERT * @brief Perform a concept check. * @headerfile - * + * * @signature SEQAN_CONCEPT_ASSERT((concept)) - * + * * @param concept Concept specialized with a the type that should be checked. - * - * @section Remarks - * + * * This macro is a compile-time assertion and requires the concept specialized * with the tested types to compile. The check neither consumes memory nor * running time. The macro can be used at the beginning of a function or within * a struct/class definition. The checked concepts should be as restrictive and * generic as possible to on the one hand cover all used functionality and on * the other hand not limit the applicability of a function/class. - * + * * @section Examples - * + * * @code{.cpp} * typedef typename Value::Type TValue; * typedef typename Position::Type TPosition; * typedef typename Difference::Type TDifference; - * + * * SEQAN_CONCEPT_ASSERT((AlphabetConcept)); * SEQAN_CONCEPT_ASSERT((SignedIntegerConcept)); * SEQAN_CONCEPT_ASSERT((UnsignedIntegerConcept)); @@ -272,28 +276,6 @@ struct concept_check_ * @see Is */ -/** -.Macro.SEQAN_CONCEPT_ASSERT -..cat:Concepts -..summary:Perform a concept check. 
-..signature:SEQAN_CONCEPT_ASSERT((concept)) -..param.concept:Concept specialized with a the type that should be checked. -..remarks:This macro is a compile-time assertion and requires the concept specialized with the tested types to compile. -The check neither consumes memory nor running time. -The macro can be used at the beginning of a function or within a struct/class definition. -The checked concepts should be as restrictive and generic as possible to on the one hand -cover all used functionality and on the other hand not limit the applicability of a function/class. -..example.code: -typedef typename Value::Type TValue; -typedef typename Position::Type TPosition; -typedef typename Difference::Type TDifference; - -SEQAN_CONCEPT_ASSERT((AlphabetConcept)); -SEQAN_CONCEPT_ASSERT((SignedIntegerConcept)); -SEQAN_CONCEPT_ASSERT((UnsignedIntegerConcept)); -..include:seqan/basic.h - */ - // Usage, in class or function context: // SEQAN_CONCEPT_ASSERT((UnaryFunctionConcept)); # define SEQAN_CONCEPT_ASSERT(ModelInParens) \ @@ -304,30 +286,34 @@ SEQAN_CONCEPT_ASSERT((UnsignedIntegerConcept)); template struct usage_requirements { - ~usage_requirements() { ((Model*)0)->~Model(); } + ~usage_requirements() + { + Model *p = static_cast(NULL); + p->~Model(); + } }; /*! - * @macro SEQAN_CONCEPT_USAGE + * @macro ConceptChecking#SEQAN_CONCEPT_USAGE * @headerfile * @brief Defines valid expressions. * * @signature SEQAN_CONCEPT_USAGE(name) * - * @param[in] name Identifier of the concept defined with @link SEQAN_CONCEPT @endlink or @link SEQAN_CONCEPT_REFINE - * @endlink. + * @param[in] name Identifier of the concept defined with @link ConceptChecking#SEQAN_CONCEPT @endlink or + * @link ConceptChecking#SEQAN_CONCEPT_REFINE @endlink. * * This macro should be used to introduce a block (enclosed with curly braces) of valid expressions within a newly * defined concept. Valid expressions should test for available functions, operators and the correctness of return - * types. 
Use helper functions, e.g. @link ignoreUnusedVariableWarning @endlink, @link requireBooleanExpr @endlink and - * @link sameType @endlink. + * types. Use helper functions, e.g. @link ignoreUnusedVariableWarning @endlink, + * @link ConceptChecking#requireBooleanExpr @endlink and @link ConceptChecking#sameType @endlink. * * @section Examples * * @code{.cpp} * SEQAN_CONCEPT(EqualityComparable,(T)) * { - * SEQAN_CONCEPT_USAGE(EqualityComparable) + * SEQAN_CONCEPT_USAGE(EqualityComparable) * { * requireBooleanExpr(a == b); * requireBooleanExpr(a != b); @@ -337,37 +323,11 @@ struct usage_requirements * }; * @endcode * - * @see requireBooleanExpr - * @see SEQAN_CONCEPT - * @see SEQAN_CONCEPT_REFINE + * @see ConceptChecking#requireBooleanExpr + * @see ConceptChecking#SEQAN_CONCEPT + * @see ConceptChecking#SEQAN_CONCEPT_REFINE */ -/** -.Macro.SEQAN_CONCEPT_USAGE -..cat:Concepts -..summary:Define valid expressions. -..signature:SEQAN_CONCEPT_USAGE(name) -..param.name:Identifier of the concept defined with @Macro.SEQAN_CONCEPT@ or @Macro.SEQAN_CONCEPT_REFINE@. -..remarks:This macro should be used to introduce a block (enclosed with curly braces) of valid expressions within -a newly defined concept. -Valid expressions should test for available functions, operators and the correctness of return types. -Use helper functions, e.g. @Function.ignoreUnusedVariableWarning@, @Function.requireBooleanExpr@ and @Function.sameType@. 
-..example.code: -SEQAN_CONCEPT(EqualityComparable,(T)) -{ - SEQAN_CONCEPT_USAGE(EqualityComparable) - { - requireBooleanExpr(a == b); - requireBooleanExpr(a != b); - } -private: - T a, b; -}; -..see:Function.requireBooleanExpr -..see:Macro.SEQAN_CONCEPT -..see:Macro.SEQAN_CONCEPT_REFINE -..include:seqan/basic.h - */ #define SEQAN_CONCEPT_USAGE(model) \ SEQAN_CONCEPT_ASSERT((seqan::usage_requirements)); \ ~model() @@ -386,7 +346,7 @@ namespace detail { template struct wrap_constraints {}; - + template inline yes has_constraints_(Model*, wrap_constraints* = 0); inline no has_constraints_(...); @@ -420,7 +380,11 @@ namespace detail template struct requirement { - static void failed() { ((Model*)0)->~Model(); } + static void failed() + { + Model *p = static_cast(NULL); + p->~Model(); + } }; struct failed {}; @@ -428,15 +392,23 @@ struct failed {}; template struct requirement { - static void failed() { ((Model*)0)->~Model(); } + static void failed() + { + Model *p = static_cast(NULL); + p->~Model(); + } }; template struct constraint { - static void failed() { ((Model*)0)->constraints(); } + static void failed() + { + Model *p = static_cast(NULL); + p->constraints(); + } }; - + template struct requirement_ : IfC::Type::VALUE, /* should be called "has_constraints", see above */ @@ -446,9 +418,9 @@ struct requirement_ {}; # define SEQAN_CONCEPT_ASSERT_FN( ModelFnPtr ) \ - typedef ::seqan::detail::instantiate< \ - &::seqan::requirement_::failed> \ - SEQAN_PP_CAT(seqan_concept_check,__LINE__) SEQAN_STATIC_ASSERT_UNUSED_ATTRIBUTE + typedef seqan::detail::instantiate< \ + &seqan::requirement_::failed> \ + SEQAN_PP_CAT(seqan_concept_check,__LINE__) SEQAN_UNUSED // --------------------------------------------------------------------------- // ==> boost/concept_check/detail/requires.hpp <== @@ -480,7 +452,7 @@ template struct Requires_ : unaryfunptr_arg_type {}; -# define SEQAN_CONCEPT_REQUIRES_(r,data,t) + (::seqan::_requires_::value) +# define SEQAN_CONCEPT_REQUIRES_(r,data,t) 
+ (seqan::_requires_::value) #if defined(NDEBUG) @@ -490,7 +462,7 @@ struct Requires_ : unaryfunptr_arg_type #else // #if defined(NDEBUG) # define SEQAN_CONCEPT_REQUIRES(models, result) \ - typename ::seqan::Requires_< \ + typename seqan::Requires_< \ (0 SEQAN_PP_SEQ_FOR_EACH(SEQAN_CONCEPT_REQUIRES_, ~, models)), \ void(*)result \ >::type @@ -519,7 +491,7 @@ inline void functionRequires(Model* = 0) /*! * @fn ignoreUnusedVariableWarning - * @headefile + * @headerfile * @brief Removes unused variable warning. * * @signature void ignoreUnusedVariableWarning(x); @@ -530,17 +502,6 @@ inline void functionRequires(Model* = 0) * operators. Use this functions to remove a compile warning that otherwise would be raised in this case. */ -/** -.Function.ignoreUnusedVariableWarning -..summary:Removes unused variable warning. -..cat:Concepts -..signature:ignoreUnusedVariableWarning(x) -..param.x: Variable that causes the unused variable warning. -..remarks:It sometimes is necessary to define variables which are not further used, e.g. to check available assignment operators. -Use this functions to remove a compile warning that otherwise would be raised in this case. -..see:Macro.SEQAN_CONCEPT_USAGE -..include:seqan/basic.h - */ template SEQAN_HOST_DEVICE inline void ignoreUnusedVariableWarning(T const&) {} // --------------------------------------------------------------------------- @@ -554,12 +515,12 @@ template SEQAN_HOST_DEVICE inline void ignoreUnusedVariableWarning(T c // SEQAN_CONCEPT_REFINE added by David Weese /*! - * @macro SEQAN_CONCEPT + * @macro ConceptChecking#SEQAN_CONCEPT * @brief Defines a new concept. - * @headerfile seqan/basic.h - * + * @headerfile + * * @signature SEQAN_CONCEPT(name, params) - * + * * @param params Template paramter list in parantheses, e.g. (T) or (T1)(T2). * Typically, template parameters are models, i.e. 
one or multiple * classes that should be checked for fulfilling a concept.This is @@ -567,19 +528,17 @@ template SEQAN_HOST_DEVICE inline void ignoreUnusedVariableWarning(T c * href="http://www.boost.org/doc/libs/1_47_0/libs/preprocessor/doc/index.html">more. * @param name Concept identifier. Non-trivial concepts should have an * identifier with a Concept-suffix. - * - * @section Remarks - * + * * A concept is implemented as a template struct with name name and * arguments params. The concept checking should be part of the struct - * definition. Associated types should be checked via @link SEQAN_CONCEPT_ASSERT - * @endlink and valid expressions in a function @link SEQAN_CONCEPT_USAGE + * definition. Associated types should be checked via @link ConceptChecking#SEQAN_CONCEPT_ASSERT + * @endlink and valid expressions in a function @link ConceptChecking#SEQAN_CONCEPT_USAGE * @endlink, see below. Variables used in valid expressions should be (private) * struct members instead of local variables in member functions (read more. - * + * * @section Examples - * + * * @code{.cpp} * SEQAN_CONCEPT(Assignable,(T)) * { @@ -598,7 +557,7 @@ template SEQAN_HOST_DEVICE inline void ignoreUnusedVariableWarning(T c * T a; * T b; * }; - * + * * SEQAN_CONCEPT(EqualityComparable,(T)) * { * SEQAN_CONCEPT_USAGE(EqualityComparable) @@ -611,52 +570,7 @@ template SEQAN_HOST_DEVICE inline void ignoreUnusedVariableWarning(T c * }; * @endcode * - * @see SEQAN_CONCEPT_USAGE - */ - -/** -.Macro.SEQAN_CONCEPT -..cat:Concepts -..summary:Defines a new concept. -..signature:SEQAN_CONCEPT(name, params) -..param.name:Concept identifier. Non-trivial concepts should have an identifier with a "Concept"-suffix. -..param.params:Template paramter list in parantheses, e.g. (T) or (T1)(T2). -Typically, template parameters are models, i.e. one or multiple classes that should be checked for fulfilling a concept. 
-...remarks:This is a sequence of the Boost Preprocessor Library, read @http://www.boost.org/doc/libs/1_47_0/libs/preprocessor/doc/index.html|more@. -..remarks:A concept is implemented as a template struct with name $name$ and arguments $params$. -The concept checking should be part of the struct definition. -Associated types should be checked via @Macro.SEQAN_CONCEPT_ASSERT@ and valid expressions in a function @Macro.SEQAN_CONCEPT_USAGE@, see below. -Variables used in valid expressions should be (private) struct members instead of local variables in member functions (read @http://www.boost.org/doc/libs/1_47_0/libs/concept_check/creating_concepts.htm|more@). -..example.code: -SEQAN_CONCEPT(Assignable,(T)) -{ - SEQAN_CONCEPT_USAGE(Assignable) - { - a = b; // require assignment operator - constConstraints(b); - } -private: - void constConstraints(const T& x) - { - a = x; // const required for argument to assignment - ignoreUnusedVariableWarning(x); - } -private: - T a; - T b; -}; - -SEQAN_CONCEPT(EqualityComparable,(T)) -{ - SEQAN_CONCEPT_USAGE(EqualityComparable) - { - requireBooleanExpr(a == b); - requireBooleanExpr(a != b); - } -private: - T a, b; -}; -..include:seqan/basic.h + * @see ConceptChecking#SEQAN_CONCEPT_USAGE */ # define SEQAN_CONCEPT(name, params) \ @@ -664,12 +578,12 @@ SEQAN_CONCEPT(EqualityComparable,(T)) struct name /*! - * @macro SEQAN_CONCEPT_REFINE + * @macro ConceptChecking#SEQAN_CONCEPT_REFINE * @brief Defines a new concept as a refinement of existing concepts. - * @headerfile seqan/basic.h - * + * @headerfile + * * @signature SEQAN_CONCEPT_REFINE(name, params, refinedConcepts) - * + * * @param params Template paramter list in parantheses, e.g. (T) or (T1)(T2). * Typically, template parameters are models, i.e. 
one or multiple * classes that should be checked for fulfilling a concept.This is @@ -682,57 +596,26 @@ SEQAN_CONCEPT(EqualityComparable,(T)) * into the requirements of the new concept.This is a * sequence of the Boost Preprocessor Library, read * more - * - * @section Remarks - * + * * A concept is implemented as a template struct with name name and * arguments params. The struct inherits all refined concept structs. * The concept checking should be part of the struct definition. For more - * information, see @link SEQAN_CONCEPT @endlink. - * + * information, see @link ConceptChecking#SEQAN_CONCEPT @endlink. + * * @section Examples - * + * * @code{.cpp} * SEQAN_CONCEPT_REFINE(AlphabetConcept, (TValue), (Assignable)(DefaultConstructible)(CopyConstructible)) * { * TValue val, val2; - * + * * SEQAN_CONCEPT_USAGE(AlphabetConcept) * { * assign(val, val2); * } * }; * @endcode - * @see SEQAN_CONCEPT_USAGE - */ - -/** -.Macro.SEQAN_CONCEPT_REFINE -..cat:Concepts -..summary:Defines a new concept as a refinement of existing concepts. -..signature:SEQAN_CONCEPT_REFINE(name, params, refinedConcepts) -..param.name:Concept identifier. Non-trivial concepts should have an identifier with a "Concept"-suffix. -..param.params:Template paramter list in parantheses, e.g. (T) or (T1)(T2). -Typically, template parameters are models, i.e. one or multiple classes that should be checked for fulfilling a concept. -...remarks:This is a sequence of the Boost Preprocessor Library, read @http://www.boost.org/doc/libs/1_47_0/libs/preprocessor/doc/index.html|more@. -..param.refinedConcepts:Identifiers of concepts that are refined by the new concept. -...remarks:Refined concepts are implicitly integrated into the requirements of the new concept. -...remarks:This is a sequence of the Boost Preprocessor Library, read @http://www.boost.org/doc/libs/1_47_0/libs/preprocessor/doc/index.html|more@. -..remarks:A concept is implemented as a template struct with name $name$ and arguments $params$. 
-The struct inherits all refined concept structs. -The concept checking should be part of the struct definition. -For more information, see @Macro.SEQAN_CONCEPT@. -..example.code: -SEQAN_CONCEPT_REFINE(AlphabetConcept, (TValue), (Assignable)(DefaultConstructible)(CopyConstructible)) -{ - TValue val, val2; - - SEQAN_CONCEPT_USAGE(AlphabetConcept) - { - assign(val, val2); - } -}; -..include:seqan/basic.h + * @see ConceptChecking#SEQAN_CONCEPT_USAGE */ # define SEQAN_CONCEPT_REFINE(name, params, refinedConcepts) \ @@ -752,65 +635,59 @@ SEQAN_CONCEPT_REFINE(AlphabetConcept, (TValue), (Assignable)(DefaultConstructibl SEQAN_PP_SEQ_FOR_EACH_I(SEQAN_CONCEPT_REFINE_superclass,params,refinedConcepts) /*! - * @macro SEQAN_CONCEPT_IMPL + * @macro ConceptChecking#SEQAN_CONCEPT_IMPL * @brief Defines which concepts a model fulfills. - * @headerfile seqan/basic.h - * - * - * @signature template<> SEQAN_CONCEPT_IMPL(name, implementedConcepts) - * template SEQAN_CONCEPT_IMPL(name, implementedConcepts) - * + * @headerfile + * + * + * @signature template<> // required, even if name has no template arguments + * SEQAN_CONCEPT_IMPL((name), implementedConcepts) + * + * template + * SEQAN_CONCEPT_IMPL((name), implementedConcepts) + * * @param implementedConcepts Identifiers of concepts that are fulfilled by the model. This is a sequence of the * Boost Preprocessor Library, read more. * @param name Model type, i.e. an identifier or an identifier with template * arguments. - * - * @section Remarks - * + * * The metafunction @link Is @endlink can be used to determine whether a class * models (fulfills) a concepts. A model of a concept must pass the concept - * check via @link SEQAN_CONCEPT_ASSERT @endlink. - * + * check via @link ConceptChecking#SEQAN_CONCEPT_ASSERT @endlink. 
+ * * @section Examples - * + * * @code{.cpp} * template - * SEQAN_CONCEPT_IMPL(String, (StringConcept)); + * SEQAN_CONCEPT_IMPL((String), (StringConcept)); * @endcode */ -/** -.Macro.SEQAN_CONCEPT_IMPL -..cat:Concepts -..summary:Defines which concepts a model fulfills. -..signature: -template<> -SEQAN_CONCEPT_IMPL(name, implementedConcepts) - -template -SEQAN_CONCEPT_IMPL(name, implementedConcepts) -..param.name:Model type, i.e. an identifier or an identifier with template arguments. -..param.implementedConcepts:Identifiers of concepts that are fulfilled by the model. -...remarks:This is a sequence of the Boost Preprocessor Library, read @http://www.boost.org/doc/libs/1_47_0/libs/preprocessor/doc/index.html|more@. -..remarks:The metafunction @Metafunction.Is@ can be used to determine whether a class models (fulfills) a concepts. -A model of a concept must pass the concept check via @Macro.SEQAN_CONCEPT_ASSERT@. -..example.code: -template -SEQAN_CONCEPT_IMPL(String, (StringConcept)); -..include:seqan/basic.h - */ + +// STRIP_PARENS macro by Steven Watanabe (http://lists.boost.org/boost-users/2010/08/61429.php) +#define SEQAN_APPLY(macro, args) SEQAN_APPLY_I(macro, args) +#define SEQAN_APPLY_I(macro, args) macro args +#define SEQAN_STRIP_PARENS(x) SEQAN_EVAL((SEQAN_STRIP_PARENS_I x), x) +#define SEQAN_STRIP_PARENS_I(...) 1,1 +#define SEQAN_EVAL(test, x) SEQAN_EVAL_I(test, x) +#define SEQAN_EVAL_I(test, x) SEQAN_MAYBE_STRIP_PARENS(SEQAN_TEST_ARITY test, x) +#define SEQAN_TEST_ARITY(...) SEQAN_APPLY(SEQAN_TEST_ARITY_I, (__VA_ARGS__, 2, 1)) +#define SEQAN_TEST_ARITY_I(a,b,c,...) c +#define SEQAN_MAYBE_STRIP_PARENS(cond, x) SEQAN_MAYBE_STRIP_PARENS_I(cond, x) +#define SEQAN_MAYBE_STRIP_PARENS_I(cond, x) SEQAN_PP_CAT(SEQAN_MAYBE_STRIP_PARENS_, cond)(x) +#define SEQAN_MAYBE_STRIP_PARENS_1(x) x +#define SEQAN_MAYBE_STRIP_PARENS_2(x) SEQAN_APPLY(SEQAN_MAYBE_STRIP_PARENS_2_I, x) +#define SEQAN_MAYBE_STRIP_PARENS_2_I(...) 
__VA_ARGS__ # define SEQAN_CONCEPT_IMPL(model, implementedConcepts) \ - template <> \ - struct Implements \ + struct Implements \ { \ typedef \ - SEQAN_PP_SEQ_FOR_EACH_I(SEQAN_CONCEPT_LIST_prefix,(model),implementedConcepts) \ + SEQAN_PP_SEQ_FOR_EACH_I(SEQAN_CONCEPT_LIST_prefix,model,implementedConcepts) \ SEQAN_PP_REPEAT(SEQAN_PP_SEQ_SIZE(implementedConcepts),SEQAN_CONCEPT_LIST_suffix,~) Type; \ } - // helper for the SEQAN_CONCEPT, above. # define SEQAN_CONCEPT_typename(r, ignored, index, t) \ SEQAN_PP_COMMA_IF(index) typename t @@ -819,15 +696,15 @@ SEQAN_CONCEPT_IMPL(String, (StringConcept)); # define SEQAN_CONCEPT_REFINE_superclass(r, params, index, t) \ SEQAN_PP_COMMA_IF(index) t # define SEQAN_CONCEPT_LIST_prefix(r, params, index, t) \ - SEQAN_PP_COMMA_IF(index) TagList -# define SEQAN_CONCEPT_LIST_suffix(z, n, text) > + SEQAN_PP_COMMA_IF(index) TagList +# define SEQAN_CONCEPT_LIST_suffix(z, n, text) > // ============================================================================ // Functions // ============================================================================ /*! - * @fn sameType + * @fn ConceptChecking#sameType * @brief Tests for equality of types. * * @signature void sameType(x, y); @@ -838,19 +715,7 @@ SEQAN_CONCEPT_IMPL(String, (StringConcept)); * This function can be used to test for the correctness of function return types or the type of an expression in * concept tests. * - * @see SEQAN_CONCEPT_USAGE - */ - -/** -.Function.sameType -..summary:Tests for equality of types. -..cat:Concepts -..signature:sameType(x, y) -..param.x: Object of a certain type. -..param.y: Object that must be of the same type. -..remarks:This function can be used to test for the correctness of function return types or the type of an expression. -..see:Macro.SEQAN_CONCEPT_USAGE -..include:seqan/basic.h + * @see ConceptChecking#SEQAN_CONCEPT_USAGE */ template @@ -864,56 +729,57 @@ void sameType(T, T) { } /*! * @mfn Is * @brief Returns whether a concept is fulfilled. 
- * @headerfile seqan/basic.h - * + * @headerfile + * * @signature Is::Type * @signature Is::VALUE - * + * * @tparam TConcept A concept that is specialized with type(s) that should be * tested for fulfilling the concept. - * - * @return Type @link Logical Values.tag.True @endlink/true if + * + * @return Type @link LogicalValuesTags#True @endlink/true if * TConcept is a fulfilled concept, otherwise @link - * Logical Values.tag.False @endlink/false. - * - * @section Remarks - * + * LogicalValuesTags#False @endlink/false. + * * The @link Is @endlink-metafunction can be used to test types for fulfilling a concept without causing compilation * errors. If True or true is returned, TConcept must pass the concept test via @link - * SEQAN_CONCEPT_ASSERT @endlink. It can be used to switch between different implementations depending on the concept - * of a type, or in combination with @link EnableIfFunctionality#SEQAN_FUNC_ENABLE_IF @endlink to make a function only - * visible to types of certain concepts. - * + * ConceptChecking#SEQAN_CONCEPT_ASSERT @endlink. It can be used to switch between different implementations + * depending on the concept of a type, or in combination with @link EnableIfFunctionality#SEQAN_FUNC_ENABLE_IF + * @endlink to make a function only visible to types of certain concepts. + * * @section Examples - * + * * @code{.cpp} - * Is >::Type + * Is >::Type * IfC >::VALUE, T1, T2>::Type - * + * * std::cout << Is >::VALUE << std::endl; // 1 * std::cout << Is >::VALUE << std::endl; // 0 * @endcode * Define a hierarchy of concepts and two models Alice and Bob * that implements some of them. @link Is @endlink determines which concepts are * explicitly or implicitly fulfilled. 
- * + * * @code{.cpp} * struct Alice {}; * struct Bob {}; - * + * * SEQAN_CONCEPT(ConceptA, (T)) {}; * SEQAN_CONCEPT(ConceptB, (T)) {}; * SEQAN_CONCEPT_REFINE(ConceptC, (T), (ConceptA)(ConceptB)) {}; * SEQAN_CONCEPT_REFINE(ConceptD, (T), (ConceptC)) {}; - * + * + * template<> // Alice has no template arguments * SEQAN_CONCEPT_IMPL(Alice, (ConceptA)(ConceptB)); + * + * template<> // Bob has no template arguments * SEQAN_CONCEPT_IMPL(Bob, (ConceptC)); - * + * * std::cout << Is< ConceptA >::VALUE << std::endl; // 1 * std::cout << Is< ConceptB >::VALUE << std::endl; // 1 * std::cout << Is< ConceptC >::VALUE << std::endl; // 0 * std::cout << Is< ConceptD >::VALUE << std::endl; // 0 - * + * * std::cout << Is< ConceptA >::VALUE << std::endl; // 1 * std::cout << Is< ConceptB >::VALUE << std::endl; // 1 * std::cout << Is< ConceptC >::VALUE << std::endl; // 1 @@ -921,55 +787,9 @@ void sameType(T, T) { } * @endcode * * @see EnableIfFunctionality#SEQAN_FUNC_ENABLE_IF - * @see SEQAN_CONCEPT_ASSERT + * @see ConceptChecking#SEQAN_CONCEPT_ASSERT */ -/** -.Metafunction.Is -..cat:Concepts -..summary:Returns whether a concept is fulfilled. -..signature:Is::Type -..signature:Is::VALUE -..param.TConcept:A concept that is specialized with type(s) that should be tested for fulfilling the concept. -..returns:@Tag.Logical Values.tag.True@/$true$ if $TConcept$ is a fulfilled concept, otherwise @Tag.Logical Values.tag.False@/$false$. -..remarks: -...text:The @Metafunction.Is@-metafunction can be used to test types for fulfilling a concept without causing compilation errors. -If @Tag.Logical Values.tag.True@/$true$ is returned, $TConcept$ must pass the concept test via @Macro.SEQAN_CONCEPT_ASSERT@. -It can be used to switch between different implementations depending on the concept of a type, or in combination with @Macro.SEQAN_FUNC_ENABLE_IF@ to make a function only visible to types of certain concepts. 
-..example.code: -Is >::Type -IfC >::VALUE, T1, T2>::Type - -std::cout << Is >::VALUE << std::endl; // 1 -std::cout << Is >::VALUE << std::endl; // 0 -..example.text:Define a hierarchy of concepts and two models $Alice$ and $Bob$ that implements some of them. -@Metafunction.Is@ determines which concepts are explicitly or implicitly fulfilled. -..example.code: -struct Alice {}; -struct Bob {}; - -SEQAN_CONCEPT(ConceptA, (T)) {}; -SEQAN_CONCEPT(ConceptB, (T)) {}; -SEQAN_CONCEPT_REFINE(ConceptC, (T), (ConceptA)(ConceptB)) {}; -SEQAN_CONCEPT_REFINE(ConceptD, (T), (ConceptC)) {}; - -SEQAN_CONCEPT_IMPL(Alice, (ConceptA)(ConceptB)); -SEQAN_CONCEPT_IMPL(Bob, (ConceptC)); - -std::cout << Is< ConceptA >::VALUE << std::endl; // 1 -std::cout << Is< ConceptB >::VALUE << std::endl; // 1 -std::cout << Is< ConceptC >::VALUE << std::endl; // 0 -std::cout << Is< ConceptD >::VALUE << std::endl; // 0 - -std::cout << Is< ConceptA >::VALUE << std::endl; // 1 -std::cout << Is< ConceptB >::VALUE << std::endl; // 1 -std::cout << Is< ConceptC >::VALUE << std::endl; // 1 -std::cout << Is< ConceptD >::VALUE << std::endl; // 0 -..include:seqan/basic.h -..see:Macro.SEQAN_FUNC_ENABLE_IF -..see:Macro.SEQAN_CONCEPT_ASSERT -*/ - // test whether a concept is fulfilled (without concept checking) template struct Implements: False {}; @@ -988,8 +808,8 @@ template struct IsRecurse_ >: True {}; template -struct IsRecurse_< TConceptModel, TagList >: - Or< +struct IsRecurse_< TConceptModel, TagList >: + Or< IsRecurse_::Type >, IsRecurse_ > {}; @@ -998,10 +818,10 @@ template struct Is; template