From 22b23bb00ed077c96225d185fbebccc2b8ba2564 Mon Sep 17 00:00:00 2001 From: Glenn Hickey Date: Sat, 13 Oct 2012 16:07:12 -0700 Subject: [PATCH] fix bugs in masking, add support for target genomes --- maf/impl/halMafScanDimensions.cpp | 4 +-- maf/impl/halMafScanner.cpp | 41 +++++++++++++++++++++++++------ maf/impl/halMafWriteGenomes.cpp | 7 +----- maf/impl/maf2hal.cpp | 15 ++++++++--- maf/inc/halMafScanDimensions.h | 3 ++- maf/inc/halMafScanner.h | 5 +++- maf/inc/halMafWriteGenomes.h | 2 +- 7 files changed, 56 insertions(+), 21 deletions(-) diff --git a/maf/impl/halMafScanDimensions.cpp b/maf/impl/halMafScanDimensions.cpp index db75f957..4116ce6a 100644 --- a/maf/impl/halMafScanDimensions.cpp +++ b/maf/impl/halMafScanDimensions.cpp @@ -27,7 +27,7 @@ MafScanDimensions::~MafScanDimensions() } } -void MafScanDimensions::scan(const std::string& mafPath) +void MafScanDimensions::scan(const string& mafPath, const set& targets) { for (DimMap::iterator i = _dimMap.begin(); i != _dimMap.end(); ++i) { @@ -35,7 +35,7 @@ void MafScanDimensions::scan(const std::string& mafPath) } _dimMap.clear(); - MafScanner::scan(mafPath); + MafScanner::scan(mafPath, targets); updateDimensionsGlobal(); } diff --git a/maf/impl/halMafScanner.cpp b/maf/impl/halMafScanner.cpp index ba43b9b3..dcc966fe 100644 --- a/maf/impl/halMafScanner.cpp +++ b/maf/impl/halMafScanner.cpp @@ -24,8 +24,9 @@ MafScanner::~MafScanner() } -void MafScanner::scan(const std::string& mafFilePath) +void MafScanner::scan(const string& mafFilePath, const set& targets) { + _targets = targets; _mafFile.open(mafFilePath.c_str()); if (!_mafFile) @@ -42,9 +43,12 @@ void MafScanner::scan(const std::string& mafFilePath) _mafFile >> buffer; if (buffer == "a") { - updateMask(); - aLine(); - nextLine(); + if (_rows > 0) + { + updateMask(); + aLine(); + nextLine(); + } _rows = 0; } else if (buffer == "s") @@ -70,13 +74,27 @@ void MafScanner::scan(const std::string& mafFilePath) << _block[_rows - 2]._startPosition; throw hal_exception(ss.str()); } - sLine(); + + if (_targets.size() > 1 && // (will always include reference) + _targets.find(genomeName(row._sequenceName)) == _targets.end()) + { + // genome not in targets, pretend like it never happened. + --_rows; + } + else + { + sLine(); + } } else { nextLine(); } } + if (_rows > 0) + { + updateMask(); + } end(); _mafFile.close(); } @@ -98,10 +116,12 @@ void MafScanner::updateMask() { size_t length = _block[0]._line.length(); _mask.resize(length, false); - + + // scan left to right for (size_t i = 1; i < length; ++i) { - for (size_t j = 0; j < _rows && _mask[j] == true; ++j) + // scan up to down + for (size_t j = 0; j < _rows && _mask[i] == false; ++j) { // beginning of gap run. add position of first gap to mask if (_block[j]._line[i] == '-' && _block[j]._line[i-1] != '-') @@ -118,3 +138,10 @@ void MafScanner::updateMask() } } +std::string MafScanner::genomeName(const std::string fullName) const +{ + assert(fullName.find('.') != string::npos); + return fullName.substr(0, fullName.find('.')); +} + + diff --git a/maf/impl/halMafWriteGenomes.cpp b/maf/impl/halMafWriteGenomes.cpp index c711c642..5beb320e 100644 --- a/maf/impl/halMafWriteGenomes.cpp +++ b/maf/impl/halMafWriteGenomes.cpp @@ -27,6 +27,7 @@ MafWriteGenomes::~MafWriteGenomes() void MafWriteGenomes::convert(const string& mafPath, const string& refGenomeName, + const set& targets, const DimMap& dimMap, AlignmentPtr alignment) { @@ -38,12 +39,6 @@ void MafWriteGenomes::convert(const string& mafPath, } -std::string MafWriteGenomes::genomeName(const std::string fullName) const -{ - assert(fullName.find('.') != string::npos); - return fullName.substr(0, fullName.find('.')); -} - MafWriteGenomes::MapRange MafWriteGenomes::getRefSequences() const { DimMap::const_iterator i = _dimMap->lower_bound(_refName); diff --git a/maf/impl/maf2hal.cpp b/maf/impl/maf2hal.cpp index 6aef46b2..840e860e 100644 --- a/maf/impl/maf2hal.cpp +++ b/maf/impl/maf2hal.cpp @@ -89,12 +89,21 @@ int main(int argc, char** argv) alignment->setOptionsFromParser(optionsParser); alignment->createNew(halPath); } + + vector targetNames; + if (targetGenomes != "\"\"") + { + targetNames = chopString(targetGenomes, ","); + } + set targetSet(targetNames.begin(), targetNames.end()); + targetSet.insert(refGenomeName); MafScanDimensions dScan; - dScan.scan(mafPath); + dScan.scan(mafPath, targetSet); MafWriteGenomes writer; - writer.convert(mafPath, refGenomeName, dScan.getDimensions(), alignment); - + writer.convert(mafPath, refGenomeName, targetSet, dScan.getDimensions(), + alignment); + const MafScanDimensions::DimMap& dimMap = dScan.getDimensions(); for (MafScanDimensions::DimMap::const_iterator i = dimMap.begin(); i != dimMap.end(); ++i) diff --git a/maf/inc/halMafScanDimensions.h b/maf/inc/halMafScanDimensions.h index ec6fae87..34e3be42 100644 --- a/maf/inc/halMafScanDimensions.h +++ b/maf/inc/halMafScanDimensions.h @@ -37,7 +37,8 @@ class MafScanDimensions : public MafScanner MafScanDimensions(); ~MafScanDimensions(); - void scan(const std::string& mafPath); + void scan(const std::string& mafPath, + const std::set& targetSet); const DimMap& getDimensions() const; protected: diff --git a/maf/inc/halMafScanner.h b/maf/inc/halMafScanner.h index 428c5512..9f93857a 100644 --- a/maf/inc/halMafScanner.h +++ b/maf/inc/halMafScanner.h @@ -25,7 +25,8 @@ class MafScanner public: MafScanner(); virtual ~MafScanner(); - virtual void scan(const std::string& mafPath); + virtual void scan(const std::string& mafPath, + const std::set& targetSet); struct Row { std::string _sequenceName; @@ -44,8 +45,10 @@ class MafScanner virtual void end() = 0; void nextLine(); void updateMask(); + std::string genomeName(const std::string fullName) const; std::ifstream _mafFile; + std::set _targets; Block _block; size_t _rows; diff --git a/maf/inc/halMafWriteGenomes.h b/maf/inc/halMafWriteGenomes.h index 8dd3ee53..5fd3b0cc 100644 --- a/maf/inc/halMafWriteGenomes.h +++ b/maf/inc/halMafWriteGenomes.h @@ -32,12 +32,12 @@ class MafWriteGenomes : private MafScanner void convert(const std::string& mafPath, const std::string& refGenomeName, + const std::set& targets, const DimMap& dimMap, AlignmentPtr alignment); private: - std::string genomeName(const std::string fullName) const; MapRange getRefSequences() const; MapRange getNextSequences(DimMap::const_iterator jprev) const;