Skip to content

Commit

Permalink
fix bugs in masking, add support for target genomes
Browse files Browse the repository at this point in the history
  • Loading branch information
glennhickey committed Oct 13, 2012
1 parent f32bd93 commit 22b23bb
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 21 deletions.
4 changes: 2 additions & 2 deletions maf/impl/halMafScanDimensions.cpp
Expand Up @@ -27,15 +27,15 @@ MafScanDimensions::~MafScanDimensions()
}
}

// Clears _dimMap, runs the base-class MafScanner::scan on mafPath, then calls
// updateDimensionsGlobal().
// NOTE(review): this text looks like a rendered diff -- the one-argument
// signature and the one-argument MafScanner::scan call below are presumably
// the pre-change lines that the two-argument (targets) versions replace;
// confirm against the real file.
void MafScanDimensions::scan(const std::string& mafPath)
void MafScanDimensions::scan(const string& mafPath, const set<string>& targets)
{
// The map stores pointers: delete every mapped value before emptying the map
// so that nothing from an earlier scan is left behind.
for (DimMap::iterator i = _dimMap.begin(); i != _dimMap.end(); ++i)
{
delete i->second;
}
_dimMap.clear();

// Hand the file (and, in the two-argument form, the target set) to the
// base-class scanner.
MafScanner::scan(mafPath);
MafScanner::scan(mafPath, targets);

// Runs only after the base-class scan has returned.
updateDimensionsGlobal();
}
Expand Down
41 changes: 34 additions & 7 deletions maf/impl/halMafScanner.cpp
Expand Up @@ -24,8 +24,9 @@ MafScanner::~MafScanner()

}

void MafScanner::scan(const std::string& mafFilePath)
void MafScanner::scan(const string& mafFilePath, const set<string>& targets)
{
_targets = targets;
_mafFile.open(mafFilePath.c_str());

if (!_mafFile)
Expand All @@ -42,9 +43,12 @@ void MafScanner::scan(const std::string& mafFilePath)
_mafFile >> buffer;
if (buffer == "a")
{
updateMask();
aLine();
nextLine();
if (_rows > 0)
{
updateMask();
aLine();
nextLine();
}
_rows = 0;
}
else if (buffer == "s")
Expand All @@ -70,13 +74,27 @@ void MafScanner::scan(const std::string& mafFilePath)
<< _block[_rows - 2]._startPosition;
throw hal_exception(ss.str());
}
sLine();

if (_targets.size() > 1 && // (will always include reference)
_targets.find(genomeName(row._sequenceName)) == _targets.end())
{
// genome not in targets, pretend like it never happened.
--_rows;
}
else
{
sLine();
}
}
else
{
nextLine();
}
}
if (_rows > 0)
{
updateMask();
}
end();
_mafFile.close();
}
Expand All @@ -98,10 +116,12 @@ void MafScanner::updateMask()
{
size_t length = _block[0]._line.length();
_mask.resize(length, false);


// scan left to right
for (size_t i = 1; i < length; ++i)
{
for (size_t j = 0; j < _rows && _mask[j] == true; ++j)
// scan up to down
for (size_t j = 0; j < _rows && _mask[i] == false; ++j)
{
// beginning of gap run. add position of first gap to mask
if (_block[j]._line[i] == '-' && _block[j]._line[i-1] != '-')
Expand All @@ -118,3 +138,10 @@ void MafScanner::updateMask()
}
}

// Extracts the genome name from fullName, taken to be every character that
// precedes the first '.'.
// Builds with assertions enabled abort when fullName contains no '.'; with
// assertions disabled the whole of fullName is returned in that case.
std::string MafScanner::genomeName(const std::string fullName) const
{
  // Locate the separator once and reuse the position for both the check
  // and the prefix extraction.
  const std::string::size_type dotPos = fullName.find('.');
  assert(dotPos != std::string::npos);
  return fullName.substr(0, dotPos);
}


7 changes: 1 addition & 6 deletions maf/impl/halMafWriteGenomes.cpp
Expand Up @@ -27,6 +27,7 @@ MafWriteGenomes::~MafWriteGenomes()

void MafWriteGenomes::convert(const string& mafPath,
const string& refGenomeName,
const set<string>& targets,
const DimMap& dimMap,
AlignmentPtr alignment)
{
Expand All @@ -38,12 +39,6 @@ void MafWriteGenomes::convert(const string& mafPath,

}

// Extracts the genome name from fullName, taken to be every character that
// precedes the first '.'.
// Builds with assertions enabled abort when fullName contains no '.'; with
// assertions disabled the whole of fullName is returned in that case.
std::string MafWriteGenomes::genomeName(const std::string fullName) const
{
  // Locate the separator once and reuse the position for both the check
  // and the prefix extraction.
  const std::string::size_type dotPos = fullName.find('.');
  assert(dotPos != std::string::npos);
  return fullName.substr(0, dotPos);
}

MafWriteGenomes::MapRange MafWriteGenomes::getRefSequences() const
{
DimMap::const_iterator i = _dimMap->lower_bound(_refName);
Expand Down
15 changes: 12 additions & 3 deletions maf/impl/maf2hal.cpp
Expand Up @@ -89,12 +89,21 @@ int main(int argc, char** argv)
alignment->setOptionsFromParser(optionsParser);
alignment->createNew(halPath);
}

vector<string> targetNames;
if (targetGenomes != "\"\"")
{
targetNames = chopString(targetGenomes, ",");
}
set<string> targetSet(targetNames.begin(), targetNames.end());
targetSet.insert(refGenomeName);

MafScanDimensions dScan;
dScan.scan(mafPath);
dScan.scan(mafPath, targetSet);
MafWriteGenomes writer;
writer.convert(mafPath, refGenomeName, dScan.getDimensions(), alignment);

writer.convert(mafPath, refGenomeName, targetSet, dScan.getDimensions(),
alignment);

const MafScanDimensions::DimMap& dimMap = dScan.getDimensions();
for (MafScanDimensions::DimMap::const_iterator i = dimMap.begin();
i != dimMap.end(); ++i)
Expand Down
3 changes: 2 additions & 1 deletion maf/inc/halMafScanDimensions.h
Expand Up @@ -37,7 +37,8 @@ class MafScanDimensions : public MafScanner

MafScanDimensions();
~MafScanDimensions();
void scan(const std::string& mafPath);
void scan(const std::string& mafPath,
const std::set<std::string>& targetSet);
const DimMap& getDimensions() const;

protected:
Expand Down
5 changes: 4 additions & 1 deletion maf/inc/halMafScanner.h
Expand Up @@ -25,7 +25,8 @@ class MafScanner
public:
MafScanner();
virtual ~MafScanner();
virtual void scan(const std::string& mafPath);
virtual void scan(const std::string& mafPath,
const std::set<std::string>& targetSet);

struct Row {
std::string _sequenceName;
Expand All @@ -44,8 +45,10 @@ class MafScanner
virtual void end() = 0;
void nextLine();
void updateMask();
std::string genomeName(const std::string fullName) const;

std::ifstream _mafFile;
std::set<std::string> _targets;

Block _block;
size_t _rows;
Expand Down
2 changes: 1 addition & 1 deletion maf/inc/halMafWriteGenomes.h
Expand Up @@ -32,12 +32,12 @@ class MafWriteGenomes : private MafScanner

void convert(const std::string& mafPath,
const std::string& refGenomeName,
const std::set<std::string>& targets,
const DimMap& dimMap,
AlignmentPtr alignment);

private:

std::string genomeName(const std::string fullName) const;
MapRange getRefSequences() const;
MapRange getNextSequences(DimMap::const_iterator jprev) const;

Expand Down

0 comments on commit 22b23bb

Please sign in to comment.