Skip to content

Commit

Permalink
Add basic framework for computing segment array indices
Browse files Browse the repository at this point in the history
  • Loading branch information
glennhickey committed Oct 14, 2012
1 parent 22b23bb commit ac76341
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 10 deletions.
89 changes: 82 additions & 7 deletions maf/impl/halMafWriteGenomes.cpp
Expand Up @@ -35,8 +35,8 @@ void MafWriteGenomes::convert(const string& mafPath,
_dimMap = &dimMap;
_alignment = alignment;

createGenomes();

createGenomes();
MafScanner::scan(mafPath, targets);
}

MafWriteGenomes::MapRange MafWriteGenomes::getRefSequences() const
Expand Down Expand Up @@ -156,7 +156,8 @@ void MafWriteGenomes::createGenomes()
for (DimMap::const_iterator i = curRange.first; i != curRange.second; ++i)
{
genomeDimensions.push_back(
Sequence::Info(i->first, i->second->_length, i->second->_segments, 0));
Sequence::Info(i->first, i->second->_length,
i->second->_segments, 0));
}
Genome* childGenome = _alignment->openGenome(childName);
assert(childGenome != NULL);
Expand All @@ -166,23 +167,97 @@ void MafWriteGenomes::createGenomes()
}
}

/** Process the block currently held in _block. At this stage the only
 * work done is to set up the per-row array index bookkeeping, starting
 * from the first column.  Must only be called when the block has at
 * least one row. */
void MafWriteGenomes::convertBlock()
{
  assert(_rows > 0);
  // column 0 triggers the (re)initialisation of every row's RowInfo
  initArrayIndexes(0);
}

void MafWriteGenomes::scan(const std::string& mafPath)
void MafWriteGenomes::initArrayIndexes(size_t col)
{
if (col == 0)
{
if (_blockInfo.size() < _rows)
{
_blockInfo.resize(_rows);
}
for (size_t i = 0; i < _rows; ++i)
{
_blockInfo[i]._arrayIndex = NULL_INDEX;
_blockInfo[i]._gaps = 0;
assert(_dimMap->find(_block[i]._sequenceName) != _dimMap->end());
_blockInfo[i]._record = _dimMap->find(_block[i]._sequenceName)->second;
}
}
else
{
assert(_blockInfo.size() >= _rows);
}

// our range will be [col, last)
size_t last = col + 1;
while (last < _mask.size() && _mask[last] == true)
{
++last;
}

for (size_t i = 0; i < _rows; ++i)
{
std::string& line = _block[i]._line;
RowInfo& rowInfo = _blockInfo[i];
if (line[col] == '-')
{
rowInfo._gaps += last - col;
}
else
{
if (rowInfo._arrayIndex == NULL_INDEX)
{
MafScanDimensions::Interval interval(col, last - 1);
pair<MafScanDimensions::IntervalList::const_iterator,
MafScanDimensions::IntervalList::const_iterator> range =
equal_range(rowInfo._record->_intervals.begin(),
rowInfo._record->_intervals.end(), interval);
assert(range.first != rowInfo._record->_intervals.end());
assert(range.second == range.first);
rowInfo._arrayIndex = range.first - rowInfo._record->_intervals.begin();
}
else
{
++rowInfo._arrayIndex;
}
}
}
}

/** Scanner hook (NOTE(review): presumably invoked by MafScanner when a
 * new alignment "a" line is read -- confirm against MafScanner).
 * If rows have been accumulated in _block, they are converted via
 * convertBlock(); otherwise nothing happens. */
void MafWriteGenomes::aLine()
{
  assert(_rows <= _block.size());
  const bool blockPending = _rows > 0;
  if (blockPending)
  {
    convertBlock();
  }
}

/** Scanner hook that post-processes the most recently added row of the
 * current block (_block[_rows - 1]).
 *
 * Rows on the '-' strand are rewritten in forward-strand terms: the
 * start position is converted and the alignment line is reverse
 * complemented.  The strand character itself is left untouched.
 * Requires that at least one row has been read (_rows > 0). */
void MafWriteGenomes::sLine()
{
  // _rows - 1 below would wrap around if no row had been read yet
  assert(_rows > 0);
  MafScanner::Row& row = _block[_rows - 1];

  // CONVERT TO FORWARD COORDINATES
  // MAF stores the start of a '-' strand row relative to the
  // reverse-complemented source sequence, so the forward-strand start
  // of the same region is srcLength - start - length.  (The previous
  // formula ignored the row's own start position entirely.)
  if (row._strand == '-')
  {
    row._startPosition = row._srcLength - row._startPosition - row._length;
    reverseComplement(row._line);
  }
}

/** Scanner hook (NOTE(review): presumably invoked by MafScanner once the
 * input is exhausted -- confirm against MafScanner).
 * Converts whatever rows are still held in _block via convertBlock();
 * does nothing when no rows are held. */
void MafWriteGenomes::end()
{
  assert(_rows <= _block.size());
  const bool rowsRemaining = _rows > 0;
  if (rowsRemaining)
  {
    convertBlock();
  }
}
17 changes: 14 additions & 3 deletions maf/inc/halMafWriteGenomes.h
Expand Up @@ -20,14 +20,17 @@
namespace hal {

/** update the HAL graph from the dimension information scanned
* from the input MAF file, thumbing our nose at the haters as we do it! */
* from the input MAF file, thumbing our nose at the haters as we do it!
* remember that we convert lines to forward coordinates as we read them
* (but leave the strand character unchanged as a reminder) */
class MafWriteGenomes : private MafScanner
{
public:
MafWriteGenomes();
~MafWriteGenomes();

typedef MafScanDimensions::DimMap DimMap;
typedef MafScanDimensions::Record Record;
typedef std::pair<DimMap::const_iterator, DimMap::const_iterator> MapRange;

void convert(const std::string& mafPath,
Expand All @@ -42,18 +45,26 @@ class MafWriteGenomes : private MafScanner
MapRange getNextSequences(DimMap::const_iterator jprev) const;

void createGenomes();
void convertBlock();
void initArrayIndexes(size_t col);

void scan(const std::string& mafPath);
void aLine();
void sLine();
void end();

private:

/** Per-row bookkeeping for the block being converted; one entry per
 * row is kept in _blockInfo and maintained by initArrayIndexes(). */
struct RowInfo
{
// position within the record's _intervals list; NULL_INDEX until the
// row's first non-gap run has been looked up
hal_index_t _arrayIndex;
// running total of columns in which this row held a '-' character
size_t _gaps;
// dimension record found in the DimMap under this row's sequence name
const Record* _record;
};

std::string _refName;
const DimMap* _dimMap;
AlignmentPtr _alignment;

std::vector<RowInfo> _blockInfo;
};

}
Expand Down

0 comments on commit ac76341

Please sign in to comment.