Skip to content

Commit

Permalink
Merge branch 'blockViz' into chains
Browse files Browse the repository at this point in the history
  • Loading branch information
joelarmstrong committed Nov 6, 2013
2 parents b62c591 + 61d2f32 commit eb06969
Show file tree
Hide file tree
Showing 23 changed files with 455 additions and 93 deletions.
39 changes: 32 additions & 7 deletions chain/impl/halBlockViz.cpp
Expand Up @@ -182,7 +182,8 @@ extern "C" void halFreeBlocks(struct hal_block_t* head)
while (head != NULL)
{
hal_block_t* next = head->next;
free(head->sequence);
free(head->qSequence);
free(head->tSequence);
free(head->qChrom);
free(head);
head = next;
Expand Down Expand Up @@ -707,7 +708,8 @@ void readBlock(AlignmentConstPtr seqAlignment,
cur->next = NULL;

string seqBuffer = qSequence->getName();
string dnaBuffer;
string qDnaBuffer;
string tDnaBuffer;
size_t prefix =
seqBuffer.find(genomeName + '.') != 0 ? 0 : genomeName.length() + 1;
cur->qChrom = (char*)malloc(seqBuffer.length() + 1 - prefix);
Expand Down Expand Up @@ -737,7 +739,8 @@ void readBlock(AlignmentConstPtr seqAlignment,
assert(firstRefSeg->getLength() == firstQuerySeg->getLength());
cur->size = 1 + tEnd - cur->tStart;
cur->strand = firstQuerySeg->getReversed() ? '-' : '+';
cur->sequence = NULL;
cur->tSequence = NULL;
cur->qSequence = NULL;
if (getSequenceString != 0)
{
const Genome* qSeqGenome =
Expand All @@ -757,14 +760,36 @@ void readBlock(AlignmentConstPtr seqAlignment,
<< " for DNA sequence extraction";
throw hal_exception(ss.str());
}

const Genome* tSeqGenome =
seqAlignment->openGenome(tSequence->getGenome()->getName());
if (tSeqGenome == NULL)
{
stringstream ss;
ss << "Unable to open genome " << tSequence->getGenome()->getName()
<< " for DNA sequence extraction";
throw hal_exception(ss.str());
}

const Sequence* tSeqSequence = tSeqGenome->getSequence(tSequence->getName());
if (tSeqSequence == NULL)
{
stringstream ss;
ss << "Unable to open sequence " << tSequence->getName()
<< " for DNA sequence extraction";
throw hal_exception(ss.str());
}

qSeqSequence->getSubString(dnaBuffer, cur->qStart, cur->size);
qSeqSequence->getSubString(qDnaBuffer, cur->qStart, cur->size);
tSeqSequence->getSubString(tDnaBuffer, cur->tStart, cur->size);
if (cur->strand == '-')
{
reverseComplement(dnaBuffer);
reverseComplement(qDnaBuffer);
}
cur->sequence = (char*)malloc(dnaBuffer.length() * sizeof(char) + 1);
strcpy(cur->sequence, dnaBuffer.c_str());
cur->qSequence = (char*)malloc(qDnaBuffer.length() * sizeof(char) + 1);
cur->tSequence = (char*)malloc(tDnaBuffer.length() * sizeof(char) + 1);
strcpy(cur->qSequence, qDnaBuffer.c_str());
strcpy(cur->tSequence, tDnaBuffer.c_str());
}
}

Expand Down
3 changes: 2 additions & 1 deletion chain/inc/halBlockViz.h
Expand Up @@ -57,7 +57,8 @@ struct hal_block_t
hal_int_t qStart;
hal_int_t size;
char strand;
char *sequence;
char *qSequence; // query DNA, if requested
char *tSequence; // target DNA, if requested
};

/** Some information about a genome */
Expand Down
4 changes: 2 additions & 2 deletions chain/test/blockVizTest.c
Expand Up @@ -70,8 +70,8 @@ static int parseArgs(int argc, char** argv, bv_args_t* args)

/**
 * Write a one-line, human-readable summary of block `b` to `file`:
 * qChrom, tStart, qStart, size and strand, followed by the DNA string(s)
 * attached to the block, terminated by a newline.
 *
 * NOTE(review): the sequence pointers are handed straight to "%s". The
 * struct documents them as present only "if requested", so they look like
 * they can be NULL here -- confirm callers always request DNA, or guard
 * before printing.
 * NOTE(review): qStart and size are declared hal_int_t but are printed with
 * "%ld"; confirm hal_int_t is `long` on every supported platform.
 */
static void printBlock(FILE* file, struct hal_block_t* b)
{
fprintf(file, "chr:%s, tSt:%ld, qSt:%ld, size:%ld, strand:%c: %s\n",
b->qChrom, b->tStart, b->qStart, b->size, b->strand, b->sequence);
fprintf(file, "chr:%s, tSt:%ld, qSt:%ld, size:%ld, strand:%c: tgt : %s query: %s\n",
b->qChrom, b->tStart, b->qStart, b->size, b->strand, b->tSequence, b->qSequence);
}

static void printDupeList(FILE* file, struct hal_target_dupe_list_t* d)
Expand Down
16 changes: 0 additions & 16 deletions extract/hal4dExtract.cpp

This file was deleted.

5 changes: 5 additions & 0 deletions include.mk
Expand Up @@ -26,6 +26,11 @@ HDF5_CCLINKER = ${cxx}
cpp = h5c++ ${h5prefix}
cxx = h5cc ${h5prefix}

# On Darwin, explicitly select libstdc++ as the C++ standard library.
# This works around OS X Mavericks fully replacing gcc with clang and
# changing the libraries used by default.
# NOTE(review): -stdlib is a C++ standard-library selector; confirm it is
# intended (and accepted without warnings) in cflags as well as cppflags.
# NOTE(review): SYS is not set in the visible part of this file; presumably
# it holds the `uname` system name -- verify where it is defined.
ifeq (${SYS},Darwin)
cppflags += -stdlib=libstdc++
cflags += -stdlib=libstdc++
endif

# add compiler flag and kent paths if udc is enabled
# relies on KENTSRC containing path to top level kent/ dir
# and MACHTYPE being specified
Expand Down
1 change: 1 addition & 0 deletions liftover/impl/halBedLine.cpp
Expand Up @@ -30,6 +30,7 @@ istream& BedLine::read(istream& is, int version, string& lineBuffer)
_version = version;
std::getline(is, lineBuffer);
stringstream ss(lineBuffer);
ss.imbue(is.getloc());
ss >> _chrName;
if (ss.bad() || ss.fail())
{
Expand Down
40 changes: 31 additions & 9 deletions liftover/impl/halBedScanner.cpp
Expand Up @@ -25,10 +25,15 @@ BedScanner::~BedScanner()

}

void BedScanner::scan(const string& bedPath, int bedVersion)
void BedScanner::scan(const string& bedPath, int bedVersion,
const locale* inLocale)
{
assert(_bedStream == NULL);
_bedStream = new ifstream(bedPath.c_str());
if (inLocale != NULL)
{
_bedStream->imbue(*inLocale);
}

try {
scan(_bedStream, bedVersion);
Expand All @@ -46,11 +51,15 @@ void BedScanner::scan(const string& bedPath, int bedVersion)
_bedStream = NULL;
}

void BedScanner::scan(istream* is, int bedVersion)
void BedScanner::scan(istream* is, int bedVersion, const locale* inLocale)
{
visitBegin();
_bedStream = is;
_bedVersion = bedVersion;
if (inLocale != NULL)
{
_bedStream->imbue(*inLocale);
}

if (_bedVersion == -1)
{
Expand All @@ -65,13 +74,13 @@ void BedScanner::scan(istream* is, int bedVersion)
_lineNumber = 0;
try
{
skipWhiteSpaces(_bedStream);
skipWhiteSpaces(_bedStream, inLocale);
while (_bedStream->good())
{
++_lineNumber;
_bedLine.read(*_bedStream, _bedVersion, lineBuffer);
visitLine();
skipWhiteSpaces(_bedStream);
skipWhiteSpaces(_bedStream, inLocale);
}
}
catch(hal_exception e)
Expand All @@ -84,13 +93,18 @@ void BedScanner::scan(istream* is, int bedVersion)
_bedStream = NULL;
}

int BedScanner::getBedVersion(istream* bedStream)
int BedScanner::getBedVersion(istream* bedStream, const locale* inLocale)
{
assert(bedStream != &cin);
if (bedStream->bad())
{
throw hal_exception("Error reading bed input stream");
}
if (inLocale != NULL)
{
bedStream->imbue(*inLocale);
}

string lineBuffer;
BedLine bedLine;
int version = 12;
Expand All @@ -101,7 +115,7 @@ int BedScanner::getBedVersion(istream* bedStream)
{
bedStream->clear();
bedStream->seekg(pos);
skipWhiteSpaces(bedStream);
skipWhiteSpaces(bedStream, inLocale);
*bedStream >> std::skipws;
bedLine.read(*bedStream, version, lineBuffer);
break;
Expand All @@ -120,9 +134,14 @@ int BedScanner::getBedVersion(istream* bedStream)
return version;
}

size_t BedScanner::getNumColumns(const string& bedLine)
size_t BedScanner::getNumColumns(const string& bedLine,
const locale* inLocale)
{
stringstream ss(bedLine);
if (inLocale != NULL)
{
ss.imbue(*inLocale);
}
size_t c = 0;
string buffer;
while (ss.good())
Expand All @@ -148,9 +167,12 @@ void BedScanner::visitEOF()
{
}

void BedScanner::skipWhiteSpaces(istream* bedStream)
void BedScanner::skipWhiteSpaces(istream* bedStream,
const locale* inLocale)
{
while (bedStream->good() && std::isspace(bedStream->peek()))
locale defaultLocale;
const locale& myLocale = inLocale == NULL ? defaultLocale : *inLocale;
while (bedStream->good() && std::isspace((char)bedStream->peek(), myLocale))
{
bedStream->get();
}
Expand Down
14 changes: 8 additions & 6 deletions liftover/impl/halLiftover.cpp
Expand Up @@ -33,7 +33,8 @@ void Liftover::convert(AlignmentConstPtr alignment,
int outBedVersion,
bool addExtraColumns,
bool traverseDupes,
bool outPSL)
bool outPSL,
const locale* inLocale)
{
_srcGenome = srcGenome;
_tgtGenome = tgtGenome;
Expand All @@ -43,6 +44,7 @@ void Liftover::convert(AlignmentConstPtr alignment,
_outBedVersion = outBedVersion;
_traverseDupes = traverseDupes;
_outPSL = outPSL;
_inLocale = inLocale;
_missedSet.clear();
_tgtSet.clear();
assert(_srcGenome && inBedStream && tgtGenome && outBedStream);
Expand All @@ -56,12 +58,12 @@ void Liftover::convert(AlignmentConstPtr alignment,
stringstream* firstLineStream = NULL;
if (_inBedVersion <= 0)
{
skipWhiteSpaces(inBedStream);
skipWhiteSpaces(inBedStream, _inLocale);
std::getline(*inBedStream, firstLineBuffer);
firstLineStream = new stringstream(firstLineBuffer);
_inBedVersion = BedScanner::getBedVersion(firstLineStream);
_inBedVersion = BedScanner::getBedVersion(firstLineStream, inLocale);
assert(inBedStream->eof() || _inBedVersion >= 3);
size_t numCols = BedScanner::getNumColumns(firstLineBuffer);
size_t numCols = BedScanner::getNumColumns(firstLineBuffer, inLocale);
if ((int)numCols > _inBedVersion)
{
cerr << "Warning: auto-detecting input BED version " << _inBedVersion
Expand All @@ -75,10 +77,10 @@ void Liftover::convert(AlignmentConstPtr alignment,

if (firstLineStream != NULL)
{
scan(firstLineStream, _inBedVersion);
scan(firstLineStream, _inBedVersion, inLocale);
delete firstLineStream;
}
scan(inBedStream, _inBedVersion);
scan(inBedStream, _inBedVersion, inLocale);
}

void Liftover::visitBegin()
Expand Down
20 changes: 18 additions & 2 deletions liftover/impl/halLiftoverMain.cpp
Expand Up @@ -9,6 +9,7 @@
#include <fstream>
#include "halColumnLiftover.h"
#include "halBlockLiftover.h"
#include "halTabFacet.h"

using namespace std;
using namespace hal;
Expand Down Expand Up @@ -43,7 +44,10 @@ static CLParserPtr initParser()
"columns in the input beyond the specified or "
"detected bed version, and which are cut by "
"default.", false);

optionsParser->addOptionFlag("tab", "input is tab-separated. this allows"
" column entries to contain spaces. if this"
" flag is not set, both spaces and tabs are"
" used to separate input columns.", false);
optionsParser->setDescription("Map BED genome interval coordinates between "
"two genomes.");
return optionsParser;
Expand All @@ -64,6 +68,7 @@ int main(int argc, char** argv)
int outBedVersion;
bool keepExtra;
bool outPSL;
bool tab;
try
{
optionsParser->parseOptions(argc, argv);
Expand All @@ -78,6 +83,7 @@ int main(int argc, char** argv)
outBedVersion = optionsParser->getOption<int>("outBedVersion");
keepExtra = optionsParser->getFlag("keepExtra");
outPSL = optionsParser->getFlag("outPSL");
tab = optionsParser->getFlag("tab");
}
catch(exception& e)
{
Expand Down Expand Up @@ -145,11 +151,21 @@ int main(int argc, char** argv)
throw hal_exception("Error opening tgtBed, " + tgtBedPath);
}
}

locale* inLocale = NULL;
if (tab == true)
{
inLocale = new locale(cin.getloc(), new TabSepFacet(cin.getloc()));
assert(std::isspace('\t', *inLocale) == true);
assert(std::isspace(' ', *inLocale) == false);
}

BlockLiftover liftover;
liftover.convert(alignment, srcGenome, srcBedPtr, tgtGenome, tgtBedPtr,
inBedVersion, outBedVersion, keepExtra, !noDupes,
outPSL);
outPSL, inLocale);

delete inLocale;

}
catch(hal_exception& e)
Expand Down

0 comments on commit eb06969

Please sign in to comment.