Permalink
Browse files

*** empty log message ***

  • Loading branch information...
1 parent 82fa7b0 commit b1006b5fec9e72bde45bb78f5d28901e3a3325e5 langmead committed Oct 19, 2009
Showing with 423 additions and 344 deletions.
  1. +17 −0 alphabet.h
  2. +2 −2 ebwt_search_backtrack.h
  3. +1 −1 edit.cpp
  4. +14 −10 edit.h
  5. +2 −2 hit.h
  6. +9 −2 hit_set.cpp
  7. +45 −0 hit_set.h
  8. +333 −327 range_source.h
View
@@ -207,6 +207,23 @@ static inline float entropyDna5(const String<Dna5>& read) {
return ent;
}
+/**
+ * Return the DNA complement of the given ASCII char.
+ */
+static inline char comp(char c) {
+ switch(c) {
+ case 'a': return 't';
+ case 'A': return 'T';
+ case 'c': return 'g';
+ case 'C': return 'G';
+ case 'g': return 'c';
+ case 'G': return 'C';
+ case 't': return 'a';
+ case 'T': return 'A';
+ default: return c;
+ }
+}
+
extern uint8_t dna4Cat[];
extern uint8_t charToDna5[];
extern uint8_t rcCharToDna5[];
View
@@ -2424,7 +2424,7 @@ class EbwtRangeSource : public RangeSource {
for(size_t i = 0; i < br->edits_.size(); i++) {
Edit e = br->edits_.get(i);
cout << (curEbwt_->fw() ? (qlen_ - e.pos - 1) : e.pos)
- << "ACGT"[e.chr];
+ << (char)e.chr;
if(i < br->edits_.size()-1) cout << " ";
}
cout << endl;
@@ -2653,7 +2653,7 @@ class EbwtRangeSource : public RangeSource {
curRange_.refcs.clear();
for(size_t i = 0; i < nedits; i++) {
curRange_.mms.push_back(qlen_ - br->edits_.get(i).pos - 1);
- curRange_.refcs.push_back("ACGTN"[br->edits_.get(i).chr]);
+ curRange_.refcs.push_back((char)br->edits_.get(i).chr);
}
addPartialEdits();
curRange_.ebwt = ebwt_;
View
@@ -11,6 +11,6 @@
using namespace std;
ostream& operator<< (ostream& os, const Edit& e) {
- os << e.pos << "ACGT"[e.chr];
+ os << e.pos << (char)e.chr;
return os;
}
View
24 edit.h
@@ -18,7 +18,7 @@
* reference, deletion in the reference.
*/
enum {
- EDIT_TYPE_MM = 0,
+ EDIT_TYPE_MM = 1,
EDIT_TYPE_SNP,
EDIT_TYPE_INS,
EDIT_TYPE_DEL
@@ -33,21 +33,21 @@ struct Edit {
Edit() : pos(1023) { }
Edit(int po, int ch, int ty = EDIT_TYPE_MM) :
- type(ty), pos(po), chr(ch) { }
+ chr(ch), qchr(0), type(ty), pos(po) { }
/**
* Write Edit to an OutFileBuf.
*/
void serialize(OutFileBuf& fb) const {
- assert_eq(2, sizeof(Edit));
- fb.writeChars((const char*)this, 2);
+ assert_eq(4, sizeof(Edit));
+ fb.writeChars((const char*)this, 4);
}
/**
* Read Edit from a FileBuf.
*/
void deserialize(FileBuf& fb) {
- fb.get((char*)this, 2);
+ fb.get((char*)this, 4);
}
/**
@@ -56,7 +56,9 @@ struct Edit {
int operator< (const Edit &rhs) const {
if(pos < rhs.pos) return 1;
if(pos > rhs.pos) return 0;
- return (chr < rhs.chr)? 1 : 0;
+ if(chr < rhs.chr) return 1;
+ if(chr > rhs.chr) return 0;
+ return (qchr < rhs.qchr)? 1 : 0;
}
/**
@@ -65,6 +67,7 @@ struct Edit {
int operator== (const Edit &rhs) const {
return(pos == rhs.pos &&
chr == rhs.chr &&
+ qchr == rhs.qchr &&
type == rhs.type);
}
@@ -75,10 +78,11 @@ struct Edit {
return pos != 1023;
}
- uint16_t type : 2; // 1 -> subst, 2 -> ins, 3 -> del, 0 -> empty
- uint16_t pos : 10; // position w/r/t search root
- uint16_t chr : 2; // character involved (for subst and ins)
- uint16_t reserved : 2; // reserved
+ uint32_t chr : 8; // reference character involved (for subst and ins)
+ uint32_t qchr : 8; // read character involved (for subst and del
+ uint32_t type : 4; // 1 -> mm, 2 -> SNP, 3 -> ins, 4 -> del
+ uint32_t pos : 10; // position w/r/t search root
+ uint32_t reserved : 2; // padding
friend std::ostream& operator<< (std::ostream& os, const Edit& e);
};
View
4 hit.h
@@ -129,7 +129,7 @@ class Hit {
hse.expand();
hse.back().type = EDIT_TYPE_MM;
hse.back().pos = i;
- hse.back().chr = charToDna5[(int)refcs[i]];
+ hse.back().chr = refcs[i];
}
}
}
@@ -183,7 +183,7 @@ class Hit {
for(size_t j = 0; j < hs[i].size(); j++) {
if(hs[i][j].type != EDIT_TYPE_MM) continue;
hits[i].mms.set(hs[i][j].pos);
- hits[i].refcs[hs[i][j].pos] = "ACGT"[hs[i][j].chr];
+ hits[i].refcs[hs[i][j].pos] = hs[i][j].chr;
}
}
}
View
@@ -43,8 +43,15 @@ void HitSet::reportUpTo(ostream& os, int khits) {
const Edit& e = h.edits[i];
os << e.pos;
if(e.type == EDIT_TYPE_SNP) os << "S";
- os << ":" << "ACGT"[e.chr] << ">" << seq[e.pos];
- if(i < h.edits.size()-1) os << ",";
+ os << ":" << (char)e.chr << ">" << (e.qchr != 0 ? (char)e.qchr : (char)seq[e.pos]);
+ if(i < h.edits.size()-1 || !h.cedits.empty()) os << ",";
+ }
+ for(size_t i = 0; i < h.cedits.size(); i++) {
+ const Edit& e = h.cedits[i];
+ os << e.pos;
+ if(e.type == EDIT_TYPE_SNP) os << "S";
+ os << ":" << (char)e.chr << ">" << (e.qchr != 0 ? (char)e.qchr : (char)seq[e.pos]);
+ if(i < h.cedits.size()-1) os << ",";
}
os << endl;
}
View
@@ -47,6 +47,11 @@ struct HitSetEnt {
for(it = edits.begin(); it != edits.end(); it++) {
it->serialize(fb);
}
+ sz = cedits.size();
+ fb.writeChars((const char*)&sz, 4);
+ for(it = cedits.begin(); it != cedits.end(); it++) {
+ it->serialize(fb);
+ }
}
/**
@@ -70,6 +75,12 @@ struct HitSetEnt {
for(uint32_t i = 0; i < sz; i++) {
edits[i].deserialize(fb);
}
+ fb.get((char*)&sz, 4);
+ assert_lt(sz, 1024);
+ cedits.resize(sz);
+ for(uint32_t i = 0; i < sz; i++) {
+ cedits[i].deserialize(fb);
+ }
}
/**
@@ -127,6 +138,34 @@ struct HitSetEnt {
}
/**
+ * Another way to get at an edit.
+ */
+ Edit& editAt(unsigned i) {
+ return edits[i];
+ }
+
+ /**
+ * Another way to get at a const edit.
+ */
+ const Edit& editAt(unsigned i) const {
+ return edits[i];
+ }
+
+ /**
+ * Get the ith color edit.
+ */
+ Edit& colorEditAt(unsigned i) {
+ return cedits[i];
+ }
+
+ /**
+ * Another way to get at an edit.
+ */
+ const Edit& colorEditAt(unsigned i) const {
+ return cedits[i];
+ }
+
+ /**
* Return the front entry.
*/
Edit& front() {
@@ -176,6 +215,7 @@ struct HitSetEnt {
uint16_t cost; // cost, including stratum
uint32_t oms; // # others
std::vector<Edit> edits; // edits to get from reference to subject
+ std::vector<Edit> cedits; // color edits to get from reference to subject
};
/**
@@ -200,12 +240,14 @@ struct HitSet {
* Write binary representation of HitSet to an OutFileBuf.
*/
void serialize(OutFileBuf& fb) const {
+ fb.write(color ? 1 : 0);
uint32_t i = seqan::length(name);
assert_gt(i, 0);
fb.writeChars((const char*)&i, 4);
fb.writeChars(seqan::begin(name), i);
i = seqan::length(seq);
assert_gt(i, 0);
+ assert_lt(i, 1024);
fb.writeChars((const char*)&i, 4);
for(size_t j = 0; j < i; j++) {
fb.write("ACGTN"[(int)seq[j]]);
@@ -224,6 +266,7 @@ struct HitSet {
* Repopulate a HitSet from its binary representation in FileBuf.
*/
void deserialize(FileBuf& fb) {
+ color = (fb.get() != 0 ? true : false);
uint32_t sz = 0;
if(fb.get((char*)&sz, 4) != 4) {
seqan::clear(name);
@@ -389,6 +432,7 @@ struct HitSet {
seqan::clear(seq);
seqan::clear(qual);
ents.clear();
+ color = false;
}
/**
@@ -433,6 +477,7 @@ struct HitSet {
seqan::String<char> qual;
int8_t maxedStratum;
std::vector<HitSetEnt> ents;
+ bool color; // whether read was orginally in colorspace
};
#endif /* HIT_SET_H_ */
Oops, something went wrong.

0 comments on commit b1006b5

Please sign in to comment.