Skip to content

Commit

Permalink
optimize Unary Encoder
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhangShangtong authored and ZhangShangtong committed Jul 9, 2014
1 parent accebef commit 0c23a1f
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 3 deletions.
45 changes: 42 additions & 3 deletions xapian-core/common/bitstream.cc
Expand Up @@ -51,6 +51,11 @@ static const unsigned char flstab[256] = {
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8
};

// mask_1s[i] has its (i + 1) least-significant bits set to 1,
// i.e. it is a run of (i + 1) one-bits, for i in 0..7.
const unsigned char Xapian::UnaryEncoder::mask_1s[8] = {
    0x01, 0x03, 0x07, 0x0f,
    0x1f, 0x3f, 0x7f, 0xff
};

// mask_nbits[i][n] selects n consecutive bits of an 8-bit variable, starting at
// bit i counted from the most significant bit (entries with i + n > 8 are 0)
const unsigned char Xapian::OrdinaryDecoder::mask_nbits[8][9] = {
{0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff},
{0, 0x40, 0x60, 0x70, 0x78, 0x7c, 0x7e, 0x7f, 0},
Expand All @@ -62,6 +67,7 @@ const unsigned char Xapian::OrdinaryDecoder::mask_nbits[8][9] = {
{0, 0x1, 0, 0, 0, 0, 0, 0, 0}
};

// Masks to retrieve the low bits of an 8-bit variable:
// mask_low_n_bits[n] has its n least-significant bits set, for n in 0..8.
const unsigned int Xapian::OrdinaryEncoder::mask_low_n_bits[9] = {
    0x00,
    0x01, 0x03, 0x07, 0x0f,
    0x1f, 0x3f, 0x7f, 0xff
};
Expand Down Expand Up @@ -89,17 +95,21 @@ inline int log2(unsigned val, bool up) {
return result;
}

// Return the number of bits the Unary Encoder needs to encode @n.
// The unary code of n is exactly n bits long, so this is the identity.
unsigned int get_Unary_encode_length(unsigned int n) {
    const unsigned int encoded_bits = n;
    return encoded_bits;
}

// Return the number of bits the Gamma Encoder needs to encode @n:
// twice the value of log2(n, false), plus one.
// NOTE(review): log2(n, false) is presumably floor(log2(n)) -- confirm against
// the helper's definition.
unsigned int get_Gamma_encode_length(unsigned int n) {
    const int log_part = log2(n, false);
    return 2 * log_part + 1;
}

namespace Xapian {

inline bool Encoder::check_acc() {

// If acc has 8 bits, append it to string chunk and set bits to 0.
if ( bits == 8 ) {
buf += acc;
acc = 0;
Expand All @@ -108,19 +118,48 @@ inline bool Encoder::check_acc() {
}
return false;
}


// Encode @n using the Unary Encoder.
//
// The unary code of n is (n - 1) one-bits followed by a single terminating
// zero-bit, e.g. Unary(5) = 11110.  Bits are shifted into @acc from the low
// end; whenever @acc holds 8 bits it is appended to @buf.
//
// @n : the value to encode.  Unary codes are defined for n >= 1; n == 0 is
//      encoded like n == 1 (only the terminating zero-bit is written).
//
// Assumes 0 <= bits < 8 on entry (check_acc() is expected to restore this
// after every write -- confirm against its definition).
void UnaryEncoder::encode(unsigned int n) {
    // No one-bits to write: emit just the terminating 0.  Handling n == 0
    // here also avoids a negative shift count / table index further down.
    if (n <= 1) {
        acc <<= 1;
        bits++;
        check_acc();
        return;
    }

    // Kept unsigned so that very large n cannot overflow a signed int.
    unsigned int num_of_1s = n - 1;
    // Number of free bit positions left in acc.
    const unsigned int room = 8 - bits;

    if (num_of_1s <= room) {
        // All the one-bits fit into the current accumulator byte.
        acc <<= num_of_1s;
        acc |= mask_1s[num_of_1s - 1];
        bits += num_of_1s;
        check_acc();
    } else {
        // Top up the current byte with one-bits and flush it.
        acc <<= room;
        acc |= mask_1s[room - 1];
        buf += acc;
        num_of_1s -= room;
        acc = 0;
        bits = 0;

        // Emit whole bytes of one-bits, keeping between 1 and 8 ones back
        // for the accumulator.
        while (num_of_1s > 8) {
            buf += static_cast<char>(0xff);
            num_of_1s -= 8;
        }

        acc |= mask_1s[num_of_1s - 1];
        bits = num_of_1s;
        check_acc();
    }

    // Terminating zero-bit.
    acc <<= 1;
    bits++;
    check_acc();
}

void GammaEncoder::encode(unsigned int n) {
int n_bin_bits = log2(n,false)+1;
UnaryEncoder u(buf, acc, bits);
Expand Down
7 changes: 7 additions & 0 deletions xapian-core/common/bitstream.h
Expand Up @@ -47,6 +47,8 @@ namespace Xapian {
* at the end of encoding there are often fewer than 8 bits left over; those bits are kept in @acc
* @bits : the number of valid bits in @acc */
class Encoder{

protected:
std::string& buf;
unsigned char& acc;
int& bits;
Expand All @@ -70,6 +72,10 @@ class Encoder{
* Therefore, the length of the encoding of an integer x is |Unary(x)| = x.
* As an example, if x = 5 we have UN(5) = 11110. */
class UnaryEncoder : public Encoder{

// mask_1s[i] is a run of (i + 1) one-bits in the low end of a byte
static const unsigned char mask_1s[8];

public:
UnaryEncoder(std::string& buf_, unsigned char& acc_, int& bits_)
: Encoder(buf_, acc_, bits_) { }
Expand Down Expand Up @@ -175,6 +181,7 @@ class Decoder {

// decode a number encoded by Unary Encoder
class UnaryDecoder : public Decoder {

public:
unsigned int decode();
UnaryDecoder(const char*& pos_, const char* end_, unsigned char& acc_, int& acc_bits_, int& p_bit_)
Expand Down

0 comments on commit 0c23a1f

Please sign in to comment.