In [1]:
import numpy as np
import math

In [2]:
# Width in bits of one packed (decoded) word, and of one byte.
bits = 12
bytebits = 8
decoded_types = [np.uint8, np.uint16, np.uint32, np.uint64]

# Pick the narrowest unsigned type that can hold one "bits"-bit word.
decoded_type = None
for t in decoded_types:
    if np.dtype(t).itemsize*bytebits >= bits:
        decoded_type = t
        break
if decoded_type is None:
    # Without this guard np.dtype(None) would silently fall back to float64.
    raise ValueError(str(bits) + " is too many bits")
decoded_size = np.dtype(decoded_type).itemsize*bytebits

# The encoded stream is handled in 32-bit words unless the decoded word is wider.
if decoded_size <= 32:
    encoded_type = np.uint32
elif decoded_size <= 64:
    encoded_type = np.uint64
else:
    raise ValueError(str(decoded_size) + " is too many bits")

encoded_size = np.dtype(encoded_type).itemsize*bytebits

gcd = math.gcd(bits, encoded_size)

# how many words of encoded_type make up one encoded block
encoded_stride = bits // gcd
# how many words of "bits" bits fit in one encoded block
decoded_stride = encoded_size // gcd

print(encoded_stride, decoded_stride)

3 8


In [3]:
# Bytes in one encoded word, and bits in one complete encoded block
encoded_bytes = np.dtype(encoded_type).itemsize
total_bits = encoded_stride * encoded_size

# Calculate the significance of a bit in a little-endian word, counting from the left
def shift_little(bitindex, bytesize, wordsize):
    """Return the significance of bit ``bitindex`` when bytes are stored little-endian.

    The byte holding the bit keeps its position (byte 0 is least significant),
    while the bit position inside that byte is mirrored. ``wordsize`` is
    accepted for signature parity with ``shift_big`` and is not used.
    """
    byte_number, bit_in_byte = divmod(bitindex, bytesize)
    return (byte_number + 1) * bytesize - bit_in_byte - 1

# Calculate the significance of a bit in a big-endian word, counting from the left
def shift_big(bitindex, bytesize, wordsize):
    """Return the significance of bit ``bitindex`` in a word of ``wordsize`` bytes.

    Bit 0 is the most significant bit of the word, so the significance is
    simply mirrored over the word width.
    """
    highest_bit = wordsize * bytesize - 1
    return highest_bit - bitindex

# Calculate mask and shift operations to encode and decode "bits" bits
# packed little endian uints on a little endian machine
def little_on_little_op(b, bits, bytesize, encoded_bytes):
    """Describe where stream bit ``b`` lives in the decoded and encoded words.

    Returns ``(decoded_index, encoded_index, decoded_shift, encoded_shift)``:
    the index of the decoded word and of the encoded word containing the bit,
    and the bit's significance inside each of them.
    """
    word_bits = bytesize * encoded_bytes
    # Significance of the bit within the whole stream, little-endian bytes.
    stream_significance = shift_little(b, bytesize, encoded_bytes)
    return (
        stream_significance // bits,
        b // word_bits,
        stream_significance % bits,
        shift_little(b % word_bits, bytesize, encoded_bytes),
    )

# Calculate mask and shift operations to encode and decode "bits" bits
# packed big endian uints on a big endian machine
def big_on_big_op(b, bits, bytesize, encoded_bytes):
    """Describe where stream bit ``b`` lives in the decoded and encoded words.

    Returns ``(decoded_index, encoded_index, decoded_shift, encoded_shift)``:
    the index of the decoded word and of the encoded word containing the bit,
    and the bit's significance inside each of them.
    """
    # Derive the encoded word width from the arguments instead of relying on a
    # module-level ``encoded_size`` left over from another notebook cell.
    encoded_size = bytesize * encoded_bytes
    decoded_index = b // bits
    encoded_index = b // encoded_size
    # A decoded word is treated as a single "byte" of ``bits`` bits.
    decoded_shift = shift_big(b % bits, bits, 1)
    encoded_shift = shift_big(b % encoded_size, bytesize, encoded_bytes)
    return (decoded_index, encoded_index, decoded_shift, encoded_shift)

def big_on_little_op(b, bits, bytesize, encoded_bytes):
    """Describe where stream bit ``b`` lives in the decoded and encoded words.

    The decoded side uses the big-endian bit significance (``shift_big``) and
    the encoded word uses the little-endian one (``shift_little``).

    Returns ``(decoded_index, encoded_index, decoded_shift, encoded_shift)``.
    """
    # Derive the encoded word width from the arguments instead of relying on a
    # module-level ``encoded_size`` left over from another notebook cell.
    encoded_size = bytesize * encoded_bytes
    decoded_index = b // bits
    encoded_index = b // encoded_size
    # A decoded word is treated as a single "byte" of ``bits`` bits.
    decoded_shift = shift_big(b % bits, bits, 1)
    encoded_shift = shift_little(b % encoded_size, bytesize, encoded_bytes)
    return (decoded_index, encoded_index, decoded_shift, encoded_shift)

def little_on_big_op(b, bits, bytesize, encoded_bytes):
    """Describe where stream bit ``b`` lives in the decoded and encoded words.

    The decoded side uses the little-endian bit significance (``shift_little``)
    and the encoded word uses the big-endian one (``shift_big``).

    Returns ``(decoded_index, encoded_index, decoded_shift, encoded_shift)``.
    """
    # Derive the encoded word width from the arguments instead of relying on a
    # module-level ``encoded_size`` left over from another notebook cell.
    encoded_size = bytesize * encoded_bytes
    # Significance of the bit within the whole stream, little-endian bytes.
    stream_significance = shift_little(b, bytesize, encoded_bytes)
    decoded_index = stream_significance // bits
    encoded_index = b // encoded_size
    decoded_shift = stream_significance % bits
    encoded_shift = shift_big(b % encoded_size, bytesize, encoded_bytes)
    return (decoded_index, encoded_index, decoded_shift, encoded_shift)

def ops(total_bits, bits, bytesize, encoded_bytes, opfunc):
    """Build the table of mask/shift operations for one encoded block.

    ``opfunc`` is called once per bit index in ``range(total_bits)`` and must
    return ``(decoded_index, encoded_index, decoded_shift, encoded_shift)``.
    Bits that share the same decoded word, encoded word and net shift are
    merged by OR-ing their single-bit masks together.

    Returns a dict mapping ``(decoded_index, encoded_index, net_shift)`` to the
    combined mask, in order of first appearance.
    """
    merged = {}
    for bit in range(total_bits):
        dec_idx, enc_idx, dec_shift, enc_shift = opfunc(bit, bits, bytesize, encoded_bytes)
        group = (dec_idx, enc_idx, enc_shift - dec_shift)
        merged[group] = merged.get(group, 0) | (1 << dec_shift)
    return merged

def pretty_print(ops):
    """Print one line per operation: the key tuple followed by its mask in hex."""
    for operation, bitmask in ops.items():
        print(operation, hex(bitmask))

# Print the merged operation table for each op generator
for f in (little_on_little_op, big_on_big_op, big_on_little_op, little_on_big_op):
    print("\n" + f.__name__)
    table = ops(total_bits, bits, bytebits, encoded_bytes, f)
    pretty_print(table)



little_on_little_op
(0, 0, 0) 0xfff
(1, 0, 12) 0xfff
(2, 0, 24) 0xff
(3, 1, 4) 0xfff
(2, 1, -8) 0xf00
(4, 1, 16) 0xfff
(5, 1, 28) 0xf
(5, 2, -4) 0xff0
(6, 2, 8) 0xfff
(7, 2, 20) 0xfff

big_on_big_op
(0, 0, 20) 0xfff
(1, 0, 8) 0xfff
(2, 0, -4) 0xff0
(2, 1, 28) 0xf
(3, 1, 16) 0xfff
(4, 1, 4) 0xfff
(5, 1, -8) 0xf00
(5, 2, 24) 0xff
(6, 2, 12) 0xfff
(7, 2, 0) 0xfff

big_on_little_op
(0, 0, -4) 0xff0
(0, 0, 12) 0xf
(1, 0, 0) 0xf00
(1, 0, 16) 0xff
(2, 0, 20) 0xff0
(2, 1, 4) 0xf
(3, 1, -8) 0xf00
(3, 1, 8) 0xff
(4, 1, 12) 0xff0
(4, 1, 28) 0xf
(5, 1, 16) 0xf00
(5, 2, 0) 0xff
(6, 2, 4) 0xff0
(6, 2, 20) 0xf
(7, 2, 8) 0xf00
(7, 2, 24) 0xff

little_on_big_op
(0, 0, 24) 0xff
(1, 0, 20) 0xf
(0, 0, 8) 0xf00
(1, 0, 4) 0xff0
(2, 0, 0) 0xff
(3, 1, 28) 0xf
(2, 1, 16) 0xf00
(3, 1, 12) 0xff0
(4, 1, 8) 0xff
(5, 1, 4) 0xf
(4, 1, -8) 0xf00
(5, 2, 20) 0xff0
(6, 2, 16) 0xff
(7, 2, 12) 0xf
(6, 2, 0) 0xf00
(7, 2, -4) 0xff0


In [4]:
def validate(bits, pattern, decoded_dtype, encoded_dtype, opfunc):
    """Encode and decode one block with the ops from ``opfunc`` and print each stage.

    One block of decoded words is filled by cycling through ``pattern``,
    encoded with ``apply_encode_ops`` and decoded again with
    ``apply_decode_ops``. The raw bytes of the input, the encoded block and the
    decoded result are printed as lists of hex strings.

    Parameters
    ----------
    bits : number of significant bits per decoded word
    pattern : non-empty sequence of integers used to fill the decoded block
    decoded_dtype, encoded_dtype : ``np.dtype`` of the decoded and encoded words
    opfunc : op generator passed on to ``ops``

    Returns
    -------
    bool
        True if the decoded result equals the original decoded block.
    """
    bytebits = 8
    encoded_size = encoded_dtype.itemsize*bytebits
    gcd = math.gcd(bits, encoded_size)

    encoded_stride = bits // gcd
    decoded_stride = encoded_size // gcd

    decoded = np.zeros(decoded_stride, dtype=decoded_dtype)
    decoded_result = np.zeros(decoded_stride, dtype=decoded_dtype)

    for i in range(decoded_stride):
        decoded[i] = pattern[i % len(pattern)]

    print(list(map(hex, decoded.view(np.uint8))))

    encoded = np.zeros(encoded_stride, dtype=encoded_dtype)
    total_bits = encoded_size * encoded_stride
    encoded_bytes = encoded_dtype.itemsize

    my_ops = ops(total_bits, bits, bytebits, encoded_bytes, opfunc)

    apply_encode_ops(decoded, encoded, my_ops)

    print(list(map(hex, encoded.view(np.uint8))))

    apply_decode_ops(encoded, decoded_result, my_ops)

    print(list(map(hex, decoded_result.view(np.uint8))))

    # Report the round-trip outcome so callers do not have to compare the
    # printed byte lists by eye.
    return bool(np.array_equal(decoded, decoded_result))

def shift(i, shift):
    """Shift ``i`` left by ``shift`` bits, or right by ``-shift`` when it is not positive."""
    if shift > 0:
        return i << shift
    return i >> -shift

def apply_encode_ops(decoded_block, encoded_block, ops):
    """Fill ``encoded_block`` from ``decoded_block`` using the operation table ``ops``.

    ``encoded_block`` is zeroed first. For every ``(decoded_index,
    encoded_index, net_shift) -> mask`` entry, the masked bits of the decoded
    word are shifted by ``net_shift`` and OR-ed into the encoded word.
    """
    encoded_block.fill(0)
    to_encoded = encoded_block.dtype.type
    for (decoded_index, encoded_index, net_shift), raw_mask in ops.items():
        selected = decoded_block[decoded_index] & to_encoded(raw_mask)
        encoded_block[encoded_index] |= to_encoded(shift(selected, net_shift))
        
def apply_decode_ops(encoded_block, decoded_block, ops):
    """Fill ``decoded_block`` from ``encoded_block`` using the operation table ``ops``.

    ``decoded_block`` is zeroed first. For every ``(decoded_index,
    encoded_index, net_shift) -> mask`` entry, the encoded word is shifted by
    ``-net_shift``, masked, and OR-ed into the decoded word.
    """
    decoded_block.fill(0)
    to_encoded = encoded_block.dtype.type
    to_decoded = decoded_block.dtype.type
    for (decoded_index, encoded_index, net_shift), raw_mask in ops.items():
        realigned = shift(encoded_block[encoded_index], -net_shift)
        decoded_block[decoded_index] |= to_decoded(realigned & to_encoded(raw_mask))

# (decoded dtype, encoded dtype, op generator) for each combination to check
tests = (
    (np.dtype('>u2'), np.dtype('>u4'), big_on_big_op),
    (np.dtype('<u2'), np.dtype('>u4'), little_on_big_op),
    (np.dtype('<u2'), np.dtype('<u4'), little_on_little_op),
    (np.dtype('>u2'), np.dtype('<u4'), big_on_little_op),
)

# Run every combination on the same eight 12-bit words
for (decoded_dtype, encoded_dtype, func) in tests:
    print(func.__name__)
    validate(
        12,
        [0xabc, 0xdef, 0xaaa, 0xbbb, 0xccc, 0xfed, 0xcba, 0x001],
        decoded_dtype,
        encoded_dtype,
        func,
    )
    print()

big_on_big_op
['0xa', '0xbc', '0xd', '0xef', '0xa', '0xaa', '0xb', '0xbb', '0xc', '0xcc', '0xf', '0xed', '0xc', '0xba', '0x0', '0x1']
['0xab', '0xcd', '0xef', '0xaa', '0xab', '0xbb', '0xcc', '0xcf', '0xed', '0xcb', '0xa0', '0x1']
['0xa', '0xbc', '0xd', '0xef', '0xa', '0xaa', '0xb', '0xbb', '0xc', '0xcc', '0xf', '0xed', '0xc', '0xba', '0x0', '0x1']

little_on_big_op
['0xbc', '0xa', '0xef', '0xd', '0xaa', '0xa', '0xbb', '0xb', '0xcc', '0xc', '0xed', '0xf', '0xba', '0xc', '0x1', '0x0']
['0xbc', '0xfa', '0xde', '0xaa', '0xba', '0xbb', '0xcc', '0xdc', '0xfe', '0xba', '0x1c', '0x0']
['0xbc', '0xa', '0xef', '0xd', '0xaa', '0xa', '0xbb', '0xb', '0xcc', '0xc', '0xed', '0xf', '0xba', '0xc', '0x1', '0x0']

little_on_little_op
['0xbc', '0xa', '0xef', '0xd', '0xaa', '0xa', '0xbb', '0xb', '0xcc', '0xc', '0xed', '0xf', '0xba', '0xc', '0x1', '0x0']
['0xbc', '0xfa', '0xde', '0xaa', '0xba', '0xbb', '0xcc', '0xdc', '0xfe', '0xba', '0x1c', '0x0']
['0xbc', '0xa', '0xef', '0xd', '0xaa', '0xa', '0xbb', '0xb'

In [11]:
def encode_12_little_little(inp, out):
    """Pack the low 12 bits of each word of ``inp`` into ``out`` as a byte stream.

    Word ``i`` occupies bits ``12*i .. 12*i + 11`` of the output, counted from
    the least significant bit of byte 0. Full groups of 64 words are packed
    through a buffer of 24 little-endian 32-bit words; the remaining words are
    packed two at a time into 3 bytes, and a final odd word into 2 bytes (its
    upper nibble is padded with zero bits).

    Parameters
    ----------
    inp : sequence of integers (e.g. a ``np.uint16`` array)
    out : numpy array; it is viewed as bytes and written in place. Bytes
        beyond the encoded data are left untouched.

    Raises
    ------
    AssertionError
        If ``out`` holds fewer than ``len(inp) * 12`` bits.
    """
    bytebits = 8
    bits = 12
    out = out.view(np.uint8)
    encoded_type = np.dtype('<u4')
    decoded_words = len(inp)
    assert(len(out)*bytebits >= decoded_words*bits)

    encoded_bytes = encoded_type.itemsize

    # how many words of encoded_type make up one encoded block
    encoded_stride = 3
    # how many words of "bits" bits fit in one encoded block
    decoded_stride = 8

    mask = 0xfff
    # Keeps only the bits that fit into one 32-bit encoded word
    word_mask = 0xffffffff

    # How many blocks are encoded in one loop
    middle_loops = 8
    # How many decoded/encoded words and bytes are processed in one outer loop
    middle_decoded_words = middle_loops * decoded_stride
    middle_encoded_words = middle_loops * encoded_stride
    middle_encoded_bytes = middle_encoded_words * encoded_bytes

    blocks = decoded_words // middle_decoded_words
    rest = decoded_words % middle_decoded_words

    def encode_pair(k, l):
        # Plain Python ints: the shifts must not be truncated to the width of
        # the input dtype.
        k = int(k) & mask
        l = int(l) & mask
        return (k & 0xff, (k >> 8) | ((l & 0xf) << 4), l >> 4)

    loopbuffer_out = np.zeros(middle_encoded_words, dtype=encoded_type)
    loopbuffer_out_chars = loopbuffer_out.view(np.uint8)

    for block in range(blocks):
        decoded_start = block * middle_decoded_words
        # Convert to Python ints before shifting. NumPy scalar arithmetic with
        # Python ints keeps the (16-bit) input dtype under NEP 50 and would
        # drop every bit shifted past bit 15.
        words = [int(w) & mask
                 for w in inp[decoded_start:decoded_start + middle_decoded_words]]

        for middle in range(middle_loops):
            decoded_base = middle * decoded_stride
            encoded_base = middle * encoded_stride
            (w0, w1, w2, w3, w4, w5, w6, w7) = words[decoded_base:decoded_base + decoded_stride]

            # Words 2 and 5 straddle a 32-bit boundary: their low part goes to
            # the top of one encoded word, the rest to the bottom of the next.
            loopbuffer_out[encoded_base + 0] = (
                w0 | (w1 << 12) | (w2 << 24)
            ) & word_mask
            loopbuffer_out[encoded_base + 1] = (
                (w2 >> 8) | (w3 << 4) | (w4 << 16) | (w5 << 28)
            ) & word_mask
            loopbuffer_out[encoded_base + 2] = (
                (w5 >> 4) | (w6 << 8) | (w7 << 20)
            )

        char_start = block * middle_encoded_bytes
        out[char_start:char_start + middle_encoded_bytes] = loopbuffer_out_chars

    decoded_remainder_offset = blocks * middle_decoded_words
    encoded_remainder_offset = blocks * middle_encoded_bytes

    # Remaining complete pairs: two words -> three bytes
    for r in range(0, rest - 1, 2):
        k = inp[decoded_remainder_offset + r]
        l = inp[decoded_remainder_offset + r + 1]
        (a, b, c) = encode_pair(k, l)
        out[encoded_remainder_offset]     = a
        out[encoded_remainder_offset + 1] = b
        out[encoded_remainder_offset + 2] = c
        encoded_remainder_offset += 3

    # A single left-over word only needs two bytes
    if (rest % 2):
        k = inp[decoded_remainder_offset + rest - 1]
        (a, b, c) = encode_pair(k, 0)
        out[encoded_remainder_offset]     = a
        out[encoded_remainder_offset + 1] = b
            
def decode_12_little_little(inp, out):
    """Unpack a byte stream of 12-bit words from ``inp`` into ``out``.

    This is the inverse of the packing where word ``i`` occupies bits
    ``12*i .. 12*i + 11`` of the stream, counted from the least significant
    bit of byte 0. Full groups of 96 bytes are decoded through a buffer of 24
    little-endian 32-bit words; the remaining bytes are decoded three at a
    time into two words. A trailing group of two bytes yields one word plus
    the word formed by the left-over upper nibble; a single trailing byte
    yields one (incomplete) word holding that byte.

    Parameters
    ----------
    inp : numpy array; it is viewed as bytes.
    out : numpy array receiving the decoded words, written in place. Elements
        beyond the decoded data are left untouched.

    Raises
    ------
    AssertionError
        If ``out`` has room for fewer bits than ``inp`` contains.
    """
    bytebits = 8
    bits = 12
    inp = inp.view(np.uint8)
    encoded_type = np.dtype('<u4')
    decoded_words = len(out)
    assert(len(inp)*bytebits <= decoded_words*bits)

    encoded_bytes = encoded_type.itemsize

    # how many words of encoded_type make up one encoded block
    encoded_stride = 3
    # how many words of "bits" bits fit in one encoded block
    decoded_stride = 8

    mask = 0xfff

    # How many blocks are decoded in one loop
    middle_loops = 8
    # How many decoded/encoded words and bytes are processed in one outer loop
    middle_decoded_words = middle_loops * decoded_stride
    middle_encoded_words = middle_loops * encoded_stride
    middle_encoded_bytes = middle_encoded_words * encoded_bytes

    # Both counts come from the input length. Deriving the block count from
    # len(out) reads past the end of ``inp`` whenever ``out`` is larger than
    # strictly necessary.
    blocks = len(inp) // middle_encoded_bytes
    rest = len(inp) % middle_encoded_bytes

    def decode_triple(a, b, c):
        # Plain Python ints: shifting uint8 scalars would stay 8 bits wide
        # under NEP 50 and lose the upper bits.
        a, b, c = int(a), int(b), int(c)
        k = (a & 0xff) | ((b << 8) & 0xf00)
        l = ((b >> 4) & 0xf) | ((c << 4) & 0xff0)
        return (k, l)

    loopbuffer_in  = np.zeros(middle_encoded_bytes, dtype=np.uint8)
    loopbuffer_work = loopbuffer_in.view(encoded_type)
    loopbuffer_out = np.zeros(middle_decoded_words, dtype=out.dtype)

    for block in range(blocks):
        decoded_start = block * middle_decoded_words
        encoded_char_start = block * middle_encoded_bytes

        loopbuffer_in[:] = inp[encoded_char_start:encoded_char_start + middle_encoded_bytes]

        for middle in range(middle_loops):
            decoded_base = middle * decoded_stride
            encoded_base = middle * encoded_stride

            w0 = int(loopbuffer_work[encoded_base + 0])
            w1 = int(loopbuffer_work[encoded_base + 1])
            w2 = int(loopbuffer_work[encoded_base + 2])

            # Words 2 and 5 straddle a 32-bit boundary and are stitched
            # together from two encoded words.
            loopbuffer_out[decoded_base + 0] = w0 & mask
            loopbuffer_out[decoded_base + 1] = (w0 >> 12) & mask
            loopbuffer_out[decoded_base + 2] = ((w0 >> 24) | (w1 << 8)) & mask
            loopbuffer_out[decoded_base + 3] = (w1 >> 4) & mask
            loopbuffer_out[decoded_base + 4] = (w1 >> 16) & mask
            loopbuffer_out[decoded_base + 5] = ((w1 >> 28) | (w2 << 4)) & mask
            loopbuffer_out[decoded_base + 6] = (w2 >> 8) & mask
            loopbuffer_out[decoded_base + 7] = (w2 >> 20) & mask

        out[decoded_start:decoded_start + middle_decoded_words] = loopbuffer_out

    decoded_remainder_offset = blocks * middle_decoded_words
    encoded_remainder_offset = blocks * middle_encoded_bytes

    # Remaining complete triples: three bytes -> two words
    for r in range(0, rest - 2, 3):
        a = inp[encoded_remainder_offset + r]
        b = inp[encoded_remainder_offset + r + 1]
        c = inp[encoded_remainder_offset + r + 2]
        (k, l) = decode_triple(a, b, c)
        out[decoded_remainder_offset] = k
        out[decoded_remainder_offset + 1] = l
        decoded_remainder_offset += 2

    tail = rest % 3
    if tail > 0:
        # First byte that is not part of a complete triple
        tail_start = encoded_remainder_offset + rest - tail
        a = inp[tail_start]
        b = inp[tail_start + 1] if tail >= 2 else 0
        # c is always zero, otherwise we wouldn't have a tail!
        (k, l) = decode_triple(a, b, 0)

        out[decoded_remainder_offset] = k
        if tail >= 2:
            out[decoded_remainder_offset + 1] = l
        
mult = 45

# Demo input: consecutive values starting at zero
inp = np.arange(2*mult, dtype=np.uint16)
#inp = np.array((0xdea, ), dtype=np.uint16)
#inp = np.array((0xdea, 0xdbe), dtype=np.uint16)
#inp = np.array((0xdea, 0xdbe, 0xef0), dtype=np.uint16)
#inp = np.array((0xdea, 0xdbe, 0xef0, 0xabc), dtype=np.uint16)
#inp = np.array((0xdea, 0xdbe, 0xef0, 0xabc, 0xdef), dtype=np.uint16)
#inp = np.array((0xdea, 0xdbe, 0xef0, 0xabc, 0xdef, 0x1), dtype=np.uint16)

# 12 bits per word is 1.5 bytes per word; both buffers are rounded up
out = np.zeros(math.ceil(len(inp) * 12 / 8), dtype=np.uint8)
result = np.zeros(math.ceil(len(out) * 2 / 3), dtype=np.uint16)

print([hex(word) for word in inp])

encode_12_little_little(inp, out)

print([hex(byte) for byte in out.view(np.uint8)])

decode_12_little_little(out, result)

print([hex(word) for word in result])


['0x0', '0x1', '0x2', '0x3', '0x4', '0x5', '0x6', '0x7', '0x8', '0x9', '0xa', '0xb', '0xc', '0xd', '0xe', '0xf', '0x10', '0x11', '0x12', '0x13', '0x14', '0x15', '0x16', '0x17', '0x18', '0x19', '0x1a', '0x1b', '0x1c', '0x1d', '0x1e', '0x1f', '0x20', '0x21', '0x22', '0x23', '0x24', '0x25', '0x26', '0x27', '0x28', '0x29', '0x2a', '0x2b', '0x2c', '0x2d', '0x2e', '0x2f', '0x30', '0x31', '0x32', '0x33', '0x34', '0x35', '0x36', '0x37', '0x38', '0x39', '0x3a', '0x3b', '0x3c', '0x3d', '0x3e', '0x3f', '0x40', '0x41', '0x42', '0x43', '0x44', '0x45', '0x46', '0x47', '0x48', '0x49', '0x4a', '0x4b', '0x4c', '0x4d', '0x4e', '0x4f', '0x50', '0x51', '0x52', '0x53', '0x54', '0x55', '0x56', '0x57', '0x58', '0x59']
['0x0', '0x10', '0x0', '0x2', '0x30', '0x0', '0x4', '0x50', '0x0', '0x6', '0x70', '0x0', '0x8', '0x90', '0x0', '0xa', '0xb0', '0x0', '0xc', '0xd0', '0x0', '0xe', '0xf0', '0x0', '0x10', '0x10', '0x1', '0x12', '0x30', '0x1', '0x14', '0x50', '0x1', '0x16', '0x70', '0x1', '0x18', '0x90', '0x1', '0x