In [1]:
from process_wav import read_wav_file, brainwire_quant
from arithmetic import load_probabilities, build_cdf, arithmetic_decode, arithmetic_encode

In [2]:
# Load the probability table from disk (the file name suggests n-gram
# probabilities) and build the CDF that the arithmetic encoder/decoder
# calls below receive as their model argument.
MODEL_PATH = "ngram_prob_python.txt"
probs = load_probabilities(MODEL_PATH)
cdf = build_cdf(probs)

In [3]:
# Sanity check: show how many entries the CDF holds and what container type it is.
len(cdf), type(cdf)

(1427664, dict)

In [None]:
# Read one test recording and quantize every sample.
some_wav = './data/d1768885-813b-42b4-9b90-150a8b47e1db.wav'
samples = read_wav_file(some_wav)
quantized = list(map(brainwire_quant, samples))

# Build overlapping n-grams (stride 1): zipping n progressively shifted views
# of the sequence yields one n-tuple per start position and stops as soon as
# the most-shifted view runs out, i.e. after len(quantized) - n + 1 tuples.
n = 3
symbols = list(zip(*(quantized[shift:] for shift in range(n))))

# Encode the whole symbol sequence against the model
# (per the original note, the result is a single float).
code = arithmetic_encode(symbols, cdf)

# Decode again; the decoder is told how many symbols to emit.
# NOTE(review): decoded_symbols is not compared against symbols in this cell —
# confirm elsewhere that the round trip is lossless.
decoded_symbols = arithmetic_decode(code, cdf, len(symbols))

In [None]:
import struct

# On-disk record shared by write_compressed and read_compressed: a float64 code
# followed by a uint32 symbol count. The "<" prefix pins little-endian byte
# order and the standard sizes (8 + 4 bytes, no padding), so a file written on
# one platform reads back identically on any other. Native-order formats
# ("d", "I") would make the layout depend on the machine that wrote it.
_RECORD = struct.Struct("<dI")


def write_compressed(code, total_symbols, output_path):
    '''Save the encoded value and its symbol count to a binary file.

    Args:
        code: The encoded value; stored as a 64-bit float.
        total_symbols: Number of encoded symbols (so the decoder knows when
            to stop); stored as an unsigned 32-bit integer.
        output_path: Destination path; the file is created or overwritten.

    Raises:
        struct.error: If a value cannot be packed, e.g. total_symbols is
            negative or does not fit in 32 bits.
    '''
    # Pack before opening the file so that a packing failure does not leave
    # an empty or truncated output file behind.
    payload = _RECORD.pack(code, total_symbols)
    with open(output_path, "wb") as f:
        f.write(payload)


def read_compressed(input_path):
    '''Load the encoded value and its symbol count from a binary file.

    Args:
        input_path: Path of a file produced by write_compressed.

    Returns:
        A (code, total_symbols) tuple: the float code and the integer count.

    Raises:
        struct.error: If the file holds fewer bytes than one full record.
    '''
    with open(input_path, "rb") as f:
        # Read exactly one record; any trailing bytes are ignored.
        payload = f.read(_RECORD.size)
    code, total_symbols = _RECORD.unpack(payload)
    return code, total_symbols

# Persist the encoded value together with the symbol count the decoder needs.
write_compressed(
    code=code,
    total_symbols=len(symbols),
    output_path="compressed.bin",
)


In [None]:
# Compare on-disk sizes: the source WAV file versus the compressed output.
import os
original_size = os.stat(some_wav).st_size
compressed_size = os.stat("compressed.bin").st_size
# Ratio above 1 means the compressed file is smaller than the original.
print(f"Compression ratio: {original_size / compressed_size:.2f}x")