Import the neccessary libraries to accomplish this task.

In [1]:
import numpy as np
from scipy.io import wavfile
import math
import wave
import os

# What is rice coding?

Rice coding is a lossless data compression technique that divides the data into two parts: a quotient, obtained by dividing the original value by 2^K, where K is an input parameter, and a remainder. The quotient is encoded using unary coding, where the number of ones corresponds to the quotient value, followed by a zero. The remainder is then encoded using binary coding with a fixed number of bits K. 

### Rice's algorithm

Taken from Coursera Lab 9.005 Exercise 17. Implementing a rice encoder and decoder using Python 

**Encoding**
1. Fix an integer value K.
2. Compute the modulus, M by using the equation $ M = 2^K $
3. For S, the number to be encoded, find
    1. quotient = $ q = int(S/M) $
    2. remainder = $ r = S  modulo  M $
4. Generate Codeword
    1. The Quotient_Code is q in unary format.
    2. The Remainder_Code is r in binary using K bits.
    3. The Codeword will have the format <Quotient_Code\> <Remainder_Code\>

**Decoding**
1. Determine q by counting the number of 1s before the first 0.
2. Determine r reading the next K bits as a binary value.
3. Write out S, the encoded number, as q × M + r.

We first build the encoding and decoding functions.

In [2]:
def rice_encoder(samples, K):
    # get sign of samples
    if samples < 0:
        samples = - samples
        sign_sam = "-"
    else:
        sign_sam = "+"

    M = 2**K

    #find quotient and remainder
    q = int(samples / M)
    r = samples % M

    #get code word
    code_word = ""
    for i in range(q):
        code_word += "1"

    #get r in binary
    b = K
    a = "{0:0" + str(b) + "b}"
    r_bin = a.format(r)
    
    encode_str = sign_sam + code_word + "0" + str(r_bin)

    return encode_str

In [3]:
def rice_decoder(samples, K):
    #  separate
    sign_sam = samples[0]
    samples = samples[1:]

    M = 2**K

    #separate quotient and remainder 
    q_r = samples.split('0', 1)
    quotient = len(q_r[0]) 
    remainder = int(q_r[1][:K], 2)

    #get encoded number 
    samples = quotient * M + remainder

    #change sign of the decoded bit
    if sign_sam == '-':
        samples = -samples

    return samples

Now to build a function that reads the given uncompressed WAV file, encodes it into a new file with the extension `.ex2` and decode that file, back into a new uncompressed WAV file, with the extension `_EncDec.wav`.

In [4]:
#read sound file, encode, decode, and write new wav file
def encode_and_decode(filepath:str,k:int):
    sr, audio_data = wavfile.read(filepath)

    #encode sound file and save as _Enc.ex2
    index = filepath.find(".wav")
    encoded_name = filepath[:index] + "_Enc.ex2"
    #bit number
    K = k 
    with open(encoded_name, 'wb') as cf:
        for i in range(len(audio_data)):
            e = rice_encoder(audio_data[i], K) + '\n'
            cf.write(e.encode())
    cf.close()

    #decode the encoded sound file
    decoded_sound = []
    with open(encoded_name, 'rb') as cf:
        for i in cf:
            decoded_sound.append(rice_decoder(i.decode('utf8').strip(), K))
    cf.close()
    decoded_sound = np.array(decoded_sound, dtype='int16')

    #write decoded audio file in .wav format
    idx = encoded_name.find(".ex2")
    decoded_name = encoded_name[:idx] + "Dec.wav"
    wavfile.write(decoded_name, sr, decoded_sound)

When K = 2 bits

In [12]:
encode_and_decode('sounds/Sound1.wav',2)
encode_and_decode('sounds/Sound2.wav',2)

  Sample = - Sample


Display the size of the encoded file.

In [13]:
print(os.path.getsize('sounds/Sound1_Enc.ex2')>> 20,'MB')
print(os.path.getsize('sounds/Sound2_Enc.ex2')>> 20,'MB')

89 MB
591 MB


When K = 4 bits

In [14]:
encode_and_decode('sounds/Sound1.wav',4)
encode_and_decode('sounds/Sound2.wav',4)

  Sample = - Sample


In [15]:
print(os.path.getsize('sounds/Sound1_Enc.ex2')>> 20,'MB')
print(os.path.getsize('sounds/Sound2_Enc.ex2')>> 20,'MB')

25 MB
150 MB


Check that the original audio file and the reconstructed file are the same.

In [16]:
audio1 = wave.open('sounds/Sound1.wav','rb')
audio1_specs = audio1.getparams()
print('Sound1.wav:',audio1_specs)
print('Sound1.wav size, in bytes:',os.stat('sounds/Sound1.wav').st_size)

print('\n')

audio1_recon = wave.open('sounds/Sound1_EncDec.wav','rb')
audio1_specs_recon = audio1_recon.getparams()
print('Sound1_EncDec.wav:',audio1_specs_recon)
print('Sound1_EncDec.wav size, in bytes:',os.stat('sounds/Sound1_EncDec.wav').st_size)

print('\n')

audio2 = wave.open('sounds/Sound2.wav','rb')
audio2_specs = audio2.getparams()
print('Sound2.wav:',audio2_specs)
print('Sound2.wav size, in bytes:',os.stat('sounds/Sound2.wav').st_size)

print('\n')

audio2_recon = wave.open('sounds/Sound2_EncDec.wav','rb')
audio2_specs_recon = audio2_recon.getparams()
print('Sound2_EncDec.wav:',audio2_specs_recon)
print('Sound2_EncDec.wav size, in bytes:',os.stat('sounds/Sound2_EncDec.wav').st_size)

Sound1.wav: _wave_params(nchannels=1, sampwidth=2, framerate=44100, nframes=501022, comptype='NONE', compname='not compressed')
Sound1.wav size, in bytes: 1002088


Sound1_EncDec.wav: _wave_params(nchannels=1, sampwidth=2, framerate=44100, nframes=501022, comptype='NONE', compname='not compressed')
Sound1_EncDec.wav size, in bytes: 1002088


Sound2.wav: _wave_params(nchannels=1, sampwidth=2, framerate=44100, nframes=504000, comptype='NONE', compname='not compressed')
Sound2.wav size, in bytes: 1008044


Sound2_EncDec.wav: _wave_params(nchannels=1, sampwidth=2, framerate=44100, nframes=504000, comptype='NONE', compname='not compressed')
Sound2_EncDec.wav size, in bytes: 1008044


Files are identical. Successfully encoded and decoded using lossless compression.