### Exercise 2
Encoding and decoding a series of uncompressed audio files(WAV files) using the lossless data compression method 'Rice coding'.

In [32]:
from pydub import AudioSegment
from IPython.display import Audio
import numpy as np
import os
from scipy.io import wavfile
import wave

Retrieve audio files and play it in the application

In [54]:
def play_sound(wav_file):
    return Audio(filename=wav_file, autoplay=True)

sound_file1 = 'Sound1.wav'

# Play sound file 1
play_sound(sound_file1)

In [55]:
sound_file2 = 'Sound2.wav'

# Play sound file 2
play_sound(sound_file2)

Get file size of each audio files to be compared during decoding and encoding later

In [35]:
def get_file_size(file_path):
    return os.path.getsize(file_path)

print("File size of sound1.wav:",get_file_size(sound_file1), "bytes")
print("File size of sound1.wav:",get_file_size(sound_file2), "bytes")

File size of sound1.wav: 1002088 bytes
File size of sound1.wav: 1008044 bytes


Get the input sample from both audio files to begin the Rice encoding

In [36]:
# Get the input sample of both audio files
sound1 = AudioSegment.from_file(sound_file1)
sound2 = AudioSegment.from_file(sound_file2)

sample1 = sound1.get_array_of_samples()
sample2 = sound2.get_array_of_samples()

print("Input sample (first 10 values):", sample1[:10])
print("Input sample (first 10 values):", sample2[:10])

Input sample (first 10 values): array('h', [-7, -7, -7, -7, -8, -7, -6, -7, -5, -5])
Input sample (first 10 values): array('h', [-999, 886, -1325, 886, -1514, 957, -1861, 766, -2008, 1067])


Define the K bits and M(2^K) to be used later

In [37]:
K1 = 4 # First bits to be used
K2 = 2 # Second bits to be used

M1 = 2 ** K1 # First M to be used
M2 = 2 ** K2 # Second M to be used

print("M1 value:", M1)
print("M2 value:", M2)

M1 value: 16
M2 value: 4


Rice encoding is designed to work with non-negative integers therefore, ensuring that the samples are non-negative by making the negative value absolute.

In [38]:
# Ensure all values are non-negative
min_value1 = np.min(sample1)
min_value2 = np.min(sample2)

# If the most negative value is less than zero, adjust both samples
if min_value1 < 0:
    sample1 = np.abs(sample1)
if min_value2 < 0:
    sample2 = np.abs(sample2)

print(sample1)
print(sample2)

[7 7 7 ... 0 2 1]
[ 999  886 1325 ...   31  876  339]


Get the R1 by using the sample in the logic

In [39]:
# R1 is S & (M - 1) where & means Logic AND

# Calculate R1 for sample1
R1_1_1 = sample1 & (M1 - 1)
binary_R1_1_1 = [format(r1, f'0{K1}b') for r1 in R1_1_1]

R1_1_2 = sample1 & (M2 - 1)
binary_R1_1_2 = [format(r1, f'0{K2}b') for r1 in R1_1_2]

# Calculate R1 for sample2
R1_2_1 = sample2 & (M1 - 1)
binary_R1_2_1 = [format(r1, f'0{K1}b') for r1 in R1_2_1]

R1_2_2 = sample2 & (M2 - 1)
binary_R1_2_2 = [format(r1, f'0{K2}b') for r1 in R1_2_2]


print("Binary R1_1_1 for sample1:", binary_R1_1_1[:70])
print("Binary R1_1_2 for sample1:", binary_R1_1_2[:70])
print("Binary R1_2_1 for sample2:", binary_R1_2_1[:70])
print("Binary R1_2_2 for sample2:", binary_R1_2_2[:70])

Binary R1_1_1 for sample1: ['0111', '0111', '0111', '0111', '1000', '0111', '0110', '0111', '0101', '0101', '0110', '0101', '0110', '0111', '0101', '0101', '0101', '0101', '0100', '0101', '0100', '0101', '0101', '0101', '0110', '0100', '0011', '0100', '0101', '0100', '0011', '0101', '0100', '0100', '0101', '0101', '0011', '0011', '0001', '0100', '0011', '0011', '0010', '0010', '0011', '0100', '0010', '0010', '0100', '0010', '0010', '0001', '0011', '0001', '0010', '0001', '0010', '0010', '0001', '0001', '0001', '0001', '0001', '0001', '0001', '0001', '0000', '0001', '0010', '0010']
Binary R1_1_2 for sample1: ['11', '11', '11', '11', '00', '11', '10', '11', '01', '01', '10', '01', '10', '11', '01', '01', '01', '01', '00', '01', '00', '01', '01', '01', '10', '00', '11', '00', '01', '00', '11', '01', '00', '00', '01', '01', '11', '11', '01', '00', '11', '11', '10', '10', '11', '00', '10', '10', '00', '10', '10', '01', '11', '01', '10', '01', '10', '10', '01', '01', '01', '01', '01', '01', 

Get R2 by using the sample in the logic

In [51]:
# R2 is S >> K, written in unary, where >> means bit shifting

# Calculate R2 for sample1
R2_1_1 = sample1 >> K1
unary_R2_1_1 = ['1' * r2 for r2 in R2_1_1]

R2_1_2 = sample1 >> K2
unary_R2_1_2 = ['1' * r2 for r2 in R2_1_2]

# Calculate R2 for sample2
R2_2_1 = sample2 >> K1
unary_R2_2_1 = ['1' * r2 for r2 in R2_2_1]

R2_2_2 = sample2 >> K2
unary_R2_2_2 = ['1' * r2 for r2 in R2_2_2]

print("Unary R2_1_1 for sample1:", unary_R2_1_1[:70])
print("Unary R2_1_2 for sample1:", unary_R2_1_2[:70])
print("Unary R2_2_1 for sample2:", unary_R2_2_1[:70])
print("Unary R2_2_2 for sample2:", unary_R2_2_2[:70])

Unary R2_1_1 for sample1: ['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
Unary R2_1_2 for sample1: ['1', '1', '1', '1', '11', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '', '1', '1', '1', '', '1', '1', '1', '1', '1', '', '', '', '1', '', '', '', '', '', '1', '', '', '1', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
Unary R2_2_1 for sample2: ['11111111111111111111111111111111111111111111111111111111111111', '1111111111111111111111111111111111111111111111111111111', '1111111111111111111111111111111111111111111111111111111111111111111111111111111111', '1111111111111111111111111111111111111111111111111111111', '11111111111111111111111111111111111111111111111111

Encoding the audio files by combining R2 with R1 (R2R1)

In [41]:
# Combine unary R2 with binary R1 for sample 1 (R2R1 with K = 4)
encoded_values1 = [r2 + r1 for r1, r2 in zip(binary_R1_1_1, unary_R2_1_1)]
# Combine unary R2 with binary R1 for sample 1 (R2R1 with K = 2)
encoded_values2 = [r2 + r1 for r1, r2 in zip(binary_R1_1_2, unary_R2_1_2)]
# Combine unary R2 with binary R1 for sample 2 (R2R1 with K = 4)
encoded_values3 = [r2 + r1 for r1, r2 in zip(binary_R1_2_1, unary_R2_2_1)]
# Combine unary R2 with binary R1 for sample 2 (R2R1 with K = 2)
encoded_values4 = [r2 + r1 for r1, r2 in zip(binary_R1_2_2, unary_R2_2_2)]

print("Encoded values for sample1:", encoded_values1[:40])
print("Encoded values for sample1:", encoded_values2[:40])
print("Encoded values for sample2:", encoded_values3[:40])
print("Encoded values for sample2:", encoded_values4[:40])

Encoded values for sample1: ['0111', '0111', '0111', '0111', '1000', '0111', '0110', '0111', '0101', '0101', '0110', '0101', '0110', '0111', '0101', '0101', '0101', '0101', '0100', '0101', '0100', '0101', '0101', '0101', '0110', '0100', '0011', '0100', '0101', '0100', '0011', '0101', '0100', '0100', '0101', '0101', '0011', '0011', '0001', '0100']
Encoded values for sample1: ['111', '111', '111', '111', '1100', '111', '110', '111', '101', '101', '110', '101', '110', '111', '101', '101', '101', '101', '100', '101', '100', '101', '101', '101', '110', '100', '11', '100', '101', '100', '11', '101', '100', '100', '101', '101', '11', '11', '01', '100']
Encoded values for sample2: ['111111111111111111111111111111111111111111111111111111111111110111', '11111111111111111111111111111111111111111111111111111110110', '11111111111111111111111111111111111111111111111111111111111111111111111111111111111101', '11111111111111111111111111111111111111111111111111111110110', '111111111111111111111111111111

Decodes binary-encoded values back to numerical format and creates new AudioSegments (encoded_sound1, encoded_sound2, encoded_sound3, encoded_sound4) based on different encoding configurations, which are then saved as text files named 'sound1_Enc.ex2' and 'sound2_Enc.ex2'.

In [42]:
# Convert encoded values back to numerical format
decoded_values1 = [int(encoded, 2) for encoded in encoded_values1]
decoded_values2 = [int(encoded, 2) for encoded in encoded_values2]
decoded_values3 = [int(encoded, 2) for encoded in encoded_values3]
decoded_values4 = [int(encoded, 2) for encoded in encoded_values4]
        
# Create a new AudioSegment with the decoded values
# Sample 1 R2R1 with K = 4
encoded_sound1 = AudioSegment(decoded_values1, frame_rate=sound1.frame_rate, sample_width=sound1.sample_width, channels=sound1.channels)
# Sample 1 R2R1 with K = 2
encoded_sound2 = AudioSegment(decoded_values2, frame_rate=sound2.frame_rate, sample_width=sound2.sample_width, channels=sound2.channels)
# Sample 2 R2R1 with K = 4
encoded_sound3 = AudioSegment(decoded_values3, frame_rate=sound1.frame_rate, sample_width=sound1.sample_width, channels=sound1.channels)
# Sample 2 R2R1 with K = 2
encoded_sound4 = AudioSegment(decoded_values4, frame_rate=sound2.frame_rate, sample_width=sound2.sample_width, channels=sound2.channels)

# Save the encoded values as text files (Change here to save sound file 1 or 2)
# sound file 1 = encoded values 1 and 2, sound file 2 = encoded values 3 and 4
# sound file 1 and 2 with k bits = 4 : encoded values 1 and 3
# sound file 1 and 2 with k bits = 2 : encoded values 2 and 4
with open('sound1_Enc.ex2', 'w') as file:
    for encoded_value in encoded_values1:
        file.write(encoded_value + '\n')

with open('sound2_Enc.ex2', 'w') as file:
    for encoded_value in encoded_values3:
        file.write(encoded_value + '\n')

Calculate the size of Rice encoding based on the specified bit length (k). Calculates the Rice encoding size and percentage compression for two different encoding configurations (K=4 bits and K=2 bits) for two sound files (sound_file1 and sound_file2).

In [43]:
def rice_encoding_size(binary_samples, k):
    m = 2 ** k
    encoded_values = [(int(binary_sample, 2) >> k) + (int(binary_sample, 2) & (m - 1)) for binary_sample in binary_samples]
    return len(encoded_values) * k

# Define K values
K4 = 4
K2 = 2

# Calculate Rice encoding size for K = 4 bits
rice_size_encoded1 = rice_encoding_size(encoded_values1, K4)
rice_size_encoded2 = rice_encoding_size(encoded_values3, K4)

# Calculate Rice encoding size for K = 2 bits
rice_size_encoded1_k2 = rice_encoding_size(encoded_values2, K2)
rice_size_encoded2_k2 = rice_encoding_size(encoded_values4, K2)

# Calculate original size
original_size1 = (get_file_size(sound_file1) * 8)
original_size2 = (get_file_size(sound_file2) * 8)

# Calculate percentage compression for K = 4 bits
compression_ratio_encoded1 = (1 - rice_size_encoded1 / original_size1) * 100
compression_ratio_encoded2 = (1 - rice_size_encoded2 / original_size2) * 100

# Calculate percentage compression for K = 2 bits
compression_ratio_encoded1_k2 = (1 - rice_size_encoded1_k2 / original_size1) * 100
compression_ratio_encoded2_k2 = (1 - rice_size_encoded2_k2 / original_size2) * 100

print("original size:", original_size1, "bits")
print("original size:", original_size2, "bits")

print(f"Rice (K = {K4} bits) for Sound 1: {rice_size_encoded1} bits")
print(f"Rice (K = {K4} bits) for Sound 2: {rice_size_encoded2} bits")
print(f"% Compression (K = {K4} bits) for Sound 1: {compression_ratio_encoded1:.2f}%")
print(f"% Compression (K = {K4} bits) for Sound 2: {compression_ratio_encoded2:.2f}%")

print(f"\nRice (K = {K2} bits) for Sound 1: {rice_size_encoded1_k2} bits")
print(f"Rice (K = {K2} bits) for Sound 2: {rice_size_encoded2_k2} bits")
print(f"% Compression (K = {K2} bits) for Sound 1: {compression_ratio_encoded1_k2:.2f}%")
print(f"% Compression (K = {K2} bits) for Sound 2: {compression_ratio_encoded2_k2:.2f}%")


original size: 8016704 bits
original size: 8064352 bits
Rice (K = 4 bits) for Sound 1: 2004088 bits
Rice (K = 4 bits) for Sound 2: 2016000 bits
% Compression (K = 4 bits) for Sound 1: 75.00%
% Compression (K = 4 bits) for Sound 2: 75.00%

Rice (K = 2 bits) for Sound 1: 1002044 bits
Rice (K = 2 bits) for Sound 2: 1008000 bits
% Compression (K = 2 bits) for Sound 1: 87.50%
% Compression (K = 2 bits) for Sound 2: 87.50%


Define a function extract_s_values_from_file to read S values from a specified file. Extract S values from the two encoded sound files (sound1_Enc.ex2 and sound2_Enc.ex2)

In [44]:
# Rice decoding
# S = encoded values
# K = Bit depth
# M = 2 ^ K
# Q = number of 1s before first 0 in S in Binary
# R = next K bits of S
# result = Q x M + R

def extract_s_values_from_file(file_path):
    s_values = []

    try:
        with open(file_path, 'r') as file:
            # Read each line from the file
            lines = file.readlines()

            for line in lines:
                # Remove newline characters and append the S value to the list
                s_values.append(line.strip())
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return None

    return s_values

file_path_sound1 = 'sound1_Enc.ex2'
file_path_sound2 = 'sound2_Enc.ex2'

extracted_s_values_sound1 = extract_s_values_from_file(file_path_sound1)
extracted_s_values_sound2 = extract_s_values_from_file(file_path_sound2)

if extracted_s_values_sound1 is not None:
    print(extracted_s_values_sound1[:30])

if extracted_s_values_sound2 is not None:
    print(extracted_s_values_sound2[:30])

['0111', '0111', '0111', '0111', '1000', '0111', '0110', '0111', '0101', '0101', '0110', '0101', '0110', '0111', '0101', '0101', '0101', '0101', '0100', '0101', '0100', '0101', '0101', '0101', '0110', '0100', '0011', '0100', '0101', '0100']
['111111111111111111111111111111111111111111111111111111111111110111', '11111111111111111111111111111111111111111111111111111110110', '11111111111111111111111111111111111111111111111111111111111111111111111111111111111101', '11111111111111111111111111111111111111111111111111111110110', '11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111010', '111111111111111111111111111111111111111111111111111111111111101', '111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111110101', '111111111111111111111111111111111111111111111111110', '111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111

Define K and M value for decoding

In [45]:
# For extracted_s_values_sound1 (K = 4)
K_sound1 = 4
M_sound1 = 2 ** K_sound1

# For extracted_s_values_sound2 (K = 2)
K_sound2 = 2
M_sound2 = 2 ** K_sound2

Define rice decoding using the encoded values and the K and M values

In [46]:
def rice_decode(encoded_values, K, M):
    decoded_values = []

    for encoded_value in encoded_values:
        # Find the index of the first 0 in the binary string
        first_zero_index = encoded_value.find('0')

        # If there is no 0, set the first_zero_index to the length of the string
        if first_zero_index == -1:
            first_zero_index = len(encoded_value)

        # Extract Q and R
        Q = first_zero_index
        R = encoded_value[first_zero_index:first_zero_index + K]

        # Check if R is not empty before converting
        if R:
            # Convert R to decimal
            R_decimal = int(R, 2)
        else:
            R_decimal = 0

        # Decode the result
        result = Q * M + R_decimal

        # Append the decoded result to the list
        decoded_values.append(result)

    return decoded_values

decoded_values_sound1 = rice_decode(extracted_s_values_sound1, K_sound1, M_sound1)
decoded_values_sound2 = rice_decode(extracted_s_values_sound2, K_sound2, M_sound2)

print(decoded_values_sound1[:30])
print(decoded_values_sound2[:30])

[7, 7, 7, 7, 16, 7, 6, 7, 5, 5, 6, 5, 6, 7, 5, 5, 5, 5, 4, 5, 4, 5, 5, 5, 6, 4, 3, 4, 5, 4]
[249, 221, 337, 221, 381, 245, 465, 200, 504, 269, 517, 221, 565, 181, 556, 296, 580, 265, 560, 228, 485, 220, 420, 180, 308, 148, 228, 160, 176, 308]


Save decoded audio files as a new file

In [52]:
# Save audio file as wav file
def save_as_wav(decoded_values, file_path, frame_rate=44100, sample_width=2, channels=1):
    decoded_samples = np.array(decoded_values, dtype=np.int16)
    decoded_sound = AudioSegment(decoded_samples, frame_rate=frame_rate, sample_width=sample_width, channels=channels)
    decoded_sound.export(file_path, format="wav")

save_as_wav(decoded_values_sound1, 'sound1_Enc_Dec.wav', frame_rate=sound1.frame_rate, sample_width=sound1.sample_width, channels=sound1.channels)
save_as_wav(decoded_values_sound2, 'sound2_Enc_Dec.wav', frame_rate=sound2.frame_rate, sample_width=sound2.sample_width, channels=sound2.channels)

In [48]:
sound_file_decoded1 = 'sound1_Enc_Dec.wav'

# Play sound file 1 with K bit = 4
play_sound(sound_file_decoded1)

In [49]:
sound_file_decoded2 = 'sound2_Enc_Dec.wav'

# Play sound file 1 with K bit = 2
play_sound(sound_file_decoded2)

Check if the size of the original audio files and decoded file is the same

In [50]:
# Get file size after decode to see if it matches the original file size
print("File size of original sound1.wav:",get_file_size(sound_file1), "bytes")
print("File size of original sound1.wav:",get_file_size(sound_file2), "bytes")
print("File size of sound1_Enc_Dec.wav:",get_file_size(sound_file_decoded1), "bytes")
print("File size of sound2_Enc_Dec.wav:",get_file_size(sound_file_decoded2), "bytes")

File size of original sound1.wav: 1002088 bytes
File size of original sound1.wav: 1008044 bytes
File size of sound1_Enc_Dec.wav: 1002088 bytes
File size of sound2_Enc_Dec.wav: 1008044 bytes
