# Removing data-redundancy in overlapped DWT of audio

Chunks must overlap in order to avoid boundary artifacts when quantization is used. Overlapped chunks share the boundary coefficients, and therefore, they should be sent only once.

```
    +----------------+----------------+----------------+
    |     chunk 0    |     chunk 1    |     chunk 2    | ...
    +----------------+----------------+----------------+ 
00000------------oooooOOOOO  <-- extended chunk 0 (we can suppose that the missing samples 00000 = 0)
                 oooooOOOOO*******oooooOOOOO  <-- extended chunk 1
                                  oooooOOOOO*******oooooOOOOO  <-- extended chunk 2
                              
Coefficients generated by the overlapped areas oooooOOOOO are shared between adjacent chunks.
For example, the last coefficients of the chunk 0 (ooooo) are also the first coefficients
of the extended chunk 1, and the last coefficients of the extended chunk 0 (OOOOO)
are the first coefficients of the chunk 1.
   
Therefore, to reconstruct the i-th chunk of the audio signal using overlapped chunks,
we have to use the last (ooooo) coefficients from the previous (i-1)-th chunk,
as the first coefficients of the current i-th extended chunk. Then, the i-th chunk is reconstructed
using the central samples of the i-th extended chunk.
```

In [None]:
import sounddevice as sd
import pywt
import math
import numpy as np
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from scipy import signal
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import pylab

## Capture an audio sequence

In [None]:
def plot(y, xlabel='', ylabel='', title='', marker='.'):
    """Draw the sequence *y* against its sample index in a new figure.

    Args:
        y: Sequence of values to draw; only its length and values are used.
        xlabel: Text for the horizontal axis label.
        ylabel: Text for the vertical axis label.
        title: Title shown above the axes.
        marker: Matplotlib format string handed to ``Axes.plot``.
    """
    n_points = len(y)
    axes = plt.figure().add_subplot(111)
    axes.set_title(title)
    axes.grid()
    axes.xaxis.set_label_text(xlabel)
    axes.yaxis.set_label_text(ylabel)
    # Abscissa 0, 1, ..., len(y)-1 (as floats).
    indexes = np.linspace(0, n_points - 1, num=n_points)
    axes.plot(indexes, y, marker, markersize=1)
    # Do not block the caller while the figure is displayed.
    plt.show(block=False)

In [None]:
fs = 44100                 # Sampling rate (frames per second)
duration = 80000/44100     # seconds
# round() keeps the frame count at 80000 even if the floating-point product
# duration * fs lands a hair below the intended integer.
# NOTE: this assignment rebinds the name `signal` imported from scipy above.
signal = sd.rec(int(round(duration * fs)), samplerate=fs, channels=1, dtype=np.int16)
print("Say something!")
# sd.wait() blocks until the recording has finished. Its return value is the
# over-/underrun status of the recording, not a "still busy" flag, so it must
# not be used as a loop condition: a truthy status would never become falsy.
recording_status = sd.wait()
if recording_status:
    print("warning: problems while recording:", recording_status)
print("done")
# sd.rec() was asked for one channel; flatten the result to a 1-D sequence.
signal = signal.flatten()

In [None]:
# Show the whole recorded sequence.
plot(signal, xlabel="sample", ylabel="amplitude", title="original")

### Configuration of the experiment

In [None]:
levels = 2           # Number of levels of the DWT
# Name of the wavelet filters; exactly one of the assignments below is active.
#filters_name = "db5"
filters_name = "haar"
#filters_name = "db11"
#filters_name = "db20"
#filters_name = "bior2.2"
#filters_name = "bior3.5"
#filters_name = "rbio2.2"
wavelet = pywt.Wavelet(filters_name)
# Passed as the `mode` argument of pywt.wavedec()/pywt.waverec() in the cells below.
signal_mode_extension = "per"
chunk_size = 16      # Number of samples per chunk (stride used to slice `signal`)
chunk_number = 150   # Index of the chunk used in the experiments below

In [None]:
# Overlap = wavelet.dec_len * levels rounded up to the next power of two.
# (n - 1).bit_length() equals ceil(log2(n)) for every integer n >= 1 and,
# unlike ceil(log(n) / log(2)), cannot be pushed to the wrong integer by
# floating-point rounding when n is an exact power of two.
number_of_overlapped_samples = 1 << (wavelet.dec_len * levels - 1).bit_length()
number_of_overlapped_samples

## The rest of this notebook is not finished

In [None]:
# Index of the first sample of chunk number `chunk_number`.
chunk_start = chunk_size * chunk_number
# The previous chunk, extended to the right with the overlapped samples.
left_overlapped_chunk = signal[chunk_start - chunk_size : chunk_start + number_of_overlapped_samples]
# The current chunk, without any extension.
right_chunk = signal[chunk_start : chunk_start + chunk_size]

# Both curves are drawn starting at abscissa 0.
pylab.plot(np.linspace(0, len(left_overlapped_chunk) - 1, len(left_overlapped_chunk)),
           left_overlapped_chunk)
pylab.plot(np.linspace(0, len(right_chunk) - 1, len(right_chunk)),
           right_chunk)

In [None]:
# Lengths first, then the sample values, one item per output line.
print(len(left_overlapped_chunk), len(right_chunk), sep="\n")
print(left_overlapped_chunk, right_chunk, sep="\n")

In [None]:
# Decompose both chunks with the same wavelet, depth and extension mode.
left_decomp = pywt.wavedec(left_overlapped_chunk, wavelet=wavelet, level=levels, mode=signal_mode_extension)
right_decomp = pywt.wavedec(right_chunk, wavelet=wavelet, level=levels, mode=signal_mode_extension)

# coeffs_to_array() returns a pair; only its first element (the
# coefficients packed into a single array) is kept.
left_coeffs = pywt.coeffs_to_array(left_decomp)[0]
right_coeffs = pywt.coeffs_to_array(right_decomp)[0]

print(left_coeffs)
print(right_coeffs)

## Extract 3 consecutive chunks from the audio sequence

In [None]:
# Index of the first sample of the center chunk.
first_sample = chunk_size * chunk_number
left_chunk = signal[first_sample - chunk_size : first_sample]
center_chunk = signal[first_sample : first_sample + chunk_size]
right_chunk = signal[first_sample + chunk_size : first_sample + 2 * chunk_size]
chunks = np.concatenate([left_chunk, center_chunk, right_chunk])

# Draw each chunk as its own curve, one after another along the x axis.
offset = 0
for piece in (left_chunk, center_chunk, right_chunk):
    pylab.plot(np.linspace(offset, offset + len(piece) - 1, len(piece)), piece)
    offset += len(piece)
pylab.xlabel("sample")
pylab.ylabel("amplitude")
pylab.title("3 chunks")
pylab.show()

### Last coeffs of the left chunk

In [None]:
# Decompose the left chunk alone and show the last 10 entries of the
# coefficient array (element 0 of what coeffs_to_array() returns).
decomposition = pywt.wavedec(left_chunk, wavelet=wavelet, level=levels, mode=signal_mode_extension)
coeffs = pywt.coeffs_to_array(decomposition)
print(coeffs[0][-10:])

### First coeffs of the center chunk

In [None]:
# Decompose the center chunk alone and show the first 10 entries of the
# coefficient array (element 0 of what coeffs_to_array() returns).
decomposition = pywt.wavedec(center_chunk, wavelet=wavelet, level=levels, mode=signal_mode_extension)
coeffs = pywt.coeffs_to_array(decomposition)
print(coeffs[0][:10])

## Algorithm 0: Quantization of the chunks in the DWT domain

* For each chunk:
    * Transform.
    * Quantize.
    * Dequantize.
    * Detransform.

In [None]:
def transform_and_quantize(chunk):
    """Return the quantized DWT subbands of *chunk* as a list.

    The chunk is decomposed with pywt.wavedec() using the notebook-level
    `wavelet`, `levels` and `signal_mode_extension`, and every subband is
    passed through deadzone_quantizer() with `quantization_step`.

    NOTE(review): deadzone_quantizer and quantization_step are not defined
    in the visible part of this notebook; they must exist as globals.
    """
    subbands = pywt.wavedec(chunk, wavelet=wavelet, level=levels, mode=signal_mode_extension)
    return [deadzone_quantizer(band, quantization_step) for band in subbands]
    
def dequantize_and_detransform(decomposition):
    """Return the samples rebuilt from a list of quantized subbands.

    Every subband of *decomposition* is passed through
    deadzone_dequantizer() with `quantization_step`, and the result is
    inverse-transformed with pywt.waverec() using the notebook-level
    `wavelet` and `signal_mode_extension`.

    NOTE(review): deadzone_dequantizer and quantization_step are not defined
    in the visible part of this notebook; they must exist as globals.
    """
    restored_subbands = [deadzone_dequantizer(band, quantization_step) for band in decomposition]
    return pywt.waverec(restored_subbands, wavelet=wavelet, mode=signal_mode_extension)

def reconstruct_chunk(chunk):
    """Return *chunk* after a transform/quantize/dequantize/detransform round trip."""
    return dequantize_and_detransform(transform_and_quantize(chunk))
    
# Each chunk is processed on its own ...
reconstructed_left_chunk, reconstructed_center_chunk, reconstructed_right_chunk = (
    reconstruct_chunk(isolated_chunk)
    for isolated_chunk in (left_chunk, center_chunk, right_chunk)
)
# ... and the three chunks processed as a single block serve as reference.
ideal_chunks_reconstruction = reconstruct_chunk(chunks)

# Independent reconstructions, drawn one after another along the x axis.
offset = 0
for piece in (reconstructed_left_chunk, reconstructed_center_chunk, reconstructed_right_chunk):
    pylab.plot(np.linspace(offset, offset + len(piece) - 1, len(piece)), piece)
    offset += len(piece)
# Reference curve on top, starting at abscissa 0.
pylab.plot(np.linspace(0, len(ideal_chunks_reconstruction) - 1, len(ideal_chunks_reconstruction)),
           ideal_chunks_reconstruction, linestyle='dotted', label="ideal")
pylab.xlabel("sample")
pylab.ylabel("amplitude")
pylab.title("reconstruction of the 3 chunks")
pylab.legend(loc='upper right')
pylab.show()

Signal discontinuities usually happen between adjacent chunks :-/

## Algorithm 1: Overlap the chunks

* For each chunk:
    * Build an extended chunk using neighbor samples.
    * Transform the extended chunk.
    * Quantize the extended decomposition.
    * Dequantize the extended decomposition.
    * Detransform the extended decomposition.
    * Use only the non-overlapped samples of the reconstructed extended chunk.
    
```
+------------+--------------+-------------+
| left_chunk | center_chunk | right_chunk |
+------------+--------------+-------------+
           OO OOOOOOOOOOOOOO OO <--- extended center_chunk
           ^^                ^^
           ||                ||
           ++----------------++----- overlapped samples (for center_chunk)
```

Notice that the extended decomposition (in general) is going to be longer than the non-extended one, and therefore, it will need more bits to be represented. However, notice also that we are compressing the decomposition and therefore, the memory increment should be small. Anyway, to determine which strategy is the best one, ask the RD curves!

In [None]:
# Overlap = wavelet.dec_len * levels rounded up to the next power of two.
# (n - 1).bit_length() equals ceil(log2(n)) for every integer n >= 1 and,
# unlike ceil(log(n) / log(2)), cannot be pushed to the wrong integer by
# floating-point rounding when n is an exact power of two.
number_of_overlapped_samples = 1 << (wavelet.dec_len * levels - 1).bit_length()
number_of_overlapped_samples

### Creation of the extended chunk

In [None]:
# Samples borrowed from the neighbours of center_chunk.
last_samples_left_chunk = left_chunk[chunk_size - number_of_overlapped_samples :]
first_samples_right_chunk = right_chunk[: number_of_overlapped_samples]
extended_pieces = (last_samples_left_chunk, center_chunk, first_samples_right_chunk)
extended_chunk = np.concatenate(extended_pieces)

# Report the size of every part and of the whole extended chunk.
for description, samples in (
    ("number of samples overlapped with left_chunk =", last_samples_left_chunk),
    ("number of samples in the center_chunk =", center_chunk),
    ("number of samples overlapped with right_chunk =", first_samples_right_chunk),
    ("length of the extended center_chunk =", extended_chunk),
):
    print(description, len(samples))

# One curve per part, drawn one after another along the x axis.
offset = 0
for piece in extended_pieces:
    pylab.plot(np.linspace(offset, offset + len(piece) - 1, len(piece)), piece)
    offset += len(piece)
pylab.xlabel("sample")
pylab.ylabel("amplitude")
pylab.title("the extended chunk")
pylab.show()

### Transform, quantize, dequantize and detransform the extended chunk

In [None]:
reconstructed_extended_chunk = reconstruct_chunk(extended_chunk)

# Positions, inside the extended chunk, where center_chunk begins and ends.
center_begin = number_of_overlapped_samples
center_end = chunk_size + number_of_overlapped_samples
reconstructed_last_samples_left_chunk = reconstructed_extended_chunk[0:center_begin]
reconstructed_center_chunk = reconstructed_extended_chunk[center_begin:center_end]
reconstructed_first_samples_right_chunk = reconstructed_extended_chunk[center_end:]

# The same span of samples, taken from the joint reconstruction of the 3 chunks.
ideal_reconstruction = ideal_chunks_reconstruction[
    chunk_size - number_of_overlapped_samples:
    2*chunk_size + number_of_overlapped_samples]

# One curve per part of the extended chunk; only the center one is labelled.
offset = 0
for piece, plot_options in (
    (reconstructed_last_samples_left_chunk, {}),
    (reconstructed_center_chunk, {"label": "center_chunk"}),
    (reconstructed_first_samples_right_chunk, {}),
):
    pylab.plot(np.linspace(offset, offset + len(piece) - 1, len(piece)),
               piece, **plot_options)
    offset += len(piece)
pylab.plot(np.linspace(0, len(ideal_reconstruction) - 1, len(ideal_reconstruction)),
           ideal_reconstruction, linestyle='dotted', label="ideal")
pylab.xlabel("sample")
pylab.ylabel("amplitude")
pylab.title("reconstructed extended chunk")
pylab.legend()
pylab.show()

Now, center_chunk is reconstructed without discontinuities :-)

### Are the "overlapped" coeffs repeated between chunks?

# Quantized subbands of the extended chunk, packed into a single array so
# that they can be compared with those of the neighbouring chunks.
# NOTE(review): the original statement was truncated after "quan"; it has
# been completed following the coeffs_to_array(...)[0] pattern used earlier
# in this notebook -- confirm that this is the intended output.
quantized_subbands = transform_and_quantize(extended_chunk)
print(pywt.coeffs_to_array(quantized_subbands)[0])


In [None]:
# Move on to the next chunk (re-running this cell advances once more).
chunk_number += 1
# Index of the first sample of the new center chunk.
first_sample = chunk_size * chunk_number
left_chunk = signal[first_sample - chunk_size : first_sample]
center_chunk = signal[first_sample : first_sample + chunk_size]
right_chunk = signal[first_sample + chunk_size : first_sample + 2 * chunk_size]
chunks = np.concatenate([left_chunk, center_chunk, right_chunk])

# Draw each chunk as its own curve, one after another along the x axis.
offset = 0
for piece in (left_chunk, center_chunk, right_chunk):
    pylab.plot(np.linspace(offset, offset + len(piece) - 1, len(piece)), piece)
    offset += len(piece)
pylab.xlabel("sample")
pylab.ylabel("amplitude")
pylab.title("3 chunks")
pylab.show()

In [None]:
# Samples borrowed from the neighbours of center_chunk.
last_samples_left_chunk = left_chunk[chunk_size - number_of_overlapped_samples :]
first_samples_right_chunk = right_chunk[: number_of_overlapped_samples]
extended_pieces = (last_samples_left_chunk, center_chunk, first_samples_right_chunk)
extended_chunk = np.concatenate(extended_pieces)

# Report the size of every part and of the whole extended chunk.
for description, samples in (
    ("number of samples overlapped with left_chunk =", last_samples_left_chunk),
    ("number of samples in the center_chunk =", center_chunk),
    ("number of samples overlapped with right_chunk =", first_samples_right_chunk),
    ("length of the extended center_chunk =", extended_chunk),
):
    print(description, len(samples))

# One curve per part, drawn one after another along the x axis.
offset = 0
for piece in extended_pieces:
    pylab.plot(np.linspace(offset, offset + len(piece) - 1, len(piece)), piece)
    offset += len(piece)
pylab.xlabel("sample")
pylab.ylabel("amplitude")
pylab.title("the extended chunk")
pylab.show()

In [None]:
reconstructed_extended_chunk = reconstruct_chunk(extended_chunk)
reconstructed_last_samples_left_chunk = reconstructed_extended_chunk[0:number_of_overlapped_samples]
reconstructed_center_chunk = reconstructed_extended_chunk[number_of_overlapped_samples:chunk_size + number_of_overlapped_samples]
reconstructed_first_samples_right_chunk = reconstructed_extended_chunk[chunk_size + number_of_overlapped_samples:]

# `chunks` was rebuilt after chunk_number was advanced, so the reference
# reconstruction has to be recomputed as well; otherwise the "ideal" curve
# would still show the previous trio of chunks.
ideal_chunks_reconstruction = reconstruct_chunk(chunks)
# Span of the joint reconstruction that matches the extended chunk.
ideal_reconstruction = ideal_chunks_reconstruction[
    chunk_size - number_of_overlapped_samples:
    2*chunk_size + number_of_overlapped_samples]

# The three parts of the reconstructed extended chunk, drawn one after
# another along the x axis; only the center one is labelled.
pylab.plot(np.linspace(0,
                       len(reconstructed_last_samples_left_chunk)-1,
                       len(reconstructed_last_samples_left_chunk)),
           reconstructed_last_samples_left_chunk)
pylab.plot(np.linspace(len(reconstructed_last_samples_left_chunk),
                       len(reconstructed_last_samples_left_chunk) + len(reconstructed_center_chunk) - 1,
                       len(reconstructed_center_chunk)),
           reconstructed_center_chunk, label="center_chunk")
pylab.plot(np.linspace(len(reconstructed_last_samples_left_chunk) + len(reconstructed_center_chunk),
                       len(reconstructed_last_samples_left_chunk) + len(reconstructed_center_chunk) + len(reconstructed_first_samples_right_chunk) - 1,
                       len(reconstructed_first_samples_right_chunk)),
           reconstructed_first_samples_right_chunk)
# Reference curve on top, starting at abscissa 0.
pylab.plot(np.linspace(0,
                       len(ideal_reconstruction)-1,
                       len(ideal_reconstruction)),
           ideal_reconstruction, linestyle='dotted', label="ideal")
pylab.xlabel("sample")
pylab.ylabel("amplitude")
pylab.title("reconstructed extended chunk")
pylab.legend()
pylab.show()

### Using only the central coefficients of the extended chunk

In [None]:
# Experiment: drop coefficients from both ends of every quantized subband of
# the extended chunk and reconstruct from what is left.
quantized_subbands = transform_and_quantize(extended_chunk)
# Number of entries to drop at each end; halved before every use in the loop.
foa = number_of_overlapped_samples#//2
_foa = foa  # untouched copy, used to slice the reconstructed samples below
print(foa)
_quantized_subbands = [None]*len(quantized_subbands)
# Walk from the last subband of the list down to index 1, halving the
# trimming width at every step.
for i in range(len(quantized_subbands)-1, 0, -1):
    foa >>= 1
    print(foa)
    _quantized_subbands[i] = quantized_subbands[i][foa:len(quantized_subbands[i])-foa].copy()
# Subband 0 is trimmed with the value of foa left by the loop, i.e. the same
# width as subband 1.
_quantized_subbands[0] = quantized_subbands[0][foa:len(quantized_subbands[0])-foa].copy()    
# NOTE(review): the reconstruction is trimmed again by _foa samples per side,
# after the coefficients were already trimmed; the length printed below is
# presumably smaller than chunk_size -- confirm that this is intended.
# NOTE(review): if _foa were 0, the slice [0:-0] would be empty.
reconstructed_center_chunk = dequantize_and_detransform(_quantized_subbands)[_foa:-_foa]
print(len(reconstructed_center_chunk))

# The left and right parts plotted here are the ones computed in an earlier
# cell; only reconstructed_center_chunk is recomputed in this cell.
pylab.plot(np.linspace(0,
                       len(reconstructed_last_samples_left_chunk)-1,
                       len(reconstructed_last_samples_left_chunk)),
           reconstructed_last_samples_left_chunk)
pylab.plot(np.linspace(len(reconstructed_last_samples_left_chunk),
                       len(reconstructed_last_samples_left_chunk) + len(reconstructed_center_chunk) - 1,
                       len(reconstructed_center_chunk)),
           reconstructed_center_chunk, label="center_chunk")
pylab.plot(np.linspace(len(reconstructed_last_samples_left_chunk) + len(reconstructed_center_chunk),
                       len(reconstructed_last_samples_left_chunk) + len(reconstructed_center_chunk) + len(reconstructed_first_samples_right_chunk) - 1,
                       len(reconstructed_first_samples_right_chunk)),
           reconstructed_first_samples_right_chunk)
# Reference curve (also computed in an earlier cell), starting at abscissa 0.
pylab.plot(np.linspace(0,
                       len(ideal_reconstruction)-1,
                       len(ideal_reconstruction)),
           ideal_reconstruction, linestyle='dotted', label="ideal")
pylab.xlabel("sample")
pylab.ylabel("amplitude")
pylab.title("reconstructed extended chunk")
pylab.legend(loc='upper right')
pylab.show()

Now, center_chunk is reconstructed without discontinuities :-)

## Only for the sake of curiosity ...

In [None]:
# Subbands of the extended center chunk, one curve per subband.
decomposition = pywt.wavedec(extended_chunk, wavelet=wavelet, level=levels, mode=signal_mode_extension)
# Legend labels count down from the number of subbands to 1, in list order.
subband_index = len(decomposition)
# Iterate over the decomposition computed just above; the name
# `extended_decomposition` is not assigned until a later cell.
for subband in decomposition:
    pylab.plot(np.linspace(0, len(subband)-1, len(subband)), subband, label=subband_index)
    subband_index -= 1
pylab.xlabel("coefficient")
pylab.ylabel("amplitude")
pylab.legend(loc='upper right')
pylab.title("extended center_chunk's subbands")
pylab.show()

In [None]:
# Subbands of the (non-extended) center chunk, one curve per subband.
decomposition = pywt.wavedec(center_chunk, wavelet=wavelet, level=levels, mode=signal_mode_extension)
# Legend labels count down from the number of subbands to 1, in list order.
countdown = range(len(decomposition), 0, -1)
for subband_index, subband in zip(countdown, decomposition):
    coefficient_positions = np.linspace(0, len(subband) - 1, len(subband))
    pylab.plot(coefficient_positions, subband, label=subband_index)
pylab.xlabel("coefficient")
pylab.ylabel("amplitude")
pylab.legend(loc='upper right')
pylab.title("center_chunk's subbands")
pylab.show()

In [None]:
# Quantized subbands of the extended center chunk, one curve per subband.
extended_decomposition = pywt.wavedec(extended_chunk, wavelet=wavelet, level=levels, mode=signal_mode_extension)
# Legend labels count down from the number of subbands to 1, in list order.
countdown = range(len(extended_decomposition), 0, -1)
for subband_index, subband in zip(countdown, extended_decomposition):
    quantized_subband = deadzone_quantizer(subband, quantization_step)
    coefficient_positions = np.linspace(0, len(quantized_subband) - 1, len(quantized_subband))
    pylab.plot(coefficient_positions, quantized_subband, label=subband_index)
pylab.xlabel("coefficient")
pylab.ylabel("amplitude")
pylab.legend(loc='upper right')
pylab.title("quantized extended center_chunk's subbands")
pylab.show()

In [None]:
# Quantized subbands of the (non-extended) center chunk, one curve per subband.
decomposition = pywt.wavedec(center_chunk, wavelet=wavelet, level=levels, mode=signal_mode_extension)
# Legend labels count down from the number of subbands to 1, in list order.
countdown = range(len(decomposition), 0, -1)
for subband_index, subband in zip(countdown, decomposition):
    quantized_subband = deadzone_quantizer(subband, quantization_step)
    coefficient_positions = np.linspace(0, len(quantized_subband) - 1, len(quantized_subband))
    pylab.plot(coefficient_positions, quantized_subband, label=subband_index)
pylab.xlabel("coefficient")
pylab.ylabel("amplitude")
pylab.legend(loc='upper right')
pylab.title("quantized center_chunk's subbands")
pylab.show()