[![Binder](https://mybinder.org/badge_logo.svg)](https://nbviewer.org/github/Sistemas-Multimedia/Sistemas-Multimedia.github.io/blob/master/milestones/07-DCT/block_DCT_compression.ipynb)

# Image Compression with YCoCg + 2D-DCT

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import math
import os
import pylab
import cv2

!ln -sf ~/repos/image_IO/logging_config.py .
!ln -sf ~/repos/image_IO/image_3.py .
import image_3 as RGB_image
!ln -sf ~/repos/image_IO/image_1.py .
import image_1 as gray_image
!ln -sf ~/repos/DCT/block_DCT.py .
import block_DCT
!ln -sf ~/repos/YCoCg/YCoCg.py .
import YCoCg as YUV
#!ln -sf ~/MRVC/src/color_DCT.py .
#import color_DCT as YUV
#!ln -sf ~/MRVC/src/RGB.py .
#import RGB as YUV
!ln -sf ~/repos/information_theory/information.py .
import information
!ln -sf ~/repos/information_theory/distortion.py .
import distortion
!ln -sf ~/repos/scalar_quantization/quantization.py .
import quantization
!ln -sf ~/repos/quantization/deadzone_quantization.py .
#import deadzone_quantizer as Q
#import deadzone_quantization as deadzone
from deadzone_quantization import Deadzone_Quantizer as Quantizer


## Configuration

In [None]:
HOME = os.environ["HOME"]
#test_image = "../sequences/stockholm/"
test_image = HOME + "/repos/MRVC/images/lena_color/"
#test_image = "../images/lena_bw/"

RGB_image.write = RGB_image.debug_write # Faster, but lower compression
#RGB_image.write = information.write # The fastest, but returns only an estimation of the length
gray_image.write = gray_image.debug_write # Faster, but lower compression
#gray_image.write = information.write # The fastest, but returns only an estimation of the length

In [None]:
block_y_side = block_x_side = 8 # Block-size used by JPEG
# block_y_side = block_x_side = 16

In [None]:
N_components = 3

## Testing `block_DCT.analyze_block()` and `block_DCT.synthesize_block()`

Let's see how the DCT concentrates the energy of the signal in a few coefficients. These methods compute the forward and the backward transforms of the input block (the input is not divided into blocks).

In [None]:
#a = np.random.randint(low=0, high=100, size=(4,4,3))
a = np.full(shape=(5, 4, 3), fill_value=10, dtype=np.int16) + np.random.randint(low=-5, high=5, size=(5, 4, 3))
# 5 is the number of rows
# 4 is the number of columns
# 3 is the number of (for example, RGB) channels

In [None]:
a

In [None]:
np.average(a)

In [None]:
RGB_image.show(RGB_image.normalize(a))

In [None]:
b = block_DCT.analyze_block(a)

In [None]:
b

In [None]:
b.astype(np.int16)

In [None]:
RGB_image.show(RGB_image.normalize(b))

In [None]:
c = block_DCT.synthesize_block(b)

In [None]:
c.astype(np.int16)

In [None]:
RGB_image.show(RGB_image.normalize(c))

In [None]:
# Round to the nearest integer before comparing: a plain astype() truncates
# toward zero, so a reconstructed value such as 9.9999 would become 9 and
# the round-trip check would fail because of floating-point round-off only.
# NOTE(review): assumes synthesize_block() returns floating-point samples.
(a == np.rint(c).astype(np.int32)).all()

## Testing `block_DCT.analyze_image()` and `block_DCT.synthesize_image()`
Now we apply the block transform to an image that previously has been divided into blocks.

In [None]:
img = RGB_image.read(test_image, 0)
RGB_image.show(img, title="Original")

In [None]:
DCT_img = block_DCT.analyze_image(img, block_y_side, block_x_side)
#DCT_img = block_DCT.analyze_image(img, 2, 2)

In [None]:
RGB_image.show(RGB_image.normalize(DCT_img), f"{block_y_side}x{block_x_side}-DCT domain of {test_image}")

In [None]:
RGB_image.show(RGB_image.normalize(DCT_img[:64, :64]), "detail [0:64, 0:64]")

Again, as it can be seen, most of the energy of each block has been concentrated in the low-pass frequency component (DC component).

### Reconstruction and error

In [None]:
recons_img = block_DCT.synthesize_image(DCT_img, block_y_side, block_x_side)

In [None]:
RGB_image.show(recons_img.astype(np.uint8), "Reconstructed image")

In [None]:
error = img - recons_img

In [None]:
RGB_image.show(RGB_image.normalize(error), "DCT floating-point error")
#RGB_image.show(error, "DCT floating-point error")

This error is generated by the truncation of the floating-point coefficients (remember that we work with 16-bit integers) after the analysis, and also by the truncation of the floating-point pixels after the synthesis.

## Switching between blocks and subbands

The coefficients of all DCT-blocks can be reorganized in subbands. A subband with coordinates (X, Y) is the 2D arrangement of the coefficients that are at the coordinates (X, Y) of each block. The representation in subbands increases the spatial correlation between the coefficients (which also provides an improved visual comprehension of the content of the coefficients).

In [None]:
img = RGB_image.read(test_image)
DCT_blocks = block_DCT.analyze_image(img, block_y_side, block_x_side)
DCT_subbands = block_DCT.get_subbands(DCT_blocks, block_y_side, block_x_side)

In [None]:
RGB_image.show(RGB_image.normalize(DCT_subbands), f"Subbands of the {block_y_side}x{block_x_side} DCT domain")

In [None]:
print(f"We have {block_y_side}x{block_x_side} subbands of {int(img.shape[0]/block_y_side)}x{int(img.shape[1]/block_x_side)} coefficients (each one)")

The reordering of the coefficients into subbands is completely reversible (obviously): `get_blocks()` restores the original arrangement in blocks.

In [None]:
_ = block_DCT.get_blocks(DCT_subbands, block_y_side, block_x_side)
(_ == DCT_blocks).all()

And, as can be seen, the 2D correlation is higher in the low spatial frequencies (upper-left corner) than in the high frequencies (lower-right corner).

In [None]:
blocks_in_y = img.shape[0]//block_y_side
blocks_in_x = img.shape[1]//block_x_side
RGB_image.show(RGB_image.normalize(DCT_subbands[:blocks_in_y, :blocks_in_x]), f"Subband (0, 0) ({block_y_side}x{block_x_side} DCT)")

Subband (0,0) contains the low frequencies of the image.

In [None]:
RGB_image.show(RGB_image.normalize(DCT_subbands[:blocks_in_y, blocks_in_x:2*blocks_in_x]), f"Subband (0, 1) ({block_y_side}x{block_x_side} DCT)")

The subband (0, 1) represents the slowest changes of the image in the horizontal direction.

In [None]:
RGB_image.show(RGB_image.normalize(DCT_subbands[blocks_in_y:2*blocks_in_y, :blocks_in_x]), f"Subband (1, 0) ({block_y_side}x{block_x_side} DCT)")

The subband (1, 0) represents the slowest changes of the image in the vertical domain.

In [None]:
RGB_image.show(RGB_image.normalize(DCT_subbands[blocks_in_y:2*blocks_in_y, blocks_in_x:2*blocks_in_x]), f"Subband (1, 1) ({block_y_side}x{block_x_side} DCT)")

The subband (1, 1) represents slowest changes in the diagonal (left up corner to right down corner) of the image.

## Subband-components information

In [None]:
# Print, for every (subband, component) pair of the block-DCT of the
# zero-mean YCoCg image, its dynamic range, average, deviation, entropy and
# energy, sorted by decreasing entropy.
img = RGB_image.read(test_image, 0)
#YUV_img = YUV.from_RGB(img.astype(np.int16) - 128) # -128 decreases maximum value of the DC coefficients
###############################################################
# This reduces the energy (not the entropy) of the            #
# coefficients compared to the previous option.               #
# However, the averages should be encoded to                  #
# reconstruct the image.                                      #
YUV_img = YUV.from_RGB(img.astype(np.int16))                  #
for _c in range(YUV_img.shape[2]):                            #
    YUV_img[..., _c] -= np.average(YUV_img[..., _c]).astype(np.int16)
###############################################################
DCT_blocks = block_DCT.analyze_image(YUV_img, block_y_side, block_x_side)
DCT_subbands = block_DCT.get_subbands(DCT_blocks, block_y_side, block_x_side)
print("sorting subband-components by entropy")
print("subband component maximum mininum max-min average std-dev entropy        energy  avg-enegy")
accumulated_entropy = 0
# Each subband holds one coefficient per block, so its size is the number of
# blocks in each direction.
blocks_in_y = img.shape[0]//block_y_side
blocks_in_x = img.shape[1]//block_x_side
list_of_subbands_components = []
for _y in range(block_y_side):
    for _x in range(block_x_side):
        for _c in range(N_components):
            subband = DCT_subbands[blocks_in_y*_y:blocks_in_y*(_y+1), blocks_in_x*_x:blocks_in_x*(_x+1), _c]
            entropy = information.entropy(subband.flatten().astype(np.int16))
            accumulated_entropy += entropy
            # Do not call these "max"/"min": in a notebook that would shadow
            # the builtins for every cell executed afterwards.
            sbc_max = subband.max()
            sbc_min = subband.min()
            max_min = sbc_max - sbc_min
            avg = np.average(subband)
            dev = math.sqrt(np.var(subband))
            energy = information.energy(subband)
            avg_energy = energy/subband.size
            list_of_subbands_components.append((_y, _x, _c, sbc_max, sbc_min, max_min, avg, dev, entropy, energy, avg_energy))
# Field 8 of each tuple is the entropy; highest entropy first.
sorted_list_of_subbands_components = sorted(list_of_subbands_components, key=lambda x: x[8])[::-1]
for _i in sorted_list_of_subbands_components:
    _y, _x, _c, sbc_max, sbc_min, max_min, avg, dev, entropy, energy, avg_energy = _i
    print(f"  {_y:2d} {_x:2d} {_c:9d} {sbc_max:7.1f} {sbc_min:7.1f} {max_min:7.1f} {avg:7.1f} {dev:7.1f} {entropy:7.1f} {energy:13.1f} {avg_energy:10.1f}")
# Divide by the number of subband-components that were actually accumulated
# (the loops above iterate over N_components, not over img.shape[2]).
avg_entropy = accumulated_entropy/(block_x_side*block_y_side*N_components)
print("Average entropy in the DCT domain:", avg_entropy)
print("Entropy in the image domain:", information.entropy(img.flatten().astype(np.uint8)))

As it can be observed, the 8x8-DCT accumulates most of the energy (and information, for this reason the entropy is decreased) in the low-frequency subbands. Notice also the high correlation that exists between the entropy, the variance and the energy of the subbands.

## Lossless compression

In [None]:
# Lossless-style round trip: RGB -> YCoCg -> block DCT -> subbands -> file,
# and back. (The original cell ran the whole analysis twice; the first run
# was immediately overwritten, so it is done only once here.)
img = RGB_image.read(test_image, 0)
YUV_img = YUV.from_RGB(img.astype(np.int16) - 128) # -128 decreases maximum value of the DC coefficients
DCT_blocks = block_DCT.analyze_image(YUV_img, block_y_side, block_x_side)
DCT_subbands = block_DCT.get_subbands(DCT_blocks, block_y_side, block_x_side)
# Shift the signed coefficients to the unsigned 16-bit range used for storage.
# NOTE(review): astype() truncates the fractional part of the coefficients,
# so the reconstruction may differ slightly from the original - confirm
# whether rounding (np.rint) is wanted here.
DCT_subbands = (DCT_subbands + 32768).astype(np.uint16)
output_len = RGB_image.write(DCT_subbands, "/tmp/lossless", 0)
print(f"output_length={output_len}")
# Decoder side: undo the offset and invert both transforms.
_DCT_subbands = RGB_image.read("/tmp/lossless", 0)
_DCT_subbands = _DCT_subbands.astype(np.float32) - 32768
_DCT_blocks = block_DCT.get_blocks(_DCT_subbands, block_y_side, block_x_side)
_YUV_img = block_DCT.synthesize_image(_DCT_blocks, block_y_side, block_x_side)
_img = YUV.to_RGB(_YUV_img.astype(np.int16)) + 128
RGB_image.show(_img)

## Lossy compression

### Quantization steps

Considering the previous dynamic range values for the YCoCg/8x8-DCT coefficients, this parameter should allow to use 8 bits/pixel images, if we are using PNG as an entropy codec. As it can be seen, we need 11 bits for representing the DC coefficients and after quantization, we should use only 8. Therefore, the minimum quantization step should be 1<<3 = 8. Notice that 11 - 8 = 3.

In [None]:
# Quantization steps to test, from coarse to fine: 128, 124, ..., 12
# (range() excludes the stop value, so 8 itself is not included).
# Alternatives that have been tried:
#Q_steps = [128, 64, 32, 16, 8] #, 4, 2, 1]
#Q_steps = [(2**i)//16 for i in range(16, -1, -1)]
#Q_steps = [128, 96, 64, 32, 16, 8] #, 4, 2, 1]
Q_steps = list(range(128, 8, -4))

In [None]:
Q_steps

## Testing `block_DCT.uniform_quantize()` and `block_DCT.uniform_dequantize()` (TODO: move to test_block_DCT)
Quantization removes information but also increases the compression ratios of the stored images. These methods quantize all coefficients with the same quantization step.

In [None]:
Q_step = 64
img = RGB_image.read(test_image, 0)
YUV_img = YUV.from_RGB(img.astype(np.int16) - 128)
DCT_blocks = block_DCT.analyze_image(YUV_img, block_y_side, block_x_side)
DCT_subbands = block_DCT.get_subbands(DCT_blocks, block_y_side, block_x_side)
DCT_subbands_k = block_DCT.uniform_quantize(DCT_subbands, block_y_side, block_x_side, N_components, Q_step)
DCT_subbands_dQ = block_DCT.uniform_dequantize(DCT_subbands_k, block_y_side, block_x_side, N_components, Q_step)
DCT_blocks_dQ = block_DCT.get_blocks(DCT_subbands_dQ, block_y_side, block_x_side)
YUV_img_dQ = block_DCT.synthesize_image(DCT_blocks_dQ, block_y_side, block_x_side)
img_dQ = YUV.to_RGB(YUV_img_dQ) + 128
RGB_image.show(np.clip(img_dQ, a_min=0, a_max=255), f"Quantized image (Q_step={Q_step}) in the {block_y_side}x{block_x_side} DCT {YUV.name} domain")

In [None]:
error = img - img_dQ
RGB_image.show(RGB_image.normalize(error), "Quantization error")

Therefore, quantization in the DCT domain tends to remove high frequencies (it works basically as a low pass filter).

## Coding subbands vs coding blocks
Let's see the effect of encoding the DCT coefficients grouped by blocks and subbands. For simplicity, we will use uniform quantization. Distortion is measured in the image domain.

In [None]:
def _check_codestream(indexes, decoded, max_reports=12):
    """Check that a stored code-stream decodes to `indexes + 128`.

    The quantization indexes are stored with an offset of 128 (see the
    write() calls below), so the decoded array is compared against the
    offset indexes. On mismatch, up to `max_reports` differing positions
    are printed as "expected decoded" pairs.

    Returns True if both arrays match everywhere, False otherwise. The
    function never raises on a mismatch: the check is only diagnostic.
    """
    expected = indexes + 128
    mismatches = expected != decoded
    if not mismatches.any():
        return True
    # Collapse any trailing (component) axes so that each reported position
    # is one (row, column) sample, for both 2D and 3D arrays.
    per_sample = mismatches.reshape(mismatches.shape[0], mismatches.shape[1], -1).any(axis=2)
    rows, cols = np.nonzero(per_sample)
    for _i, _j in list(zip(rows, cols))[:max_reports]:
        print(expected[_i, _j], decoded[_i, _j])
    return False

# Rate-distortion curves of the uniformly quantized block-DCT, storing the
# quantization indexes arranged by blocks and arranged by subbands.
#img = RGB_image.read(test_image, 0)
#YUV_img = YUV.from_RGB(img.astype(np.int16) - 128)
img = RGB_image.read(test_image)
YUV_img = YUV.from_RGB(img.astype(np.int16))
# Work with zero-mean components; the averages are restored before going
# back to RGB.
avgs = [np.average(YUV_img[..., c]) for c in range(3)]
print(f"avgs={avgs}")
for c in range(3):
    YUV_img[..., c] -= int(avgs[c])
N_pixels = YUV_img.shape[0]*YUV_img.shape[1]
RD_points_blocks = []
RD_points_subbands = []
for Q_step in Q_steps:
    # NOTE(review): the analysis does not depend on Q_step; it is kept inside
    # the loop in case uniform_quantize() modifies its input - confirm.
    DCT_blocks = block_DCT.analyze_image(YUV_img, block_y_side, block_x_side)
    # Notice that with uniform_quantize() does not matter if the DCT domain
    # is organized in subbands or blocks.
    DCT_blocks_k = block_DCT.uniform_quantize(DCT_blocks, block_y_side, block_x_side, N_components, Q_step)
    BPP = RGB_image.write((DCT_blocks_k + 128).astype(np.uint8), f"/tmp/{Q_step}_", 0)*8/N_pixels
    # Check that we can recover the code-stream
    _check_codestream(DCT_blocks_k, RGB_image.read(f"/tmp/{Q_step}_", 0))
    DCT_blocks_dQ = block_DCT.uniform_dequantize(DCT_blocks_k, block_y_side, block_x_side, N_components, Q_step)
    YUV_img_dQ = block_DCT.synthesize_image(DCT_blocks_dQ, block_y_side, block_x_side)
    #img_dQ = YUV.to_RGB(YUV_img_dQ) + 128
    for c in range(3):
        YUV_img_dQ[..., c] += int(avgs[c])
    img_dQ = YUV.to_RGB(YUV_img_dQ)

    # Notice that to compute the distortion, the DCT domain could be
    # also used because the DCT is unitary.
    RMSE = distortion.RMSE(img, img_dQ)
    RD_points_blocks.append((BPP, RMSE))
    # Same indexes (hence same distortion), now stored arranged by subbands.
    DCT_subbands_k = block_DCT.get_subbands(DCT_blocks_k, block_y_side, block_x_side)
    #BPP = compute_BPP((DCT_subbands_k + 128).astype(np.uint8), f"/tmp/{Q_step}_")
    BPP = RGB_image.write((DCT_subbands_k + 128).astype(np.uint8), f"/tmp/{Q_step}_", 0)*8/N_pixels
    # Check that we can recover the code-stream
    _check_codestream(DCT_subbands_k, RGB_image.read(f"/tmp/{Q_step}_", 0))
    RD_points_subbands.append((BPP, RMSE))
    print(f"Q_step={Q_step} BPP={BPP} image-domain-RMSE={RMSE}")


In [None]:
pylab.figure(dpi=150)
pylab.plot(*zip(*RD_points_blocks), label=f"{block_y_side}x{block_x_side} DCT (encoded by blocks)")
pylab.plot(*zip(*RD_points_subbands), label=f"{block_y_side}x{block_x_side} DCT (encoded by subbands)")
pylab.title("")
pylab.xlabel("BPP")
pylab.ylabel("RMSE")
plt.legend(loc="best")
pylab.show()

## Compare

In [None]:
# Load the RD curve of the YCoCg + scalar quantization experiment: one
# "BPP<TAB>RMSE" pair per line.
YCoCg_SQ = []
with open('../YCoCg_SQ/YCoCg_SQ.txt', 'r') as f:
    for _line in f:
        if not _line.strip():
            continue  # Tolerate blank lines (e.g. a trailing newline)
        # Private names: do not overwrite the BPP/RMSE values computed above.
        _rate, _distortion = _line.split('\t')
        YCoCg_SQ.append((float(_rate), float(_distortion)))

In [None]:
# Compare the RD curve of quantizing in the YCoCg domain against quantizing
# in the YCoCg/2D-DCT domain.
pylab.figure(dpi=150)
# Raw strings: the labels contain LaTeX backslashes (\mathbf, \Delta, ...)
# that are invalid escape sequences in a normal string literal.
pylab.plot(*zip(*YCoCg_SQ), c='b', marker='o', label=r"Deadzone($\mathbf{\Delta}^{\mathrm{Y}}_i = \mathbf{\Delta}^{\mathrm{Co}}_i = \mathbf{\Delta}^{\mathrm{Cg}}_i$)+PNG", linestyle="dashed")
pylab.plot(*zip(*RD_points_subbands), label=r"Deadzone(2D-DCT($\mathbf{\Delta}^{\mathrm{Y}}_i = \mathbf{\Delta}^{\mathrm{Co}}_i = \mathbf{\Delta}^{\mathrm{Cg}}_i)$)+PNG")
# "fn" was never defined in this notebook; the image path is used as title.
# NOTE(review): confirm that the test image name is the intended title.
pylab.title(test_image)
pylab.xlabel("BPP")
pylab.ylabel("RMSE")
plt.legend(loc="best")
pylab.show()

Coding by subbands is more efficient because PNG can exploit better the spatial correlation between the coefficients.

In [None]:
# Save the "encoded by subbands" RD curve, one "BPP<TAB>RMSE" line per point.
with open('YCoCg_2D_DCT_SQ.txt', 'w') as f:
    f.writelines(f"{_bpp}\t{_rmse}\n" for _bpp, _rmse in RD_points_subbands)

In [None]:
import time
# Deliberate endless idle loop: this cell never finishes.
# NOTE(review): presumably it stops a "run all cells" execution here, since
# the next heading says that the rest of the notebook must be ignored.
while True:
    time.sleep(1)

## Ignore the rest ...

## Can we do it better?

Let's compute the optimal sequence of quantization steps for the set of possible combinations of subbands and components. We will compute the distortion of each subband-component in the YCoCg/8x8-DCT domain for a set of quantization steps, considering that the YCoCg transform is near-orthogonal and that the 8x8-DCT is full-orthogonal. Thanks to orthogonality, we can assume that the quantization error generated in one subband does not influence on the quantization error added to the other subbands because the DCT coefficients are uncorrelated, or in other words, that the quantization error generated in one coefficient (or subband) is not correlated with the quantization error generated in other coefficients (or subbands).

Algorithm:
1. Read the image.
2. Transform it to the YCoCg domain.
3. Transform each YCoCg component to the 8x8-DCT domain.
4. Find a set RD points for each subband-component.
5. Compute the slope of each point and put all the slopes in the same list.
6. Sort the previous list by the slope field.
7. Find the RD curve that progressively uses smaller slopes.

### Read the image and move to the 0-mean YCoCg domain

In [None]:
img = RGB_image.read(test_image, 0)
#xx = YUV.from_RGB(img.astype(np.int16) - 128)
YUV_img = YUV.from_RGB(img.astype(np.int16))

# Shift the YCoCg components to the zero mean.
# We will not need this information later (in this notebook)
# because we will not reconstruct the images.
# The distortion is computed in the DCT domain.
#YUV_img[...,0] -= np.average(YUV_img[...,0]).astype(np.int16)
#YUV_img[...,1] -= np.average(YUV_img[...,1]).astype(np.int16)
#YUV_img[...,2] -= np.average(YUV_img[...,2]).astype(np.int16)

avgs = [np.average(YUV_img[..., c]) for c in range(3)]
print(f"avgs={avgs}")
for c in range(3):
    YUV_img[..., c] -= int(avgs[c])

### Move each component to the 8x8-DCT domain

In [None]:
DCT_blocks = block_DCT.analyze_image(YUV_img, block_y_side, block_x_side)
DCT_subbands = block_DCT.get_subbands(DCT_blocks, block_y_side, block_x_side)

In [None]:
RGB_image.show(RGB_image.normalize(DCT_subbands), f"Subbands of the {YUV.name}/{block_y_side}x{block_x_side} DCT domain")

### Find the slope of each quantization step for each subband-component
Create a list per subband-component of RD points and a list per subband-component of RD slopes. The first RD point is computed for 0 BPP, where the MSE distortion is equal to the average energy of the subband-component (notice that the average of each subband-component should be 0).

In [None]:
# One list of RD points and one list of RD slopes per subband-component,
# stored in (_y, _x, _c) raster order.
RD_points = []
RD_slopes = []
N_components = YUV_img.shape[2]
for _y in range(block_y_side):
    for _x in range(block_x_side):
        for _c in range(N_components):
            sbc = DCT_subbands[blocks_in_y*_y : blocks_in_y*(_y + 1), blocks_in_x*_x : blocks_in_x*(_x + 1), _c]
            sbc_energy = information.average_energy(sbc)
            # The first point of each RD curve has rate = 0 and the maximum
            # distortion (nothing is transmitted). The remaining points of
            # the curves store RMSE values, so the average energy (a MSE with
            # respect to zero) is converted to RMSE to keep the same units.
            # NOTE(review): assumes average_energy() is the mean of the
            # squared coefficients - confirm.
            RD_points.append([(0, math.sqrt(sbc_energy))])
            RD_slopes.append([])

In [None]:
RD_points # (BPP, RMSE)

In [None]:
len(RD_points)

In [None]:
8*8*3

Now populate the rest of points of each subband-component. **Distortion is estimated in the transform domain** because we are searching RD points for sorting the quantization patterns, not for obtaining true distortion measures that would demand more computation.

In [None]:
# For every subband-component, quantize it with each step of Q_steps, and
# record the resulting (BPP, RMSE) point and the slope with respect to the
# point stored at the same position of the previous step.
_N_pixels = YUV_img.shape[0]*YUV_img.shape[1]
for _y in range(block_y_side):
    for _x in range(block_x_side):
        for _c in range(N_components):
            sbc = DCT_subbands[blocks_in_y*_y : blocks_in_y*(_y + 1), blocks_in_x*_x : blocks_in_x*(_x + 1), _c]
            # Position of this subband-component in RD_points/RD_slopes.
            _sbc_i = (_y*block_x_side + _x)*N_components + _c
            for counter, Q_step in enumerate(Q_steps):
                Q = Quantizer(Q_step=Q_step)
                sbc_k = Q.quantize(sbc)
                sbc_dQ = Q.dequantize(sbc_k)
                RMSE = distortion.RMSE(sbc, sbc_dQ)
                BPP = RGB_image.write(sbc_k.astype(np.uint8), f"/tmp/{_y}_{_x}_{_c}_{Q_step}_", 0)*8/_N_pixels
                point = (BPP, RMSE)
                RD_points[_sbc_i].append(point)
                print("Q_step =", Q_step, "BPP =", point[0], "RMSE =", point[1])
                # After the append, position "counter" holds the point that
                # precedes the new one.
                _prev_BPP, _prev_RMSE = RD_points[_sbc_i][counter]
                delta_BPP = BPP - _prev_BPP
                delta_RMSE = _prev_RMSE - RMSE
                # Only points that increase the rate produce a slope.
                if delta_BPP > 0:
                    slope = delta_RMSE/delta_BPP
                    RD_slopes[_sbc_i].append((slope, (_y, _x, _c), Q_step))
                else:
                    slope = 0

In [None]:
RD_points

In [None]:
RD_slopes # (slope, subband-component_index, Q_step)

### Remove points that do not belong to the convex-hull

In [None]:
def filter_slopes(slopes):
    """Drop the entries whose slope is smaller than the slope that follows.

    `slopes` is a sequence of tuples whose first field is the slope. An
    entry is removed (and reported with a "deleted ..." message) when its
    slope is strictly smaller than the slope of the next entry; the last
    entry is always kept. Only one pass is done, so each entry is compared
    only against its successor in the input.

    Returns a new list; an empty input produces an empty list (the original
    code raised StopIteration in that case, which happens for
    subband-components that never produced a slope).
    """
    slopes = list(slopes)
    if not slopes:
        return []
    filtered_slopes = []
    # Pair each entry with its successor.
    for prev, curr in zip(slopes, slopes[1:]):
        if prev[0] < curr[0]:
            print(f"deleted {prev}")
        else:
            filtered_slopes.append(prev)
    filtered_slopes.append(slopes[-1])
    return filtered_slopes

# Filter the slopes of each subband-component independently.
filtered_slopes = [filter_slopes(_sbc_slopes) for _sbc_slopes in RD_slopes]

In [None]:
filtered_slopes

### Sort the RD points by their slope

In [None]:
# Flatten the per-subband-component lists into a single list of slopes.
single_list = [_slope for _sbc_slopes in filtered_slopes for _slope in _sbc_slopes]

In [None]:
single_list

In [None]:
sorted_slopes = sorted(single_list, key=lambda x: x[0])[::-1]

In [None]:
sorted_slopes

### Build the optimal RD curve
We use the sorted list of slopes (with quantization and subband-component information) to generate the optimal RD list of RD points. Notice that, although the YCoCg components gains ($\frac{3}{2}\Delta_{\text{Y}} = \Delta_{\text{Co}} = \frac{3}{2}\Delta_{\text{Cg}}$) have not been taken into consideration, the RD points are sorted by their slope, and therefore this information has already influenced on the RD points.

Notice that, initially, all the subband-components are set to zero: the quantization steps (one per subband-component) start with a value so large that every subband-component is quantized to zero (see `Q_steps_combination`). Then, starting with the subband-component with the highest contribution (information that is provided by `sorted_slopes`), the subband-components are progressively quantized and dequantized.

Notice that the distortion can be measured in the YCoCg/block-DCT domain because both transforms are considered orthogonal, making the inverse transforms unnecessary. Otherwise, the distortion should be measured in the image domain.

In [None]:
# Build the RD curve that results from progressively refining the
# quantization step of one subband-component at a time, following the
# order given by sorted_slopes. Each entry is (slope, (_y, _x, _c), Q_step).
optimal_RD_points = []
# One quantization step per subband-component, indexed [_y, _x, _c] below
# (so the first axis must be block_y_side). The initial huge step is meant
# to quantize every coefficient to zero.
Q_steps_combination = np.full(shape=(block_y_side, block_x_side, N_components), fill_value=99999999)
_N_pixels = YUV_img.shape[0]*YUV_img.shape[1]
for s in sorted_slopes:
    _slope, sbc_index, _step = s
    _y, _x, _c = sbc_index
    Q_steps_combination[_y, _x, _c] = _step
    #DCT_subbands_prog[blocks_in_y*_y : blocks_in_y*(_y + 1), blocks_in_x*_x : blocks_in_x*(_x + 1), _c] \
    #    = DCT_subbands[blocks_in_y*_y : blocks_in_y*(_y + 1), blocks_in_x*_x : blocks_in_x*(_x + 1), _c]
    # NOTE(review): a fresh copy per iteration, presumably because
    # block_DCT.quantize() may modify its input - confirm.
    DCT_subbands_prog = DCT_subbands.copy()
    DCT_subbands_prog_k = block_DCT.quantize(DCT_subbands_prog, Q_steps_combination)
    DCT_subbands_prog_dQ = block_DCT.dequantize(DCT_subbands_prog_k, Q_steps_combination)
    
    # Uncomment the following line to measure the distortion in the YUV+DCT domain.
    # Use only if both, the spatial transform and the color transform are orthogonal.
    #RMSE = distortion.RMSE(DCT_subbands, DCT_subbands_prog_dQ)

    # Uncomment the following 3 lines to measure the distortion in the YUV domain.
    # Use only if the color transform is orthogonal.
    #DCT_blocks_prog_dQ = block_DCT.get_blocks(DCT_subbands_prog_dQ, block_y_side, block_x_side)
    #YUV_img_prog = block_DCT.synthesize_image(DCT_blocks_prog_dQ, block_y_side, block_x_side)
    #RMSE = distortion.RMSE(YUV_img, YUV_img_prog)
    
    # Measure the distortion in the RGB domain.
    # Notice that if the color transform domain is not orthogonal,
    # the RMSE should be measured in the RGB domain.
    DCT_blocks_prog_dQ = block_DCT.get_blocks(DCT_subbands_prog_dQ, block_y_side, block_x_side)
    YUV_img_prog = block_DCT.synthesize_image(DCT_blocks_prog_dQ, block_y_side, block_x_side)
    for c in range(3):
        YUV_img_prog[..., c] += int(avgs[c])  # Restore the component averages
    img_prog = YUV.to_RGB(YUV_img_prog)
    RMSE = distortion.RMSE(img, img_prog)

    # Add 128 to convert 2's complement 8-bits integers to unsigned 8-bit integers.
    # The file is named after the quantization step (s[2]); s[0] is the
    # slope, a leftover of the previous field order of the tuples.
    BPP = RGB_image.write((DCT_subbands_prog_k + 128).astype(np.uint8), f"/tmp/{_y}_{_x}_{_c}_{_step}_", 0)*8/_N_pixels
    point = (BPP, RMSE)
    print("sbc =", sbc_index, "Q_step =", _step, "BPP =", BPP, "RMSE =", RMSE)
    optimal_RD_points.append(point)

### Read JPEG RD data to compare
Notice that the chromas have not been subsampled in JPEG.

In [None]:
# Load the JPEG RD curve: one "rate<TAB>distortion" pair per line.
JPEG_RD_points = []
with open("../JPEG/JPEG.txt", 'r') as f:
    for _rate, _dist in (_line.split('\t') for _line in f):
        JPEG_RD_points.append((float(_rate), float(_dist)))

In [None]:
#DCT2 = []
#with open("DCT.txt", 'r') as f:
#    for line in f:
#        rate, _distortion = line.split('\t')
#        DCT2.append((float(rate), float(_distortion)))

### Compare

In [None]:
pylab.figure(dpi=150)
pylab.plot(*zip(*RD_points_subbands), label="Without RDO")
#pylab.plot(*zip(*optimal_RD_points), label="With RDO")
#pylab.plot(*zip(*optimal_RD_points_128), label="optimal quantization 128")
pylab.plot(*zip(*JPEG_RD_points), label="JPEG")
#pylab.plot(*zip(*DCT2), label="old")
pylab.title(f"{YUV.name}/{block_y_side}x{block_x_side}-DCT")
pylab.xlabel("Bits/Pixel")
pylab.ylabel("RMSE")
plt.legend(loc="best")
#pylab.yscale('log')
#pylab.xscale('log')
pylab.show()

In [None]:
# Save the RDO curve, one "BPP<TAB>RMSE" line per point.
with open('DCT_RDO.txt', 'w') as f:
    f.writelines(f"{_bpp}\t{_rmse}\n" for _bpp, _rmse in optimal_RD_points)

## Conclusions

RDO over $\Delta$ provides optimality in those combinations where $\Delta$ is not the same for all subband-components. This is a consequence of progressively selecting those quantization steps for the subband-components that contribute more to the quality of the reconstruction between constant-$\Delta$ quantization patterns. Notice that between each different constant-$\Delta$ quantization pattern there are so many different-$\Delta$ quantization patterns as the number of subband-components. Notice also that the pattern of quantization steps must be known by the decoder to reconstruct the image.

## Use the RGB domain to compare the distortion (TODO)

## Optimizing the block-size

So far, we have used a constant block-size. However, this can be suboptimal (different images, and different areas of those images, can be analyzed better, from an RD point of view, by varying the block-size). The idea is to decompose the image into blocks by building a quad-tree, in which a node (a quadrant) is split if its RD slope, considering the quadrant as a single block (of, for example, 16x16 pixels), is smaller than its RD slope considering 4 blocks (of 8x8 pixels). We have supposed that the image is a square. Otherwise, the image should be processed by square tiles (TO-DO).

The optimal splitting pattern is chosen computing, for the given quantization step, the best score:
$$
\frac{1}{\text{RMSE}\times\text{BPP}}
$$

Notice that the quad-tree must be transmitted to the decoder, and a way to encode the tree is to send a sequence of symbols 0 and 1, where symbol 0 means that the quadrant is not divided and symbol 1 means that the quadrant is divided. For example:

    +-------+---+---+ +---+---+
    |       |   |   | | 1 | 2 |
    |       +---+---+ +---+---+
    |       |   |   | | 3 | 4 |
    +---+-+-+---+---+ +---+---+
    |   + + +       |
    +---+-+-+       |
    |   |   |       |
    +---+---+-------+
    01    1        0
      0000 01    00
             0000 <- removable if the minimum block size has been reached
             
Only luminance is considered.

In [None]:
def _block_RD_score(DCT, Q_step):
    """Return the score 1/(rate*distortion) of quantizing DCT with Q_step.

    The rate is the entropy of the quantization indexes and the distortion
    is the RMSE between the coefficients and their dequantized version.
    A zero rate or a zero distortion gives an infinite score instead of a
    division by zero.
    """
    quantizer = Quantizer(Q_step=Q_step)
    indexes = quantizer.quantize(DCT)
    R = information.entropy(indexes.flatten().astype(np.int16))
    # Compare coefficients with reconstructed coefficients (not with the
    # quantization indexes, which live in a different scale).
    D = distortion.RMSE(DCT, quantizer.dequantize(indexes))
    cost = R*D
    return math.inf if cost == 0 else 1/cost

# Returns True if the current block_size is optimal
def is_block_size_optimal(block, Q_step):
    """Tell whether `block` is better transformed as a whole than split.

    The score 1/(rate*distortion) obtained by transforming the whole block
    with a single DCT is compared against the score obtained by applying the
    block-DCT to its four quadrants, both quantized with `Q_step`.

    Returns True when the score of the unsplit block is strictly higher.
    """
    # Score of the block transformed as a single unit
    block_slope = _block_RD_score(block_DCT.analyze_block(block), Q_step)
    # Score applying the block_DCT to each quadrant
    splitted_block_slope = _block_RD_score(
        block_DCT.analyze_image(block, block.shape[0]//2, block.shape[1]//2), Q_step)
    #print(block_slope, splitted_block_slope)
    return block_slope > splitted_block_slope

In [None]:
# For each power-of-two block side between max_block_side and
# min_block_side, print a map telling whether each luma block should be
# kept ('.') or divided ('O').
max_block_side = 32
min_block_side = 4
Q_step = 64

luma = YUV_img[..., 0]

# Both sides are powers of two, so bit_length() - 1 is their base-2 logarithm
# (32 -> 5, 4 -> 2). The original loop started at 2^6 = 64, beyond the
# maximum block side.
_max_log_side = max_block_side.bit_length() - 1
_min_log_side = min_block_side.bit_length() - 1
for log_block_side in range(_max_log_side, _min_log_side - 1, -1):
    block_side = 1 << log_block_side
    block_size_is_optimal = np.zeros(shape=(luma.shape[0]//block_side, luma.shape[1]//block_side), dtype=bool)
    #DCT_luma = block_DCT.analyze_image(luma, block_side, block_side)
    # For each block
    for y in range(luma.shape[0]//block_side):
        for x in range(luma.shape[1]//block_side):
            # TODO: the map is created empty at every level, so this test is
            # always true; the decisions of the previous (larger) level are
            # not propagated yet.
            if not block_size_is_optimal[y, x]:
                block = luma[y*block_side:(y+1)*block_side, x*block_side:(x+1)*block_side]
                if is_block_size_optimal(block, Q_step):
                    block_size_is_optimal[y, x] = True
                    print('.', end='') # Do not divide the block
                else:
                    block_size_is_optimal[y, x] = False
                    print('O', end='') # Divide the block
    block_is_optimal_copy = block_size_is_optimal.copy()
    print('')

In [None]:
# Unfinished experiment: score each max_block_side x max_block_side block.
Q_DCT_blocks = block_DCT.uniform_quantize(DCT_blocks, max_block_side, max_block_side, N_components, Q_step)
for y in range(luma.shape[0]//max_block_side):
    for x in range(luma.shape[1]//max_block_side):
        Q_DCT_block = Q_DCT_blocks[y*max_block_side:(y+1)*max_block_side, x*max_block_side:(x+1)*max_block_side]
        DCT_block = DCT_blocks[y*max_block_side:(y+1)*max_block_side, x*max_block_side:(x+1)*max_block_side]
        # The entropy estimator is in the "information" module, as used in
        # the rest of this notebook.
        block_rate = information.entropy(Q_DCT_block.flatten().astype(np.int16))
        # NOTE(review): this compares quantization indexes with coefficients;
        # the indexes should probably be dequantized first - confirm.
        block_distortion = distortion.RMSE(Q_DCT_block, DCT_block)
        # Score = 1/(rate*distortion); without the parentheses the expression
        # computed distortion/rate.
        block_slope = 1/(block_rate*block_distortion)
        # TODO: the per-coefficient processing was never written.
        for yy in range(DCT_block.shape[0]):
            for xx in range(DCT_block.shape[1]):
                pass

In [None]:
def local_entropy(image, x_range, y_range):
    """Placeholder: not implemented yet, the arguments are ignored.

    Always returns None.
    """
    return None

!ln -sf ~/MRVC/src/DWT.py .
import DWT
import pywt

img = image_3.read(test_image, 0)
YUV_img = YUV.from_RGB(img.astype(np.int16))

wavelet_name = "Haar"
wavelet = pywt.Wavelet(wavelet_name)
N_levels = 3
decom = DWT.analyze(YUV_img, wavelet, N_levels)

Q_step = 256

LL = decom[0]
LL_k = Q.quantize(LL, Q_step) # Baybe bettter Q.get_indexes()
LL_dQ = Q.dequantize(LL_k, Q_step) # Q.get_signal()
decom_dQ = [LL_dQ]
for sr in decom[1:]: # sr = spatial_resolution
    sr_dQ = []
    for sb in sr: # sb = subband
        sb_k = Q.quantize(sb, Q_step)
        sb_dQ = Q.dequantize(sb_k, Q_step)
        sr_dQ.append(sb_dQ)
    decom_dQ.append(tuple(sr_dQ))
YUV_img_dQ = DWT.synthesize(decom_dQ, wavelet, N_levels)
img_dQ = YUV.to_RGB(YUV_img_dQ)
img_dQ = np.clip(img_dQ, a_min=0, a_max=255).astype(np.uint8)
image_3.show(img_dQ.astype(np.uint8), f"Q_step={Q_step}")
image_1.show(YUV_img_dQ[...,0].astype(np.uint8), f"Q_step={Q_step}")

## Optimizing the block-shape