[![Binder](https://mybinder.org/badge_logo.svg)](https://nbviewer.org/github/Sistemas-Multimedia/Sistemas-Multimedia.github.io/blob/master/milestones/07-DCT/block_DCT_compression.ipynb)

# III... video compression

## Parameters

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
!ln -sf ~/MRVC/src/image_3.py .
import image_3 as image
!ln -sf ~/MRVC/src/image_1.py .
import image_1 as component
!ln -sf ~/MRVC/src/block_DCT.py .
#!ln -sf ~/MRVC/src/YCoCg.py .
#import YCoCg as color
!ln -sf ~/MRVC/src/color_DCT.py .
import color_DCT as color
#!ln -sf ~/MRVC/src/RGB.py .
#import RGB as color
import cv2 # pip install opencv-python
!ln -sf ~/quantization/information.py .
import information
!ln -sf ~/quantization/distortion.py .
import distortion
import os
import pylab
!ln -sf ~/quantization/deadzone_quantizer.py .
import deadzone_quantizer as Q
import math
import block_DCT as DCT

In [None]:
# Number of frames in a GOP; the per-frame loops below iterate over range(G).
G = 4 # GOP size

In [None]:
!~/MRVC/sequences/container/runme.sh -n $G

In [None]:
# Filename prefix of the input frames: frame i is loaded with image.read(sequence, i).
sequence = "/tmp/original_"

In [None]:
# Vertical and horizontal side of the DCT blocks (passed to every block_DCT call).
block_y_side = block_x_side = 8

In [None]:
# Number of components per frame, passed to the uniform (de)quantizers.
N_components = 3

In [None]:
# Selects how the rate is estimated: "PNG" uses the value returned by
# image.write() (presumably the length of the written file -- TODO confirm);
# any other value uses information.entropy() of the samples.
entropy_estimator = "PNG"
if entropy_estimator != "PNG":
    def compute_BPP(_image, filename_prefix, index):
        """Return information.entropy() of the samples of _image (cast to int16).

        filename_prefix and index are not used; they are kept so that both
        estimators share the same signature.
        """
        samples = _image.flatten().astype(np.int16)
        return information.entropy(samples)
else:
    def compute_BPP(_image, filename_prefix, index):
        """Write _image with image.write() and return the resulting rate.

        The rate is the value returned by image.write(), multiplied by 8 and
        divided by the number of elements of _image.
        """
        return image.write(_image, filename_prefix, index)*8/_image.size

## Using the DCT

### Quantization steps

In [None]:
# Quantization steps to test, listed from the coarsest to the finest.
Q_steps = [128, 64, 32, 16, 8]

### Using same $\Delta$ for all coefficients

#### Version 0: Each quantized decomposition is written in a different PNG file

In [None]:
# RD curve without RDO: the same quantization step is used for all the
# coefficients, and each quantized decomposition goes to its own file.
# One (average rate, average RMSE) point is produced per quantization step.
RD_points_no_RDO = []
for Q_step in Q_steps:
    acc_BPP = 0 # Accumulated rate
    acc_RMSE = 0 # Accumulated distortion
    for i in range(G):
        frame = image.read(sequence, i)
        YUV_frame = color.from_RGB(frame.astype(np.int16) - 128)
        DCT_blocks = DCT.analyze_image(YUV_frame, block_y_side, block_x_side)
        DCT_blocks_k = DCT.uniform_quantize(DCT_blocks, block_y_side, block_x_side, N_components, Q_step)
        DCT_blocks_dQ = DCT.uniform_dequantize(DCT_blocks_k, block_y_side, block_x_side, N_components, Q_step)
        YUV_frame_dQ = DCT.synthesize_image(DCT_blocks_dQ, block_y_side, block_x_side)
        frame_dQ = color.to_RGB(YUV_frame_dQ) + 128
        RMSE = distortion.RMSE(frame, frame_dQ)
        acc_RMSE += RMSE
        DCT_subbands_k = DCT.get_subbands(DCT_blocks_k, block_y_side, block_x_side)
        # The indexes are shifted by 128 and stored as uint8, so they must fit
        # in [-128, 127]; otherwise the cast wraps around and the rate is
        # measured on corrupted data. The comparison has to be done
        # element-wise: "array.all() >= x" compares a single boolean with x
        # and never fails.
        assert (DCT_subbands_k >= -128).all(), f"min value = {np.min(DCT_subbands_k)}"
        assert (DCT_subbands_k <= 127).all(), f"max value = {np.max(DCT_subbands_k)}"
        if __debug__:
            print(np.min(DCT_subbands_k), np.max(DCT_subbands_k))
        BPP = compute_BPP((DCT_subbands_k + 128).astype(np.uint8), f"/tmp/{Q_step}_", i)
        acc_BPP += BPP
    RD_points_no_RDO.append((acc_BPP/G, acc_RMSE/G))
    print(i, Q_step, end=' ', flush=True)

In [None]:
RD_points_no_RDO

#### Version 1: All the decompositions (of quantization indexes) are concatenated and then written into a single PNG file

In [None]:
# RD curve without RDO, but with the quantized decompositions of all the
# frames concatenated and written as a single image, so that the rate is
# measured once per quantization step.
RD_points_no_RDO_one_PNG = []
for Q_step in Q_steps:
    acc_RMSE = 0 # Accumulated distortion
    sequence_of_quantized_decompositions = []
    for i in range(G):
        frame = image.read(sequence, i)
        YUV_frame = color.from_RGB(frame.astype(np.int16) - 128)
        DCT_blocks = DCT.analyze_image(YUV_frame, block_y_side, block_x_side)
        DCT_blocks_k = DCT.uniform_quantize(DCT_blocks, block_y_side, block_x_side, N_components, Q_step)
        DCT_blocks_dQ = DCT.uniform_dequantize(DCT_blocks_k, block_y_side, block_x_side, N_components, Q_step)
        YUV_frame_dQ = DCT.synthesize_image(DCT_blocks_dQ, block_y_side, block_x_side)
        frame_dQ = color.to_RGB(YUV_frame_dQ) + 128
        RMSE = distortion.RMSE(frame, frame_dQ)
        acc_RMSE += RMSE
        DCT_subbands_k = DCT.get_subbands(DCT_blocks_k, block_y_side, block_x_side)
        sequence_of_quantized_decompositions.append(DCT_subbands_k)
    concatenation = np.concatenate(sequence_of_quantized_decompositions)
    # The indexes are shifted by 128 and stored as uint8, so they must fit in
    # [-128, 127]. The check must be element-wise: "array.all() >= x" compares
    # a single boolean with x and never fails.
    assert (concatenation >= -128).all(), f"min value = {np.min(concatenation)}"
    assert (concatenation <= 127).all(), f"max value = {np.max(concatenation)}"
    if __debug__:
        print(np.min(concatenation), np.max(concatenation))
    BPP = compute_BPP((concatenation + 128).astype(np.uint8), f"/tmp/{Q_step}_", 0)
    RD_points_no_RDO_one_PNG.append((BPP, acc_RMSE/G))
    print(i, Q_step, end=' ', flush=True)

In [None]:
RD_points_no_RDO_one_PNG

In [None]:
# Compare both ways of measuring the rate. Each RD point is a (BPP, RMSE)
# tuple, so zip(*points) yields the x and y series expected by plot().
pylab.figure(dpi=150)
pylab.plot(*zip(*RD_points_no_RDO), label="Different PNG files")
pylab.plot(*zip(*RD_points_no_RDO_one_PNG), label="One PNG file")
pylab.title("")
pylab.xlabel("BPP")
# The distortion stored in the RD points comes from distortion.RMSE().
pylab.ylabel("RMSE")
pylab.legend(loc="best")
pylab.show()

The differences are insignificant.

### Using RDO (Rate/Distortion Optimization)

#### Find the optimal progression of combinations of quantization steps
Each input frame is transformed. The resulting subband-components are quantized and their RD contribution is estimated, supposing that the distortion can be measured in the transform domain, and that the spatial/statistical decorrelation achieved by the entropy codec between subband-components is zero.

In [None]:
# For every subband-component of every frame, build its RD curve (one point
# per quantization step) and the slope of each segment of that curve.
RD_points = [] # One list of (rate, distortion) points per subband-component of each frame
RD_slopes = [] # One list of (slope, (frame, y, x, component), Q_step) tuples per subband-component
# NOTE: at this point YUV_frame is the one left behind by the previously run
# cells; only its number of components is used.
N_components = YUV_frame.shape[2]
single_list = []
counter = 0

for frame_number in range(G):
    frame = image.read(sequence, frame_number)
    blocks_in_y = frame.shape[0]//block_y_side
    blocks_in_x = frame.shape[1]//block_x_side

    # Color transform, and removal of the (integer) average of each component
    YUV_frame = color.from_RGB(frame.astype(np.int16))
    YUV_frame[...,0] -= np.average(YUV_frame[...,0]).astype(np.int16)
    YUV_frame[...,1] -= np.average(YUV_frame[...,1]).astype(np.int16)
    YUV_frame[...,2] -= np.average(YUV_frame[...,2]).astype(np.int16)

    DCT_blocks = DCT.analyze_image(YUV_frame, block_y_side, block_x_side)
    DCT_subbands = DCT.get_subbands(DCT_blocks, block_y_side, block_x_side)

    # Top-left point of each subband-component RD curve
    for _y in range(block_y_side):
        for _x in range(block_x_side):
            for _c in range(N_components):
                sbc = DCT_subbands[blocks_in_y*_y : blocks_in_y*(_y + 1), blocks_in_x*_x : blocks_in_x*(_x + 1), _c]
                sbc_energy = information.average_energy(sbc)
                # The first point of each RD curve has a maximum distortion equal
                # to the energy of the subband and a rate = 0
                # NOTE(review): the remaining points store a RMSE; confirm that
                # average_energy() is expressed in comparable units.
                RD_points.append([(0, sbc_energy)]) # (Rate, Distortion) of a subband-component of a frame
                RD_slopes.append([])
                counter += 1
    print(counter)

    # Remaining points of each curve: one per quantization step
    for _y in range(block_y_side):
        for _x in range(block_x_side):
            for _c in range(N_components):
                sbc = DCT_subbands[blocks_in_y*_y : blocks_in_y*(_y + 1), blocks_in_x*_x : blocks_in_x*(_x + 1), _c]
                # Position of this subband-component in RD_points and RD_slopes
                # (same order in which the lists were filled above).
                sbc_number = (frame_number*block_y_side*block_x_side + _y*block_x_side + _x)*N_components + _c
                for Q_step in Q_steps:
                    sbc_k = Q.quantize(sbc, Q_step)
                    sbc_dQ = Q.dequantize(sbc_k, Q_step)
                    RMSE = distortion.RMSE(sbc, sbc_dQ)
                    # The (signed) indexes are shifted by 128 and stored as
                    # uint8, as in the rest of the notebook, so they must fit
                    # in [-128, 127]. The check must be element-wise:
                    # "array.all() >= x" compares a single boolean with x and
                    # never fails.
                    assert (sbc_k >= -128).all(), f"min value = {np.min(sbc_k)}"
                    assert (sbc_k <= 127).all(), f"max value = {np.max(sbc_k)}"
                    BPP = component.write((sbc_k + 128).astype(np.uint8), f"/tmp/{_y}_{_x}_{Q_step}_", 0)*8/YUV_frame.size
                    point = (BPP, RMSE)
                    # Previous point of this curve (read before appending the new one)
                    prev_BPP, prev_RMSE = RD_points[sbc_number][-1]
                    RD_points[sbc_number].append(point)
                    print("Q_step =", Q_step, "BPP =", point[0], "RMSE =", point[1])
                    delta_BPP = BPP - prev_BPP
                    delta_RMSE = prev_RMSE - RMSE
                    # A segment that does not increase the rate has no defined
                    # slope and is not recorded.
                    if delta_BPP > 0:
                        slope = delta_RMSE/delta_BPP
                        RD_slopes[sbc_number].append((slope, (frame_number, _y, _x, _c), Q_step))

In [None]:
RD_slopes

In [None]:
len(RD_slopes)

In [None]:
    def filter_slopes(slopes):
        """Drop the entries whose slope is not greater than the next one.

        slopes is a sequence of tuples whose first element is the slope.
        Consecutive entries are compared after truncating the slopes to int;
        when an entry is not (strictly) greater than its successor, the entry
        is discarded and reported with print(). The last entry is always
        kept. Returns a new list; an empty input produces an empty list.
        """
        slopes_iterator = iter(slopes)
        try:
            prev = next(slopes_iterator)
        except StopIteration:
            # A curve without any recorded slope: nothing to filter.
            return []
        filtered_slopes = []
        for curr in slopes_iterator:
            if int(prev[0]) <= int(curr[0]):
                print(f"deleted {prev}")
            else:
                filtered_slopes.append(prev)
            prev = curr
        filtered_slopes.append(prev)
        return filtered_slopes

    # Filter the slopes of each subband-component independently.
    filtered_slopes = [filter_slopes(slopes) for slopes in RD_slopes]

    # Flatten the per-subband-component lists into a single list. The list is
    # rebuilt from scratch so that running this cell again does not duplicate
    # its entries.
    single_list = [slope for slopes in filtered_slopes for slope in slopes]

In [None]:
# Order the entries by slope (first tuple element), steepest first: sort in
# ascending order and then reverse the result.
sorted_slopes = sorted(single_list, key=lambda entry: entry[0])
sorted_slopes.reverse()

In [None]:
sorted_slopes

In [None]:
len(sorted_slopes)

### Build the optimal RD curve
For each combination of quantization steps, which now has 4 dimensions (frame_number, subband_y, subband_x, component), compute the distortion of all the decompositions (generated from the GOF) in the transform domain. As we did with a single image, the initial quantization steps combination
\begin{equation}
  \boxed{
    \begin{matrix}
       \infty & \infty & \cdots & \infty \\
       \infty & \infty & \cdots & \infty \\
       \vdots & \vdots &        & \vdots \\
       \infty & \infty & \cdots & \infty 
    \end{matrix}
  }
  \boxed{
    \begin{matrix}
       \infty & \infty & \cdots & \infty \\
       \infty & \infty & \cdots & \infty \\
       \vdots & \vdots &        & \vdots \\
       \infty & \infty & \cdots & \infty 
    \end{matrix}
  }
  \cdots
  \boxed{
    \begin{matrix}
       \infty & \infty & \cdots & \infty \\
       \infty & \infty & \cdots & \infty \\
       \vdots & \vdots &        & \vdots \\
       \infty & \infty & \cdots & \infty 
    \end{matrix}
  }
\end{equation}
(remember, one quantization step per subband-component, so, in the previous figure, only one component has been considered) "kills" all the coefficients of all the subband-components (this generates the top-left point of the RD curve). Then, the combinations start to incorporate the quantization steps defined in `sorted_slopes`, until the quantization steps combination
\begin{equation}
  \boxed{
    \begin{matrix}
       1 & 1 & \cdots & 1 \\
       1 & 1 & \cdots & 1 \\
       \vdots & \vdots &        & \vdots \\
       1 & 1 & \cdots & 1 
    \end{matrix}
  }
  \boxed{
    \begin{matrix}
       1 & 1 & \cdots & 1 \\
       1 & 1 & \cdots & 1 \\
       \vdots & \vdots &        & \vdots \\
       1 & 1 & \cdots & 1 
    \end{matrix}
  }
  \cdots
  \boxed{
    \begin{matrix}
       1 & 1 & \cdots & 1 \\
       1 & 1 & \cdots & 1 \\
       \vdots & \vdots &        & \vdots \\
       1 & 1 & \cdots & 1 
    \end{matrix}
  }
\end{equation}
is finally used (if it has not been discarded by the convex-hull filter).

In [None]:
# Decompose every frame of the GOP once (color transform, removal of the
# integer average of each component, block DCT, subband arrangement).
decompositions = []
for frame_number in range(G):
    frame = image.read(sequence, frame_number)
    YUV_frame = color.from_RGB(frame.astype(np.int16))
    YUV_frame[...,0] -= np.average(YUV_frame[...,0]).astype(np.int16)
    YUV_frame[...,1] -= np.average(YUV_frame[...,1]).astype(np.int16)
    YUV_frame[...,2] -= np.average(YUV_frame[...,2]).astype(np.int16)
    DCT_blocks = DCT.analyze_image(YUV_frame, block_y_side, block_x_side)
    DCT_decomposition = DCT.get_subbands(DCT_blocks, block_y_side, block_x_side)
    decompositions.append(DCT_decomposition)

optimal_RD_points = []
Q_steps_combinations = []
decompositions_prog = []
for i in range(G):
    # One quantization step per subband-component, indexed as [_y, _x, _c].
    # The huge initial step stands for "infinite".
    Q_steps_combinations.append(np.full(shape=(block_y_side, block_x_side, N_components), fill_value=99999999))
    decompositions_prog.append(np.zeros_like(decompositions[0]))

# Visit the slopes from the steepest to the flattest; each one refines the
# quantization step of a single subband-component of a single frame and
# generates one point of the RD curve.
for s in sorted_slopes:
    sbc_index = s[1]
    decomposition_number = sbc_index[0]
    _y = sbc_index[1]
    _x = sbc_index[2]
    _c = sbc_index[3]

    # Gets the (unquantized) coefficients
    for i in range(G):
        decompositions_prog[i] = decompositions[i].copy()

    # Consider the next quantization step
    Q_steps_combinations[decomposition_number][_y, _x, _c] = s[2]

    decompositions_prog_k = []
    decompositions_prog_dQ = []
    for i in range(G):
        decompositions_prog_k.append(DCT.quantize(decompositions_prog[i], Q_steps_combinations[i]))
        decompositions_prog_dQ.append(DCT.dequantize(decompositions_prog_k[i], Q_steps_combinations[i]))

    acc_BPP = 0 # Accumulated rate
    acc_RMSE = 0 # Accumulated distortion
    for i in range(G):
        RMSE = distortion.RMSE(decompositions[i], decompositions_prog_dQ[i])
        acc_RMSE += RMSE
        # Every frame is written using the same file name: only the value
        # returned by image.write() is used. frame is the last frame read above.
        BPP = image.write((decompositions_prog_k[i] + 128).astype(np.uint8), f"/tmp/{decomposition_number}_{_y}_{_x}_{_c}_{s[0]}_", 0)*8/frame.size
        acc_BPP += BPP

    point = (acc_BPP/G, acc_RMSE/G)
    # Report the point that is stored (GOP averages), not the values of the
    # last frame.
    print("sbc =", sbc_index, "Q_step =", s[2], "BPP =", point[0], "RMSE =", point[1])
    optimal_RD_points.append(point)

### Compare

In [None]:
# Compare the RD curves obtained with and without RDO. Each RD point is a
# (BPP, RMSE) tuple, so zip(*points) yields the x and y series for plot().
pylab.figure(dpi=150)
pylab.plot(*zip(*RD_points_no_RDO), label="No RDO")
pylab.plot(*zip(*optimal_RD_points), label="Using RDO")
pylab.title(f"Effect of using RDO ($G={G}$)")
pylab.xlabel("Bits/Pixel")
# The distortion stored in the RD points comes from distortion.RMSE().
pylab.ylabel("RMSE")
pylab.legend(loc="best")
#pylab.yscale('log')
#pylab.xscale('log')
pylab.show()

## Using the YCoCg (TODO)
Remember that this color transform is not orthogonal and therefore, the distortion cannot be measured in the transform domain.

## Using the DWT (TODO)