In [None]:
%matplotlib inline


# Lloyd-Max Quantization

Lloyd-Max quantization: run K-means on the pixel values and use the centroid of each cluster as the representation level of its quantization bin. See [scikit-learn's Vector Quantization Example](https://scikit-learn.org/stable/auto_examples/cluster/plot_face_compress.html#sphx-glr-auto-examples-cluster-plot-face-compress-py).

Notice that the centroids must be transmitted to the decoder.

In [None]:
# Symlink project-local helper modules into the working directory; `image_1`
# and `distortion` are imported by this notebook from here.
# NOTE(review): assumes checkouts exist at ~/MRVC and ~/repos/quantization — confirm.
# NOTE(review): logging_config and deadzone_quantizer are not imported directly
# in the visible cells; presumably they are needed by the linked modules — verify.
!ln -sf ~/MRVC/src/logging_config.py .
!ln -sf ~/repos/quantization/deadzone_quantizer.py .
!ln -sf ~/repos/quantization/distortion.py .
!ln -sf ~/MRVC/src/image_1.py .
# The link for information.py is currently disabled.
#!ln -sf ~/repos/quantization/information.py .

In [None]:
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from sklearn import cluster
import os
import pylab
import image_1 as gray_image
import distortion

## Configuration

In [None]:
# Location prefix of the input image (passed to gray_image.read / gray_image.show).
home = os.environ["HOME"]
fn = f"{home}/MRVC/images/lena_bw/"

# K-means settings.
n_clusters = 64         # number of bins used by the single-run demo
N_tries = 4             # number of times K-means is run (n_init)
N_bins = range(2, 128)  # bin counts swept when building the RD curve

# Writer selection: debug_write is the faster option; keeping the module's
# own write() instead gives higher compression.
gray_image.write = gray_image.debug_write

## Read the image and show it

In [None]:
# Read image number 0 from the configured prefix, then display it, passing
# the path of that image as the second argument of show().
img = gray_image.read(fn, 0)
gray_image.show(img, f"{fn}000.png")

In [None]:
# Seed NumPy's global RNG so the random numbers are predictable.
np.random.seed(0)

# K-means wants a 2-D (n_samples, n_features) array: one pixel value per row.
flatten_img = img.reshape((-1, 1))
k_means = cluster.KMeans(n_clusters=n_clusters, n_init=N_tries).fit(flatten_img)

centroids = k_means.cluster_centers_.squeeze()  # one centroid per cluster
labels = k_means.labels_                        # cluster index assigned to each pixel
print(len(labels), len(centroids), img.shape, n_clusters, len(centroids.flatten()))

# Dequantize: look up the centroid of every pixel's label and restore the
# original image shape.
img_dequantized = centroids[labels].reshape(img.shape)

print(centroids)
print(labels, labels.shape)
print(img_dequantized.max(), img_dequantized.min())

# Intensity range of the original image.
vmin, vmax = img.min(), img.max()

In [None]:
def RD_curve(img, N_bins):
    """Compute rate/distortion points for K-means quantization of ``img``.

    For every bin count ``n`` in ``N_bins`` the pixel values are clustered
    with K-means (``N_tries`` restarts, read from the notebook configuration),
    the image is rebuilt from the centroids rounded to 8 bits, the index image
    is written with ``gray_image.write`` and the RMSE between ``img`` and the
    rebuilt image is measured with ``distortion.RMSE``.

    Parameters
    ----------
    img : numpy.ndarray
        Image to quantize.
        NOTE(review): centroids are stored as uint8, so an 8-bit image is
        presumably expected — confirm.
    N_bins : iterable of int
        Numbers of clusters (bins) to evaluate. Each value must be <= 256
        because the quantization indexes are stored as uint8.

    Returns
    -------
    list of (rate, distortion) tuples
        One point per entry of ``N_bins``. ``rate`` is the value returned by
        ``gray_image.write`` times 8 divided by the number of pixels
        (presumably bits/pixel, assuming ``write`` returns a byte count —
        verify). It covers the index image only; the centroids are not
        included in the rate.

    Raises
    ------
    ValueError
        If a bin count is larger than 256.
    """
    points = []
    flatten_img = img.reshape((-1, 1))  # one pixel per row, as K-means expects
    for n in N_bins:
        if n > 256:
            # More than 256 labels would silently wrap around in uint8.
            raise ValueError(f"n={n} bins cannot be indexed with uint8 labels (max 256)")
        k_means = cluster.KMeans(n_clusters=n, n_init=N_tries)
        k_means.fit(flatten_img)
        # cluster_centers_ has shape (n, 1): take the single column (also valid
        # for n == 1, where squeeze() would give a 0-d array) and round to the
        # nearest integer; a bare astype(uint8) truncates and biases the
        # reconstruction levels downwards.
        centroids = np.rint(k_means.cluster_centers_[:, 0]).astype(np.uint8)
        # NOTE(review): K-means labels are arbitrary cluster ids, not ordered
        # by centroid value; relabelling by sorted centroid might help the
        # image coder — evaluate separately.
        k = k_means.labels_.astype(np.uint8)  # quantization indexes
        y = centroids[k].reshape(img.shape)   # dequantized image
        k = k.reshape(img.shape)              # index image
        print("Quantization indexes: ", np.unique(k))
        rate = gray_image.write(k, "/tmp/" + str(n) + '_', 0)*8/k.size
        _distortion = distortion.RMSE(img, y)
        if n<16:
            # Show the coarsest reconstructions on the full 8-bit gray scale.
            plt.imshow(y, cmap=plt.cm.gray, vmin=0, vmax=255)
            plt.show()
        points.append((rate, _distortion))
        print(f"n={n:>3}, rate={rate:>7} bits/pixel, distortion={_distortion:>6.1f}")
    return points

RD_points = RD_curve(img, N_bins)

In [None]:
# Scatter plot of the (rate, distortion) pairs stored in RD_points.
fig = pylab.figure(dpi=150)
ax = fig.gca()
ax.scatter(*zip(*RD_points), label="K-means PNG", s=1, marker='.')
ax.set_title("Rate/Distortion Performance")
ax.set_xlabel("Bits/Pixel")
ax.set_ylabel("RMSE")
ax.legend(loc='upper right')
pylab.show()

In [None]:
# Store the RD points as text, one "rate<TAB>distortion" pair per line.
with open("Lloyd-Max_RD_points.txt", 'w') as f:
    f.writelines(f"{rate}\t{rmse}\n" for rate, rmse in RD_points)