# Histogram Equalization
Histogram equalization concept and algorithm applied to digital image color processing.

In [None]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import imageio

from _utils import *

## 1. Grayscale images
---
*Histogram equalization* of a grayscale image $x$ (single channel) considers the *probability density* of the gray levels $i$, defined by:

$$ \large
p_x(i)=p(x=i)=\frac{n_i}{n}, \quad 0 \leq i < L
$$

where:

- $n_i$ is the number of occurrences of gray level $i$
- $n$ is the total number of pixels
- $L$ is the total number of gray levels (for 8-bit images this value is 256)

Considering that we have the ordered *probability density* $p_x(i)$, the equalization is given by the [cumulative distribution function](https://en.wikipedia.org/wiki/Cumulative_distribution_function), defined by:

$$ \large
cdf_x(i)=\sum_{j=0}^{i}p_x(j)
$$

The motivation comes from the continuous-space concept of the *CDF*, which can be understood as:

$$
CDF_X(x)=\int_{-\infty}^{x}p_x(t)dt
$$

In [None]:
# Read grayscale image
img_float = imageio.imread('../_data/ship.png')

In [None]:
summary(img_float)

In [None]:
# Change range from [0-255] to [0.0-1.0]
img_float = img_float/(2**8 - 1)
summary(img_float)

In [None]:
# Visualize the input image histogram
histogram(img_float, bins=2**8, interval=[0, 1])

An easier and more efficient way to perform histogram equalization is to sort the gray values and look up the sorted index (rank) of each pixel, instead of computing the probability distribution and then the cumulative sum of its values. At the end of this process, it is necessary to normalize the ranks by the total number of pixels (the code below divides by the number of pixels minus one, so the output lies in [0, 1]). The result is essentially the same as the CDF-based mapping.

In [None]:
def eqHist1(matrix_in):
    """Equalize the histogram of a single-channel image using pixel ranks.

    Every pixel is replaced by the number of pixels whose value is strictly
    smaller than its own, divided by ``n - 1`` (``n`` being the pixel count).
    The smallest gray value therefore maps to 0 and all outputs lie in
    [0, 1].

    Parameters
    ----------
    matrix_in : array_like
        Gray-level image. Any shape is accepted; every element counts as
        one pixel.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``matrix_in``. The input is
        never modified.
    """
    pixels = np.asarray(matrix_in)
    n_pixels = pixels.size
    if n_pixels < 2:
        # With zero or one pixel the normalizer n - 1 would be <= 0
        # (0/0 -> NaN for a single pixel); the only possible rank is 0.
        return np.zeros(pixels.shape, dtype=float)
    # Sorted gray values of the whole image (axis=None flattens first)
    ordered = np.sort(pixels, axis=None)
    # Rank of each pixel = index of the first occurrence of its value
    ranks = ordered.searchsorted(pixels)
    # Normalize so the largest possible rank (n - 1) maps to 1.0
    return ranks / (n_pixels - 1)

img_float_eq = eqHist1(img_float)

In [None]:
# Visualize the histogram of the equalized image
histogram(img_float_eq, bins=2**8, interval=[0, 1])

## 2. Color images
---
Applies the same concept used for grayscale images to multi-channel images.

In [None]:
# Read the color image and rescale it by 2**8 - 1 (255), i.e. from
# [0-255] to [0.0-1.0] -- assumes the file holds 8-bit data (TODO confirm)
img_rgb = imageio.imread('../_data/aerial03.png')/(2**8 - 1)

In [None]:
summary(img_rgb)

In [None]:
histogram(img_rgb, bins=2**8, interval=[0, 1])

### 2.1. RGB Histogram equalization
---
Applies histogram equalization, stretching each channel separately. This process does not preserve the hue distribution, which means that the colors might change.

In [None]:
def eqHist3(img_in):
    """Equalize the histogram of each RGB channel independently.

    For each of the first three channels, every pixel is replaced by the
    number of pixels in that channel with a strictly smaller value, divided
    by ``n - 1`` (``n`` being the number of pixels per channel). Because the
    channels are stretched separately, hue is not preserved.

    Parameters
    ----------
    img_in : array_like
        Image of shape ``(rows, cols, channels)`` with at least three
        channels. Integer and floating-point data are both accepted.

    Returns
    -------
    numpy.ndarray
        Floating-point image with the same shape as ``img_in`` (the input
        dtype is kept when it is already floating point, ``float64``
        otherwise). The first three channels lie in [0, 1]; any further
        channel (e.g. alpha) is returned unchanged. The input is never
        modified.
    """
    img = np.asarray(img_in)
    n_rows, n_cols, _ = img.shape
    # Ranks are stored in a floating-point output rather than in a copy of
    # the input: writing them into an integer image (e.g. uint8) would wrap
    # around as soon as a rank exceeds the dtype's range.
    if np.issubdtype(img.dtype, np.floating):
        out_dtype = img.dtype
    else:
        out_dtype = np.float64
    img_out = img.astype(out_dtype)  # astype returns a new array
    # Guard the normalizer: a single-pixel image would otherwise give 0/0
    denom = max(n_rows * n_cols - 1, 1)
    for ch in range(3):
        channel = img[:, :, ch]
        # Sorted values of this channel (axis=None flattens first)
        ordered = np.sort(channel, axis=None)
        # Rank of each pixel, normalized so the largest rank maps to 1.0
        img_out[:, :, ch] = ordered.searchsorted(channel) / denom
    return img_out

img_rgb_eq = eqHist3(img_rgb)

In [None]:
summary(img_rgb_eq)

In [None]:
%%time
histogram(img_rgb_eq, bins=2**8, interval=[0, 1])

### 2.2. HSV Histogram equalization
---
Applies histogram equalization while preserving the hue distribution. The value component is always equalized; the saturation component can optionally be equalized as well.

In [None]:
def eqHist3hsv(img_in, saturation=False):
    """Equalize an RGB image in HSV space, leaving the hue untouched.

    The image is converted to HSV, the value component is equalized by
    pixel rank (number of pixels with a strictly smaller value, divided by
    ``n - 1``), and the result is converted back to RGB.

    Parameters
    ----------
    img_in : numpy.ndarray
        RGB image of shape ``(rows, cols, 3)``. ``rgb_to_hsv`` expects the
        values to be in [0, 1].
    saturation : bool, optional
        When true, the saturation component is equalized as well.

    Returns
    -------
    numpy.ndarray
        Equalized RGB image as returned by ``hsv_to_rgb``. The input is
        never modified.
    """
    def _equalize(channel):
        # Replace the channel, in place, by its normalized pixel ranks.
        ordered = np.sort(channel, axis=None)
        # max(..., 1) avoids 0/0 (NaN) when the image has a single pixel
        channel[...] = ordered.searchsorted(channel) / max(channel.size - 1, 1)

    # Convert a private copy from RGB to HSV
    img_hsv = mpl.colors.rgb_to_hsv(img_in.copy())
    # Unpacking enforces the (rows, cols, channels) layout
    _rows, _cols, _ = img_hsv.shape
    if saturation:
        # Stretch the saturation component (a view into img_hsv)
        _equalize(img_hsv[:, :, 1])
    # Stretch the value component (a view into img_hsv)
    _equalize(img_hsv[:, :, 2])
    return mpl.colors.hsv_to_rgb(img_hsv)

img_hsv_eq = eqHist3hsv(img_rgb)

In [None]:
summary(img_hsv_eq)

In [None]:
%%time
histogram(img_hsv_eq, bins=2**8, interval=[0, 1])

### Same process, but also stretching the saturation values

In [None]:
img_hsv_eq2 = eqHist3hsv(img_rgb, saturation=True)

In [None]:
summary(img_hsv_eq2)

In [None]:
%%time
histogram(img_hsv_eq2, bins=2**8, interval=[0, 1])