## Histogram Comparing

Check if the histograms of two images are the same (or very similar) and, if they are, it can be interpreted that the images are, in some way, the same or similar.

##### Library Importing

In [2]:
import pandas as pd
import cv2 as cv
import numpy as np
import glob
import os

##### Methods to Compare Histograms

`compareHist(H1, H2, method)` <br>
* Input: <br>
    1.	`H1`: First histogram to be compared; <br>
    2.  `H2`: Second histogram to be compared; <br>
    3.  `method`: Flag indicating which comparation method should be executed. <br>
            `HISTCMP_CORREL`: Calculate the correlation between two histograms; <br>
            `HISTCMP_CHISQR`: Apply the Chi-Square distance to histograms; <br>
            `HISTCMP_INTESECT`: Calculate the intersection between two histograms; <br>
            `HISTCMP_BHATTACHARYYA`: Apply the Bhattacharyya distance. Used to mesure the overlap between two histograms. <br><br>

* Output: Graph representing the histogram of the image.<br><br><br>


`calcHist([img], [channels], mask, [histSize], ranges)` <br>
* Input: <br>
    1.	`img`: Image to be analyzed; <br>
    2.  `channels`: Index of the channel on which the histogram will be calculated; <br>
    3.  `mask`: Mask image (if you want to compare the fully image, pass: `None`); <br>
    4.  `histSize`: Represents BIN couting (usually `256` as it is the full scale of values); <br>
    5.  `ranges`: Image values range (`0` to `256`). <br><br>
* Output: Graph representing the histogram of the image.

In [3]:
# Index dictionary to storage image name and its corresponding histogram
index = {}

# Images dictionary to storage image name and its corresponsing the image itself
images = {}

# Getting the imagens in folder
for imagePath in glob.glob(os.getcwd() + "/Resources/Img/*.jpg"):

    # Extracting the image filename (considered unique) and load
    # the image, updating the image dictionary
    fileName = imagePath[imagePath.rfind("/") + 1:]
    print(fileName)
    image = cv.imread(imagePath)
    images[fileName] = cv.cvtColor(image, cv.COLOR_BGR2RGB)

    # Extracting an RGB color histogram from the image using 8 boxes per channel
    hist = cv.calcHist([image], [1,2], None, [8, 8], [0, 256, 0, 256])
    hist = cv.normalize(hist, hist).flatten() # flatten converts to 1D array
    index[fileName] = hist

Img\apple1.jpg
Img\apple2.jpg
Img\apple3.jpg
Img\apple_green.jpg
Img\Binaria.jpg
Img\Cinza.jpg
Img\colorida.jpg
Img\colorida0.jpg
Img\einstein.jpg
Img\pineapple.jpg


In [4]:
# Checking the Histogram Calc

index 

{'Img\\apple1.jpg': array([8.4590201e-06, 5.4560677e-04, 1.0298857e-03, 2.2162632e-03,
        3.1234932e-03, 6.7693307e-03, 1.4371876e-02, 4.0645590e-03,
        0.0000000e+00, 1.5014761e-04, 1.0869841e-03, 2.7047717e-03,
        1.4338039e-03, 6.3484944e-03, 2.1481682e-02, 2.2329699e-02,
        2.5165585e-04, 6.5768883e-04, 1.2054104e-04, 1.3936235e-03,
        2.5842306e-03, 1.7552467e-03, 1.4331695e-02, 3.7640523e-02,
        5.7098387e-05, 4.4621332e-03, 4.7793463e-04, 3.6796738e-04,
        2.2226076e-03, 2.5694275e-03, 8.8291019e-03, 4.7571413e-02,
        0.0000000e+00, 1.9371156e-03, 4.1153133e-03, 7.0421345e-04,
        1.0214266e-03, 4.1005099e-03, 5.3439857e-03, 4.4970267e-02,
        0.0000000e+00, 0.0000000e+00, 1.9603779e-03, 8.0402987e-03,
        4.5742150e-03, 2.1316730e-03, 8.2179382e-03, 2.2365650e-02,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.3111481e-04,
        6.8877572e-03, 1.4190007e-03, 5.8261501e-03, 2.0511009e-02,
        0.0000000e+00, 0.0000

#### Methods to Histogram Calc

In [45]:
OPENCV_METHODS = (
    ("Correlation", cv.HISTCMP_CORREL),
    ("Chi-Squared", cv.HISTCMP_CHISQR),
    ("Intersection", cv.HISTCMP_INTERSECT),
    ("Bhattacharyya", cv.HISTCMP_BHATTACHARYYA))

analyzedImg = 'Img\\apple1.jpg'

results_list = []
methods_names_list = []

for (methodName, method) in OPENCV_METHODS:
    
    results = {}
    reverse = False

    # If we are using correlation or intersection:
    # Classify the results in rerse order
    if methodName in ("Correlation", "Intersection"):
        reverse = True
    
    for (k, hist) in index.items():
        # Claculate the distance between two histograms using the methods
        # Update results dictionary
        d = cv.compareHist(index[analyzedImg], hist, method)
        results[k] = d

    # Sorting the results
    methods_names_list.append(methodName)
    results = sorted([(v, k) for (v, k) in results.items()], reverse = reverse)
    results_list.append(results)

In [46]:
# The generated output is a list of lists, in which each of the internal lists
# corresponds to a comparison method. This comparison is made of the analyzed
# image with all the others (including itself).

results_list

[[('Img\\pineapple.jpg', 0.9856005600869122),
  ('Img\\einstein.jpg', 0.0024663768916125053),
  ('Img\\colorida0.jpg', -0.04022128548982981),
  ('Img\\colorida.jpg', -0.04022128548982981),
  ('Img\\apple_green.jpg', 0.99374455258592),
  ('Img\\apple3.jpg', 0.9865322210986823),
  ('Img\\apple2.jpg', 0.9977949986529369),
  ('Img\\apple1.jpg', 1.0),
  ('Img\\Cinza.jpg', -0.03594841932083633),
  ('Img\\Binaria.jpg', 0.9399692187994699)],
 [('Img\\Binaria.jpg', 11130.477169003187),
  ('Img\\Cinza.jpg', 2286.426832628531),
  ('Img\\apple1.jpg', 0.0),
  ('Img\\apple2.jpg', 0.46854840131568914),
  ('Img\\apple3.jpg', 8.074007497620023),
  ('Img\\apple_green.jpg', 4.982400138697053),
  ('Img\\colorida.jpg', 1223.5607416044397),
  ('Img\\colorida0.jpg', 1223.5607416044397),
  ('Img\\einstein.jpg', 99359.26689037033),
  ('Img\\pineapple.jpg', 2.1143753210409972)],
 [('Img\\pineapple.jpg', 1.1465068510374294),
  ('Img\\einstein.jpg', 0.04913638391644781),
  ('Img\\colorida0.jpg', 0.068460955105365

##### Creating DataFrame

In [47]:
for i in range(len(methods_names_list)):
    results_list[i] = pd.DataFrame(results_list[i])
    results_list[i]["Method"] = methods_names_list[i]

df = pd.concat(results_list)
df.to_csv('./Resources/Others/Result_compare_hist.csv', sep=';', encoding='latin1')

##### DataFrame Analysis

In [48]:
# Method: Correlation
correlation = df.loc[df['Method'] == 'Correlation']
correlation.sort_values(0)

for i in range(len(correlation)):
    print(correlation[1][i])

0.9856005600869122
0.0024663768916125053
-0.04022128548982981
-0.04022128548982981
0.99374455258592
0.9865322210986823
0.9977949986529369
1.0
-0.03594841932083633
0.9399692187994699


In [50]:
# Method: Chi-Square
ChiSquared = df.loc[df['Method'] == 'Chi-Squared']
ChiSquared.sort_values(0)

for i in range(len(ChiSquared)):
    print(ChiSquared[1][i])

11130.477169003187
2286.426832628531
0.0
0.46854840131568914
8.074007497620023
4.982400138697053
1223.5607416044397
1223.5607416044397
99359.26689037033
2.1143753210409972


In [51]:
# Method: Intersection
Intersection = df.loc[df['Method'] == 'Intersection']
Intersection.sort_values(0)

for i in range(len(Intersection)):
    print(Intersection[1][i])

1.1465068510374294
0.04913638391644781
0.06846095510536543
0.06846095510536543
1.0351118630470637
1.1398096398916095
1.2569620175636373
1.3534432270998877
0.017037612843523675
0.9548498154535991


In [52]:
# Method: Bhattacharyya
Bhattacharyya = df.loc[df['Method'] == 'Bhattacharyya']
Bhattacharyya.sort_values(0)

for i in range(len(Bhattacharyya)):
    print(Bhattacharyya[1][i])

0.5414776988745068
0.9411999019062316
0.0
0.1610709497331469
0.34973544724834466
0.38059305604662713
0.8927848297517853
0.8927848297517853
0.9112322888562902
0.2995433724518647
