# Image analysis

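Basic information about a single image (display, dimensions, channel histograms), followed by a histogram-based similarity comparison across the images in the dataset.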

## Imports

In [None]:
import os
import cv2
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from pathlib import Path

## Loading image

In [None]:
# Define the image file name and full path to the image
image_name = '12a.jpg'
image_path = Path(f'../../data/images/{image_name}')

# Read the image from the specified path.
# The path is passed as a string because not every OpenCV build accepts pathlib.Path.
img = cv2.imread(str(image_path))

# cv2.imread reports failure by returning None instead of raising,
# so fail here with a clear message rather than inside cvtColor
if img is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')

# Convert the image from BGR to RGB color space
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

## Small analysis

### Showing image

In [None]:
# Draw the image on the current axes
plt.imshow(img)

# Hide both the X and Y axes (including ticks and labels)
plt.axis('off')

# Render the figure
plt.show()

### Image shape

In [None]:
# The first dimension of the image shape is the height (rows),
# the second dimension is the width (columns)
img_height, img_width = img.shape[:2]

print(f'Image width: {img_width}')
print(f'Image height: {img_height}')


### Image histograms

In [None]:
# Split the image into its channels.
# img was converted from BGR to RGB right after loading, so the planes
# come out in (R, G, B) order.
r, g, b = cv2.split(img)

# Compute a 256-bin histogram over the value range [0, 256) for each channel
hist_r = cv2.calcHist([r], [0], None, [256], [0, 256])
hist_g = cv2.calcHist([g], [0], None, [256], [0, 256])
hist_b = cv2.calcHist([b], [0], None, [256], [0, 256])

# Compute the average histogram (RGB histogram)
hist_avg = (hist_b + hist_g + hist_r) / 3

# Plot histograms
plt.figure(figsize=(12, 2))
plt.plot(hist_b, color='blue', label='Blue Channel')
plt.plot(hist_g, color='green', label='Green Channel')
plt.plot(hist_r, color='red', label='Red Channel')
plt.plot(hist_avg, color='black', linestyle='--', label='Average (RGB)')

# Add titles and legends
plt.title('Channel Histograms')
plt.xlabel('Pixel Value')
plt.ylabel('Frequency')
plt.legend()

# Customize the Y-axis: only the shape of the curves matters here,
# so the ticks and the axis label are hidden
plt.gca().yaxis.set_ticks([])
plt.gca().yaxis.label.set_visible(False)

plt.show()

## Image similarity in dataset

### Loading

In [None]:
# Labeled images
data_path = Path('../../data/images/')

image_names_l = list()
labeled_images = list()

# Sort the directory listing so the image order (and therefore the layout of
# anything built from it) is reproducible; os.listdir returns entries in
# arbitrary order.
for img_name in sorted(os.listdir(data_path)):
    # The path is passed as a string because not every OpenCV build accepts pathlib.Path
    loaded = cv2.imread(str(data_path / img_name))

    # cv2.imread returns None for entries it cannot decode (non-image files,
    # sub-directories); skip them so names and images stay aligned
    if loaded is None:
        print(f'Skipping unreadable file: {data_path / img_name}')
        continue

    image_names_l.append(img_name)
    labeled_images.append(loaded)

In [None]:
# Other images
data_path = Path('../../data/other_images/')

image_names_o = list()
other_images = list()

# Sort the directory listing so the image order (and therefore the layout of
# anything built from it) is reproducible; os.listdir returns entries in
# arbitrary order.
for img_name in sorted(os.listdir(data_path)):
    # The path is passed as a string because not every OpenCV build accepts pathlib.Path
    loaded = cv2.imread(str(data_path / img_name))

    # cv2.imread returns None for entries it cannot decode (non-image files,
    # sub-directories); skip them so names and images stay aligned
    if loaded is None:
        print(f'Skipping unreadable file: {data_path / img_name}')
        continue

    image_names_o.append(img_name)
    other_images.append(loaded)

In [None]:
# Combine both sets into single lists, labeled entries first;
# names and images stay aligned index-by-index
images = [*labeled_images, *other_images]
image_names = [*image_names_l, *image_names_o]

### Computing similarity matrix based on histograms

In [None]:
def calc_avg_hist(img):
    """
    Calculate the average histogram of an image across its three color channels.

    A 256-bin histogram over the value range [0, 256) is computed for each of
    the channels 0, 1 and 2, and the three histograms are averaged bin by bin.
    Because the result is a plain average, it does not depend on whether the
    channels are ordered BGR (as returned by cv2.imread) or RGB.

    Parameters:
    img (numpy.ndarray): Input image with three channels.

    Returns:
    numpy.ndarray: The average histogram, in the layout produced by
    cv2.calcHist (one row per bin, 256 rows).

    Raises:
    ValueError: If img is None, e.g. because cv2.imread failed to load it.
    """

    # cv2.imread returns None on failure; report that explicitly instead of
    # letting cv2.calcHist fail with an opaque error
    if img is None:
        raise ValueError('img is None; the image could not be loaded')

    # Compute one histogram per channel index
    hist_c0, hist_c1, hist_c2 = (
        cv2.calcHist([img], [channel], None, [256], [0, 256])
        for channel in range(3)
    )

    # Compute the average histogram over the three channels
    hist_avg = (hist_c2 + hist_c1 + hist_c0) / 3

    return hist_avg

In [None]:
# Compute the average histogram of every image once up front, instead of
# recomputing both histograms for each of the len(images)**2 pairs
histograms = [calc_avg_hist(image) for image in images]

# Initialize a similarity matrix with zeros
similarity_matrix = np.zeros((len(images), len(images)))

# Loop over each pair of histograms
for i, hist1 in enumerate(histograms):
    for j, hist2 in enumerate(histograms):

        # Compare the two histograms using the correlation method
        similarity_matrix[i, j] = cv2.compareHist(hist1, hist2, cv2.HISTCMP_CORREL)


### Visualization of similarity matrix

In [None]:
# Options for seaborn's heatmap function
heatmap_options = dict(
    annot=True,                 # Write the numeric value inside each cell
    cbar=True,                  # Show the color bar that maps colors to values
    cmap='coolwarm',            # Use the 'coolwarm' colormap
    fmt='.2f',                  # Annotate values with 2 decimal places
    xticklabels=image_names,    # Label the columns with the image names
    yticklabels=image_names,    # Label the rows with the image names
    vmin=-1,                    # Lower bound of the color scale
    vmax=1,                     # Upper bound of the color scale
)

# Open a new figure and draw the similarity matrix on it as a heatmap
plt.figure(figsize=(16, 8))
sns.heatmap(similarity_matrix, **heatmap_options)

# Display the heatmap plot
plt.show()