In [4]:
# the purpose here is to get the region of interest of the extracted e's
# and calculate the GLCM of the pixels slices received, 

# acc. to paper the pixels recieved are 180x160, but we receive 14x14, lol so that's one thing
# output preferebly will be a feature vector having the GLCM features for this one pic

In [8]:
# A. Marginal Probabalities - GLCM - DONE
#     a. mean of row
#     b. mean of column
#     c. variance of row
#     d. variance of column
# B. Direct GLCM Metrics - GLCM
#     a. Energy (sum of squares of marginal probs) E
#     b. hxy1: Cross entropy using marginal product CEn
#     c. hxy2: Entropy of marginal product MEn
#     d. hglcm: Entropy of the GLCM itself EN
#     e. Max prob in GLCM values P_max
#     f. correlation C
#     g. diagonal correlation C_d
# C. From Difference Matrices - Diff GLCM
#     a. Energy E_d
#     b. Entropy En_d
#     c. Inertia I_d
#     d. Local Homogenteity H_d
# D. From Sum Matrices - Sum GLCM
#     a. Energy E_s
#     b. Entropy En_s
#     d. Variance V_s
#     d. cluster shade
#     e. cluster prominence

In [14]:
import cv2
import pytesseract
import numpy as np
from PIL import Image, ImageDraw
import os

# GLCM
from skimage.feature import graycomatrix, graycoprops

In [15]:
def get_e(image_path, n, crop_size, op_dir):
    
    # Load the image
    # image_path = 'test_images/test_0.png'  # Replace with your image
    img1 = cv2.imread(image_path)
    if img1 is None:
        raise ValueError(f"Image at path {image_path} could not be loaded.")
            
    img2 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
    
    #upscalling images, solely to help tesseract, image extraction will be done from the og image
    # img = cv2.resize(img2, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    img = cv2.resize(img2, (img1.shape[1], img1.shape[0]), interpolation=cv2.INTER_CUBIC)   # this is the processed image which 
                                                                                           # we put in the tesseract
                                                                                           # coz it needs clearer pcitures to 
                                                                                           # accurately locate the e's
    # print(img.shape, img2.shape)
    half = crop_size//2
    
    # Get image height (Tesseract origin is bottom-left)
    h, w = img.shape
    
    # op_dir = 'saved_e'
    os.makedirs(op_dir, exist_ok=True)
    
    # Draw setup
    # draw = ImageDraw.Draw(img)
    
    # Get character bounding boxes
    boxes = pytesseract.image_to_boxes(img)
    count = 0

    # Draw green dots on top of each 'e'
    for b in boxes.strip().splitlines():
        b = b.split()
        char, x1, y1, x2, y2 = b[0], int(b[1]), int(b[2]), int(b[3]), int(b[4])
        
        # Flip y-coordinates
        y1_new = h - y1
        y2_new = h - y2
    
        # make a bounding box around the e
        # cv2.rectangle(img, (x1, y2_new), (x2, y1_new), (255, 0, 0), 1)
    
        if char.lower() == 'e' and count<n:
    
            # compute center of bounding box    
            cx = (x1 + x2) // 2
            cy = (y1_new + y2_new) // 2
        
            # Get top-left and bottom-right coordinates of fixed crop
            x_start = max(0, cx - half)
            y_start = max(0, cy - half)
            x_end = min(img2.shape[1], cx + half)
            y_end = min(img2.shape[0], cy + half)
    
            # crop image
            cropped = img2[y_start:y_end, x_start:x_end]
    
            # if dimensions okay, at image to output dir, 
            # later make sure that the image dirs are constant, else will cause computation error
            if cropped.shape[0] > 0 and cropped.shape[1] > 0:
                out_path = os.path.join(op_dir, f"e_{count+1}.png")
                cv2.imwrite(out_path, cropped)
                count += 1

    cv2.imwrite("output_with_boxes.png", img)
    print(f"Saved {count} 'e' characters in '{op_dir}/' and annotated image as 'output_with_boxes.png'")
    
    # # to check the dimensions of the images
    # for filename in os.listdir(op_dir):
    #     full_path = os.path.join(op_dir, filename)
    #     img_saved = cv2.imread(full_path)
        
    #     if img_saved is not None:
    #         print(f"{filename}: {img_saved.shape}")
    #     else:
    #         print(f"Failed to read {filename}")

    return 0

In [98]:
# get region of interest

# threshold -> if sufficiently black, it is counted as ROI
def roi(img, threshold):
    if len(img.shape) != 2:
        raise ValueError("Input image must be grayscale")
    
    R = np.sum(img < threshold)
    return R

In [99]:
# lets get the dimensions correct here
# GLCM's are inherenetly 4Ds - distance, angles, nxm pairs and number of pair occurances
# we are eliminating angle, hence we get a 3D vector -> (G,G,F), G=levels, D=distances
# 3D vector (GLCM GxGxD) -> normalise -> 2 x row/column sums (1 x 10)-> 2 x mean and variance of each row/column sum(1 x scalar)
# total = 4 features

In [102]:
def glcm(img_path, dist, levels):
    img = cv2.imread(img_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    # Quantize to 8 gray levels (0 to 7)
    # levels = 8
    # quantized = (gray / (256 // levels)).astype(np.uint8)

    distance = list(range(1, dist+1))
    R = roi(gray, 100)
    
    # Compute GLCM
    glcm = graycomatrix(gray, distances=distance, angles=[0], levels=levels, symmetric=False, normed=False)
    
    # display GLCM matrix for angle=0 and distance=1
    # print(glcm[:, :, 0, 0])
    return glcm[:,:,:,0]//R
    

In [104]:
if __name__ == "__main__":
    # # img_path = input("Enter image path: ")
    # dists = [int(x) for x in input("Enter distances (comma-separated): ").split(",")]
    # levels = int(input("Enter number of gray levels: "))

    img_path = 'test_images/test4.jpg'
    result = glcm(img_path, dist = 10, levels = 256) # levels = 8, coz the paper said so
    print("GLCM shape:", result.shape)
    print(result)

GLCM shape: (256, 256, 10)
[[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 ...

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]]


In [127]:
def get_sum(arr,ch):
    print(arr.shape)
    if ch == 'c':
        return np.sum(arr, axis=0)
    elif ch == 'r':
        return np.sum(arr, axis=1) 

In [128]:
def get_mean(arr):
    # print("1",arr.shape)
    indices = np.arange(len(arr))
    return np.sum(arr)

In [129]:
def get_var(arr):
    # print("2",arr.shape)
    indices = np.arange(len(arr))
    mean = get_mean(arr)
    
    # print("len(arr)", len(arr))
    # print("value of mean", mean)
    
    summ = 0
    for i in range(0,(len(arr))):
        summ = summ + i*arr[i]
    print("s",summ.shape)
    variance = summ - mean ** 2
    
    return variance

In [130]:
if __name__ == "__main__":
    img_path = 'saved_e/e_1.png'  

    row_sums = get_sum(glcm(img_path, 10, 256), 'r')
    col_sums = get_sum(glcm(img_path, 10, 256), 'c')
    print("r",row_sums.shape)
    print("c",col_sums.shape)

    print("Mean of Row Sums:", get_mean(row_sums))
    print("Mean of Column Sums:", get_mean(col_sums))
    print("Variance of Row Sums:", get_mean(get_var(row_sums)))
    print("Variance of Column Sums:", get_mean(get_var(col_sums)))


(256, 256, 10)
(256, 256, 10)
r (256, 10)
c (256, 10)
Mean of Row Sums: 7
Mean of Column Sums: 7
s (10,)
Variance of Row Sums: 1295
s (10,)
Variance of Column Sums: 1295
