In [None]:
import cv2
import numpy as np
from skimage.morphology import skeletonize
from google.colab.patches import cv2_imshow

In [None]:
# Define a function to classify neighborhood pixels into direct and diagonal
def classify_neighbors(x, y, img):
    """Split the 255-valued 8-neighbours of pixel (x, y) into two groups.

    Args:
        x: Column index of the centre pixel.
        y: Row index of the centre pixel.
        img: Array indexed as ``img[row, col]``; a neighbour is reported only
            when its value equals 255.

    Returns:
        A ``(direct_pixels, diagonal_pixels)`` pair of lists of ``(x, y)``
        tuples. Direct neighbours share a row or a column with the centre,
        all others are diagonal. Positions outside the image are ignored.
    """
    height, width = img.shape[0], img.shape[1]
    direct_pixels, diagonal_pixels = [], []

    for col in range(x - 1, x + 2):
        # Whole column lies outside the image: nothing to inspect.
        if not 0 <= col < width:
            continue

        for row in range(y - 1, y + 2):
            is_centre = col == x and row == y
            if is_centre or not 0 <= row < height:
                continue
            if img[row, col] != 255:
                continue

            # Sharing an axis with the centre makes the neighbour "direct".
            bucket = direct_pixels if (col == x or row == y) else diagonal_pixels
            bucket.append((col, row))

    return direct_pixels, diagonal_pixels

# Define a function to extract individual components
def extract_components(img, min_size_threshold,endpoints, intersection_points):
    """Trace skeleton strokes starting from the given seed pixels.

    Seeds are taken from the end of ``endpoints`` first and, once that list
    is empty, from the end of ``intersection_points``. From each seed the
    trace keeps stepping to the next 255-valued neighbour for as long as
    exactly one such neighbour exists; every visited pixel is set to 0.

    Side effects: both input lists are emptied and ``img`` is modified
    in place.

    Args:
        img: Array indexed as ``img[y, x]`` with 255 marking skeleton pixels.
        min_size_threshold: Minimum number of pixels a traced stroke needs in
            order to be kept.
        endpoints: List of ``(x, y)`` seed pixels (consumed).
        intersection_points: List of ``(x, y)`` seed pixels (consumed).

    Returns:
        A list of sets of ``(x, y)`` tuples, one set per kept stroke.
    """
    components = []

    while endpoints or intersection_points:
        # Endpoints have priority over intersection points as seeds.
        seed_source = endpoints if endpoints else intersection_points
        cursor = seed_source.pop()
        traced = set()

        # At most one successor is ever queued, so a single cursor suffices.
        while cursor is not None:
            x, y = cursor
            traced.add((x, y))
            img[y, x] = 0  # mark as visited

            direct_neighbors, diagonal_neighbors = classify_neighbors(x, y, img)
            candidates = []
            for nx, ny in direct_neighbors + diagonal_neighbors:
                if img[ny, nx] == 255:
                    candidates.append((nx, ny))

            # Continue only along an unambiguous path; stop at dead ends
            # and at branch points.
            cursor = candidates[0] if len(candidates) == 1 else None

        if len(traced) >= min_size_threshold:
            components.append(traced)

    return components

# Define a function to compute the equation of a line given two points
def compute_line_equation(point1, point2):
    """Return coefficients ``(a, b, c)`` of the line through two points.

    The coefficients satisfy ``a*x + b*y + c == 0`` for both input points.

    Args:
        point1: First point as an ``(x, y)`` pair.
        point2: Second point as an ``(x, y)`` pair.

    Returns:
        The tuple ``(y1 - y2, x2 - x1, x1*y2 - x2*y1)``.
    """
    (x1, y1), (x2, y2) = point1, point2
    return y1 - y2, x2 - x1, x1 * y2 - x2 * y1

# Define a function to calculate perpendicular distances
def calculate_perpendicular_distances(component, line_equation):
    """Compute the distance of every pixel to a line ``a*x + b*y + c = 0``.

    Args:
        component: Iterable of ``(x, y)`` pixels.
        line_equation: The ``(a, b, c)`` coefficients of the line.

    Returns:
        A list with one distance per pixel, in the iteration order of
        ``component``.
    """
    a, b, c = line_equation
    # Length of the line's normal vector; identical for every pixel.
    norm = np.sqrt(a ** 2 + b ** 2)
    return [abs(a * px + b * py + c) / norm for px, py in component]

# Define a function to decode shape symbols
def decode_shape_symbols(distances):
    """Encode how consecutive distances change as a list of symbols.

    For each adjacent pair ``(d_i, d_{i+1})`` one symbol is produced:
    ``"E"`` when the two are equal according to ``np.isclose``, ``"U"`` when
    the first is smaller, and ``"D"`` otherwise.

    Args:
        distances: Sequence of numeric distances.

    Returns:
        A list of ``len(distances) - 1`` symbols (empty for fewer than two
        distances).
    """
    def symbol_for(current, following):
        if np.isclose(current, following):
            return "E"
        return "U" if current < following else "D"

    return [
        symbol_for(current, following)
        for current, following in zip(distances, distances[1:])
    ]

# Define a function to extract shape descriptors for a component
def extract_shape_descriptor(component):
    """Build the list of shape symbols for one component.

    A baseline is drawn through the first and the last pixel of the
    component, every pixel's distance to that baseline is computed, and the
    sequence of distances is encoded with ``decode_shape_symbols``.

    NOTE(review): "first" and "last" mean the first and last items in the
    iteration order of ``component``. When a set is passed in, that order is
    not the order in which the pixels were traced, so the two reference
    pixels are not guaranteed to be the stroke's extreme ends.

    Args:
        component: Collection of ``(x, y)`` pixels.

    Returns:
        A list of symbols, or ``None`` when the component has fewer than two
        pixels.
    """
    # A baseline needs two pixels.
    if len(component) < 2:
        return None

    pixels = list(component)
    first_pixel, last_pixel = pixels[0], pixels[-1]

    baseline = compute_line_equation(first_pixel, last_pixel)
    offsets = calculate_perpendicular_distances(pixels, baseline)
    return decode_shape_symbols(offsets)
# Define a function to extract shape descriptors for all components
def extract_shape_descriptors(components):
    """Compute the shape descriptor of every component.

    Args:
        components: Iterable of pixel collections.

    Returns:
        A list with the descriptor of each component, in input order;
        components for which ``extract_shape_descriptor`` returns ``None``
        are left out.
    """
    candidates = (extract_shape_descriptor(part) for part in components)
    return [symbols for symbols in candidates if symbols is not None]



In [None]:
def _find_skeleton_keypoints(skeleton_image):
  """Locate endpoints and intersection points in a skeleton image.

  A 255-valued pixel with exactly one 255-valued 8-neighbour is an endpoint;
  one with three or more is an intersection point.

  NOTE(review): pixels in the outermost rows and columns are never examined,
  so a stroke that ends on the image border yields no endpoint there.

  Args:
    skeleton_image: 2-D array indexed as ``[y, x]`` with 255 marking
      skeleton pixels.

  Returns:
    ``(endpoints, intersection_points)``, two lists of ``(x, y)`` tuples in
    row-major scan order.
  """
  endpoints = []
  intersection_points = []
  height, width = skeleton_image.shape[:2]

  for y in range(1, height - 1):
    for x in range(1, width - 1):
      if skeleton_image[y, x] != 255:
        continue

      # The 3x3 window includes the centre pixel (itself 255), so subtract it.
      window = skeleton_image[y - 1:y + 2, x - 1:x + 2]
      skeleton_count = int(np.count_nonzero(window == 255)) - 1

      if skeleton_count == 1:
        endpoints.append((x, y))
      elif skeleton_count >= 3:
        intersection_points.append((x, y))

  return endpoints, intersection_points


def sgpd_string(filepath, skeleton_path='skeleton1.png'):
  """Compute the SGPD string of a character image.

  Pipeline: read as grayscale, threshold at 128, resize to 40x60, invert,
  skeletonize, find endpoints/intersections, trace components of at least
  5 pixels, and concatenate the shape symbols of all components.

  Args:
    filepath: Path of the character image to read.
    skeleton_path: Where to save the skeleton image as a side product.
      Pass ``None`` (or an empty string) to skip writing the file.

  Returns:
    The concatenated shape symbols as a single string (possibly empty).

  Raises:
    ValueError: If the image at ``filepath`` cannot be read.
  """
  # Load the grayscale character image
  character_image = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
  if character_image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise ValueError('Could not read image: ' + str(filepath))

  # Threshold the image to create a binary image
  _, binary_image = cv2.threshold(character_image, 128, 255, cv2.THRESH_BINARY)

  # Resize to a fixed size; OpenCV takes the size as (width, height).
  # NOTE(review): resizing after thresholding uses cv2.resize's default
  # interpolation, so the result may contain values other than 0 and 255 --
  # confirm this is intended.
  binary_image = cv2.resize(binary_image, (40, 60))

  # Complement the binary image (invert pixel values)
  binary_image = 255 - binary_image

  # Apply morphological skeletonization
  skeleton = skeletonize(binary_image / 255)  # Divide by 255 to scale into [0, 1]

  # Use the skeleton directly from memory. The previous version wrote it to
  # the relative path 'skeleton1.png' and read it back from the absolute
  # path '/content/skeleton1.png', which only matched when the working
  # directory was /content.
  skeleton_image = (skeleton * 255).astype(np.uint8)
  if skeleton_path:
    # Must happen before tracing, which erases pixels from skeleton_image.
    cv2.imwrite(skeleton_path, skeleton_image)

  endpoints, intersection_points = _find_skeleton_keypoints(skeleton_image)

  # Set a minimum size threshold for components (e.g., 5 pixels)
  min_size_threshold = 5

  # Extract and filter components
  filtered_components = extract_components(skeleton_image, min_size_threshold, endpoints, intersection_points)

  # Extract shape descriptors for all components
  shape_descriptors = extract_shape_descriptors(filtered_components)

  # Combine shape descriptors to form SGPD
  sgpd = "".join("".join(sd) for sd in shape_descriptors)

  return sgpd


In [None]:
!unzip Train_Results.zip -d /content/results  # Extract the Train_Results.zip archive into /content/results

In [None]:
# Compute the SGPD string for a single sample image.
sgpd = sgpd_string('/content/test_chha.jpg')

In [None]:
import editdistance
import os
from google.colab import files
def sgpd_result(sgpd, folder_path1='/content/results/Train_Results'):
  """Return the label whose stored descriptor is closest to ``sgpd``.

  Every regular file in ``folder_path1`` is read line by line. On each
  non-blank line the first whitespace-separated token is taken as the label
  and the last token as the stored descriptor; the label with the smallest
  edit distance to ``sgpd`` wins. Files are visited in sorted name order and
  the first best match is kept, so ties resolve the same way on every run.

  NOTE(review): on a line with a single token that token is used both as
  label and as descriptor -- confirm whether such lines can occur.

  Args:
    sgpd: Descriptor string to classify.
    folder_path1: Directory holding the reference result files.

  Returns:
    The best-matching label, or ``""`` when no line could be compared.
  """
  best_label = ""
  best_distance = float('inf')

  for file_name in sorted(os.listdir(folder_path1)):
    file_path = os.path.join(folder_path1, file_name)
    # Sub-directories cannot be opened as text files.
    if not os.path.isfile(file_path):
      continue

    with open(file_path, 'r') as result_file:
      for line in result_file:
        fields = line.split()
        # Blank lines (e.g. a trailing newline) carry no label/descriptor.
        if not fields:
          continue

        distance = editdistance.eval(fields[-1], sgpd)
        if distance < best_distance:
          best_distance = distance
          best_label = fields[0]

  return best_label

In [None]:
!unzip validation.zip -d /content/validation  # Extract the validation.zip archive into /content/validation

In [None]:
import os
# Evaluate the classifier on the validation set: each sub-folder name is the
# expected label for the images it contains.
folder_path1 = '/content/validation/validation'
# Sorted so the report lines come out in the same order on every run.
sub_folders = sorted(os.listdir(folder_path1))
file_path = '/content/test_alpha_results.txt'
for folder in sub_folders:
  folder_path = os.path.join(folder_path1, folder)
  # Stray files in the validation root are not label folders; listing them
  # would raise NotADirectoryError.
  if not os.path.isdir(folder_path):
    continue
  image_files = os.listdir(folder_path)
  total = 0
  positive = 0
  negative = 0

  # Iterate through each file in the folder
  for img_file in image_files:
    if img_file.endswith(('.jpg', '.png')):  # Check for image extensions
      img_path = os.path.join(folder_path, img_file)
      total += 1
      sgpd = sgpd_string(img_path)
      res = sgpd_result(sgpd)
      # A prediction is correct when it equals the folder (label) name.
      if res == folder:
        positive += 1
      else:
        negative += 1

  # NOTE: the report is opened in append mode, so re-running this cell adds
  # a second set of lines to the same file.
  with open(file_path, 'a') as file:
    file.write('Character: \t'+ folder + '\t total: '+str(total)+'\t positive: '+str(positive)+'\t negative: '+str(negative)+'\n')