In [3]:
import cv2
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import pylab as pl
from preprocess import *

In [4]:
def box_count(image, box_size):
  """
  Counts the number of non-empty boxes of a specific size covering the image.

  Args:
    image: A 2D numpy array representing the binary image.
    box_size: The size of the square boxes used for counting.

  Returns:
    The number of non-empty boxes.
  """
  n_rows, n_cols = image.shape
  n_boxes_x = int(n_cols // box_size)
  n_boxes_y = int(n_rows // box_size)
  count = 0
  resized_image = image[:n_boxes_y * box_size, :n_boxes_x * box_size]
    
    # Reshape the resized image into boxes
  boxes = resized_image.reshape(n_boxes_y, box_size, n_boxes_x, box_size)
    
    # Check if any pixel in each box is non-zero (foreground)
  non_zero_boxes = np.any(boxes, axis=(1, 3))
    
    # Count the number of non-zero boxes
  count = np.sum(non_zero_boxes)
  return count
def box_counting_dimension(image, min_size, max_size, scale_factor=2):
  """
  Calculates the box counting dimension of a binary image.

  Args:
    image: A 2D numpy array representing the binary image.
    min_size: The minimum size of the square boxes used for counting.
    max_size: The maximum size of the square boxes used for counting.
    scale_factor: The factor by which the box size is scaled at each iteration.

  Returns:
    The estimated box counting dimension.
  """
  # Convert image to binary (foreground = 1, background = 0)
  image = image > 0  

  # Prepare lists to store box sizes and counts
  box_sizes = []
  box_counts = []
  
  # Iterate through different box sizes
  box_size = min_size
  while box_size <= max_size:
    count = box_count(image, box_size)
    box_sizes.append(box_size)
    box_counts.append(count)
    box_size *= scale_factor

  # Fit a linear regression to log(box_size) vs log(box_count)
  log_sizes = np.log(box_sizes)
  log_counts = np.log(box_counts)
  slope, _ = np.polyfit(log_sizes, log_counts, 1)
  print (slope)
  # Estimated box counting dimension is negative of the slope
  return (slope-1*100)

In [5]:
A = load_Dataset("../fonts-dataset/Scheherazade New/*.jpeg")
B= load_Dataset("../fonts-dataset/Lemonada/*.jpeg")
C= load_Dataset("../fonts-dataset/Marhey/*.jpeg")
D= load_Dataset("../fonts-dataset/IBM Plex Sans Arabic/*.jpeg")

0
0
0
0


In [6]:
A_PROCESSED = []
for img in A:
    img = threshold_image(img)
    img= assure_white_bg(img)
    A_PROCESSED.append(img)
B_PROCESSED = []
for img in B:
    img = threshold_image(img)
    img= assure_white_bg(img)
    B_PROCESSED.append(img)
C_PROCESSED = []
for img in C:
    img = threshold_image(img)
    img= assure_white_bg(img)
    C_PROCESSED.append(img)
D_PROCESSED = []
for img in D:
    img = threshold_image(img)
    img= assure_white_bg(img)
    D_PROCESSED.append(img)

In [7]:
A_BCD=[]
for img in A_PROCESSED:
    A_BCD.append(box_counting_dimension(img,2,120))
print("A done")
B_BCD=[]
for img in B_PROCESSED:
    B_BCD.append(box_counting_dimension(img,2,120))
print("B done")
C_BCD=[]
for img in C_PROCESSED:
    C_BCD.append(box_counting_dimension(img,2,120))
print("C done")
D_BCD=[]
for img in D_PROCESSED:
    D_BCD.append(box_counting_dimension(img,2,120))
print("D done")
A_BCD=[result for result in A_BCD if not np.isnan(result)]
B_BCD=[result for result in B_BCD if not np.isnan(result)]
C_BCD=[result for result in C_BCD if not np.isnan(result)]
D_BCD=[result for result in D_BCD if not np.isnan(result)]

A done
B done
C done
D done


In [8]:
X = np.concatenate([A_BCD, B_BCD, C_BCD, D_BCD]).reshape(-1,1)
y = np.concatenate([np.zeros(len(A_BCD)), np.ones(len(B_BCD)), 
                    2*np.ones(len(C_BCD)), 3*np.ones(len(D_BCD))]).reshape(-1,1)
train_features, test_features, train_labels, test_labels = train_test_split(
        X, y, test_size=0.3, random_state=40)
    
    #print(labels)
KNN = KNeighborsClassifier(n_neighbors=5)

KNN.fit(train_features, train_labels)
        
    
accuracy = KNN.score(test_features, test_labels)
        
print('accuracy: ', accuracy*100, '%')

ValueError: With n_samples=0, test_size=0.3 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.