In [1]:
import cv2
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import pylab as pl
from preprocess import *

In [2]:
def box_count(image, box_size):
  """
  Counts the number of non-empty boxes of a specific size covering the image.

  Args:
    image: A 2D numpy array representing the binary image.
    box_size: The size of the square boxes used for counting.

  Returns:
    The number of non-empty boxes.
  """
  n_rows, n_cols = image.shape
  n_boxes_x = int(n_cols // box_size)
  n_boxes_y = int(n_rows // box_size)
  count = 0
  resized_image = image[:n_boxes_y * box_size, :n_boxes_x * box_size]
    
    # Reshape the resized image into boxes
  boxes = resized_image.reshape(n_boxes_y, box_size, n_boxes_x, box_size)
    
    # Check if any pixel in each box is non-zero (foreground)
  non_zero_boxes = np.any(boxes, axis=(1, 3))
    
    # Count the number of non-zero boxes
  count = np.sum(non_zero_boxes)
  return count
def box_counting_dimension(image, min_size, max_size, scale_factor=2):
  """
  Calculates the box counting dimension of a binary image.

  Args:
    image: A 2D numpy array representing the binary image.
    min_size: The minimum size of the square boxes used for counting.
    max_size: The maximum size of the square boxes used for counting.
    scale_factor: The factor by which the box size is scaled at each iteration.

  Returns:
    The estimated box counting dimension.
  """
  # Convert image to binary (foreground = 1, background = 0)
  image = image > 0  

  # Prepare lists to store box sizes and counts
  box_sizes = []
  box_counts = []
  
  # Iterate through different box sizes
  box_size = min_size
  while box_size <= max_size:
    count = box_count(image, box_size)
    box_sizes.append(box_size)
    box_counts.append(count)
    box_size *= scale_factor

  # Fit a linear regression to log(box_size) vs log(box_count)
  log_sizes = np.log(box_sizes)
  log_counts = np.log(box_counts)
  slope, _ = np.polyfit(log_sizes, log_counts, 1)
  print (slope)
  # Estimated box counting dimension is negative of the slope
  return (slope-1*100)

In [3]:
A = load_Dataset("../fonts-dataset/Scheherazade New/*.jpeg")
B= load_Dataset("../fonts-dataset/Lemonada/*.jpeg")
C= load_Dataset("../fonts-dataset/Marhey/*.jpeg")
D= load_Dataset("../fonts-dataset/IBM Plex Sans Arabic/*.jpeg")

400
400
400
400


In [4]:
A_PROCESSED = []
for img in A:
    img = threshold_image(img)
    img= assure_white_bg(img)
    A_PROCESSED.append(img)
B_PROCESSED = []
for img in B:
    img = threshold_image(img)
    img= assure_white_bg(img)
    B_PROCESSED.append(img)
C_PROCESSED = []
for img in C:
    img = threshold_image(img)
    img= assure_white_bg(img)
    C_PROCESSED.append(img)
D_PROCESSED = []
for img in D:
    img = threshold_image(img)
    img= assure_white_bg(img)
    D_PROCESSED.append(img)

In [5]:
A_BCD=[]
for img in A_PROCESSED:
    A_BCD.append(box_counting_dimension(img,2,120))
print("A done")
B_BCD=[]
for img in B_PROCESSED:
    B_BCD.append(box_counting_dimension(img,2,120))
print("B done")
C_BCD=[]
for img in C_PROCESSED:
    C_BCD.append(box_counting_dimension(img,2,120))
print("C done")
D_BCD=[]
for img in D_PROCESSED:
    D_BCD.append(box_counting_dimension(img,2,120))
print("D done")
A_BCD=[result for result in A_BCD if not np.isnan(result)]
B_BCD=[result for result in B_BCD if not np.isnan(result)]
C_BCD=[result for result in C_BCD if not np.isnan(result)]
D_BCD=[result for result in D_BCD if not np.isnan(result)]

-1.3416258034420236
-1.28020393347032
-1.3930107403966139
-1.3380069797725465
-1.5133960011134617
-1.2735114933323426
-1.3901559685823008
-1.321682890865267
-1.3483270012535407
-1.3277251558956313
-1.2491155651643246
-1.343036324623426
-1.3039773785961608
-1.3571535553868859
-1.299158488942883
-1.2881672043818735
-1.2670115809317652
-1.3535553185345064
-1.260468389862351
-1.3136568307840832
-1.3560615109352876
-1.407575797175966
-1.3127663035588568
-1.3286716220054375
-1.2989488861212064
-1.3162870466044454
-1.3623177830902404
-1.4219291071691935
-1.3718883544821276
-1.3032653166109647
-1.3093352451218607
-1.4421104550554198
-1.3115592387987711
-1.450901085023625
-1.4672661982059674
-1.296156232994599
-1.3246068853751267
-1.3459102510572278
-1.2639356211044899
-1.4334475868822485
-1.2532726673933139
-1.3051545378251297
-1.3596812232844204
-1.3117845690220442
-1.5147147910757273
-1.4637574597388159
-1.3260076245685373
-1.3103020124551679
-1.3458824180297735
-1.3745556751916994
-1.347945

  log_counts = np.log(box_counts)


-1.4343339685445708
-1.4415714715691315
-1.4875103779616268
-1.4815474592259619
-1.3772090304712996
-1.4940510678361663
-1.4393829076390696
-1.4190095888272387
-1.494059708730875
-1.5746673830112854
-1.4569027156845742
-1.497660610683969
-1.3961510986628445
-1.4617710401816302
-1.3961396027502457
-1.3794081888168785
-1.3971289928740362
-1.6016252644591105
-1.4671582183021497
-1.4818517443396615
-1.5142617931026698
-1.4531539803771591
-1.4862752659160754
-1.467342659535075
-1.455281534601332
-1.485201764868196
-1.491826900482043
-1.4154426314663946
-1.5017283802738945
-1.4332646086059364
-1.479216442711213
-1.4435362993451362
-1.4517523862182444
-1.4085416083841793
-1.5757975597369114
-1.4174530674867578
-1.5822855138517822
-1.5031224748446577
-1.4650024631218255
-1.4574079184158735
-1.4965049824606214
-1.4959232146426262
-1.4890776658163272
-1.5827152927076582
-1.4483606133410383
-1.452742801425607
-1.4281639311546401
-1.5883892772793453
-1.4524173139414411
-1.5823688733719767
-1.45470

In [6]:
X = np.concatenate([A_BCD, B_BCD, C_BCD, D_BCD]).reshape(-1,1)
y = np.concatenate([np.zeros(len(A_BCD)), np.ones(len(B_BCD)), 
                    2*np.ones(len(C_BCD)), 3*np.ones(len(D_BCD))]).reshape(-1,1)
train_features, test_features, train_labels, test_labels = train_test_split(
        X, y, test_size=0.3, random_state=40)
    
    #print(labels)
KNN = KNeighborsClassifier(n_neighbors=5)

KNN.fit(train_features, train_labels)
        
    
accuracy = KNN.score(test_features, test_labels)
        
print('accuracy: ', accuracy*100, '%')

accuracy:  45.416666666666664 %


  return self._fit(X, y)
