In [1]:
import cv2
from sklearn.cluster import KMeans
from sklearn.ensemble import AdaBoostClassifier
from sklearn.svm import SVC  # Example weak learner
from sklearn.model_selection import train_test_split
import os
import numpy as np
from cv2.xfeatures2d import SIFT_create as sift_create

# Define paths and hyperparameters
data_path = "train-dataset"
k = 100  # Number of visual words (adjust as needed)

# Load image paths and labels
images = []
labels = []
for folder_name in os.listdir(data_path):
  folder_path = os.path.join(data_path, folder_name)
  for filename in os.listdir(folder_path):
    image_path = os.path.join(folder_path, filename)
    label = folder_name  # Assuming folder name represents class label
    images.append(image_path)
    labels.append(label)

# Function to extract SURF features
def extract_surf_features(image_path):
  img = cv2.imread(image_path)
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  surf = sift_create()
  keypoints, descriptors = surf.detectAndCompute(gray, None)
  return descriptors

# Extract features from training images
all_features = []
for image_path in images:
  features = extract_surf_features(image_path)
  all_features.append(features)


# Create the visual vocabulary using k-means
kmeans = KMeans(n_clusters=k)
kmeans.fit(np.concatenate(all_features))  # Combine all descriptors

  super()._check_params_vs_input(X, default_n_init=10)


In [2]:
# Function to create image histograms
kmeans.cluster_centers_ = kmeans.cluster_centers_.astype(np.double)
    
def create_image_histogram(image_path, kmeans):
  descriptors = extract_surf_features(image_path)
  histogram = np.zeros(k)
  for descriptor in descriptors:
    cluster_id = kmeans.predict([descriptor])[0]
    histogram[cluster_id] += 1
  return histogram

# Create histograms for training images
image_features = []
for image_path in images:
  histogram = create_image_histogram(image_path, kmeans)
  image_features.append(histogram)

KeyboardInterrupt: 

In [None]:
# Train the SVM classifier
clf = AdaBoostClassifier(n_estimators=100, random_state=42)
clf.set_params(algorithm="SAMME")
clf.set_params(estimator=SVC(kernel='linear'))  # Set the base learner
clf.fit(np.array(image_features), labels)

In [None]:
# Function to classify a new image
def classify_image(image_path, kmeans, clf):
  histogram = create_image_histogram(image_path, kmeans)
  prediction = clf.predict([histogram])[0]
  return prediction

wrong = 0
for i in range(1 , 1000):
  new_image_path = f"fonts-dataset/Lemonada/{i}.jpeg"
  predicted_class = classify_image(new_image_path, kmeans, clf)
  print("Predicted class for", new_image_path, ":", predicted_class)
  if (predicted_class != "IBM Plex Sans Arabic"):
    wrong = wrong + 1

Predicted class for fonts-dataset/Lemonada/1.jpeg : Lemonada
Predicted class for fonts-dataset/Lemonada/2.jpeg : Lemonada
Predicted class for fonts-dataset/Lemonada/3.jpeg : Scheherazade New
Predicted class for fonts-dataset/Lemonada/4.jpeg : Lemonada
Predicted class for fonts-dataset/Lemonada/5.jpeg : Lemonada
Predicted class for fonts-dataset/Lemonada/6.jpeg : Lemonada


KeyboardInterrupt: 

In [None]:
print(f"Errors: {wrong}")
print(f"accuriacy : {100 - wrong / 1000 * 100}")

Errors: 72
accuriacy : 92.8
