In [49]:

import skimage.io as io
import matplotlib.pyplot as plt
import os
import cv2
import numpy as np
from skimage.exposure import histogram
from matplotlib.pyplot import bar
from skimage.color import rgb2gray,rgb2hsv
from scipy.signal import convolve2d
from scipy.signal import fftconvolve
from scipy import fftpack
from skimage.filters import median
from skimage.feature import canny
from skimage.filters import sobel_h, sobel, sobel_v,roberts, prewitt
from scipy.ndimage import gaussian_filter
from skimage import color
import pickle

def unsharp_masking(image, blur_radius=5, sharpen_amount=1.0):
    """Apply unsharp masking followed by dilation to enhance image details."""
    # Apply Gaussian blur
    blurred = cv2.GaussianBlur(image, (0, 0), blur_radius)
    
    # Apply unsharp masking
    sharpened = cv2.addWeighted(image, 1.0 + sharpen_amount, blurred, -sharpen_amount, 0)
    
    # Ensure the sharpened image doesn't have values below the original image
    sharpened = np.where(image >= blurred, sharpened, image)
    
    return sharpened

def remove_noise22(image, dilation_kernel=None):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    bilateral_filtered = cv2.bilateralFilter(gray, 15, sigmaColor=3, sigmaSpace=10)
    
    median_blurred = cv2.medianBlur(bilateral_filtered, 3)
    result = unsharp_masking(median_blurred, blur_radius=10, sharpen_amount=4) * 255
    
    result = np.uint8(result)
    edges = cv2.Canny(result, 80, 255,apertureSize=3)
       # Optionally apply dilation
    if dilation_kernel is not None:
        # Perform dilation
        edges = cv2.dilate(edges, dilation_kernel)
    return edges


In [58]:


def construct_dataset(input_folder, output_folder):
    # Iterate over subfolders in the input folder

    for root, dirs, files in os.walk(input_folder):
        for dir_name in dirs:
         
            input_subfolder = os.path.join(root, dir_name)
            output_subfolder = os.path.join(output_folder, os.path.relpath(input_subfolder, input_folder))
            
            # Create corresponding subfolder in the output folder
            os.makedirs(output_subfolder, exist_ok=True)
            
            # Iterate over files in the subfolder
            for file_name in os.listdir(input_subfolder):
                input_image_path = os.path.join(input_subfolder, file_name)
                output_image_path = os.path.join(output_subfolder, file_name)
                
                # Read the image
                image = io.imread(input_image_path)
                
                # Apply preprocessing
                # np.ones((kernel_size, kernel_size), np.uint8)
                processed_image = remove_noise22(image,dilation_kernel=None)
                
                # Save the processed image
                cv2.imwrite(output_image_path, processed_image)
                print(f"Processed and saved: {output_image_path}")


input_folder = "fonts-dataset"
output_folder = "fonts_processed-dataset"
construct_dataset(input_folder, output_folder)


['tests']
Processed and saved: fonts_processed-dataset\tests\0.jpeg
Processed and saved: fonts_processed-dataset\tests\1.jpeg
Processed and saved: fonts_processed-dataset\tests\16.jpeg
Processed and saved: fonts_processed-dataset\tests\2.jpeg
Processed and saved: fonts_processed-dataset\tests\4.jpeg
[]


In [59]:
def getSIFTFeatures(input_folder):
    sift = cv2.SIFT_create()
    SIFTListLabeled = []

    # Iterate over subfolders in the input folder
    for root, dirs, files in os.walk(input_folder):
        for dir_name in dirs:
            input_subfolder = os.path.join(root, dir_name)
            label = dir_name  # Assuming folder name is the label
            
            # Get the list of files in the subfolder and sort them based on filenames
            file_list = sorted(os.listdir(input_subfolder), key=lambda x: int(x.split('.')[0]))
            
            # Iterate over sorted files in the subfolder
            for file_name in file_list:
                input_image_path = os.path.join(input_subfolder, file_name)
                image = cv2.imread(input_image_path)
                # Convert the image to grayscale
                # Detect keypoints and compute descriptors
                keypoints, descriptors = sift.detectAndCompute(image, None)
                # Append descriptors and label as a pair
                SIFTListLabeled.append((descriptors, label))

    return SIFTListLabeled


In [60]:
from sklearn.model_selection import train_test_split

# getSIFTdescriptors
input_folder = "fonts_processed-dataset"
SIFTListLabeled = getSIFTFeatures(input_folder)

# Split the dataset into training and validation sets
train_data, val_data = train_test_split(SIFTListLabeled, test_size=0.2, random_state=42)

# Print the sizes of the training and validation sets
print("Number of samples in training set:", len(train_data))
print("Number of samples in validation set:", len(val_data))


Number of samples in training set: 4
Number of samples in validation set: 1


In [52]:
from sklearn.cluster import KMeans
#Visual Vocabulary Construction
# Concatenate all SIFT descriptors from the training set into a single array
all_descriptors_train = np.concatenate([data[0] for data in train_data if data[0] is not None], axis=0)

# Perform k-means clustering on the concatenated descriptors
k = 70 # Number of clusters
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(all_descriptors_train)

# Get the cluster centroids (visual words)
visual_words = kmeans.cluster_centers_

# Save the trained KMeans model as a pickle file
kmeans_model_filename = "kmeans_model.pkl"
with open(kmeans_model_filename, 'wb') as file:
    pickle.dump(kmeans, file)

print("KMeans model saved as", kmeans_model_filename)


  super()._check_params_vs_input(X, default_n_init=10)


KMeans model saved as kmeans_model.pkl


In [53]:
#Feature Encoding

from sklearn.metrics.pairwise import pairwise_distances_argmin_min

# Initialize empty lists to store histograms for training and validation sets
train_histograms = []
val_histograms = []

# Iterate over training data
for descriptors, label in train_data:
    if descriptors is not None:  
        descriptors = np.array(descriptors)  # Convert to numpy array
        # Check if descriptors array is non-empty and has the correct shape
        if descriptors.shape[0] > 0 and descriptors.shape[1] > 0:
            # Find the nearest visual word for each descriptor
            nearest_clusters = pairwise_distances_argmin_min(descriptors, visual_words)[0]
            # Count the occurrences of each visual word and create a histogram
            histogram, _ = np.histogram(nearest_clusters, bins=np.arange(k+1))
            # Append histogram and label to the list
            train_histograms.append((histogram, label))

# Iterate over validation data
for descriptors, label in val_data:
    if descriptors is not None:  
        descriptors = np.array(descriptors)  # Convert to numpy array
        # Check if descriptors array is non-empty and has the correct shape
        if descriptors.shape[0] > 0 and descriptors.shape[1] > 0:
            # Find the nearest visual word for each descriptor
            nearest_clusters = pairwise_distances_argmin_min(descriptors, visual_words)[0]
            # Count the occurrences of each visual word and create a histogram
            histogram, _ = np.histogram(nearest_clusters, bins=np.arange(k+1))
            # Append histogram and label to the list
            val_histograms.append((histogram, label))


In [54]:
from sklearn.svm import SVC
import pickle
from sklearn.metrics import accuracy_score

# Extract features (histograms) and labels from training set
X_train = [histogram for histogram, _ in train_histograms]
y_train = [label for _, label in train_histograms]

# Extract features (histograms) and labels from validation set
X_val = [histogram for histogram, _ in val_histograms]
y_val = [label for _, label in val_histograms]

# Initialize and train SVM classifier
svm_classifier = SVC(kernel='linear', random_state=42)
svm_classifier.fit(X_train, y_train)

# Predict labels for validation set
y_pred = svm_classifier.predict(X_val)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_pred)
print("Validation accuracy:", accuracy)

# Save the trained model as a pickle file
model_filename = "svm_model.pkl"
with open(model_filename, 'wb') as file:
    pickle.dump(svm_classifier, file)

print("Model saved as", model_filename)


Validation accuracy: 0.8125
Model saved as svm_model.pkl
