# Arabic Calligraphy Style Recognition

## Reference Paper 
## An efficient multiple-classifier system for Arabic calligraphy style recognition

In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
from skimage.morphology import skeletonize
from sklearn.neighbors import KNeighborsClassifier
import os
from sklearn import metrics
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from __future__ import division
from scipy.signal import convolve2d
from sklearn.ensemble import VotingClassifier

In [2]:
## Reference: https://stackoverflow.com/questions/23548863/converting-a-specific-matlab-script-to-python/23575137
#Perform local Phase Quantization
def lpq(img, winSize = 3, freqestim = 1, mode = 'nh'):
    
    rho = 0.90
    STFTalpha = 1/winSize    # alpha in STFT approaches (for Gaussian derivative alpha=1)
    sigmaS = (winSize-1)/4   # Sigma for STFT Gaussian window (applied if freqestim==2)
    sigmaA = 8/(winSize-1)   # Sigma for Gaussian derivative quadrature filters (applied if freqestim==3)

    convmode = 'valid'   # Compute descriptor responses only on part that have full neigborhood. Use 'same' if all pixels are included (extrapolates np.image with zeros).

    img = np.float64(img)                # Convert np.image to double
    r = (winSize-1)/2                    # Get radius from window size
    x = np.arange(-r, r+1)[np.newaxis]   # Form spatial coordinates in window

    if freqestim == 1:  #  STFT uniform window
        # Basic STFT filters
        w0 = np.ones_like(x)
        w1 = np.exp(-2*np.pi*x*STFTalpha*1j)
        w2 = np.conj(w1)

    ## Run filters to compute the frequency response in the four points. Store np.real and np.imaginary parts separately
    # Run first filter
    filterResp1 = convolve2d(convolve2d(img,w0.T,convmode),w1,convmode)
    filterResp2 = convolve2d(convolve2d(img,w1.T,convmode),w0,convmode)
    filterResp3 = convolve2d(convolve2d(img,w1.T,convmode),w1,convmode)
    filterResp4 = convolve2d(convolve2d(img,w1.T,convmode),w2,convmode)

    # Initilize frequency domain matrix for four frequency coordinates (np.real and np.imaginary parts for each frequency).
    freqResp = np.dstack([filterResp1.real, filterResp1.imag,
                         filterResp2.real, filterResp2.imag,
                         filterResp3.real, filterResp3.imag,
                         filterResp4.real, filterResp4.imag])

    ## Perform quantization and compute LPQ codewords
    inds = np.arange(freqResp.shape[2])[np.newaxis,np.newaxis,:]
    LPQdesc = ((freqResp>0)*(2**inds)).sum(2)

    ## Switch format to uint8 if LPQ code np.image is required as output
    if mode =='im':
        LPQdesc = np.uint8(LPQdesc)

    ## Histogram if needed
    if mode == 'nh' or mode == 'h':
        LPQdesc = np.histogram(LPQdesc.flatten(),range(256))[0]

    ## Normalize histogram if needed
    if mode == 'nh':
        LPQdesc = LPQdesc/LPQdesc.sum()

    #print(LPQdesc)
    return LPQdesc

# 1. Preprocessing


In [3]:
# 1. Perform Preprocessing
def preprocessing(image):
    ret2,th2 = cv2.threshold(image, 0, 1, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    result = np.array(th2, dtype = 'float')
    return result

# 2. Feature Extraction 


In [4]:
# 2. Feature Extraction using Local Phase Quantization
def extractFeatures(image):
    return lpq(image, winSize = 5, mode ='nh')

# 3. Classification


In [7]:
data = []
dataLabels = []

In [8]:
# Read font names from file
fontFile = open("names.txt",'r')
fonts = np.loadtxt(fontFile, dtype='str')
for font in fonts:
    fontDir, fontName = font.split("___")
    print(fontDir, fontName)
    for file in os.listdir(fontDir):
        image = cv2.imread(fontDir+"/"+file,0)
        image_processed = preprocessing(image)
        data.append(image_processed)
        dataLabels.append(fontName)

1 diwani
2 naskh
3 parsi
4 rekaa
5 thuluth
6 maghribi
7 kufi
8 mohakek
9 Squar-kufic


In [12]:
# Convert data to numpy array
data = np.asarray(data, dtype=np.ndarray)
dataLabels = np.asarray(dataLabels)

In [13]:
# Splitting data to 20% testing and 80% training
X_train, X_test, y_train, y_test = train_test_split(data, dataLabels, test_size = 0.2, random_state = 30)

In [14]:
# Begin extracting features from training data
N = X_train.shape[0]
trainFeatures = np.zeros((N, 255))

for i in range(trainFeatures.shape[0]):
    trainFeatures[i] = extractFeatures(X_train[i])


In [15]:
# Begin extracting features from testing data
N = X_test.shape[0]
testFeatures = np.zeros((N, 255))

for i in range(testFeatures.shape[0]):
    testFeatures[i] = extractFeatures(X_test[i])

## KNN Classifier

In [17]:
# Classifying using K-Nearest Neighbors
clf_knn = KNeighborsClassifier(n_neighbors = 3)
clf_knn.fit(trainFeatures, y_train)
y_pred_knn = clf_knn.predict(testFeatures)
print("Accuracy KNN:",metrics.accuracy_score(y_test, y_pred_knn)*100)

Accuracy KNN: 80.71216617210682


## SVM Classifier

In [25]:
# Classifying using Support Vector Machine
clf_svm = svm.SVC(decision_function_shape = 'ovo', probability = True, kernel = 'rbf')
clf_svm.fit(trainFeatures, y_train)
y_pred_svm = clf_svm.predict(testFeatures)
print("Accuracy SVM:",metrics.accuracy_score(y_test, y_pred_svm)*100)

Accuracy SVM: 52.818991097922854


## MLP Classifier

In [26]:
# Classifying using Multilayer Perceptron
clf_mlp = MLPClassifier(random_state = 1, max_iter = 500)
clf_mlp.fit(trainFeatures, y_train)
y_pred_mlp = clf_mlp.predict(testFeatures)
print("Accuracy MLPClassifier:",metrics.accuracy_score(y_test, y_pred_mlp)*100)

Accuracy MLPClassifier: 94.95548961424333




## Combining Classifiers

###  Majority/Plurality Vote

In [27]:
clf_knn = KNeighborsClassifier(n_neighbors=5)
clf_mlp = MLPClassifier(random_state=1, max_iter=500)
clf_svm = svm.SVC(decision_function_shape='ovo',probability=True)

# Using majority vote
clf_max = VotingClassifier(estimators=[('knn', clf_knn), ('svm', clf_svm), ('mlp', clf_mlp)], voting='hard')
clf_max.fit(trainFeatures, y_train)
y_pred_max = clf_max.predict(testFeatures)

print("Accuracy - Majority Vote:",metrics.accuracy_score(y_test, y_pred_max)*100)



Accuracy - Majority Vote: 82.7893175074184


###  Sum

In [28]:
clf_knn = KNeighborsClassifier(n_neighbors=5)
clf_mlp = MLPClassifier(random_state=1, max_iter=500)
clf_svm = svm.SVC(decision_function_shape='ovo',probability=True)

# Using summation
clf_sum = VotingClassifier(estimators=[('knn', clf_knn), ('svm', clf_svm), ('mlp', clf_mlp)], voting='soft')
clf_sum.fit(trainFeatures, y_train)
y_pred_sum = clf_max.predict(testFeatures)

print("Accuracy - Sum:",metrics.accuracy_score(y_test, y_pred_sum)*100)



Accuracy - Sum: 82.7893175074184
