## Imports

In [1]:
import cv2
import numpy as np
from scipy.signal import convolve2d
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split 
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import os
from sklearn.cluster import MiniBatchKMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import accuracy_score, classification_report

### SIFT Descriptor

In [2]:

sift = cv2.SIFT_create()
def get_sift_features(image):
    keypoints, descriptors = sift.detectAndCompute(image, None)
    # print(descriptors.shape)
    if descriptors is not None:
        if descriptors.shape[0]>200:
            descriptors = descriptors[:200]
        else:
            descriptors = np.concatenate((descriptors, np.zeros((200-descriptors.shape[0], 128))))
    else:
        descriptors = np.zeros((200, 128))
    descriptors = descriptors.flatten()
    return keypoints, descriptors
def extract_sift_features(images):
    # Initialize the SIFT detector
    sift = cv2.SIFT_create()
    sift_features = []

    for image in images:
        # Convert the image to grayscale
        # gray_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)

        # Detect keypoints and compute SIFT descriptors
        kp, descriptors = sift.detectAndCompute(image, None)

        if descriptors is not None:
            sift_features.append(descriptors)

    return sift_features
def create_codebook(features, num_clusters, batch_size):
    # Create a MiniBatchKMeans clustering object
    kmeans = MiniBatchKMeans(n_clusters=num_clusters, batch_size=batch_size, random_state=0)

    # Fit the clustering model to the SIFT features
    kmeans.fit(features)

    return kmeans
def compute_bovw_representation(features, codebook):
    num_clusters = codebook.n_clusters
    bovw_representation = []

    for image_features in features:
        if len(image_features) > 0:
            # Assign each feature to a cluster
            cluster_assignments = codebook.predict(image_features)

            # Create a histogram of cluster frequencies
            histogram = np.bincount(cluster_assignments, minlength=num_clusters)

            # Normalize the histogram
            histogram = histogram / np.sum(histogram)

            bovw_representation.append(histogram)
        else:
            # Handle cases where no features were detected
            bovw_representation.append(np.zeros(num_clusters))

    return bovw_representation
def train_knn_classifier(train_bovw_features, train_labels):
    knn_classifier = NearestNeighbors(n_neighbors=5, algorithm='ball_tree', n_jobs=-1)

    # Fit the classifier on the training data
    knn_classifier.fit(train_bovw_features)

    return knn_classifier
def evaluate_classifier(classifier, test_bovw_features, train_labels,test_labels):
    # Find nearest neighbors for test data
    distances, indices = classifier.kneighbors(test_bovw_features)

    # Evaluate the classifier
    predictions = []

    for neighbors in indices:
        if len(neighbors) > 0:
            neighbor_labels = train_labels[neighbors]
            most_common_label = np.bincount(neighbor_labels).argmax()
            predictions.append(most_common_label)
        else:
            # Handle cases where no neighbors were found
            predictions.append(-1)

    # Filter out test samples without predictions (-1)
    filtered_test_labels = []
    filtered_predictions = []

    for i, prediction in enumerate(predictions):
        if prediction != -1:
            filtered_test_labels.append(test_labels[i])
            filtered_predictions.append(prediction)

    # Calculate accuracy and generate a classification report
    accuracy = accuracy_score(filtered_test_labels, filtered_predictions)
    report = classification_report(filtered_test_labels, filtered_predictions)

    return accuracy, report


In [3]:
# img = cv2.imread("images/train/Marhey/3.jpeg",)
# img =cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# img= np.float32(img)
# dest = cv2.cornerHarris(img,2, 5, 0.07)
# dest = cv2.dilate(dest, None) 
# img[dest > 0.01 * dest.max()]=[0]
# plt.imshow(img,cmap='gray')  

## Local Phase Quantization (LPQ)

In [4]:
class LPQ(object):
    """ This implementation of Local Phase Quantization (LPQ) is a 
      
      Reference: 
        Ojansivu V & Heikkilä J (2008) Blur insensitive texture classification 
        using local phase quantization. Proc. Image and Signal Processing 
	(ICISP 2008), Cherbourg-Octeville, France, 5099:236-243.
        Copyright 2008 by Heikkilä & Ojansivu
    """

    def __init__(self, radius=3):
        self.radius = radius

    def euc_dist(self, X):
        Y = X = X.astype(np.int32)
        XX = np.sum(X * X, axis=1)[:, np.newaxis]
        YY = XX.T
        distances = np.dot(X,Y.T)
        distances *= -2
        distances += XX
        distances += YY
        np.maximum(distances, 0, distances)
        distances.flat[::distances.shape[0] + 1] = 0.0
        return np.sqrt(distances)

    def __call__(self,X):
        f = 1.0
        x = np.arange(-self.radius,self.radius+1)
        n = len(x)
        rho = 0.95
        [xp, yp] = np.meshgrid(np.arange(1,(n+1)),np.arange(1,(n+1)))
        pp = np.concatenate((xp,yp)).reshape(2,-1)
        dd = self.euc_dist(pp.T) # squareform(pdist(...)) would do the job, too...
        C = np.power(rho,dd)

        w0 = (x*0.0+1.0)
        w1 = np.exp(-2*np.pi*1j*x*f/n)
        w2 = np.conj(w1)

        q1 = w0.reshape(-1,1)*w1
        q2 = w1.reshape(-1,1)*w0
        q3 = w1.reshape(-1,1)*w1
        q4 = w1.reshape(-1,1)*w2

        u1 = np.real(q1)
        u2 = np.imag(q1)
        u3 = np.real(q2)
        u4 = np.imag(q2)
        u5 = np.real(q3)
        u6 = np.imag(q3)
        u7 = np.real(q4)
        u8 = np.imag(q4)

        M = np.matrix([u1.flatten(), u2.flatten(), u3.flatten(), u4.flatten(), u5.flatten(), u6.flatten(), u7.flatten(), u8.flatten()])

        D = np.dot(np.dot(M,C), M.T)
        U,S,V = np.linalg.svd(D)

        Qa = convolve2d(convolve2d(X,w0.reshape(-1,1),mode='same'),w1.reshape(1,-1),mode='same')
        Qb = convolve2d(convolve2d(X,w1.reshape(-1,1),mode='same'),w0.reshape(1,-1),mode='same')
        Qc = convolve2d(convolve2d(X,w1.reshape(-1,1),mode='same'),w1.reshape(1,-1),mode='same')
        Qd = convolve2d(convolve2d(X, w1.reshape(-1,1),mode='same'),w2.reshape(1,-1),mode='same')

        Fa = np.real(Qa)
        Ga = np.imag(Qa)
        Fb = np.real(Qb) 
        Gb = np.imag(Qb)
        Fc = np.real(Qc) 
        Gc = np.imag(Qc)
        Fd = np.real(Qd) 
        Gd = np.imag(Qd)

        F = np.array([Fa.flatten(), Ga.flatten(), Fb.flatten(), Gb.flatten(), Fc.flatten(), Gc.flatten(), Fd.flatten(), Gd.flatten()])
        G = np.dot(V.T, F)

        t = 0

        # Calculate the LPQ Patterns:
        B = (G[0,:]>=t)*1 + (G[1,:]>=t)*2 + (G[2,:]>=t)*4 + (G[3,:]>=t)*8 + (G[4,:]>=t)*16 + (G[5,:]>=t)*32 + (G[6,:]>=t)*64 + (G[7,:]>=t)*128
        B = np.reshape(B, np.shape(Fa))

        # And finally build the histogram:
        h, b  = np.histogram(B, bins=256, range = (0,255))

        return h

    def __repr__(self):
        return "LPQ (radius=%s)" % (self.radius)

In [5]:
# Function to generate horizontal projection profile 
def getHorizontalProjectionProfile(image): 
  
    # Convert black spots to ones 
    image[image == 0]   = 1
    # Convert white spots to zeros 
    image[image == 255] = 0
  
    horizontal_projection = np.sum(image, axis = 1)  
  
    return horizontal_projection
def getVerticalProjectionProfile(image): 
  
    # Convert black spots to ones  
    image[image == 0]   = 1
    # Convert white spots to zeros  
    image[image == 255] = 0
  
    vertical_projection = np.sum(image, axis = 0) 
  
    return vertical_projection  

## Getting All Data

In [6]:
def load_images_from_directory(directory):
    images = []
    count=0
    for filename in os.listdir(directory):
        if filename.endswith(".jpeg") or filename.endswith(".png"):
            count+=1
            img = cv2.imread(os.path.join(directory, filename),0)
            if img is not None:
                images.append(img)
    print("Got "+str(count)+" images")
    return images

In [7]:
print("-----------------Loading images-----------------")
# Load images from directories
print("-----------------Marehy-----------------")
marehy_image_list = load_images_from_directory("images/cropped_train/Marehy/")
print("-----------------Lemonada-----------------")
lemonada_image_list = load_images_from_directory("images/cropped_train/Lemonada/")
print("-----------------Scheherazade-----------------")
scheherazade_image_list = load_images_from_directory("images/cropped_train/Scheherazade New/")
print("-----------------IBM Plex sans Arabic-----------------")
ibm_image_list = load_images_from_directory("images/cropped_train/IBM Plex Sans Arabic")
print("--------------Fetching images Done---------------")
# l_p_q = LPQ(5)

-----------------Loading images-----------------
-----------------Marehy-----------------
Got 6023 images
-----------------Lemonada-----------------
Got 5869 images
-----------------Scheherazade-----------------
Got 4994 images
-----------------IBM Plex sans Arabic-----------------
Got 5360 images
--------------Fetching images Done---------------


### Initializations

In [8]:
lpq = LPQ(5)
marehy_expected_output = np.zeros(len(marehy_image_list))
lemonada_expected_output = np.ones(len(lemonada_image_list))
scheherazade_expected_output = np.full(len(scheherazade_image_list), 2)
ibm_expected_output = np.full(len(ibm_image_list), 3)
features = []
classes = []

### Split Train and Test

In [9]:

marehy_train_x,marehy_test_x,marehy_train_y,marehy_test_y = train_test_split(
    marehy_image_list,
    marehy_expected_output,random_state=104,  
    test_size=0.25,  
    shuffle=True)

lemonada_train_x,lemonada_test_x,lemonada_train_y,lemonada_test_y = train_test_split(
    lemonada_image_list,
    lemonada_expected_output,random_state=104,  
    test_size=0.25,  
    shuffle=True)

scheherazade_train_x,scheherazade_test_x,scheherazade_train_y,scheherazade_test_y = train_test_split(
    scheherazade_image_list,
    scheherazade_expected_output,random_state=104,  
    test_size=0.25,  
    shuffle=True)

ibm_train_x,ibm_test_x,ibm_train_y,ibm_test_y = train_test_split(
    ibm_image_list,
    ibm_expected_output,random_state=104,  
    test_size=0.25,  
    shuffle=True)


## Train Images feature extraction

In [10]:
print("--------------Marehy----------------")
for i in range(0,len(marehy_train_x)):
    print("Image = "+str(i+1))
    features.append(lpq.__call__(marehy_train_x[i]))
    classes.append(0)
#     img = marehy_image_list[i]
#     keypoints = get_sift_features(img)
#     features.append(keypoints)
#     classes.append(marehy_train_y[i])

--------------Marehy----------------
Image = 1
Image = 2
Image = 3
Image = 4
Image = 5
Image = 6
Image = 7
Image = 8
Image = 9
Image = 10
Image = 11
Image = 12
Image = 13
Image = 14
Image = 15
Image = 16
Image = 17
Image = 18
Image = 19
Image = 20
Image = 21
Image = 22
Image = 23
Image = 24
Image = 25
Image = 26
Image = 27
Image = 28
Image = 29
Image = 30
Image = 31
Image = 32
Image = 33
Image = 34
Image = 35
Image = 36
Image = 37
Image = 38
Image = 39
Image = 40
Image = 41
Image = 42
Image = 43
Image = 44
Image = 45
Image = 46
Image = 47
Image = 48
Image = 49
Image = 50
Image = 51
Image = 52
Image = 53
Image = 54
Image = 55
Image = 56
Image = 57
Image = 58
Image = 59
Image = 60
Image = 61
Image = 62
Image = 63
Image = 64
Image = 65
Image = 66
Image = 67
Image = 68
Image = 69
Image = 70
Image = 71
Image = 72
Image = 73
Image = 74
Image = 75
Image = 76
Image = 77
Image = 78
Image = 79
Image = 80
Image = 81
Image = 82
Image = 83
Image = 84
Image = 85
Image = 86
Image = 87
Image = 88
Imag

In [11]:
print("--------------Lemonada----------------")
for i in range(0,len(lemonada_train_x)):
    print("Image = "+str(i+1))
    # keypoints,descriptors = get_sift_features(lemonada_train_x[i])
    # features.append(descriptors)
    features.append(lpq.__call__(lemonada_train_x[i]))
    classes.append(1)

--------------Lemonada----------------
Image = 1
Image = 2
Image = 3
Image = 4
Image = 5
Image = 6
Image = 7
Image = 8
Image = 9
Image = 10
Image = 11
Image = 12
Image = 13
Image = 14
Image = 15
Image = 16
Image = 17
Image = 18
Image = 19
Image = 20
Image = 21
Image = 22
Image = 23
Image = 24
Image = 25
Image = 26
Image = 27
Image = 28
Image = 29
Image = 30
Image = 31
Image = 32
Image = 33
Image = 34
Image = 35
Image = 36
Image = 37
Image = 38
Image = 39
Image = 40
Image = 41
Image = 42
Image = 43
Image = 44
Image = 45
Image = 46
Image = 47
Image = 48
Image = 49
Image = 50
Image = 51
Image = 52
Image = 53
Image = 54
Image = 55
Image = 56
Image = 57
Image = 58
Image = 59
Image = 60
Image = 61
Image = 62
Image = 63
Image = 64
Image = 65
Image = 66
Image = 67
Image = 68
Image = 69
Image = 70
Image = 71
Image = 72
Image = 73
Image = 74
Image = 75
Image = 76
Image = 77
Image = 78
Image = 79
Image = 80
Image = 81
Image = 82
Image = 83
Image = 84
Image = 85
Image = 86
Image = 87
Image = 88
Im

In [12]:
print("--------------Scheherazade----------------")
for i in range(0,len(scheherazade_train_x)):
    print("Image = "+str(i+1))
#     features.append(get_sift_features(scheherazade_image_list[i]))
    features.append(lpq.__call__(scheherazade_train_x[i]))
    classes.append(2)

--------------Scheherazade----------------
Image = 1
Image = 2
Image = 3
Image = 4
Image = 5
Image = 6
Image = 7
Image = 8
Image = 9
Image = 10
Image = 11
Image = 12
Image = 13
Image = 14
Image = 15
Image = 16
Image = 17
Image = 18
Image = 19
Image = 20
Image = 21
Image = 22
Image = 23
Image = 24
Image = 25
Image = 26
Image = 27
Image = 28
Image = 29
Image = 30
Image = 31
Image = 32
Image = 33
Image = 34
Image = 35
Image = 36
Image = 37
Image = 38
Image = 39
Image = 40
Image = 41
Image = 42
Image = 43
Image = 44
Image = 45
Image = 46
Image = 47
Image = 48
Image = 49
Image = 50
Image = 51
Image = 52
Image = 53
Image = 54
Image = 55
Image = 56
Image = 57
Image = 58
Image = 59
Image = 60
Image = 61
Image = 62
Image = 63
Image = 64
Image = 65
Image = 66
Image = 67
Image = 68
Image = 69
Image = 70
Image = 71
Image = 72
Image = 73
Image = 74
Image = 75
Image = 76
Image = 77
Image = 78
Image = 79
Image = 80
Image = 81
Image = 82
Image = 83
Image = 84
Image = 85
Image = 86
Image = 87
Image = 8

In [13]:
print("--------------IBM----------------")
for i in range(0,len(ibm_train_x)):
    print("Image = "+str(i+1))
    # keypoints,descriptors = get_sift_features(ibm_train_x[i])
    
    # features.append(descriptors)
    features.append(lpq.__call__(ibm_train_x[i]))
    classes.append(3)

--------------IBM----------------
Image = 1
Image = 2
Image = 3
Image = 4
Image = 5
Image = 6
Image = 7
Image = 8
Image = 9
Image = 10
Image = 11
Image = 12
Image = 13
Image = 14
Image = 15
Image = 16
Image = 17
Image = 18
Image = 19
Image = 20
Image = 21
Image = 22
Image = 23
Image = 24
Image = 25
Image = 26
Image = 27
Image = 28
Image = 29
Image = 30
Image = 31
Image = 32
Image = 33
Image = 34
Image = 35
Image = 36
Image = 37
Image = 38
Image = 39
Image = 40
Image = 41
Image = 42
Image = 43
Image = 44
Image = 45
Image = 46
Image = 47
Image = 48
Image = 49
Image = 50
Image = 51
Image = 52
Image = 53
Image = 54
Image = 55
Image = 56
Image = 57
Image = 58
Image = 59
Image = 60
Image = 61
Image = 62
Image = 63
Image = 64
Image = 65
Image = 66
Image = 67
Image = 68
Image = 69
Image = 70
Image = 71
Image = 72
Image = 73
Image = 74
Image = 75
Image = 76
Image = 77
Image = 78
Image = 79
Image = 80
Image = 81
Image = 82
Image = 83
Image = 84
Image = 85
Image = 86
Image = 87
Image = 88
Image =

## Fit classifier

### Getting features of test images

In [14]:
print("Getting features of test images")
total_accuracy = 0
marehy_test_x_features = []
print("--------------Marehy----------------")
for i in range(0,len(marehy_test_x)):
    print("Test Image = "+str(i))
#     keypoints, descriptors = get_sift_features(marehy_test_x[i])
    descriptors = lpq.__call__(marehy_test_x[i])
    marehy_test_x_features.append(descriptors)
scheherazade_test_x_features = []
print("--------------Scheherazade----------------")
for i in range(0,len(scheherazade_test_x)):
    print("Test Image = "+str(i))
    scheherazade_test_x_features.append(lpq.__call__(scheherazade_test_x[i]))
#     keypoints, descriptors = get_sift_features(scheherazade_test_x[i])
#     scheherazade_test_x_features.append(descriptors)
lemonada_test_x_features = []
print("--------------Lemonada----------------")
for i in range(0,len(lemonada_test_x)):
    print("Test Image = "+str(i))
    # keypoints,descriptors = get_sift_features(lemonada_test_x[i])
    
    descriptors = lpq.__call__(lemonada_test_x[i])
    lemonada_test_x_features.append(descriptors)
ibm_test_x_features = []
print("--------------IBM----------------")
for i in range(0,len(ibm_test_x)):
    print("Test Image = "+str(i))
    # keypoints,descriptors = get_sift_features(ibm_test_x[i])
    
    descriptors = lpq.__call__(ibm_test_x[i])
    ibm_test_x_features.append(descriptors)


Getting features of test images
--------------Marehy----------------
Test Image = 0
Test Image = 1
Test Image = 2
Test Image = 3
Test Image = 4
Test Image = 5
Test Image = 6
Test Image = 7
Test Image = 8
Test Image = 9
Test Image = 10
Test Image = 11
Test Image = 12
Test Image = 13
Test Image = 14
Test Image = 15
Test Image = 16
Test Image = 17
Test Image = 18
Test Image = 19
Test Image = 20
Test Image = 21
Test Image = 22
Test Image = 23
Test Image = 24
Test Image = 25
Test Image = 26
Test Image = 27
Test Image = 28
Test Image = 29
Test Image = 30
Test Image = 31
Test Image = 32
Test Image = 33
Test Image = 34
Test Image = 35
Test Image = 36
Test Image = 37
Test Image = 38
Test Image = 39
Test Image = 40
Test Image = 41
Test Image = 42
Test Image = 43
Test Image = 44
Test Image = 45
Test Image = 46
Test Image = 47
Test Image = 48
Test Image = 49
Test Image = 50
Test Image = 51
Test Image = 52
Test Image = 53
Test Image = 54
Test Image = 55
Test Image = 56
Test Image = 57
Test Image = 

#### KNN

In [15]:
knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(features, classes)

In [16]:
total_accuracy = 0
print("--------------Marehy----------------")
total_accuracy+= knn.score(marehy_test_x_features,marehy_test_y)
print("--------------Scheherazade----------------")
total_accuracy+= knn.score(scheherazade_test_x_features,scheherazade_test_y)
print("--------------Lemonada----------------")
total_accuracy+= knn.score(lemonada_test_x_features,lemonada_test_y)
print("--------------IBM----------------")
total_accuracy+= knn.score(ibm_test_x_features,ibm_test_y)
print("Total Accuracy of KNN = "+str(total_accuracy/4*100))

--------------Marehy----------------
--------------Scheherazade----------------
--------------Lemonada----------------
--------------IBM----------------
Total Accuracy of KNN = 91.07963023212008


#### Decision Tree

In [17]:
dt = DecisionTreeClassifier()
dt.fit(features, classes)


In [18]:
total_accuracy=0
print("--------------Marehy----------------")
total_accuracy+= dt.score(marehy_test_x_features,marehy_test_y)
print("--------------Scheherazade----------------")
total_accuracy+= dt.score(scheherazade_test_x_features,scheherazade_test_y)
print("--------------Lemonada----------------")
total_accuracy+= dt.score(lemonada_test_x_features,lemonada_test_y)
print("--------------IBM----------------")
total_accuracy+= dt.score(ibm_test_x_features,ibm_test_y)
print("Total Accuracy of Decision Tree = "+str(total_accuracy/4*100))

--------------Marehy----------------
--------------Scheherazade----------------
--------------Lemonada----------------
--------------IBM----------------
Total Accuracy of Decision Tree = 93.54738696121035


#### SVM

In [19]:
clf = svm.SVC()
clf.fit(np.array(features),np.array( classes))

In [20]:
total_accuracy = 0
print("--------------Marehy----------------")
total_accuracy+= clf.score(marehy_test_x_features,marehy_test_y)
print("--------------Scheherazade----------------")
total_accuracy+= clf.score(scheherazade_test_x_features,scheherazade_test_y)
print("--------------Lemonada----------------")
total_accuracy+= clf.score(lemonada_test_x_features,lemonada_test_y)
print("--------------IBM----------------")
total_accuracy+= clf.score(ibm_test_x_features,ibm_test_y)
print("Total Accuracy of SVM = "+str(total_accuracy/4*100))

--------------Marehy----------------
--------------Scheherazade----------------
--------------Lemonada----------------
--------------IBM----------------
Total Accuracy of SVM = 73.41678988180095


#### Random Forest

In [36]:
clf = RandomForestClassifier()
clf.fit(features, classes)


In [37]:
total_accuracy =0
print("--------------Marehy----------------")
total_accuracy+= clf.score(marehy_test_x_features,marehy_test_y)
print("--------------Scheherazade----------------")
total_accuracy+= clf.score(scheherazade_test_x_features,scheherazade_test_y)
print("--------------Lemonada----------------")
total_accuracy+= clf.score(lemonada_test_x_features,lemonada_test_y)
print("--------------IBM----------------")
total_accuracy+= clf.score(ibm_test_x_features,ibm_test_y)
print("Total Accuracy of Random Forest = "+str(total_accuracy/4*100))

--------------Marehy----------------
--------------Scheherazade----------------
--------------Lemonada----------------
--------------IBM----------------
Total Accuracy of Random Forest = 98.81815093739331


### Saving Model

In [38]:
import pickle
pickle.dump(clf, open('random_99.pkl', 'wb'))