In [60]:
# All the imports you will need in the whole world
from skimage.feature import greycomatrix, greycoprops
from skimage import io
from skimage.color import rgb2gray
import matplotlib.pyplot as plt
import os
import numpy as np
import cv2
import imutils
from skimage.transform import (hough_line, hough_line_peaks)
from skimage.feature import greycomatrix, greycoprops, canny, corner_harris
from skimage.morphology import binary_erosion, binary_dilation, binary_closing,skeletonize, thin
from sklearn.model_selection import train_test_split
from skimage.filters import sobel_h, sobel, sobel_v,roberts, prewitt,threshold_otsu
import glob
import math
from matplotlib import cm
from skimage.measure import find_contours

from sklearn.tree import DecisionTreeClassifier
from sklearn import svm

In [61]:
def show_images(images, titles=None):
    # This function is used to show image(s) with titles by sending an array of images and an array of associated titles.
    # images[0] will be drawn with the title titles[0] if exists
    # You aren't required to understand this function, use it as-is.
    n_ims = len(images)
    if titles is None:
        titles = ['(%d)' % i for i in range(1, n_ims + 1)]
    fig = plt.figure()
    n = 1
    for image, title in zip(images, titles):
        a = fig.add_subplot(1, n_ims, n)
        if image.ndim == 2:
            plt.gray()
        plt.imshow(image)
        a.set_title(title)
        n += 1
    fig.set_size_inches(np.array(fig.get_size_inches()) * n_ims)
    plt.show()

In [62]:
def preprocess(img):
    img = np.asarray(img)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    return img

In [63]:
def preprocessing(gray):

    # gray = np.uint8(gray)
    img = cv2.GaussianBlur(gray,(3,3),0)
    # img = cv2.bilateralFilter(gray,9,75,75)
    # convolute with proper kernels
    # img = np.uint8(img)
    dest = cv2.Laplacian(img, cv2.CV_16S, ksize=3)
    abs_dest = cv2.convertScaleAbs(dest)
    # show_images([abs_dest])
    Otsu_Threshold = threshold_otsu(img)   
    if (img[0,0] < Otsu_Threshold and img[0,-1] < Otsu_Threshold and img[-1,0] < Otsu_Threshold and img[-1,0] < Otsu_Threshold):
        binary = img > Otsu_Threshold         
    else:
        binary = img < Otsu_Threshold  
    skeleton_img =  skeletonize(binary)

    #separating the diac.
    H = np.sum(binary,axis = 1)
    I = np.argmax(H)
    point = [(x,I) for x in range(binary.shape[0])]
    # print(I)
    h, w = binary.shape[:2]
    # print(h,w)
    mask = np.zeros((h+2, w+2), np.uint8)
    # print(mask)
    binary = binary < 1 
    binary = binary.astype(np.uint8)
    # show_images([binary])
    for j in range(binary.shape[1] - 1):
        if binary[I][j] == 0 and binary[I][j+1] == 1:
            cv2.floodFill(binary,mask,seedPoint = (j,I),newVal = 1) 
    # print(b)
    text = mask
    diacritics = binary
    return img,abs_dest,skeleton_img,text,diacritics

In [64]:
def HVSL(edge):
    horizontal = edge.copy()
    vertical = edge.copy()
    H = 0
    V = 0 
    
    cols = horizontal.shape[1]
    horizontal_size = cols / 30
    horizontalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (int(horizontal_size + 4), 1))
    horizontal = cv2.erode(horizontal, horizontalStructure)
    horizontal = cv2.dilate(horizontal, horizontalStructure)
    Otsu_Threshold = threshold_otsu(horizontal)   
    horizontal = horizontal > Otsu_Threshold 
    H, output, stats, centroids = cv2.connectedComponentsWithStats(horizontal.astype(np.uint8),connectivity=8)
    


    rows = vertical.shape[0]
    verticalsize = rows / 30
    verticalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (1,int(verticalsize + 4)))
    vertical = cv2.erode(vertical, verticalStructure)
    vertical = cv2.dilate(vertical.astype(np.uint8), verticalStructure)
    Otsu_Threshold = threshold_otsu(vertical)   
    vertical = vertical > Otsu_Threshold 

    V, output, stats, centroids = cv2.connectedComponentsWithStats(vertical.astype(np.uint8),connectivity=8)
    return H - 1,V - 1

In [65]:
def TOE(edge):
    houghSpace,angles, dists = hough_line(edge)
    houghSpace,angles, dists = hough_line_peaks(houghSpace,angles,dists,threshold=0.5*np.amax(houghSpace))
    start = -2
    bin = []
    while True :
        if(start >= 2):
            break
        bin.append(start)
        start += 0.01
    bin = [round(bins,2) for bins in bin ]
    avg_angle = np.average(angles)
    angles = [round(angle,2) for angle in angles ]
    hist , bins = np.histogram(angles,bins = bin)
    return hist,bins,avg_angle

In [66]:
def TOS(skeleton):
    houghSpace,angles, dists = hough_line(skeleton)
    houghSpace,angles, dists = hough_line_peaks(houghSpace,angles,dists,threshold=0.5*np.amax(houghSpace))

    start = -2
    bin = []
    while True :
        if(start >= 2):
            break
        bin.append(start)
        start += 0.01
    bin = [round(bins,2) for bins in bin ]
    avg_angle = np.average(angles)
    angles = [round(angle,2) for angle in angles ]
    hist , bins = np.histogram(angles,bins = bin)
    return hist,bins,avg_angle

In [67]:
def LVL(ske):
    vertical = ske.copy()
    V = 0 
    rows = vertical.shape[0]
    verticalsize = rows // 30
    verticalStructure = cv2.getStructuringElement(cv2.MORPH_RECT, (1, int(verticalsize + 4)))
    vertical = cv2.erode(vertical.astype(np.uint8), verticalStructure)
    vertical = cv2.dilate(vertical.astype(np.uint8), verticalStructure)
    Otsu_Threshold = threshold_otsu(vertical)   
    vertical = vertical > Otsu_Threshold 
    V, output, stats, centroids = cv2.connectedComponentsWithStats(vertical.astype(np.uint8),connectivity=8)
    sizes = stats[1:, -1]

    Otsu_Threshold = threshold_otsu(ske)   
    ske = ske > Otsu_Threshold 
    min = math.inf
    max = -1
    for i in range(ske.shape[0]):
        for j in range(ske.shape[1]):
            if (ske[i][j] == 1):
                if i > max:
                    max = i
                elif i < min:
                    min = i
    text_hight = max - min
    num_VL = V - 1
    if(len(sizes) == 0):
        sizes = [2,1,2]
    higtest_VL = np.max(sizes)
    drvt = abs(text_hight - higtest_VL)
    # variance = np.var(sizes)
    # print(sizes)
    # print(text_hight,"text hight")
    # print(num_VL,"num_VL")
    # print(higtest_VL,"higtest_VL")
    # print(drvt,"drvt")
    # print(variance,"variance")
    return text_hight,num_VL,higtest_VL,drvt

In [68]:
def Tth(edge,ske):
    Otsu_Threshold = threshold_otsu(ske)   
    ske = ske > Otsu_Threshold 

    Otsu_Threshold = threshold_otsu(edge)   
    edge = edge > Otsu_Threshold 
    

    dest_up = []
    dest_down = []
    for i in range(ske.shape[0]):
        for j in range(ske.shape[1]):
            if (ske[i][j] == 1):
                c = True
                for k in range(i + 1,edge.shape[0]):
                    if(edge[k][j] == 1):
                        c = False
                        d = abs(k - i)
                        dest_down.append(d)
                        break
                if(c):
                    dest_down.append(0)        
                
                u = 0
                for k in range(0, i):
                    if(edge[k][j] == 1):
                        u = k
                dest_up.append(abs(u-i))
    return dest_down + dest_up

In [69]:
def SDs(diacritics):

    arra  = np.zeros(diacritics.shape)
    arra = diacritics == 0

    contours = find_contours(arra ,level = 0.2,fully_connected='high')

    
    m1 = rgb2gray(io.imread("mark1.jpg"))
    Otsu_Threshold = threshold_otsu(m1)   
    m1 = m1 < Otsu_Threshold 


    m2 = rgb2gray(io.imread("mark2.jpg"))
    Otsu_Threshold = threshold_otsu(m2)   
    m2 = m2 < Otsu_Threshold 

    dist_1 = []
    dist_2 = []
    bounding_boxes = []
    for contour in contours:
        Y_Values = np.asarray(contour[:,0])
        X_Values = np.asarray(contour[:,1])
        bounding_boxes.append([
        int(np.amin(X_Values)),
        int(np.amax(X_Values)),
        int(np.amin(Y_Values)),
        int(np.amax(Y_Values))])

    for box in bounding_boxes:
        [Xmin, Xmax, Ymin, Ymax] = box
        dist_1.append(cv2.matchShapes(arra[Ymin:Ymax,Xmin:Xmax].astype(np.uint8),m1.astype(np.uint8),cv2.CONTOURS_MATCH_I2,0))
        dist_2.append(cv2.matchShapes(arra[Ymin:Ymax,Xmin:Xmax].astype(np.uint8),m2.astype(np.uint8),cv2.CONTOURS_MATCH_I2,0))
    if (len(dist_1) == 0 ):
        return 20 , 20 
    return np.min(dist_1),np.min(dist_2)

In [70]:
def WOR(text):
    contours, _ = cv2.findContours(text.astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    angles = []
    for i, c in enumerate(contours):
        
        # Calculate the area of each contour
        area = cv2.contourArea(c)
        if( area < 0.002*(len(text)*len(text[0]) or area > 0.4*(len(text)*len(text[0])))):
            continue
        # Ignore contours that are too small or too large
        rect = cv2.minAreaRect(c)
        box = cv2.boxPoints(rect)
        box = np.int0(box)

        center = (int(rect[0][0]),int(rect[0][1])) 
        width = int(rect[1][0])
        height = int(rect[1][1])
        angle = int(rect[2])
        
        if width < height:
            angle = 90 - angle
        else:
            angle = -angle
        angles.append(angle)
      
    angles = np.sort(angles)
    for i in range(len(angles)):
        if angles[i] == 180:
            angles[i] = 0
    ang = range(90)
    hist , bins = np.histogram(angles,bins = ang)
    ori = np.average(angles)
    return ori,hist,len(angles)

In [71]:
def HPP(img):
    H = np.sum(img[1:img.shape[0]-1,:],axis = 1)
    I = np.argmax(H)
    H = np.sort(H)
    if(I == 0):
        I = 1
    hpp = np.sum(img[:,1:img.shape[1] - 1]) / I
    horizontal_projection = np.sum(img, axis = 1) 
    return H[-3:],hpp

In [72]:
def readData():
    xData = []
    yData = []
    for filename in sorted(glob.glob('ACdata_base/1/*.jpg')):
        img = cv2.imread(filename)
        img = preprocess(img)
        xData.append(img)
        yData.append(1)
    for filename in sorted(glob.glob('ACdata_base/2/*.jpg')):
        img = cv2.imread(filename)
        img = preprocess(img)
        xData.append(img)
        yData.append(2)
    for filename in sorted(glob.glob('ACdata_base/3/*.jpg')):
        img = cv2.imread(filename)
        img = preprocess(img)
        xData.append(img)
        yData.append(3)
    for filename in sorted(glob.glob('ACdata_base/4/*.jpg')):
        img = cv2.imread(filename)
        img = preprocess(img)
        xData.append(img)
        yData.append(4)
    for filename in sorted(glob.glob('ACdata_base/5/*.jpg')):
        img = cv2.imread(filename)
        img = preprocess(img)
        xData.append(img)
        yData.append(5)
    for filename in sorted(glob.glob('ACdata_base/6/*.jpg')):
        img = cv2.imread(filename)
        img = preprocess(img)
        xData.append(img)
        yData.append(6)
    for filename in sorted(glob.glob('ACdata_base/7/*.jpg')):
        img = cv2.imread(filename)
        img = preprocess(img)
        xData.append(img)
        yData.append(7)
    for filename in sorted(glob.glob('ACdata_base/8/*.jpg')):
        img = cv2.imread(filename)
        img = preprocess(img)
        xData.append(img)
        yData.append(8)
    for filename in sorted(glob.glob('ACdata_base/9/*.jpg')):
        img = cv2.imread(filename)
        img = preprocess(img)
        xData.append(img)
        yData.append(9)
    xTrain, xTest, yTrain, yTest = train_test_split(np.asarray(xData), np.asarray(yData), test_size = 0.2, shuffle = True)
    return xTrain, xTest, yTrain, yTest

In [73]:
def Featur_Extraction(xTrain):
    Features = []
    for gray in xTrain:
        img_feature = []
        img,edges,ske,text,diacritics = preprocessing(gray)
        Otsu_Threshold = threshold_otsu(img)   
        img = img < Otsu_Threshold

        H,V = HVSL(edges)
        img_feature.append(H)
        img_feature.append(V)

        hist_e , b_e,avg_angle_e = TOE(edges)
        hist_s , b_s,avg_angle_s = TOS(ske)
        img_feature.append(avg_angle_s)
        img_feature.append(avg_angle_e)
        # for i in range(len(hist_e)):
        #     img_feature.append(hist_e[i])
        #     img_feature.append(hist_s[i])
        
        text_hight,num_VL,higtest_VL,drvt = LVL(ske)
        img_feature.append(text_hight)
        img_feature.append(num_VL)
        img_feature.append(higtest_VL)
        img_feature.append(drvt)


        Thickness = Tth(edges,ske)
        avg_thick = np.average(Thickness)
        img_feature.append(avg_thick)
        # for i in range(300):
        #     img_feature.append(Thickness[i])
        
        d1 , d2 = SDs(diacritics)
        img_feature.append(d1)
        img_feature.append(d2)
        
        H , hpp = HPP(text)
        img_feature.append(H[0])
        img_feature.append(H[1])
        img_feature.append(H[2])
        img_feature.append(hpp)
         
        ori,hist_o,o = WOR(text)
        img_feature.append(ori)
        for i in range(len(hist_o)):
            img_feature.append(hist_o[i])
        img_feature.append(o)
        Features.append(img_feature)
        
    return Features

In [74]:
def Featur_Extraction_test(gray):
    img_feature = []
    img,edges,ske,text,diacritics = preprocessing(gray)
    Otsu_Threshold = threshold_otsu(img)   
    img = img < Otsu_Threshold

    H,V = HVSL(edges)
    img_feature.append(H)
    img_feature.append(V)

    hist_e , b_e,avg_angle_e = TOE(edges)
    hist_s , b_s,avg_angle_s = TOS(ske)
    img_feature.append(avg_angle_s)
    img_feature.append(avg_angle_e)
    
    text_hight,num_VL,higtest_VL,drvt = LVL(ske)
    img_feature.append(text_hight)
    img_feature.append(num_VL)
    img_feature.append(higtest_VL)
    img_feature.append(drvt)


    Thickness = Tth(edges,ske)
    avg_thickness = np.average(Thickness)
    img_feature.append(avg_thickness)
#     for i in range(300):
#             img_feature.append(Thickness[i])
    
    d1 , d2 = SDs(diacritics)
    img_feature.append(d1)
    img_feature.append(d2)
    
    H , hpp = HPP(text)
    # diff = abs((H[0] - H[1]) + (H[1] - H[2]))
    img_feature.append(H[0])
    img_feature.append(H[1])
    img_feature.append(H[2])
    img_feature.append(hpp)
        
    ori,hist_o,o = WOR(text)
    img_feature.append(ori)
    for i in range(len(hist_o)):
        img_feature.append(hist_o[i])
    img_feature.append(o)
    return img_feature

In [75]:
# img = cv2.cvtColor(cv2.imread("ACdata_base/9/1503.jpg"), cv2.COLOR_RGB2GRAY)
# img,edges,ske,text,diacritics = preprocessing(img)
# show_images([img,edges,ske,text,diacritics])
# H,V = HVSL(edges)
# hist_e,b_e,avg_angle_e = TOE(edges)
# hist_s,b_s,avg_angle_s = TOS(ske)
# text_hight,num_VL,higtest_VL,drvt = LVL(ske)
# thickness = Tth(edges,ske)
# d1 , d2 =SDs(diacritics)
# HH,hpp = HPP(text)
# ori,hist_o,o = WOR(text)
# print([H,V,avg_angle_e,avg_angle_e,text_hight,num_VL,higtest_VL,drvt,np.average(thickness),d1,d2,HH,hpp,ori])

In [76]:
xTrain, xTest, yTrain, yTest = readData()
xTrain = Featur_Extraction(xTrain)

  xTrain, xTest, yTrain, yTest = train_test_split(np.asarray(xData), np.asarray(yData), test_size = 0.2, shuffle = True)
  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)


In [77]:
print(len(xTrain[0]))

106


In [78]:
xTest = np.asarray(xTest)
xTrain = np.asarray(xTrain)
yTest = np.asarray(yTest)
yTrain = np.asarray(yTrain)

x_test = []
for i in range(xTest.shape[0]):
    testPoint = np.asarray(Featur_Extraction_test(xTest[i]))
    x_test.append(testPoint)
# x_test = np.asarray(x_test)

# for i in range(x_test.shape[0]):
#     for j in range(x_test.shape[1]):
#         if(x_test[i][j] > 10000 ):
#             x_test[i][j] = 0
xTest = np.asarray(x_test) 



In [79]:
for i in range(xTrain.shape[0]):
    for j in range(xTrain.shape[1]):
        if(xTrain[i][j] > 10000 or math.isnan (xTrain[i][j])):
            xTrain[i][j] = 0
        

In [80]:
for i in range(xTest.shape[0]):
    for j in range(xTest.shape[1]):
        if(xTest[i][j] > 10000 or math.isnan (xTest[i][j])):
            xTest[i][j] = 0

In [81]:
#Create a svm Classifier
clf = svm.SVC(kernel='linear') # Linear Kernel

#Train the model using the training sets
clf.fit(xTrain, yTrain)

#Predict the response for test dataset
predict_Test = clf.predict(xTest)
SVM_pred = (np.sum(predict_Test == yTest) / len(yTest)) * 100
print("SVM: ", SVM_pred, "%")

SVM:  83.67952522255193 %


In [82]:
from sklearn.neural_network import MLPClassifier

In [83]:
classifier = MLPClassifier(hidden_layer_sizes=(150,100,50), max_iter=300,activation = 'relu',solver='adam',random_state=1)
classifier.fit(xTrain, yTrain)
MLP_pred = classifier.predict(xTest)
kk = (np.sum(MLP_pred == yTest) / len(yTest)) * 100
print("MLP: ", kk, "%")

MLP:  86.05341246290801 %


In [84]:
# Import necessary modules
from sklearn.neighbors import KNeighborsClassifier

In [91]:
knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(xTrain, yTrain)
KNN_p = knn.predict(xTest)
KNN = (np.sum(KNN_p == yTest) / len(yTest)) * 100
print("KNN: ", KNN, "%")

KNN:  68.84272997032642 %


In [86]:
from sklearn.naive_bayes import GaussianNB


In [87]:
#Create a Gaussian Classifier
model = GaussianNB()

# Train the model using the training sets
model.fit(xTrain,yTrain)

#Predict Output
predicted= model.predict(xTest) # 0:Overcast, 2:Mild
gau = (np.sum(predicted == yTest) / len(yTest)) * 100
print("gau: ", gau, "%")

gau:  56.083086053412465 %


In [88]:
from sklearn.neighbors import KernelDensity

In [89]:
from sklearn.ensemble import RandomForestClassifier

classifier = RandomForestClassifier(max_depth=2, random_state=0)

classifier.fit(xTrain, yTrain)
y_pred = classifier.predict(xTest)
RF = (np.sum(y_pred == yTest) / len(yTest)) * 100
print("RF: ", RF, "%")

RF:  64.09495548961425 %


In [90]:
from sklearn.ensemble import AdaBoostClassifier

clf = AdaBoostClassifier(n_estimators=100, random_state=0, learning_rate = 0.01)
clf.fit(xTrain, yTrain)
adb_p = clf.predict(xTest)
adb = (np.sum(adb_p == yTest) / len(yTest)) * 100
print("adb: ", adb, "%")

adb:  54.3026706231454 %


In [93]:
#Create a svm Classifier
clf = svm.SVC(kernel='poly') # Linear Kernel

#Train the model using the training sets
clf.fit(xTrain, yTrain)

#Predict the response for test dataset
predict_Test = clf.predict(xTest)
SVM_pred = (np.sum(predict_Test == yTest) / len(yTest)) * 100
print("SVM: ", SVM_pred, "%")

SVM:  33.531157270029674 %


In [94]:
#Create a svm Classifier
clf = svm.SVC(kernel='rbf') # Linear Kernel

#Train the model using the training sets
clf.fit(xTrain, yTrain)

#Predict the response for test dataset
predict_Test = clf.predict(xTest)
SVM_pred = (np.sum(predict_Test == yTest) / len(yTest)) * 100
print("SVM: ", SVM_pred, "%")

SVM:  57.566765578635014 %
