In [1]:

import numpy as np
import cv2
import os
from sklearn.preprocessing import PolynomialFeatures
import skimage.measure

from sklearn import svm
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from skimage.feature import hog
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
import joblib

In [15]:
import cv2
import joblib
from skimage.feature import hog
import numpy as np
import imutils
import skimage.measure
from skimage.morphology import opening
from skimage.morphology import erosion
from matplotlib import pyplot as plt
from skimage.morphology import skeletonize
from skimage.util import invert


In [82]:
def dfs(img, mask, i, j):
    if i < 0 or j < 0 or i > 31 or j > 31 or mask[i,j] < 0.5 or img[i,j] < 0.5:
        return
    mask[i,j] = 0
    dfs(img, mask, i+1, j)
    dfs(img, mask, i, j+1)
    dfs(img, mask, i-1, j)
    dfs(img, mask, i, j-1)
    return

def cnt(mask):
    x1 = np.sum(mask[0:16, :])
    x2 = np.sum(mask[16:32,:])
    return x1 / (32.*16), x2 / (32.*16)

def count_neighbours(i,j,mat):
    m = mat.copy()
    m = m[i-1:i+2,j-1:j+2]
    kern = np.array([[1,1,1],[1,0,1],[1,1,1]])
    prod = sum(sum(m*kern))
    return prod
def find_highest_dot(skel):
    m = skel.copy()
    for i in range(m.shape[0]):
        for j in range(m.shape[1]):
            if m[i][j] == 1:
                return j
def find_lowest_dot(skel):
    m = skel.copy()
    for i in range(m.shape[0]-1,-1,-1):
        for j in range(m.shape[1]):
            if m[i][j] == 1:
                return j
def find_left_dot(skel):
    m = skel.copy()
    for j in range(m.shape[1]):
        for i in range(m.shape[0]):
            if m[i][j] == 1:
                return i
def find_right_dot(skel):
    m = skel.copy()
    for j in range(m.shape[1]-1,-1,-1):
        for i in range(m.shape[0]):
            if m[i][j] == 1:
                return i

In [163]:
def preProcessing(img):
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.equalizeHist(img)
    img[img > imthr] = 255.
    img[img <= imthr] = 0
    img[:, 31:32] = 255.
    img[31:32, :] = 255.
    img[0:1, :] = 255.
    img[:, 0:1] = 255.
    img = img / 255.
    return img


def get_feature(img):
    cn = np.zeros(9, dtype=float)
    inv_img = invert(img)
    square = sum(sum(inv_img))
    skeleton = skeletonize(inv_img).astype('float64')
    for i in range(1,skeleton.shape[0]-1):
        for j in range(1, skeleton.shape[1] - 1):
            if skeleton[i][j] == 1:
                count_n = count_neighbours(i,j,skeleton)
                cn[int(count_n)] +=1
    #количество точек с числом соседей
    #print(img.shape)
    skel_length = sum(sum(skeleton))
    # b или d (самая верхняя точка ближе к правому или левому краю)
    coor_x_h = find_highest_dot(skeleton)
    coor_x_l = find_lowest_dot(skeleton)
    coor_y_r = find_right_dot(skeleton)
    coor_y_l = find_left_dot(skeleton)
    #отношение площали к длине
    if (skel_length > 1e-3):
        sq_len = square/skel_length
    else:
        sq_len = 0
    feature_vector = np.array([coor_x_h,coor_x_l,coor_y_r,coor_y_l,int(10*sq_len)], dtype=float)
    feature_vector = np.append(feature_vector,cn)
    feature_vector = feature_vector / (32.*32)
    mask = np.ones((32,32), dtype = float)
    dfs(img,mask,0,0)         
    mask[img < 0.5] = 0
    x1, x2 = cnt(mask)
    feature_vector = np.append(feature_vector, float(x1))
    feature_vector = np.append(feature_vector, float(x2))
    return feature_vector

def extract(features, Y):
    list_fd = []
    del_ind = list()
    #clf = RandomForestClassifier(n_estimators = 100, max_depth=16, verbose = 1)
    for feature in features:
        img = feature.reshape((32, 32))
        fd = get_feature(img)
        list_fd.append(fd)
    return np.array(list_fd, dtype=float)

In [4]:
width = 640
height = 480
threshold = 0.7
imthr = 50

In [5]:
path = 'myData'
testRatio = 0.2
valRatio = 0.2
imageDimensions= (32,32,3)

In [6]:
images = []
classNo = []
myList = os.listdir(path)
print("Total No of Classes Detected", len(myList))
noOfClasses = len(myList)
print("Importing Classes ......")
for x in range(0, noOfClasses):
    myPicList = os.listdir(path + "/" + str(x))
    for y in myPicList:
        curImg = cv2.imread(path + "/" + str(x) + "/" + y)
        curImg = cv2.resize(curImg, (imageDimensions[0], imageDimensions[1]))
        images.append(curImg)
        classNo.append(x)
    print(x, end=" ")
print(" ")

Total No of Classes Detected 10
Importing Classes ......
0 1 2 3 4 5 6 7 8 9  


In [7]:
images = np.array(images)
classNo = np.array(classNo)
print(images.shape)

(10423, 32, 32, 3)


In [53]:
X_train,X_test,y_train,y_test = train_test_split(images,classNo,test_size=testRatio)
X_train,X_validation,y_train,y_validation = train_test_split(X_train,y_train,test_size=valRatio)
print(X_train.shape)
print(X_test.shape)
print(X_validation.shape)

(6670, 32, 32, 3)
(2085, 32, 32, 3)
(1668, 32, 32, 3)


In [54]:
numOfSamples= []
for x in range(0,noOfClasses):
    # print(len(np.where(y_train==x)[0]))
    numOfSamples.append(len(np.where(y_train==x)[0]))

print(numOfSamples)

[630, 658, 679, 652, 668, 681, 677, 678, 684, 663]


In [55]:
X_train= np.array(list(map(preProcessing,X_train)))
X_test= np.array(list(map(preProcessing,X_test)))
X_validation= np.array(list(map(preProcessing,X_validation)))

In [56]:
X_train = X_train.reshape(X_train.shape[0],X_train.shape[1],X_train.shape[2])
X_test = X_test.reshape(X_test.shape[0],X_test.shape[1],X_test.shape[2])
X_validation = X_validation.reshape(X_validation.shape[0],X_validation.shape[1],X_validation.shape[2])

In [102]:
X_train.shape

(6670, 32, 32)

In [117]:
y_train1 = y_train[0:100]

In [164]:
fd_X_train = extract(X_train, y_train)
fd_X_test = extract(X_test, y_test)
fd_X_validation = extract(X_validation, y_validation)

In [165]:
fd_X_train.shape

(6670, 16)

In [166]:
y_train1 = np.array(y_train)

In [167]:
fd_x = np.array(fd_X_train)

In [173]:
fd_x[fd_x !=  fd_x] = 0.

In [174]:
from sklearn.ensemble import RandomForestClassifier

In [176]:
clf = RandomForestClassifier(n_estimators = 100, verbose = 1)

In [177]:
clf.fit(fd_x, y_train1)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    1.1s finished


RandomForestClassifier(verbose=1)

In [178]:
predicted = clf.predict(fd_X_test)
expected = y_test
print(accuracy_score(expected,predicted))

0.8925659472422063


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [113]:
fd_X_train[0][fd_X_train[0] != fd_X_train[0]]

array([], dtype=float64)