In [16]:
from skimage.feature import hog
from skimage import io,color
from skimage.transform import resize
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn import metrics
import os
from  LBP_descriptor import LocalBinaryPatterns
import commonfunctions as cf
import cv2
import pandas as pd
import csv
from cold_feature import cold_feature
from skimage.feature import greycomatrix, greycoprops

# Get our training data 
X_train: features of training data.\
Y_train: labels of training data (1-->F, 0--> M).


In [2]:

# ICDAR LABELS 
labels_ICDAR=[]
with open("our dataset/train_answers.csv", 'r') as file:
    csvreader = csv.reader(file, delimiter=',')
    rows= np.array(list(csvreader))[1:].astype(float).astype(int)
for row in rows:
    labels_ICDAR.append(row[1])
    
def get_label_ICDAR(img):
    if img[0]=='0':
        if img[1]=='0': 
            return labels_ICDAR[int(img[2])-1]
        else: 
            return labels_ICDAR[int(img[1:3])-1]
    else: 
         return labels_ICDAR[int(img[0:3])-1]
        


def read_labels(path): 
    y=[]
    files = os.listdir(path)

    for file in files:
        if file[0]=='F':
            y.append(0)
        elif file[0]=='M':
            y.append(1)
        else: 
            y.append(get_label_ICDAR(file[1:4]))
          
    y=np.array(y).astype(float)
    return y 

Y_train= read_labels("Training_data/")
Y_test= read_labels("Test_data/")

# GLCM & HOG

In [3]:
def GLCM (image):

    # convert image to gray
    image= cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    featureVector=[]
    
    # distances -> skipped pixels
    glcm = greycomatrix(image, distances=[5], angles=[45], levels=256,
                            symmetric=True, normed=True)
    featureVector.append(greycoprops(glcm, 'dissimilarity')[0, 0])
    featureVector.append(greycoprops(glcm, 'correlation')[0, 0])
    featureVector.append(greycoprops(glcm, 'contrast')[0, 0])
    featureVector.append(greycoprops(glcm, 'homogeneity')[0, 0])
    return featureVector
def HOG(img):
    img = np.array(resize(img,(128,64))) 
    feature_vector, hog_image = hog(img, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(3,3), visualize=True)
    return feature_vector,hog_image    

# LBP Feature

In [4]:
# create object from LBP class to use it as our descriptor
# takes 2 parameters: number of data (train + test ) and number of neighbors
desc = LocalBinaryPatterns(24, 8)

# Cold Feature

In [5]:
# instantiate a new cold feature object
cold = cold_feature()

In [6]:
# TRAIN HOG & GLCM
X_train=[]
HOG_train=[]
GLCM_train=[]
 
files = os.listdir("Training_data/")
for file in files:
    # read the image
    img = cv2.imread("Training_data/"+file )
    # ------------------- Convert into gray image -------------
    # convert to gray
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        
    # threshold the grayscale image
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # ------------------- GLCM feature-----------------------
    GLCM_train=GLCM(img)
    #--------------------------------------------------------
    # ------------------- HOG feature------------------------
    HOG_train,hog_image=HOG(thresh)
    # --------------------------------------------------------
    #------------------- LBP feature------------------------
    hist = desc.describe(thresh)
    #-------------------- Cold feature ------------------------
    cold_feature_vector = cold.getFeatureVectors(thresh)

    # concatenate all the features in X_train   
    feature_vector_temp=np.hstack((HOG_train, GLCM_train, hist, cold_feature_vector)).tolist()
    X_train.append(feature_vector_temp)
    # reset them for the next img
    HOG_train=[]
    GLCM_train=[]
   
 
#convert to numpy array
X_train=np.array(X_train)
 
#write feature vector of each image in external file
with open('ALL_train.npy', 'wb') as f:
    np.save(f, X_train)
f.close()  
   

In [7]:
# Read feature vector of train data from the npy file 
with open('ALL_train.npy', 'rb') as f:
    X_train = np.load(f,allow_pickle=True)
f.close()

In [8]:
# TEST HOG & GLCM
X_test=[]
files = os.listdir("Test_data/")
HOG_test=[]
GLCM_test=[]
for file in files:
   
    # read the image
    img = cv2.imread("Test_data/"+file )
    # ------------------- Convert into gray image -------------
    # convert to gray
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        
    # threshold the grayscale image
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # ------------------- GLCM feature-----------------------
    GLCM_train=GLCM(img)
    #--------------------------------------------------------
    # ------------------- HOG feature------------------------
    HOG_train,hog_image=HOG(thresh)
    # --------------------------------------------------------
    #------------------- LBP feature------------------------
    hist = desc.describe(thresh)
    #-------------------- Cold feature ------------------------
    cold_feature_vector = cold.getFeatureVectors(thresh)

    # concatenate all the features in X_train   
    feature_test_temp=np.hstack((HOG_train, GLCM_train, hist, cold_feature_vector)).tolist()
    X_test.append(feature_test_temp)
    # reset them for the next img
    HOG_test=[]
    GLCM_test=[]
   
    
    
    
X_test=np.array(X_test)

# Classification:

In [9]:
Y_votting = np.zeros(X_test.shape[0])

# 1. Random forest classifier

In [10]:

clf=RandomForestClassifier(n_estimators=2000)
clf.fit(X_train,Y_train)
Y_Predicted=clf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(Y_test, Y_Predicted)*100,"%")
Y_votting = np.add(Y_Predicted, Y_votting)


Accuracy: 79.72972972972973 %


# 2. Linear SVM classifier

In [11]:
clf=LinearSVC(C=300.0, random_state=42)
clf.fit(X_train,Y_train )
Y_Predicted=clf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(Y_test, Y_Predicted)*100,"%")
Y_votting = np.add(Y_Predicted, Y_votting)

Accuracy: 72.97297297297297 %




# 3. Adaboost classifier

In [12]:
clf=AdaBoostClassifier(n_estimators=370)
clf.fit(X_train,Y_train)
Y_Predicted=clf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(Y_test, Y_Predicted)*100,"%")
Y_votting = np.add(Y_Predicted, Y_votting)

Accuracy: 71.62162162162163 %


# 4. KNN classifier

In [13]:
accuracies=[]
max_accuracy_predicted = 0
max_predicted_y = []
for k in range(1,30):
    clf=KNeighborsClassifier(n_neighbors = k)
    clf.fit(X_train,Y_train)
    Y_Predicted=clf.predict(X_test)
    accuracy = metrics.accuracy_score(Y_test, Y_Predicted)*100
    if accuracy > max_accuracy_predicted:
        max_predicted_y = Y_Predicted
    accuracies.append(accuracy)

print("Accuracy:",accuracies[np.argmax(accuracies)],"%")
Y_votting = np.add(Y_Predicted, Y_votting)

Accuracy: 67.56756756756756 %


# Votting system classification

In [14]:
Y_votting = np.where(Y_votting > 2, 1, 0)
print("Accuracy:", metrics.accuracy_score(Y_test, Y_votting)*100,"%")

Accuracy: 74.32432432432432 %


# Gradient Boosting classifier

In [19]:
clf=GradientBoostingClassifier(n_estimators=120, learning_rate=1.0, max_depth=8,random_state=0)
clf.fit(X_train,Y_train)
Y_Predicted=clf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(Y_test, Y_Predicted)*100,"%")

Accuracy: 85.13513513513513 %
