In [8]:
from skimage.feature import hog
from skimage import io,color
from skimage.transform import resize
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn import metrics
import os
from  LBP_descriptor import LocalBinaryPatterns
import commonfunctions as cf
import cv2
import pandas as pd
import csv


# Get our training data 
X_train: features of training data.\
Y_train: labels of training data (0 --> F, 1 --> M), as assigned by `read_labels` below.


In [20]:

# ICDAR LABELS
# Parse the answers CSV: drop the header row, then convert every cell to an
# integer (going through float first).
with open("our dataset/train_answers.csv", 'r') as file:
    rows = np.array(list(csv.reader(file, delimiter=',')))[1:].astype(float).astype(int)

# labels_ICDAR[k] is column 1 of the (k + 1)-th data row of the CSV.
labels_ICDAR = [row[1] for row in rows]
    
def get_label_ICDAR(img):
    """Return the ICDAR label for a zero-padded, three-character image number.

    Parameters
    ----------
    img : str
        String whose first three characters are the 1-based image number,
        e.g. "007", "042" or "123".

    Returns
    -------
    The entry of the module-level ``labels_ICDAR`` list at position
    ``number - 1``.
    """
    # Skip up to two leading zeros; what remains is parsed as the number.
    if img[0] != '0':
        number = img[0:3]
    elif img[1] != '0':
        number = img[1:3]
    else:
        number = img[2]
    return labels_ICDAR[int(number) - 1]
        


def read_labels(path):
    """Build the label vector for every file found in ``path``.

    The label is derived from the file name:
      * names starting with 'F' are labelled 0,
      * names starting with 'M' are labelled 1,
      * any other name is looked up through ``get_label_ICDAR`` using
        characters 1..3 of the name.

    Parameters
    ----------
    path : str
        Directory whose entries are listed with ``os.listdir``.

    Returns
    -------
    numpy.ndarray
        Float array with one label per directory entry, in ``os.listdir`` order.
    """
    def label_of(filename):
        # The first character of the name decides how the label is obtained.
        first = filename[0]
        if first == 'F':
            return 0
        if first == 'M':
            return 1
        return get_label_ICDAR(filename[1:4])

    return np.array([label_of(name) for name in os.listdir(path)]).astype(float)

# Build the label vectors from the file names in each data directory.
# Labels come back in os.listdir order, the same listing the feature-extraction
# cells iterate over when they build X_train / X_test.
Y_train= read_labels("Training_data/")
Y_test= read_labels("Test_data/")

238


# HOG feature

In [10]:
def HOG(img):
    """Compute the HOG descriptor of an image after resizing it to 128x64.

    Parameters
    ----------
    img : array-like
        Image to describe; it is resized to (128, 64) before HOG is applied.

    Returns
    -------
    tuple
        ``(feature_vector, hog_image)`` exactly as produced by
        ``skimage.feature.hog`` when called with ``visualize=True``.
    """
    resized = np.array(resize(img, (128, 64)))
    # 9 orientation bins, 8x8-pixel cells, 3x3 cells per block.
    return hog(
        resized,
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(3, 3),
        visualize=True,
    )


# LBP feature and histogram descriptor

In [11]:
# Create the LBP descriptor object; the feature-extraction cells call
# desc.describe(img) on every image.
# NOTE(review): LocalBinaryPatterns is defined in LBP_descriptor, which is not shown here.
# The original note said the two arguments are "number of data (train + test)" and
# "number of neighbors" -- confirm what 24 and 8 actually mean against that class.
desc = LocalBinaryPatterns(24, 8)

# Extract features from training data

In [12]:
#  NOTE => to save your time:
#  Run this cell once and the features will be saved in an external file so that
#  you can read them by running the next cell.

# X_train holds one combined feature vector (HOG followed by the LBP histogram)
# per training image, in os.listdir order.
X_train = []
files = os.listdir("Training_data/")
for file in files:
    # read the image
    img = io.imread("Training_data/" + file)

    # ------------------- HOG feature ------------------------
    # HOG() also returns a visualisation image; only the feature vector is used here.
    feature_vector, hog_image = HOG(img)

    # ------------------- LBP feature ------------------------
    # cf.downSize is defined in commonfunctions (not shown); presumably it scales
    # the image by the given factor before the LBP histogram is computed.
    img = cf.downSize(img, 0.5)
    hist = desc.describe(img)

    # Concatenate both descriptors into one flat feature vector for this image.
    X_train.append(np.hstack((feature_vector, hist)).tolist())

# Write the feature vector of each image to an external file.
# The with-block closes the file on exit, so no explicit close() is needed.
with open('training_features.npy', 'wb') as f:
    np.save(f, X_train)
   

In [13]:
# Sanity check: number of test labels produced by read_labels("Test_data/").
print(len(Y_test))

238


In [14]:
# Read the feature vectors of the training data back from the npy file.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so only
# load files written by this notebook. It is kept so existing cache files still load.
# The with-block closes the file on exit, so no explicit close() is needed.
with open('training_features.npy', 'rb') as f:
    X_train = np.load(f, allow_pickle=True)

# Get our test data 

In [21]:
# NOTE => to save your time:
# Run this cell once and the features will be saved in external files so that
# you can read them by running the next cell.

# X_test holds one combined feature vector (HOG followed by the LBP histogram)
# per test image, in os.listdir order.
X_test = []
files = os.listdir("Test_data/")
for file in files:
    # read the image
    img = io.imread("Test_data/" + file)

    # ------------------- HOG feature ------------------------
    # HOG() also returns a visualisation image; only the feature vector is used here.
    feature_vector, hog_image = HOG(img)

    # ------------------- LBP feature ------------------------
    # cf.downSize is defined in commonfunctions (not shown); presumably it scales
    # the image by the given factor before the LBP histogram is computed.
    img = cf.downSize(img, 0.5)
    hist = desc.describe(img)

    # Concatenate both descriptors into one flat feature vector for this image.
    X_test.append(np.hstack((feature_vector, hist)).tolist())

# Write the feature vectors of the test data to an external file.
# The with-blocks close the files on exit, so no explicit close() is needed.
with open('test_features.npy', 'wb') as f:
    np.save(f, X_test)

# Write the labels of the test data to an external file.
with open('Y_test.npy', 'wb') as f:
    np.save(f, Y_test)

In [22]:
# Read the feature vectors and labels of the test data back from their npy files.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so only
# load files written by this notebook. It is kept so existing cache files still load.
# The with-blocks close the files on exit, so no explicit close() is needed.
with open('test_features.npy', 'rb') as f:
    X_test = np.load(f, allow_pickle=True)

with open('Y_test.npy', 'rb') as f:
    Y_test = np.load(f, allow_pickle=True)

# Assign weights for each feature

In [None]:
# initialize array of ones for initial weights
# NOTE(review): w has 206 entries, but the slices below address indices up to 11060.
# NumPy clips out-of-range slice bounds, so as written every entry of w becomes 2
# and the second assignment selects an empty slice (it has no effect).
w= np.ones(206)
#----------- feature vector partitions -------------------
# HOG : 0 => 10800
# LBP : 10800 => 11060
# NOTE(review): these ranges do not agree with the length of w (206) -- confirm the
# real lengths of the HOG and LBP parts, e.g. from len(X_train[0]).
#---------------------------------------

# update weights of each feature
w[0: 10800 ] = 2
w[ 10800: 11060 ] = 0.5

# length of weights should be the same as number of points??!
# NOTE(review): w is not passed to any classifier in the cells shown below, so it
# currently has no influence on the reported accuracies.
print(len(w))
#print(len(X_train[0]))


206


# Classification:

# 1. Random forest classifier

In [23]:

# Random forest with 1000 trees.
# random_state is fixed so the fitted forest, and therefore the reported accuracy,
# is reproducible across runs; 42 matches the seed used by the LinearSVC cell.
clf = RandomForestClassifier(n_estimators=1000, random_state=42)
clf.fit(X_train, Y_train)
Y_Predicted = clf.predict(X_test)
print("Accuracy:", metrics.accuracy_score(Y_test, Y_Predicted)*100, "%")


238 238
Accuracy: 76.89075630252101 %


# 2. Linear SVM classifier

In [30]:
# Linear SVM: fit on the training features, predict the test set, and report
# the share of correct predictions as a percentage.
clf = LinearSVC(random_state=42, C=50.0)
clf.fit(X_train, Y_train)
Y_Predicted = clf.predict(X_test)
svm_accuracy = metrics.accuracy_score(Y_test, Y_Predicted)
print("Accuracy:", svm_accuracy*100, "%")

Accuracy: 78.57142857142857 %
6830


# 3. Adaboost classifier

In [25]:
# AdaBoost with 500 boosting rounds.
# random_state is fixed so the fitted ensemble, and therefore the reported accuracy,
# is reproducible across runs; 42 matches the seed used by the LinearSVC cell.
clf = AdaBoostClassifier(n_estimators=500, random_state=42)
clf.fit(X_train, Y_train)
Y_Predicted = clf.predict(X_test)
print("Accuracy:", metrics.accuracy_score(Y_test, Y_Predicted)*100, "%")

Accuracy: 79.83193277310924 %


# 4. KNN classifier

In [26]:
# Try every neighbourhood size k = 1..29 and record the test accuracy (in percent)
# obtained with each one.
accuracies = []
for k in range(1, 30):
    clf = KNeighborsClassifier(n_neighbors=k)
    Y_Predicted = clf.fit(X_train, Y_train).predict(X_test)
    accuracies.append(metrics.accuracy_score(Y_test, Y_Predicted)*100)

# Report the highest accuracy reached over all k.
# NOTE(review): the best k is selected using the test set itself, so this figure
# is an optimistic estimate -- consider choosing k on a validation split.
print("Accuracy:", max(accuracies), "%")

Accuracy: 76.05042016806722 %
