In [1]:
# Imports
from cmath import e
import os
from time import time

import cv2
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split,GridSearchCV,cross_validate
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

## HOG STEPS:

#REF https://www.intel.com/content/www/us/en/develop/documentation/ipp-dev-reference/top/volume-2-image-processing/computer-vision/feature-detection-functions/histogram-of-oriented-gradients-hog-descriptor.html
* Implementation of the HOG descriptor algorithm is as follows:
1. Divide the image into small connected regions called cells, and for each cell compute a histogram of gradient directions or edge orientations for the pixels within the cell.
2. Discretize each cell into angular bins according to the gradient orientation.
3. Each pixel in a cell contributes its weighted gradient to the corresponding angular bin.
4. Groups of adjacent cells are considered as spatial regions called blocks. The grouping of cells into a block is the basis for grouping and normalization of histograms.
5. Normalized group of histograms represents the block histogram. The set of these block histograms represents the descriptor.

## HOG:

In [2]:
def extractHOG(filename, outputFileName):
    """Compute the HOG descriptor of one image and append it to a CSV file.

    The image is read with OpenCV, resized to a 64x128 window and described
    with 8x8 cells, 16x16 blocks, an 8x8 block stride and 9 orientation bins
    using signed gradients.

    Parameters
    ----------
    filename : str
        Path of the image to read.
    outputFileName : str
        Output path *without* extension; ``'.csv'`` is appended and the
        descriptor is added as one new comma-separated row (append mode).

    Returns
    -------
    numpy.ndarray
        The descriptor exactly as returned by ``cv2.HOGDescriptor.compute``.

    Raises
    ------
    ValueError
        If ``cv2.imread`` cannot read ``filename`` (it returns ``None``).
    """
    # cv2.imread signals failure by returning None instead of raising, so
    # check explicitly and report which file was the problem.
    img = cv2.imread(filename)
    if img is None:
        raise ValueError("Could not read image: {}".format(filename))
    img = img.astype('uint8')

    #REF https://learnopencv.com/handwritten-digits-classification-an-opencv-c-python-tutorial/
    # In most cases the default values of these parameters are used:
    # derivAperture, winSigma, histogramNormType, L2HysThreshold,
    # gammaCorrection and nlevels.
    derivAperture = 1
    winSigma = -1.
    histogramNormType = 0
    L2HysThreshold = 0.2
    gammaCorrection = 1
    nlevels = 64

    #REF https://www.intel.com/content/www/us/en/develop/documentation/ipp-dev-reference/top/volume-2-image-processing/computer-vision/feature-detection-functions/histogram-of-oriented-gradients-hog-descriptor.html
    # According to [Dalal05] the recommended values for the HOG parameters are:
    #   - 1D centered derivative mask [-1, 0, +1]
    #   - detection window size 64x128
    #   - cell size 8x8
    #   - block size 16x16 (2x2 cells)
    winSize = (64, 128)
    img = cv2.resize(img, winSize)

    # Blocks exist to tackle illumination variation. A large block size makes
    # local changes less significant while a smaller one weights them more.
    # Typically blockSize is set to 2 x cellSize.
    blockSize = (16, 16)
    # blockStride determines the overlap between neighbouring blocks and
    # controls the degree of contrast normalization; typically 50% of blockSize.
    blockStride = (8, 8)
    # cellSize is chosen based on the scale of the features that matter for the
    # classification: a very small cell blows up the feature vector, a very
    # large one may not capture relevant information.
    cellSize = (8, 8)
    # Number of histogram bins; the HOG paper recommends 9 (20-degree steps
    # over 0-180 degrees for unsigned gradients).
    nbins = 9
    # "Signed" gradients keep orientations over 0-360 degrees; "unsigned" ones
    # drop the sign (0-180 degrees). The original HOG paper used unsigned
    # gradients for pedestrian detection; for this problem signed gradients
    # produced slightly better results in the author's experiments.
    signedGradient = True

    hog = cv2.HOGDescriptor(winSize, blockSize, blockStride,
                            cellSize, nbins, derivAperture, winSigma,
                            histogramNormType, L2HysThreshold,
                            gammaCorrection, nlevels, signedGradient)

    # compute() takes a CV_8U image (unsigned 8 bit per channel, values 0-255,
    # the normal range for most image formats) and yields CV_32F descriptors.
    descriptor = hog.compute(img)

    # compute() may return either a flat vector or an (N, 1) column depending
    # on the OpenCV build; flatten so savetxt always writes exactly one row.
    row = np.ravel(descriptor).reshape(1, -1)
    with open(outputFileName + '.csv', 'a') as csvfile:
        np.savetxt(csvfile, row, fmt='%f', delimiter=',')
    return descriptor


In [3]:
def removeFileIfExists(fileName):
    """Delete ``fileName`` if it is an existing regular file; otherwise do nothing."""
    if not os.path.isfile(fileName):
        # Missing paths and non-file paths (e.g. directories) are left alone.
        return
    os.remove(fileName)

In [4]:
def writeFeaturesToFile(features, fileName):
    """Append ``features`` to ``fileName`` as comma-separated ``%f`` values."""
    # Append mode: repeated calls accumulate rows instead of overwriting.
    # The ``with`` block closes the file when it exits.
    with open(fileName, mode='a') as out_file:
        np.savetxt(out_file, features, delimiter=',', fmt='%f')

In [5]:
def readFeaturesFromFile(fileName):
    """Load a comma-separated feature file into a NumPy array.

    Parameters
    ----------
    fileName : str
        Path of the CSV file to read.

    Returns
    -------
    numpy.ndarray
        The parsed values as produced by ``np.genfromtxt`` with ``','`` as
        delimiter.
    """
    # The context manager closes the handle once parsing is done; the previous
    # version left the file open.
    with open(fileName) as csv_file:
        return np.genfromtxt(csv_file, delimiter=",")

In [6]:
def extractFeaturesFromFolder(folder,outputFileName,gender):
    """Extract HOG descriptors for every file in ``folder``.

    Parameters
    ----------
    folder : str
        Directory containing the images (with or without trailing separator).
    outputFileName : str
        Forwarded to ``extractHOG``, which appends each descriptor to
        ``outputFileName + '.csv'``.
    gender : int
        Class label assigned to every image that was processed successfully.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, classes)``, one entry per successfully processed file.
        Files for which ``extractHOG`` raises are reported and skipped.
    """
    features = []
    train_classes = []
    # os.listdir gives no ordering guarantee; sort so the rows written to the
    # CSV come out in the same order on every run and platform.
    for filename in sorted(os.listdir(folder)):
        # os.path.join works whether or not ``folder`` ends with a separator.
        image_path = os.path.join(folder, filename)
        try:
            descriptor = extractHOG(image_path, outputFileName)
        except Exception as exc:
            # Best effort: report which file failed and move on.
            print("Skipping {}: {}".format(image_path, exc))
            continue
        features.append(descriptor)
        train_classes.append(gender)
    return np.array(features),np.array(train_classes)


In [7]:
def extractICDARFeatures(answersFile='train_answers.csv',
                         imagesFolder='images_gender/images/train',
                         outputFileName='icdar_hog',
                         imagesPerWriter=2):
    """Extract HOG descriptors and gender labels for the ICDAR training images.

    Parameters
    ----------
    answersFile : str
        CSV file with a ``male`` column holding one label per writer.
    imagesFolder : str
        Directory containing the training images.
    outputFileName : str
        Forwarded to ``extractHOG``, which appends each descriptor to
        ``outputFileName + '.csv'``.
    imagesPerWriter : int
        Number of consecutive images that share one row of ``answersFile``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, labels)`` with one entry per successfully processed image;
        labels are returned as floats.

    Notes
    -----
    Image ``k`` of the sorted directory listing is paired with label row
    ``k // imagesPerWriter``.
    NOTE(review): this assumes the lexicographic file order follows the row
    order of ``answersFile`` - confirm against the dataset's naming scheme.
    """
    # read csv file
    df = pd.read_csv(answersFile)
    # get the labels
    icdar_classes = df['male'].values
    print(icdar_classes.shape)

    features = []
    labels = []
    # Sorted listing: os.listdir order is arbitrary, which would make the
    # position -> writer mapping differ between machines.
    for position, filename in enumerate(sorted(os.listdir(imagesFolder))):
        image_path = os.path.join(imagesFolder, filename)
        try:
            # The label index comes from the file's position, not from the
            # number of successes, so a failed image cannot shift the labels
            # of the images after it. It is looked up before extraction so a
            # missing label never leaves a feature (or CSV row) without one.
            label = icdar_classes[position // imagesPerWriter]
            descriptor = extractHOG(image_path, outputFileName)
        except Exception as exc:
            print("Skipping {}: {}".format(image_path, exc))
            continue
        features.append(descriptor)
        labels.append(label)
    return np.array(features),np.array(labels, dtype=float)

In [8]:
def getBestParamsForSVM(X_train, Y_train, scalerOutputFileName='scaler.joblib'):
    """Grid-search SVM hyper-parameters and return the best fitted model.

    Min-max scaling and the SVC are wrapped in one scikit-learn ``Pipeline``
    so that the scaler is re-fitted inside every cross-validation fold and
    travels with the returned estimator. The returned object therefore
    accepts *unscaled* features in ``fit`` / ``predict`` / ``score``.

    Parameters
    ----------
    X_train : array-like
        Unscaled training features.
    Y_train : array-like
        Training labels.
    scalerOutputFileName : str
        Kept for backward compatibility; not used (the original
        implementation never wrote the scaler to disk either).

    Returns
    -------
    sklearn.pipeline.Pipeline
        ``best_estimator_`` of the search, refitted on all of ``X_train``
        (``refit=True``); steps are named ``'scaler'`` and ``'svc'``.
    """
    pipeline = Pipeline([
        ('scaler', preprocessing.MinMaxScaler()),
        ('svc', SVC(class_weight='balanced')),
    ])

    C_values = [0.01, 0.1, 1, 10, 100, 1000]
    # Two grids: gamma is only varied for the RBF kernel, so the linear
    # kernel is not re-fitted once per gamma value.
    GridSearchCV_parameters = [
        {
            'svc__kernel': ['rbf'],
            'svc__C': C_values,
            'svc__gamma': [0.0001, 0.001, 0.01, 0.1, 1, 'scale'],
        },
        {
            'svc__kernel': ['linear'],
            'svc__C': C_values,
        },
    ]

    t0 = time()
    clf = GridSearchCV(pipeline, GridSearchCV_parameters, refit=True, cv=9)

    clf = clf.fit(X_train, Y_train)
    print("Best estimator found by grid search:")
    print("\nBest parameters: ", clf.best_params_)
    print("Mean Cross Validation Score: %0.2f" % clf.best_score_)
    print("Training time: %.3f" % (time() - t0))
    return clf.best_estimator_


In [9]:
def FeaturesFromScratch():
    """Regenerate the three HOG feature CSV files from the image folders.

    Removes any existing ``female_hog.csv``, ``male_hog.csv`` and
    ``icdar_hog.csv``, then re-extracts features for the female, male and
    ICDAR image sets (the extraction helpers write the CSV rows) and prints
    the shape of each feature matrix and label vector.
    """
    def _flatten_and_report(features, classes):
        # One row per image. reshape(n, -1) raises on a zero-size array, so an
        # image set that yielded nothing is reported as an empty (0, 0) matrix
        # instead of aborting the remaining sets.
        if features.size == 0:
            features = features.reshape(0, 0)
        else:
            features = features.reshape(features.shape[0], -1)
        print(features.shape)
        print(classes.shape)
        return features

    # The extraction helpers append to these files, so start from a clean slate.
    for stale_file in ('female_hog.csv', 'male_hog.csv', 'icdar_hog.csv'):
        removeFileIfExists(stale_file)

    f_features,f_classes = extractFeaturesFromFolder('Females/Females/','female_hog',0)
    _flatten_and_report(f_features, f_classes)

    m_features,m_classes = extractFeaturesFromFolder('Males/Males/','male_hog',1)
    _flatten_and_report(m_features, m_classes)

    i_features,i_classes = extractICDARFeatures()
    _flatten_and_report(i_features, i_classes)

In [10]:
# FeaturesFromScratch()

In [11]:
def svm(X_train,Y_train,X_test,Y_test):
    """Train an RBF SVM on standardized features and predict the test set.

    A ``StandardScaler`` is fitted on ``X_train`` only and applied to both
    sets. Test and training accuracy are printed.

    Returns
    -------
    The labels predicted for ``X_test``.
    """
    # Fit the scaler on training data only so no test statistics leak in.
    scaler = preprocessing.StandardScaler().fit(X_train)

    print("Training the classifier...")
    train_scaled = scaler.transform(X_train)
    classifier = SVC(
        C=100.0,
        gamma=0.01,
        kernel='rbf',
        class_weight='balanced',
        random_state=1,
    )
    classifier.fit(train_scaled, Y_train)

    print("Predicting the test data...")
    test_scaled = scaler.transform(X_test)
    predicted_labels = classifier.predict(test_scaled)

    test_accuracy = accuracy_score(Y_test, predicted_labels)
    print("Accuracy on test set: {:.4f}".format(test_accuracy))
    train_accuracy = classifier.score(train_scaled, Y_train)
    print("Accuracy on training set: {:.4f}".format(train_accuracy))
    return predicted_labels

In [12]:

# The feature CSVs are produced by FeaturesFromScratch(); run it once
# beforehand if the files are missing or stale.
f_features = readFeaturesFromFile('female_hog.csv')
m_features = readFeaturesFromFile('male_hog.csv')
i_features = readFeaturesFromFile('icdar_hog.csv')

print(m_features.shape)

# Per-writer gender labels of the ICDAR set (column 'male').
df = pd.read_csv('train_answers.csv')
icdar_classes = df['male'].values
print(icdar_classes.shape)

IMAGES_PER_WRITER = 2   # consecutive ICDAR feature rows sharing one label row
SPLIT_SEED = 1          # fixed seed so the splits are reproducible

# Label counts are derived from the loaded feature matrices rather than
# hard-coded, so features and labels cannot drift apart when the image
# folders change. 0 = female, 1 = male.
train_classes = [0] * len(f_features) + [1] * len(m_features)

# Row k of the ICDAR features gets label row k // IMAGES_PER_WRITER.
# NOTE(review): this assumes no image was skipped during extraction and that
# the CSV row order follows the writer order - confirm before relying on it.
icdar_classes_train = np.repeat(icdar_classes, IMAGES_PER_WRITER).astype(float)
if len(i_features) > len(icdar_classes_train):
    raise ValueError(
        "icdar_hog.csv has {} rows but train_answers.csv only provides labels "
        "for {} images".format(len(i_features), len(icdar_classes_train)))
icdar_classes_train = icdar_classes_train[:len(i_features)]
icdar_classes = icdar_classes_train

X_all = np.concatenate((f_features,m_features,i_features),axis=0)
Y_all = np.concatenate((train_classes,icdar_classes),axis=0)
assert len(X_all) == len(Y_all), "features and labels are misaligned"

# 90% train; the remaining 10% is halved into test and validation sets.
# Fresh names for the intermediates keep this cell idempotent on re-run.
X_train, X_holdout, Y_train, Y_holdout = train_test_split(
    X_all, Y_all, test_size=.1, random_state=SPLIT_SEED)

X_test, X_val, Y_test, Y_val = train_test_split(
    X_holdout, Y_holdout, test_size=.5, random_state=SPLIT_SEED)

(232, 13572)
(282,)


In [13]:
# Select the SVM hyper-parameters by grid search on the training split.
clf_svm = getBestParamsForSVM(X_train, Y_train)
# Fit the selected estimator on the training split as passed in here.
clf_svm.fit(X_train, Y_train)

# Mean accuracy on the held-out test split and on the training split.
score = clf_svm.score(X_test, Y_test)
score_train = clf_svm.score(X_train, Y_train)
for report_template, report_value in (
        ("Accuracy on test set: {:.4f}", score),
        ("Accuracy on training set: {:.4f}", score_train)):
    print(report_template.format(report_value))