In [1]:
import cv2
from sklearn import svm
import numpy as np
import pandas as pd
import glob
import functools
import matplotlib.pyplot as plt
import imutils
from PIL import Image, ImageEnhance
from sklearn import svm
from sklearn.model_selection import train_test_split
from skimage.feature import local_binary_pattern
from sklearn.preprocessing import normalize
import timeit

Define the glob patterns for the male and female training images

In [123]:
# Glob patterns selecting the training images of each class.
# Forward slashes are used as separators because they are accepted on Windows
# as well as on POSIX systems, whereas a backslash is only a path separator on
# Windows (elsewhere these patterns would silently match no files).
Male_training_data = 'Data_split/train/Males/*.jpg'
Female_training_data = 'Data_split/train/Females/*.jpg'

Preprocessing  

In [124]:
def Preprocessing(image):
    """Binarise an image and crop it to its largest text-like region.

    The image is converted to grayscale and inverse-thresholded adaptively.
    A blurred copy is thresholded the same way and heavily dilated so that
    nearby strokes merge into blobs; the bounding rectangle of the largest
    blob is then used to crop the (un-blurred) thresholded image.

    Parameters
    ----------
    image : numpy.ndarray
        A colour image in BGR channel order (what ``cv2.imread`` returns), or
        an already single-channel 2-D image.

    Returns
    -------
    numpy.ndarray
        The inverse-binarised image cropped to the largest region. If no
        contour is found at all, the whole binarised image is returned.

    Raises
    ------
    ValueError
        If ``image`` is None (``cv2.imread`` returns None for unreadable files).
    """
    if image is None:
        raise ValueError(
            "Preprocessing received None instead of an image "
            "(cv2.imread returns None when a file cannot be read)"
        )

    if image.ndim == 3:
        # cv2.imread stores colour channels in B, G, R order, so the BGR
        # conversion code is the one that applies the correct channel weights.
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # Blur before thresholding to reduce noise ahead of the dilation step.
    blurred = cv2.GaussianBlur(gray, (9, 9), 0)
    region_mask = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 101, 30
    )
    # The returned pixels come from the un-blurred image, thresholded identically.
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 101, 30
    )

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 20))
    region_mask = cv2.dilate(region_mask, kernel, iterations=8)

    # findContours returns (contours, hierarchy) in OpenCV 4 but
    # (image, contours, hierarchy) in OpenCV 3; index -2 is the contour list in both.
    contours = cv2.findContours(region_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        # Nothing to crop to (e.g. a blank page): keep the full binarised image.
        return binary

    # Iterating in reverse keeps the original tie-break: among contours of
    # equal maximal area, the last one in the list is chosen.
    biggest_contour = max(reversed(contours), key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(biggest_contour)
    return binary[y:y + h, x:x + w]

Feature Extraction

1- COLD features

In [125]:
# Configuration of the COLD descriptor, read by get_cold_features.
N_RHO_BINS, N_ANGLE_BINS = 7, 12       # histogram rows (distance) and columns (angle)
N_BINS = N_ANGLE_BINS * N_RHO_BINS     # length of one flattened histogram
BIN_SIZE = 360 // N_ANGLE_BINS         # width of one angle bin, in degrees
R_INNER, R_OUTER = 5.0, 35.0           # end points of the distance bin edges (before log10)
K_S = np.arange(3, 8)                  # point-pair offsets k = 3..7, one histogram per k

In [126]:
def get_contour_pixels(bw_image):
    """Return the contours of a binary image, largest first, minus the largest one.

    Contours are extracted with ``cv2.RETR_TREE`` and ``cv2.CHAIN_APPROX_NONE``
    (every boundary pixel is kept), sorted by decreasing ``cv2.contourArea``,
    and the first (largest-area) contour is discarded.
    NOTE(review): the largest contour is presumably the outline of the whole
    text region / page rather than a stroke -- confirm that this is the intent.

    Parameters
    ----------
    bw_image : numpy.ndarray
        Single-channel binary image, as accepted by ``cv2.findContours``.

    Returns
    -------
    list
        The remaining contours in decreasing order of area; empty when the
        image has at most one contour.
    """
    # findContours returns (contours, hierarchy) in OpenCV 4 but
    # (image, contours, hierarchy) in OpenCV 3; index -2 is the contour list in both.
    contours = cv2.findContours(bw_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]
    return sorted(contours, key=cv2.contourArea, reverse=True)[1:]

In [127]:
def get_cold_features(bw_image, approx_poly_factor = 0.01):
    """Compute the COLD descriptor of a binary handwriting image.

    Every contour returned by ``get_contour_pixels`` is simplified with
    ``cv2.approxPolyDP``. For each offset ``k`` in ``K_S``, every polygon
    vertex is paired with the vertex ``k`` positions further along the polygon
    (cyclically); the angle and the log10 distance of each pair are accumulated
    in a 2-D histogram with ``N_RHO_BINS`` rows, which is normalised to sum to 1.

    Parameters
    ----------
    bw_image : numpy.ndarray
        Single-channel binary image.
    approx_poly_factor : float, optional
        Polygon approximation tolerance as a fraction of the contour perimeter.

    Returns
    -------
    numpy.ndarray
        1-D vector of length ``len(K_S) * N_BINS``: the flattened histograms,
        one per ``k``. A histogram that received no points is left all-zero
        (instead of NaN from a 0/0 division).
    """
    contours = get_contour_pixels(bw_image)

    # N_ANGLE_BINS and BIN_SIZE are module-level names that this notebook
    # rebinds later for the hinge descriptor, so reading them here at call time
    # would yield the wrong histogram width. N_BINS is computed once from the
    # COLD values and never rebound, so the COLD angle resolution is recovered
    # from it.
    n_angle_bins = N_BINS // N_RHO_BINS
    angle_bin_size = 360 // n_angle_bins

    rho_bins_edges = np.log10(np.linspace(R_INNER, R_OUTER, N_RHO_BINS))

    # The polygon approximation does not depend on k: compute it once per contour.
    polygons = []
    for cnt in contours:
        epsilon = approx_poly_factor * cv2.arcLength(cnt, True)
        polygons.append(cv2.approxPolyDP(cnt, epsilon, True).reshape(-1, 2))

    feature_vectors = np.zeros((len(K_S), N_BINS))

    for j, k in enumerate(K_S):
        hist = np.zeros((N_RHO_BINS, n_angle_bins))
        for points in polygons:
            # partners[i] == points[(i + k) % len(points)]
            partners = np.roll(points, -int(k), axis=0)
            dx = partners[:, 0] - points[:, 0]
            dy = partners[:, 1] - points[:, 1]

            thetas = np.degrees(np.arctan2(dy, dx) + np.pi)  # in [0, 360]
            rhos = np.sqrt(dy ** 2 + dx ** 2)
            # A vertex paired with itself (k a multiple of the polygon length)
            # has distance 0; log10 then yields -inf, which is binned like any
            # other distance below R_INNER.
            with np.errstate(divide='ignore'):
                rhos_log_space = np.log10(rhos)

            # Number of bin edges strictly above each distance.
            quantized_rhos = (rhos_log_space[:, np.newaxis] < rho_bins_edges).sum(axis=1)
            theta_bins = (thetas // angle_bin_size).astype(int) % n_angle_bins

            # NOTE(review): a distance >= R_OUTER gives a count of 0, hence row
            # index -1, which wraps to the last row -- the same row used for
            # distances below R_INNER. Kept as in the original; confirm intent.
            np.add.at(hist, (quantized_rhos - 1, theta_bins), 1)

        total = hist.sum()
        if total > 0:
            feature_vectors[j] = (hist / total).flatten()

    return feature_vectors.flatten()

2- HINGE features

In [128]:
# Configuration of the hinge descriptor, read by get_hinge_features.
# Caution: N_ANGLE_BINS and BIN_SIZE were already assigned above with the COLD
# values (12 and 30). These assignments rebind the same module-level names, so
# any code that reads them after this cell has run sees the hinge values.
LEG_LENGTH = 25                    # offset, in contour points, to each end of a hinge
N_ANGLE_BINS = 40                  # angle bins per hinge leg
BIN_SIZE = 360 // N_ANGLE_BINS     # width of one angle bin, in degrees

In [129]:
def get_hinge_features(bw_image):
    """Compute the hinge descriptor of a binary handwriting image.

    For every contour longer than ``LEG_LENGTH`` points, each contour point is
    joined to the points ``LEG_LENGTH`` positions ahead of and behind it
    (cyclically). The directions of the two legs are quantised into
    ``N_ANGLE_BINS`` bins and counted in a joint 2-D histogram, using only the
    points where the second angle is larger than the first. The histogram is
    normalised to sum to 1 and its strict upper triangle is returned.

    ``N_ANGLE_BINS``, ``BIN_SIZE`` and ``LEG_LENGTH`` are read from the module
    namespace at call time.

    Parameters
    ----------
    bw_image : numpy.ndarray
        Single-channel binary image.

    Returns
    -------
    numpy.ndarray
        1-D vector of length ``N_ANGLE_BINS * (N_ANGLE_BINS - 1) / 2``. When no
        contour is long enough the vector is all zeros (instead of NaN from a
        0/0 division).
    """
    contours = get_contour_pixels(bw_image)

    hist = np.zeros((N_ANGLE_BINS, N_ANGLE_BINS))

    for cnt in contours:
        if len(cnt) <= LEG_LENGTH:
            continue

        points = np.asarray(cnt).reshape(-1, 2)
        # ahead[i] == points[(i + LEG_LENGTH) % n], behind[i] == points[(i - LEG_LENGTH) % n]
        ahead = np.roll(points, -LEG_LENGTH, axis=0)
        behind = np.roll(points, LEG_LENGTH, axis=0)

        phi_1s = np.degrees(
            np.arctan2(ahead[:, 1] - points[:, 1], ahead[:, 0] - points[:, 0]) + np.pi
        )
        phi_2s = np.degrees(
            np.arctan2(behind[:, 1] - points[:, 1], behind[:, 0] - points[:, 0]) + np.pi
        )

        # Only hinges whose second angle exceeds the first are counted.
        keep = phi_2s > phi_1s
        bins_1 = (phi_1s[keep] // BIN_SIZE).astype(int) % N_ANGLE_BINS
        bins_2 = (phi_2s[keep] // BIN_SIZE).astype(int) % N_ANGLE_BINS
        # np.add.at accumulates correctly when the same cell occurs several times.
        np.add.at(hist, (bins_1, bins_2), 1)

    total = np.sum(hist)
    normalised_hist = hist / total if total > 0 else hist
    feature_vector = normalised_hist[np.triu_indices_from(normalised_hist, k = 1)]

    return feature_vector

3- LBP Features

In [130]:
def get_LBP_features (img) :
    """Return the normalised histogram of local binary pattern codes of ``img``.

    The codes are computed by ``local_binary_pattern`` with radius 2,
    16 * radius = 32 sampling points and the 'nri_uniform' method. They are
    counted in unit-width bins covering 0 .. P * (P - 1) + 2 (P being the
    number of sampling points), and the histogram is returned as a density.
    """
    lbp_radius = 2
    sample_count = 16 * lbp_radius
    codes = local_binary_pattern(img, sample_count, lbp_radius, method='nri_uniform')

    # One bin per integer code value: edges 0, 1, ..., code_count.
    code_count = sample_count * (sample_count - 1) + 3
    bin_edges = np.arange(code_count + 1)
    histogram, _ = np.histogram(codes.ravel(), bins=bin_edges, density=True)
    return histogram

In [131]:
def svm_test(features, times=100, test_size=0.2, seed=None, **kwargs):
    """Estimate SVM accuracy by averaging over repeated random train/test splits.

    Parameters
    ----------
    features : numpy.ndarray
        2-D array with one sample per row; the last column is the label
        (1 = male, 0 = female) and all other columns are features.
    times : int, optional
        Number of random splits to average over; must be at least 1.
    test_size : float, optional
        Passed to ``train_test_split``.
    seed : int or None, optional
        When given, split ``i`` uses ``random_state = seed + i`` so the run is
        reproducible. ``None`` (default) leaves the splits unseeded.
    **kwargs
        Forwarded to ``svm.SVC``.

    Returns
    -------
    tuple
        ``(mean test accuracy %, mean train accuracy %)``.

    Raises
    ------
    ValueError
        If ``times`` is smaller than 1.

    Notes
    -----
    Also prints the mean percentage of *all* samples (training and test rows
    together) that the fitted classifiers predict as male and as female.
    """
    if times < 1:
        raise ValueError("times must be at least 1, got {}".format(times))

    X, y = features[:, :-1], features[:, -1]
    n_samples = len(X)

    tr_ac, ts_ac, mal, fem = 0, 0, 0, 0
    for i in range(times):
        split_seed = None if seed is None else seed + i
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=split_seed
        )
        clf = svm.SVC(**kwargs)
        clf.fit(X_train, y_train)
        tr_ac += clf.score(X_train, y_train)
        ts_ac += clf.score(X_test, y_test)
        # Predict once on the full set and reuse the result for both class shares.
        predictions = clf.predict(X)
        mal += np.sum(predictions == 1) / n_samples
        fem += np.sum(predictions == 0) / n_samples
    ret = ts_ac * 100 / times, tr_ac * 100 / times, mal * 100 / times, fem * 100 / times
    print('male_predict% = {}\nfemale_predict% = {}'.format(*ret[2:]))
    return ret[:2]

1- Feature extraction using LBP

In [132]:
# Build the LBP feature matrix: one row per training image, made of the LBP
# histogram followed by the class label (1 = male, 0 = female).
LBP_features = []
for pattern, label in ((Male_training_data, 1), (Female_training_data, 0)):
    # Sorting makes the row order independent of the file system's listing order.
    for file in sorted(glob.glob(pattern)):
        img = cv2.imread(file)
        if img is None:
            # cv2.imread does not raise on failure; it returns None.
            raise ValueError("Could not read image: {}".format(file))
        img = Preprocessing(img)
        LBP_features.append(np.append(get_LBP_features(img), label))

if not LBP_features:
    raise FileNotFoundError(
        "No training images matched {!r} or {!r}".format(Male_training_data, Female_training_data)
    )

LBP_features = np.array(LBP_features)
# Scale every feature column to unit norm across the samples; the label column is left untouched.
LBP_features[:,:-1] = normalize(LBP_features[:,:-1], axis=0)
svm_test(LBP_features,C=10,kernel="rbf")


male_predict% = 67.4636678200692
female_predict% = 32.5363321799308


(70.51724137931033, 88.99999999999996)