In [18]:
import cv2
from sklearn import svm
import numpy as np
import glob
import functools
from skimage.feature import local_binary_pattern
from sklearn.preprocessing import StandardScaler
import timeit
import pickle
from skimage.feature import greycomatrix, greycoprops


Helper Functions

In [19]:
'''
takes a function and it's arguments and then call it and calculate the time it takes to run
and return it with the returned value
'''
def function_timer(func, arg1, arg2, arg3):
    s = timeit.default_timer()
    val = func(arg1, arg2, arg3)
    f = timeit.default_timer()
    return (round((f-s)*100)/100.0) , val
'''
ceates a file with the given name and returnthe file object
if the file already exist it opens it and wipes it out
'''
def open_file(fileName):
    return open(fileName, "w")  #a --> for append       w --> for overwrite
'''
write a specific number in a given file object
'''
def write_in_file(file,number):
    file.write(str(number)+"\n")
    return
'''
closes a given file object
'''
def close_file(file):
    file.close()
    return

Read data Pathes of Males and Females

In [20]:
Male_training_data='Dataset/Males/Males/*.jpg'
Female_training_data='Dataset/Females/Females/*.jpg'
'''
Male_training_data='Data_split/train/Males/*.jpg'
Female_training_data='Data_split/train/Females/*.jpg'
'''
Male_testing_data='Data_split/test/Males/*.jpg'
Female_testing_data='Data_split/test/Females/*.jpg'

Preprocessing  Module

In [21]:
def Preprocessing (image) : 
    image = cv2.cvtColor(image,cv2.COLOR_RGB2GRAY)  # convert color from RGB to GRAY
    height, width = image.shape # get image dimensions
    img = cv2.GaussianBlur(image, (9, 9), 0) #decrease noise for dialation
    img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY_INV, 101, 30) # apply threshold on blured image
    image = cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY_INV, 101, 30)  # apply threshold on original image
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 20)) 
    img = cv2.dilate(img, kernel, iterations=8)
    contours = cv2.findContours(img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0] 
    biggest_contour = functools.reduce(lambda c1, c2: c1 if cv2.contourArea(c1) > cv2.contourArea(c2) else c2,contours) #find the biggest contour for text area
    x, y, w, h = cv2.boundingRect(biggest_contour) # find smallest rect that can contain the text area after dialation
    image = image[y:y + h, x:x + w]
    return image

Feature Extraction Module

1- COLD features

In [22]:
#define some constants for cold feature extraction 
N_RHO_BINS = 7
N_ANGLE_BINS = 12
N_BINS = N_RHO_BINS * N_ANGLE_BINS
BIN_SIZE = 360 // N_ANGLE_BINS
R_INNER = 5.0
R_OUTER = 35.0
K_S = np.arange(3, 8)

In [23]:
def get_contour_pixels(bw_image):
        contours, _= cv2.findContours(bw_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE) 
        # contours = imutils.grab_contours(contours)
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[1:]
        
        img2 = bw_image.copy()[:,:,np.newaxis]
        img2 = np.concatenate([img2, img2, img2], axis = 2)
        return contours

In [24]:
def get_cold_features(bw_image, approx_poly_factor = 0.01):
    
        contours = get_contour_pixels(bw_image)
        
        rho_bins_edges = np.log10(np.linspace(R_INNER, R_OUTER, N_RHO_BINS))
        feature_vectors = np.zeros((len(K_S), N_BINS))
        
        # print([len(cnt) for cnt in contours])
        for j, k in enumerate(K_S):
            hist = np.zeros((N_RHO_BINS, N_ANGLE_BINS))
            for cnt in contours:
                epsilon = approx_poly_factor * cv2.arcLength(cnt,True)
                cnt = cv2.approxPolyDP(cnt,epsilon,True)
                n_pixels = len(cnt)
                
                point_1s = np.array([point[0] for point in cnt])
                x1s, y1s = point_1s[:, 0], point_1s[:, 1]
                point_2s = np.array([cnt[(i + k) % n_pixels][0] for i in range(n_pixels)])
                x2s, y2s = point_2s[:, 0], point_2s[:, 1]
                
                thetas = np.degrees(np.arctan2(y2s - y1s, x2s - x1s) + np.pi)
                rhos = np.sqrt((y2s - y1s) ** 2 + (x2s - x1s) ** 2)
                rhos_log_space = np.log10(rhos)
                
                quantized_rhos = np.zeros(rhos.shape, dtype=int)
                for i in range(N_RHO_BINS):
                    quantized_rhos += (rhos_log_space < rho_bins_edges[i])
                    
                for i, r_bin in enumerate(quantized_rhos):
                    theta_bin = int(thetas[i] // BIN_SIZE) % N_ANGLE_BINS
                    hist[r_bin - 1, theta_bin] += 1
                
            normalised_hist = hist / hist.sum()
            feature_vectors[j] = normalised_hist.flatten()
            
        return feature_vectors.flatten()

2- HINGE features

In [25]:
#define some constants for hinge 
N_ANGLE_BINS = 40
BIN_SIZE = 360 // N_ANGLE_BINS
LEG_LENGTH = 25

In [26]:
def get_hinge_features(bw_image):
        
        contours = get_contour_pixels(bw_image)
        
        hist = np.zeros((N_ANGLE_BINS, N_ANGLE_BINS))
            
        # print([len(cnt) for cnt in contours])
        for cnt in contours:
            n_pixels = len(cnt)
            if n_pixels <= LEG_LENGTH:
                continue
            
            points = np.array([point[0] for point in cnt])
            xs, ys = points[:, 0], points[:, 1]
            point_1s = np.array([cnt[(i + LEG_LENGTH) % n_pixels][0] for i in range(n_pixels)])
            point_2s = np.array([cnt[(i - LEG_LENGTH) % n_pixels][0] for i in range(n_pixels)])
            x1s, y1s = point_1s[:, 0], point_1s[:, 1]
            x2s, y2s = point_2s[:, 0], point_2s[:, 1]
            
            phi_1s = np.degrees(np.arctan2(y1s - ys, x1s - xs) + np.pi)
            phi_2s = np.degrees(np.arctan2(y2s - ys, x2s - xs) + np.pi)
            
            indices = np.where(phi_2s > phi_1s)[0]
            
            for i in indices:
                phi1 = int(phi_1s[i] // BIN_SIZE) % N_ANGLE_BINS
                phi2 = int(phi_2s[i] // BIN_SIZE) % N_ANGLE_BINS
                hist[phi1, phi2] += 1
                
        normalised_hist = hist / np.sum(hist)
        feature_vector = normalised_hist[np.triu_indices_from(normalised_hist, k = 1)]
        
        return feature_vector

3- LBP Features

In [27]:
def get_LBP_features (img) :
    radius = 2
    n_points = 16 * radius
    lbp = local_binary_pattern(img, n_points, radius, method='nri_uniform')
    n_bins = n_points * (n_points - 1) + 3
    lbp_hist = np.histogram(lbp.ravel(), bins=np.arange(n_bins + 1), density=True)[0]
    return lbp_hist

4- GLCM Features

In [28]:
def get_all_glcm_features(gray_scale_img):
    
    #size of co-occ matrix = number of gray levels
    image_array = np.array(gray_scale_img)
    #print('first pixel= ', image_array[0][0])
    coocurrence_matrix = greycomatrix(image_array, [1], [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4])
    contrast = greycoprops(coocurrence_matrix, 'contrast')
    homogeneity = greycoprops(coocurrence_matrix, 'homogeneity')
    #mean = greycoprops(coocurrence_matrix, 'mean')
    energy = greycoprops(coocurrence_matrix, 'energy')
    #entropy = greycoprops(coocurrence_matrix, 'entropy')
    #variance = greycoprops(coocurrence_matrix, 'variance')
    correlation = greycoprops(coocurrence_matrix, 'correlation')
    return contrast, homogeneity, energy, correlation

In [29]:
'''
a function that return all the required features of a given image
'''
def get_features(img):
    '''
        glcm_features = []
        contrast, homogeneity, energy, correlation= get_all_glcm_features((img * 255).astype(np.uint8))
        features = []
        features.append(contrast.ravel())
        features.append(homogeneity.ravel())
        features.append(energy.ravel())
        features.append(correlation.ravel())
        features = np.array(features).ravel()
        glcm_features.append(features)
        glcm_features = (np.array(glcm_features)).ravel()
    '''    
    hinge_features = get_hinge_features(img)
    return hinge_features

Model Selection and Training Module:

In [30]:
'''
Model Training:
    a function that creates an svm model and train it with the given features
    then it saves the model after training
'''
def svm_train(**kwargs):
    #first we read the train data images and extract therir features
    features = []
    for file in glob.glob(Male_training_data):    
        img = cv2.imread(file)  #read male images
        img = Preprocessing(img)
        features.append(np.append(get_features(img),1))

    for file in glob.glob(Female_training_data):    
        img = cv2.imread(file)  #read female images
        img = Preprocessing(img)
        features.append(np.append(get_features(img),0))
  

    features = np.array(features)
    x_train = features[:,:-1]
    #standered scaler: scales the data to have mean = 0 and standered deviation = 1
    std_scaler = StandardScaler()
    x_train = std_scaler.fit_transform(x_train)
    y_train = features[:,-1]
    #svm clasifier
    svm_clf = svm.SVC(**kwargs)
    svm_clf.fit(x_train, y_train)
    # save the model to disk
    pickle.dump(svm_clf, open('svm_model.sav', 'wb'))
    pickle.dump(std_scaler, open('std_scaler.bin', 'wb'))
    return 

Performance Analysis Module:

In [31]:
'''
function that test a given image with a given svm model and returns it's prediction 
valu:
1 --> male
0 --> females
'''
def svm_test(svm_clf,std_scaler, img):
    img = Preprocessing(img)
    features = get_features(img)
    features = std_scaler.transform([features])
    return  svm_clf.predict(features)[0]

In [32]:
'''
this functon takes an svm_clf and generates the required reports for it based on the << imaged in the test file >>
'''
def analyse_performance(svm_clf,std_scaler):
    #open 2-files to write the output in them
    results_file = open_file("results.txt")
    time_file = open_file("times.txt")
    m_correct, m_count, f_correct, f_count = 0,0,0,0
    #testing all test images
    for file in sorted(glob.glob(Male_testing_data)):    
        img = cv2.imread(file)  #read male images
        time,value = function_timer(svm_test, svm_clf, std_scaler, img)
        write_in_file(time_file,time)
        write_in_file(results_file,int(value))
        m_count+=1
        if value == 1:
            m_correct+=1

    for file in sorted(glob.glob(Female_testing_data)):    
        img = cv2.imread(file)  #read female images
        time,value = function_timer(svm_test, svm_clf, std_scaler, img)
        write_in_file(time_file,time)
        write_in_file(results_file,int(value))
        f_count+=1
        if value == 0:
            f_correct+=1

    close_file(results_file)
    close_file(time_file)

    return  (m_correct/m_count)*100, (f_correct/f_count)*100, ((m_correct + f_correct)/(m_count + f_count))*100

Main blocks:

In [33]:
#training block
svm_train(C=5,gamma='scale',kernel="rbf")

In [52]:
#testinging block
svm_clf = pickle.load(open('svm_model.sav', 'rb'))
std_scaler = pickle.load(open('std_scaler.bin', 'rb'))
male_percentage, female_percentage, total_percentag = analyse_performance(svm_clf,std_scaler)
print("male correct percentage= ",male_percentage, "\n", "female correct percentage= ",female_percentage, "\n", "total correct percentage= ", total_percentag)

male correct percentage=  95.74468085106383 
 female correct percentage=  92.5925925925926 
 total correct percentage=  94.5945945945946
