In [2]:
# Imports
from cmath import e
from time import time
import cv2
import numpy as np
from skimage.feature import graycomatrix, graycoprops
from skimage.measure import shannon_entropy as Entropy
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from tuning import svmTuner
import utils
import os
import pandas as pd
import numpy as np

In [3]:
def extractGLCM(filename, outputFileName):
    """Extract GLCM texture features from one image and append them to a CSV.

    The image is read with OpenCV, converted to a single gray-level channel
    and summarised by a gray-level co-occurrence matrix computed at distance 1
    for the angles 0, 45, 90 and 135 degrees (256 levels, symmetric,
    normalised).

    Parameters
    ----------
    filename : str
        Path to the image.
    outputFileName : str
        Name of the output file *without* extension; the feature row is
        appended to ``outputFileName + '.csv'``.

    Returns
    -------
    numpy.ndarray
        A single-row 2-D array holding, in this order, contrast, homogeneity,
        energy and correlation, each for the four angles (16 values for the
        distance/angle setup used here).

    Raises
    ------
    ValueError
        If OpenCV cannot read ``filename``. Nothing is written in that case.
    """
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a readable message rather than inside cvtColor.
        raise ValueError("Could not read image: {}".format(filename))

    # Extract Gray Level Channel
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    distances = np.asarray([1])  # step size
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]  # angles (0, 45, 90, 135)

    coOccuranceMat = graycomatrix(
        img, distances, angles, levels=256, symmetric=True, normed=True)

    # One block of values per property; the order defines the CSV column order.
    property_order = ('contrast', 'homogeneity', 'energy', 'correlation')
    features = np.concatenate(
        [np.asarray(graycoprops(coOccuranceMat, prop=name)).flatten()
         for name in property_order]
    ).reshape(1, -1)

    # Append mode: successive calls accumulate one row per image.
    with open(outputFileName+'.csv', 'a') as csvfile:
        np.savetxt(csvfile, features, fmt='%f', delimiter=',')
    return features

In [4]:
def removeFileIfExists(fileName):
    """Delete ``fileName`` if it is an existing regular file; otherwise do nothing."""
    if not os.path.isfile(fileName):
        return
    os.remove(fileName)

In [5]:
def writeFeaturesToFile(features, fileName):
    """Append ``features`` to ``fileName`` as comma-separated ``%f`` values.

    The file is opened in append mode, so existing content is kept.
    """
    with open(fileName, mode='a') as sink:
        np.savetxt(sink, features, delimiter=',', fmt='%f')

In [6]:
def writeHeadersOfCSVFile(fileName):
    """Append a commented header line naming the feature columns to ``fileName``.

    The header lists the 16 columns written per image by ``extractGLCM``:
    contrast, homogeneity, energy and correlation, each for four angles.
    (The previous header also listed four ``entropy`` columns that are never
    written, so it did not match the data rows.)

    ``np.savetxt`` prefixes the header with ``'# '``, which makes
    ``np.genfromtxt`` skip the line when the file is read back.
    """
    columns = ['{}{}'.format(name, index)
               for name in ('Contrast', 'homogeneity', 'energy', 'correlation')
               for index in range(1, 5)]
    with open(fileName, 'a') as csvfile:
        # An empty data array makes savetxt emit only the header line.
        np.savetxt(csvfile, [], delimiter=',', header=','.join(columns))


In [7]:
def readFeaturesFromFile(fileName):
    """Load a comma-separated feature file into a numpy array.

    Parameters
    ----------
    fileName : str
        Path of the CSV file to read.

    Returns
    -------
    numpy.ndarray
        The parsed values as returned by ``np.genfromtxt`` with
        ``delimiter=","`` (lines starting with ``#`` are skipped by its
        default comment handling).
    """
    # Use a context manager so the handle is closed even if parsing fails;
    # the previous version opened the file and never closed it.
    with open(fileName) as csvFile:
        return np.genfromtxt(csvFile, delimiter=",")

In [8]:
def extractFeaturesFromFolder(folder,outputFileName,gender):
    """Extract GLCM features for every file in ``folder``.

    Parameters
    ----------
    folder : str
        Directory containing the images (with or without a trailing
        path separator).
    outputFileName : str
        Passed through to ``extractGLCM`` as its output file name.
    gender : int
        Class label assigned to every successfully processed file.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, classes)`` with one entry per file that was processed
        without error. Files for which ``extractGLCM`` raises are reported
        on stdout and skipped.
    """
    features = []
    train_classes = []
    # os.listdir returns entries in arbitrary order; sort so that the row
    # order is reproducible between runs and machines.
    for filename in sorted(os.listdir(folder)):
        # os.path.join also works when `folder` lacks a trailing separator.
        path = os.path.join(folder, filename)
        try:
            features.append(extractGLCM(path, outputFileName))
        except Exception as exc:
            # Best effort: report which file failed and keep going.
            print("Skipping {}: {}".format(path, exc))
            continue
        train_classes.append(gender)
    return np.array(features),np.array(train_classes)


In [9]:
def extractICDARFeatures():
    """Extract GLCM features and labels for the ICDAR training images.

    Labels come from the ``male`` column of ``train_answers.csv``. Images in
    ``images_gender/images/train`` are visited in sorted filename order and
    the n-th successfully processed image receives label ``n // 2``.

    NOTE(review): this positional mapping assumes two images per label row
    and that sorted filename order matches the row order of the CSV; confirm
    against the dataset layout.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, labels)`` of equal length; ``labels`` is a float array.
        Images that fail to process, or for which no label row exists, are
        reported on stdout and skipped.
    """
    train_dir = 'images_gender/images/train'

    # read csv file and get the labels
    df = pd.read_csv('train_answers.csv')
    writer_labels = df['male'].values
    print(writer_labels.shape)

    features = []
    labels = []
    # os.listdir order is arbitrary; sorting makes the index-based label
    # assignment deterministic.
    for filename in sorted(os.listdir(train_dir)):
        try:
            # Resolve the label first: if it is missing we skip the image
            # entirely, so features and labels can never get out of step
            # (previously the feature was stored before the lookup failed).
            label = writer_labels[len(features)//2]
            feature = extractGLCM(os.path.join(train_dir, filename),'icdar')
        except Exception as exc:
            print("Skipping {}: {}".format(filename, exc))
            continue
        features.append(feature)
        labels.append(label)
    # dtype=float keeps the label dtype the same as the previous np.append-based code.
    return np.array(features),np.array(labels, dtype=float)

In [10]:
def getBestParamsForANN(X_train,Y_train,X_test,Y_test):
    """Grid-search MLP hyper-parameters and report the best configuration.

    Every combination of solver, alpha, max_iter and hidden-layer size is
    fitted on the standardised training data and scored on both splits; the
    per-configuration accuracies are printed as before.

    NOTE(review): the "best" configuration is chosen by its score on
    ``X_test``/``Y_test``, so that score is an optimistic estimate; consider
    a separate validation split.

    Returns
    -------
    dict
        ``{'score', 'solver', 'alpha', 'max_iter', 'hidden_layer_sizes'}`` of
        the configuration with the highest test accuracy (first one wins on
        ties). Previously the scores were collected but never used and the
        function returned ``None``.
    """
    scaler = preprocessing.StandardScaler().fit(X_train)
    # Scaling does not depend on the hyper-parameters: do it once, not per grid point.
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    solver = ['adam', 'lbfgs', 'sgd']
    alpha = [0.0001, 0.001, 0.01, 0.1, 1]
    max_iter = [1000, 2000, 3000, 4000, 5000]
    layer_sizes = [10,15,20,25,30,40]

    best = None
    for solver_name in solver:
        for alpha_value in alpha:
            for iterations in max_iter:
                for hidden_units in layer_sizes:
                    clf = MLPClassifier(solver=solver_name, alpha=alpha_value, max_iter=iterations,
                            hidden_layer_sizes=(hidden_units,),random_state=1)
                    clf.fit(X_train_scaled, Y_train)
                    print("Accuracy on training set: {:.2f}".format(
                        clf.score(X_train_scaled, Y_train)))
                    test_score = clf.score(X_test_scaled, Y_test)
                    print("Accuracy on test set: {:.4f}".format(
                        test_score))
                    print("\n")
                    if best is None or test_score > best['score']:
                        best = {
                            'score': test_score,
                            'solver': solver_name,
                            'alpha': alpha_value,
                            'max_iter': iterations,
                            'hidden_layer_sizes': (hidden_units,),
                        }

    print("Best score: {:.4f} with solver={}, alpha={}, max_iter={}, hidden_layer_sizes={}".format(
        best['score'], best['solver'], best['alpha'], best['max_iter'],
        best['hidden_layer_sizes']))
    return best

In [11]:
def getBestParamsForSVM(X_train,Y_train,X_test,Y_test):
    """Grid-search SVC ``C``/``gamma`` values and report the best pair.

    Each combination is fitted on the standardised training data and scored
    on both splits; the per-configuration accuracies and the final
    "Best score ... at index ..." line are printed as before. The index is
    the flat position in the C-major grid (``i * len(gamma) + j``).

    NOTE(review): the best pair is selected by its score on
    ``X_test``/``Y_test``, so that score is an optimistic estimate.

    Returns
    -------
    dict
        ``{'score', 'C', 'gamma'}`` for the highest test accuracy (first one
        wins on ties). Previously only the flat index was printed and the
        function returned ``None``.
    """
    scaler = preprocessing.StandardScaler().fit(X_train)
    # Scaling does not depend on C or gamma: do it once, not per grid point.
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    C = [10, 100, 1000,5000, 10000]
    gamma = [.1, .01, .001, .0001, .00001]

    scores = []
    grid = []  # (C, gamma) for each entry of `scores`, in the same order
    for c_value in C:
        for gamma_value in gamma:
            clf = svm.SVC(C=c_value, gamma=gamma_value)
            clf.fit(X_train_scaled, Y_train)
            print("Accuracy on training set: {:.2f}".format(
                clf.score(X_train_scaled, Y_train)))
            test_score = clf.score(X_test_scaled, Y_test)
            print("Accuracy on test set: {:.4f}".format(
                test_score))
            scores.append(test_score)
            grid.append((c_value, gamma_value))
            print("\n")

    best_score = max(scores)
    best_index = scores.index(best_score)
    best_C, best_gamma = grid[best_index]
    print("Best score: {:.4f}".format(best_score)+ " at index " +str(best_index))
    print("Best parameters: C={}, gamma={}".format(best_C, best_gamma))
    return {'score': best_score, 'C': best_C, 'gamma': best_gamma}


In [12]:
def trainAndPredict(X_train,Y_train,X_test,Y_test):
    """Fit an MLP on standardised training data and print its accuracy.

    The scaler is fitted on ``X_train`` only and applied to both splits.
    Accuracy is printed for the test split first, then the training split.
    Nothing is returned.
    """
    standardizer = preprocessing.StandardScaler().fit(X_train)

    print("Training the classifier...")
    mlp_settings = dict(alpha=0.1, hidden_layer_sizes=(20,),
                        solver='adam', max_iter=5000, random_state=1)  # 65%
    model = MLPClassifier(**mlp_settings)
    model.fit(standardizer.transform(X_train), Y_train)

    print("Predicting the test data...")
    train_accuracy = model.score(standardizer.transform(X_train), Y_train)
    test_accuracy = model.score(standardizer.transform(X_test), Y_test)
    print("Accuracy on test set: {:.4f}".format(test_accuracy))
    print("Accuracy on training set: {:.4f}".format(train_accuracy))

    # Alternatives that were tried:
    # clf = SVC(C=10.0, gamma=0.01)#62.3%
    # clf = SVC(C=5000.0, class_weight='balanced', gamma=0.0001, kernel='rbf')#62.3%
    
# #     # clf = MLPClassifier( alpha= 0.01, hidden_layer_sizes= (15,) ,
# #     #                    solver= 'adam',max_iter= 4000, random_state= 1) #61% without icdar

In [13]:
# --- One-off feature extraction -------------------------------------------
# Uncomment to regenerate the cached feature CSVs from the raw images.
# removeFileIfExists('female.csv')
# removeFileIfExists('male.csv')
# removeFileIfExists('icdar.csv')

# writeHeadersOfCSVFile('female.csv')
# writeHeadersOfCSVFile('male.csv')
# writeHeadersOfCSVFile('icdar.csv')

# f_features,f_classes = extractFeaturesFromFolder('Females/Females/','female',0)
# f_features =  f_features.reshape(f_features.shape[0], -1)
# print(f_features.shape)
# print(f_classes.shape)
# m_features,m_classes = extractFeaturesFromFolder('Males/Males/','male',1)
# m_features =  m_features.reshape(m_features.shape[0], -1)
# print(m_features.shape)
# print(m_classes.shape)
# i_features,i_classes = extractICDARFeatures()
# i_features = i_features.reshape(i_features.shape[0], -1)
# print(i_features.shape)
# print(i_classes.shape)

# --- Load the cached features ---------------------------------------------
f_features = readFeaturesFromFile('./female.csv')
m_features = readFeaturesFromFile('./male.csv')
i_features = readFeaturesFromFile('./icdar.csv')

# read csv file and get the ICDAR labels
df = pd.read_csv('train_answers.csv')
icdar_classes = df['male'].values
print(icdar_classes.shape)

# --- Build the label vectors ----------------------------------------------
# Label counts are derived from the loaded feature matrices instead of the
# previously hard-coded 131 / 232 / 564, so they cannot drift out of sync
# with the CSV files. Female rows are class 0, male rows are class 1.
train_classes = [0] * len(f_features) + [1] * len(m_features)

# Row k of the ICDAR features gets label k // 2 (same mapping as before).
# NOTE(review): assumes two consecutive feature rows per label row; confirm.
icdar_classes_train = np.repeat(icdar_classes, 2)[:len(i_features)].astype(float)
if len(icdar_classes_train) != len(i_features):
    raise ValueError(
        "icdar.csv has {} rows but train_answers.csv only provides {} labels".format(
            len(i_features), len(icdar_classes_train)))
icdar_classes = icdar_classes_train

# --- Assemble, split and train --------------------------------------------
X_all = np.concatenate((f_features,m_features,i_features ),axis=0)
# Y_train = np.concatenate((f_classes,m_classes,i_classes),axis=0)
Y_all = np.concatenate((train_classes,icdar_classes),axis=0)

X_train, X_test, Y_train, Y_test = train_test_split(
    X_all, Y_all, test_size=0.2,random_state=1)

# getBestParamsForANN(X_train,Y_train,X_test,Y_test)
# getBestParamsForSVM(X_train,Y_train,X_test,Y_test)
trainAndPredict(X_train,Y_train,X_test,Y_test)



(282,)
Training the classifier...
Predicting the test data...
Accuracy on test set: 0.6613
Accuracy on training set: 0.6559
