In [23]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import metrics
import pandas as pd
import cv2
import numpy as np
import features as features
import utilities as util

### Comparing the Accuracy of the models with features


|  | SVM | Random Forest | KNN |  Voting system |
| --------------- | --------------- | --------------- | --------------- | --------------- |
| Hinge | 62.8% | 64.1% | 67.9% |66.6% |
| Hinge + PCA | 71.7% | 60.2% | 80.7% | 76.9% |
| Cold | 61.5% |   64.1% | 67.9%   | 62.8%   |
| Cold + PCA | 67.9%  | 67.9% |  67.9% |70.5% |
| LBP |  |  |  |  |
| GLCM | 50.0%| 43.5% |41.0%   |43.5%  |
| GLCM + PCA (=5) |60.2% |48.7%  | 57.6%  |57.6% |
| HOG | 75.6% | 70.5% |  64.1% | 71.8% |
| HOG + PCA(=200)| 78.2% | 57.7% |  70.5% | 75.6% |
Accuracy of SVM: 0.782051282051282
Accuracy of random forest: 0.5769230769230769
Accuracy of knn: 0.7051282051282052
Accuracy of voting system: 0.7564102564102564

In [3]:
def svm_classify(X_train,y_train,X_test):
    """Train an RBF-kernel support vector classifier and label the test samples.

    Parameters
    ----------
    X_train : training feature vectors.
    y_train : class labels belonging to ``X_train``.
    X_test : feature vectors to classify.

    Returns
    -------
    The labels predicted for ``X_test`` (predictions, not the ground-truth
    test labels).
    """
    # fit() hands back the fitted estimator, so training and prediction chain.
    fitted_model = svm.SVC(kernel='rbf').fit(X_train, y_train)
    return fitted_model.predict(X_test)

In [4]:
def random_forest_classify(X_train,y_train,X_test,max_depth=2,n_estimators=50,random_state=0):
    """Train a random forest and return its predictions for the test samples.

    Parameters
    ----------
    X_train : training feature vectors.
    y_train : class labels belonging to ``X_train``.
    X_test : feature vectors to classify.
    max_depth : maximum depth of each tree (default 2).
    n_estimators : number of trees in the forest (default 50).
    random_state : seed forwarded to the forest. It defaults to a fixed value
        so that repeated runs give the same predictions; pass ``None`` to get
        a differently seeded forest on every call.

    Returns
    -------
    The labels predicted for ``X_test`` (predictions, not the ground-truth
    test labels).
    """
    clf = RandomForestClassifier(
        max_depth=max_depth,
        n_estimators=n_estimators,
        # Without a seed the reported accuracy changes from run to run.
        random_state=random_state,
    )
    clf.fit(X_train, y_train)
    return clf.predict(X_test)

In [5]:
def knn_classify(X_train,y_train,X_test):
    """Classify the test samples with a 5-nearest-neighbours model.

    Parameters
    ----------
    X_train : training feature vectors.
    y_train : class labels belonging to ``X_train``.
    X_test : feature vectors to classify.

    Returns
    -------
    The labels predicted for ``X_test`` (predictions, not the ground-truth
    test labels).
    """
    classifier = KNeighborsClassifier(n_neighbors=5)
    return classifier.fit(X_train, y_train).predict(X_test)

In [6]:
def voting_system(y_pred_svm,y_pred_random_forest,y_pred_knn,y_test):
    """Combine the three classifiers' predictions by majority vote.

    The three prediction vectors are added element-wise; a sample is labelled
    1 when the sum is greater than 1 and 0 otherwise. This is a majority vote
    provided the labels are encoded as 0/1 (assumed, not checked here).

    Parameters
    ----------
    y_pred_svm : predictions of the SVM model.
    y_pred_random_forest : predictions of the random-forest model.
    y_pred_knn : predictions of the k-NN model.
    y_test : ground-truth labels, used only for the printed accuracies.

    Returns
    -------
    The voted predictions. The accuracy of each individual model and of the
    vote is printed as a side effect.
    """
    knn_plus_svm = np.add(y_pred_knn, y_pred_svm)
    votes = np.add(knn_plus_svm, y_pred_random_forest)
    # More than one vote for class 1 wins; keep the dtype of the summed votes.
    y_pred = (votes > 1).astype(votes.dtype)

    report = (
        ("Accuracy of SVM:", y_pred_svm),
        ("Accuracy of random forest:", y_pred_random_forest),
        ("Accuracy of knn:", y_pred_knn),
        ("Accuracy of voting system:", y_pred),
    )
    for caption, predictions in report:
        print(caption, metrics.accuracy_score(y_test, predictions))
    return y_pred


In [7]:
# Dataset locations, one folder per class.
FEMALE_DIR = './data/Female/Female'
MALE_DIR = './data/Male/Male'

# Load every image of each class through the project's util.read_imgs helper.
female_imgs = util.read_imgs(FEMALE_DIR)
male_imgs = util.read_imgs(MALE_DIR)

In [8]:
# Class sizes; the feature-extraction cells iterate with these counts.
females_count, males_count = len(female_imgs), len(male_imgs)

print("Female images count: ", females_count)
print("Male images count: ", males_count)


Female images count:  133
Male images count:  125


In [56]:
def contour_features(img, extractor=features.get_hinge_features):
    """Threshold ``img``, trace its contours and build one feature vector.

    ``extractor`` receives the contours returned by ``cv2.findContours``.
    The default yields the hinge features; pass
    ``features.get_cold_features`` to build the COLD feature set instead.
    """
    thresh = util.threshold_image(img)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    return extractor(contours)


# One feature vector per image: all female images first, then all male images.
# NOTE(review): the HOG cell further down rebinds X and y, so run only the
# feature cell you want before the PCA / split cells.
X = [contour_features(img) for img in female_imgs]
X += [contour_features(img) for img in male_imgs]

# Labels follow the same order: 1 = female, 0 = male.
y = [1] * len(female_imgs) + [0] * len(male_imgs)
    

In [29]:
# HOG dataset: one feature vector per image, females first, then males.
# (features.extract_glcm_features is the alternative extractor for the GLCM runs.)
female_hog = [features.extract_hog_features(female_imgs[i]) for i in range(females_count)]
male_hog = [features.extract_hog_features(male_imgs[i]) for i in range(males_count)]
X = female_hog + male_hog

# Labels follow the same order: 1 = female, 0 = male.
y = [1] * females_count + [0] * males_count

In [30]:
# Standardise every feature, then project onto 200 principal components.
# NOTE(review): both transforms are fitted on all samples, before the
# train/test split performed in a later cell, so the held-out samples
# influence the scaling and the projection. Confirm this is acceptable for
# the reported accuracies.
# NOTE(review): X is rebound in place; re-running this cell without first
# re-running a feature cell feeds already-reduced data back in.
sc = StandardScaler()
pca = PCA(n_components = 200)
X = pca.fit_transform(sc.fit_transform(X))

retained_variance = pca.explained_variance_ratio_.sum()
print("PCA is preserving about: ", retained_variance, "of variance")
print(X.shape)

PCA is preserving about:  0.9534742273202335 of variance
(258, 200)


In [31]:
# Hold out 30% of the samples for testing; the fixed random_state keeps the
# partition identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=3
)

# Train each model on the same split and collect its test predictions.
ypred_svm = svm_classify(X_train=X_train, y_train=y_train, X_test=X_test)
ypred_random_forest = random_forest_classify(X_train=X_train, y_train=y_train, X_test=X_test)
ypred_knn = knn_classify(X_train=X_train, y_train=y_train, X_test=X_test)

# Last expression of the cell: prints the accuracies and displays the voted labels.
voting_system(
    y_pred_svm=ypred_svm,
    y_pred_random_forest=ypred_random_forest,
    y_pred_knn=ypred_knn,
    y_test=y_test,
)

Accuracy of SVM: 0.782051282051282
Accuracy of random forest: 0.5769230769230769
Accuracy of knn: 0.7051282051282052
Accuracy of voting system: 0.7564102564102564


array([1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0])