In [23]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import metrics
import pandas as pd
import cv2
import numpy as np
import features as features
import utilities as util

### Comparing the accuracy of the models for each feature set


|  | SVM | Random Forest | KNN |  Voting system |
| --------------- | --------------- | --------------- | --------------- | --------------- |
| Hinge | 62.8% | 64.1% | 67.9% |66.6% |
| Hinge + PCA | 71.7%  |60.2%  |80.7%  | 76.9%|
| Cold | 61.5% |   64.1% | 67.9%   | 62.8%   |
| Cold + PCA | 67.9%  | 67.9% |  67.9% |70.5% |
| LBP |  |  |  |  |
| GLCM | 50.0%| 43.5% |41.0%   |43.5%  |
| GLCM + PCA (=5) |60.2% |48.7%  | 57.6%  |57.6% |
| HOG | 75.6% | 70.5% |  64.1% | 71.8% |
| HOG + PCA(=200)| 78.2% | 57.7% |  70.5% | 75.6% |
Accuracy of SVM: 0.782051282051282
Accuracy of random forest: 0.5769230769230769
Accuracy of knn: 0.7051282051282052
Accuracy of voting system: 0.7564102564102564



### Accuracy obtained when combining the features:


|  | SVM | Random Forest | KNN |  Voting system |
| --------------- | --------------- | --------------- | --------------- | --------------- |
| PCA(Hinge + Cold) of 100  | 67.9% | 70.5% |67.9% | 70.5%|
| PCA(Hinge + GLCM) of 50  | 65.3% | 58.9% |74.3% | 71.7%|
| PCA(Cold + GLCM) of 50  | 51.2% | 60.2% |41.0% | 53.8%|
| PCA(Hinge + Cold + GLCM) of 100  | 69.2% | 73.0% |67.9% | 73.0%|
| PCA(Hinge + Cold + GLCM) of 200  | 66.6% | 71.7% |69.2% | 76.9%|


In [3]:
def svm_classify(X_train,y_train,X_test):
    '''
    Fit an RBF-kernel SVM on the training data and predict the test data.

    parms: train data (X_train)
           train classes  (y_train)
           test data  (X_test)

    ----------------------------

    returns: predicted classes for X_test (y_pred)
    '''
    clf = svm.SVC(kernel='rbf')
    clf.fit(X_train, y_train)
    return clf.predict(X_test)

In [4]:
def random_forest_classify(X_train,y_train,X_test,random_state=3):
    '''
    Fit a shallow random forest (50 trees, max depth 2) on the training data
    and predict the test data.

    parms: train data (X_train)
           train classes  (y_train)
           test data  (X_test)
           seed for the forest's randomness (random_state);
           pass None for an unseeded forest

    ----------------------------

    returns: predicted classes for X_test (y_pred)
    '''
    # Seeded so that repeated runs report the same accuracy; 3 is the seed
    # train_model already uses for its train/test split.
    clf = RandomForestClassifier(max_depth=2, n_estimators=50, random_state=random_state)
    clf.fit(X_train, y_train)
    return clf.predict(X_test)

In [5]:
def knn_classify(X_train,y_train,X_test):
    '''
    Fit a 5-nearest-neighbours classifier on the training data and predict
    the test data.

    parms: train data (X_train)
           train classes  (y_train)
           test data  (X_test)

    ----------------------------

    returns: predicted classes for X_test (y_pred)
    '''
    neigh = KNeighborsClassifier(n_neighbors=5)
    neigh.fit(X_train, y_train)
    return neigh.predict(X_test)

In [6]:
def voting_system(y_pred_svm,y_pred_random_forest,y_pred_knn,y_test):
    '''
    Combine three sets of binary (0/1) predictions by majority vote and print
    the accuracy of each model and of the vote.

    parms: test classes of models trained by  --> 1) svm (y_pred_svm)
                                                  2) random_forest (y_pred_random_forest)
                                                  3) knn (y_pred_knn)
           true test classes, used only for the printed accuracies (y_test)

    ----------------------------

    returns: test classes after taking the vote of the 3 models
    '''
    # Per-sample number of models that predicted class 1 (0..3 for 0/1 labels).
    votes = np.add(np.add(y_pred_knn, y_pred_svm), y_pred_random_forest)
    # Class 1 wins when at least two of the three models voted for it.
    y_pred = (votes > 1).astype(votes.dtype)
    print("Accuracy of SVM:",metrics.accuracy_score(y_test, y_pred_svm))
    print("Accuracy of random forest:",metrics.accuracy_score(y_test, y_pred_random_forest))
    print("Accuracy of knn:",metrics.accuracy_score(y_test, y_pred_knn))
    print("Accuracy of voting system:",metrics.accuracy_score(y_test, y_pred))
    return y_pred


In [66]:
def apply_PCA(X,n_components=5):
    '''
    Scale X with StandardScaler, then reduce it to n_components principal
    components.

    parms: feature matrix (X)
           number of components to keep (n_components)

    ----------------------------

    returns: the transformed feature matrix

    NOTE(review): the scaler and the PCA are both fitted on every row passed
    in. In this notebook that happens before train_model splits the data, so
    test rows influence the fitted transform.
    '''
    standardized = StandardScaler().fit_transform(X)
    reducer = PCA(n_components=n_components)
    return reducer.fit_transform(standardized)

In [67]:
def train_model(X,y,test_size=0.3):
    '''
    Split the data, run the three classifiers on the same split and print
    their accuracies together with the majority-vote accuracy.

    parms: feature matrix (X)
           classes (y)
           fraction of the rows held out for testing (test_size)

    ----------------------------

    returns: nothing; the accuracies are printed by voting_system
    '''
    # Fixed seed so every feature set is evaluated on the same split.
    split = train_test_split(X, y, test_size=test_size, random_state=3)
    X_train, X_test, y_train, y_test = split

    svm_votes = svm_classify(X_train, y_train, X_test)
    forest_votes = random_forest_classify(X_train, y_train, X_test)
    knn_votes = knn_classify(X_train, y_train, X_test)

    voting_system(svm_votes, forest_votes, knn_votes, y_test)

In [7]:
# Read both image folders through the project's util.read_imgs helper.
female_imgs, male_imgs = map(
    util.read_imgs,
    ('./data/Female/Female', './data/Male/Male'),
)

In [8]:
# Class sizes; the feature-extraction cells loop over these counts.
females_count, males_count = len(female_imgs), len(male_imgs)
print("Female images count: ", females_count)
print("Male images count: ", males_count)


Female images count:  133
Male images count:  125


In [56]:
X_hinge, y_hinge = [], []
X_cold, y_cold = [], []

# One pass per class instead of two copy-pasted loops.
# Females are processed first with label 1, then males with label 0.
for imgs, count, label in ((female_imgs, females_count, 1), (male_imgs, males_count, 0)):
    for idx in range(count):
        thresh = util.threshold_image(imgs[idx])
        # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x but
        # (image, contours, hierarchy) on 3.x; [-2] is the contour list on all of them.
        contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)[-2]
        # Both descriptors are computed from the same contours.
        cold_features = features.get_cold_features(contours)
        hinge_features = features.get_hinge_features(contours)
        X_cold.append(cold_features)
        X_hinge.append(hinge_features)
        y_cold.append(label)
        y_hinge.append(label)
    

In [29]:
# GLCM features: females first (label 1), then males (label 0).
# The HOG and LBP variants (features.extract_hog_features / features.extract_lbp)
# were commented out in this cell and are not computed here.
female_glcm = [features.extract_glcm_features(female_imgs[i]) for i in range(females_count)]
male_glcm = [features.extract_glcm_features(male_imgs[i]) for i in range(males_count)]

X_glcm = female_glcm + male_glcm
y_glcm = [1] * females_count + [0] * males_count

In [30]:
# Hinge + COLD, reduced to 100 principal components
print("Hinge features  shape :", np.shape(X_hinge))
print("Cold features  shape :", np.shape(X_cold))

# Put the two feature sets side by side.
X = np.hstack((X_hinge, X_cold))
# y_cold and y_hinge are filled with the same labels, so either one works.
y = y_cold

print("X has shape :", X.shape)
print("y has shape :", np.shape(y))
X = apply_PCA(X, n_components=100)
print(" After PCA X has shape :", X.shape)
train_model(X, y)


PCA is preserving about:  0.9534742273202335 of variance
(258, 200)


In [None]:
# Hinge + GLCM, reduced to 50 principal components
print("Hinge features  shape :", np.shape(X_hinge))
print("GLCM features  shape :", np.shape(X_glcm))


# Put the two feature sets side by side.
X = np.hstack((X_hinge, X_glcm))
y = y_hinge

print("X has shape :", X.shape)
print("y has shape :", np.shape(y))

X = apply_PCA(X, n_components=50)
print(" After PCA X has shape :", X.shape)
train_model(X, y)

In [None]:
# COLD + GLCM
print("Cold features  shape :", np.shape(X_cold))
print("GLCM features  shape :", np.shape(X_glcm))


# This cell used to stack X_hinge here, which silently repeated the
# Hinge + GLCM experiment instead of evaluating COLD + GLCM.
X=np.hstack((X_cold,X_glcm))
y=y_glcm

print("X has shape :", np.shape(X))
print("y has shape :", np.shape(y))

# PCA re-enabled with 50 components: the results table reports this row as
# "PCA(Cold + GLCM) of 50" and the print below announces the post-PCA shape,
# but the call had been commented out (with n_components=150).
X=apply_PCA(X=X,n_components=50)
print(" After PCA X has shape :", np.shape(X))
train_model(X,y)

In [None]:
# Hinge + cold + GLCM
print("Hinge features  shape :", np.shape(X_hinge))
print("Cold features  shape :", np.shape(X_cold))
# This line reports the GLCM matrix; it was previously labelled "Cold features".
print("GLCM features  shape :", np.shape(X_glcm))


# A single hstack over the three feature sets gives the same result as the nested call.
X=np.hstack((X_hinge,X_cold,X_glcm))
y=y_cold

print("X has shape :", np.shape(X))
print("y has shape :", np.shape(y))

X=apply_PCA(X=X,n_components=200)
print(" After PCA X has shape :", np.shape(X))
train_model(X,y)