In [1]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import metrics
import pandas as pd
import cv2
import numpy as np
import features as features
import utilities as util

### Comparing the Accuracy of the models with features


|  | SVM | Random Forest | KNN |  Voting system |
| --------------- | --------------- | --------------- | --------------- | --------------- |
| Hinge | 62.8% | 64.1% | 67.9% |66.6% |
| Hinge + PCA | 71.7%  |60.2%  |80.7  | 76.9%|
| Cold | 61.5% |   64.1% | 67.9%   | 62.8%   |
| Cold + PCA | 67.9%  | 67.9% |  67.9% |70.5% |
| LBP |  |  |   |
| GLCM | 50.0%| 43.5% |41.0%   |43.5%  |
| GLCM + PCA (=5) |60.2% |48.7%  | 57.6%  |57.6% |
| HOG | 75.6% | 70.5% |  64.1% | 71.8% |
| HOG + PCA(=200)| 78.2% | 57.7% |  70.5% | 75.6% |


### Combining the Features results in accuracy of:


|  | SVM | Random Forest | KNN |  Voting system |
| --------------- | --------------- | --------------- | --------------- | --------------- |
| PCA(Hinge + Cold) of 100  | 67.9% | 70.5% |67.9% | 70.5%|
| PCA(Hinge + GLCM) of 50  | 65.3% | 58.9% |74.3% | 71.7%|
| PCA(Cold + GLCM) of 50  | 51.2% | 60.2% |41.0% | 53.8%|
| PCA(Hinge + Cold + glcm ) of 100  | 69.2% | 73.0% |67.9% | 73.0%|
| PCA(Hinge + Cold + glcm ) of 200  | 66.6% | 71.7% |69.2% | 76.9%|
| Hinge + Hog| 75.6% | 69.2% | 64.1% | 70.5%|
| PCA(Hinge) of 40 + PCA(HOG) of 200 + RF_max_depth=5 + n_estimators=25|76.9% | 71.8% | 70.5% | 78.2%
| Hinge + Cold + Hog + RF_max_depth=5 + n_estimators=25 + n_neighbors=5 | 76.9% | 71% | 65.3% | 72.1% |

In [36]:
'''
parms: train data (X_train)
       test data  (X_test)
       train classes  (y_train)

----------------------------

returns: test classes  (y_test)
'''


def svm_classify(X_train,y_train,X_test, kernel='rbf'):
    clf = svm.SVC(kernel=kernel)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    return y_pred

In [19]:
'''
parms: train data (X_train)
       test data  (X_test)
       train classes  (y_train)

----------------------------

returns: test classes  (y_test)
'''

def random_forest_classify(X_train,y_train,X_test, max_depth=2, n_estimators=50):
    clf = RandomForestClassifier(max_depth=max_depth, n_estimators=n_estimators)
    clf.fit(X_train, y_train)
    y_pred=clf.predict(X_test)
    return y_pred

In [43]:
'''
parms: train data (X_train)
       test data  (X_test)
       train classes  (y_train)

----------------------------

returns: test classes  (y_test)
'''

def knn_classify(X_train,y_train,X_test, n_neighbors=5):
        neigh = KNeighborsClassifier(n_neighbors=n_neighbors)
        neigh.fit(X_train, y_train)
        y_pred=neigh.predict(X_test)
        return y_pred

In [50]:
'''
parms: test classes of models tarined by  --> 1) svm
                                              2) random_forest
                                              3)knn

----------------------------

returns: test classes after taking the vote of the 3 classes
'''

def voting_system(y_pred_svm,y_pred_random_forest,y_pred_knn,y_test):
    y_pred=np.add(np.add(y_pred_knn,y_pred_svm),y_pred_random_forest)
    y_pred[y_pred <=1]=0
    y_pred[y_pred >1]=1  
    return y_pred


In [28]:
def apply_PCA(X,n_components=5, verbose=False):
    sc = StandardScaler()
    X = sc.fit_transform(X)
    pca = PCA(n_components = n_components)
    X = pca.fit_transform(X)
    
    if(verbose):
        print("preserved vairance: ", np.sum(pca.explained_variance_ratio_))
    
    return X

In [52]:
def train_model(X,y,test_size=0.3, max_depth=2, n_estimators=50, kernel='rbf', n_neighbors=5, folds=5):
    svm_accuracy = 0
    random_forest_accuracy = 0
    knn_accuracy = 0
    voting_sys_accuracy = 0
    
    # Average the model results to get less random metric
    for i in range(folds):
        # Train the models
        X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=test_size,random_state=3)
        ypred_svm=svm_classify(X_train,y_train,X_test, kernel=kernel)
        ypred_random_forest=random_forest_classify(X_train,y_train,X_test, max_depth=max_depth, n_estimators=n_estimators)
        ypred_knn=knn_classify(X_train,y_train,X_test, n_neighbors=n_neighbors)
        ypred_voting_system = voting_system(ypred_svm,ypred_random_forest,ypred_knn,y_test)
        
        # Sum the accuracies
        svm_accuracy += metrics.accuracy_score(y_test, ypred_svm)
        random_forest_accuracy += metrics.accuracy_score(y_test, ypred_random_forest)
        knn_accuracy += metrics.accuracy_score(y_test, ypred_knn)
        voting_sys_accuracy += metrics.accuracy_score(y_test, ypred_voting_system)
    
    print("Accuracy of SVM:", svm_accuracy/folds)
    print("Accuracy of random forest:", random_forest_accuracy/folds)    
    print("Accuracy of knn:", knn_accuracy/folds)    
    print("Accuracy of voting system:", voting_sys_accuracy/folds)  

In [8]:
female_imgs = util.read_imgs('./data/Female/Female')
male_imgs = util.read_imgs('./data/Male/Male')

In [9]:
females_count = len(female_imgs) 
males_count = len(male_imgs)
print("Female images count: ", females_count)
print("Male images count: ", males_count)


Female images count:  133
Male images count:  125


In [56]:
X_hinge = []
y_hinge = []

X_cold = []
y_cold = []

# Calculating features for females' images
for i in range(females_count):
    # extract features
    thresh = util.threshold_image(female_imgs[i])
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    cold_features = features.get_cold_features(contours)
    hinge_features = features.get_hinge_features(contours)
    X_cold.append(cold_features)
    X_hinge.append(hinge_features)
    y_cold.append(1)
    y_hinge.append(1)
    
# Calculating features for males' images
for i in range(males_count):
    # extract features
    thresh = util.threshold_image(male_imgs[i])
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    cold_features = features.get_cold_features(contours)
    hinge_features = features.get_hinge_features(contours)
    X_cold.append(cold_features)
    X_hinge.append(hinge_features)
    y_cold.append(0)
    y_hinge.append(0)
    

In [29]:
X_glcm = []
y_glcm = []
# Calculating features for females' images
for i in range(females_count):
    # extract features
    glcm_features = features.extract_glcm_features(female_imgs[i])
    # hog_features=features.extract_hog_features(female_imgs[i])
    # lbp_features=features.extract_lbp(female_imgs[i])
    # X.append(glcm_features)
    # X.append(hog_features)
    X_glcm.append(glcm_features)
    y_glcm.append(1)
    
# Calculating features for males' images
for i in range(males_count):
    glcm_features = features.extract_glcm_features(male_imgs[i])
    # hog_features=features.extract_hog_features(male_imgs[i])
    # lbp_features=features.extract_lbp(male_imgs[i])
    # X.append(glcm_features)
    # X.append(hog_features)
    X_glcm.append(glcm_features)
    y_glcm.append(0)

In [30]:
# Hinge + cold

print("Hinge features  shape :", np.shape(X_hinge))
print("Cold features  shape :", np.shape(X_cold))

X=np.hstack((X_hinge,X_cold))
y=y_cold

print("X has shape :", np.shape(X))
print("y has shape :", np.shape(y))
X=apply_PCA(X=X,n_components=100)
print(" After PCA X has shape :", np.shape(X))
train_model(X,y)


PCA is preserving about:  0.9534742273202335 of variance
(258, 200)


In [None]:
# Hinge + GLCM
print("Hinge features  shape :", np.shape(X_hinge))
print("GLCM features  shape :", np.shape(X_glcm))


X=np.hstack(((X_hinge,X_glcm)))
y=y_hinge

print("X has shape :", np.shape(X))
print("y has shape :", np.shape(y))

X=apply_PCA(X=X,n_components=50)
print(" After PCA X has shape :", np.shape(X))
train_model(X,y)

In [None]:
# COLD + GLCM
print("Cold features  shape :", np.shape(X_cold))
print("GLCM features  shape :", np.shape(X_glcm))


X=np.hstack(((X_hinge,X_glcm)))
y=y_glcm

print("X has shape :", np.shape(X))
print("y has shape :", np.shape(y))

# X=apply_PCA(X=X,n_components=150)
print(" After PCA X has shape :", np.shape(X))
train_model(X,y)

In [None]:
# Hinge + cold + GLCM
print("Hinge features  shape :", np.shape(X_hinge))
print("Cold features  shape :", np.shape(X_cold))
print("Cold features  shape :", np.shape(X_glcm))


X=np.hstack((np.hstack((X_hinge,X_cold)),X_glcm))
y=y_cold

print("X has shape :", np.shape(X))
print("y has shape :", np.shape(y))

X=apply_PCA(X=X,n_components=200)
print(" After PCA X has shape :", np.shape(X))
train_model(X,y)

### Expiremnting on Hinge and Hog Features

In [41]:

X_hinge = []
X_hog = []
X_cold = []
y = []

# Calculating features for females' images
for i in range(females_count):
    # extract features
    hog_features = features.extract_hog_features(female_imgs[i])
    X_hog.append(hog_features)
    
    thresh = util.threshold_image(female_imgs[i])
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    hinge_features = features.get_hinge_features(contours)
    cold_features = features.get_cold_features(contours)
    X_hinge.append(hinge_features)
    X_cold.append(cold_features)
    y.append(1)
    
# Calculating features for males' images
for i in range(males_count):
    # extract features
    hog_features = features.extract_hog_features(male_imgs[i])
    X_hog.append(hog_features)
    
    thresh = util.threshold_image(male_imgs[i])
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    hinge_features = features.get_hinge_features(contours)
    cold_features = features.get_cold_features(contours)
    X_hinge.append(hinge_features)
    X_cold.append(cold_features)
    y.append(0)
    


  rhos_log_space = np.log10(rhos)


In [53]:
print("X_hog shape: ", np.shape(X_hog))
print("X_hinge shape: ", np.shape(X_hinge))
print("X_cold shape: ", np.shape(X_cold))

# X_hog_pca = apply_PCA(X_hog, n_components=200, verbose=True)
# X_hinge_pca = apply_PCA(X_hinge, n_components=40, verbose=True)
# X_cold_pca = apply_PCA(X_cold, n_components=40, verbose=True)
X = np.hstack((X_hog, X_hinge, X_cold))
# X = np.hstack((X_hog_pca, X_hinge_pca, X_cold_pca))
train_model(X, y, max_depth=5, n_estimators=25, kernel='rbf', n_neighbors=5)

X_hog shape:  (258, 6804)
X_hinge shape:  (258, 66)
X_cold shape:  (258, 420)
Accuracy of SVM: 0.7692307692307693
Accuracy of random forest: 0.7102564102564103
Accuracy of knn: 0.6538461538461539
Accuracy of voting system: 0.7205128205128205
