In [25]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import metrics
import pandas as pd
import cv2
import numpy as np
import features as features
import utilities as util
import glob

### Comparing the Accuracy of the models with features


|  | SVM | Random Forest | KNN |  Voting system |
| --------------- | --------------- | --------------- | --------------- | --------------- |
| Hinge | 66.1% | 71.7% | 68.3% |71.1% |
| Hinge + PCA | 76%  | 67%  | 71.9%  | 74.8%|
| Cold | 65.4% |   65.8% | 67.6%   | 67.7%   |
| Cold + PCA | 65.1%  | 68.6% |  68.5% | 69.5% |
| GLCM | 50.0%| 43.5% |41.0%   |43.5%  |
| GLCM + PCA (=5) |60.2% |48.7%  | 57.6%  |57.6% |
| HOG | 70.4% | 66.5% |  68.5% | 69.4% | Good Precision and Recall!
| HOG + PCA(=200)| 70% | 62.9% |  67.9% | 68.2% |


### Combining the Features results in accuracy of:


|  | SVM | Random Forest | KNN |  Voting system |
| --------------- | --------------- | --------------- | --------------- | --------------- |
| PCA(Hinge + Cold) of 100  | 72% | 64% |72% | 72.5%| Good precision and recall
| PCA(Cold) of 50 + Hinge  | 71% | 71.8% |73% | 73%|
| PCA(Hinge + GLCM) of 50  | 74.2% | 66.4% |70% | 73.5%|
| PCA(Hinge + Cold + glcm ) of 100  | 72.25% | 64.25% |73% | 72.0%|
| PCA(Hinge) + Cold + Hog of 60 | 75% | 71.25% |72.4% | 76%|
| Hinge + Hog| 69.6% | 69.7% | 68.5% | 70.9%|
| PCA(Hinge) of 40 + PCA(HOG) of 200 + RF_max_depth=5 + n_estimators=25|68.6% | 62% | 68% | 69% |
| Hinge + Cold + Hog + RF_max_depth=5 + n_estimators=25 + n_neighbors=5 | 66% | 66% | 67.5% | 66% |
| PCA(Hinge)=30 + PCA(Cold)=130 + PCA(Hog)=200 + RF_max_depth=5 + n_estimators=25 + n_neighbors=5 | 73% | 66.4% | 70.5% | 72% |
|Hog + Cold| 67% | 68% | 68% | 70% |
|PCA(Hog)=200 + PCA(Cold)=120 + RF_max_depth=4 + n_estimators=30| 72% | 65% | 69% | 71.5% |


### Final Descision

|  | SVM | Random Forest | KNN |  Voting system |
| --------------- | --------------- | --------------- | --------------- | --------------- |
|Hinge Cold Hog | 80.8% | 71.9% | 75.8% | 78.3% |

Features used :
SVM: hinge_pca(50 component)
Random_forest: hinge 
knn: X_hinge_pca(50 component)
| 76.6% | 72.75% | 75% | 77.1% |

In [2]:
'''
parms: train data (X_train)
       test data  (X_test)
       train classes  (y_train)

----------------------------

returns: test classes  (y_test)
'''


def svm_classify(X_train,y_train,X_test, kernel='rbf'):
    clf = svm.SVC(kernel=kernel)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    return y_pred

In [3]:
'''
parms: train data (X_train)
       test data  (X_test)
       train classes  (y_train)

----------------------------

returns: test classes  (y_test)
'''

def random_forest_classify(X_train,y_train,X_test, max_depth=2, n_estimators=50):
    clf = RandomForestClassifier(max_depth=max_depth, n_estimators=n_estimators)
    clf.fit(X_train, y_train)
    y_pred=clf.predict(X_test)
    return y_pred

In [4]:
'''
parms: train data (X_train)
       test data  (X_test)
       train classes  (y_train)

----------------------------

returns: test classes  (y_test)
'''

def knn_classify(X_train,y_train,X_test, n_neighbors=5):
        neigh = KNeighborsClassifier(n_neighbors=n_neighbors)
        neigh.fit(X_train, y_train)
        y_pred=neigh.predict(X_test)
        return y_pred

In [5]:
'''
parms: test classes of models tarined by  --> 1) svm
                                              2) random_forest
                                              3)knn

----------------------------

returns: test classes after taking the vote of the 3 classes
'''

def voting_system(y_pred_svm,y_pred_random_forest,y_pred_knn,y_test):
    y_pred=np.add(np.add(y_pred_knn,y_pred_svm),y_pred_random_forest)
    y_pred[y_pred <=1]=0
    y_pred[y_pred >1]=1  
    return y_pred


In [15]:
def apply_PCA(X,n_components=5, verbose=False):
    sc = StandardScaler()
    X_copy = X.copy()
    X_copy = sc.fit_transform(X_copy)
    pca = PCA(n_components = n_components)
    X_copy = pca.fit_transform(X_copy)
    
    if(verbose):
        print("preserved vairance: ", np.sum(pca.explained_variance_ratio_))
    
    return X_copy

In [78]:
def train_model(X,y,test_size=0.3, max_depth=2, n_estimators=50, kernel='rbf', n_neighbors=5, folds=5):
    svm_accuracy = 0
    random_forest_accuracy = 0
    knn_accuracy = 0
    voting_sys_accuracy = 0
    
    # Average the model results to get less random metric
    for i in range(folds):
        # Train the models
        X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=test_size,random_state=45)
        ypred_svm=svm_classify(X_train,y_train,X_test, kernel=kernel)
        ypred_random_forest=random_forest_classify(X_train,y_train,X_test, max_depth=max_depth, n_estimators=n_estimators)
        ypred_knn=knn_classify(X_train,y_train,X_test, n_neighbors=n_neighbors)
        ypred_voting_system = voting_system(ypred_svm,ypred_random_forest,ypred_knn,y_test)
        
        # Sum the accuracies
        svm_accuracy += metrics.accuracy_score(y_test, ypred_svm)
        random_forest_accuracy += metrics.accuracy_score(y_test, ypred_random_forest)
        knn_accuracy += metrics.accuracy_score(y_test, ypred_knn)
        voting_sys_accuracy += metrics.accuracy_score(y_test, ypred_voting_system)
    
    print("Accuracy of SVM:", svm_accuracy/folds)
    print("Accuracy of random forest:", random_forest_accuracy/folds)    
    print("Accuracy of knn:", knn_accuracy/folds)    
    print("Accuracy of voting system:", voting_sys_accuracy/folds)  

In [7]:
female_imgs = util.read_imgs('./data/Females')
male_imgs = util.read_imgs('./data/Males')

In [8]:
females_count = len(female_imgs) 
males_count = len(male_imgs)
print("Female images count: ", females_count)
print("Male images count: ", males_count)

Female images count:  131
Male images count:  232


In [10]:
X_hinge = []
y_hinge = []

X_cold = []
y_cold = []

# Calculating features for females' images
for i in range(females_count):
    # extract features
    thresh = util.threshold_image(female_imgs[i])
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    cold_features = features.get_cold_features(contours)
    hinge_features = features.get_hinge_features(contours)
    X_cold.append(cold_features)
    X_hinge.append(hinge_features)
    y_cold.append(1)
    y_hinge.append(1)
    
# Calculating features for males' images
for i in range(males_count):
    # extract features
    thresh = util.threshold_image(male_imgs[i])
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    cold_features = features.get_cold_features(contours)
    hinge_features = features.get_hinge_features(contours)
    X_cold.append(cold_features)
    X_hinge.append(hinge_features)
    y_cold.append(0)
    y_hinge.append(0)
    

  rhos_log_space = np.log10(rhos)


In [11]:
X_glcm = []
y_glcm = []
# Calculating features for females' images
for i in range(females_count):
    # extract features
    glcm_features = features.extract_glcm_features(female_imgs[i])
    # hog_features=features.extract_hog_features(female_imgs[i])
    # lbp_features=features.extract_lbp(female_imgs[i])
    # X.append(glcm_features)
    # X.append(hog_features)
    X_glcm.append(glcm_features)
    y_glcm.append(1)
    
# Calculating features for males' images
for i in range(males_count):
    glcm_features = features.extract_glcm_features(male_imgs[i])
    # hog_features=features.extract_hog_features(male_imgs[i])
    # lbp_features=features.extract_lbp(male_imgs[i])
    # X.append(glcm_features)
    # X.append(hog_features)
    X_glcm.append(glcm_features)
    y_glcm.append(0)

In [17]:
# Hinge + cold

print("Hinge features  shape :", np.shape(X_hinge))
print("Cold features  shape :", np.shape(X_cold))

X=apply_PCA(X=X_hinge,n_components=50)
y=y_cold

print("X has shape :", np.shape(X))
print("y has shape :", np.shape(y))

print(" After PCA X has shape :", np.shape(X))
train_model(X,y, folds=10)

Hinge features  shape : (258, 66)
Cold features  shape : (258, 420)
X has shape : (258, 50)
y has shape : (258,)
 After PCA X has shape : (258, 50)
Accuracy of SVM: 0.717948717948718
Accuracy of random forest: 0.6500000000000001
Accuracy of knn: 0.8076923076923077
Accuracy of voting system: 0.782051282051282


In [None]:
# Hinge + GLCM
print("Hinge features  shape :", np.shape(X_hinge))
print("GLCM features  shape :", np.shape(X_glcm))


X=np.hstack(((X_hinge,X_glcm)))
y=y_hinge

print("X has shape :", np.shape(X))
print("y has shape :", np.shape(y))

X=apply_PCA(X=X,n_components=50)
print(" After PCA X has shape :", np.shape(X))
train_model(X,y)

In [None]:
# COLD + GLCM
print("Cold features  shape :", np.shape(X_cold))
print("GLCM features  shape :", np.shape(X_glcm))


X=np.hstack(((X_hinge,X_glcm)))
y=y_glcm

print("X has shape :", np.shape(X))
print("y has shape :", np.shape(y))

# X=apply_PCA(X=X,n_components=150)
print(" After PCA X has shape :", np.shape(X))
train_model(X,y)

In [16]:
# Hinge + cold + GLCM
print("Hinge features  shape :", np.shape(X_hinge))
print("Cold features  shape :", np.shape(X_cold))
print("Cold features  shape :", np.shape(X_glcm))


X=np.hstack((np.hstack((X_hinge,X_cold)),X_glcm))
y=y_cold

print("X has shape :", np.shape(X))
print("y has shape :", np.shape(y))

X=apply_PCA(X=X,n_components=100)
print(" After PCA X has shape :", np.shape(X))
train_model(X,y)

Hinge features  shape : (363, 66)
Cold features  shape : (363, 420)
Cold features  shape : (363, 6)
X has shape : (363, 492)
y has shape : (363,)
 After PCA X has shape : (363, 100)
Accuracy of SVM: 0.6697247706422018
Accuracy of random forest: 0.5871559633027523
Accuracy of knn: 0.7155963302752294
Accuracy of voting system: 0.6880733944954128


### Expiremnting on Hinge and Hog Features

In [70]:

X_hinge = []
X_hog = []
X_cold = []
y = []

# Calculating features for females' images
for i in range(females_count):
    # extract features
    hog_features = features.extract_hog_features(female_imgs[i])
    X_hog.append(hog_features)
    
    thresh = util.threshold_image(female_imgs[i])
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    hinge_features = features.get_hinge_features(contours)
    cold_features = features.get_cold_features(contours)
    X_hinge.append(hinge_features)
    X_cold.append(cold_features)
    y.append(1)
    
# Calculating features for males' images
for i in range(males_count):
    # extract features
    hog_features = features.extract_hog_features(male_imgs[i])
    X_hog.append(hog_features)
    
    thresh = util.threshold_image(male_imgs[i])
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
    hinge_features = features.get_hinge_features(contours)
    cold_features = features.get_cold_features(contours)
    X_hinge.append(hinge_features)
    X_cold.append(cold_features)
    y.append(0)
    


  rhos_log_space = np.log10(rhos)


In [84]:
print("X_hog shape: ", np.shape(X_hog))
print("X_hinge shape: ", np.shape(X_hinge))
print("X_cold shape: ", np.shape(X_cold))

X_hog_pca = apply_PCA(X_hog, n_components=170, verbose=False)
X_hinge_pca = apply_PCA(X_hinge, n_components=60, verbose=False)
X_cold_pca = apply_PCA(X_cold, n_components=180, verbose=False)

X = np.hstack((X_hog_pca, X_hinge_pca, X_cold_pca))
train_model(X, y, max_depth=3, n_estimators=40, kernel='rbf', n_neighbors=5, folds=10)

X_hog shape:  (363, 6804)
X_hinge shape:  (363, 66)
X_cold shape:  (363, 420)
Accuracy of SVM: 0.8256880733944956
Accuracy of random forest: 0.6862385321100917
Accuracy of knn: 0.7431192660550457
Accuracy of voting system: 0.8027522935779816
