In [2]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

In [4]:
# load numpy array
data = np.load("./data/data_pca_50_target.npz")
data.files

['arr_0', 'arr_1']

In [5]:
data.allow_pickle = True

In [6]:
X = data['arr_0'] #pca with 50 components
y = data["arr_1"] # target or dependent


In [7]:
X.shape

(4555, 50)

In [8]:
y.shape

(4555,)

In [9]:
y

array(['gender', 'gender', 'gender', ..., 'male', 'male', 'male'],
      dtype=object)

## split the data into training and testing

In [11]:
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,stratify=y)
print(x_train.shape,x_test.shape,y_train.shape,y_test.shape)

(3644, 50) (911, 50) (3644,) (911,)


### Traning Machine learning model

In [22]:
# support vector clasifier best for image 
model_svc = SVC(probability=True)

param_grid = {'C':[0.5,1,10,20,30,50],
             "kernel":["rbf","poly"],
             "gamma":[0.1,0.05,0.01,0.001,0.002,0.005],
             "coef0":[0,1]
            }

In [23]:
model_grid = GridSearchCV(model_svc,param_grid=param_grid,scoring='accuracy',cv=3,verbose=2)

In [24]:
model_grid.fit(x_train,y_train)

Fitting 3 folds for each of 144 candidates, totalling 432 fits
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   8.5s
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   7.7s
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   7.4s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   5.7s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   5.1s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   5.2s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   7.0s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   6.9s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   6.6s
[CV] END ............C=0.5, coef0=0, gamma=0.05, kernel=poly; total time=   5.0s
[CV] END ............C=0.5, coef0=0, gamma=0.05, kernel=poly; total time=   5.0s
[CV] END ............C=0.5, coef0=0, gamma=0.0

[CV] END .............C=1, coef0=0, gamma=0.002, kernel=poly; total time=   4.5s
[CV] END ..............C=1, coef0=0, gamma=0.005, kernel=rbf; total time=   5.8s
[CV] END ..............C=1, coef0=0, gamma=0.005, kernel=rbf; total time=   5.0s
[CV] END ..............C=1, coef0=0, gamma=0.005, kernel=rbf; total time=   5.7s
[CV] END .............C=1, coef0=0, gamma=0.005, kernel=poly; total time=   4.5s
[CV] END .............C=1, coef0=0, gamma=0.005, kernel=poly; total time=   4.2s
[CV] END .............C=1, coef0=0, gamma=0.005, kernel=poly; total time=   4.6s
[CV] END ................C=1, coef0=1, gamma=0.1, kernel=rbf; total time=   8.1s
[CV] END ................C=1, coef0=1, gamma=0.1, kernel=rbf; total time=   7.8s
[CV] END ................C=1, coef0=1, gamma=0.1, kernel=rbf; total time=   7.8s
[CV] END ...............C=1, coef0=1, gamma=0.1, kernel=poly; total time=   4.1s
[CV] END ...............C=1, coef0=1, gamma=0.1, kernel=poly; total time=   4.3s
[CV] END ...............C=1,

[CV] END ............C=10, coef0=1, gamma=0.001, kernel=poly; total time=   3.9s
[CV] END .............C=10, coef0=1, gamma=0.002, kernel=rbf; total time=   8.0s
[CV] END .............C=10, coef0=1, gamma=0.002, kernel=rbf; total time=   5.8s
[CV] END .............C=10, coef0=1, gamma=0.002, kernel=rbf; total time=   5.4s
[CV] END ............C=10, coef0=1, gamma=0.002, kernel=poly; total time=   3.9s
[CV] END ............C=10, coef0=1, gamma=0.002, kernel=poly; total time=   3.6s
[CV] END ............C=10, coef0=1, gamma=0.002, kernel=poly; total time=   3.9s
[CV] END .............C=10, coef0=1, gamma=0.005, kernel=rbf; total time=   5.5s
[CV] END .............C=10, coef0=1, gamma=0.005, kernel=rbf; total time=   5.8s
[CV] END .............C=10, coef0=1, gamma=0.005, kernel=rbf; total time=   5.9s
[CV] END ............C=10, coef0=1, gamma=0.005, kernel=poly; total time=   4.5s
[CV] END ............C=10, coef0=1, gamma=0.005, kernel=poly; total time=   5.0s
[CV] END ............C=10, c

[CV] END .............C=30, coef0=0, gamma=0.01, kernel=poly; total time=   5.3s
[CV] END .............C=30, coef0=0, gamma=0.001, kernel=rbf; total time=   5.4s
[CV] END .............C=30, coef0=0, gamma=0.001, kernel=rbf; total time=   5.2s
[CV] END .............C=30, coef0=0, gamma=0.001, kernel=rbf; total time=   5.3s
[CV] END ............C=30, coef0=0, gamma=0.001, kernel=poly; total time=   4.4s
[CV] END ............C=30, coef0=0, gamma=0.001, kernel=poly; total time=   4.9s
[CV] END ............C=30, coef0=0, gamma=0.001, kernel=poly; total time=   4.5s
[CV] END .............C=30, coef0=0, gamma=0.002, kernel=rbf; total time=   5.6s
[CV] END .............C=30, coef0=0, gamma=0.002, kernel=rbf; total time=   5.7s
[CV] END .............C=30, coef0=0, gamma=0.002, kernel=rbf; total time=   6.2s
[CV] END ............C=30, coef0=0, gamma=0.002, kernel=poly; total time=   4.4s
[CV] END ............C=30, coef0=0, gamma=0.002, kernel=poly; total time=   4.8s
[CV] END ............C=30, c

[CV] END .............C=50, coef0=1, gamma=0.05, kernel=poly; total time=   5.1s
[CV] END ..............C=50, coef0=1, gamma=0.01, kernel=rbf; total time=   7.5s
[CV] END ..............C=50, coef0=1, gamma=0.01, kernel=rbf; total time=   7.1s
[CV] END ..............C=50, coef0=1, gamma=0.01, kernel=rbf; total time=   6.7s
[CV] END .............C=50, coef0=1, gamma=0.01, kernel=poly; total time=   6.6s
[CV] END .............C=50, coef0=1, gamma=0.01, kernel=poly; total time=   6.1s
[CV] END .............C=50, coef0=1, gamma=0.01, kernel=poly; total time=   8.2s
[CV] END .............C=50, coef0=1, gamma=0.001, kernel=rbf; total time=   7.3s
[CV] END .............C=50, coef0=1, gamma=0.001, kernel=rbf; total time=   6.2s
[CV] END .............C=50, coef0=1, gamma=0.001, kernel=rbf; total time=   6.0s
[CV] END ............C=50, coef0=1, gamma=0.001, kernel=poly; total time=   4.6s
[CV] END ............C=50, coef0=1, gamma=0.001, kernel=poly; total time=   4.3s
[CV] END ............C=50, c

GridSearchCV(cv=3, estimator=SVC(probability=True),
             param_grid={'C': [0.5, 1, 10, 20, 30, 50], 'coef0': [0, 1],
                         'gamma': [0.1, 0.05, 0.01, 0.001, 0.002, 0.005],
                         'kernel': ['rbf', 'poly']},
             scoring='accuracy', verbose=2)

In [25]:
model_grid.best_params_

{'C': 1, 'coef0': 0, 'gamma': 0.05, 'kernel': 'rbf'}

In [27]:
model_final = model_grid.best_estimator_

In [28]:
model_final.get_params()

{'C': 1,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 0.05,
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': True,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

# Model Evaluation

- Classifictaion report
    - Precesion, Recall,F1-score
- Kappa score
    - -1 to +1 (worest to best)
- AUC
    - 0 to 1 (worest to best)

In [29]:
y_pred = model_final.predict(x_test)

In [30]:
y_pred

array(['gender', 'gender', 'gender', 'male', 'gender', 'gender', 'male',
       'gender', 'male', 'gender', 'male', 'gender', 'gender', 'gender',
       'gender', 'gender', 'gender', 'gender', 'gender', 'male', 'male',
       'gender', 'male', 'gender', 'gender', 'male', 'gender', 'gender',
       'male', 'gender', 'gender', 'gender', 'gender', 'gender', 'gender',
       'male', 'gender', 'gender', 'gender', 'male', 'male', 'male',
       'male', 'male', 'gender', 'male', 'gender', 'gender', 'male',
       'gender', 'gender', 'male', 'gender', 'gender', 'gender', 'gender',
       'male', 'male', 'male', 'gender', 'male', 'male', 'gender',
       'gender', 'male', 'male', 'gender', 'male', 'male', 'male',
       'gender', 'gender', 'gender', 'gender', 'gender', 'male', 'male',
       'gender', 'gender', 'gender', 'gender', 'male', 'male', 'gender',
       'gender', 'gender', 'gender', 'gender', 'gender', 'male', 'gender',
       'gender', 'male', 'gender', 'gender', 'gender', 'male', 'g

## Classification report


In [31]:
cr = metrics.classification_report(y_test,y_pred,output_dict=True)

In [32]:
pd.DataFrame(cr).T

Unnamed: 0,precision,recall,f1-score,support
gender,0.80754,0.822222,0.814815,495.0
male,0.783784,0.766827,0.775213,416.0
accuracy,0.796926,0.796926,0.796926,0.796926
macro avg,0.795662,0.794525,0.795014,911.0
weighted avg,0.796692,0.796926,0.796731,911.0


# kappa score

In [37]:
metrics.cohen_kappa_score(y_test,y_pred)

0.5900678375886887

# AUC

In [38]:
metrics.roc_auc_score(np.where(y_test=="male",1,0),np.where(y_pred=="male",1,0))

0.7945245726495727

# Save Face Recognition Model

In [40]:
import pickle

In [41]:
pickle.dump(model_final,open("./model/model_svm.pickle",mode='wb'))