In [1]:
import numpy as np
import pandas as pd

#
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

In [3]:
# load numoy array
data = np.load('./data/data_pca_50_target.npz')
data.files

['arr_0', 'arr_1']

In [4]:
data.allow_pickle = True

In [5]:
X = data['arr_0'] # pca data with 50 components
y = data['arr_1'] # target or dependent variable

In [6]:
X.shape, y.shape

((4320, 50), (4320,))

## split the data into train and test

In [7]:
x_train, x_test, y_train, y_test = train_test_split(X,y, test_size=0.2, stratify=y)

print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(3456, 50) (864, 50) (3456,) (864,)


### training machine learning model

In [9]:
model_svc = SVC(probability=True)

param_grid = {'C': [0.5,1,10,20,30,50],
             'kernel':['rbf', 'poly'],
             'gamma': [0.1,0.05,0.01,0.001,0.002,0.005],
             'coef0':[0.1]}

In [10]:
model_grid = GridSearchCV(model_svc, param_grid = param_grid, scoring='accuracy', cv=3, verbose=2)

In [12]:
model_grid.fit(x_train, y_train)

Fitting 3 folds for each of 72 candidates, totalling 216 fits
[CV] END ............C=0.5, coef0=0.1, gamma=0.1, kernel=rbf; total time=   1.7s
[CV] END ............C=0.5, coef0=0.1, gamma=0.1, kernel=rbf; total time=   1.7s
[CV] END ............C=0.5, coef0=0.1, gamma=0.1, kernel=rbf; total time=   1.7s
[CV] END ...........C=0.5, coef0=0.1, gamma=0.1, kernel=poly; total time=   1.0s
[CV] END ...........C=0.5, coef0=0.1, gamma=0.1, kernel=poly; total time=   1.0s
[CV] END ...........C=0.5, coef0=0.1, gamma=0.1, kernel=poly; total time=   1.0s
[CV] END ...........C=0.5, coef0=0.1, gamma=0.05, kernel=rbf; total time=   1.5s
[CV] END ...........C=0.5, coef0=0.1, gamma=0.05, kernel=rbf; total time=   1.5s
[CV] END ...........C=0.5, coef0=0.1, gamma=0.05, kernel=rbf; total time=   1.5s
[CV] END ..........C=0.5, coef0=0.1, gamma=0.05, kernel=poly; total time=   1.0s
[CV] END ..........C=0.5, coef0=0.1, gamma=0.05, kernel=poly; total time=   1.0s
[CV] END ..........C=0.5, coef0=0.1, gamma=0.05

[CV] END ..........C=10, coef0=0.1, gamma=0.002, kernel=poly; total time=   0.8s
[CV] END ...........C=10, coef0=0.1, gamma=0.005, kernel=rbf; total time=   1.2s
[CV] END ...........C=10, coef0=0.1, gamma=0.005, kernel=rbf; total time=   1.2s
[CV] END ...........C=10, coef0=0.1, gamma=0.005, kernel=rbf; total time=   1.2s
[CV] END ..........C=10, coef0=0.1, gamma=0.005, kernel=poly; total time=   0.8s
[CV] END ..........C=10, coef0=0.1, gamma=0.005, kernel=poly; total time=   0.8s
[CV] END ..........C=10, coef0=0.1, gamma=0.005, kernel=poly; total time=   0.8s
[CV] END .............C=20, coef0=0.1, gamma=0.1, kernel=rbf; total time=   1.7s
[CV] END .............C=20, coef0=0.1, gamma=0.1, kernel=rbf; total time=   1.7s
[CV] END .............C=20, coef0=0.1, gamma=0.1, kernel=rbf; total time=   1.7s
[CV] END ............C=20, coef0=0.1, gamma=0.1, kernel=poly; total time=   1.0s
[CV] END ............C=20, coef0=0.1, gamma=0.1, kernel=poly; total time=   1.0s
[CV] END ............C=20, c

[CV] END ..........C=50, coef0=0.1, gamma=0.001, kernel=poly; total time=   0.7s
[CV] END ...........C=50, coef0=0.1, gamma=0.002, kernel=rbf; total time=   1.3s
[CV] END ...........C=50, coef0=0.1, gamma=0.002, kernel=rbf; total time=   1.4s
[CV] END ...........C=50, coef0=0.1, gamma=0.002, kernel=rbf; total time=   1.4s
[CV] END ..........C=50, coef0=0.1, gamma=0.002, kernel=poly; total time=   0.7s
[CV] END ..........C=50, coef0=0.1, gamma=0.002, kernel=poly; total time=   0.7s
[CV] END ..........C=50, coef0=0.1, gamma=0.002, kernel=poly; total time=   0.7s
[CV] END ...........C=50, coef0=0.1, gamma=0.005, kernel=rbf; total time=   1.5s
[CV] END ...........C=50, coef0=0.1, gamma=0.005, kernel=rbf; total time=   1.6s
[CV] END ...........C=50, coef0=0.1, gamma=0.005, kernel=rbf; total time=   1.6s
[CV] END ..........C=50, coef0=0.1, gamma=0.005, kernel=poly; total time=   0.7s
[CV] END ..........C=50, coef0=0.1, gamma=0.005, kernel=poly; total time=   0.7s
[CV] END ..........C=50, coe

In [13]:
model_grid.best_params_

{'C': 10, 'coef0': 0.1, 'gamma': 0.01, 'kernel': 'poly'}

In [18]:
model_final = model_grid.best_estimator_

In [19]:
model_final.get_params()

{'C': 10,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.1,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 0.01,
 'kernel': 'poly',
 'max_iter': -1,
 'probability': True,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [20]:
y_pred = model_final.predict(x_test) #predicted values

In [21]:
y_pred

array(['male', 'male', 'female', 'female', 'female', 'female', 'female',
       'male', 'female', 'female', 'female', 'male', 'female', 'male',
       'female', 'female', 'male', 'male', 'male', 'female', 'female',
       'female', 'female', 'female', 'female', 'male', 'male', 'male',
       'female', 'male', 'female', 'male', 'female', 'female', 'female',
       'female', 'male', 'female', 'female', 'male', 'female', 'female',
       'male', 'male', 'male', 'male', 'female', 'male', 'female', 'male',
       'female', 'female', 'female', 'male', 'female', 'male', 'male',
       'female', 'female', 'female', 'female', 'female', 'male', 'male',
       'male', 'male', 'female', 'female', 'male', 'male', 'female',
       'female', 'male', 'male', 'male', 'female', 'female', 'female',
       'female', 'female', 'female', 'male', 'male', 'female', 'female',
       'male', 'female', 'female', 'female', 'male', 'female', 'female',
       'male', 'male', 'female', 'female', 'male', 'female', 'm

**Classification Report**

In [22]:
cr = metrics.classification_report(y_test, y_pred, output_dict=True)
pd.DataFrame(cr).T

Unnamed: 0,precision,recall,f1-score,support
female,0.810976,0.836478,0.823529,477.0
male,0.790323,0.75969,0.774704,387.0
accuracy,0.802083,0.802083,0.802083,0.802083
macro avg,0.800649,0.798084,0.799116,864.0
weighted avg,0.801725,0.802083,0.801659,864.0


**Kappa Score**

In [23]:
metrics.cohen_kappa_score(y_test, y_pred)

0.5983558426306518

**Area Under Curve(AUC)**

In [27]:
metrics.roc_auc_score(np.where(y_test=="male",1,0),np.where(y_pred=="male",1,0))

0.7980839549510019

### Save Face Recognition Model

In [28]:
import pickle

In [29]:
pickle.dump(model_final, open('./model/model_svm.pickle', mode='wb'))