In [2]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

In [4]:
#Load numpy array
data = np.load('./data/data_pca_50_target.npz')
data.files

['arr_0', 'arr_1']

In [6]:
data.allow_pickle = True

In [7]:
X = data['arr_0'] #PCA data with 50 components
Y = data['arr_1'] #target or dependent variable

In [9]:
print(X.shape)
print(Y.shape)

(4321, 50)
(4321,)


**Split the data into train and test**

In [12]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, stratify=Y)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(3456, 50) (865, 50) (3456,) (865,)


###  Training machine learning model

In [20]:
model_svc = SVC(probability=True)

param_grid = {'C':[0.5,1,10,20,30,50],
              'kernel':['rbf','poly'],
              'gamma':[0.1,0.05,0.01,0.001,0.002,0.005],
              'coef0':[0,1]
             }

In [21]:
model_grid = GridSearchCV(model_svc,
                          param_grid=param_grid,
                          scoring='accuracy',cv=3,verbose=2)

In [22]:
model_grid.fit(x_train,y_train)

Fitting 3 folds for each of 144 candidates, totalling 432 fits
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   3.0s
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   2.7s
[CV] END ..............C=0.5, coef0=0, gamma=0.1, kernel=rbf; total time=   2.6s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   2.1s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   1.6s
[CV] END .............C=0.5, coef0=0, gamma=0.1, kernel=poly; total time=   2.0s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   2.7s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   2.4s
[CV] END .............C=0.5, coef0=0, gamma=0.05, kernel=rbf; total time=   2.5s
[CV] END ............C=0.5, coef0=0, gamma=0.05, kernel=poly; total time=   1.9s
[CV] END ............C=0.5, coef0=0, gamma=0.05, kernel=poly; total time=   1.3s
[CV] END ............C=0.5, coef0=0, gamma=0.0

[CV] END .............C=1, coef0=0, gamma=0.002, kernel=poly; total time=   1.0s
[CV] END ..............C=1, coef0=0, gamma=0.005, kernel=rbf; total time=   1.4s
[CV] END ..............C=1, coef0=0, gamma=0.005, kernel=rbf; total time=   1.4s
[CV] END ..............C=1, coef0=0, gamma=0.005, kernel=rbf; total time=   1.3s
[CV] END .............C=1, coef0=0, gamma=0.005, kernel=poly; total time=   1.0s
[CV] END .............C=1, coef0=0, gamma=0.005, kernel=poly; total time=   0.9s
[CV] END .............C=1, coef0=0, gamma=0.005, kernel=poly; total time=   0.9s
[CV] END ................C=1, coef0=1, gamma=0.1, kernel=rbf; total time=   2.2s
[CV] END ................C=1, coef0=1, gamma=0.1, kernel=rbf; total time=   2.2s
[CV] END ................C=1, coef0=1, gamma=0.1, kernel=rbf; total time=   2.1s
[CV] END ...............C=1, coef0=1, gamma=0.1, kernel=poly; total time=   1.0s
[CV] END ...............C=1, coef0=1, gamma=0.1, kernel=poly; total time=   1.0s
[CV] END ...............C=1,

[CV] END ............C=10, coef0=1, gamma=0.001, kernel=poly; total time=   0.7s
[CV] END .............C=10, coef0=1, gamma=0.002, kernel=rbf; total time=   1.4s
[CV] END .............C=10, coef0=1, gamma=0.002, kernel=rbf; total time=   1.3s
[CV] END .............C=10, coef0=1, gamma=0.002, kernel=rbf; total time=   1.3s
[CV] END ............C=10, coef0=1, gamma=0.002, kernel=poly; total time=   1.0s
[CV] END ............C=10, coef0=1, gamma=0.002, kernel=poly; total time=   0.8s
[CV] END ............C=10, coef0=1, gamma=0.002, kernel=poly; total time=   0.9s
[CV] END .............C=10, coef0=1, gamma=0.005, kernel=rbf; total time=   1.6s
[CV] END .............C=10, coef0=1, gamma=0.005, kernel=rbf; total time=   1.4s
[CV] END .............C=10, coef0=1, gamma=0.005, kernel=rbf; total time=   1.6s
[CV] END ............C=10, coef0=1, gamma=0.005, kernel=poly; total time=   1.1s
[CV] END ............C=10, coef0=1, gamma=0.005, kernel=poly; total time=   1.1s
[CV] END ............C=10, c

[CV] END .............C=30, coef0=0, gamma=0.01, kernel=poly; total time=   1.1s
[CV] END .............C=30, coef0=0, gamma=0.001, kernel=rbf; total time=   1.4s
[CV] END .............C=30, coef0=0, gamma=0.001, kernel=rbf; total time=   1.3s
[CV] END .............C=30, coef0=0, gamma=0.001, kernel=rbf; total time=   1.4s
[CV] END ............C=30, coef0=0, gamma=0.001, kernel=poly; total time=   1.1s
[CV] END ............C=30, coef0=0, gamma=0.001, kernel=poly; total time=   1.0s
[CV] END ............C=30, coef0=0, gamma=0.001, kernel=poly; total time=   1.2s
[CV] END .............C=30, coef0=0, gamma=0.002, kernel=rbf; total time=   1.7s
[CV] END .............C=30, coef0=0, gamma=0.002, kernel=rbf; total time=   1.6s
[CV] END .............C=30, coef0=0, gamma=0.002, kernel=rbf; total time=   1.6s
[CV] END ............C=30, coef0=0, gamma=0.002, kernel=poly; total time=   0.9s
[CV] END ............C=30, coef0=0, gamma=0.002, kernel=poly; total time=   0.9s
[CV] END ............C=30, c

[CV] END .............C=50, coef0=1, gamma=0.05, kernel=poly; total time=   1.1s
[CV] END ..............C=50, coef0=1, gamma=0.01, kernel=rbf; total time=   2.1s
[CV] END ..............C=50, coef0=1, gamma=0.01, kernel=rbf; total time=   2.1s
[CV] END ..............C=50, coef0=1, gamma=0.01, kernel=rbf; total time=   1.9s
[CV] END .............C=50, coef0=1, gamma=0.01, kernel=poly; total time=   1.7s
[CV] END .............C=50, coef0=1, gamma=0.01, kernel=poly; total time=   1.4s
[CV] END .............C=50, coef0=1, gamma=0.01, kernel=poly; total time=   1.7s
[CV] END .............C=50, coef0=1, gamma=0.001, kernel=rbf; total time=   1.4s
[CV] END .............C=50, coef0=1, gamma=0.001, kernel=rbf; total time=   1.6s
[CV] END .............C=50, coef0=1, gamma=0.001, kernel=rbf; total time=   1.5s
[CV] END ............C=50, coef0=1, gamma=0.001, kernel=poly; total time=   1.1s
[CV] END ............C=50, coef0=1, gamma=0.001, kernel=poly; total time=   1.1s
[CV] END ............C=50, c

In [23]:
model_grid.best_params_

{'C': 10, 'coef0': 0, 'gamma': 0.005, 'kernel': 'rbf'}

In [27]:
model_finale = model_grid.best_estimator_
model_finale.get_params()

{'C': 10,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 0.005,
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': True,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [28]:
y_pred = model_finale.predict(x_test)

### **Classification Report**

In [29]:
cr = metrics.classification_report(y_test,y_pred,output_dict=True)
pd.DataFrame(cr).T

Unnamed: 0,precision,recall,f1-score,support
female,0.826552,0.807531,0.816931,478.0
male,0.768844,0.790698,0.779618,387.0
accuracy,0.8,0.8,0.8,0.8
macro avg,0.797698,0.799115,0.798275,865.0
weighted avg,0.800734,0.8,0.800237,865.0


### Kappa Score

In [30]:
metrics.cohen_kappa_score(y_test,y_pred)

0.5966148479808504

### Area Under Curve (AUC)

In [31]:
metrics.roc_auc_score(np.where(y_test=="male",1,0),np.where(y_pred=="male",1,0))

0.7991145275858714

**Model Evaluation**

- Classification Report
  - Precision, Recall, F1-Score
- Kappa Score
  ---ve (worst model)
   - 0 to 0.5 (bad model)
   - 0.5 to 0.7 (Good Model)
   - 0.7 to 0.9 (Excellent Model)
   - 0.9 to 1.0 (Perfect Model)
- AUC
    - Less than 0.5 (Worst Model)
    - 0.5 to 0.6 (Bad Model)
    - 0.6 to 0.8 (Good Model)
    - 0.8 to 0.9 (Excellent Model)
    - 0.9 to 1.0 (Perfect Model)