### Import libraries and module

In [1]:
from sklearn.svm import SVC
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score,roc_curve, roc_auc_score, auc   
from sklearn.model_selection import cross_val_score, StratifiedKFold, LeaveOneOut

# import custom module 
import data_preprocessing as dpp

Number of Low Cases: 303 (30.30%)
Number of Medium Cases: 332 (33.20%)
Number of High Cases: 365 (36.50%)


<Figure size 2500x2500 with 1 Axes>

70.00% in training set
30.00% in test set
70.00% in training set
30.00% in test set


# Section 1: Model training by support vector machine with all features

In [2]:
# Build the SVM classifier. probability=True enables predict_proba, which the
# ROC section needs. scikit-learn fits those probabilities with an internal,
# shuffled cross-validation, so random_state is pinned to keep the
# probabilities (and therefore the ROC curve) reproducible between runs.
svm_model = SVC(gamma="auto", probability=True, random_state=42)

# Train on the training split; ravel() passes the target to fit() as a 1-D array.
svm_model.fit(dpp.F_train, dpp.P_train.ravel())

SVC(gamma='auto', probability=True)

### Prediction results 

In [3]:
# Predict class labels for both splits with the fitted all-features model.
svm_model_prediction_from_trained_data = svm_model.predict(dpp.F_train)
svm_model_prediction_from_test_data = svm_model.predict(dpp.F_test)

# Accuracy (fraction of correctly classified samples) on each split.
svm_model_accuracy_from_trained_data = accuracy_score(
    y_true=dpp.P_train,
    y_pred=svm_model_prediction_from_trained_data,
)
svm_model_accuracy_from_test_data = accuracy_score(
    y_true=dpp.P_test,
    y_pred=svm_model_prediction_from_test_data,
)

In [4]:
# Report both accuracies as percentages with three decimal places.
print(
    "Accuracy from trained data of svm model is : {0:.3f}%".format(
        svm_model_accuracy_from_trained_data * 100
    )
)
print(
    "Accuracy from test data of svm model is : {0:.3f}%".format(
        svm_model_accuracy_from_test_data * 100
    )
)

Accuracy from trained data of svm model is : 100.000%
Accuracy from test data of svm model is : 99.667%


### Cross Validation

#### k-Fold Cross Validation

In [5]:
# 10-fold cross-validation of the SVM over the full feature matrix and target.
# Note: per the scikit-learn docs, an integer cv with a classifier and a
# multiclass target already uses stratified folds.
kF_cv_score = cross_val_score(
    estimator=svm_model,
    X=dpp.F,
    y=dpp.P.ravel(),
    cv=10,
)

In [6]:
# Display the array of per-fold scores from the 10-fold cross-validation run.
kF_cv_score

array([1.  , 1.  , 1.  , 1.  , 0.99, 1.  , 1.  , 1.  , 1.  , 1.  ])

In [7]:
# Mean of the per-fold scores. This is a cross-validation estimate over the
# whole dataset, not the hold-out test accuracy, so the message says so.
print ("Mean 10-fold cross-validation accuracy of svm model is : {0:.3f}%".format(kF_cv_score.mean()*100))

Accuracy from test data of svm model is : 99.900%


#### Stratified cross validation 

In [8]:
# Explicit stratified 5-fold splitter.
SFKF = StratifiedKFold(n_splits=5)

# Score the SVM on each stratified fold of the full dataset.
SFKF_cv_score = cross_val_score(
    estimator=svm_model,
    X=dpp.F,
    y=dpp.P.ravel(),
    cv=SFKF,
)

In [9]:
# Display the array of per-fold scores from the stratified 5-fold run.
SFKF_cv_score

array([1.   , 1.   , 0.995, 1.   , 1.   ])

In [10]:
# Mean of the stratified-fold scores. This is a cross-validation estimate,
# not the hold-out test accuracy, so the message says so.
print ("Mean stratified 5-fold cross-validation accuracy of svm model is : {0:.3f}%".format(SFKF_cv_score.mean()*100))

Accuracy from test data of svm model is : 99.900%


#### LeaveOneOut cross validation

In [None]:
# Leave-one-out splitter: every sample is held out once, so the model is
# refitted once per sample and this cell can take a long time to run.
loo_validation = LeaveOneOut()

LOO_cv_score = cross_val_score(
    estimator=svm_model,
    X=dpp.F,
    y=dpp.P.ravel(),
    cv=loo_validation,
)

In [None]:
# Display the array of per-split scores from the leave-one-out run.
LOO_cv_score

In [None]:
# Mean of the leave-one-out scores. This is a cross-validation estimate,
# not the hold-out test accuracy, so the message says so.
print ("Mean leave-one-out cross-validation accuracy of svm model is : {0:.3f}%".format(LOO_cv_score.mean()*100))

### Performance metrics

#### Confusion matrix

In [None]:
# Confusion matrix of the all-features model on the training split.
print("confusion matrix for trained data :")
print(confusion_matrix(dpp.P_train, svm_model_prediction_from_trained_data))

# Same for the test split; the matrix is kept so it can be plotted below.
print("confusion matrix for test data :")
cm = confusion_matrix(dpp.P_test, svm_model_prediction_from_test_data)
print(cm)

# NOTE(review): display_labels assumes the sorted class labels correspond to
# Low / Medium / High in that order - confirm against data_preprocessing.
cmd = metrics.ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=["Low", "Medium", "High"],
)
cmd.plot()
plt.show()

#### Classification report

In [None]:
# Per-class precision / recall / F1 of the all-features model on the training split.
print("classification report for trained data :")
print(
    classification_report(
        y_true=dpp.P_train,
        y_pred=svm_model_prediction_from_trained_data,
    )
)

# Same report for the hold-out test split.
print("classification report for test data :")
print(
    classification_report(
        y_true=dpp.P_test,
        y_pred=svm_model_prediction_from_test_data,
    )
)

### ROC Curve with all features

In [None]:
# One-vs-rest ROC curve of the all-features SVM for the class labelled 2,
# evaluated on the test split.
svm_model_roc = svm_model.predict_proba(dpp.F_test)

# predict_proba columns are ordered like svm_model.classes_, so look up the
# column that belongs to the positive label instead of hard-coding index 1.
# A hard-coded index silently scores the wrong class whenever classes_[1] != 2.
pos_label = 2
pos_col = list(svm_model.classes_).index(pos_label)

fpr, tpr, threshold = roc_curve(dpp.P_test, svm_model_roc[:, pos_col], pos_label=pos_label)
auc_value = auc(fpr, tpr)

fig = plt.figure(figsize=(5, 5), dpi=100)
fig.patch.set_facecolor('#8a2be2')
ax = plt.gca()
ax.set_facecolor("#FFF8F3")
plt.plot(fpr, tpr, linestyle='-', label='SVM (auc = %0.3f)' % auc_value)
plt.legend(loc='lower right')
# Diagonal reference line: the ROC of a random classifier.
plt.plot([0, 1], [0, 1], 'g--')
plt.xlabel("False Positive")
plt.ylabel("True Positive")
plt.title("ROC Curve For SVM")
plt.show()

# Section 2: Model training by support vector machine with selected features

In [None]:
# Build the SVM classifier for the selected-feature data. probability=True
# enables predict_proba, which the ROC section needs. scikit-learn fits those
# probabilities with an internal, shuffled cross-validation, so random_state
# is pinned to keep the probabilities reproducible between runs.
svm_model_wsf = SVC(gamma="auto", probability=True, random_state=42)

# Train on the selected-feature training split; ravel() passes the target to
# fit() as a 1-D array.
svm_model_wsf.fit(dpp.F_selec_train, dpp.P_selec_train.ravel())

### Prediction results 

In [None]:
# Predict class labels for both selected-feature splits.
svm_model_wsf_prediction_from_trained_data = svm_model_wsf.predict(dpp.F_selec_train)
svm_model_wsf_prediction_from_test_data = svm_model_wsf.predict(dpp.F_selec_test)

# Accuracy (fraction of correctly classified samples) on each split.
svm_model_wsf_accuracy_from_trained_data = accuracy_score(
    y_true=dpp.P_selec_train,
    y_pred=svm_model_wsf_prediction_from_trained_data,
)
svm_model_wsf_accuracy_from_test_data = accuracy_score(
    y_true=dpp.P_selec_test,
    y_pred=svm_model_wsf_prediction_from_test_data,
)

In [None]:
# Report the selected-feature SVM accuracies as percentages. The messages
# previously said "knn model", a copy-paste leftover; this model is the SVM.
print ("Accuracy from trained data of svm model with selected features is : {0:.3f}%".format(svm_model_wsf_accuracy_from_trained_data*100))
print ("Accuracy from test data of svm model with selected features is : {0:.3f}%".format(svm_model_wsf_accuracy_from_test_data*100))

### Performance Metrics

#### Confusion matrix

In [None]:
# Confusion matrix of the selected-feature model on its training split.
print("confusion matrix for train data :")
print(confusion_matrix(dpp.P_selec_train, svm_model_wsf_prediction_from_trained_data))


# Test split: compare against the selected-feature targets (P_selec_test),
# i.e. the targets that belong to F_selec_test, which produced these
# predictions. Using P_test here would mix two different splits.
print("confusion matrix for test data :")
cm = metrics.confusion_matrix(dpp.P_selec_test, svm_model_wsf_prediction_from_test_data)

print(cm)

# NOTE(review): display_labels assumes the sorted class labels correspond to
# Low / Medium / High in that order - confirm against data_preprocessing.
cmd = metrics.ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=["Low","Medium","High"])
cmd.plot()
plt.show()

#### Classification report

In [None]:
# Per-class precision / recall / F1 of the selected-feature model on its training split.
print("classification report for trained data :")
print(
    classification_report(
        y_true=dpp.P_selec_train,
        y_pred=svm_model_wsf_prediction_from_trained_data,
    )
)

# Same report for the selected-feature test split.
print("classification report for test data :")
print(
    classification_report(
        y_true=dpp.P_selec_test,
        y_pred=svm_model_wsf_prediction_from_test_data,
    )
)

### ROC Curve with selected features

In [None]:
# One-vs-rest ROC curve of the selected-feature SVM for the class labelled 2,
# evaluated on the selected-feature test split.
svm_model_wsf_roc = svm_model_wsf.predict_proba(dpp.F_selec_test)

# predict_proba columns are ordered like svm_model_wsf.classes_, so look up
# the column that belongs to the positive label instead of hard-coding index 1.
# A hard-coded index silently scores the wrong class whenever classes_[1] != 2.
pos_label = 2
pos_col = list(svm_model_wsf.classes_).index(pos_label)

fpr_wsf, tpr_wsf, threshold_wsf = roc_curve(dpp.P_selec_test, svm_model_wsf_roc[:, pos_col], pos_label=pos_label)
auc_value_wsf = auc(fpr_wsf, tpr_wsf)

fig = plt.figure(figsize=(5, 5), dpi=100)
fig.patch.set_facecolor('#fa8072')
ax = plt.gca()
ax.set_facecolor("#FFF8F3")
plt.plot(fpr_wsf, tpr_wsf, linestyle='-', label='SVM (auc = %0.3f)' % auc_value_wsf)
plt.legend(loc='lower right')
# Diagonal reference line: the ROC of a random classifier.
plt.plot([0, 1], [0, 1], 'g--')
plt.xlabel("False Positive")
plt.ylabel("True Positive")
plt.title("ROC Curve For SVM")
plt.show()