In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold, cross_val_score

In [2]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.svm import SVC

In [3]:
# Load the dataset from a CSV file; the path is relative to the notebook's working directory.
data = pd.read_csv('../data/std_data.csv')

In [4]:
# Feature matrix: every column except the last one, as a NumPy array.
x = data.iloc[:, :-1].values
# Target vector: the last column, with each entry converted to its string form.
y = data.iloc[:, -1].map(str).values

In [5]:
# Two 5-fold splitters: one that keeps the row order, and one that shuffles with a fixed seed.
# NOTE(review): only kf5sh is used by the cells visible in this notebook; kf5 appears unused.
kf5 = KFold(n_splits= 5, shuffle= False)
kf5sh = KFold(n_splits= 5, shuffle= True, random_state= 42)

In [6]:
# using the svc model

In [7]:
# Support-vector classifier with a linear kernel and regularization parameter C=3.0.
model_svm = SVC(C=3.0, kernel='linear')

In [8]:
# Apply 5-fold cross validation to the SVM using the shuffled splitter (kf5sh).
# For every fold: fit on the training rows, predict the held-out rows, print the
# confusion matrix, the classification report and the accuracy, and collect the
# accuracy in acc_svm so the mean can be reported after the loop.
acc_svm = []
for fold, (train_index, test_index) in enumerate(kf5sh.split(x), start=1):
    X_train, X_test = x[train_index], x[test_index]
    Y_train, Y_test = y[train_index], y[test_index]
    model_svm.fit(X_train, Y_train)
    y_pred = model_svm.predict(X_test)
    # Score once and reuse the value for both the printout and the running list.
    fold_accuracy = accuracy_score(Y_test, y_pred)
    print("________________ ** {} ** ____________________".format(fold))
    print(confusion_matrix(Y_test, y_pred))
    print(classification_report(Y_test, y_pred))
    print(fold_accuracy)
    acc_svm.append(fold_accuracy)

print("average accuracy :{}".format(np.average(acc_svm)))

________________ ** 1 ** ____________________
[[30  0  0  1]
 [ 0 43  0  0]
 [ 0  0 27  0]
 [ 0  0  0 80]]
              precision    recall  f1-score   support

         0.0       1.00      0.97      0.98        31
        0.19       1.00      1.00      1.00        43
         2.5       1.00      1.00      1.00        27
         4.5       0.99      1.00      0.99        80

    accuracy                           0.99       181
   macro avg       1.00      0.99      0.99       181
weighted avg       0.99      0.99      0.99       181

0.994475138121547
________________ ** 2 ** ____________________
[[27  0  0  0]
 [ 0 44  0  0]
 [ 0  0 29  0]
 [ 0  3  0 78]]
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        27
        0.19       0.94      1.00      0.97        44
         2.5       1.00      1.00      1.00        29
         4.5       1.00      0.96      0.98        81

    accuracy                           0.98       181
   macr

In [9]:
# Per-fold scores for the SVM via cross_val_score. No cv argument is passed, so the
# library's default splitter is used here rather than kf5sh; the folds therefore are
# not the same ones used by the manual loop.
cross_val_score(model_svm, x , y)

array([0.98342541, 0.97790055, 0.98888889, 1.        , 0.99444444])

In [10]:
# Mean of the per-fold SVM scores returned by cross_val_score (default splitter).
cross_val_score(model_svm, x, y).mean()

0.9889318600368323

In [11]:
#  using random Forest
from sklearn.ensemble import RandomForestClassifier

In [12]:
# Random forest: 30 trees split on entropy, depth capped at 10, sqrt(n_features)
# candidates per split, and at least 8 samples required to split a node.
# random_state is pinned because forest construction is randomized; without it every
# re-run of the notebook produces different scores. The seed matches the one used
# for the shuffled KFold splitter.
model_rf = RandomForestClassifier(
    n_estimators=30,
    criterion='entropy',
    max_depth=10,
    max_features='sqrt',
    max_leaf_nodes=None,
    min_samples_split=8,
    random_state=42,
)

In [13]:
# Apply 5-fold cross validation to the random forest using the shuffled splitter (kf5sh).
# For every fold: fit on the training rows, predict the held-out rows, print the
# confusion matrix, the classification report and the accuracy, and collect the
# accuracy in acc_rf so the mean can be reported after the loop.
acc_rf = []
for fold, (train_index, test_index) in enumerate(kf5sh.split(x), start=1):
    X_train, X_test = x[train_index], x[test_index]
    Y_train, Y_test = y[train_index], y[test_index]
    model_rf.fit(X_train, Y_train)
    y_pred = model_rf.predict(X_test)
    # Score once and reuse the value for both the printout and the running list.
    fold_accuracy = accuracy_score(Y_test, y_pred)
    print("________________ ** {} ** ____________________".format(fold))
    print(confusion_matrix(Y_test, y_pred))
    print(classification_report(Y_test, y_pred))
    print(fold_accuracy)
    acc_rf.append(fold_accuracy)

print("average accuracy :{}".format(np.average(acc_rf)))

________________ ** 1 ** ____________________
[[30  0  0  1]
 [ 0 43  0  0]
 [ 0  0 27  0]
 [ 0  0  0 80]]
              precision    recall  f1-score   support

         0.0       1.00      0.97      0.98        31
        0.19       1.00      1.00      1.00        43
         2.5       1.00      1.00      1.00        27
         4.5       0.99      1.00      0.99        80

    accuracy                           0.99       181
   macro avg       1.00      0.99      0.99       181
weighted avg       0.99      0.99      0.99       181

0.994475138121547
________________ ** 2 ** ____________________
[[27  0  0  0]
 [ 0 44  0  0]
 [ 0  1 28  0]
 [ 0  0  0 81]]
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        27
        0.19       0.98      1.00      0.99        44
         2.5       1.00      0.97      0.98        29
         4.5       1.00      1.00      1.00        81

    accuracy                           0.99       181
   macr

In [14]:
# Mean of the per-fold random-forest scores returned by cross_val_score (default splitter).
cross_val_score(model_rf, x, y).mean()

0.9911295273173726

In [15]:


# using NB

from sklearn.naive_bayes import GaussianNB

In [16]:
# Gaussian naive Bayes classifier; priors=None means no class priors are supplied explicitly.
model_nb =  GaussianNB(priors=None)

In [17]:
# Apply 5-fold cross validation to naive Bayes using the shuffled splitter (kf5sh).
# For every fold: fit on the training rows, predict the held-out rows, print the
# confusion matrix, the classification report and the accuracy, and collect the
# accuracy in acc_nb so the mean can be reported after the loop.
acc_nb = []
for fold, (train_index, test_index) in enumerate(kf5sh.split(x), start=1):
    X_train, X_test = x[train_index], x[test_index]
    Y_train, Y_test = y[train_index], y[test_index]
    model_nb.fit(X_train, Y_train)
    y_pred = model_nb.predict(X_test)
    # Score once and reuse the value for both the printout and the running list.
    fold_accuracy = accuracy_score(Y_test, y_pred)
    print("________________ ** {} ** ____________________".format(fold))
    print(confusion_matrix(Y_test, y_pred))
    print(classification_report(Y_test, y_pred))
    print(fold_accuracy)
    acc_nb.append(fold_accuracy)

print("average accuracy :{}".format(np.average(acc_nb)))

________________ ** 1 ** ____________________
[[28  0  3  0]
 [ 2 41  0  0]
 [ 0  0 27  0]
 [ 0  1  0 79]]
              precision    recall  f1-score   support

         0.0       0.93      0.90      0.92        31
        0.19       0.98      0.95      0.96        43
         2.5       0.90      1.00      0.95        27
         4.5       1.00      0.99      0.99        80

    accuracy                           0.97       181
   macro avg       0.95      0.96      0.96       181
weighted avg       0.97      0.97      0.97       181

0.9668508287292817
________________ ** 2 ** ____________________
[[27  0  0  0]
 [ 0 44  0  0]
 [ 0  0 29  0]
 [ 2  4  0 75]]
              precision    recall  f1-score   support

         0.0       0.93      1.00      0.96        27
        0.19       0.92      1.00      0.96        44
         2.5       1.00      1.00      1.00        29
         4.5       1.00      0.93      0.96        81

    accuracy                           0.97       181
   mac

In [18]:
# Summary table of the mean per-fold accuracy collected by the manual K-fold loops.
# Each entry pairs a row template with the list of fold accuracies it reports.
accuracy_rows = (
    ("||________________________SVM__________________||____________________{}____________________", acc_svm),
    ("||__________________RANDOM_FOREST______________||____________________{}____________________", acc_rf),
    ("||________________________NB___________________||____________________{}____________________", acc_nb),
)
print("_______________________________________ACCURACY TABLE______________________________________________________")
for row_template, fold_scores in accuracy_rows:
    print(row_template.format(np.average(fold_scores)))

_______________________________________ACCURACY TABLE______________________________________________________
||________________________SVM__________________||____________________0.9933578882750155____________________
||__________________RANDOM_FOREST______________||____________________0.9966789441375077____________________
||________________________NB___________________||____________________0.9767403314917127____________________


In [None]:
# using cross_val
# Every row reports the mean of cross_val_score for its model, so the three numbers
# come from the same evaluation procedure. Previously the SVM and NB rows printed the
# averages from the manual K-fold loops (acc_svm / acc_nb), which use a different splitter.
print("_______________________________________ACCURACY TABLE______________________________________________________")
print("||________________________SVM__________________||____________________{}____________________".format(np.average(cross_val_score(model_svm, x, y))))
print("||__________________RANDOM_FOREST______________||____________________{}____________________".format(np.average(cross_val_score(model_rf, x, y))))
print("||________________________NB___________________||____________________{}____________________".format(np.average(cross_val_score(model_nb, x, y))))