In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score

In [2]:
data_df = pd.read_csv ('dt.csv')

In [3]:
data_df

Unnamed: 0,dep,wave,m,label
0,57,2.95,5.1,1
1,28,2.59,5.1,1
2,10,3.01,5.1,1
3,22,2.87,5.2,1
4,104,2.74,4.9,1
...,...,...,...,...
853,10,1.21,3.6,0
854,10,0.67,3.2,0
855,10,0.82,3.7,0
856,26,0.72,3.1,0


In [4]:
feature_df = data_df[['dep', 'wave', 'm']]
feature_df

Unnamed: 0,dep,wave,m
0,57,2.95,5.1
1,28,2.59,5.1
2,10,3.01,5.1
3,22,2.87,5.2
4,104,2.74,4.9
...,...,...,...
853,10,1.21,3.6
854,10,0.67,3.2
855,10,0.82,3.7
856,26,0.72,3.1


In [5]:
X = np.asarray(feature_df)
y = data_df['label']

In [46]:
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import MultinomialNB

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [47]:
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, ConfusionMatrixDisplay, precision_score, recall_score, f1_score, classification_report, roc_curve, roc_curve, auc, precision_recall_curve, precision_recall_curve, average_precision_score
from sklearn.model_selection import cross_val_score

In [48]:
pip install category-encoders

Note: you may need to restart the kernel to use updated packages.


In [49]:
import category_encoders as ce

In [50]:
encoder = ce.OneHotEncoder()

X_train = encoder.fit_transform(X_train)

X_test = encoder.transform(X_test)



In [51]:
X_train.head()

Unnamed: 0,0,1,2
0,10.0,2.25,5.1
1,21.0,1.34,2.9
2,10.0,1.77,3.9
3,576.0,2.26,4.9
4,10.0,2.71,5.1


In [52]:
X_test.head()

Unnamed: 0,0,1,2
0,164.0,1.62,4.9
1,10.0,1.21,3.3
2,123.0,2.61,4.7
3,10.0,1.15,3.9
4,10.0,0.67,2.1


In [53]:
cols = X_train.columns

In [54]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)

X_test = scaler.transform(X_test)

In [55]:
X_train = pd.DataFrame(X_train, columns=[cols])

In [56]:
X_test = pd.DataFrame(X_test, columns=[cols])

In [57]:
X_train.head()

Unnamed: 0,0,1,2
0,-0.571554,0.702469,0.982262
1,-0.437985,-0.570973,-1.243636
2,-0.571554,0.030763,-0.231864
3,6.301179,0.716463,0.779907
4,-0.571554,1.346187,0.982262


In [58]:
models = []
models.append(['GaussianNB', GaussianNB()])
models.append(['BernoulliNB', BernoulliNB()])

lst_1= []

for m in range(len(models)):
    lst_2= []
    model = models[m][1]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)  #Confusion Matrix
    accuracies = cross_val_score(estimator = model, X = X_train, y = y_train, cv = 5)   #K-Fold Validation
    roc = roc_auc_score(y_test, y_pred)  #ROC AUC Score
    precision = precision_score(y_test, y_pred)  #Precision Score
    recall = recall_score(y_test, y_pred)  #Recall Score
    f1 = f1_score(y_test, y_pred)  #F1 Score
    print(models[m][0],':')
    print(cm)
    print('Accuracy Score: ',accuracy_score(y_test, y_pred))
    print('')
    print("K-Fold Validation Mean Accuracy: {:.2f} %".format(accuracies.mean()*100))
    print('')
    print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))
    print('')
    print('ROC AUC Score: {:.2f}'.format(roc))
    print('')
    print('Precision: {:.2f}'.format(precision))
    print('')
    print('Recall: {:.2f}'.format(recall))
    print('')
    print('F1: {:.2f}'.format(f1))
    print('-----------------------------------')
    print('')
    lst_2.append(models[m][0])
    lst_2.append((accuracy_score(y_test, y_pred))*100) 
    lst_2.append(accuracies.mean()*100)
    lst_2.append(accuracies.std()*100)
    lst_2.append(roc)
    lst_2.append(precision)
    lst_2.append(recall)
    lst_2.append(f1)
    lst_1.append(lst_2)

GaussianNB :
[[118   5]
 [  5 130]]
Accuracy Score:  0.9612403100775194

K-Fold Validation Mean Accuracy: 95.83 %

Standard Deviation: 1.29 %

ROC AUC Score: 0.96

Precision: 0.96

Recall: 0.96

F1: 0.96
-----------------------------------

BernoulliNB :
[[122   1]
 [ 17 118]]
Accuracy Score:  0.9302325581395349

K-Fold Validation Mean Accuracy: 93.50 %

Standard Deviation: 0.62 %

ROC AUC Score: 0.93

Precision: 0.99

Recall: 0.87

F1: 0.93
-----------------------------------



