In [13]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score, f1_score, accuracy_score

In [14]:
# Load the pre-split training and testing sets from CSV (relative paths).
trainingSet = pd.read_csv(filepath_or_buffer='data/training.csv')
testingSet = pd.read_csv(filepath_or_buffer='data/testing.csv')

In [15]:
# Training split: every column except the last is a feature; the last column is the label.
x_train, y_train = trainingSet.iloc[:, :-1], trainingSet.iloc[:, -1]

In [16]:
# Testing split: every column except the last is a feature; the last column is the label.
x_test, y_test = testingSet.iloc[:, :-1], testingSet.iloc[:, -1]

In [17]:
# Rescale features to the (0, 1) range. The scaler is fitted on the training features only and
# the same fitted mapping is then applied to the test features, so the test set never
# influences the scaling. Both names are rebound to the scaler's output.
scaler = MinMaxScaler(copy=True, feature_range=(0,1))
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [18]:
# Gaussian Naive Bayes with default settings, fitted on the scaled training data.
# fit() returns the estimator itself, so the bare name below evaluates to the same
# fitted object the cell previously ended on.
classifier = GaussianNB().fit(x_train, y_train)
classifier

In [19]:
# Predict a class label for each row of the scaled test features.
y_predicted = classifier.predict(X=x_test)

In [20]:
# Text summary of per-class precision, recall, F1 and support, plus accuracy and averages.
print(classification_report(y_true=y_test, y_pred=y_predicted))

              precision    recall  f1-score   support

           0       0.81      0.39      0.53      2006
           1       0.60      0.91      0.72      2006

    accuracy                           0.65      4012
   macro avg       0.70      0.65      0.63      4012
weighted avg       0.70      0.65      0.63      4012



In [21]:
# Confusion matrix with actual classes on the rows and predicted classes on the columns.
# labels=[0, 1] pins the row/column order (class 0 first) and guarantees a 2x2 result even
# if one class is missing from both y_test and y_predicted; later cells index this matrix
# positionally (cm[0, 0], cm[0, 1]) and label it with exactly two names.
cm = confusion_matrix(y_test, y_predicted, labels=[0, 1])

In [22]:
# Wrap the confusion matrix in a labelled DataFrame for display.
# NOTE(review): assumes matrix position 0 is class 'h' and position 1 is class 'g' —
# confirm against how the label column was encoded.
labels = ['h','g']
index = [f'Actual {label}' for label in labels]
columns = [f'Predicted {label}' for label in labels]
table = pd.DataFrame(data=cm, index=index, columns=columns)
table

Unnamed: 0,Predicted h,Predicted g
Actual h,787,1219
Actual g,184,1822


In [23]:
# Headline scores on the test set. precision/recall/F1 are called with scikit-learn's
# defaults, i.e. binary averaging with label 1 as the positive class.
acc, prec, recall, f1 = (
    score(y_test, y_predicted)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)

# Specificity = TN / (TN + FP), read from the first row of the confusion matrix
# (rows are actual classes, so row 0 holds the actual-class-0 samples).
true_negatives, false_positives = cm[0, 0], cm[0, 1]
specificity = true_negatives / (true_negatives + false_positives)

for metric_caption, metric_value in (
    ('model_accuracy = ', acc),
    ('model_precision = ', prec),
    ('model_recall = ', recall),
    ('model_specificity = ', specificity),
    ('model_f1 = ', f1),
):
    print(metric_caption, metric_value)

model_accuracy =  0.6502991026919243
model_precision =  0.5991450180861558
model_recall =  0.9082751744765702
model_specificity =  0.39232303090727816
model_f1 =  0.7220130770754905
