In [3]:
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd
import numpy as np 
import joblib
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
df_full_feature = pd.read_csv('../Dataset/Normalisasi/processed_dataset_slice_norm.csv',sep=',')

In [4]:
naive_bayes_model = joblib.load('naive_bayes_slice.pkl')
naive_bayes_model

GaussianNB(var_smoothing=0.05711586478126432)

In [5]:
# Dataset Full Feature
full_feature_x = df_full_feature[df_full_feature.columns[:4]]
full_feature_y = df_full_feature['label']

full_feature_x.head(5)

Unnamed: 0,dst,src,Protocol,bytecount
0,0.647059,0.055556,0.0,0.00064
1,0.705882,0.333333,0.5,0.081254
2,0.882353,0.555556,0.0,0.000534
3,0.941176,0.833333,0.5,0.014585
4,0.647059,0.111111,1.0,0.61807


## Training Phase

In [6]:
#Splitting Data 

x_train,x_test,y_train,y_test = train_test_split(full_feature_x,full_feature_y,test_size=0.30,random_state=6)

In [7]:
naive_bayes_model.fit(x_train, y_train)

GaussianNB(var_smoothing=0.05711586478126432)

In [9]:
def calculate_precision(TP, FP):
    return TP/(TP+FP)


def calculate_fnr(TP, FP, FN):
    return FN/(TP+FP)

### Training Evaluation Result 

In [8]:
y_pred_train = naive_bayes_model.predict(x_train)
y_pred_train

array([0, 1, 0, ..., 1, 0, 1], dtype=int64)

In [10]:
cm = confusion_matrix(y_train, y_pred_train, labels=[1,0])
TP = cm[0][0]
FP = cm[1][0]
FN = cm[0][1]
TN = cm[1][1]

print(cm)
print(classification_report(y_train, y_pred_train))
print("Accuracy:",accuracy_score(y_train, y_pred_train)*100)
print("Precision",(calculate_precision(TP,FP))*100)
print("False Negative Rate", (calculate_fnr(TP,FP,FN))*100)


[[ 8741 10916]
 [ 8282 22601]]
              precision    recall  f1-score   support

           0       0.67      0.73      0.70     30883
           1       0.51      0.44      0.48     19657

    accuracy                           0.62     50540
   macro avg       0.59      0.59      0.59     50540
weighted avg       0.61      0.62      0.61     50540

Accuracy: 62.01424614166996
Precision 51.34817599718029
False Negative Rate 64.12500734300653


### Validation Evaluation Result 

In [11]:
y_pred_test = naive_bayes_model.predict(x_test)
y_pred_test

array([0, 0, 1, ..., 1, 1, 0], dtype=int64)

In [12]:
cm = confusion_matrix(y_test, y_pred_test, labels=[1,0])
TP = cm[0][0]
FP = cm[1][0]
FN = cm[0][1]
TN = cm[1][1]

print(cm)
print(classification_report(y_test, y_pred_test))
print("Accuracy:",accuracy_score(y_test, y_pred_test)*100)
print("Precision",(calculate_precision(TP,FP))*100)
print("False Negative Rate", (calculate_fnr(TP,FP,FN))*100)


[[3748 4777]
 [3572 9564]]
              precision    recall  f1-score   support

           0       0.67      0.73      0.70     13136
           1       0.51      0.44      0.47      8525

    accuracy                           0.61     21661
   macro avg       0.59      0.58      0.58     21661
weighted avg       0.61      0.61      0.61     21661

Accuracy: 61.45607312681778
Precision 51.20218579234973
False Negative Rate 65.25956284153006


## Testing Phase

In [13]:
test_full_feature = pd.read_csv('../Dataset/Normalisasi/test_processed_dataset_slice_norm.csv',sep=',')

In [14]:
# Dataset Full Feature
full_feature_x = df_full_feature[df_full_feature.columns[:4]]
full_feature_y = df_full_feature['label']

full_feature_x.head(5)

Unnamed: 0,dst,src,Protocol,bytecount
0,0.647059,0.055556,0.0,0.00064
1,0.705882,0.333333,0.5,0.081254
2,0.882353,0.555556,0.0,0.000534
3,0.941176,0.833333,0.5,0.014585
4,0.647059,0.111111,1.0,0.61807


In [17]:
pred_test =  naive_bayes_model.predict(full_feature_x)
pred_test

array([0, 0, 0, ..., 0, 0, 1], dtype=int64)

In [18]:
cm = confusion_matrix(full_feature_y, pred_test, labels=[1,0])
TP = cm[0][0]
FP = cm[1][0]
FN = cm[0][1]
TN = cm[1][1]

print(cm)
print(classification_report(full_feature_y, pred_test))
print("Accuracy:",accuracy_score(full_feature_y, pred_test)*100)
print("Precision",(calculate_precision(TP,FP))*100)
print("False Negative Rate", (calculate_fnr(TP,FP,FN))*100)

[[12489 15693]
 [11854 32165]]
              precision    recall  f1-score   support

           0       0.67      0.73      0.70     44019
           1       0.51      0.44      0.48     28182

    accuracy                           0.62     72201
   macro avg       0.59      0.59      0.59     72201
weighted avg       0.61      0.62      0.61     72201

Accuracy: 61.84678882563953
Precision 51.304276383354555
False Negative Rate 64.46617097317504
