In [1]:
import pandas as pd

# Load the training data from a CSV file located next to the notebook
# (the relative path is resolved against the current working directory).
dataset = pd.read_csv(filepath_or_buffer='preprocessed_train_datasets.csv')

In [2]:
# Random subsampling (repeated holdout) evaluation of Gaussian Naive Bayes
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Split the dataset into the features (X) and the target label (y)
X = dataset.drop('label', axis=1)
y = dataset['label']

# Random subsampling repeats the random 70/30 split several times and averages
# the scores; a single train_test_split call would only be a plain holdout.
# The seeds are fixed so the whole run is reproducible. The last seed is 42,
# so the variables left behind after the loop (X_train, X_test, y_train,
# y_test, nb_model) are exactly those of the original single split.
subsampling_seeds = range(33, 43)
subsampling_scores = []

for seed in subsampling_seeds:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)

    # Train a fresh Naive Bayes model on this repetition's training part
    nb_model = GaussianNB()
    nb_model.fit(X_train, y_train)

    # Accuracy on this repetition's held-out part
    subsampling_scores.append(nb_model.score(X_test, y_test))

# The random subsampling estimate is the mean accuracy over all repetitions.
# NOTE(review): if this prints a perfect 1.0, confirm that no feature column
# leaks the label before trusting the number.
modelScore = sum(subsampling_scores) / len(subsampling_scores)
print("Random Subsampling score: ", modelScore)


Random Subsampling score:  1.0


In [3]:
# Holdout evaluation of Gaussian Naive Bayes
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# The target is the 'label' column; the features are every other column
y = dataset['label']
X = dataset.drop(columns='label')

# One 70/30 split. random_state=1 is a fixed seed, not "no randomness":
# the rows are still shuffled, but in the same way on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

# fit() returns the estimator itself, so the model is created and trained
# on the training part in a single expression
nb_model = GaussianNB().fit(X_train, y_train)

# Accuracy of the trained model on the held-out test part
modelScore = nb_model.score(X_test, y_test)
print("Holdout score: ", modelScore)


Holdout score:  1.0


In [4]:
from sklearn.model_selection import KFold, cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, make_scorer
import numpy as np

# Numeric code for every traffic class; the MAE/RMSE scorers below subtract
# predictions from labels and therefore need numeric labels
label_mapping = {
    'NORMAL_TCP': 1,
    'DDOS_TCP': 2,
    'DDOS_UDP': 3,
    'NORMAL_UDP': 4,
    'NORMAL_ICMP': 5,
    'DDOS_ICMP': 6
}

# Encode the 'label' column in place, but only while it still holds the
# original class names. Series.map() turns every value that is not a key of
# the mapping into NaN, so re-running this cell on already-encoded labels
# would otherwise wipe out the whole column.
if not dataset['label'].isin(list(label_mapping.values())).all():
    encoded_labels = dataset['label'].map(label_mapping)

    # Fail early and clearly instead of passing NaN labels to the model
    unmapped_labels = dataset.loc[encoded_labels.isna(), 'label'].unique()
    if len(unmapped_labels) > 0:
        raise ValueError(
            "Labels missing from label_mapping: {}".format(list(unmapped_labels))
        )

    dataset['label'] = encoded_labels

# Split the dataset into the features (X) and the target label (y)
X = dataset.drop('label', axis=1)
y = dataset['label']

# Unfitted Naive Bayes estimator; it is not trained here, cross_val_score
# fits it on the training part of every fold
nb_model = GaussianNB()

# 10-fold cross-validation with shuffling; the fixed seed makes the folds
# identical for the accuracy, MAE and RMSE runs below
kFoldValidation = KFold(n_splits=10, shuffle=True, random_state=42)
modelScore = cross_val_score(nb_model, X, y, cv=kFoldValidation, scoring='accuracy')
print("K‐Fold Cross Validation score: ", modelScore) # one accuracy per fold
print("Rata-rata KFCV score: ", np.mean(modelScore)) # mean accuracy over the K folds

# Sometimes an error measure is wanted as the counterpart of accuracy.
# The code below computes the error with MAE and RMSE.
# NOTE(review): the class codes above are arbitrary ids, so the magnitude of
# MAE/RMSE depends on the chosen numbering; only "zero vs. non-zero" is
# meaningful here. Confirm these metrics are really what is wanted.

# Mean Absolute Error (MAE). greater_is_better=False makes the scorer return
# the negated error, so maeScore holds values <= 0.
mae_scorer = make_scorer(lambda y_true, y_pred: np.mean(np.abs(y_true - y_pred)), greater_is_better=False)
maeScore = cross_val_score(nb_model, X, y, cv=kFoldValidation, scoring=mae_scorer)
# np.abs() undoes the sign flip (errors are never negative) and, unlike a
# plain negation, cannot print a zero error as "-0.0"
print("K‐Fold Cross Validation MAE: ", np.abs(maeScore)) # one MAE per fold
print("Rata-rata KFCV Mean Absolute Error: ", np.mean(np.abs(maeScore))) # mean MAE over the K folds

# Root Mean Square Error (RMSE), negated by the scorer in the same way
rmse_scorer = make_scorer(lambda y_true, y_pred: np.sqrt(np.mean((y_true - y_pred)**2)), greater_is_better=False)
rmseScore = cross_val_score(nb_model, X, y, cv=kFoldValidation, scoring=rmse_scorer)
print("K‐Fold Cross Validation RMSE: ", np.abs(rmseScore)) # one RMSE per fold
print("Rata-rata KFCV Root Mean Square Error: ", np.mean(np.abs(rmseScore))) # mean RMSE over the K folds


K‐Fold Cross Validation score:  [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
Rata-rata KFCV score:  1.0
K‐Fold Cross Validation MAE:  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Rata-rata KFCV Mean Absolute Error:  -0.0
K‐Fold Cross Validation RMSE:  [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Rata-rata KFCV Root Mean Square Error:  -0.0
