In [3]:
import numpy as np
import sys
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tqdm import tqdm
import pickle, json
from sklearn.preprocessing import LabelEncoder


# Add the parent directory so that the 'scripts' folder is on the path
sys.path.append(os.path.join(os.getcwd(), "..")) 
from API.model.BernoulliNB import BernoulliNB
from API.scripts.evaluate import ModelEvaluator

In [None]:
BASE_DIR = r"C:\Users\Pratyush\OneDrive\Desktop\ChampSim-master\Network_Intrusion\API\data\processed"
train_path = os.path.join(BASE_DIR, "train_data.csv")
test_path = os.path.join(BASE_DIR, "test_data.csv") 


# Load the data
train_data = pd.read_csv(train_path)
test_data = pd.read_csv(test_path)
    
# Separate features and target using the approach you provided
target_column = train_data.columns[-1]
    
X_train = train_data.drop(columns=[target_column]).values
y_train = train_data[target_column].values
    
X_test = test_data.drop(columns=[target_column]).values
y_test = test_data[target_column].values
    
    # If target is categorical, encode it
if isinstance(y_train[0], (str, np.str_)):
    print("Encoding categorical target variable...")
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(y_train)
    y_test = label_encoder.transform(y_test)
    print(f"Classes: {label_encoder.classes_}")
    
    # Convert numerical features to binary (Bernoulli NB works with binary features)
    # This step depends on your specific dataset - adjust threshold as needed
X_train_binary = (X_train > 0).astype(int)
X_test_binary = (X_test > 0).astype(int)

Converting features to binary format...


In [5]:
bnb = BernoulliNB(alpha=1.0)
bnb.fit(X_train_binary, y_train)
    
# Evaluate the model
train_accuracy = bnb.score(X_train_binary, y_train)
test_accuracy = bnb.score(X_test_binary, y_test)
    
print(f"Training accuracy: {train_accuracy:.4f}")
print(f"Test accuracy: {test_accuracy:.4f}")

Training accuracy: 0.8709
Test accuracy: 0.8700


In [8]:
param_grid = {
    'alpha': [0.01, 0.1, 1.0, 10.0]  # Different alpha values for Laplace smoothing
}

best_score = 0
best_param = None

# Grid search for best alpha
for alpha in param_grid['alpha']:
    bnb = BernoulliNB(alpha=alpha)
    bnb.fit(X_train_binary, y_train)  # Train the model
    y_pred = bnb.predict(X_test_binary)  # Predict on test data
    test_acc = accuracy_score(y_test, y_pred)  # Compute accuracy

    print(f'Alpha: {alpha} \nScore: {test_acc:.4f} \n')

    if test_acc > best_score:
        best_score = test_acc
        best_param = {'alpha': alpha}

print(f'Best score: {best_score:.4f} \nBest param: {best_param}')

Alpha: 0.01 
Score: 0.8700 

Alpha: 0.1 
Score: 0.8700 

Alpha: 1.0 
Score: 0.8700 

Alpha: 10.0 
Score: 0.8700 

Best score: 0.8700 
Best param: {'alpha': 0.01}
