In [1]:
# Data manipulation and processing
import pandas as pd
import numpy as np
import re  # Added for regex operations

# Sklearn preprocessing and scaling
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import (
    precision_score, recall_score, f1_score, roc_curve, auc,
    accuracy_score, classification_report, precision_recall_curve, confusion_matrix
)

# Model selection and evaluation
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC  # Importing SVM
from sklearn.utils import resample
import matplotlib.pyplot as plt
import pickle
import warnings

# Ignore warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the trained models
def _load_pickled_model(path):
    """Unpickle and return the object stored in the file at ``path``.

    NOTE(review): ``pickle.load`` executes arbitrary code contained in the
    file, so these .sav files must come from a trusted source.
    """
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# Paths are relative to the notebook's working directory.
model_xgb = _load_pickled_model('finalized_model_XGB.sav')
model_svm = _load_pickled_model('finalized_model_SVM.sav')
voting_clf = _load_pickled_model('finalized_model_Voting.sav')

In [3]:
# Load test data
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
TEST_DATA_PATH = '/home/achoo/Desktop/Honeypot/test3_sanitized_logs_combined.csv'

# Column names applied to the loaded frame, in file order.
TEST_DATA_COLUMNS = [
    'eventid', 'src_ip', 'src_port', 'dst_ip', 'dst_port', 'session',
    'protocol', 'version', 'hassh', 'hasshAlgorithms', 'message',
    'sensor', 'timestamp',
]

# header=None: the first line of the file is read as data, not as column names.
test_data = pd.read_csv(TEST_DATA_PATH, sep=',', header=None)
test_data.columns = TEST_DATA_COLUMNS

# Substrings whose presence in a log message marks the event as an attack.
# Matching is a case-sensitive substring test, so short entries such as 'cat'
# also match inside longer words.
malicious_keywords = ['failed', 'whoami', 'uname', 'chattr', 'cat', ' rm', '.ssh', 'authorized_keys',
                      'grep', 'chmod', 'curl', 'not found', 'mkdir']

# Failed root login with an arbitrary password, e.g. "login attempt [root/abc] failed".
# Compiled once instead of handing the raw pattern to re.search on every call.
_FAILED_LOGIN_RE = re.compile(r'login attempt \[root\/[^\]]+\] failed')


# Define a function to check for malicious login attempts
def flag_malicious(message):
    """Return 1 if ``message`` looks malicious, otherwise 0.

    A message is flagged when it contains any entry of ``malicious_keywords``
    as a substring, or when it matches the failed-root-login pattern.

    Parameters
    ----------
    message : str
        One log message. Any non-string value (for example the float NaN that
        pandas produces for an empty CSV cell, or None) is treated as benign
        and yields 0 instead of raising ``TypeError``.

    Returns
    -------
    int
        1 for malicious, 0 for benign.
    """
    # A missing message carries no evidence of an attack.
    if not isinstance(message, str):
        return 0
    # Check for standard malicious keywords
    if any(keyword in message for keyword in malicious_keywords):
        return 1
    # Check for failed login attempts with random values.
    # With the current keyword list this is already covered by 'failed'; it is
    # kept so the rule survives if that keyword is ever removed.
    if _FAILED_LOGIN_RE.search(message):
        return 1
    return 0

# Missing messages (NaN after read_csv) are treated as empty text so that both
# the labelling step and the TF-IDF vectorizer only ever receive strings.
test_data['message'] = test_data['message'].fillna('')

# Create the target column with the shared labelling helper instead of
# duplicating its keyword logic inline.
test_data['attack'] = test_data['message'].apply(flag_malicious)

# Keep only selected columns; .copy() gives an independent frame so the column
# assignments below do not write into a slice of the original.
test_data = test_data[['message', 'hasshAlgorithms', 'eventid', 'protocol', 'attack']].copy()

# NOTE(review): every transformer below (LabelEncoder, TfidfVectorizer,
# StandardScaler) is re-fitted on the test data. The loaded models were
# presumably trained on features produced by transformers fitted on the
# training data; unless those fitted objects are reused here, the integer
# codes, the TF-IDF vocabulary/column order and the scaling may not match what
# the models expect. TODO: persist the training-time transformers and call
# .transform() here instead of .fit_transform().

# Initialize a LabelEncoder to encode categorical columns
le = LabelEncoder()

# Encode categorical columns (hasshAlgorithms, eventid, protocol).
# The single encoder is re-fitted per column, so afterwards `le` only holds the
# classes of the last column ('protocol').
for column in ('hasshAlgorithms', 'eventid', 'protocol'):
    test_data[column] = le.fit_transform(test_data[column])

# Vectorize the 'message' column with a TF-IDF vectorizer fitted on this data
# (at most 500 terms).
tfidf = TfidfVectorizer(max_features=500)
message_tfidf = tfidf.fit_transform(test_data['message'])

# Convert the TF-IDF features into a DataFrame that shares test_data's index,
# so the column-wise concatenations below align row for row.
message_tfidf_df = pd.DataFrame(
    message_tfidf.toarray(),
    columns=tfidf.get_feature_names_out(),
    index=test_data.index,
)

# Separate labels (y_test) and features (X_test) BEFORE merging the TF-IDF
# columns into test_data: a vocabulary token named 'attack' would otherwise
# create a duplicate 'attack' column, making the label lookup return two
# columns and the drop remove a feature as well.
y_test = test_data['attack']
X_test = pd.concat(
    [test_data[['hasshAlgorithms', 'eventid', 'protocol']], message_tfidf_df],
    axis=1,
)

# Keep the combined frame (encoded columns + label + TF-IDF features) under the
# original name.
test_data = pd.concat([test_data.drop('message', axis=1), message_tfidf_df], axis=1)

# Initialize StandardScaler to scale feature data
scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test)

In [4]:
def _print_test_report(header, y_true, y_pred):
    """Print the header, accuracy (4 decimals), confusion matrix and
    classification report for one set of predictions."""
    print(header)
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred))


# Each model is scored on the same scaled test features; the prediction arrays
# stay available under their own names.

# XGBoost Classifier
xgb_pred = model_xgb.predict(X_test_scaled)
_print_test_report("XGBoost Classifier Test Evaluation:", y_test, xgb_pred)

# SVM Classifier
svm_pred = model_svm.predict(X_test_scaled)
_print_test_report("SVM Classifier Test Evaluation:", y_test, svm_pred)

# Voting Classifier
voting_pred = voting_clf.predict(X_test_scaled)
_print_test_report("Voting Classifier Test Evaluation:", y_test, voting_pred)

XGBoost Classifier Test Evaluation:
Accuracy: 0.8559
[[20899     0]
 [ 3660   838]]
              precision    recall  f1-score   support

           0       0.85      1.00      0.92     20899
           1       1.00      0.19      0.31      4498

    accuracy                           0.86     25397
   macro avg       0.93      0.59      0.62     25397
weighted avg       0.88      0.86      0.81     25397

SVM Classifier Test Evaluation:
Accuracy: 0.8089
[[18814  2085]
 [ 2768  1730]]
              precision    recall  f1-score   support

           0       0.87      0.90      0.89     20899
           1       0.45      0.38      0.42      4498

    accuracy                           0.81     25397
   macro avg       0.66      0.64      0.65     25397
weighted avg       0.80      0.81      0.80     25397

Voting Classifier Test Evaluation:
Accuracy: 0.8562
[[20899     0]
 [ 3653   845]]
              precision    recall  f1-score   support

           0       0.85      1.00      0.92 