In [1]:
import pandas as pd
import re
import pickle
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Location of the test log CSV.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# relative to a configurable data directory so the notebook runs elsewhere.
test_csv_path = '/home/achoo/Desktop/Honeypot/test_sanitized_logs_combined.csv'

# Column names, in file order. The CSV is read with header=None, so the names
# are attached after loading.
test_columns = [
    'eventid', 'src_ip', 'src_port', 'dst_ip', 'dst_port', 'session',
    'protocol', 'version', 'hassh', 'hasshAlgorithms', 'message',
    'sensor', 'timestamp',
]

# Read the raw rows, then label the positional columns.
test_data = pd.read_csv(test_csv_path, delimiter=',', header=None)
test_data.columns = test_columns

# Substrings whose presence marks a log message as malicious ("as in the
# original code", so the list itself is left unchanged).
# NOTE(review): matching is a plain, case-sensitive substring test, so short
# entries such as 'ps', 'cat' and 'df' also match inside longer words
# (e.g. 'https', 'location'). Confirm this is the intended labelling rule.
malicious_keywords = ['failed', 'whoami', 'uname', 'chattr', 'cat', ' rm', '.ssh', 'authorized_keys',
                      'grep', 'chmod', 'curl', 'not found', 'mkdir', '/bin/', '/tmp/', 'sshd', '.sh', 
                      'ssh-rsa', 'ps', 'crontab', 'uptime', 'ifconfig', 'cpuinfo', 'df', 'chpasswd', 
                      'free', 'pkill', 'pgrep', 'admin']

# Failed root login with an arbitrary credential, e.g.
# "login attempt [root/1234] failed". Compiled once instead of on every call.
_FAILED_LOGIN_RE = re.compile(r'login attempt \[root\/[^\]]+\] failed')


def flag_malicious(message):
    """Return 1 if a log message is considered malicious, otherwise 0.

    A message is flagged when it contains any entry of ``malicious_keywords``
    as a substring, or when it matches the failed-root-login pattern.

    Parameters
    ----------
    message : object
        Text of one log entry. Non-string values (for example ``None`` or the
        float NaN that pandas yields for an empty CSV field) are treated as
        not malicious instead of raising ``TypeError`` on the substring test.

    Returns
    -------
    int
        1 when flagged, 0 otherwise.
    """
    # Guard: `keyword in message` is only defined for strings.
    if not isinstance(message, str):
        return 0
    # Check for malicious keywords
    if any(keyword in message for keyword in malicious_keywords):
        return 1
    # Check for failed login attempts with random values. This is redundant
    # while 'failed' stays in malicious_keywords, but keeps the rule working
    # if that keyword is ever removed.
    if _FAILED_LOGIN_RE.search(message):
        return 1
    return 0

# Label every row with flag_malicious: 1 = flagged as malicious, 0 = not flagged.
test_data['attack'] = test_data['message'].apply(flag_malicious)

# Keep only selected columns. The explicit .copy() makes the result an
# independent DataFrame, so later column assignments on test_data do not
# raise SettingWithCopyWarning or depend on view-vs-copy behaviour.
test_data = test_data[['message', 'hasshAlgorithms', 'eventid', 'protocol', 'attack']].copy()

In [3]:
# A single LabelEncoder instance is reused for all categorical columns. Each
# fit_transform call re-fits it, so after the loop `le` only holds the classes
# of the last column ('protocol').
le = LabelEncoder()

# NOTE(review): the encoder (and the scaler below) are fitted on the test set
# itself. If the saved models were trained with transformers fitted on the
# training data, the integer codes and scaling used here may not match what
# the models saw — confirm, and load the training-time transformers if they
# were persisted.
for categorical_column in ('hasshAlgorithms', 'eventid', 'protocol'):
    test_data[categorical_column] = le.fit_transform(test_data[categorical_column])

# Labels (y) are the 'attack' flags; features (X) are the encoded columns
# without the label and without the raw message text.
y_test = test_data['attack']
X_test = test_data.drop(columns=['attack', 'message'])

# Standardise the features (see the review note above regarding fitting on
# test data).
scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test)

def _load_pickled_model(path):
    """Unpickle and return the object stored in the file at ``path``."""
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# Load the saved models (paths are relative to the working directory).
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling;
# only load model files that come from a trusted source.
model_xgb = _load_pickled_model('finalized_model_XGB.sav')
model_svm = _load_pickled_model('finalized_model_SVM.sav')
model_rf = _load_pickled_model('finalized_model_RF.sav')
voting_clf = _load_pickled_model('finalized_model_Voting.sav')

In [4]:
def _print_evaluation(heading, y_true, y_pred):
    """Print accuracy, confusion matrix and classification report for one model.

    Parameters
    ----------
    heading : str
        Line printed above the metrics.
    y_true, y_pred :
        True labels and the model's predicted labels, passed unchanged to the
        scikit-learn metric functions.
    """
    print(heading)
    print(f"Accuracy: {accuracy_score(y_true, y_pred):.4f}")
    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred))


# Make predictions on the scaled test set with each loaded model.
y_pred_xgb = model_xgb.predict(X_test_scaled)
y_pred_svm = model_svm.predict(X_test_scaled)
y_pred_rf = model_rf.predict(X_test_scaled)
y_pred_voting = voting_clf.predict(X_test_scaled)

# Report every model against the same labels, in a fixed order.
for heading, predictions in (
    ("XGBoost Classifier Evaluation:", y_pred_xgb),
    ("SVM Classifier Evaluation:", y_pred_svm),
    ("Random Forest Classifier Evaluation:", y_pred_rf),
    ("Voting Classifier (Soft) Evaluation:", y_pred_voting),
):
    _print_evaluation(heading, y_test, predictions)

XGBoost Classifier Evaluation:
Accuracy: 0.7545
[[6406  945]
 [1957 2512]]
              precision    recall  f1-score   support

           0       0.77      0.87      0.82      7351
           1       0.73      0.56      0.63      4469

    accuracy                           0.75     11820
   macro avg       0.75      0.72      0.72     11820
weighted avg       0.75      0.75      0.75     11820

SVM Classifier Evaluation:
Accuracy: 0.9776
[[7212  139]
 [ 126 4343]]
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      7351
           1       0.97      0.97      0.97      4469

    accuracy                           0.98     11820
   macro avg       0.98      0.98      0.98     11820
weighted avg       0.98      0.98      0.98     11820

Random Forest Classifier Evaluation:
Accuracy: 0.9106
[[6825  526]
 [ 531 3938]]
              precision    recall  f1-score   support

           0       0.93      0.93      0.93      7351
          