## **Majority voting over a sequence of samples of the same class over time**

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from collections import Counter
import joblib

In [None]:
# Load the dataset (the file name suggests a 2-D UMAP projection -- confirm).
# The "attack" column is the class label; every other column is a feature.
data = pd.read_csv("Umap_data_2D.csv")
y = data["attack"]
X = data.drop("attack", axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Z-score standardisation. The scaler is fitted on the training rows only and
# the same transform is then applied to both partitions, so no statistics
# from the test set are used during fitting.
scaler = StandardScaler(copy=False)
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Shallow decision tree (depth <= 4, at least 6 samples per leaf) trained on
# the standardised training partition.
clf = DecisionTreeClassifier(
    max_depth=4,
    min_samples_leaf=6,
    min_samples_split=2,
    random_state=42,
)
clf.fit(X_train, y_train)
df_clf = clf  # fit() returns the estimator itself, so both names share one object

# Score the fitted tree on the held-out partition.
dt_predictions = df_clf.predict(X_test)
accuracy = accuracy_score(y_test, dt_predictions)
print(f"Accuracy: {accuracy:.2f}")

# Persist the fitted tree so later cells can reload it from disk.
# Only the classifier is written here; the scaler is not saved with it.
model_filename = 'Umap_data_classifier.joblib'
joblib.dump(df_clf, model_filename)
print(f"DT Trained model saved to {model_filename}")

In [None]:
# Partition the held-out labels by class: 0 and 1 are kept as two separate
# Series so each class can be evaluated on its own later.
dt_attack_0 = y_test.loc[y_test.eq(0)]
dt_attack_1 = y_test.loc[y_test.eq(1)]

# Report the size of each partition next to the size of the full test set.
for caption, labels in (
    ("length of y_test attack_0: ", dt_attack_0),
    ("length of y_test attack_1: ", dt_attack_1),
    ("length of y_test: ", y_test),
):
    print(caption, len(labels))

In [None]:
# Select the scaled test rows belonging to each class. The boolean mask is
# derived from y_test, whose rows correspond one-to-one with those of X_test.
dt_X_test_label_0, dt_X_test_label_1 = (
    X_test[y_test == label] for label in (0, 1)
)

In [None]:
# Reload the persisted classifier and use THAT object for the predictions
# below, so the file written earlier is actually exercised (previously the
# loaded model was ignored and the in-memory `df_clf` was used instead).
# NOTE: joblib.load unpickles arbitrary objects -- only load files you trust.
clf1 = joblib.load("Umap_data_classifier.joblib")

# Predict separately on the label-0 rows and the label-1 rows of the test set.
dt_y_pred_attack_0 = clf1.predict(dt_X_test_label_0)
dt_y_pred_attack_1 = clf1.predict(dt_X_test_label_1)

print("\ndt Predicted label for attack 0: ", dt_y_pred_attack_0)
print("dt Length of Predicted label for attack 0: ", len(dt_y_pred_attack_0))
print("dt Predicted label for attack 1: ", dt_y_pred_attack_1)

## **Legitimate samples (attack = 0)**

In [None]:
# Sliding-window majority vote over the label-0 test samples.
#
# Each window holds `window_size0` consecutive samples; consecutive windows
# start `step_size0` samples apart. The classifier labels every sample in the
# window and the most frequent label becomes the window's prediction.
window_size0 = 3
step_size0 = 1

# One entry per window: the window's true label and its majority-voted label.
true_labels_all0 = []
predicted_labels_all0 = []

# If there are fewer samples than one full window, the range is empty and
# no window is evaluated.
for i in range(0, len(dt_X_test_label_0) - window_size0 + 1, step_size0):
    # Extract the window's samples and the matching true labels. `.iloc`
    # makes the label slice explicitly positional, independent of the
    # Series' (shuffled) integer index.
    window_samples0 = dt_X_test_label_0[i:i+window_size0]
    window_true_labels0 = dt_attack_0.iloc[i:i+window_size0]

    # Predict a label for each sample in the window.
    predicted_labels0 = df_clf.predict(window_samples0)

    # Majority vote: bincount counts each label, argmax picks the most
    # frequent one (the smaller label wins a tie).
    majority_voted_label0 = np.bincount(predicted_labels0).argmax()
    print("window samples: ", window_samples0)
    print("window true labels: \n", window_true_labels0)
    print("predicted labels: ", predicted_labels0)
    print("majority_voted_label: ", majority_voted_label0)
    print()

    # Every sample in this partition shares the same true label, so the
    # first one stands for the whole window.
    true_labels_all0.append(window_true_labels0.values[0])
    predicted_labels_all0.append(majority_voted_label0)

# Pin labels=[0, 1] so the matrix is always 2x2 (rows = true, cols = predicted).
# Without it, the matrix collapses to 1x1 when every window is predicted 0.
conf_matrix = confusion_matrix(true_labels_all0, predicted_labels_all0, labels=[0, 1])

print("Confusion Matrix:")
print(conf_matrix)

In [None]:
# Metrics for the label-0 windows.
#
# labels=[0, 1] forces a 2x2 matrix so the four-way unpack below cannot fail
# when only one class appears among the true and predicted labels.
TN, FP, FN, TP = confusion_matrix(
    true_labels_all0, predicted_labels_all0, labels=[0, 1]
).ravel()

# Denominators are checked explicitly so an empty window list produces NaN
# instead of a division-by-zero warning.
negatives0 = TN + FP
total0 = TP + TN + FP + FN

# False Positive Rate: share of true-0 windows voted as 1.
FPR = FP / negatives0 if negatives0 else float("nan")

# True Negative Rate: share of true-0 windows voted as 0.
TNR = TN / negatives0 if negatives0 else float("nan")

# Accuracy over all windows.
ACC = (TP + TN) / total0 if total0 else float("nan")

# F1_score was printed below but never defined, which raised NameError.
# It is computed here for class 0, the class this section reports on:
#   precision_0 = TN / (TN + FN), recall_0 = TN / (TN + FP)
#   F1_0 = 2*TN / (2*TN + FP + FN)
# NOTE(review): confirm that the class-0 F1 is the intended definition.
f1_denominator0 = 2 * TN + FP + FN
F1_score = 2 * TN / f1_denominator0 if f1_denominator0 else float("nan")

print("RESULT FOR ATTACK_LABEL_0:\n")
print("True Negative Rate (TNR):", TNR)
print("False Positive Rate (FPR):", FPR)
print("Accuracy (ACC):", ACC)
print("F1-score:", F1_score)

## **Adversarial samples (attack = 1)**

In [None]:
# Sliding-window majority vote over the label-1 test samples.
#
# Each window holds `window_size` consecutive samples; consecutive windows
# start `step_size` samples apart. The classifier labels every sample in the
# window and the most frequent label becomes the window's prediction.
window_size = 3
step_size = 1

# One entry per window: the window's true label and its majority-voted label.
true_labels_all1 = []
predicted_labels_all1 = []

# If there are fewer samples than one full window, the range is empty and
# no window is evaluated.
for i in range(0, len(dt_X_test_label_1) - window_size + 1, step_size):
    # Extract the window's samples and the matching true labels. `.iloc`
    # makes the label slice explicitly positional, independent of the
    # Series' (shuffled) integer index.
    window_samples1 = dt_X_test_label_1[i:i+window_size]
    window_true_labels1 = dt_attack_1.iloc[i:i+window_size]

    # Predict a label for each sample in the window.
    predicted_labels1 = df_clf.predict(window_samples1)

    # Majority vote: bincount counts each label, argmax picks the most
    # frequent one (the smaller label wins a tie).
    majority_voted_label1 = np.bincount(predicted_labels1).argmax()
    print("window samples: ", window_samples1)
    print("window true labels: \n", window_true_labels1)
    print("predicted labels: ", predicted_labels1)
    print("majority_voted_label: ", majority_voted_label1)
    print()

    # Every sample in this partition shares the same true label, so the
    # first one stands for the whole window.
    true_labels_all1.append(window_true_labels1.values[0])
    predicted_labels_all1.append(majority_voted_label1)

# Pin labels=[0, 1] so the matrix is always 2x2 (rows = true, cols = predicted).
# Without it, the matrix collapses to 1x1 when every window is predicted 1.
conf_matrix1 = confusion_matrix(true_labels_all1, predicted_labels_all1, labels=[0, 1])

print("Confusion Matrix:")
print(conf_matrix1)

In [None]:
# Metrics for the label-1 windows.
#
# labels=[0, 1] forces a 2x2 matrix. Without it, a run where every window is
# correctly voted 1 yields a 1x1 matrix and the four-way unpack below raises
# ValueError.
TN, FP, FN, TP = confusion_matrix(
    true_labels_all1, predicted_labels_all1, labels=[0, 1]
).ravel()

# Denominators are checked explicitly so an empty window list produces NaN
# instead of a division-by-zero warning.
positives1 = TP + FN
total1 = TP + TN + FP + FN

# True Positive Rate (recall): share of true-1 windows voted as 1.
TPR = TP / positives1 if positives1 else float("nan")

# False Negative Rate: share of true-1 windows voted as 0.
FNR = FN / positives1 if positives1 else float("nan")

# Accuracy over all windows.
ACC = (TP + TN) / total1 if total1 else float("nan")

print("RESULT FOR ATTACK_LABEL_1: \n")
print("True Positive Rate (TPR):", TPR)
print("False Negative Rate (FNR):", FNR)
print("Accuracy (ACC):", ACC)