In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, accuracy_score

df = pd.read_csv("../datasets/dataset-final.csv", low_memory=False)

features = [
    'failed_count_last_5min',
    'failed_ratio_last_5min',
    'unique_user_ids_last_5min'
]
df['label'] = df['attack_type'].astype(str).apply(lambda x: 1 if x in ["1", "2", "3", "4"] else 0)

X = df[features].values
y = df['label'].values

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

fold = 1
for train_index, test_index in skf.split(X_scaled, y):
    print(f"Fold {fold}")
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model = SGDClassifier(loss='hinge', class_weight=None, max_iter=10000, random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred, digits=6))
    fold += 1


Fold 1
Accuracy: 0.8396923206153588
              precision    recall  f1-score   support

           0   0.823476  1.000000  0.903194    249276
           1   1.000000  0.364296  0.534043     84058

    accuracy                       0.839692    333334
   macro avg   0.911738  0.682148  0.718618    333334
weighted avg   0.867991  0.839692  0.810103    333334

Fold 2
Accuracy: 0.8391008391008391
              precision    recall  f1-score   support

           0   0.822940  1.000000  0.902871    249275
           1   1.000000  0.361952  0.531520     84058

    accuracy                       0.839101    333333
   macro avg   0.911470  0.680976  0.717195    333333
weighted avg   0.867590  0.839101  0.809226    333333

Fold 3
Accuracy: 0.838950838950839
              precision    recall  f1-score   support

           0   0.822804  1.000000  0.902789    249275
           1   1.000000  0.361358  0.530878     84058

    accuracy                       0.838951    333333
   macro avg   0.9114