In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, accuracy_score

df = pd.read_csv("../datasets/dataset-final.csv", low_memory=False)

features = [
    'failed_count_last_5min',
    'failed_ratio_last_5min',
    'unique_user_ids_last_5min'
]
df['label'] = df['attack_type'].astype(str).apply(lambda x: 1 if x in ["1", "2", "3", "4"] else 0)

X = df[features].values
y = df['label'].values

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

fold = 1
for train_index, test_index in skf.split(X_scaled, y):
    print(f"Fold {fold}")
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    model = SGDClassifier(loss='hinge', class_weight='balanced', max_iter=10000, random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred, digits=6))
    fold += 1


Fold 1
Accuracy: 0.9575620848758303
              precision    recall  f1-score   support

           0   0.996705  0.946381  0.970891    249276
           1   0.861699  0.990721  0.921716     84058

    accuracy                       0.957562    333334
   macro avg   0.929202  0.968551  0.946304    333334
weighted avg   0.962660  0.957562  0.958490    333334

Fold 2
Accuracy: 0.9117429117429118
              precision    recall  f1-score   support

           0   0.997074  0.884577  0.937463    249275
           1   0.743526  0.992303  0.850087     84058

    accuracy                       0.911743    333333
   macro avg   0.870300  0.938440  0.893775    333333
weighted avg   0.933136  0.911743  0.915429    333333

Fold 3
Accuracy: 0.9588789588789589
              precision    recall  f1-score   support

           0   0.996564  0.948282  0.971824    249275
           1   0.865897  0.990304  0.923932     84058

    accuracy                       0.958879    333333
   macro avg   0.931