# LogisticRegression

In [15]:
import json
import os
import time
import warnings

import numpy as np
import pandas as pd
from tqdm import tqdm

warnings.filterwarnings('ignore')

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

In [16]:
# Location of the dataset CSV. The DATASET_CSV environment variable, when set,
# overrides the machine-specific default below so the notebook can be run on
# another machine without editing this cell.
csv_path = os.environ.get('DATASET_CSV', r'F:\dataset\merged csv\dataset_balance.csv')
df = pd.read_csv(csv_path)

In [17]:
# Raw dataset label -> binary class name used by this notebook.
# Labels that do not appear here are mapped to NaN by Series.map downstream.
dict_2classes = dict([
    ('DDoS-SlowLoris', 'Attack'),
    ('BenignTraffic', 'Benign'),
])


In [18]:
# Collapse the raw labels into the two target classes; any label that is not in
# the lookup becomes NaN.
# The class names are also mapped to themselves so that re-running this cell on
# an already-mapped frame is a no-op instead of turning every label into NaN.
# NOTE(review): assumes the raw CSV contains no rows literally labelled
# 'Attack' or 'Benign' that should be excluded -- confirm against the dataset.
label_lookup = {**dict_2classes, **{cls: cls for cls in dict_2classes.values()}}
df['label'] = df['label'].map(label_lookup)

In [19]:
# Keep only the rows whose label is present (not NaN) after the mapping step.
df = df[df['label'].notna()]

In [20]:
# Names of the feature columns selected from the data frame, in selection order.
# The list is assembled from smaller groups purely for readability.
X_columns = (
    ['flow_duration', 'Header_Length', 'Protocol Type', 'Duration',
     'Rate', 'Srate', 'Drate']
    # columns named *_flag_number
    + ['fin_flag_number', 'syn_flag_number', 'rst_flag_number',
       'psh_flag_number', 'ack_flag_number', 'ece_flag_number',
       'cwr_flag_number']
    # columns named *_count
    + ['ack_count', 'syn_count', 'fin_count', 'urg_count', 'rst_count']
    # columns named after protocols
    + ['HTTP', 'HTTPS', 'DNS', 'Telnet', 'SMTP', 'SSH', 'IRC', 'TCP',
       'UDP', 'DHCP', 'ARP', 'ICMP', 'IPv', 'LLC']
    # remaining columns ('Magnitue' is spelled as in the original list)
    + ['Tot sum', 'Min', 'Max', 'AVG', 'Std', 'Tot size', 'IAT', 'Number',
       'Magnitue', 'Radius', 'Covariance', 'Variance', 'Weight']
)


In [21]:
# Feature matrix (the X_columns subset) and target vector (the 'label' column).
X, y = df[X_columns], df['label']

In [22]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [23]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

In [24]:
# Logistic regression with library-default hyper-parameters apart from n_jobs.
# NOTE(review): warnings are globally silenced in the import cell, so a solver
# convergence warning would not be shown here -- consider checking
# model.n_iter_ after fitting.
model = LogisticRegression(n_jobs=-1)

# time.perf_counter() is used for the measurement: unlike time.time() it is a
# monotonic, high-resolution clock intended for timing short intervals and is
# not affected by system clock adjustments.
start_train_time = time.perf_counter()
model.fit(X_train, y_train)
end_train_time = time.perf_counter()

# Elapsed fit time in seconds.
training_duration = end_train_time - start_train_time

In [25]:
# Time prediction on the held-out set. time.perf_counter() is used instead of
# time.time() because the interval may be very short and the wall clock can be
# too coarse (or be adjusted mid-run) to measure it reliably.
start_pred_time = time.perf_counter()
y_pred = model.predict(X_test)
end_pred_time = time.perf_counter()

# Elapsed prediction time in seconds.
prediction_duration = end_pred_time - start_pred_time

In [26]:
# Print the evaluation summary: accuracy, the macro-averaged recall / precision
# / F1 scores, and the measured training and prediction durations.
print("##### Logistic Regression (2-class) #####")
print("Accuracy:", accuracy_score(y_test, y_pred))
for heading, scorer in (
    ("Recall:", recall_score),
    ("Precision:", precision_score),
    ("F1 Score:", f1_score),
):
    print(heading, scorer(y_test, y_pred, average='macro'))
print(f"Training Time: {training_duration:.4f} seconds")
print(f"Prediction Time: {prediction_duration:.4f} seconds")

##### Logistic Regression (2-class) #####
Accuracy: 0.8272329527264967
Recall: 0.8272355129450745
Precision: 0.8279884822112986
F1 Score: 0.8271341808534827
Training Time: 1.8253 seconds
Prediction Time: 0.0010 seconds


In [None]:
# Compute the evaluation metrics (macro-averaged where an average applies) so
# they can be persisted alongside the timings.
acc = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
f1 = f1_score(y_test, y_pred, average='macro')

# Values are cast to plain Python floats before being passed to json.dump.
result = {
    "Model": "LR",
    "Accuracy": float(acc),
    "Precision": float(precision),
    "Recall": float(recall),
    "F1": float(f1),
    "Train Time (s)": float(training_duration),
    "Predict Time (s)": float(prediction_duration)
}

# Save as a JSON file (json is imported in the notebook's import cell).
# The encoding is stated explicitly so the file does not depend on the
# platform's default text encoding.
with open("result_LR.json", "w", encoding="utf-8") as f:
    json.dump(result, f)

print("Model evaluation results have been saved to result_LR.json")

Model evaluation results have been saved to result_LR.json
