In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score, recall_score, f1_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# Load the flow records for this capture from the CSV export.
file_path = 'dataset/Wednesday-workingHours.pcap_ISCX.csv'
data = pd.read_csv(file_path)

# Work on a shuffled copy (fixed seed) so `data` keeps the rows as they were read.
df = data.copy().sample(frac=1, random_state=42).reset_index(drop=True)

# Normalise column names: trim surrounding whitespace, then turn spaces and
# slashes into underscores.
df.columns = (
    df.columns
    .str.strip()
    .str.replace('[ /]', '_', regex=True)
)

# Treat +/-inf like a missing value, then fill every missing value with 0.
df = df.replace([np.inf, -np.inf], np.nan).fillna(0)

# Separate the label column from the feature columns.
y = df['Label']
X = df.drop(columns='Label')

# Binary target: 0 for rows labelled 'BENIGN', 1 for every other label.
y_binary = np.where(y == 'BENIGN', 0, 1)

# Stratified 85/15 split so both partitions keep the same class ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_binary,
    test_size=0.15,
    stratify=y_binary,
    random_state=42,
)

print(f"Train 데이터 크기: {X_train.shape}")
print(f"Test 데이터 크기: {X_test.shape}")
print("-" * 50)

In [None]:
print("## LSTM 모델 학습 및 평가 ##")

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks and
# batch shuffling are repeatable, in line with the random_state=42 used for the
# data split. NOTE(review): some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

# One batch size for training, evaluation and prediction; evaluate()/predict()
# would otherwise fall back to Keras' default of 32 and need many more steps.
LSTM_BATCH_SIZE = 256

# Scale features to [0, 1]. The scaler is fitted on the training split only and
# then applied unchanged to the test split.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reshape to the (samples, timesteps, features) layout the LSTM layer expects.
# Each sample is treated as a sequence with a single timestep.
X_train_lstm = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_lstm = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

# Model definition. The input shape is declared with an explicit Input object
# rather than the `input_shape=` layer argument, which newer Keras versions warn
# about.
# NOTE(review): activation='relu' on the LSTM is kept from the original; per the
# Keras LSTM documentation the fused cuDNN kernel requires the default tanh
# activation, so this layer runs on the generic implementation.
lstm_model = Sequential([
    tf.keras.Input(shape=X_train_lstm.shape[1:]),
    LSTM(100, activation='relu'),
    Dropout(0.2),
    Dense(50, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

# Compile for binary classification.
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train; validation_split=0.2 holds out 20% of the training data for validation.
history = lstm_model.fit(
    X_train_lstm,
    y_train,
    epochs=10,
    batch_size=LSTM_BATCH_SIZE,
    validation_split=0.2,
    verbose=1,
)

# Evaluate on the held-out test split.
loss, accuracy = lstm_model.evaluate(
    X_test_lstm, y_test, batch_size=LSTM_BATCH_SIZE, verbose=0
)
y_pred_lstm_proba = lstm_model.predict(X_test_lstm, batch_size=LSTM_BATCH_SIZE)

# The sigmoid output has shape (n, 1); threshold at 0.5 and flatten to (n,) so
# the predictions line up element-for-element with the 1-D y_test.
y_pred_lstm = (y_pred_lstm_proba > 0.5).astype(int).ravel()

print(f"LSTM 모델 정확도: {accuracy:.4f}")
print(f"LSTM 모델 재현율 (Recall): {recall_score(y_test, y_pred_lstm):.4f}")
print(f"LSTM 모델 F1-Score: {f1_score(y_test, y_pred_lstm):.4f}")
print("LSTM 모델 성능 리포트:")
print(classification_report(y_test, y_pred_lstm, target_names=['BENIGN (0)', 'ATTACK (1)']))
print("-" * 50)