<a href="https://colab.research.google.com/github/8sheeta8/2025_Capstone_DDoS-Botnet/blob/main/Botnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

# ✅ 정상 데이터 로딩
normal = np.load('/content/normal_combined.npy')

# ✅ Botnet 관련 공격 npy 로딩
irc = np.load('/content/irc_payloads.npy')
fastflux = np.load('/content/fastflux_payloads.npy')
scan = np.load('/content/scan_payloads.npy')
unknown = np.load('/content/unknown_payloads.npy')

In [2]:
def pad_all_to_max_length(arrays):
    """
    각 배열을 가장 긴 길이에 맞춰 오른쪽에 0으로 패딩합니다.
    arrays: 리스트 형태의 2D np.ndarray
    반환: 패딩된 배열 리스트, 최대 길이
    """
    max_len = max(arr.shape[1] for arr in arrays)
    padded = []
    for arr in arrays:
        pad_width = max_len - arr.shape[1]
        padded_arr = np.pad(arr, ((0, 0), (0, pad_width)), mode='constant')
        padded.append(padded_arr)
    return padded, max_len

In [3]:
# 모든 배열을 리스트에 담기
all_arrays = [normal, irc, fastflux, scan, unknown]

# ✅ 최대 길이 기준으로 패딩
padded_arrays, max_len = pad_all_to_max_length(all_arrays)

# 패딩 완료된 배열 꺼내기
normal, irc, fastflux, scan, unknown = padded_arrays

print(f"🔧 최대 길이에 맞춰 패딩 완료: max_len = {max_len}")
print(f"📦 normal shape: {normal.shape}, irc shape: {irc.shape}")

🔧 최대 길이에 맞춰 패딩 완료: max_len = 1024
📦 normal shape: (106561, 1024), irc shape: (218, 1024)


In [4]:
from sklearn.model_selection import train_test_split

# 📌 0: 정상, 1: Botnet
X = np.concatenate([normal, irc, fastflux, scan, unknown], axis=0)
y = np.array([0]*len(normal) + [1]*len(irc) + [1]*len(fastflux) + [1]*len(scan) + [1]*len(unknown))

# CNN 입력 형식으로 reshape
X = X.reshape((X.shape[0], X.shape[1], 1))

# train/test 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

model = Sequential([
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(max_len, 1)),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Conv1D(64, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # 이진 분류
])

model.compile(optimizer=Adam(0.001), loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test))


Epoch 1/10
[1m2218/2218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m543s[0m 243ms/step - accuracy: 0.9285 - loss: 0.2051 - val_accuracy: 0.9783 - val_loss: 0.0605
Epoch 2/10
[1m2218/2218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m566s[0m 245ms/step - accuracy: 0.9713 - loss: 0.0708 - val_accuracy: 0.9779 - val_loss: 0.0514
Epoch 3/10
[1m2218/2218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m516s[0m 233ms/step - accuracy: 0.9770 - loss: 0.0564 - val_accuracy: 0.9784 - val_loss: 0.0472
Epoch 4/10
[1m2218/2218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m579s[0m 240ms/step - accuracy: 0.9806 - loss: 0.0487 - val_accuracy: 0.9866 - val_loss: 0.0348
Epoch 5/10
[1m2218/2218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m543s[0m 245ms/step - accuracy: 0.9834 - loss: 0.0426 - val_accuracy: 0.9904 - val_loss: 0.0360
Epoch 6/10
[1m2218/2218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m562s[0m 245ms/step - accuracy: 0.9857 - loss: 0.0387 - val_accuracy: 0.9879 - val_loss:

<keras.src.callbacks.history.History at 0x7da09215bcd0>

In [7]:
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# 예측
y_pred = model.predict(X_test)
y_pred_class = (y_pred > 0.5).astype(int)

# 평가 출력
print(classification_report(y_test, y_pred_class))
print(confusion_matrix(y_test, y_pred_class))


[1m4436/4436[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 20ms/step
              precision    recall  f1-score   support

           0       0.99      0.99      0.99     52994
           1       1.00      0.99      0.99     88942

    accuracy                           0.99    141936
   macro avg       0.99      0.99      0.99    141936
weighted avg       0.99      0.99      0.99    141936

[[52590   404]
 [  709 88233]]
