<a href="https://colab.research.google.com/github/8sheeta8/Attack_Network_AI/blob/main/XSS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import classification_report

# Pick the compute device automatically: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("사용 장치:", device)

# 1. Dataset backed by image/label arrays stored as .npy files
class XSSDataset(Dataset):
    """Serve (image, label) tensor pairs loaded from two ``.npy`` files.

    Images are cast to float32 and divided by 255; labels are cast to int64.
    A 3-D image array gets a channel axis inserted at position 1, so
    (N, H, W) becomes (N, 1, H, W). Arrays of any other rank are kept as-is.
    """

    def __init__(self, image_path, label_path):
        pixels = np.load(image_path).astype(np.float32) / 255.0
        if pixels.ndim == 3:
            # Add the single channel dimension expected by Conv2d.
            pixels = np.expand_dims(pixels, axis=1)
        self.images = pixels
        self.labels = np.load(label_path).astype(np.int64)

    def __len__(self):
        # The dataset length is defined by the label array.
        return len(self.labels)

    def __getitem__(self, idx):
        image = torch.tensor(self.images[idx])
        label = torch.tensor(self.labels[idx])
        return image, label

# 2. DataLoader factory
def create_dataloader_from_files(img_path, label_path, batch_size=32, shuffle=False):
    """Wrap the image/label ``.npy`` files in an XSSDataset and return a DataLoader.

    Args:
        img_path: Path to the image array file.
        label_path: Path to the label array file.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles samples each epoch.

    Returns:
        A ``DataLoader`` over the resulting ``XSSDataset``.
    """
    return DataLoader(
        XSSDataset(img_path, label_path),
        batch_size=batch_size,
        shuffle=shuffle,
    )

# 3. AlexNet-style discriminator
class DiscriminatorAlexNet(nn.Module):
    """Two conv/pool stages followed by a two-layer classifier head.

    The head's first linear layer expects 128 * 4 * 4 features, which matches
    single-channel 16x16 inputs after two 2x2 poolings. The network returns
    two raw logits per sample (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()

        conv_stack = [
            # Stage 1: 1 -> 64 channels, spatial size kept, then halved by pooling.
            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Stage 2: 64 -> 128 channels, spatial size kept, then halved again.
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)

        head = [
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 2),  # two-class output
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        return self.classifier(self.features(x))

# 4. Training routine
def train_discriminator(model, train_loader, val_loader, epochs=10, lr=1e-3):
    """Train ``model`` with Adam and cross-entropy, reporting accuracy each epoch.

    The model is moved to the module-level ``device``. After every epoch the
    function prints the summed batch loss with the training accuracy, then the
    validation accuracy measured in eval mode without gradients.

    Args:
        model: Network producing class logits.
        train_loader: Iterable of (inputs, labels) batches used for updates.
        val_loader: Iterable of (inputs, labels) batches used for validation.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.
    """
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch_no in range(1, epochs + 1):
        # ---- training pass ----
        model.train()
        running_loss = 0
        hits = 0
        seen = 0

        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            logits = model(inputs)
            batch_loss = criterion(logits, labels)
            batch_loss.backward()
            optimizer.step()

            # Loss is accumulated as a sum over batches, not averaged.
            running_loss += batch_loss.item()
            hits += (logits.argmax(dim=1) == labels).sum().item()
            seen += labels.size(0)

        train_acc = hits / seen
        print(f"Epoch {epoch_no}/{epochs} | Loss: {running_loss:.4f} | Train Acc: {train_acc:.4f}")

        # ---- validation pass ----
        model.eval()
        val_hits = 0
        val_seen = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                guesses = model(inputs).argmax(dim=1)
                val_hits += (guesses == labels).sum().item()
                val_seen += labels.size(0)
        val_acc = val_hits / val_seen
        print(f"           >> Val Acc: {val_acc:.4f}")

# 5. Test-set evaluation
def evaluate_model(model, test_loader):
    """Run ``model`` over ``test_loader`` and print a classification report.

    Predictions are the argmax over the model's output logits. Inputs are
    moved to the module-level ``device``; the model itself is not moved here.

    Args:
        model: Network producing two-class logits.
        test_loader: Iterable of (inputs, labels) batches.
    """
    model.eval()
    preds = []
    targets = []
    with torch.no_grad():
        for x, y in test_loader:
            out = model(x.to(device))
            preds.extend(out.argmax(dim=1).cpu().numpy())
            targets.extend(y.cpu().numpy())

    print("\n✅ 최종 테스트 결과:")
    # Pin the label set so the two target names always line up with the
    # report rows; without it, a split in which only one class appears makes
    # classification_report raise ValueError (names/classes count mismatch).
    print(
        classification_report(
            targets,
            preds,
            labels=[0, 1],
            target_names=["Normal", "XSS"],
        )
    )

# 6. Point the loaders at the .npy files, then train and evaluate.
# Only the training loader shuffles; validation and test use the default
# shuffle=False of create_dataloader_from_files.
train_loader = create_dataloader_from_files("train_images.npy", "train_labels.npy", shuffle=True)
val_loader = create_dataloader_from_files("val_images.npy", "val_labels.npy")
test_loader = create_dataloader_from_files("test_images.npy", "test_labels.npy")

model = DiscriminatorAlexNet()
train_discriminator(model, train_loader, val_loader, epochs=15, lr=1e-3)
evaluate_model(model, test_loader)


사용 장치: cpu


  def filter_except(validator, iterable, *exceptions):


Epoch 1/15 | Loss: 5.5016 | Train Acc: 0.9489
           >> Val Acc: 0.9961
Epoch 2/15 | Loss: 0.3893 | Train Acc: 0.9983
           >> Val Acc: 1.0000
Epoch 3/15 | Loss: 0.0509 | Train Acc: 0.9992
           >> Val Acc: 1.0000
Epoch 4/15 | Loss: 0.2948 | Train Acc: 0.9983
           >> Val Acc: 1.0000
Epoch 5/15 | Loss: 0.2588 | Train Acc: 0.9992
           >> Val Acc: 1.0000
Epoch 6/15 | Loss: 0.0034 | Train Acc: 1.0000
           >> Val Acc: 1.0000
Epoch 7/15 | Loss: 0.0010 | Train Acc: 1.0000
           >> Val Acc: 1.0000
Epoch 8/15 | Loss: 0.0008 | Train Acc: 1.0000
           >> Val Acc: 1.0000
Epoch 9/15 | Loss: 0.0006 | Train Acc: 1.0000
           >> Val Acc: 1.0000
Epoch 10/15 | Loss: 0.0006 | Train Acc: 1.0000
           >> Val Acc: 1.0000
Epoch 11/15 | Loss: 0.0004 | Train Acc: 1.0000
           >> Val Acc: 1.0000
Epoch 12/15 | Loss: 0.0004 | Train Acc: 1.0000
           >> Val Acc: 1.0000
Epoch 13/15 | Loss: 0.0003 | Train Acc: 1.0000
           >> Val Acc: 1.0000
Epoch 14

 1단계: 우회 기법 (Action) 정의

In [19]:
import random
import urllib.parse

def insert_comment(tag):
    """Return ``tag`` with every "<" replaced by an empty HTML comment.

    Note that the "<" character itself is replaced, not kept. A string
    without "<" is returned unchanged.
    """
    return tag.replace("<", "<!-- -->")

def url_encode(tag):
    """Percent-encode ``tag``, leaving "/" (the default safe set) unescaped."""
    return urllib.parse.quote(tag, safe="/")

def split_tag(tag):
    """Nest a "<script>" inside each "<script>" opening tag.

    Every literal "<script>" becomes "<scr<script>ipt>"; strings without
    that substring are returned unchanged.
    """
    return tag.replace("<script>", "<scr<script>ipt>")

def case_shift(tag):
    """Randomly upper- or lower-case each character of ``tag``.

    One ``random.random()`` draw is made per character, in order; a draw
    below 0.5 selects upper case, otherwise lower case.
    """
    shifted = []
    for ch in tag:
        to_upper = random.random() < 0.5
        shifted.append(ch.upper() if to_upper else ch.lower())
    return ''.join(shifted)

def space_injection(tag):
    """Pad angle brackets with spaces: "<" becomes "< " and ">" becomes " >"."""
    padding = str.maketrans({"<": "< ", ">": " >"})
    return tag.translate(padding)

# Set of available actions; each entry is a function mapping a string to a
# transformed string.
obfuscation_actions = [insert_comment, url_encode, split_tag, case_shift, space_injection]


2단계: MCTS-T 노드 구조 정의

In [20]:
import numpy as np

class MCTSNode:
    """One node of the search tree over transformed variants of a string.

    Each node stores the candidate string, a link to its parent, its
    children, and the visit/reward statistics used by the UCB selection rule.
    """

    def __init__(self, xss_string, depth=0, parent=None):
        self.xss = xss_string    # candidate string held by this node
        self.parent = parent     # None for the root
        self.children = []
        self.visits = 0          # number of backpropagations through this node
        self.reward = 0.0        # sum of backpropagated rewards
        self.depth = depth
        self.action = None       # name of the action that produced this node

    def is_leaf(self):
        """Return True when the node has no children."""
        return len(self.children) == 0

    def expand(self):
        """Append one child per entry of the module-level ``obfuscation_actions``."""
        for action in obfuscation_actions:
            child = MCTSNode(action(self.xss), depth=self.depth + 1, parent=self)
            child.action = action.__name__
            self.children.append(child)

    def ucb_score(self, c=1.41):
        """Return the UCB score: mean reward plus an exploration bonus.

        Unvisited nodes score ``inf`` so they are tried first. A node without
        a parent (the root) has no sibling competition, so only its mean
        reward is returned instead of dereferencing ``None``.
        """
        if self.visits == 0:
            return float('inf')
        mean_reward = self.reward / self.visits
        if self.parent is None:
            return mean_reward
        # Clamp to 1 so log() never sees 0 if statistics were set by hand.
        parent_visits = max(self.parent.visits, 1)
        return mean_reward + c * ((2 * np.log(parent_visits)) / self.visits) ** 0.5


 3단계: MCTS 탐색 알고리즘

In [21]:
def select_promising_node(node):
    """Descend from ``node`` to a leaf, following the highest UCB score.

    At each level the child with the largest ``ucb_score()`` is taken
    (the first one wins ties); the first leaf reached is returned.
    """
    current = node
    while True:
        if current.is_leaf():
            return current
        current = max(current.children, key=lambda child: child.ucb_score())

def backpropagate(node, reward):
    """Add one visit and ``reward`` to ``node`` and to each of its ancestors."""
    current = node
    while current is not None:
        current.reward += reward
        current.visits += 1
        current = current.parent


4단계: 보상 평가 함수 (Discriminator 활용)

In [22]:
import torch

def string_to_image_matrix(text, max_len=256):
    """Encode ``text`` as a 16x16 uint8 matrix of character codes.

    Characters are written row by row; unused cells stay 0. Code points of
    256 or above do not fit in uint8 and are stored as 0.

    Args:
        text: String to encode.
        max_len: Maximum number of leading characters to use. Values above
            the grid capacity (256) are clamped, so long inputs are truncated
            instead of indexing past the last row.

    Returns:
        A ``(16, 16)`` ``numpy.uint8`` array.
    """
    side = 16
    capacity = side * side
    # First apply the caller's limit, then the hard capacity of the grid.
    clipped = text[:max_len][:capacity]
    codes = [ord(c) if ord(c) < 256 else 0 for c in clipped]

    flat = np.zeros(capacity, dtype=np.uint8)
    flat[:len(codes)] = np.array(codes, dtype=np.uint8)
    return flat.reshape(side, side)

def calculate_reward(discriminator_model, xss_string):
    """Return the model's softmax probability of class 0 for ``xss_string``.

    The string is encoded with ``string_to_image_matrix``, scaled by 1/255
    and fed to the model as a single-sample batch. The model is switched to
    eval mode and moved to the module-level ``device`` as a side effect.
    Class 0 is the "normal" class, so a higher value means the detector was
    more thoroughly fooled.
    """
    net = discriminator_model.eval().to(device)
    pixels = string_to_image_matrix(xss_string)
    # [None, None] adds the batch and channel axes: (16, 16) -> (1, 1, 16, 16).
    batch = torch.tensor(pixels, dtype=torch.float32)[None, None].to(device) / 255.0
    with torch.no_grad():
        class_probs = torch.softmax(net(batch), dim=1)
    return class_probs[0][0].item()


5단계: 전체 Generator 실행

In [23]:
def generate_obfuscated_xss(original_xss, discriminator_model, max_iter=20):
    """Search for a transformed variant of ``original_xss`` with the best reward.

    Runs ``max_iter`` rounds of tree search. Each round selects a leaf by UCB;
    an unvisited leaf is scored directly, while a visited leaf is expanded and
    one randomly chosen child is scored. Scores are backpropagated to the root.
    Afterwards the leaf with the highest mean reward is printed and its string
    returned.

    Args:
        original_xss: Starting string placed at the root.
        discriminator_model: Model passed to ``calculate_reward``.
        max_iter: Number of search rounds.

    Returns:
        The string stored in the best-scoring leaf.
    """
    root = MCTSNode(original_xss)

    for _ in range(max_iter):
        leaf = select_promising_node(root)
        target = leaf
        if leaf.visits != 0:
            # Already evaluated once: grow the tree and sample one new child.
            leaf.expand()
            if not leaf.children:
                continue
            target = random.choice(leaf.children)
        score = calculate_reward(discriminator_model, target.xss)
        backpropagate(target, score)

    # Collect leaves in left-to-right pre-order using an explicit stack;
    # children are pushed reversed so the leftmost child is popped first.
    leaf_nodes = []
    pending = [root]
    while pending:
        current = pending.pop()
        if current.is_leaf():
            leaf_nodes.append(current)
        else:
            pending.extend(reversed(current.children))

    # The small epsilon keeps unvisited leaves from dividing by zero.
    best = max(leaf_nodes, key=lambda n: n.reward / (n.visits + 1e-6))
    print(f"\n✅ 우회 결과: {best.xss}")
    print(f"⮕ 보상: {best.reward:.4f} | 방문 수: {best.visits} | 적용 행동: {best.action}")
    return best.xss


실행

In [24]:
def generate_all_obfuscated_xss(input_path, output_path, discriminator_model, max_iter=30):
    """Generate one transformed string per input line and write them to a file.

    Blank lines in ``input_path`` are skipped and surrounding whitespace is
    stripped. Results are written to ``output_path`` one per line, only after
    every sample has been processed.

    Args:
        input_path: UTF-8 text file with one source string per line.
        output_path: UTF-8 text file to (over)write with the results.
        discriminator_model: Model forwarded to ``generate_obfuscated_xss``.
        max_iter: Search rounds per sample.
    """
    with open(input_path, "r", encoding="utf-8") as src:
        stripped = (raw.strip() for raw in src)
        original_samples = [text for text in stripped if text]

    sample_count = len(original_samples)
    print(f"총 {sample_count}개 XSS 문자열 우회 생성 시작...")

    obfuscated_list = []
    for position, xss in enumerate(original_samples, start=1):
        print(f"\n[{position}/{sample_count}] 원본: {xss}")
        obfuscated_list.append(
            generate_obfuscated_xss(xss, discriminator_model, max_iter=max_iter)
        )

    with open(output_path, "w", encoding="utf-8") as dst:
        dst.writelines(obf + "\n" for obf in obfuscated_list)

    print(f"\n✅ 모든 우회 문자열 저장 완료 → {output_path}")


In [25]:
# Run the generator over every line of generator_input.txt against the
# trained discriminator and save the results to generator_output.txt.
generate_all_obfuscated_xss(
    input_path="generator_input.txt",
    output_path="generator_output.txt",
    discriminator_model=model,
    max_iter=30
)


총 874개 XSS 문자열 우회 생성 시작...

[1/874] 원본: <script>console.log('BLV5DIBIS87PYKBE1QQHCJ22ET7LUGGUCPP9OTYYPSB697X2E7');console.log(document.cookie);</script>

✅ 우회 결과: %3C%21--%20--%3Escript%3Econsole.log%28%27BLV5DIBIS87PYKBE1QQHCJ22ET7LUGGUCPP9OTYYPSB697X2E7%27%29%3Bconsole.log%28document.cookie%29%3B%3C%21--%20--%3E/script%3E
⮕ 보상: 1.0000 | 방문 수: 1 | 적용 행동: url_encode

[2/874] 원본: <script>console.log('4IX6WF7YU8XE4XV7F78KR868GYBVZ8SLVEBA2WMLHD1NKP7ZVK');console.log(document.cookie);</script>

✅ 우회 결과: %3C%21--%20--%3Escript%3Econsole.log%28%274IX6WF7YU8XE4XV7F78KR868GYBVZ8SLVEBA2WMLHD1NKP7ZVK%27%29%3Bconsole.log%28document.cookie%29%3B%3C%21--%20--%3E/script%3E
⮕ 보상: 1.0000 | 방문 수: 1 | 적용 행동: url_encode

[3/874] 원본: <script>console.log('9PRBGN0KXTIRDJWTY9DKY8IVZKIUDMTHH8PCBZ6PM37OLPFVG4');console.log(document.cookie);</script>

✅ 우회 결과: %3C%21--%20--%3Escript%3Econsole.log%28%279PRBGN0KXTIRDJWTY9DKY8IVZKIUDMTHH8PCBZ6PM37OLPFVG4%27%29%3Bconsole.log%28document.cookie%29%3B%3C%21--%20--%3E/

평가

In [26]:
def evaluate_obfuscation_success(discriminator_model, obf_file_path):
    """Print how many strings in a file the model classifies as class 0.

    Each non-blank line is encoded with ``string_to_image_matrix``, scaled by
    1/255 and classified; a prediction of 0 ("normal") counts as a detection
    failure. An empty file reports a 0.00% rate instead of dividing by zero.

    Args:
        discriminator_model: Model producing two-class logits. It is put in
            eval mode and moved to the module-level ``device``.
        obf_file_path: UTF-8 text file with one string per line.
    """
    # 1. Load the strings, dropping blank lines
    with open(obf_file_path, "r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        samples = [text for text in stripped if text]

    total = len(samples)
    fooled = 0

    # Keep the model on the same device as the input tensors, matching
    # what calculate_reward does.
    discriminator_model.eval().to(device)
    with torch.no_grad():
        for xss in samples:
            img = string_to_image_matrix(xss)
            tensor = torch.tensor(img, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(device) / 255.0
            output = discriminator_model(tensor)
            pred = torch.argmax(output, dim=1).item()

            if pred == 0:  # classified as normal -> detection failed
                fooled += 1

    # Guard against an empty input file.
    success_rate = fooled / total * 100 if total else 0.0

    print(f"\n✅ 우회 문자열 평가 결과:")
    print(f" - 전체 개수: {total}")
    print(f" - 탐지 실패 (정상으로 오판): {fooled}")
    print(f" - 우회 성공률: {success_rate:.2f}%")


In [27]:
evaluate_obfuscation_success(model, "generator_output.txt")



✅ 우회 문자열 평가 결과:
 - 전체 개수: 874
 - 탐지 실패 (정상으로 오판): 874
 - 우회 성공률: 100.00%


D-G GAN 혼합 루트

우회 문자열 → 이미지(.npy) 변환 코드

In [28]:
import numpy as np
import os

def convert_string_to_image_matrix(text, max_len=256):
    """Encode ``text`` as a 16x16 uint8 matrix of character codes.

    Characters are written row by row; unused cells stay 0. Code points of
    256 or above do not fit in uint8 and are stored as 0.

    Args:
        text: String to encode.
        max_len: Maximum number of leading characters to use. Values above
            the grid capacity (256) are clamped, so long inputs are truncated
            instead of indexing past the last row.

    Returns:
        A ``(16, 16)`` ``numpy.uint8`` array.
    """
    side = 16
    capacity = side * side
    # First apply the caller's limit, then the hard capacity of the grid.
    clipped = text[:max_len][:capacity]
    codes = [ord(c) if ord(c) < 256 else 0 for c in clipped]

    flat = np.zeros(capacity, dtype=np.uint8)
    flat[:len(codes)] = np.array(codes, dtype=np.uint8)
    return flat.reshape(side, side)

def convert_obfuscated_txt_to_npy(input_txt, out_dir):
    """Encode each line of a text file as a 16x16 image and save .npy arrays.

    Writes ``images.npy`` (shape ``(N, 16, 16)``, uint8) and ``labels.npy``
    (shape ``(N,)``, int64, all ones) into ``out_dir``, creating the directory
    if needed. Blank lines are skipped.

    Args:
        input_txt: UTF-8 text file with one string per line.
        out_dir: Destination directory for the two arrays.
    """
    with open(input_txt, "r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        lines = [text for text in stripped if text]

    # Force the (N, 16, 16) layout and dtypes explicitly so an empty input
    # still yields arrays that can be concatenated with the existing splits
    # (a bare np.array([]) would be 1-D float64).
    images = np.array(
        [convert_string_to_image_matrix(t) for t in lines], dtype=np.uint8
    ).reshape(-1, 16, 16)
    # Label 1 = XSS: an evasion variant is still an attack sample.
    labels = np.ones(len(images), dtype=np.int64)

    os.makedirs(out_dir, exist_ok=True)
    np.save(os.path.join(out_dir, "images.npy"), images)
    np.save(os.path.join(out_dir, "labels.npy"), labels)
    print(f"✅ 우회 이미지 저장 완료: {out_dir}/images.npy, labels.npy")

# Usage example: encode generator_output.txt into dataset_obfuscated/
convert_obfuscated_txt_to_npy("generator_output.txt", "dataset_obfuscated")


✅ 우회 이미지 저장 완료: dataset_obfuscated/images.npy, labels.npy


3개 클래스 병합 후 분할

In [31]:
def merge_and_split_all_with_existing_splits(
    train_img, train_lbl,
    val_img, val_lbl,
    test_img, test_lbl,
    obf_img, obf_lbl,
    out_dir="dataset_gan_split"
):
    """Append extra samples to existing train/val/test splits and save them.

    The extra (obfuscated) samples are cut in file order into 60% / 20% /
    remainder and concatenated after the existing train / val / test arrays
    respectively. The six merged arrays are written to ``out_dir`` and the
    resulting split sizes are printed.

    Args:
        train_img, train_lbl: Paths to the existing training images/labels.
        val_img, val_lbl: Paths to the existing validation images/labels.
        test_img, test_lbl: Paths to the existing test images/labels.
        obf_img, obf_lbl: Paths to the extra images/labels to distribute.
        out_dir: Directory receiving the merged ``.npy`` files.
    """
    import numpy as np
    import os

    # Load the existing splits (images first, then labels, split by split).
    sources = (
        ("train", train_img, train_lbl),
        ("val", val_img, val_lbl),
        ("test", test_img, test_lbl),
    )
    existing = {}
    for split, img_path, lbl_path in sources:
        existing[split] = (np.load(img_path), np.load(lbl_path))

    # Load the extra samples.
    extra_images = np.load(obf_img)
    extra_labels = np.load(obf_lbl)

    # 6:2:2 partition of the extra samples, in their stored order.
    count = len(extra_images)
    train_end = int(count * 0.6)
    val_end = train_end + int(count * 0.2)
    windows = {
        "train": slice(None, train_end),
        "val": slice(train_end, val_end),
        "test": slice(val_end, None),
    }

    # Merge everything before touching the disk.
    merged = {}
    for split, window in windows.items():
        base_images, base_labels = existing[split]
        merged[split] = (
            np.concatenate([base_images, extra_images[window]], axis=0),
            np.concatenate([base_labels, extra_labels[window]], axis=0),
        )

    # Save.
    os.makedirs(out_dir, exist_ok=True)
    targets = (
        ("train", "train_images.npy", "train_labels.npy"),
        ("val", "val_images.npy", "val_labels.npy"),
        ("test", "test_images.npy", "test_labels.npy"),
    )
    for split, image_file, label_file in targets:
        images, labels = merged[split]
        np.save(os.path.join(out_dir, image_file), images)
        np.save(os.path.join(out_dir, label_file), labels)

    print(f"✅ 병합 및 저장 완료 → {out_dir}")
    print(f" - Train: {len(merged['train'][0])}")
    print(f" - Val:   {len(merged['val'][0])}")
    print(f" - Test:  {len(merged['test'][0])}")


In [32]:
# Merge the original splits with the samples in dataset_obfuscated/ and
# write the combined splits to dataset_gan_split/.
merge_and_split_all_with_existing_splits(
    train_img="train_images.npy",
    train_lbl="train_labels.npy",
    val_img="val_images.npy",
    val_lbl="val_labels.npy",
    test_img="test_images.npy",
    test_lbl="test_labels.npy",
    obf_img="dataset_obfuscated/images.npy",
    obf_lbl="dataset_obfuscated/labels.npy",
    out_dir="dataset_gan_split"
)


✅ 병합 및 저장 완료 → dataset_gan_split
 - Train: 1718
 - Val:   430
 - Test:  433


Discriminator 재학습 (GAN용)

In [33]:
# Load the merged splits (only the training loader shuffles)
train_loader = create_dataloader_from_files("dataset_gan_split/train_images.npy", "dataset_gan_split/train_labels.npy", shuffle=True)
val_loader = create_dataloader_from_files("dataset_gan_split/val_images.npy", "dataset_gan_split/val_labels.npy")
test_loader = create_dataloader_from_files("dataset_gan_split/test_images.npy", "dataset_gan_split/test_labels.npy")

# Initialize a fresh model and train it on the merged data
model = DiscriminatorAlexNet()
train_discriminator(model, train_loader, val_loader, epochs=15, lr=1e-3)

# Evaluate on the merged test split
evaluate_model(model, test_loader)


Epoch 1/15 | Loss: 13.4692 | Train Acc: 0.9075
           >> Val Acc: 0.9907
Epoch 2/15 | Loss: 0.9717 | Train Acc: 0.9965
           >> Val Acc: 1.0000
Epoch 3/15 | Loss: 0.5286 | Train Acc: 0.9977
           >> Val Acc: 1.0000
Epoch 4/15 | Loss: 0.4726 | Train Acc: 0.9983
           >> Val Acc: 1.0000
Epoch 5/15 | Loss: 0.5616 | Train Acc: 0.9988
           >> Val Acc: 1.0000
Epoch 6/15 | Loss: 0.4792 | Train Acc: 0.9988
           >> Val Acc: 0.9953
Epoch 7/15 | Loss: 0.3139 | Train Acc: 0.9988
           >> Val Acc: 1.0000
Epoch 8/15 | Loss: 0.2306 | Train Acc: 0.9988
           >> Val Acc: 0.9953
Epoch 9/15 | Loss: 0.2166 | Train Acc: 0.9994
           >> Val Acc: 0.9977
Epoch 10/15 | Loss: 0.2388 | Train Acc: 0.9994
           >> Val Acc: 1.0000
Epoch 11/15 | Loss: 0.0220 | Train Acc: 1.0000
           >> Val Acc: 1.0000
Epoch 12/15 | Loss: 0.0517 | Train Acc: 1.0000
           >> Val Acc: 1.0000
Epoch 13/15 | Loss: 0.1049 | Train Acc: 0.9994
           >> Val Acc: 0.9953
Epoch 1