In [None]:
import numpy as np
import pandas as pd
import os

# Hide every CUDA device so that TensorFlow (imported in a later cell) runs on CPU only.
os.environ.update(CUDA_VISIBLE_DEVICES="-1")

# Folder on the mounted Drive that holds the input array and receives the outputs.
save_dir = "/content/drive/MyDrive/sooin/"
npy_path = os.path.join(save_dir, "botnet.npy")

# ✅ Load the data; the same array also serves as the minority-class sample set.
X_minority = X = np.load(npy_path)
print(f"데이터 로드 완료: {X.shape}")

# ✅ Labels: every row is a botnet sample, so the label vector is all ones.
y = np.ones(X.shape[0])

데이터 로드 완료: (866, 4)


In [None]:
# ==========================
# ✅ Install packages (Stable-Baselines3, Gymnasium, PyTorch)
# ==========================
!pip install stable-baselines3 gymnasium torch --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m187.2/187.2 kB[0m [31m988.2 kB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m40.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m52.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m61.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# ==========================
# ✅ GAN 모델 (Generator + Discriminator)
# ==========================
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

# Generator
def build_generator(latent_dim, feature_dim):
    """Build the generator MLP that maps latent noise to one synthetic feature row.

    Args:
        latent_dim: Size of the noise vector fed to the network.
        feature_dim: Number of features in one generated sample.

    Returns:
        An uncompiled Keras ``Sequential`` model:
        Dense(128, relu) -> Dense(256, relu) -> Dense(feature_dim, tanh).
    """
    return models.Sequential([
        # Declare the input with an explicit Input layer; passing `input_dim`
        # to Dense is the legacy form and emits a UserWarning on Keras 3.
        layers.Input(shape=(latent_dim,)),
        layers.Dense(128, activation='relu'),
        layers.Dense(256, activation='relu'),
        # tanh bounds every generated feature to [-1, 1].
        # NOTE(review): the RL environment in this notebook declares its
        # observations in [0, 1]; confirm the real features are scaled
        # consistently with this output range.
        layers.Dense(feature_dim, activation='tanh'),
    ])

# Discriminator
def build_discriminator(feature_dim):
    """Build the two-headed discriminator.

    A shared Dense(128, relu) -> Dense(64, relu) trunk feeds two independent
    single-unit sigmoid heads.

    Args:
        feature_dim: Number of features in one input sample.

    Returns:
        An uncompiled Keras ``Model`` whose outputs are ``[validity, label]``
        (in that order), one sigmoid unit each.
    """
    features_in = layers.Input(shape=(feature_dim,))

    # Shared trunk.
    hidden = features_in
    for units in (128, 64):
        hidden = layers.Dense(units, activation='relu')(hidden)

    # Two sigmoid heads on the same trunk; callers index the outputs by position.
    heads = [layers.Dense(1, activation='sigmoid')(hidden) for _ in range(2)]
    return models.Model(features_in, heads)

# Combined GAN (generator followed by the discriminator)
def build_gan(generator, discriminator, latent_dim):
    """Chain the generator and the discriminator into a single compiled model.

    Side effect: sets ``discriminator.trainable = False`` on the object that is
    passed in (it is not restored afterwards). The intent is that training the
    combined model updates the generator only.

    Args:
        generator: Model mapping a latent vector to a feature row.
        discriminator: Two-output model returning ``(validity, label)``.
        latent_dim: Size of the latent noise vector.

    Returns:
        A Keras ``Model`` mapping noise to ``[validity, label]``, compiled with
        binary cross-entropy on both outputs and ``Adam(0.0002, 0.5)``.
    """
    discriminator.trainable = False

    noise_in = layers.Input(shape=(latent_dim,))
    validity_out, label_out = discriminator(generator(noise_in))

    stacked = models.Model(noise_in, [validity_out, label_out])
    stacked.compile(
        optimizer=Adam(0.0002, 0.5),
        loss=['binary_crossentropy'] * 2,
    )
    return stacked

# Hyper-parameters and model instances shared by the rest of the notebook.
latent_dim = 32
feature_dim = X.shape[1]

generator = build_generator(latent_dim, feature_dim)

# The discriminator is compiled on its own before it is embedded in the GAN.
discriminator = build_discriminator(feature_dim)
discriminator.compile(
    optimizer=Adam(learning_rate=0.0002, beta_1=0.5),
    loss=['binary_crossentropy', 'binary_crossentropy'],
)

gan = build_gan(generator, discriminator, latent_dim)

# ==========================
# ✅ 강화학습 환경 (Gymnasium + Stable-Baselines3)
# ==========================
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO

class BotnetEnv(gym.Env):
    """Gymnasium environment in which an agent nudges one feature of a sample per step.

    Each episode starts from a randomly chosen row of ``X_minority``. An action
    selects a feature index; that feature is increased by 0.01 (capped at 1.0).
    The reward is the discriminator's second ("label") output for the current
    state, and the episode terminates once that reward exceeds 0.5.

    Args:
        X_minority: 2-D array of samples; rows are samples, columns are features.
        discriminator: Keras model returning ``(validity, label)`` for a batch.
        max_steps: Number of steps after which a non-terminated episode is
            reported as truncated, so episodes cannot run forever.
    """

    def __init__(self, X_minority, discriminator, max_steps=100):
        super().__init__()
        self.X_minority = X_minority
        self.feature_dim = X_minority.shape[1]
        self.discriminator = discriminator
        self.max_steps = max_steps
        self._steps = 0

        # One discrete action per feature: "increase feature i".
        self.action_space = spaces.Discrete(self.feature_dim)
        # NOTE(review): bounds assume features are scaled to [0, 1] — confirm
        # against how botnet.npy was normalised.
        self.observation_space = spaces.Box(low=0, high=1, shape=(self.feature_dim,), dtype=np.float32)
        self.state = self._get_random_sample()

    def _get_random_sample(self):
        """Return a float32 copy of one randomly chosen row of ``X_minority``."""
        # Use the environment's own RNG so that reset(seed=...) makes sampling
        # reproducible; the global np.random state ignores that seed.
        row = self.np_random.integers(0, len(self.X_minority))
        # Cast to the dtype declared in observation_space.
        return np.array(self.X_minority[row], dtype=np.float32)

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``."""
        super().reset(seed=seed)
        self.state = self._get_random_sample()
        self._steps = 0
        # Hand out a copy so callers never alias the internal state.
        return self.state.copy(), {}

    def step(self, action):
        """Increase the selected feature and score the new state.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
        """
        delta = 0.01
        # Accept Python ints, NumPy scalars and 0-d / 1-element arrays alike.
        feature = int(np.asarray(action).reshape(-1)[0])
        self.state[feature] = min(1.0, self.state[feature] + delta)
        self._steps += 1

        reward = self._calculate_reward()
        terminated = reward > 0.5
        truncated = (not terminated) and self._steps >= self.max_steps
        return self.state.copy(), reward, terminated, truncated, {}

    def _calculate_reward(self):
        """Return the discriminator's "label" output for the current state as a float."""
        # Run on CPU explicitly to avoid clashing with other GPU users.
        with tf.device('/CPU:0'):
            # Call the model directly: Model.predict() carries a large fixed
            # per-call overhead and is meant for big batches, not one row.
            # NOTE(review): the reward reads the second ("label") head, which
            # the training loop always targets with 1 — confirm this, rather
            # than the validity head, is the intended signal.
            _, label = self.discriminator(self.state.reshape(1, -1), training=False)
        # Extract the single element explicitly; float() on an array with
        # ndim > 0 is deprecated in recent NumPy releases.
        return float(np.asarray(label).reshape(-1)[0])

# Environment setup
env = BotnetEnv(X_minority, discriminator)

# ==========================
# ✅ PPO agent training
# ==========================
# PPO always collects whole rollouts of `n_steps` transitions. With the default
# (2048) every `learn(total_timesteps=100)` call below would still gather 2048
# steps, so the rollout length is tied to the per-epoch budget instead.
PPO_STEPS_PER_EPOCH = 100
model = PPO(
    "MlpPolicy",
    env,
    n_steps=PPO_STEPS_PER_EPOCH,
    batch_size=50,  # divides n_steps evenly, avoiding a truncated mini-batch
    verbose=1,
)
# Warm-up phase; log every 10th rollout to keep the cell output readable.
model.learn(total_timesteps=5000, log_interval=10)

# ==========================
# ✅ GAN training loop (PPO -> GAN)
# ==========================
epochs = 500
batch_size = 16  # small batch to keep memory usage low

for epoch in range(epochs):
    idx = np.random.randint(0, X.shape[0], batch_size)
    real_samples = X[idx]
    noise = np.random.normal(0, 1, (batch_size, latent_dim))
    fake_samples = generator.predict(noise, verbose=0)

    # --- Discriminator update -------------------------------------------
    # build_gan() leaves discriminator.trainable = False. Keras 3 appears to
    # read the flag at train time instead of freezing it at compile(), which
    # would turn these two calls into no-ops; enable it explicitly.
    # NOTE(review): harmless on Keras 2 — confirm against the installed version.
    discriminator.trainable = True
    d_loss_real = discriminator.train_on_batch(real_samples, [np.ones((batch_size,1)), y[idx].reshape(-1,1)])
    d_loss_fake = discriminator.train_on_batch(fake_samples, [np.zeros((batch_size,1)), np.ones((batch_size,1))])

    # Continue PPO training against the freshly updated discriminator.
    # reset_num_timesteps=False keeps the timestep counter running across
    # calls instead of restarting from zero each epoch.
    model.learn(total_timesteps=PPO_STEPS_PER_EPOCH, reset_num_timesteps=False)

    # --- Generator update -----------------------------------------------
    # Freeze the discriminator again so only the generator's weights move.
    discriminator.trainable = False
    g_loss = gan.train_on_batch(noise, [np.ones((batch_size,1)), np.ones((batch_size,1))])

    if epoch % 50 == 0:
        print(f"{epoch} [D loss: {d_loss_real[0]+d_loss_fake[0]:.4f}] [G loss: {g_loss}]")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  return float(pred)


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | 0.501    |
| time/              |          |
|    fps             | 14       |
|    iterations      | 1        |
|    time_elapsed    | 144      |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1           |
|    ep_rew_mean          | 0.501       |
| time/                   |             |
|    fps                  | 14          |
|    iterations           | 2           |
|    time_elapsed         | 277         |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.020768687 |
|    clip_fraction        | 0.363       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.37       |
|    explained_variance   | -1.27       |
|    learning_rate        | 0.

KeyboardInterrupt: 

**Discriminator 성능 평가**

Accuracy: 진짜/가짜 구분 정확도

AUC (ROC Curve): 진짜와 가짜를 얼마나 잘 구분하는지

**Generator 성능 평가** : validity 점수 평균이 높을수록 좋음.

**강화학습 에이전트 성능 평가**

Reward 평균: 에이전트가 받는 보상의 평균 (클수록 목표 달성)

Success Rate: 에피소드가 done=True로 종료된 비율 (예: reward > 0.5 달성)

In [None]:
from sklearn.metrics import accuracy_score, roc_auc_score

# 1. Prepare real / fake samples
batch_size = 100  # number of samples of each kind used for evaluation
idx = np.random.randint(0, X.shape[0], batch_size)
real_samples = X[idx]
noise = np.random.normal(0, 1, (batch_size, latent_dim))
fake_samples = generator.predict(noise, verbose=0)

# 2. Discriminator predictions
real_valid, real_label = discriminator.predict(real_samples, verbose=0)
fake_valid, fake_label = discriminator.predict(fake_samples, verbose=0)

# 3. Discriminator metrics
real_labels = np.ones(batch_size)
fake_labels = np.zeros(batch_size)

real_pred_label = (real_valid.flatten() > 0.5).astype(int)
fake_pred_label = (fake_valid.flatten() > 0.5).astype(int)

acc_real = accuracy_score(real_labels, real_pred_label)
acc_fake = accuracy_score(fake_labels, fake_pred_label)
acc_total = (acc_real + acc_fake) / 2

# ROC AUC is only defined when both classes are present, so score the real and
# fake batches together. Computing it per batch (single class each) is
# undefined and is what produced `nan`.
all_labels = np.concatenate([real_labels, fake_labels])
all_scores = np.concatenate([real_valid.flatten(), fake_valid.flatten()])
auc_total = roc_auc_score(all_labels, all_scores)

# 4. Generator metric (fooling score): mean validity assigned to fake samples
g_fooling_score = np.mean(fake_valid.flatten())

print("🧪 [Discriminator 성능 평가]")
print(f"Accuracy (Real vs Fake): {acc_total:.4f}")
print(f"AUC (Real vs Fake): {auc_total:.4f}")

print("\n🧪 [Generator 성능 평가]")
print(f"Fooling Score (mean validity of fake): {g_fooling_score:.4f}")

# 5. PPO agent evaluation
print("\n🧪 [PPO 에이전트 평가]")
episodes = 20
max_eval_steps = 200  # hard cap so an episode that never terminates cannot hang the cell
success_count = 0
rewards = []

for _ in range(episodes):
    obs, _ = env.reset()
    done = False
    truncated = False
    ep_reward = 0.0
    steps = 0
    while not (done or truncated) and steps < max_eval_steps:
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, truncated, info = env.step(action)
        ep_reward += reward
        steps += 1
    rewards.append(ep_reward)
    # Success means the environment actually terminated (step reward > 0.5).
    # Testing the cumulative reward after a loop that only exits on
    # termination made this metric 100% by construction.
    if done:
        success_count += 1

print(f"Average Reward per Episode: {np.mean(rewards):.4f}")
print(f"Success Rate (>0.5): {success_count / episodes:.2%}")

  return float(pred)


🧪 [Discriminator 성능 평가]
Accuracy (Real vs Fake): 0.4950
AUC (Real vs Fake): nan

🧪 [Generator 성능 평가]
Fooling Score (mean validity of fake): 0.5318

🧪 [PPO 에이전트 평가]
Average Reward per Episode: 0.5006
Success Rate (>0.5): 100.00%


In [None]:
# ==========================
# ✅ Save generated data
# ==========================
num_samples = 500

# Draw latent noise and run it through the generator.
noise = np.random.normal(loc=0, scale=1, size=(num_samples, latent_dim))
generated_samples = generator.predict(noise)

# Both output files live next to the input data.
npy_gen, csv_gen = (
    os.path.join(save_dir, filename)
    for filename in ("generated_botnet.npy", "generated_botnet.csv")
)

# Persist the same array twice: binary .npy and header-ed CSV without an index column.
np.save(npy_gen, generated_samples)
pd.DataFrame(generated_samples).to_csv(csv_gen, index=False)

print(f"생성 데이터 저장 완료: {npy_gen}, {csv_gen}")

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
생성 데이터 저장 완료: /content/drive/MyDrive/sooin/generated_botnet.npy, /content/drive/MyDrive/sooin/generated_botnet.csv
