# Instalasi & Import

In [1]:
# Cell 1: Install & Import Library
# XGBoost biasanya sudah ada di Kaggle, tapi kita pastikan versinya update
!pip install xgboost optuna category_encoders

import os
import time
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import xgboost as xgb
import optuna

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score, precision_score, recall_score, roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.utils.class_weight import compute_sample_weight
from optuna.samplers import TPESampler, NSGAIISampler, RandomSampler
from optuna.pruners import SuccessiveHalvingPruner, MedianPruner

# Konfigurasi
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
sns.set_style("whitegrid")

print("‚úÖ Setup Selesai. Menggunakan XGBoost versi:", xgb.__version__)

The history saving thread hit an unexpected error (OperationalError('attempt to write a readonly database')).History will not be written to the database.


  if entities is not ():


‚úÖ Setup Selesai. Menggunakan XGBoost versi: 3.1.0


# Load Dataset (Otomatis)

In [2]:
# Cell 2: Load Data dari Kaggle Input
print("üìÇ Mencari file dataset di /kaggle/input/ ...")

train_path = None
test_path = None

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        if "training" in filename.lower() and "csv" in filename:
            train_path = full_path
        elif "testing" in filename.lower() and "csv" in filename:
            test_path = full_path

if train_path and test_path:
    print(f"‚úÖ Training Data: {train_path}")
    print(f"‚úÖ Testing Data: {test_path}")
    
    df_train = pd.read_csv(train_path)
    df_test = pd.read_csv(test_path)
    
    print(f"Shape Train: {df_train.shape}")
    print(f"Shape Test:  {df_test.shape}")
else:
    raise FileNotFoundError("‚ùå File dataset tidak ditemukan! Pastikan sudah Add Input.")

üìÇ Mencari file dataset di /kaggle/input/ ...
‚úÖ Training Data: /kaggle/input/unsw-nb15/UNSW_NB15_training-set.csv
‚úÖ Testing Data: /kaggle/input/unsw-nb15/UNSW_NB15_testing-set.csv
Shape Train: (175341, 45)
Shape Test:  (82332, 45)


# Preprocessing & Class Weights

In [3]:
# Cell 3: Preprocessing & Perhitungan Class Weights
# 1. Drop Kolom Sampah
drop_cols = ['id'] 
df_train = df_train.drop(columns=drop_cols, errors='ignore')
df_test = df_test.drop(columns=drop_cols, errors='ignore')

# 2. Pisahkan Fitur & Target
X_train = df_train.drop(columns=['attack_cat', 'label'])
y_train_raw = df_train['attack_cat']

X_test = df_test.drop(columns=['attack_cat', 'label'])
y_test_raw = df_test['attack_cat']

# 3. Encoding Fitur Kategorikal (Label Encoding untuk XGBoost)
# XGBoost bisa handle kategori, tapi lebih aman di-encode angka
cat_cols = X_train.select_dtypes(include=['object']).columns

for col in cat_cols:
    le = LabelEncoder()
    # Fit gabungan agar tidak ada unknown label
    full_data = pd.concat([X_train[col], X_test[col]], axis=0).astype(str)
    le.fit(full_data)
    X_train[col] = le.transform(X_train[col].astype(str))
    X_test[col] = le.transform(X_test[col].astype(str))

# 4. Encoding Target (Multiclass Wajib 0,1,2...)
le_target = LabelEncoder()
y_train = le_target.fit_transform(y_train_raw)
y_test = le_target.transform(y_test_raw)

# 5. Scaling (Disarankan untuk XGBoost agar konvergensi stabil)
scaler = StandardScaler()
X_train_enc = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns)
X_test_enc = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns)

# 6. HITUNG CLASS WEIGHTS (Pengganti SMOTE)
# Kita hitung bobot tiap sampel. Sampel kelas minoritas dapat bobot besar.
sample_weights_train = compute_sample_weight(
    class_weight='balanced',
    y=y_train
)

print("‚úÖ Preprocessing Selesai.")
print(f"Classes: {le_target.classes_}")
print(f"Contoh Sample Weights (5 data awal): {sample_weights_train[:5]}")

‚úÖ Preprocessing Selesai.
Classes: ['Analysis' 'Backdoor' 'DoS' 'Exploits' 'Fuzzers' 'Generic' 'Normal'
 'Reconnaissance' 'Shellcode' 'Worms']
Contoh Sample Weights (5 data awal): [0.31310893 0.31310893 0.31310893 0.31310893 0.31310893]


# Definisi Objective Function (Generic)

In [14]:
# Cell 4: Definisi Objective Function XGBoost (FIXED FOR XGBOOST 2.0)
from xgboost.callback import TrainingCallback
import xgboost as xgb

# Custom Pruning Callback
class OptunaPruningCallback(TrainingCallback):
    def __init__(self, trial, metric_name):
        self.trial = trial
        self.metric_name = metric_name
        
    def after_iteration(self, model, epoch, evals_log):
        if not evals_log:
            return False
        current_score = evals_log['validation_0'][self.metric_name][-1]
        self.trial.report(current_score, epoch)
        if self.trial.should_prune():
            raise optuna.exceptions.TrialPruned()
        return False

def objective_xgboost(trial):
    # 1. Hyperparameter Space
    param = {
        'n_estimators': trial.suggest_int('n_estimators', 500, 3000),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True),
        
        # --- PERBAIKAN UTAMA DI SINI (XGBoost 2.0+) ---
        'objective': 'multi:softmax',
        'num_class': len(le_target.classes_),
        'tree_method': 'hist',  # Ganti 'gpu_hist' menjadi 'hist'
        'device': 'cuda',       # Tambahkan ini untuk mengaktifkan GPU
        'eval_metric': 'mlogloss',
        'verbosity': 0
    }

    # 2. Setup Pruning
    pruning_callback = OptunaPruningCallback(trial, "mlogloss")

    # 3. Model Training
    model = xgb.XGBClassifier(
        **param, 
        random_state=42, 
        callbacks=[pruning_callback]
    )
    
    model.fit(
        X_train_enc, y_train,
        sample_weight=sample_weights_train, 
        eval_set=[(X_test_enc, y_test)],
        verbose=False
    )

    # 4. Evaluasi
    preds = model.predict(X_test_enc)
    f1 = f1_score(y_test, preds, average='macro')
    
    return f1
print("‚úÖ Define Objective Function Selesai.")

‚úÖ Define Objective Function Selesai.


# Strategi 1 - Optuna TPE (Tree-structured Parzen Estimator)

In [15]:
# Cell 5: Optimasi TPE
print("üöÄ Memulai Optimasi TPE...")
start_time = time.time()

# Sampler TPE, Pruner Median (Standar)
study_tpe = optuna.create_study(
    direction='maximize',
    sampler=TPESampler(seed=42),
    pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=5)
)

study_tpe.optimize(objective_xgboost, n_trials=30)
time_tpe = time.time() - start_time

print(f"‚úÖ TPE Selesai dalam {time_tpe:.2f} detik.")
print(f"Best F1 TPE: {study_tpe.best_value}")

[32m[I 2025-12-29 14:20:18,033][0m A new study created in memory with name: no-name-f98d9e09-3268-4273-9d8c-091a6cd1880c[0m


üöÄ Memulai Optimasi TPE...


[32m[I 2025-12-29 14:20:57,350][0m Trial 0 finished with value: 0.5167651539650223 and parameters: {'n_estimators': 1436, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.7993292420985183, 'colsample_bytree': 0.5780093202212182, 'gamma': 0.7799726016810132, 'min_child_weight': 1, 'reg_alpha': 0.08499808989182997, 'reg_lambda': 0.0006440507553993703}. Best is trial 0 with value: 0.5167651539650223.[0m
[32m[I 2025-12-29 14:21:23,362][0m Trial 1 finished with value: 0.5119503845065664 and parameters: {'n_estimators': 2270, 'max_depth': 3, 'learning_rate': 0.2708160864249968, 'subsample': 0.9162213204002109, 'colsample_bytree': 0.6061695553391381, 'gamma': 0.9091248360355031, 'min_child_weight': 2, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995}. Best is trial 0 with value: 0.5167651539650223.[0m
[32m[I 2025-12-29 14:21:49,078][0m Trial 2 finished with value: 0.5074041522721873 and parameters: {'n_estimators': 1580, 'max_depth': 5, 'lea

‚úÖ TPE Selesai dalam 1202.33 detik.
Best F1 TPE: 0.5255424954261267


# Strategi 2 - Optuna ASHA (Asynchronous Successive Halving)

In [16]:
# Cell 6: Optimasi ASHA
print("üöÄ Memulai Optimasi ASHA...")
start_time = time.time()

# Di Optuna, ASHA diimplementasikan lewat SuccessiveHalvingPruner
# Sampler biasanya RandomSampler agar eksplorasi luas, lalu dipangkas ASHA
study_asha = optuna.create_study(
    direction='maximize',
    sampler=RandomSampler(seed=42), 
    pruner=SuccessiveHalvingPruner(min_resource=100, reduction_factor=3)
)

study_asha.optimize(objective_xgboost, n_trials=30)
time_asha = time.time() - start_time

print(f"‚úÖ ASHA Selesai dalam {time_asha:.2f} detik.")
print(f"Best F1 ASHA: {study_asha.best_value}")

[32m[I 2025-12-29 14:41:00,098][0m A new study created in memory with name: no-name-7f45e1b5-9cdc-4b68-a862-5e9600f9608e[0m


üöÄ Memulai Optimasi ASHA...


[32m[I 2025-12-29 14:41:38,484][0m Trial 0 finished with value: 0.5167651539650223 and parameters: {'n_estimators': 1436, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.7993292420985183, 'colsample_bytree': 0.5780093202212182, 'gamma': 0.7799726016810132, 'min_child_weight': 1, 'reg_alpha': 0.08499808989182997, 'reg_lambda': 0.0006440507553993703}. Best is trial 0 with value: 0.5167651539650223.[0m
[32m[I 2025-12-29 14:41:42,955][0m Trial 1 pruned. [0m
[32m[I 2025-12-29 14:41:49,217][0m Trial 2 pruned. [0m
[32m[I 2025-12-29 14:42:20,148][0m Trial 3 pruned. [0m
[32m[I 2025-12-29 14:42:22,097][0m Trial 4 pruned. [0m
[32m[I 2025-12-29 14:42:24,946][0m Trial 5 pruned. [0m
[32m[I 2025-12-29 14:42:41,674][0m Trial 6 pruned. [0m
[32m[I 2025-12-29 14:42:44,307][0m Trial 7 pruned. [0m
[32m[I 2025-12-29 14:42:49,911][0m Trial 8 pruned. [0m
[32m[I 2025-12-29 14:43:10,243][0m Trial 9 pruned. [0m
[32m[I 2025-12-29 14:43:13,668][0m Trial 10 prune

‚úÖ ASHA Selesai dalam 255.79 detik.
Best F1 ASHA: 0.5167651539650223


# Strategi 3 - Optuna NSGA-II (Genetic Algorithm)

In [17]:
# Cell 7: Optimasi NSGA-II
print("üöÄ Memulai Optimasi NSGA-II...")
start_time = time.time()

# Sampler NSGA-II
study_nsga = optuna.create_study(
    direction='maximize',
    sampler=NSGAIISampler(seed=42),
    pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=5) # NSGA-II juga bisa pakai pruner
)

study_nsga.optimize(objective_xgboost, n_trials=30)
time_nsga = time.time() - start_time

print(f"‚úÖ NSGA-II Selesai dalam {time_nsga:.2f} detik.")
print(f"Best F1 NSGA-II: {study_nsga.best_value}")

[32m[I 2025-12-29 14:45:32,582][0m A new study created in memory with name: no-name-e09cafd4-d83b-4163-ab5a-16211af91761[0m


üöÄ Memulai Optimasi NSGA-II...


[32m[I 2025-12-29 14:46:11,483][0m Trial 0 finished with value: 0.5167651539650223 and parameters: {'n_estimators': 1436, 'max_depth': 12, 'learning_rate': 0.1205712628744377, 'subsample': 0.7993292420985183, 'colsample_bytree': 0.5780093202212182, 'gamma': 0.7799726016810132, 'min_child_weight': 1, 'reg_alpha': 0.08499808989182997, 'reg_lambda': 0.0006440507553993703}. Best is trial 0 with value: 0.5167651539650223.[0m
[32m[I 2025-12-29 14:46:37,226][0m Trial 1 finished with value: 0.5119503845065664 and parameters: {'n_estimators': 2270, 'max_depth': 3, 'learning_rate': 0.2708160864249968, 'subsample': 0.9162213204002109, 'colsample_bytree': 0.6061695553391381, 'gamma': 0.9091248360355031, 'min_child_weight': 2, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995}. Best is trial 0 with value: 0.5167651539650223.[0m
[32m[I 2025-12-29 14:47:02,650][0m Trial 2 finished with value: 0.5074041522721873 and parameters: {'n_estimators': 1580, 'max_depth': 5, 'lea

‚úÖ NSGA-II Selesai dalam 910.39 detik.
Best F1 NSGA-II: 0.5250779765638034


# Evaluasi Model Final & Komparasi

In [18]:
# Cell 8: Training Final & Komparasi Hasil (FIXED FOR XGBOOST 2.0)
def train_final_model(study, name):
    print(f"‚öôÔ∏è Melatih Model Final: {name}...")
    params = study.best_params
    
    # Update parameter statis untuk XGBoost 2.0+
    params.update({
        'objective': 'multi:softmax',
        'num_class': len(le_target.classes_),
        'tree_method': 'hist',  # Ganti jadi 'hist'
        'device': 'cuda',       # Wajib 'cuda' untuk GPU
        'verbosity': 0
    })
    
    model = xgb.XGBClassifier(**params, random_state=42)
    model.fit(X_train_enc, y_train, sample_weight=sample_weights_train)
    
    preds = model.predict(X_test_enc)
    return model, preds

# 1. Train 3 Model
model_tpe, preds_tpe = train_final_model(study_tpe, "TPE")
model_asha, preds_asha = train_final_model(study_asha, "ASHA")
model_nsga, preds_nsga = train_final_model(study_nsga, "NSGA-II")

# 2. Hitung Metrik
def get_metrics(y_true, y_pred, time_taken, study):
    return {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision (Macro)': precision_score(y_true, y_pred, average='macro'),
        'Recall (Macro)': recall_score(y_true, y_pred, average='macro'),
        'F1-Score (Macro)': f1_score(y_true, y_pred, average='macro'),
        'Waktu Optimasi (s)': time_taken,
        'Best Trial': study.best_trial.number,
        'Total Trials': len(study.trials)
    }

metrics_tpe = get_metrics(y_test, preds_tpe, time_tpe, study_tpe)
metrics_asha = get_metrics(y_test, preds_asha, time_asha, study_asha)
metrics_nsga = get_metrics(y_test, preds_nsga, time_nsga, study_nsga)

# 3. Tampilkan Tabel
df_compare = pd.DataFrame([metrics_tpe, metrics_asha, metrics_nsga], index=['TPE', 'ASHA', 'NSGA-II'])
print("\n=== HASIL AKHIR KOMPARASI ===")
display(df_compare)

# 4. Report
print("\n--- REPORT TPE ---")
print(classification_report(y_test, preds_tpe, target_names=le_target.classes_))

‚öôÔ∏è Melatih Model Final: TPE...
‚öôÔ∏è Melatih Model Final: ASHA...
‚öôÔ∏è Melatih Model Final: NSGA-II...

=== HASIL AKHIR KOMPARASI ===


Unnamed: 0,Accuracy,Precision (Macro),Recall (Macro),F1-Score (Macro),Waktu Optimasi (s),Best Trial,Total Trials
TPE,0.717704,0.522151,0.59315,0.525542,1202.325456,28,30
ASHA,0.701331,0.514468,0.598471,0.516765,255.790513,0,30
NSGA-II,0.703797,0.52479,0.610514,0.525078,910.390092,8,30



--- REPORT TPE ---
                precision    recall  f1-score   support

      Analysis       0.03      0.09      0.05       677
      Backdoor       0.03      0.23      0.05       583
           DoS       0.29      0.25      0.27      4089
      Exploits       0.78      0.65      0.71     11132
       Fuzzers       0.27      0.59      0.37      6062
       Generic       1.00      0.97      0.98     18871
        Normal       0.97      0.69      0.80     37000
Reconnaissance       0.89      0.83      0.86      3496
     Shellcode       0.32      0.87      0.47       378
         Worms       0.65      0.75      0.69        44

      accuracy                           0.72     82332
     macro avg       0.52      0.59      0.53     82332
  weighted avg       0.84      0.72      0.76     82332



# Visualisasi (Confusion Matrix & Convergence)

In [1]:
# Cell 9: Visualisasi Lengkap

# --- A. Confusion Matrix (Side-by-Side 3 Metode) ---
def plot_cm(y_true, y_pred, title, ax):
    cm = confusion_matrix(y_true, y_pred)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax,
                xticklabels=le_target.classes_, yticklabels=le_target.classes_)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Prediksi')
    ax.tick_params(axis='x', rotation=45)

fig, axes = plt.subplots(1, 3, figsize=(24, 6))

plot_cm(y_test, preds_tpe, "CM - Metode TPE", axes[0])
plot_cm(y_test, preds_asha, "CM - Metode ASHA", axes[1])
plot_cm(y_test, preds_nsga, "CM - Metode NSGA-II", axes[2])

axes[0].set_ylabel('Label Asli (Aktual)')
plt.tight_layout()
plt.show()

# --- B. Plot Konvergensi Optimasi (History) ---
from optuna.visualization import plot_optimization_history

# TPE
print("\n=== 1. Riwayat Optimasi TPE ===")
plot_optimization_history(study_tpe)
plt.title("Optimization History - TPE")
plt.show() # Ini akan memunculkan pop-up window matplotlib standar

# ASHA
print("\n=== 2. Riwayat Optimasi ASHA ===")
plot_optimization_history(study_asha)
plt.title("Optimization History - ASHA")
plt.show()

# NSGA-II
print("\n=== 3. Riwayat Optimasi NSGA-II ===")
plot_optimization_history(study_nsga)
plt.title("Optimization History - NSGA-II")
plt.show()

NameError: name 'plt' is not defined