In [1]:
import pandas as pd
import numpy as np
import pickle
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from scipy.special import softmax
import random


In [2]:
# Melakukan preprocessing dan load dataset CIC IoT 2023 dan CIC IoT DIAD
def load_dataset_and_model_CICIoT2023_CICIoTDIAD(dataset_name):
    """Load one of the CIC IoT CSV datasets together with its pickled model.

    Reads the CSV that belongs to ``dataset_name``, separates the ``Label``
    column from the features, keeps only numeric feature columns, drops every
    row that contains NaN or +/-inf, standardizes the remaining features with
    a freshly fitted ``StandardScaler`` and unpickles the matching model file.

    Args:
        dataset_name: Either ``"CIC IoT 2023"`` or ``"CIC IoT DIAD"``.

    Returns:
        A 4-tuple ``(X_scaled, y, model, class_labels)``:
        the scaled feature matrix returned by ``fit_transform``, the label
        Series (re-indexed from 0 and aligned row-for-row with ``X_scaled``),
        the object stored in the pickle file, and the sorted list of unique
        labels that remain after row filtering.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names.
    """
    # (dataset name, CSV path, pickled model path) for every supported dataset.
    known_sources = (
        (
            "CIC IoT 2023",
            "/content/drive/MyDrive/Dataset_Skripsi/6_Classes/CIC_IOT_2023_6Classes.csv",
            "/content/drive/MyDrive/Pengembangan IDS/CIC-IoT-2023_xgb_best_model.pkl",
        ),
        (
            "CIC IoT DIAD",
            "/content/drive/MyDrive/Dataset_Skripsi/6_Classes/CIC-IoT-DIAD_6Classes.csv",
            "/content/drive/MyDrive/Pengembangan IDS/CIC-IoT-DIAD_xgb_best_model.pkl",
        ),
    )
    for known_name, csv_path, model_path in known_sources:
        if dataset_name == known_name:
            break
    else:
        # The loop ended without a match: the name is not supported.
        raise ValueError("Nama dataset tidak dikenali.")

    frame = pd.read_csv(csv_path)

    # Split the target column from the features.
    labels = frame["Label"]
    numeric = frame.drop(columns=["Label"]).select_dtypes(include=["number"])

    # Treat infinities as missing, then discard every incomplete row.
    finite = numeric.replace([np.inf, -np.inf], np.nan).dropna()

    # Keep only the labels of surviving rows and renumber both from 0 so that
    # positional indexing lines up between features and labels.
    labels = labels.loc[finite.index].reset_index(drop=True)
    features = finite.reset_index(drop=True)

    # NOTE(review): the scaler is fitted on this data rather than loaded from
    # training - confirm this matches how the model's inputs were scaled.
    X_scaled = StandardScaler().fit_transform(features)

    # NOTE(review): pickle.load can execute arbitrary code; only load model
    # files from a trusted location.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)

    class_labels = sorted(labels.unique())
    return X_scaled, labels, model, class_labels

In [3]:
# Melakukan preprocessing dan load dataset RT IoT 2022
def load_dataset_and_model_RTIoT2022(dataset_name):
    """Load the RT IoT 2022 CSV dataset together with its pickled model.

    Reads the dataset CSV, separates the ``Attack_type`` column from the
    features, keeps only numeric feature columns, drops every row that
    contains NaN or +/-inf, standardizes the remaining features with a freshly
    fitted ``StandardScaler`` and unpickles the model file.

    Args:
        dataset_name: Must be ``"RT IoT 2022"``.

    Returns:
        A 4-tuple ``(X_scaled, y, model, class_labels)``:
        the scaled feature matrix returned by ``fit_transform``, the label
        Series (re-indexed from 0 and aligned row-for-row with ``X_scaled``),
        the object stored in the pickle file, and the sorted list of unique
        labels that remain after row filtering.

    Raises:
        ValueError: If ``dataset_name`` is not ``"RT IoT 2022"``.
    """
    # Guard clause: this loader supports exactly one dataset.
    if not (dataset_name == "RT IoT 2022"):
        raise ValueError("Nama dataset tidak dikenali.")

    label_column = "Attack_type"
    csv_path = "/content/drive/MyDrive/Dataset_Skripsi/6_Classes/RT_IOT2022_6Classes.csv"
    model_path = "/content/drive/MyDrive/Pengembangan IDS/RT-IoT-22_xgb_best_model.pkl"

    frame = pd.read_csv(csv_path)

    # Split the target column from the features.
    labels = frame[label_column]
    numeric = frame.drop(columns=[label_column]).select_dtypes(include=["number"])

    # Treat infinities as missing, then discard every incomplete row.
    finite = numeric.replace([np.inf, -np.inf], np.nan).dropna()

    # Keep only the labels of surviving rows and renumber both from 0 so that
    # positional indexing lines up between features and labels.
    labels = labels.loc[finite.index].reset_index(drop=True)
    features = finite.reset_index(drop=True)

    # NOTE(review): the scaler is fitted on this data rather than loaded from
    # training - confirm this matches how the model's inputs were scaled.
    X_scaled = StandardScaler().fit_transform(features)

    # NOTE(review): pickle.load can execute arbitrary code; only load model
    # files from a trusted location.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)

    class_labels = sorted(labels.unique())
    return X_scaled, labels, model, class_labels

In [11]:
# Ask the user which dataset to use; the exact dataset name must be typed.
for menu_line in (
    "Pilih salah satu nama dataset:",
    "1. RT IoT 2022",
    "2. CIC IoT 2023",
    "3. CIC IoT DIAD",
):
    print(menu_line)

nama_input = input("Masukkan nama persis dari dataset yang dipilih: ").strip()

# Pick the loader that knows the label column and file paths of the dataset.
# Both CIC datasets are handled by the same loader.
if nama_input == "RT IoT 2022":
    load_fn = load_dataset_and_model_RTIoT2022
elif nama_input in ("CIC IoT 2023", "CIC IoT DIAD"):
    load_fn = load_dataset_and_model_CICIoT2023_CICIoTDIAD
else:
    raise ValueError("Nama dataset tidak dikenali.")

dataset_name = nama_input
X_scaled, y, model, class_labels = load_fn(dataset_name)

Pilih salah satu nama dataset:
1. RT IoT 2022
2. CIC IoT 2023
3. CIC IoT DIAD
Masukkan nama persis dari dataset yang dipilih: CIC IoT DIAD


In [12]:
# Ask the user for the row index of the sample to classify.
# Any invalid entry (non-integer or out of range) falls back to a random index.
last_index = len(X_scaled) - 1
try:
    index = int(input(f"Masukkan indeks sampel (0 - {last_index}): "))
    if not 0 <= index <= last_index:
        raise ValueError("Index di luar jangkauan.")
except ValueError as err:
    # Covers both the int() parse failure and the explicit range check above.
    print("Input tidak valid:", err)
    index = random.randint(0, last_index)
    print(f"Index diganti secara acak: {index}")


Masukkan indeks sampel (0 - 53233): 13430


In [13]:
# Take the selected row as a one-sample 2-D batch, plus its ground-truth label.
sample = X_scaled[index].reshape(1, -1)
true_label = y.iloc[index]

# Wrap the sample in a DMatrix, the input type expected by the model's predict().
dtest = xgb.DMatrix(sample)

# Ask for raw margins (logits). Without output_margin=True, predict() returns
# the objective-transformed output (probabilities for multi:softprob, class ids
# for multi:softmax), so applying softmax to it would not yield probabilities.
raw_score = model.predict(dtest, output_margin=True)  # one row of per-class margins
proba = softmax(raw_score[0])
predicted_index = np.argmax(proba)
# class_labels is sorted; this assumes the model's class ids follow the same
# sorted order - TODO confirm against the label encoding used in training.
predicted_label = class_labels[predicted_index]

# Report the result.
print("\n========== Attack Type ==========")
print(f"Dataset         : {dataset_name}")
print(f"Sample Index    : {index}")
print(f"Actual Label    : {true_label}")
print(f"Predicted Result  : {predicted_label}")


Dataset         : CIC IoT DIAD
Sample Index    : 13430
Actual Label    : DoS-HTTP_Flood
Predicted Result  : DoS-HTTP_Flood
