#**0. Một số biến toàn cục**

##**0.1. Path**

In [0]:
# Modules Path
ModulePath = "/content/drive/My Drive/Study/KLTN/Python Modules/"

# Dataset Path
DatasetPath = "/content/drive/My Drive/Study/KLTN/Dataset/"
# train_dataset_path = DatasetPath + "KDDTrain+.csv"
test_dataset_path = DatasetPath + "NSL-KDD-TM/Attack-type Labels/PREPROCESSED/KDDTest+_label_normalized_all.csv"

# Saved Models Path
SavedModelPath = "/content/drive/My Drive/Study/KLTN/Saved Model/"
Generator_ModelPath = SavedModelPath + "/GANModel/Separated Dataset/Generator_DOS-2020.06.04-17.14.pth"
# IDS_ModelPath = SavedModelPath + "IDSModel/IDS_2020.05.22-13.49.pth"
IDS_ModelPath = SavedModelPath + "IDS_DOS-2020.06.04-17.14.pth"

##**0.2 Variables**

In [0]:
# Choose Attack Category
ATTACK_CATEGORY = 'DOS'

# **1. Chuẩn bị**

##**1.1. Import thư viện**

In [0]:
# Add Module Path - To Import Custom Modules
import sys
sys.path.append(ModulePath)

In [0]:
from sklearn.metrics import confusion_matrix
import pandas as pd
import numpy as np
import torch as th
from torch import nn
from torch.autograd import Variable as V
from models import Blackbox_IDS,Generator
import constants

##**1.2. Định nghĩa hàm tiền xử lý dữ liệu, tạo Batch**

###**1.2.1. Hàm tạo Batch**

In [0]:
# Hàm tạo Batch
def create_batch2(x,batch_size):
    # Comment - a là danh sách các số từ 0 -> len(x)
    a = list(range(len(x)))

    # Comment - Xáo trộn a lên, đảo lộn vị trí các phần từ của a
    np.random.shuffle(a)

    # Comment - Xáo trộn các phần tử trong x
    x = x[a]

    # Comment - Mảng các batch, mỗi batch có số phần tử là batch size
    batch_x = [x[batch_size * i : (i+1)*batch_size,:] for i in range(len(x)//batch_size)]
    
    return batch_x

###**1.2.1 Các hàm xử lý cho GAN**


In [0]:
# Make Noise
def MakeNoise(attack_category, n_feature, n_record):
    if attack_category != 'DOS' and attack_category != 'U2R_AND_R2L':
      print(f"Preprocess Data Fail: Invalid Attack Category")
      return np.zeros((n_feature, n_record))

    # Create noise array
    noise = np.random.uniform(-1,1,(n_feature,n_record))

    # retain feature
    # DOS : INTRINSIC, TIMEBASED
    if attack_category == 'DOS':
      noise[:, constants.INTRINSIC_INDEX + constants.TIMEBASED_INDEX] = 0
    else:
      # U2R&R2L: INTRINSIC, CONTENT
      noise[:, constants.INTRINSIC_INDEX + constants.CONTENT_INDEX] = 0
    return noise


def RestrictOutput(dataset):
    dataset[dataset < 0] = 0
    dataset[dataset > 1] = 1
    return dataset

#**2. Định nghĩa Model**

# **3. Generate Attack Traffics**

In [20]:
test = pd.read_csv(test_dataset_path)
test["class"] = test["class"].map(lambda x : 0 if x == "normal" else 1)

test_raw_attack = np.array(test[test["class"] == 1])[:,:-1]
test_normal = np.array(test[test["class"] == 0])[:,:-1]
true_label = test["class"]
BATCH_SIZE = 256 # Batch size
D_G_INPUT_DIM = test_normal.shape[1]
G_OUTPUT_DIM =test_normal.shape[1] 
D_OUTPUT_DIM = 1

#read model
random_g = Generator(D_G_INPUT_DIM,G_OUTPUT_DIM)
leaned_g = Generator(D_G_INPUT_DIM,G_OUTPUT_DIM)

ids_model = Blackbox_IDS(D_G_INPUT_DIM,2)

ids_param= th.load(IDS_ModelPath,map_location=lambda x,y:x)
ids_model.load_state_dict(ids_param)
g_param = th.load(Generator_ModelPath,map_location=lambda x,y:x)
leaned_g.load_state_dict(g_param)

model_g = {"No Train Model":random_g,"Trained Model":leaned_g}

test_batch_normal = create_batch2(test_normal,BATCH_SIZE)

print("Adversarial Traffic Evaluating")
print("-"*100)
for n,g in model_g.items():
    o_dr,a_dr,eir=[],[],[]
    g.eval()
    with th.no_grad():
        for bn in test_batch_normal:
            normal_b = th.Tensor(bn.astype("float64"))

            # Gen Adversarial Traffic
            attack_traffic = test_raw_attack[np.random.randint(0,len(test_raw_attack),BATCH_SIZE)]
            noise = MakeNoise(ATTACK_CATEGORY, BATCH_SIZE, D_G_INPUT_DIM)
            attack_traffic_noised = RestrictOutput(attack_traffic + noise)
            batch_a = th.Tensor(attack_traffic)
            z = V(th.Tensor(attack_traffic_noised))
            adversarial_attack = g(z)
            # # Các giá trị ở cột 33 trở đi: Nếu >= 0.5 set =1; else set =0
            # adversarial_attack[:,33:] = th.Tensor(np.where(adversarial_attack[:,33:].detach().cpu().numpy()>= 0.5 , 1,0))

            # IDS Input
            ori_input = th.cat((batch_a,normal_b))
            adv_input = th.cat((adversarial_attack,normal_b))
            l = list(range(len(ori_input)))
            np.random.shuffle(l)
            
            adv_input = adv_input[l]
            ori_input = ori_input[l]
            ids_pred_adv = ids_model(adv_input)
            ids_pred_ori = ids_model(ori_input)
            
            # IDS input co dang: attack (BATCH_SIZE phan tu) --> normal (BATCH_SIZE phan tu)
            ids_true_label = np.r_[np.ones(BATCH_SIZE),np.zeros(BATCH_SIZE)][l]
            pred_label_adv = th.argmax(nn.Sigmoid()(ids_pred_adv),dim = 1).cpu().numpy()
            pred_label_ori = th.argmax(nn.Sigmoid()(ids_pred_ori),dim = 1).cpu().numpy()
            
            
            tn1, fp1, fn1, tp1 = confusion_matrix(ids_true_label,pred_label_adv).ravel()
            tn2, fp2, fn2, tp2 = confusion_matrix(ids_true_label,pred_label_ori).ravel()

            # print(f"tn1, fp1, fn1, tp1: {tn1}, {fp1}, {fn1}, {tp1}")
            # print(f"tn2, fp2, fn2, tp2: {tn2}, {fp2}, {fn2}, {tp2}")
            o_DR = tp2/(tp2 + fp2)
            if o_DR == 0:
                print(f"tp2/(tp2 + fp2): {tp2}/({tp2} + {fp2})")
            a_DR = tp1/(tp1 + fp1)
            if a_DR == 0:
                print(f"tp1/(tp1 + fp1): {tp1}/({tp1} + {fp1})")

            o_dr.append(tp2/(tp2 + fp2))
            a_dr.append(tp1/(tp1 + fp1))
            eir.append(1 - (tp1/(tp1 + fp1))/(tp2/(tp2 + fp2)))
    print(f"{n}\t => Origin DR : {np.mean(o_dr):.5f} \t Adversarial DR : {np.mean(a_dr):.5f} \t EIR : {np.mean(eir):.5f}")   


Adversarial Traffic Evaluating
----------------------------------------------------------------------------------------------------
tp1/(tp1 + fp1): 0/(0 + 6)
tp1/(tp1 + fp1): 0/(0 + 6)
tp1/(tp1 + fp1): 0/(0 + 2)
tp1/(tp1 + fp1): 0/(0 + 9)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 2)
tp1/(tp1 + fp1): 0/(0 + 6)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 2)
tp1/(tp1 + fp1): 0/(0 + 4)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 6)
tp1/(tp1 + fp1): 0/(0 + 4)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 2)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 4)
tp1/(tp1 + fp1): 0/(0 + 8)
tp1/(tp1 + fp1): 0/(0 + 9)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 6)
tp1/(tp1 + fp1): 0/(0 + 7)
tp1/(tp1 + fp1): 0/(0 + 4)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 3)




tp1/(tp1 + fp1): 0/(0 + 4)
tp1/(tp1 + fp1): 0/(0 + 6)
tp1/(tp1 + fp1): 0/(0 + 6)
tp1/(tp1 + fp1): 0/(0 + 4)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 6)
No Train Model	 => Origin DR : 0.96683 	 Adversarial DR : nan 	 EIR : nan
tp1/(tp1 + fp1): 0/(0 + 4)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 4)
tp1/(tp1 + fp1): 0/(0 + 2)
tp1/(tp1 + fp1): 0/(0 + 4)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 4)
tp1/(tp1 + fp1): 0/(0 + 1)
tp1/(tp1 + fp1): 0/(0 + 2)
tp1/(tp1 + fp1): 0/(0 + 1)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 2)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 6)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 4)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 5)
tp1/(tp1 + fp1): 0/(0 + 3)
tp1/(tp1 + fp1): 0/(0 + 1)
tp1/(tp1

***Chú thích***
* No Train Model: Là Model Generator tạo ra random.
* Trained Model: Là Model Generator lấy từ IDSWGAN.
* DR: Detection Rate - Tỉ lệ Phát hiện (Correct_Detected/Total_Detected)
 * Origin DR: Tỉ lệ Phát hiện khi đầu vào là traffic raw trong dataset.
 * Adversarial DR: Tỉ lệ phát hiện khi đầu vào là traffic do Generator tạo ra.



***Nhận xét***   
*  Việc sử dụng 2 Generator Model: Random Model và Trained Model là để tượng trưng thấy được sự tiến hóa của Generator Model
*  Đối với Trained Model
 * Tạm thời bỏ qua EIR
 * DR giảm từ 43% -> 0.49%