In [None]:
# Mount Google Drive so the project folders under /content/drive are reachable
# from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **1. Paths - Libs**

## **Paths**

In [None]:
# Root of the project folder on the mounted Google Drive.
# Keep the trailing slash: every path below is built by plain concatenation.
base_path = "/content/drive/My Drive/Study/KLTN/"
# Modules Path (no leading slash here, base_path already ends with one)
ModulePath = base_path + "Google Colab/0.0 Python Modules"

# Dataset Path
DatasetPath = base_path + "Dataset/NSL-KDD Processed/Final - For Using/"
train_dataset_path = DatasetPath + "Trainset/" + "IDS.csv"
test_dataset_path = DatasetPath + "Testset/" + "KDDTest+.csv"

# Save Model Path
SavedModelPath = base_path + "Saved Model/IDSModel/"

## **Libs**

In [None]:
# INSTALL LIBS
# !pip3 install numpy adabound torch scikit-learn matplotlib pandas

In [None]:
# ADD MODULE PATH - TO IMPORT CUSTOM MODULES
import sys
# Guard the append so re-running this cell does not pile up duplicate entries
# in sys.path.
if ModulePath not in sys.path:
    sys.path.append(ModulePath)

In [None]:
# IMPORT LIBS
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.utils import shuffle
from sklearn.tree import DecisionTreeClassifier
import pickle

from datetime import date
import timeit
import os

## **Consts**

In [None]:
# Attack classes handled by this notebook. Each one is paired with the
# 'Normal' class to form its own binary dataset (see preprocess_data).
ATTACK_CATEGORIES = ['DOS', 'U2R_AND_R2L']

# **2. Functions**

## **Processing Data Functions**

In [None]:
# Processing Data
def preprocess_data(dataset, attack_category, random_state=None):
    """Build a shuffled binary (Normal vs. one attack category) dataset.

    Keeps only the rows whose ``class`` column equals ``attack_category`` or
    ``'Normal'``, shuffles them, and encodes the label as 0 for ``'Normal'``
    and 1 for the attack.

    Args:
        dataset: pandas DataFrame with a ``class`` column; every other column
            is returned as a feature.
        attack_category: One of ``ATTACK_CATEGORIES``.
        random_state: Optional seed forwarded to ``sklearn.utils.shuffle``.
            The default ``None`` keeps the original unseeded behaviour; pass
            an int to get the same row order on every run.

    Returns:
        Tuple ``(data, label)`` of NumPy arrays: the feature matrix and the
        row-aligned 0/1 label vector.

    Raises:
        ValueError: If ``attack_category`` is not in ``ATTACK_CATEGORIES``.
    """
    if attack_category not in ATTACK_CATEGORIES:
        raise ValueError("Preprocess Data: Invalid Attack Category")
    attack_data = dataset[dataset['class'] == attack_category]
    normal_data = dataset[dataset['class'] == 'Normal']
    # Combine both subsets, then shuffle so the classes are interleaved.
    combine_data = shuffle(
        pd.concat([attack_data, normal_data], ignore_index=True),
        random_state=random_state,
    ).reset_index(drop=True)
    # Binary target: 0 for Normal traffic, 1 for the selected attack.
    label = np.array(combine_data["class"].map(lambda x: 0 if x == "Normal" else 1))
    # Every column except the target is a feature.
    data = np.array(combine_data.drop(columns="class"))
    return data, label

def load_dataset(attack_category):
    """Load the train and test CSVs and preprocess both for one attack category.

    Reads ``train_dataset_path`` and ``test_dataset_path`` and passes each
    frame through ``preprocess_data``.

    Args:
        attack_category: One of ``ATTACK_CATEGORIES``.

    Returns:
        Tuple ``(trainx, trainy, testx, testy)`` as returned by
        ``preprocess_data`` for the train and test frames respectively.

    Raises:
        ValueError: If ``attack_category`` is not in ``ATTACK_CATEGORIES``.
    """
    # Validate before touching the disk so a typo fails fast, without first
    # reading both CSV files.
    if attack_category not in ATTACK_CATEGORIES:
        raise ValueError("Load Dataset: Invalid Attack Category")
    # Load Dataset
    train = pd.read_csv(train_dataset_path)
    test = pd.read_csv(test_dataset_path)
    trainx, trainy = preprocess_data(train, attack_category)
    testx, testy = preprocess_data(test, attack_category)
    return trainx, trainy, testx, testy

# **3. Chạy - Run**

In [None]:
# Colab form field selecting the model type; "DT" is the only option offered.
# NOTE(review): no visible cell reads this variable (the save cell hard-codes
# the "_DT.pkl" suffix) — confirm whether it is still needed.
ml_ids_model = "DT" #@param ["DT"]

In [None]:
# Echo the directory the trained models are written to.
SavedModelPath

'/content/drive/My Drive/Study/KLTN/Saved Model/IDSModel/'

In [None]:
# Quick check: fit a single decision tree on the DOS-vs-Normal split and
# print its mean accuracy on the test split.
trainx, trainy, testx, testy = load_dataset('DOS')

# fit() returns the estimator itself, so construction and training can be chained.
dt = DecisionTreeClassifier().fit(trainx, trainy)

print(dt.score(testx, testy))

0.9078679168365267


In [None]:
# Train, evaluate and save one decision tree per attack category, printing a
# results table along the way.
# NOTE(review): neither the shuffle in preprocess_data nor the classifier is
# seeded here, so the printed metrics can differ slightly between runs.
row_format = "{: <20} {: >15} {: >15} {: >15}"
labels = ['Attack Category', 'Accuracy(%)', 'DR(%)', 'Runtime(s)']
print(row_format.format(*labels))
print(65*'-')

for attack_category in ATTACK_CATEGORIES:
    # The timer spans data loading, training and evaluation (not saving).
    start = timeit.default_timer()
    trainx, trainy, testx, testy = load_dataset(attack_category)
    dt = DecisionTreeClassifier()
    dt.fit(trainx, trainy)

    pred_testy = dt.predict(testx)
    # scikit-learn flattens a binary confusion matrix as (tn, fp, fn, tp).
    # The previous unpacking swapped the fp/fn names; the detection rate was
    # only correct because the formula used the mislabelled variable.
    # labels=[0, 1] pins the matrix to 2x2 even if one class is absent.
    tn, fp, fn, tp = confusion_matrix(testy, pred_testy, labels=[0, 1]).ravel()
    accuracy = (tn + tp)/len(testy)*100
    # Detection rate = recall on the attack class = TP / (TP + FN).
    attack_total = tp + fn
    dr = tp/attack_total*100 if attack_total else float('nan')
    runtime = f"{timeit.default_timer() - start:.2f}"
    out_val = [attack_category, f"{accuracy:.2f}", f"{dr:.2f}", runtime]
    print(row_format.format(*out_val))
    # Save Model
    save_category_path = attack_category + '/Machine_Learning/'
    # exist_ok replaces the check-then-create pattern and its race window.
    os.makedirs(SavedModelPath + save_category_path, exist_ok=True)

    today = str(date.today())

    short_model_path = save_category_path + "created_date_" + today + "_DT.pkl"
    full_model_path = SavedModelPath + short_model_path
    with open(full_model_path, 'wb') as file:
        pickle.dump(dt, file)
    # Report only after the file has been written and closed.
    print(f" > Saved Model to disk: /{short_model_path}")

Attack Category          Accuracy(%)           DR(%)      Runtime(s)
-----------------------------------------------------------------
DOS                            90.30           81.62            0.89
 > Saved Model to disk: /DOS/Machine_Learning/created_date_2020-07-08_dt.pkl
U2R_AND_R2L                    80.42           22.32            0.67
 > Saved Model to disk: /U2R_AND_R2L/Machine_Learning/created_date_2020-07-08_dt.pkl


The saved models are stored in [Google Drive - BlackBox IDS Model](https://drive.google.com/drive/u/1/folders/1M-xotvruMlkFNaQWPf9bpBpzgPhPLpiH)  
The code and its results are stored in [GitHub - Thesis](https://github.com/thetinybug/thesis-IDSGAN)