In [1]:
import os
import math
import numpy as np
import pandas as pd

import torch

from torch import nn
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.compose import ColumnTransformer

from einops import repeat, pack
from einops.layers.torch import Rearrange

from datetime import datetime

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
# device_dis = torch.device('cuda:0')
# device_gen = torch.device('cuda:0')

# 获取当前日期和时间
run_time = datetime.now()
# model_kind = f'BiLSTM'
model_kind = f'BiLSTM'

save_path = os.path.join(f'ablation_study_results', str(run_time))
save_path = os.path.join(save_path, model_kind)

os.makedirs(save_path, exist_ok=True)
os.makedirs(f'{save_path}', exist_ok=True)
os.makedirs(f'{save_path}/models', exist_ok=True)
os.makedirs(f'{save_path}/results', exist_ok=True)

#### Dataset加载

In [3]:
def Load_UNSWNB15(path_train_data, path_test_data, binary_or_multi='multi'):

    categorical_columns = ['proto', 'service', 'state']
    
    classification = ['Normal', 'Fuzzers', 'Analysis', 'Backdoor', 'DoS', 
                    'Exploits', 'Generic', 'Reconnaissance', 'Shellcode', 'Worms']
    # 加载数据 train_num:125973, test_num:22544, total_data:148517
    data_train = pd.read_csv(path_train_data).copy()
    data_test = pd.read_csv(path_test_data).copy()
    total_data = pd.concat([data_train, data_test], axis=0) # 合并train和test
    total_data = total_data.drop(['id'], axis=1)
    train_num = len(data_train)
    test_num = len(data_test)

    # 特征
    features = total_data.iloc[:, :-2]     
    
    # 标签（以Binary/Multi形式加载Y的值）
    if binary_or_multi=='binary':    
        # 删除attack_cat列
        total_data = total_data.drop('attack_cat', axis=1)
        # 把labels转换为binary[0,1] 
        labels = total_data.iloc[:, -1]
    elif binary_or_multi=='multi':
        # 删除label列
        total_data = total_data.drop('label', axis=1)
        labels_class = total_data.iloc[:, -1]
        
        pdlist_class_dict = {}
        for index, data_class in enumerate(classification):
            pdlist_class_dict[data_class] = index
                
        labels = np.array([pdlist_class_dict[row] for row in np.array(labels_class)])
        
    # One-hot编码数据
    features = pd.get_dummies(features, columns=categorical_columns)
    
    # Min-Max标准化
    scaler = MinMaxScaler().fit(features)
    features = scaler.transform(features)

    # 凑形状，增加60列
    addition_number = 60
    addition_data = np.zeros((len(total_data), addition_number))
    features = np.concatenate((features, addition_data), axis=1)
    
    # X_train = features[:train_num][:, :, np.newaxis]
    # X_test = features[train_num:][:, :, np.newaxis]
    X_train = features[:train_num].astype(np.float32)
    X_test = features[train_num:].astype(np.float32)
    Y_train = labels[:train_num].astype(np.longlong)
    Y_test = labels[train_num:].astype(np.longlong)
    
        
    X_train = torch.tensor(X_train, dtype=torch.float32)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    Y_train = torch.LongTensor(Y_train)
    Y_test = torch.LongTensor(Y_test)
    
    return X_train, Y_train, X_test, Y_test

#### 网络层

In [4]:
class MultiLayerPerceptron(nn.Module):
    def __init__(self, num_classes, dim, dropout):
        super().__init__()
        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, dim),
            nn.Linear(dim, num_classes),
            nn.Dropout(dropout),
            nn.ReLU()
        )

    def forward(self, x):
        return self.mlp_head(x)
    
class PositionalEmbedding(nn.Module):
    def __init__(self, *, seq_len, patch_size, dim, channels, emb_dropout = 0.):
        super().__init__()
        assert (seq_len % patch_size) == 0

        num_patches = seq_len // patch_size
        patch_dim = channels * patch_size
        # patch_dim = patch_size
        self.patch_dim = [patch_size, channels, patch_dim]
        
        self.to_patch_embedding = nn.Sequential(
            Rearrange('b c (n p) -> b n (p c)', p = patch_size),
            # batch_size channels (patch_number * patch_size) -> batch_size patch_number (patch_size * channels)
            nn.LayerNorm(patch_dim),
            nn.Linear(patch_dim, dim),
            nn.LayerNorm(dim),
        )

        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(dim))
        self.dropout = nn.Dropout(emb_dropout)

    def forward(self, series):
        x = self.to_patch_embedding(series)
        b, n, _ = x.shape

        cls_tokens = repeat(self.cls_token, 'd -> b d', b = b)
        
        x, ps = pack([cls_tokens, x], 'b * d')

        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x)
        
        return x, ps

#### BiLSTM

In [5]:
class BiLSTM(nn.Module):
    def __init__(self, input_dim, num_classes, lstm_hidden_dim=64, lstm_layers=2, dropout=0.1):
        super(BiLSTM, self).__init__()
        
        self.bilstm = nn.LSTM(input_dim, lstm_hidden_dim, num_layers=lstm_layers, batch_first=True, bidirectional=True)

        self.embedding = nn.Linear(lstm_hidden_dim*2, int(input_dim/2))
        
        self.mlp = MultiLayerPerceptron(dim=int(input_dim/2),
                                        num_classes=num_classes,
                                        dropout=dropout)
    
    def forward(self, x):
        # LSTM部分
        x, _ = self.bilstm(x)  # x: (batch_size, seq_length, input_dim)
        x = x[:, -1, :]  # 取最后一个时间步的输出作为特征

        x = self.embedding(x)

        y = self.mlp(x)
        return y

#### +Transformer

In [6]:
class BiLSTMTransformer(nn.Module):    
    def __init__(self, input_dim, num_classes, lstm_hidden_dim=64, lstm_layers=2, nhead=4, dim_feedforward=128, num_layers=6, dropout=0.1):
        super(BiLSTMTransformer, self).__init__()
        
        self.bilstm = nn.LSTM(input_dim, lstm_hidden_dim, num_layers=lstm_layers, batch_first=True, bidirectional=True)
        self.embedding = nn.Linear(lstm_hidden_dim*2, int(input_dim/2))
        
        encoder_layers = nn.TransformerEncoderLayer(d_model=int(input_dim/2), nhead=nhead, dim_feedforward=int(input_dim/2), dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)

        self.mlp = MultiLayerPerceptron(dim=int(input_dim/2),
                                        num_classes=num_classes,
                                        dropout=dropout)
    
    def forward(self, x):
        # LSTM部分
        x, _ = self.bilstm(x)  # x: (batch_size, seq_length, input_dim)
        x = x[:, -1, :]  # 取最后一个时间步的输出作为特征
        x = self.embedding(x)

        x = self.transformer_encoder(x)

        y = self.mlp(x)
        return y

#### 参数、模型初始化

In [7]:
train_epoches = 30
batch_size = 256
last_epoch = -1
print_interval = 100

input_dim = 256
class_num = 10
lstm_layers = 2
nhead = 4
tb_layers = 4
dropout = 0.1

# model = BiLSTM(input_dim=input_dim, 
#                num_classes=class_num, 
#                lstm_hidden_dim=input_dim, 
#                lstm_layers=lstm_layers,
#                dropout=dropout).to(torch.device("cuda"))

model = BiLSTMTransformer(input_dim=input_dim, 
                          num_classes=class_num, 
                          lstm_hidden_dim=input_dim, 
                          lstm_layers=lstm_layers, 
                          nhead=nhead, 
                          dim_feedforward=input_dim, 
                          num_layers=tb_layers, 
                          dropout=dropout).to(torch.device("cuda"))
# model = nn.DataParallel(model)

criterion = nn.CrossEntropyLoss()
optimizer_model = Adam(params=model.parameters(), lr=1e-3)
scheduler_model = CosineAnnealingLR(optimizer_model, T_max=train_epoches, last_epoch=last_epoch)

#### 加载数据

In [8]:
# 加载数据
path_train_data='datasets/UNSW-NB15/UNSW_NB15_training-set.csv'
path_test_data='datasets/UNSW-NB15/UNSW_NB15_testing-set.csv'

X_train, Y_train, X_test, Y_test = Load_UNSWNB15(path_train_data=path_train_data, 
                                                 path_test_data=path_test_data, 
                                                 binary_or_multi='multi')# 装载数据到loader里面

train_dataset = torch.utils.data.TensorDataset(X_train, Y_train)
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True
)
test_dataset = torch.utils.data.TensorDataset(X_test, Y_test)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False
)

In [9]:
best_accu = 0
best_epoch = 0
for epo in range(train_epoches):
    print(f'Training Epoch{epo+1}')
    
    model.train()
    for index, (train_image, train_label) in enumerate(train_loader):
        train_image, train_label = train_image.view(train_image.shape[0], -1, train_image.shape[1]).cuda(non_blocking=True), train_label.to(torch.int64).cuda(non_blocking=True)
        train_label = torch.nn.functional.one_hot(train_label, class_num).to(torch.float32)
        optimizer_model.zero_grad()
        
        outs_real = model(train_image).view(train_label.shape)
        
        dis_loss = criterion(outs_real, train_label)
        
        dis_loss.backward()
        optimizer_model.step()
        scheduler_model.step()
        
        correct = (torch.argmax(outs_real, dim=1) == torch.argmax(train_label, dim=1)).sum().item()
        if (index % print_interval) == 0 and index != 0:
            print(f'Epo{epo}/Iter{index}:Accu {correct/len(train_label)}')

    model.eval()    
    
    Y_pred = np.array([])
    Y_test = np.array([])
    for image, label in test_loader:
        Y_test = np.append(Y_test, label, axis=None)
        image, label = image.view(image.shape[0], -1, image.shape[1]).cuda(non_blocking=True), label.cuda(non_blocking=True)

        predicted = model(image).view(label.shape[0], -1)
        predicted = torch.argmax(predicted, dim=1)
        
        Y_pred = np.append(Y_pred, predicted.cpu().numpy(), axis=None)
            
    accuracy_test = accuracy_score(Y_test, Y_pred)
    
    if accuracy_test > best_accu:
        best_accu = accuracy_test
        best_epoch = epo
        best_pred = Y_pred
        ground_truth = Y_test
        
        #保存整个模型
        torch.save(model, f'{save_path}/models/{model_kind}_{epo}_accu_{accuracy_test:.4f}.pth')
        np.save(f'{save_path}/models/best_pred_accu_{accuracy_test:.4f}.npy', best_pred)
        np.save(f'{save_path}/models/ground_truth_accu_{accuracy_test:.4f}.npy', ground_truth)
        
    print_lines = f'Epoch {epo+1}/{train_epoches}:\nAccuracy:{accuracy_test}\n'
    print_lines += f'Best Accuracy:{best_accu} - Epoch:{best_epoch+1}\n'
    print(print_lines)
    
    with open(f'{save_path}/results/{model_kind}_Multi.txt', 'a', encoding='utf-8') as file:
        # 将输出内容写入文件
        file.write(print_lines+'\n')

Training Epoch1
Epo0/Iter100:Accu 0.6953125
Epo0/Iter200:Accu 0.734375
Epo0/Iter300:Accu 0.67578125
Epo0/Iter400:Accu 0.73046875
Epo0/Iter500:Accu 0.73828125
Epo0/Iter600:Accu 0.69921875
Epoch 1/30:
Accuracy:0.6164674731574601
Best Accuracy:0.6164674731574601 - Epoch:1

Training Epoch2
Epo1/Iter100:Accu 0.73828125
Epo1/Iter200:Accu 0.671875
Epo1/Iter300:Accu 0.72265625
Epo1/Iter400:Accu 0.73046875
Epo1/Iter500:Accu 0.75
Epo1/Iter600:Accu 0.734375
Epoch 2/30:
Accuracy:0.6543749696351358
Best Accuracy:0.6543749696351358 - Epoch:2

Training Epoch3
Epo2/Iter100:Accu 0.72265625
Epo2/Iter200:Accu 0.72265625
Epo2/Iter300:Accu 0.70703125
Epo2/Iter400:Accu 0.73046875
Epo2/Iter500:Accu 0.796875
Epo2/Iter600:Accu 0.76171875
Epoch 3/30:
Accuracy:0.6380508186367391
Best Accuracy:0.6543749696351358 - Epoch:2

Training Epoch4
Epo3/Iter100:Accu 0.67578125
Epo3/Iter200:Accu 0.76171875
Epo3/Iter300:Accu 0.75390625
Epo3/Iter400:Accu 0.71875
Epo3/Iter500:Accu 0.72265625
Epo3/Iter600:Accu 0.75
Epoch 4/30:
