In [1]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import torch
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset


class KddData(object):
    """KDD Cup 99 data prepared for an 8x8 single-channel CNN.

    On construction the dataset is fetched with ``datasets.fetch_kddcup99()``,
    the three string-valued feature columns (indices 1, 2 and 3) and the
    labels are label-encoded, every record is zero-padded to 64 values and
    reshaped to ``(1, 8, 8)``, and the result is split 70/30 into train and
    test ``TensorDataset`` objects with matching ``DataLoader`` objects.

    Attributes:
        batch_size: batch size used by both dataloaders.
        train_dataset / test_dataset: ``TensorDataset`` of (features, label).
        train_dataloader / test_dataloader: loaders over those datasets.
    """

    # (column index, encoder key) for the string-valued feature columns.
    # The key 'protocal' is a misspelling kept on purpose: it is a runtime
    # dictionary key that other code may already rely on.
    _CATEGORICAL_COLUMNS = ((1, 'protocal'), (2, 'service'), (3, 'flag'))

    def __init__(self, batch_size):
        """Fetch, encode and split the data, then build both dataloaders.

        Args:
            batch_size: number of records per batch for both dataloaders.
        """
        kddcup99 = datasets.fetch_kddcup99()
        self._encoder = {
            'protocal': LabelEncoder(),
            'service':  LabelEncoder(),
            'flag':     LabelEncoder(),
            'label':    LabelEncoder()
        }
        self.batch_size = batch_size
        data_X, data_y = self.__encode_data(kddcup99.data, kddcup99.target)
        self.train_dataset, self.test_dataset = self.__split_data_to_tensor(data_X, data_y)
        self.train_dataloader = DataLoader(self.train_dataset, self.batch_size, shuffle=True)
        # Evaluation only sums per-batch statistics, so shuffling the test
        # set buys nothing; keep a stable order instead.
        self.test_dataloader = DataLoader(self.test_dataset, self.batch_size, shuffle=False)

    def __encode_data(self, data_X, data_y):
        """Encode the string columns as integers and reshape records to 8x8.

        Args:
            data_X: 2-D array of records with at most 64 columns; columns
                1, 2 and 3 hold the categorical values to encode.
            data_y: 1-D array of labels.

        Returns:
            ``(X, y)`` where ``X`` has shape ``(n, 1, 8, 8)`` (records
            zero-padded to 64 values) and ``y`` holds integer class indices.
        """
        n_features = data_X.shape[1]
        # Pad first: np.pad returns a new array, so the categorical columns
        # are encoded in the copy and the caller's array is left untouched.
        padded = np.pad(data_X, ((0, 0), (0, 64 - n_features)), 'constant')
        for column, key in self._CATEGORICAL_COLUMNS:
            padded[:, column] = self._encoder[key].fit_transform(padded[:, column])
        data_y = self._encoder['label'].fit_transform(data_y)
        return padded.reshape(-1, 1, 8, 8), data_y

    def __split_data_to_tensor(self, data_X, data_y):
        """Split into train/test sets (30% test) and wrap them as TensorDatasets.

        Features are converted to float32 and labels to int64.
        """
        X_train, X_test, y_train, y_test = train_test_split(data_X, data_y, test_size=0.3)
        # np.int was removed in NumPy 1.24; int64 is the explicit equivalent
        # and yields the LongTensor targets that CrossEntropyLoss expects.
        train_dataset = TensorDataset(
            torch.from_numpy(X_train.astype(np.float32)),
            torch.from_numpy(y_train.astype(np.int64))
        )
        test_dataset = TensorDataset(
            torch.from_numpy(X_test.astype(np.float32)),
            torch.from_numpy(y_test.astype(np.int64))
        )
        return train_dataset, test_dataset

    def decode(self, data, label=False):
        """Map encoded values back to their original representation.

        Args:
            data: with ``label=False``, one encoded record (a sequence whose
                items 1, 2 and 3 are encoded categorical values); with
                ``label=True``, a class index or a sequence of class indices.
            label: decode ``data`` as label(s) instead of a feature record.

        Returns:
            A new list with items 1-3 decoded (``label=False``); otherwise
            the decoded label for a scalar index, or the encoder's decoded
            sequence for a sequence of indices.
        """
        if not label:
            _data = list(data)
            for column, key in self._CATEGORICAL_COLUMNS:
                # Each value is written back to its own column (the flag
                # belongs in column 3, not column 2).
                _data[column] = self._encoder[key].inverse_transform([_data[column]])[0]
            return _data
        if np.ndim(data) == 0:
            # inverse_transform needs 1-D input: wrap a scalar, then unwrap.
            return self._encoder['label'].inverse_transform([data])[0]
        return self._encoder['label'].inverse_transform(data)

    def encode(self, data, label=False):
        """Encode one raw record, or one raw label, with the fitted encoders.

        Args:
            data: with ``label=False``, one raw record (a sequence whose
                items 1, 2 and 3 are categorical values); with
                ``label=True``, a single raw label.
            label: encode ``data`` as a label instead of a feature record.

        Returns:
            A new list with items 1-3 replaced by their integer codes, or the
            integer class index of the label.
        """
        if not label:
            _data = list(data)
            for column, key in self._CATEGORICAL_COLUMNS:
                _data[column] = self._encoder[key].transform([_data[column]])[0]
            return _data
        return self._encoder['label'].transform([data])[0]


In [2]:
import torch.nn as nn

class CNN(nn.Module):
    """Two-layer convolutional feature extractor followed by three Linear layers.

    The first Linear layer takes 144 inputs, i.e. 16 channels x 3 x 3, which
    is what the convolutional stack produces for an 8x8 input: the padded 3x3
    convolution keeps 8x8, the unpadded one shrinks it to 6x6 and the 2x2
    max-pool halves that to 3x3.

    Args:
        in_dim: number of input channels.
        n_class: number of output scores per sample.
    """

    def __init__(self, in_dim, n_class):
        super().__init__()

        feature_layers = [
            nn.Conv2d(in_dim, 6, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(6),
            nn.ReLU(inplace=True),
            nn.Conv2d(6, 16, kernel_size=3, stride=1, padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.conv = nn.Sequential(*feature_layers)

        classifier_layers = [
            nn.Linear(144, 512),
            nn.Linear(512, 256),
            nn.Linear(256, n_class),
        ]
        self.fc = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Return the raw class scores, one row per sample in ``x``."""
        features = self.conv(x)
        # Collapse (channels, height, width) into one vector per sample.
        flat = features.view(features.size(0), -1)
        return self.fc(flat)


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
# Neural-network hyperparameters
batch_size = 128  # passed to KddData, which uses it for both dataloaders
learning_rate = 1e-2  # learning rate of the SGD optimizer created in train()
num_epoches = 20  # number of epochs train() runs
USE_GPU = torch.cuda.is_available()  # train() moves model and batches to CUDA when True


# Build the dataset wrapper and the network: 1 input channel, 23 outputs.
# NOTE(review): 23 is presumably the number of distinct labels in the fetched
# data - confirm against the fitted label encoder.
dataset = KddData(batch_size)
model = CNN(1, 23)

def train():
    """Train the global ``model`` on ``dataset`` and evaluate after every epoch.

    Runs ``num_epoches`` epochs of SGD (learning rate ``learning_rate``) with
    cross-entropy loss over ``dataset.train_dataloader``. After each epoch it
    prints the mean training loss and accuracy, then the mean loss and
    accuracy over ``dataset.test_dataloader``.

    Side effects: rebinds the global ``model`` to the model on the selected
    device (CUDA when ``USE_GPU`` is true) and leaves it in eval mode.
    """
    global model

    device = torch.device('cuda' if USE_GPU else 'cpu')
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    n_train = len(dataset.train_dataset)
    n_test = len(dataset.test_dataset)

    for epoch in range(num_epoches):
        print('epoch {}'.format(epoch + 1))
        print('*' * 10)
        # The evaluation pass below switches to eval mode; switch back at the
        # start of every epoch so BatchNorm trains on batch statistics.
        model.train()
        running_loss = 0.0
        running_acc = 0.0
        for img, label in dataset.train_dataloader:
            img = img.to(device)
            label = label.to(device)
            # Forward pass
            out = model(img)
            loss = criterion(out, label)
            # The criterion returns the batch mean; weight it by the batch
            # size so dividing by the dataset size gives a per-sample mean.
            running_loss += loss.item() * label.size(0)
            _, pred = torch.max(out, 1)
            running_acc += (pred == label).sum().item()
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        print('Finish {} epoch, Loss: {:.6f}, Acc: {:.6f}'.format(
            epoch + 1, running_loss / n_train, running_acc / n_train))

        model.eval()
        eval_loss = 0
        eval_acc = 0
        # no_grad replaces the removed Variable(volatile=True) flag: no
        # autograd graph is built during evaluation.
        with torch.no_grad():
            for img, label in dataset.test_dataloader:
                img = img.to(device)
                label = label.to(device)
                out = model(img)
                loss = criterion(out, label)
                eval_loss += loss.item() * label.size(0)
                _, pred = torch.max(out, 1)
                eval_acc += (pred == label).sum().item()
        print('Test Loss: {:.6f}, Acc: {:.6f}'.format(
            eval_loss / n_test, eval_acc / n_test))
        print()


In [6]:
# Run the training loop; it prints training and test metrics after every epoch.
train()

epoch 1
**********
Finish 1 epoch, Loss: 0.208683, Acc: 0.954045




Test Loss: 1.635921, Acc: 0.568327

epoch 2
**********
Finish 2 epoch, Loss: 0.098167, Acc: 0.982132
Test Loss: 0.154757, Acc: 0.988131

epoch 3
**********
Finish 3 epoch, Loss: 0.050068, Acc: 0.987970
Test Loss: 0.118267, Acc: 0.988482

epoch 4
**********
Finish 4 epoch, Loss: 0.041440, Acc: 0.988893
Test Loss: 0.099372, Acc: 0.989123

epoch 5
**********
Finish 5 epoch, Loss: 0.038163, Acc: 0.989390
Test Loss: 0.099686, Acc: 0.989002

epoch 6
**********
Finish 6 epoch, Loss: 0.035749, Acc: 0.989891
Test Loss: 0.082922, Acc: 0.989710

epoch 7
**********
Finish 7 epoch, Loss: 0.033967, Acc: 0.990402
Test Loss: 0.091113, Acc: 0.990682

epoch 8
**********
Finish 8 epoch, Loss: 0.032350, Acc: 0.990891
Test Loss: 0.081245, Acc: 0.991208

epoch 9
**********
Finish 9 epoch, Loss: 0.030806, Acc: 0.991400
Test Loss: 0.086804, Acc: 0.992349

epoch 10
**********
Finish 10 epoch, Loss: 0.029359, Acc: 0.991764
Test Loss: 0.073397, Acc: 0.991694

epoch 11
**********
Finish 11 epoch, Loss: 0.028263, 

In [7]:
# Fetch the dataset again at notebook scope so raw records can be inspected
# (KddData only holds its fetched copy in a local variable of __init__).
kddcup99 = datasets.fetch_kddcup99()

In [8]:
# Pick one raw record (index 888) and its label as a sample for prediction.
test_data = kddcup99.data[888]
label = kddcup99.target[888]
print(test_data, label)

[0 b'tcp' b'http' b'SF' 219 643 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 6 6 0.0
 0.0 0.0 0.0 1.0 0.0 0.0 44 255 1.0 0.0 0.02 0.03 0.0 0.0 0.0 0.0] b'normal.'


In [9]:
# Replace the three categorical fields of the record with their integer codes.
_data = dataset.encode(test_data)

In [12]:
# Zero-pad the encoded record to 64 values, shape it as a (1, 1, 8, 8) batch
# and place it on whatever device the model lives on (an unconditional
# .cuda() would fail on machines without CUDA).
__data = torch.from_numpy(
    np.pad(_data, (0, 64 - len(_data)), 'constant').astype(np.float32)
).reshape(-1, 1, 8, 8).to(next(model.parameters()).device)

In [14]:
# Display the raw output of the network's last Linear layer for the sample
# (one score per class; no softmax is applied).
model(__data)

tensor([[   1.7615,   16.6235,   16.5394,   19.2315,   14.2301,   19.8753,
            7.7450,   16.8384,   15.4089, -126.3723,    7.7885,   29.8984,
           14.1022,   14.9759,    4.9610,  -57.6466,   14.1995,  -18.0559,
          -23.1028,   15.2467,  -22.2353,   13.6960,   16.9799]], device='cuda:0')

In [15]:
def predict(data, multiple=False):
    """Predict the label of one raw record with the global ``model``.

    The record is encoded with ``dataset.encode``, zero-padded to 64 values,
    reshaped to a ``(1, 1, 8, 8)`` batch and passed through the model; the
    index of the highest score is decoded back to the original label.

    Args:
        data: one raw record (a sequence whose items 1, 2 and 3 are the
            categorical values known to ``dataset``'s encoders).
        multiple: currently ignored; kept so existing callers keep working.

    Returns:
        The decoded label of the highest-scoring class.

    Side effects: puts the global ``model`` into eval mode.
    """
    encoded = dataset.encode(data)
    # Follow the model's device instead of assuming CUDA is available.
    device = next(model.parameters()).device
    batch = torch.from_numpy(
        np.pad(encoded, (0, 64 - len(encoded)), 'constant').astype(np.float32)
    ).reshape(-1, 1, 8, 8).to(device)
    # With a single-sample batch BatchNorm must use its running statistics.
    model.eval()
    with torch.no_grad():
        class_index = int(torch.max(model(batch), 1)[1].item())
    # The label encoder needs 1-D input: wrap the index, then unwrap the result.
    return dataset.decode([class_index], label=True)[0]

In [18]:
# Predict the label of the sample record selected above.
predict(test_data)

  if diff:


b'normal.'