In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn import functional as F
from torch import unsqueeze, where, matmul, sum, repeat_interleave, sqrt, topk, flip, index_select, cat
from torch.utils.tensorboard import SummaryWriter

import numpy as np

from dataprocess.cic_ids_2017 import CIC_IDS_2107_DataLoader
# from net.linear import linear_3

In [2]:
def topk_with_distance(src, dis, k):
    """
    find for every data in src its k-nearest neighbor from dis with index and distance 
    params:
        src: (N, C) 
        dis: (M, C)
        k: numbers to select from dis
    return:
        indices: (N, k)
        distance: (N, k) 
    """
    N, _ =src.shape
    M, _ = dis.shape
    src_sqare = repeat_interleave(sum(src ** 2, -1).reshape(N,1), M, 1, output_size=M) # (N, M)
    dis_sqare = repeat_interleave(sum(dis ** 2, -1).reshape(1,M), N, 0, output_size=N) # (N, M)
    src_ids = matmul(src, dis.permute(1,0)) # (N, M)
    distance = src_sqare + dis_sqare - 2 * src_ids
    distance, indices = topk(distance, k, 1) # (N, k)
    distance = sqrt(distance)

    return flip(indices,dims=[1]), flip(distance, dims=[1])

In [3]:
def nonconformity_measure(train_index, train_distance, train_label):
    """
    params:
        train_index: (N, k)
        train_distance: (N, k)
        label: (M, L)
    return:
        nonconformity: (N, L) 
    """
    N, k = train_distance.shape
    _, L = train_label.shape
    # train_distance = repeat_interleave(train_distance.reshape(N, 1, k), L, 1, output_size=L) # (N, L, k)
    train_distance = train_distance.reshape(N, 1, k)
    labels = train_label[train_index] # (N, k, L)
    labels = ~labels
    nonconformity = matmul(train_distance, labels.float()) # (N, 1, L)
    nonconformity = nonconformity.reshape(N, L)
    nonconformity = nonconformity ** (-1)
    return nonconformity

In [4]:
def nonconformity_measure_cali(cali_label, train_index, train_distance, train_label):
    """
    params:
        cali_label: (N, L)
        train_index: (N, k)
        train_distance: (N, k)
        label: (M, L)
    return:
        nonconformity: (N, ) 
    """
    N, k = train_distance.shape
    _, L = train_label.shape
    # train_distance = repeat_interleave(train_distance.reshape(N, 1, k), L, 1, output_size=L) # (N, L, k)
    train_distance = train_distance.reshape(N, 1, k)
    labels = train_label[train_index] # (N, k, L)
    labels = ~labels
    nonconformity = matmul(train_distance, labels.float()) # (N, 1, L)
    nonconformity = nonconformity.reshape(N, L)
    nonconformity = nonconformity ** (-1)
    nonconformity = sum(nonconformity * cali_label, 1) # (N, )
    return nonconformity

In [5]:
class kNN(nn.Module):
    """
    params:
        input_channel:
        output_channel:
    inputs:
        feature: (M, C)
        train_feature: (N, C)
        train_label: (N, L), one-hot code
    return:
        nonconformity: (M, L)
    """
    def __init__(self, k):
        super(kNN, self).__init__()
        self.k = k
    
    def forward(self, feature, train_feature, train_label):
        index, distance = topk_with_distance(feature, train_feature, self.k) # (M, k), (M, k)
        nonconformity = nonconformity_measure(index, distance, train_label) # (M, L)
        return nonconformity

In [2]:
class linear_3(nn.Module):
    def __init__(self, input_channel, output_channel):
        super(linear_3, self).__init__()
        self.fc1 = nn.Linear(input_channel, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_channel)
        self.softmax = nn.Softmax(-1)

        self.drop = nn.Dropout()
        self.relu = nn.ReLU()

    def forward(self, input_data):
        x1 = self.fc1(input_data)
        x1 = self.relu(x1)
        x1 = self.drop(x1)
        x2 = self.fc2(x1)
        x2 = self.relu(x2)
        x2 = self.drop(x2)
        x3 = self.fc3(x2)
        x3 = self.softmax(x3)
        return x1, x2, x3

In [7]:
class DkNN_linear_3(nn.Module):
    """
    params:
        input_channel: int
        output_channel: int
        k:int
        knn_weight: tensor, same length with layers of basic net
    input:
        test_input: (N, C)
        train_feature: (M, C)
        train_label: (M, L)
        cali_nonconformity: (G,)
    output:
        logits: (N, L)
    """
    def __init__(self, input_channel, output_channel, k, knn_weight):
        super(DkNN_linear_3, self).__init__()
        self.bone_net = linear_3(input_channel, output_channel)
        self.knn1 = kNN(k)
        self.knn2 = kNN(k)
        self.knn3 = kNN(k)

        self.drop = nn.Dropout()
        self.relu = nn.ReLU()

        self.nonconformity_weight = knn_weight

    def forward(self, test_input, train_feature, train_label, cali_nonconformity):
        N, _ = test_input.shape
        _, L = train_label.shape
        G, = cali_nonconformity.shape
        with torch.no_grad():
            x1, x2, x3 = self.bone_net(test_input)

            # knn part
            nonconformity_1 = self.knn1(x1, train_feature, train_label).reshape(N, 1, L)
            nonconformity_2 = self.knn2(x2, train_feature, train_label).reshape(N, 1, L)
            nonconformity_3 = self.knn3(x3, train_feature, train_label).reshape(N, 1, L)
            
            nonconfotmity = cat((nonconformity_1, nonconformity_2, nonconformity_3), 1) # (N, 3, L)
            nonconformity_weight = self.nonconformity_weight.repeat(N).reshape(N,3,1)
            nonconformity_weight = repeat_interleave(nonconformity_weight, L, dim=2, output_size=L)

            weighted_nonconformity = sum(nonconfotmity * nonconformity_weight, dim=1).reshape(N, L, 1)
            _logits = repeat_interleave(weighted_nonconformity, G, dim=2, output_size=G) > cali_nonconformity #(N, L, G)
            logits = sum(_logits.int(), dim=2) / G # (N, L)
        return logits

In [45]:
dknn_weight = torch.Tensor([0.3, 0.4, 0.3])
DkNN_net = DkNN_linear_3(85, 9, 200, dknn_weight)
ckpt_file = 'history/linear_3/test_cic_Ids2017/checkpoint/6_13/ckpt_best_99.pth'
checkpoint = torch.load(ckpt_file)
DkNN_net.load_state_dict(checkpoint['net'], strict=False)

_IncompatibleKeys(missing_keys=['bone_net.fc1.weight', 'bone_net.fc1.bias', 'bone_net.fc2.weight', 'bone_net.fc2.bias', 'bone_net.fc3.weight', 'bone_net.fc3.bias'], unexpected_keys=['fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias'])

In [3]:
cali_dataset = CIC_IDS_2107_DataLoader('E:/DkNN/data/CIC-IDS2017', batch_size=1, mode='Cali')

In [3]:
import os 
import time

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]='0'

device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

t = time.localtime()
year, month, day = t.tm_year, t.tm_mon, t.tm_mday

net = input('input the name of net to train: ')
mode = input('input the tag about the training: ')
if mode == '':
    mode = 'no_tag'

log_directory = f"log/exp/{net}/{mode}/{month}_{day}/"
ckpt_directory = f"history/{net}/{mode}/checkpoint/{month}_{day}/"
if not os.path.isdir(log_directory):
    os.makedirs(log_directory)
if not os.path.isdir(ckpt_directory):
    os.makedirs(ckpt_directory)
print('log_directory: ', log_directory)
print('ckpt_directory: ', ckpt_directory)

cuda
log_directory:  log/exp/linear_3/test3_cic_ids2017/6_15/
ckpt_directory:  history/linear_3/test3_cic_ids2017/checkpoint/6_15/


In [4]:
# for cic_ids2017
batch_size = 256
dataset = CIC_IDS_2107_DataLoader('E:\DataSets\CIC-IDS2016', batch_size, mode='Train')
data_labels = dataset.data.label_category.keys()
class_num = len(data_labels)
feature_length = dataset.data.feature_length
print(class_num, feature_length)

9 85


In [5]:
writer1 = SummaryWriter(log_directory)

In [6]:
net = linear_3(feature_length, class_num).to(device)

epoch = 100
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(net.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

In [7]:
for t in range(0, epoch):
    running_loss = 0.0
    for step, (x, y) in enumerate(dataset):
        
        x = x.to(device)
        y = y.to(device)

        _, _, y_pred = net(x)
        loss = criterion(y_pred, y)
        loss.backward()
        optimizer.step()
        
        # visualize loss
        running_loss += loss.item() / batch_size
    # ...log the running loss
    writer1.add_scalar('training loss', running_loss, t)
    scheduler.step()
    if t % 10 == 9:
        checkpoint = {"net": net.state_dict(), 'optimizer':optimizer.state_dict(), "epoch": t}
        torch.save(checkpoint,  ckpt_directory + 'ckpt_best_%s.pth' %(str(t)))
writer1.close()

In [None]:
for step, (x, y) in enumerate(dataset):
    if step == 0:
        cali_data = x
        cali_label = y
    else:
        cali_data = cat((cali_data, x))
        cali_label = cat((cali_label, y))

In [11]:
np.random.randint(0,32,size=(2,3,2)).tolist()

[[[18, 13], [10, 13], [0, 3]], [[3, 5], [8, 13], [21, 15]]]

In [37]:
a = torch.Tensor([[[1, 0], [1, 1], [0, 0]], [[1, 1], [0, 0], [0, 1]]]).bool()
b = torch.Tensor([7,15]).float()

In [39]:
c = sum(a.int(), 2)
print(c)

tensor([[1, 2, 0],
        [2, 0, 1]])


In [5]:
N, M = 20000, 5000
nonconformity = torch.Tensor(np.random.random(size=(N,))*3.4E+38)
standard_nonconformity = torch.Tensor(np.random.random(size=(M,))*3.4E+38)

In [6]:
nonconformity = unsqueeze(nonconformity, -1)
nonconformity = nonconformity.repeat(1, M)
print(nonconformity.shape)

torch.Size([20000, 5000])


In [52]:
result = nonconformity - standard_nonconformity
print(result.shape)

torch.Size([20000, 5000])


In [48]:
ones_array = torch.ones(result.shape)
zeros_array = torch.zeros(result.shape)
_result = where(result < 0,ones_array,zeros_array)

In [37]:
result = _result.sum(-1, keepdim=False)
print(result.shape)

torch.Size([20000])


In [38]:
result = result / M
print(result[:20])

tensor([0.5904, 0.8680, 0.3888, 0.2612, 0.4950, 0.0360, 0.8210, 0.2072, 0.8244,
        0.3100, 0.5700, 0.9140, 0.2904, 0.3008, 0.0592, 0.5360, 0.1212, 0.2880,
        0.4150, 0.8034])


In [39]:
result.requires_grad(True)

TypeError: 'bool' object is not callable