In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import learn2learn as l2l
from model import *

  from .autonotebook import tqdm as notebook_tqdm


# Old and new classes

In [2]:
old_labels = "/home/ids/NT505.O21.ATCL/Datasets/Changed_dataset/CICIDS2017-11LABEL.csv"

new_labels = "/home/ids/NT505.O21.ATCL/Datasets/Changed_dataset/CICIDS2017-7LABEL4.csv"

final_labels = "/home/ids/NT505.O21.ATCL/Datasets/Changed_dataset/CICIDS2017-Combined_11_7.csv"

In [3]:
data_old = pd.read_csv(old_labels)
print(data_old["Label"].unique())

[ 0  2  3  1  4 11 12 13 10  5 14]


In [4]:
X_old = data_old.drop(columns="Label")
y_old = data_old["Label"]

In [5]:
X_tensor_old = torch.tensor(X_old.values, dtype=torch.float32)
y_tensor_old = torch.tensor(y_old.values, dtype=torch.long)

# Initial learning

In [6]:
ways = 4
shots = 5
meta_lr = 0.001
fast_lr = 0.01
adaptation_steps = 1
meta_batch_size = 4
batch = 256
epoch_old = 10
epoch_new = 10
epoch_fin = 10

In [7]:
# My_train_old = MyDataset(name="Old_data", dataframe=data_old, label_column="Label")
# print(My_train_old.__getitem__(2))

In [8]:
dnn_model1 = DFNN(input_shape=78, num_classes=15)
loss_old = nn.CrossEntropyLoss()

In [9]:
dnn_model1.fit(X_tensor_old, y_tensor_old, batch , epoch_old, loss_old)

Epoch 1, Loss: 0.19958268105983734
Epoch 2, Loss: 0.2265874296426773
Epoch 3, Loss: 0.18570439517498016
Epoch 4, Loss: 0.2304009348154068
Epoch 5, Loss: 0.22608761489391327
Epoch 6, Loss: 0.22533707320690155
Epoch 7, Loss: 0.2253720909357071
Epoch 8, Loss: 0.2303600311279297
Epoch 9, Loss: 0.22506088018417358
Epoch 10, Loss: 0.23027794063091278


In [10]:
# meta_data_old =l2l.data.MetaDataset(My_train_old)
# print(len(meta_data_old))
# print(type(meta_data_old))

In [11]:
# a = nn.MSELoss()
# b = nn.CrossEntropyLoss()
# c = a+b
# c.backward()

In [12]:
# task_trans = [
#     l2l.data.transforms.NWays(dataset=meta_data_old, n=ways),           # N-way
#     l2l.data.transforms.KShots(dataset=meta_data_old, k=shots * 2),     # K-shot
#     l2l.data.transforms.LoadData(dataset=meta_data_old),
# ]
# task_train_old = l2l.data.TaskDataset(dataset = meta_data_old,
#                                       task_transforms = task_trans, num_tasks=20000)

In [13]:
# X, y = task_train_old.sample()
# print("Feature: {}".format(X.shape))
# print("Labels: {}".format(y.shape))

In [14]:
def accuracy(predictions, targets):
    # print(targets.size(0))
    # print(targets.shape)
    predictions = predictions.argmax(dim = 1).view(targets.shape)
    return (predictions == targets).sum().float() / targets.size(0)

In [15]:
def fast_adapt(batch, learner, loss, adaptation_steps, shots, ways):
    data, labels = batch
    # data, labels = data.to(device), labels.to(device)

    # Seperate data into adaptation and evaluation sets
    adaptation_indices = np.zeros(data.size(0), dtype = bool)
    adaptation_indices[np.arange(shots * ways) * 2] = True
    
    evaluation_indices = torch.from_numpy(~adaptation_indices)
    adaptation_indices = torch.from_numpy(adaptation_indices)

    adaptation_data, adaptation_labels = data[adaptation_indices], labels[adaptation_indices]
    evaluation_data, evaluation_labels = data[evaluation_indices], labels[evaluation_indices]

    # Adapt the model
    for step in range(adaptation_steps):
        # print(learner(adaptation_data).shape)
        # print(adaptation_labels.shape)
        train_error = loss(learner(adaptation_data), adaptation_labels)
        learner.adapt(train_error)
    
    # Evaluatie the adapted model
    predictions = learner(evaluation_data)
    # print("Logit value: {}".format(predictions))
    evaluation_error = loss(predictions, evaluation_labels)
    evaluation_accuracy = accuracy(predictions, evaluation_labels)
    return evaluation_error, evaluation_accuracy

In [16]:
def train_model(maml_dnn, meta_batch_size, task_num, epochs, opt, loss, adaptation_steps, shots, ways):
    for iteration in range(epochs):
        opt.zero_grad()
        meta_train_error = 0.0
        meta_train_acc = 0.0
        loss_lst = list()

        for task in range(meta_batch_size):
            # Compute meta-training loss
            learner = maml_dnn.clone()
            # print(len(task11))
            # print(len(task11.sample()))
            batch = task_num.sample()
            evaluation_error, evaluation_acc = fast_adapt(batch, learner, loss, adaptation_steps, shots, ways)
            loss_lst.append(evaluation_error)
            evaluation_error.backward()
            
            meta_train_error += evaluation_error.item()
            meta_train_acc += evaluation_acc.item()

        meta_train_acc_sum = meta_train_acc / meta_batch_size
        meta_train_error_sum = meta_train_error / meta_batch_size

        print(f"Iteration {iteration + 1}")
        print('Meta train Accuracy: {}'.format(meta_train_acc_sum))
        print('Meta train Error: {}'.format(meta_train_error_sum))
        # print('Loss each batch: {}'.format(loss_lst))
        print('\n')

        # Average the accumulated gradients and optimize
        for para in maml_dnn.parameters():
            para.grad.data.mul_(1.0 / meta_batch_size)
        opt.step()

In [17]:
def test_model(maml_dnn, meta_batch_size, task_num, loss, adaptation_steps, shots, ways):
    meta_test_acc = 0.0
    meta_test_error = 0.0
    for task in range(meta_batch_size):
        # Compute meta-testing loss
        learner = maml_dnn.clone()
        batch = task_num.sample()
        evaluation_error, evaluation_acc = fast_adapt(batch, learner, loss, adaptation_steps, shots, ways)  
        
        meta_test_acc += evaluation_acc.item()
        meta_test_error += evaluation_error.item()
        
    meta_test_acc_sum = meta_test_acc / meta_batch_size
    meta_test_error_sum = meta_test_error / meta_batch_size

    print(f"Meta task Accuracy: ", end='')
    print(f"{meta_test_acc_sum}")

In [18]:
# train_model(maml_dnn1, meta_batch_size, task_train_old, 10000, opt, loss, adaptation_steps, shots, ways)

# Initial Learning

## Training model with new labels

In [19]:
data_new = pd.read_csv(new_labels)
print(data_new["Label"].unique())

[3 9 8 7 6 4 5]


In [20]:
X_new = data_new.drop(columns="Label")
y_new = data_new["Label"]

In [21]:
X_tensor_new = torch.tensor(X_new.values, dtype=torch.float32)
y_tensor_new = torch.tensor(y_new.values, dtype=torch.long)

In [22]:
# loss_new = nn.CrossEntropyLoss(reduction='mean')

In [23]:
dnn_model1.fit(X_tensor_new, y_tensor_new, batch , epoch_new, loss_old)

Epoch 1, Loss: 5.545642852783203
Epoch 2, Loss: 4.781175136566162
Epoch 3, Loss: 4.330199241638184
Epoch 4, Loss: 4.031749248504639
Epoch 5, Loss: 3.8372349739074707
Epoch 6, Loss: 3.6914989948272705
Epoch 7, Loss: 3.566136360168457
Epoch 8, Loss: 3.446817636489868
Epoch 9, Loss: 3.326040506362915
Epoch 10, Loss: 3.200028657913208


In [24]:
My_train_new = MyDataset(name="New_data4", dataframe=data_new, label_column="Label")
print(My_train_new.__getitem__(7))

(tensor([8.1607e+05, 8.0000e+01, 5.4181e+05, 3.0000e+00, 6.0000e+00, 2.6000e+01,
        1.1607e+04, 2.0000e+01, 0.0000e+00, 8.6667e+00, 1.0263e+01, 5.8400e+03,
        0.0000e+00, 1.9345e+03, 2.5389e+03, 2.1471e+04, 1.6611e+01, 6.7726e+04,
        1.9115e+05, 5.4081e+05, 3.0000e+00, 8.8800e+02, 4.4400e+02, 4.8932e+02,
        7.9000e+02, 9.8000e+01, 5.4173e+05, 1.0835e+05, 2.4175e+05, 5.4081e+05,
        3.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 7.2000e+01,
        1.3200e+02, 5.5370e+00, 1.1074e+01, 0.0000e+00, 5.8400e+03, 1.1633e+03,
        2.1383e+03, 4.5725e+06, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.0000e+00, 1.2926e+03,
        8.6667e+00, 1.9345e+03, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 3.0000e+00, 2.6000e+01, 6.0000e+00, 1.1607e+04,
        8.1920e+03, 2.2900e+02, 2.0000e+00, 2.0000e+01, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e

In [25]:
meta_train_new = l2l.data.MetaDataset(My_train_new)

In [26]:
print(len(meta_train_new))

134692


In [27]:
task_trans_new = [
    l2l.data.transforms.NWays(dataset=meta_train_new, n=ways),          # N-way
    l2l.data.transforms.KShots(dataset=meta_train_new, k=shots * 2),    # K-shot
    l2l.data.transforms.LoadData(dataset=meta_train_new),
]
task_train_new = l2l.data.TaskDataset(dataset = meta_train_new, task_transforms = task_trans_new, num_tasks=20000)

In [28]:
# count = 0
# for task in task_train_new:
#     if count < 2:
#         X, y = task
#         print(X.shape)
#         print(y.shape)
#         count += 1
#     else:
#         break
X, y = task_train_new.sample()

## Meta-training and Meta-testing

In [29]:
maml_dnn1 = l2l.algorithms.MAML(dnn_model1, lr = fast_lr, first_order=False)

opt = optim.Adam(maml_dnn1.parameters(), meta_lr)

In [30]:
# dnn_model2 = DFNN(input_shape=78, num_classes=15)
# maml_dnn2 = l2l.algorithms.MAML(dnn_model2, lr = fast_lr, first_order=False)

In [31]:
print("Training model...")
train_model(maml_dnn1, meta_batch_size, task_train_new, 10000, opt, loss_old, adaptation_steps, shots, ways)

Training model...
Iteration 1
Meta train Accuracy: 0.25
Meta train Error: 8.546973943710327


Iteration 2
Meta train Accuracy: 0.1875
Meta train Error: 8.574887990951538


Iteration 3
Meta train Accuracy: 0.125
Meta train Error: 9.512435913085938


Iteration 4
Meta train Accuracy: 0.125
Meta train Error: 10.072770833969116


Iteration 5
Meta train Accuracy: 0.1875
Meta train Error: 8.583468914031982


Iteration 6
Meta train Accuracy: 0.0625
Meta train Error: 9.955108642578125


Iteration 7
Meta train Accuracy: 0.25
Meta train Error: 10.288935899734497


Iteration 8
Meta train Accuracy: 0.1875
Meta train Error: 8.408420324325562


Iteration 9
Meta train Accuracy: 0.1875
Meta train Error: 10.052928924560547


Iteration 10
Meta train Accuracy: 0.0625
Meta train Error: 10.502126455307007


Iteration 11
Meta train Accuracy: 0.1875
Meta train Error: 9.244938373565674


Iteration 12
Meta train Accuracy: 0.125
Meta train Error: 9.644399166107178


Iteration 13
Meta train Accuracy: 0.125
Meta t

# Incremental Learning

## Train model with combined labels and old labels

In [32]:
data_new_old = pd.read_csv(final_labels)
print(data_new_old["Label"].unique())

[ 0  2  3  1  4 11 12 13 10  5 14  9  6  7  8]


In [33]:
X_train_new_old, X_test_new_old = train_test_split(data_new_old, test_size=0.3)
print(X_train_new_old.shape)
print(X_test_new_old.shape)

(1347177, 79)
(577362, 79)


In [34]:
X_final = X_train_new_old.drop(columns="Label")
y_final = X_train_new_old["Label"]

In [35]:
X_tensor_fin = torch.tensor(X_final.values, dtype=torch.float32)
y_tensor_fin = torch.tensor(y_final.values, dtype=torch.long)

In [36]:
# loss_fin = nn.CrossEntropyLoss(reduction='mean')
# loss_fin = loss_old + loss_new

In [37]:
dnn_model1.fit(X_tensor_fin, y_tensor_fin, batch , epoch_fin, loss_old)

Epoch 1, Loss: 0.8921040296554565
Epoch 2, Loss: 0.8584554195404053
Epoch 3, Loss: 0.8825308680534363
Epoch 4, Loss: 0.8816751837730408
Epoch 5, Loss: 0.8695823550224304
Epoch 6, Loss: 0.8663064241409302
Epoch 7, Loss: 0.8768521547317505
Epoch 8, Loss: 0.8547331094741821
Epoch 9, Loss: 0.8808427453041077
Epoch 10, Loss: 0.8703023791313171


In [38]:
My_train_final = MyDataset(name="Train_final", dataframe=X_train_new_old, label_column="Label")
print(My_train_final.__getitem__(7))

(tensor([4.1278e+05, 5.7324e+04, 2.1700e+02, 3.0000e+00, 1.0000e+00, 7.7000e+01,
        0.0000e+00, 4.6000e+01, 0.0000e+00, 2.5667e+01, 2.3459e+01, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 3.5484e+05, 1.8433e+04, 7.2333e+01,
        8.7757e+01, 1.7100e+02, 3.0000e+00, 2.1700e+02, 1.0850e+02, 1.4920e+02,
        2.1400e+02, 3.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 6.0000e+01,
        2.0000e+01, 1.3825e+04, 4.6083e+03, 0.0000e+00, 4.6000e+01, 2.4600e+01,
        2.3277e+01, 5.4180e+02, 0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00,
        1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 3.0750e+01,
        2.5667e+01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 3.0000e+00, 7.7000e+01, 1.0000e+00, 0.0000e+00,
        1.2300e+02, 0.0000e+00, 1.0000e+00, 2.0000e+01, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e

In [39]:
meta_train_final = l2l.data.MetaDataset(My_train_final)

In [40]:
task_trans_final = [
    l2l.data.transforms.NWays(dataset=meta_train_final, n=ways),           # N-way
    l2l.data.transforms.KShots(dataset=meta_train_final, k=shots * 2),    #K-shot
    l2l.data.transforms.LoadData(dataset=meta_train_final),
]
task_train_final = l2l.data.TaskDataset(dataset = meta_train_final, task_transforms = task_trans_final, num_tasks=20000)

In [41]:
My_test_final = MyDataset(name="Test_final", dataframe=X_test_new_old, label_column="Label")
print(My_test_final.__getitem__(7))

(tensor([1.6718e+04, 5.1794e+04, 4.1000e+01, 1.0000e+00, 1.0000e+00, 6.0000e+00,
        6.0000e+00, 6.0000e+00, 6.0000e+00, 6.0000e+00, 0.0000e+00, 6.0000e+00,
        6.0000e+00, 6.0000e+00, 0.0000e+00, 2.9268e+05, 4.8780e+04, 4.1000e+01,
        0.0000e+00, 4.1000e+01, 4.1000e+01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 2.0000e+01,
        2.0000e+01, 2.4390e+04, 2.4390e+04, 6.0000e+00, 6.0000e+00, 6.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        1.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 1.0000e+00, 9.0000e+00,
        6.0000e+00, 6.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 1.0000e+00, 6.0000e+00, 1.0000e+00, 6.0000e+00,
        6.6000e+01, 2.5700e+02, 0.0000e+00, 2.0000e+01, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e

In [42]:
meta_test_final = l2l.data.MetaDataset(My_test_final)

In [43]:
task_test_final_trans = [
    l2l.data.transforms.KShots(dataset=meta_test_final, k=shots * 2),
    l2l.data.transforms.LoadData(dataset=meta_test_final),
]
task_test_final = l2l.data.TaskDataset(dataset = meta_test_final, task_transforms = task_test_final_trans, num_tasks=100)


In [44]:
count = 0
for task in task_train_final:
    if count < 2:
        X, y = task
        print(X.shape)
        print(y.shape)
        count += 1
    else:
        break
# X, y = task11.sample()

torch.Size([40, 78])
torch.Size([40])
torch.Size([40, 78])
torch.Size([40])


In [45]:
count = 0
for task in task_test_final:
    if count < 2:
        X, y = task
        print(X.shape)
        print(y.shape)
        count += 1
    else:
        break
# X, y = task11.sample()

torch.Size([150, 78])
torch.Size([150])
torch.Size([150, 78])
torch.Size([150])


In [46]:
print("Training model...")
train_model(maml_dnn1, meta_batch_size, task_train_final, 10000, opt, loss_old, adaptation_steps, shots, ways)

Training model...
Iteration 1
Meta train Accuracy: 0.7124999910593033
Meta train Error: 1.1724121868610382


Iteration 2
Meta train Accuracy: 0.7875000089406967
Meta train Error: 0.9345753937959671


Iteration 3
Meta train Accuracy: 0.8125
Meta train Error: 0.8257548809051514


Iteration 4
Meta train Accuracy: 0.7250000089406967
Meta train Error: 0.9839040488004684


Iteration 5
Meta train Accuracy: 0.7625000029802322
Meta train Error: 0.9374504238367081


Iteration 6
Meta train Accuracy: 0.8499999940395355
Meta train Error: 0.7092392891645432


Iteration 7
Meta train Accuracy: 0.800000011920929
Meta train Error: 0.8437051475048065


Iteration 8
Meta train Accuracy: 0.862500011920929
Meta train Error: 0.6709319055080414


Iteration 9
Meta train Accuracy: 0.7875000089406967
Meta train Error: 0.9455457031726837


Iteration 10
Meta train Accuracy: 0.7374999970197678
Meta train Error: 1.0398726612329483


Iteration 11
Meta train Accuracy: 0.7749999910593033
Meta train Error: 1.027955397963

In [47]:
print("Testing model...")
test_model(maml_dnn1, meta_batch_size, task_test_final, loss_old, adaptation_steps, shots, ways)

Testing model...
Meta task Accuracy: 0.7403846234083176
