In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Train Incrementally and Evaluate

---



---



In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

class InitialColumnProgNN(nn.Module):
    def __init__(self, topology, activations):
        super(InitialColumnProgNN, self).__init__()
        self.layers = nn.ModuleList()
        for i in range(len(topology) - 1):
            self.layers.append(nn.Linear(topology[i], topology[i+1]))
        self.activations = activations

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # removinf softmax from last layer because of crossentropyloss function - vishal
            if i != len(self.activations):
              x = self.activations[i](x)
        return x

class ExtensibleColumnProgNN(nn.Module):
    def __init__(self, topology, activations, prev_columns):
        super(ExtensibleColumnProgNN, self).__init__()
        self.layers = nn.ModuleList()
        self.lateral_connections = nn.ModuleList()
        for i in range(len(topology) - 1):
            self.layers.append(nn.Linear(topology[i], topology[i+1]))
            if i > 0:
                lateral = [nn.Linear(prev_column.layers[i-1].out_features, topology[i+1], bias=False) for prev_column in prev_columns]
                self.lateral_connections.append(nn.ModuleList(lateral))
        self.activations = activations
        self.prev_columns = prev_columns

    def forward(self, x):
        prev_hs = [[[] for j in range(len(prev_col.layers))] for prev_col in self.prev_columns]

        for j in range(len(self.prev_columns)):
            x_copy = x.clone()
            for i, col_layer in enumerate(self.prev_columns[j].layers):
                x_copy = col_layer(x_copy)
                # removinf softmax from last layer because of crossentropyloss function - vishal
                if i != len(self.prev_columns[j].activations):
                  x_copy = self.prev_columns[j].activations[i](x_copy)
                prev_hs[j][i] = x_copy.clone()


        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i > 0:
                for j, lateral in enumerate(self.lateral_connections[i-1]):
                    x += lateral(prev_hs[j][i - 1])
            # removinf softmax from last layer because of crossentropyloss function - vishal
            if i != len(self.activations):
              x = self.activations[i](x)
        return x

#todo: add training batch size later
def train_column(column, data, target, epochs=50, lr=0.001):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(column.parameters(), lr=lr)
    # saved h_values for training lateral connections
    for epoch in range(epochs):
        optimizer.zero_grad()

        # forward pass
        output = column(data)

        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if epoch%10 == 0:
          print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}')

def test_column(column, embeddings, labels):
    column.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        test_outputs = column(embeddings)
        _, predicted = torch.max(test_outputs, 1)

        # Calculate accuracy
        accuracy = accuracy_score(labels, predicted)

        # Calculate F1 score
        f1 = f1_score(labels, predicted)

        # Calculate precision
        precision = precision_score(labels, predicted)

        # Calculate recall
        recall = recall_score(labels, predicted)

        print("Accuracy: {:.2f}".format(accuracy))
        print("F1 Score: {:.2f}".format(f1))
        print("Precision: {:.2f}".format(precision))
        print("Recall: {:.2f}".format(recall))

class PNN():
    def __init__(self):
        self.num_classes = 1

        self.topology = [256, 100, 64, 25, 2]
        self.activations = [F.relu, F.relu, F.relu]

        # Instantiate the first module
        self.subnetworks = [InitialColumnProgNN(self.topology, self.activations)]

        # # Define the layer on top
        # self.output_layer = nn.Linear(in_features=num_classes*2, out_features=num_classes)

    def add_network(self):
        self.num_classes += 1
        self.subnetworks.append(ExtensibleColumnProgNN(self.topology, self.activations, self.subnetworks))

    def train(self, subnetwork, embeddings, labels):
        for i in range(self.num_classes):
            if i == subnetwork:
                # Unfreeze the parameters of the modules
                for param in self.subnetworks[i].parameters():
                    param.requires_grad = True
            else:
                # Freeze the parameters of the modules
                for param in self.subnetworks[i].parameters():
                    param.requires_grad = False
        # Train the relevant PNN
        train_column(self.subnetworks[subnetwork], embeddings, labels, epochs=100)

        #Unfreeze all parameters
        for i in range(self.num_classes):
            for param in self.subnetworks[i].parameters():
                    param.requires_grad = True

    def test(self, embeddings, labels):
        for i in range(self.num_classes):
            print('\nResults for class', i, '-')
            test_column(self.subnetworks[i], embeddings, labels[i])

        #  # Concatenate the outputs along the feature dimension (dimension 1)
        # combined_output = torch.cat((output1, output2), dim=1)

        # # Apply additional layer
        # final_output = self.output_layer(combined_output)


model = PNN()

# train_column(model, embeddings0, labels0, epochs=50)

In [64]:
model.subnetworks

[InitialColumnProgNN(
   (layers): ModuleList(
     (0): Linear(in_features=256, out_features=100, bias=True)
     (1): Linear(in_features=100, out_features=64, bias=True)
     (2): Linear(in_features=64, out_features=25, bias=True)
     (3): Linear(in_features=25, out_features=2, bias=True)
   )
 )]

# Task 1

In [3]:
import json
import torch
embeddings = []
labels = []

for i in range(1, 5):
    with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/train_embeddings/dos_train_file_{i}.json', 'r') as f:
        for line in f:
            data = json.loads(line)
            embeddings.append(data['embeddings'][0])
            labels.append(data['label'])

# Convert lists to tensors
embeddings0 = torch.tensor(embeddings)
labels0 = torch.tensor(labels)

print(embeddings0.size())  # This will show the shape of the embeddings tensor
print(labels0.size())     # This will show the shape of the labels tensor

model.train(0, embeddings=embeddings0, labels=labels0)

torch.Size([4437, 256])
torch.Size([4437])
Epoch 1/100, Loss: 0.697877824306488
Epoch 11/100, Loss: 0.6894388794898987
Epoch 21/100, Loss: 0.6719402074813843
Epoch 31/100, Loss: 0.6458081007003784
Epoch 41/100, Loss: 0.6175787448883057
Epoch 51/100, Loss: 0.5892585515975952
Epoch 61/100, Loss: 0.5704184770584106
Epoch 71/100, Loss: 0.5558143854141235
Epoch 81/100, Loss: 0.5424866676330566
Epoch 91/100, Loss: 0.5280255079269409


In [4]:
embeddings = []
labels = []

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/dos_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([data['label']])

# Convert lists to tensors
embeddings0 = torch.tensor(embeddings)
labels0 = torch.tensor(labels)

print(embeddings0.size())  # This will show the shape of the embeddings tensor
print(labels0.size())     # This will show the shape of the labels tensor

model.test(embeddings0, labels0.T)

torch.Size([1109, 256])
torch.Size([1109, 1])

Results for class 0 -
Accuracy: 0.72
F1 Score: 0.72
Precision: 0.74
Recall: 0.70


# Task 2

In [5]:
import json
import torch
embeddings = []
labels = []

for i in range(1, 5):
    with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/train_embeddings/+info_train_file_{i}.json', 'r') as f:
        for line in f:
            data = json.loads(line)
            embeddings.append(data['embeddings'][0])
            labels.append(data['label'])

# Convert lists to tensors
embeddings0 = torch.tensor(embeddings)
labels0 = torch.tensor(labels)

print(embeddings0.size())  # This will show the shape of the embeddings tensor
print(labels0.size())     # This will show the shape of the labels tensor

model.add_network()
model.train(1, embeddings=embeddings0, labels=labels0)

torch.Size([984, 256])
torch.Size([984])
Epoch 1/100, Loss: 0.7490859627723694
Epoch 11/100, Loss: 0.6737428903579712
Epoch 21/100, Loss: 0.6307698488235474
Epoch 31/100, Loss: 0.5876051783561707
Epoch 41/100, Loss: 0.5408448576927185
Epoch 51/100, Loss: 0.48283374309539795
Epoch 61/100, Loss: 0.4343494772911072
Epoch 71/100, Loss: 0.39462095499038696
Epoch 81/100, Loss: 0.3554893732070923
Epoch 91/100, Loss: 0.3147546648979187


In [6]:
embeddings = []
labels = []

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/dos_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([data['label'], 1])

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/+info_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([1, data['label']])

# Convert lists to tensors
embeddings0 = torch.tensor(embeddings)
labels0 = torch.tensor(labels)

print(embeddings0.size())  # This will show the shape of the embeddings tensor
print(labels0.size())     # This will show the shape of the labels tensor

model.test(embeddings0, labels0.T)

torch.Size([1355, 256])
torch.Size([1355, 2])

Results for class 0 -
Accuracy: 0.68
F1 Score: 0.70
Precision: 0.78
Recall: 0.64

Results for class 1 -
Accuracy: 0.45
F1 Score: 0.59
Precision: 0.92
Recall: 0.43


# Task 3

In [7]:
import json
import torch
embeddings = []
labels = []

for i in range(1, 5):
    with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/train_embeddings/bypass_train_file_{i}.json', 'r') as f:
        for line in f:
            data = json.loads(line)
            embeddings.append(data['embeddings'][0])
            labels.append(data['label'])

# Convert lists to tensors
embeddings0 = torch.tensor(embeddings)
labels0 = torch.tensor(labels)

print(embeddings0.size())  # This will show the shape of the embeddings tensor
print(labels0.size())     # This will show the shape of the labels tensor

model.add_network()
model.train(2, embeddings=embeddings0, labels=labels0)

embeddings = []
labels = []

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/dos_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([data['label'], 1, 1])

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/+info_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([1, data['label'], 1])

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/bypass_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([1, 1, data['label']])

# Convert lists to tensors
embeddings0 = torch.tensor(embeddings)
labels0 = torch.tensor(labels)

print(embeddings0.size())  # This will show the shape of the embeddings tensor
print(labels0.size())     # This will show the shape of the labels tensor

model.test(embeddings0, labels0.T)

torch.Size([709, 256])
torch.Size([709])
Epoch 1/100, Loss: 0.7441276907920837
Epoch 11/100, Loss: 0.6702899932861328
Epoch 21/100, Loss: 0.6484308838844299
Epoch 31/100, Loss: 0.6307220458984375
Epoch 41/100, Loss: 0.6108115315437317
Epoch 51/100, Loss: 0.5751252174377441
Epoch 61/100, Loss: 0.5162976384162903
Epoch 71/100, Loss: 0.4560319185256958
Epoch 81/100, Loss: 0.3949965834617615
Epoch 91/100, Loss: 0.334001749753952
torch.Size([1532, 256])
torch.Size([1532, 3])

Results for class 0 -
Accuracy: 0.66
F1 Score: 0.70
Precision: 0.81
Recall: 0.62

Results for class 1 -
Accuracy: 0.46
F1 Score: 0.60
Precision: 0.93
Recall: 0.44

Results for class 2 -
Accuracy: 0.42
F1 Score: 0.57
Precision: 0.94
Recall: 0.41


# Task 4

In [8]:
import json
import torch
embeddings = []
labels = []

for i in range(1, 5):
    with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/train_embeddings/+priv_train_file_{i}.json', 'r') as f:
        for line in f:
            data = json.loads(line)
            embeddings.append(data['embeddings'][0])
            labels.append(data['label'])

# Convert lists to tensors
embeddings0 = torch.tensor(embeddings)
labels0 = torch.tensor(labels)

print(embeddings0.size())  # This will show the shape of the embeddings tensor
print(labels0.size())     # This will show the shape of the labels tensor

model.add_network()
model.train(2, embeddings=embeddings0, labels=labels0)

embeddings = []
labels = []

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/dos_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([data['label'], 1, 1, 1])

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/+info_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([1, data['label'], 1, 1])

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/bypass_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([1, 1, data['label'], 1])

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/+priv_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([1, 1, 1, data['label']])

# Convert lists to tensors
embeddings0 = torch.tensor(embeddings)
labels0 = torch.tensor(labels)

print(embeddings0.size())  # This will show the shape of the embeddings tensor
print(labels0.size())     # This will show the shape of the labels tensor

model.test(embeddings0, labels0.T)

torch.Size([638, 256])
torch.Size([638])
Epoch 1/100, Loss: 0.8878297209739685
Epoch 11/100, Loss: 0.6105435490608215
Epoch 21/100, Loss: 0.5229743719100952
Epoch 31/100, Loss: 0.4826395511627197
Epoch 41/100, Loss: 0.44857582449913025
Epoch 51/100, Loss: 0.4127260446548462
Epoch 61/100, Loss: 0.37526941299438477
Epoch 71/100, Loss: 0.334656298160553
Epoch 81/100, Loss: 0.28982973098754883
Epoch 91/100, Loss: 0.24226541817188263
torch.Size([1692, 256])
torch.Size([1692, 4])

Results for class 0 -
Accuracy: 0.64
F1 Score: 0.70
Precision: 0.83
Recall: 0.60

Results for class 1 -
Accuracy: 0.46
F1 Score: 0.60
Precision: 0.94
Recall: 0.44

Results for class 2 -
Accuracy: 0.33
F1 Score: 0.46
Precision: 0.96
Recall: 0.30

Results for class 3 -
Accuracy: 0.34
F1 Score: 0.48
Precision: 0.97
Recall: 0.32


# Task 5

In [9]:
import json
import torch
embeddings = []
labels = []

for i in range(1, 5):
    with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/train_embeddings/other_train_file_{i}.json', 'r') as f:
        for line in f:
            data = json.loads(line)
            embeddings.append(data['embeddings'][0])
            labels.append(data['label'])

# Convert lists to tensors
embeddings0 = torch.tensor(embeddings)
labels0 = torch.tensor(labels)

print(embeddings0.size())  # This will show the shape of the embeddings tensor
print(labels0.size())     # This will show the shape of the labels tensor

model.add_network()
model.train(2, embeddings=embeddings0, labels=labels0)

embeddings = []
labels = []

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/dos_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([data['label'], 1, 1, 1, 1])

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/+info_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([1, data['label'], 1, 1, 1])

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/bypass_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([1, 1, data['label'], 1, 1])

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/+priv_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([1, 1, 1, data['label'], 1])

with open(f'/content/drive/MyDrive/CSCI544-Project/data/class_wise_embeddings/test_embeddings/other_test_file.json', 'r') as f:
    for line in f:
        data = json.loads(line)
        embeddings.append(data['embeddings'][0])
        labels.append([1, 1, 1, 1, data['label']])

# Convert lists to tensors
embeddings0 = torch.tensor(embeddings)
labels0 = torch.tensor(labels)

print(embeddings0.size())  # This will show the shape of the embeddings tensor
print(labels0.size())     # This will show the shape of the labels tensor

model.test(embeddings0, labels0.T)

torch.Size([800, 256])
torch.Size([800])
Epoch 1/100, Loss: 1.2902934551239014
Epoch 11/100, Loss: 0.5481863617897034
Epoch 21/100, Loss: 0.3996991813182831
Epoch 31/100, Loss: 0.33156898617744446
Epoch 41/100, Loss: 0.2819080948829651
Epoch 51/100, Loss: 0.2394256442785263
Epoch 61/100, Loss: 0.20193754136562347
Epoch 71/100, Loss: 0.16852302849292755
Epoch 81/100, Loss: 0.13897638022899628
Epoch 91/100, Loss: 0.11328723281621933
torch.Size([1892, 256])
torch.Size([1892, 5])

Results for class 0 -
Accuracy: 0.65
F1 Score: 0.72
Precision: 0.85
Recall: 0.62

Results for class 1 -
Accuracy: 0.48
F1 Score: 0.63
Precision: 0.95
Recall: 0.47

Results for class 2 -
Accuracy: 0.78
F1 Score: 0.88
Precision: 0.95
Recall: 0.82

Results for class 3 -
Accuracy: 0.33
F1 Score: 0.47
Precision: 0.98
Recall: 0.31

Results for class 4 -
Accuracy: 0.34
F1 Score: 0.47
Precision: 0.99
Recall: 0.31
