In [None]:
# Mount Google Drive at /content/drive; the data-loading cells below read
# their .jsonl files from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import json

# Imported here as well: this cell builds tensors, and it runs before the
# model cell that carries the notebook's other torch imports.
import torch


def load_embeddings(path):
    """Read one JSONL shard and return its embeddings and labels as tensors.

    Every non-blank line is parsed as a JSON object. The first entry of its
    ``'embeddings'`` field and the value of its ``'label'`` field are collected
    in file order.

    Args:
        path: Path of the ``.jsonl`` file to read.

    Returns:
        A ``(embeddings, labels)`` pair of tensors created with
        ``torch.tensor`` from the collected values.
    """
    embeddings = []
    labels = []
    with open(path, 'r') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            data = json.loads(line)
            embeddings.append(data['embeddings'][0])
            labels.append(data['label'])
    return torch.tensor(embeddings), torch.tensor(labels)


embeddings0, labels0 = load_embeddings('/content/drive/MyDrive/t5p_small_embeddings/train/train_0.jsonl')

print(embeddings0.size())  # This will show the shape of the embeddings tensor
print(labels0.size())     # This will show the shape of the labels tensor

embeddings1, labels1 = load_embeddings('/content/drive/MyDrive/t5p_small_embeddings/train/train_1.jsonl')

print(embeddings1.size())  # This will show the shape of the embeddings tensor
print(labels1.size())     # This will show the shape of the labels tensor

torch.Size([7000, 256])
torch.Size([7000])
torch.Size([1515, 256])
torch.Size([1515])


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

class InitialColumnProgNN(nn.Module):
    """First column of the progressive network: a plain stack of linear layers.

    Args:
        topology: Layer widths, input width first. One ``nn.Linear`` is created
            between each pair of consecutive widths.
        activations: Callables indexed by layer. Layer ``i`` is followed by
            ``activations[i]`` when that entry exists; layers past the end of
            the list stay linear. Supplying one activation fewer than there are
            layers therefore leaves the output layer as raw logits, which is
            what ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self, topology, activations):
        super().__init__()
        self.layers = nn.ModuleList(
            [nn.Linear(topology[i], topology[i + 1]) for i in range(len(topology) - 1)]
        )
        self.activations = activations

    def forward(self, x):
        """Run the column and return every intermediate activation.

        Args:
            x: Input tensor fed to the first linear layer.

        Returns:
            A list ``[x, h_1, ..., h_L]`` holding the input followed by the
            output of each layer (after its activation, if it has one).
        """
        h = [x]
        for i, layer in enumerate(self.layers):
            x = layer(x)
            # `<` rather than `!=`: a layer with no matching activation is
            # simply left linear instead of indexing past the end of the list.
            if i < len(self.activations):
                x = self.activations[i](x)
            h.append(x)

        return h

class ExtensibleColumnProgNN(nn.Module):
    """Later column of the progressive network, with lateral inputs from earlier columns.

    Besides its own stack of linear layers, every layer after the first owns
    one bias-free linear "lateral" adapter per earlier column. The adapter maps
    that column's activation at the same depth to this layer's width.

    Args:
        topology: Layer widths, input width first.
        activations: Callables indexed by layer. Layer ``i`` is followed by
            ``activations[i]`` when that entry exists; layers past the end of
            the list stay linear (raw logits for ``nn.CrossEntropyLoss``).
        prev_columns: Earlier columns. Each must expose ``layers`` (whose items
            have ``out_features``) and return its list of activations when
            called.
    """

    def __init__(self, topology, activations, prev_columns):
        super().__init__()
        self.layers = nn.ModuleList()
        self.lateral_connections = nn.ModuleList()
        for i in range(len(topology) - 1):
            self.layers.append(nn.Linear(topology[i], topology[i + 1]))
            if i > 0:
                # One adapter per earlier column, fed by that column's output
                # of layer i-1 (index i in its activation list).
                lateral = [
                    nn.Linear(prev_column.layers[i - 1].out_features, topology[i + 1], bias=False)
                    for prev_column in prev_columns
                ]
                self.lateral_connections.append(nn.ModuleList(lateral))
        self.activations = activations
        # Kept as a plain list, not a ModuleList: the earlier columns are not
        # registered as sub-modules, so self.parameters() excludes their weights.
        self.prev_columns = prev_columns

    def forward(self, x, prev_hs=None):
        """Run the column and return every intermediate activation.

        Args:
            x: Input tensor fed to the first linear layer.
            prev_hs: One activation list per earlier column, where
                ``prev_hs[j][i]`` feeds the lateral adapter of layer ``i``.
                When omitted, the lists are computed by calling each column in
                ``prev_columns`` on ``x`` with gradients disabled.

        Returns:
            A list ``[x, h_1, ..., h_L]`` holding the input followed by the
            output of each layer.
        """
        if prev_hs is None:
            # Earlier columns act as fixed feature extractors for this input.
            with torch.no_grad():
                prev_hs = [column(x) for column in self.prev_columns]

        h = [x]
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i > 0:
                for j, lateral in enumerate(self.lateral_connections[i - 1]):
                    # Out-of-place add so the layer output is never mutated.
                    x = x + lateral(prev_hs[j][i])

            # `<` rather than `!=`: a layer with no matching activation is
            # simply left linear instead of indexing past the end of the list.
            if i < len(self.activations):
                x = self.activations[i](x)
            h.append(x)
        return h

#todo: add training batch size later
def train_column(column, data, target, epochs=50, lr=0.001, prev_hs=None):
    """Train one column with full-batch Adam updates on a cross-entropy loss.

    Args:
        column: Module to train. Calling it must return a list of activations
            whose last entry is used as the logits.
        data: Input tensor; the whole tensor is passed to the column each epoch.
        target: Targets handed to ``nn.CrossEntropyLoss`` with the logits.
        epochs: Number of optimisation steps (one full-batch step per epoch).
        lr: Learning rate for ``torch.optim.Adam``.
        prev_hs: Activation lists from earlier columns. When non-empty the
            column is called as ``column(data, prev_hs)``; otherwise as
            ``column(data)``.

    Returns:
        A list with one detached tensor per entry of the column's output from
        the last epoch's forward pass, each indexed with ``[-1]``. Empty when
        ``epochs`` is 0.
    """
    # None instead of a mutable `[]` default, so no list is shared between calls.
    if prev_hs is None:
        prev_hs = []

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(column.parameters(), lr=lr)

    activations = None
    for epoch in range(epochs):
        optimizer.zero_grad()

        # forward pass
        if len(prev_hs) == 0:
            activations = column(data)
        else:
            activations = column(data, prev_hs)

        output = activations[-1]  # the final layer's output is what gets trained

        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}')

    # Saved h values used as lateral inputs when training later columns. They
    # come from the last epoch's forward pass, i.e. before its optimizer step.
    # NOTE(review): `[-1]` keeps only the last row of each activation tensor, so
    # a later column receives the same lateral input for every sample. Left
    # as-is because existing callers pass this result straight in as `prev_hs`
    # alongside datasets of a different size -- confirm whether that is intended.
    h_values = []
    if activations is not None:
        for h_i in activations:
            h_values.append(h_i[-1].detach().clone())
    return h_values

def train_PNN():
    """Smoke-test a two-column progressive network on the loaded training tensors.

    Trains ``col_0`` on ``(embeddings0, labels0)`` for two epochs, then trains
    ``col_1`` on ``(embeddings1, labels1)`` for two epochs with ``col_0``'s
    saved activations as lateral input. It asserts that ``col_0``'s parameters
    are untouched by ``col_1``'s training and that a forward pass leaves
    ``col_1``'s parameters untouched.

    Reads the module-level tensors ``embeddings0``, ``labels0``,
    ``embeddings1`` and ``labels1``. Returns nothing.

    Raises:
        AssertionError: If any parameter changed when it should not have.
    """
    # todo: make these two as identical sub-networks
    topology1 = [256, 100, 64, 25, 2]
    topology2 = [256, 100, 64, 25, 2]
    # topology2 = [128, 68, 44, 19, 2]

    activations = [F.relu, F.relu, F.relu]

    col_0 = InitialColumnProgNN(topology1, activations)
    h_0 = train_column(col_0, embeddings0, labels0, epochs=2)
    print(len(h_0), len(h_0[0]))

    # Snapshot col_0 *before* the next column trains; taking it afterwards
    # would compare the parameters with themselves and always pass.
    th0_before = [param.detach().clone() for param in col_0.parameters()]

    col_1 = ExtensibleColumnProgNN(topology2, activations, [col_0])
    train_column(col_1, embeddings1, labels1, epochs=2, prev_hs=[h_0])

    # Make sure the column parameters aren't changing when being used by later columns.
    assert all(
        torch.equal(before, after)
        for before, after in zip(th0_before, col_0.parameters())
    )

    # No later column uses col_1 yet, so only check that a forward pass leaves
    # its parameters alone.
    th1_before = [param.detach().clone() for param in col_1.parameters()]
    with torch.no_grad():
        col_1(embeddings1, [h_0])
    assert all(
        torch.equal(before, after)
        for before, after in zip(th1_before, col_1.parameters())
    )
    #... (and so on for other columns)

# if __name__ == "__main__":
#     train_PNN()

# Seed torch's RNG before any layer is created so both columns start from the
# same initial weights on every run, keeping the losses and accuracies below
# comparable between runs.
torch.manual_seed(0)

# todo: make these two as identical sub-networks
topology1 = [256, 100, 64, 25, 2]
topology2 = [256, 100, 64, 25, 2]
# topology2 = [128, 68, 44, 19, 2]

# Three activations for four layers: the output layer gets none, so it emits
# raw logits for the cross-entropy loss used in train_column.
activations = [F.relu, F.relu, F.relu]

# Column 0 is trained on the first task's data; h_0 holds the activations that
# train_column saved from its last epoch.
col_0 = InitialColumnProgNN(topology1, activations)
h_0 = train_column(col_0, embeddings0, labels0, epochs=50)

# Column 1 is trained on the second task's data with h_0 as its lateral input.
col_1 = ExtensibleColumnProgNN(topology2, activations, [col_0])
h_1 = train_column(col_1, embeddings1, labels1, epochs=50, prev_hs=[h_0])


Epoch 1/50, Loss: 0.6938989758491516
Epoch 2/50, Loss: 0.6933290958404541
Epoch 3/50, Loss: 0.6927840113639832
Epoch 4/50, Loss: 0.6922221779823303
Epoch 5/50, Loss: 0.6916407942771912
Epoch 6/50, Loss: 0.6910565495491028
Epoch 7/50, Loss: 0.6904148459434509
Epoch 8/50, Loss: 0.6896586418151855
Epoch 9/50, Loss: 0.6887884140014648
Epoch 10/50, Loss: 0.6878147125244141
Epoch 11/50, Loss: 0.6867549419403076
Epoch 12/50, Loss: 0.6856104135513306
Epoch 13/50, Loss: 0.6843828558921814
Epoch 14/50, Loss: 0.6830549240112305
Epoch 15/50, Loss: 0.6816108822822571
Epoch 16/50, Loss: 0.6800478100776672
Epoch 17/50, Loss: 0.6783655881881714
Epoch 18/50, Loss: 0.6765665411949158
Epoch 19/50, Loss: 0.6746563911437988
Epoch 20/50, Loss: 0.6726415157318115
Epoch 21/50, Loss: 0.670519232749939
Epoch 22/50, Loss: 0.6682757139205933
Epoch 23/50, Loss: 0.6659070253372192
Epoch 24/50, Loss: 0.663419783115387
Epoch 25/50, Loss: 0.6608283519744873
Epoch 26/50, Loss: 0.6581307053565979
Epoch 27/50, Loss: 0.65

In [None]:
# --- Held-out split for task 0 ---
# Parse every JSONL line, then pull out the first embedding and the label.
with open('/content/drive/MyDrive/t5p_small_embeddings/test/test_0.jsonl', 'r') as f:
    records = [json.loads(line) for line in f]
embeddings = [record['embeddings'][0] for record in records]
labels = [record['label'] for record in records]

test_embeddings0 = torch.tensor(embeddings)
test_labels0 = torch.tensor(labels)

# Report the tensor shapes as a quick sanity check.
print(test_embeddings0.shape)
print(test_labels0.shape)

# --- Held-out split for task 1 ---
with open('/content/drive/MyDrive/t5p_small_embeddings/test/test_1.jsonl', 'r') as f:
    records = [json.loads(line) for line in f]
embeddings = [record['embeddings'][0] for record in records]
labels = [record['label'] for record in records]

test_embeddings1 = torch.tensor(embeddings)
test_labels1 = torch.tensor(labels)

print(test_embeddings1.shape)
print(test_labels1.shape)

torch.Size([883, 256])
torch.Size([883])
torch.Size([171, 256])
torch.Size([171])


In [None]:
# Score the first column on its held-out split.
col_0.eval()  # inference mode
with torch.no_grad():
    # The last entry of the column's activation list is the output layer.
    test_outputs = col_0(test_embeddings0)[-1]
    # Predicted class = index of the largest output along dim 1.
    predicted = test_outputs.max(dim=1).indices
    n_correct = (predicted == test_labels0).sum().item()
    accuracy_simple_rnn = n_correct / len(test_labels0)
    print(f'\nTest Accuracy for Class-0 RNN: {accuracy_simple_rnn}')


Test Accuracy for Class-0 RNN: 0.6998867497168743


In [None]:
# Score the second column on its held-out split.
col_1.eval()  # inference mode
with torch.no_grad():
    # Lateral input is h_0, the activations saved when col_0 was trained; the
    # last entry of the returned activation list is the output layer.
    test_outputs = col_1(test_embeddings1, [h_0])[-1]
    # Predicted class = index of the largest output along dim 1.
    predicted = test_outputs.max(dim=1).indices
    n_correct = (predicted == test_labels1).sum().item()
    accuracy_simple_rnn = n_correct / len(test_labels1)
    print(f'\nTest Accuracy for Class-1 RNN: {accuracy_simple_rnn}')


Test Accuracy for Class-1 RNN: 0.6842105263157895


In [None]:
col_0

InitialColumnProgNN(
  (layers): ModuleList(
    (0): Linear(in_features=256, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=64, bias=True)
    (2): Linear(in_features=64, out_features=25, bias=True)
    (3): Linear(in_features=25, out_features=2, bias=True)
  )
)

In [None]:
col_1

ExtensibleColumnProgNN(
  (layers): ModuleList(
    (0): Linear(in_features=256, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=64, bias=True)
    (2): Linear(in_features=64, out_features=25, bias=True)
    (3): Linear(in_features=25, out_features=2, bias=True)
  )
  (lateral_connections): ModuleList(
    (0): ModuleList(
      (0): Linear(in_features=100, out_features=64, bias=False)
    )
    (1): ModuleList(
      (0): Linear(in_features=64, out_features=25, bias=False)
    )
    (2): ModuleList(
      (0): Linear(in_features=25, out_features=2, bias=False)
    )
  )
)