In [9]:
import torch

from ptgnn.transform.ptree_matrix import permutation_tree_to_matrix

In [10]:
from ptgnn.runtime_config.run_config import fetch_loaders

In [11]:
data_config = {
    'dataset' : {
        'type' : "rs",
        'mask_chiral_tags': True,
        'transformation_mode': 'chienn_tree_basic',
    },
    'loader': {
        'general': {
            'n_neighbors_in_circle': 3,
            'batch_size': 32,
            'num_workers': 0,
        },
        'train': {},
        'test': {},
        'val': {}
    }
}

In [12]:
train_loader, _, val_loader = fetch_loaders(data_config=data_config)

In [13]:
for batch in val_loader:
    display(batch)
    break

DataBatch(x=[2356, 118], edge_index=[2, 6416], edge_attr=[6416, 80], pos=[2356, 6], y=[32], batch=[2356], ptr=[33], ptree=[2356])

# part in collation and part possible in preprocessing

In [14]:
k = 3

In [15]:
idx_matrix, type_matrix = permutation_tree_to_matrix(batch.ptree, k)
display(idx_matrix)
display(type_matrix)

tensor([[   0,    0,    1,    1],
        [   1,    0,    0,    0],
        [   1,    1,    0,    3],
        ...,
        [2354,    1,    1, 2351],
        [2354,    1,    2, 2353],
        [2355,    0, 2354, 2354]])

tensor([[1, 0],
        [1, 0],
        [1, 2],
        ...,
        [1, 2],
        [1, 2],
        [1, 0]])

## From here second part that is either in the model or in preprocessing

In [16]:
# load first index range of elements
data_array = batch.x[idx_matrix[:, -1]]

In [17]:
# get structure to orient to
idx_structure = idx_matrix[:, :-1]

### for loop starts here

In [18]:
# get indexes for graph pooling
idx_structure, current_layer_pooling_counts = torch.unique(idx_structure[:, :-1], dim=0, return_counts=True)
display(idx_structure)
display(current_layer_pooling_counts)

tensor([[   0,    0],
        [   1,    0],
        [   1,    1],
        ...,
        [2354,    0],
        [2354,    1],
        [2355,    0]])

tensor([1, 1, 3,  ..., 1, 3, 1])

In [19]:
# get indexes for graph pooling
current_layer_pooling = torch.repeat_interleave(current_layer_pooling_counts)
display(current_layer_pooling[:10])

tensor([0, 1, 2, 2, 2, 3, 4, 4, 4, 5])

In [12]:
# init circling
# todo: rework for other types
order_matrix = torch.zeros(k, len(current_layer_pooling), dtype=torch.int) - 1

In [13]:
cur_pos = 0
for i in current_layer_pooling_counts:#[:5]:
    current_k = min(k, i)
    r = torch.arange(cur_pos, cur_pos+i)
    for j in range(current_k):
        order_matrix[:current_k, cur_pos+j] = torch.roll(r, shifts=-j)
    cur_pos += i

In [14]:
order_matrix

tensor([[   0,    1,    2,  ..., 6413, 6414, 6415],
        [  -1,   -1,    3,  ..., 6414, 6412,   -1],
        [  -1,   -1,    4,  ..., 6412, 6413,   -1]], dtype=torch.int32)

In [15]:
# add zero padding to data list
data_array = torch.cat([torch.zeros(1, data_array.shape[-1]), data_array], dim=0)
order_matrix += 1

In [16]:
temp = data_array[order_matrix]

In [17]:
temp.shape

torch.Size([3, 6416, 118])

In [18]:
# apply z layer

In [19]:
mask_z = type_matrix[:, -1] == 2

In [23]:
z_layer = torch.nn.ModuleList([
    torch.nn.Linear(118, 118)
    for _ in range(k)
])
z_final_layer = torch.nn.Linear(118, 118)
z_elu = torch.nn.ELU()

In [21]:
# todo: take care of duplicate elements that are sent through linear layer

In [22]:
data_array.shape

torch.Size([6417, 118])

In [23]:
embedding = torch.stack([
    emb(t)
    for emb, t in zip(z_layer,temp)
], dim=1)
display(embedding.shape)

torch.Size([6416, 3, 118])

In [24]:
temp3 = embedding.sum(dim=1)
display(temp3.shape)

torch.Size([6416, 118])

In [25]:
temp4 = z_final_layer(temp3)
display(temp4.shape)

torch.Size([6416, 118])

In [7]:
import torch_geometric
data_array = torch_geometric.nn.global_add_pool(temp4, current_layer_pooling)

NameError: name 'temp4' is not defined

In [27]:
data_array.shape

torch.Size([4124, 118])

In [20]:
k = 3

In [21]:
batch.idx_matrix, batch.type_matrix = permutation_tree_to_matrix(batch.ptree, 3)

In [24]:
device = 'cpu'
batch = batch.to(device)
z_elu = z_elu.to(device)
z_layer = z_layer.to(device)
z_final_layer = z_final_layer.to(device)

In [25]:
# make link to batch idx_matrix
idx_matrix = batch.idx_matrix

# load first index range of elements
data_array = batch.x[idx_matrix[:, -1]]

# get structure to orient to
idx_structure = idx_matrix[:, :-1]

In [26]:
# get indexes for graph pooling
idx_structure, current_layer_pooling_counts = torch.unique(idx_structure[:, :-1], dim=0, return_counts=True)

# get indexes for graph pooling
current_layer_pooling = torch.repeat_interleave(current_layer_pooling_counts)

# init circling
# todo: rework for other types - treat everything as Z
order_matrix = torch.zeros(k, len(current_layer_pooling), dtype=torch.int) - 1

In [27]:
cur_pos = 0
for i in current_layer_pooling_counts:
    current_k = min(k, i)
    r = torch.arange(cur_pos, cur_pos+current_k)
    for j in range(i):
        order_matrix[:current_k, cur_pos+j] = torch.roll(r, shifts=-j)
    cur_pos += i

In [28]:
order_matrix[:, :10]

tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
        [-1, -1,  3,  4,  2, -1,  7,  8,  6, -1],
        [-1, -1,  4,  2,  3, -1,  8,  6,  7, -1]], dtype=torch.int32)

In [29]:
# add zero padding to data list
data_array = torch.cat([torch.zeros(1, data_array.shape[-1], device=data_array.device), data_array], dim=0)
order_matrix += 1

embedding = data_array[order_matrix]
# mask_z = batch.type_matrix

embedding = torch.stack([
    emb(t)
    for emb, t in zip(z_layer, embedding)
], dim=1)

embedding[(order_matrix.T == 0).unsqueeze(-1).expand_as(embedding)] = 0.

embedding = embedding.sum(dim=1)
embedding = z_final_layer(embedding)

# global pooling
data_array = torch_geometric.nn.global_add_pool(embedding, current_layer_pooling)

NameError: name 'torch_geometric' is not defined

In [55]:
batch

DataBatch(x=[2356, 118], edge_index=[2, 6416], edge_attr=[6416, 80], pos=[2356, 6], y=[32], batch=[2356], ptr=[33], ptree=[2356], idx_matrix=[6416, 4], type_matrix=[6416, 2])

In [56]:
import json
ptree = json.loads(batch.ptree[0])

In [58]:
from ptgnn.transform.ptree_matrix import permutation_tree_depth

# get depth of tree
tree_depth = permutation_tree_depth(ptree)

In [61]:
batch.ptree[1]

'{"P": [0, {"Z": [3, 7, 5]}]}'

In [60]:
permutation_tree_to_matrix([batch.ptree[1]])

(tensor([[0, 0, 0, 0],
         [0, 1, 0, 3],
         [0, 1, 1, 7],
         [0, 1, 2, 5]]),
 tensor([[1, 0],
         [1, 2],
         [1, 2],
         [1, 2]]))

In [1]:
from ptgnn.transform.ptree_matrix import permutation_tree_to_order_matrix

permutation_tree_to_order_matrix(batch, 3)

NameError: name 'batch' is not defined

In [1]:
from ptgnn.runtime_config.run_config import fetch_loaders

In [2]:
data_config = {
    'dataset' : {
        'type' : "rs",
        'mask_chiral_tags': True,
        'transformation_mode': 'chienn_tree_order_matrix',
        'transformation_parameters': {
            'k': 3
        }
    },
    'loader': {
        'general': {
            'n_neighbors_in_circle': 3,
            'batch_size': 32,
            'num_workers': 0,
        },
        'train': {},
        'test': {},
        'val': {}
    }
}

In [3]:
train_loader, _, val_loader = fetch_loaders(data_config=data_config)

In [8]:
for t in train_loader:
    display(t)
    break

DataBatch(x=[2804, 118], edge_index=[2, 7684], edge_attr=[7684, 80], pos=[2804, 6], y=[32], batch=[2804], ptr=[33], ptree=[2804], num_layer=[1], initial_map=[7684], layer0_order_matrix=[3, 7684], layer0_type_mask=[7684], layer0_pooling=[7684], layer1_order_matrix=[3, 4978], layer1_type_mask=[4978], layer1_pooling=[4978])

In [9]:
print(t.layer1_order_matrix.max())
print(t.layer0_pooling.max())
print(t.layer0_type_mask.max())
print(t.initial_map.max())

tensor(4977, dtype=torch.int32)
tensor(4977)
tensor(2)
tensor(2803)


In [6]:
t.layer0_type_mask

tensor([0, 0, 2,  ..., 2, 0, 2])

In [11]:
t.layer0_pooling[0][0][0]

tensor([  0,   1,   2,   2,   3,   4,   4,   5,   6,   6,   7,   8,   8,   9,
         10,  10,  11,  12,  12,  13,  14,  14,  14,  15,  16,  16,  17,  18,
         19,  19,  19,  20,  21,  21,  22,  23,  23,  23,  24,  25,  25,  25,
         26,  27,  27,  27,  28,  29,  30,  30,  31,  32,  32,  33,  34,  34,
         35,  36,  36,  37,  38,  38,  39,  40,  40,  41,  42,  42,  43,  44,
         45,  45,  46,  47,  47,  48,  49,  49,  50,  51,  52,  52,  53,  54,
         54,  55,  56,  56,  57,  58,  58,  59,  60,  60,  61,  62,  62,  63,
         64,  64,  65,  66,  66,  67,  68,  68,  69,  70,  70,  71,  72,  72,
         73,  74,  75,  75,  76,  77,  77,  78,  79,  79,  80,  81,  82,  82,
         83,  84,  84,  85,  86,  86,  87,  88,  89,  89,  90,  91,  91,  92,
         93,  93,  94,  95,  96,  96,  97,  98,  99,  99, 100, 101, 101, 102,
        103, 103, 104, 105, 106, 106, 107, 108, 109, 109, 109, 110, 111, 111,
        112, 113, 113, 113, 114, 115, 116, 116, 116, 117, 118, 1

In [10]:
k=3

In [19]:
import torch
import torch_geometric
from tqdm import tqdm

In [20]:
z_layer = torch.nn.ModuleList([
    torch.nn.Linear(118, 118)
    for _ in range(k)
])
z_final_layer = torch.nn.Linear(118, 118)
z_elu = torch.nn.ELU()

In [23]:
# device = 'cpu'
device = 'cuda'

z_elu = z_elu.to(device)
z_layer = z_layer.to(device)
z_final_layer = z_final_layer.to(device)

In [25]:
for batch in tqdm(train_loader):
    batch = batch.to(device)

    # load first index range of elements
    data_array = batch.x[batch.initial_map]

    for layer_idx in range(batch.num_layer):
        # fetch data
        order_matrix = batch[f"layer{layer_idx}_order_matrix"]
        current_layer_pooling = batch[f"layer{layer_idx}_pooling"]
        type_mask = batch[f"layer{layer_idx}_type_mask"] # todo: actually use for different types

        # add zero padding to data list
        data_array = torch.cat([torch.zeros(1, data_array.shape[-1], device=data_array.device), data_array], dim=0)
        order_matrix += 1

        embedding = data_array[order_matrix]
        # mask_z = batch.type_matrix

        embedding = torch.stack([
            emb(t)
            for emb, t in zip(z_layer, embedding)
        ], dim=1)

        embedding[(order_matrix.T == 0).unsqueeze(-1).expand_as(embedding)] = 0.

        embedding = embedding.sum(dim=1)
        embedding = z_final_layer(embedding)

        # global pooling
        data_array = torch_geometric.nn.global_add_pool(embedding, current_layer_pooling)

    assert data_array.shape == batch.x.shape

100%|██████████| 1721/1721 [00:49<00:00, 35.04it/s]
