In [3]:
import pandas as pd
import torch as tt
import numpy as np

In [2]:
# Read the full training and validation molecule sets from JSON into DataFrames.
molecules_train = pd.read_json(
    "gated-graph-neural-network-pytorch/molecules_trainFull.json"
)
molecules_valid = pd.read_json(
    "gated-graph-neural-network-pytorch/molecules_validFull.json"
)

In [3]:
# Preview the first rows of the training frame.
molecules_train.head()

Unnamed: 0,graph,node_features,targets
0,"[[0, 1, 1], [1, 2, 2], [1, 1, 3], [3, 1, 4], [...","[[0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 0, 1...",[[-0.4143944162]]
1,"[[0, 1, 1], [1, 1, 2], [2, 1, 3], [3, 1, 4], [...","[[0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0...",[[-0.7727865908]]
2,"[[0, 1, 1], [1, 1, 2], [2, 1, 3], [3, 1, 4], [...","[[0, 1, 0, 0, 0], [0, 1, 0, 0, 0], [0, 0, 1, 0...",[[-0.5899948140000001]]
3,"[[0, 1, 1], [1, 1, 2], [2, 1, 3], [3, 1, 4], [...","[[0, 1, 0, 0, 0], [0, 0, 1, 0, 0], [0, 1, 0, 0...",[[-1.2355191359]]
4,"[[0, 1, 1], [1, 1, 2], [2, 1, 3], [3, 1, 4], [...","[[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 1, 0, 0...",[[-1.2417952483999999]]


In [4]:
# Row counts of the training and validation frames, shown as a tuple.
molecules_train.shape[0], molecules_valid.shape[0]

(120803, 13082)

In [22]:
# Keep only the leading rows (1000 train, 100 validation).
# Note: this rebinds the names, so the full frames are no longer reachable afterwards.
molecules_train = molecules_train.iloc[:1000]
molecules_valid = molecules_valid.iloc[:100]

In [28]:
# Write the validation frame to disk as a JSON list of row records.
molecules_valid.to_json(
    "gated-graph-neural-network-pytorch/molecules_valid.json",
    orient="records",
)

In [29]:
# Write the training frame to disk as a JSON list of row records.
molecules_train.to_json(
    "gated-graph-neural-network-pytorch/molecules_train.json",
    orient="records",
)

# Experimenting with Loss functions

In [4]:
# Toy float64 tensor of shape (2, 4, 3) filled with uniform samples from [0, 1).
# The draw is unseeded, so the values differ on every run.
feat = tt.tensor(np.random.rand(2, 4, 3))

In [5]:
# Per-row mask over the 4 positions: 1.0 selects a position as a negative candidate.
# The position used as the positive must not be selected in this mask.
candidates = tt.tensor(
    [[0, 0, 0, 1],
     [0, 1, 1, 0]],
    dtype=tt.float64,
)

In [6]:
# One position index per row of `feat`: the anchor and its positive.
anchor = tt.tensor([0, 3], dtype=tt.int64)
positive = tt.tensor([1, 0], dtype=tt.int64)

In [7]:
# Display all four toy inputs together as a tuple.
feat, candidates, anchor, positive

(tensor([[[0.9444, 0.8917, 0.4181],
          [0.0893, 0.6045, 0.3186],
          [0.0836, 0.0597, 0.5882],
          [0.3557, 0.8653, 0.5213]],
 
         [[0.5651, 0.1775, 0.0569],
          [0.6969, 0.8702, 0.5663],
          [0.8732, 0.4919, 0.8192],
          [0.2207, 0.8025, 0.2038]]], dtype=torch.float64),
 tensor([[0., 0., 0., 1.],
         [0., 1., 1., 0.]], dtype=torch.float64),
 tensor([0, 3]),
 tensor([1, 0]))

In [8]:
# Select each row's anchor embedding from `feat`, keeping a size-1 middle axis so the
# result broadcasts against `feat`. gather along dim 1 needs an index with as many
# dims as `feat`, so the per-row index is expanded across the feature axis; the
# feature width is read from `feat` rather than hard-coded to 3.
anchor_embeds = tt.gather(
    feat, 1, anchor.view(-1, 1, 1).expand(-1, 1, feat.size(2))
)

In [9]:
# Display the gathered anchor embeddings.
anchor_embeds

tensor([[[0.9444, 0.8917, 0.4181]],

        [[0.2207, 0.8025, 0.2038]]], dtype=torch.float64)

In [10]:
# Difference between every position's features and its row's anchor embedding;
# the anchor's own position comes out as all zeros.
feat - anchor_embeds

tensor([[[ 0.0000,  0.0000,  0.0000],
         [-0.8551, -0.2873, -0.0995],
         [-0.8608, -0.8321,  0.1701],
         [-0.5887, -0.0264,  0.1032]],

        [[ 0.3444, -0.6250, -0.1469],
         [ 0.4762,  0.0678,  0.3625],
         [ 0.6525, -0.3106,  0.6154],
         [ 0.0000,  0.0000,  0.0000]]], dtype=torch.float64)

In [11]:
# Squared Euclidean distance from each position to its row's anchor
# (squared differences summed over the last axis).
all_dist = (feat - anchor_embeds).pow(2).sum(2)
all_dist

tensor([[0.0000, 0.8236, 1.4622, 0.3579],
        [0.5308, 0.3628, 0.9009, 0.0000]], dtype=torch.float64)

In [12]:
# Divisor applied to the squared distances inside the log terms below.
# NOTE(review): the toy features are 3 wide and each row has 4 positions — confirm 4 is the intended scale.
dim = 4
# Small constant added to the argument of each log.
e = 1e-8

In [13]:
# Positive term: take the squared distance at each row's positive index, then
# apply -log(1 - d/dim + e), which grows as the positive moves away from the anchor.
pos_dist = all_dist.gather(1, positive.view(-1, 1))
pos_dist = tt.log(-pos_dist / dim + 1 + e).neg()

# Negative term: -log(1 - (dim - d)/dim + e) for every position, which shrinks as the
# distance grows; the mask zeroes every position that is not a negative candidate.
neg_dist = tt.log(-(dim - all_dist) / dim + 1 + e).neg()
neg_dist = candidates * neg_dist
pos_dist, neg_dist

(tensor([[0.2305],
         [0.1424]], dtype=torch.float64),
 tensor([[0.0000, 0.0000, 0.0000, 2.4138],
         [0.0000, 2.4002, 1.4906, 0.0000]], dtype=torch.float64))

In [14]:
# Number of negative candidates per row, counted from the mask itself.
# Counting `neg_dist > 0` would drop any candidate whose log term is exactly 0,
# negative, or NaN, and so skew the per-row average that divides by this count.
neg_total = tt.sum(candidates > 0, 1).type(tt.float64)
neg_total

tensor([1., 2.], dtype=torch.float64)

In [15]:
# Per-row loss: the positive term plus the mean of the negative terms
# (masked sum divided by the number of negatives).
result = pos_dist.reshape(-1) + neg_dist.sum(1) / neg_total
result

tensor([2.6444, 2.0878], dtype=torch.float64)

In [34]:
# Display the masked negative terms again before the accuracy experiment.
neg_dist

tensor([[0.0000, 0.0000, 0.0000, 2.4138],
        [0.0000, 2.4002, 1.4906, 0.0000]], dtype=torch.float64)

In [35]:
# Replace the computed positive terms with hand-picked thresholds (one per row)
# for the accuracy experiment. This rebinds `pos_dist`, so cells that use it afterwards
# see these values rather than the computed ones.
pos_dist = tt.tensor([[2.4], [1.5]], dtype=tt.float64)

In [36]:
# Per-row fraction of negative candidates whose term exceeds that row's positive term.
# The comparison is limited to candidate positions: masked-out entries of `neg_dist`
# are 0 and would otherwise be counted whenever `pos_dist` is negative (e.g. a positive
# at distance 0 gives -log(1 + e) < 0), pushing the ratio above 1.
acc = tt.sum((neg_dist > pos_dist) & (candidates > 0), 1).type(tt.float64) / neg_total

In [37]:
# Average the per-row accuracy over all rows.
acc.mean()

tensor(0.7500, dtype=torch.float64)