# PyTorch Geometric GNN - homogeneous graph

## Import section

In [1]:
from data_gen import data_transform_split

In [2]:
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import pandas as pd

## GNN with RDKit features

In [3]:
# Build the graph object with train/test positive and negative edge splits.
# NOTE(review): the meaning of the argument 2 is defined in data_gen; presumably it
# selects the RDKit feature set - confirm against data_transform_split.
data_rdkit = data_transform_split(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['id_1']=df['aid'].map(lambda x: aid_translation_dictionary[x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['id_2']=df['cid'].map(lambda x: cid_translation_dictionary[x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['id_1']=df['aid'].map(lambda x: aid_translation_dictionary[x])
A value

In [4]:
# Show the summary of the generated graph object (one shape per attribute).
data_rdkit

Data(x=[457560, 208], train_pos_edge_index=[2, 979616], train_neg_edge_index=[2, 65612424], test_pos_edge_index=[2, 244898], test_neg_edge_index=[2, 16403118])

In [5]:
# Number of input features per node (second dimension of x).
data_rdkit.num_features

208

In [6]:
# Half the feature count - the hidden width used by the first GCN layer of the model below.
int(data_rdkit.num_features/2)

104

In [39]:
# Replace every node feature with 1.0 (presumably a featureless baseline - confirm intent).
# NOTE: this overwrites data_rdkit.x in place; re-run the data loading cell to get the
# original features back.
# torch.ones creates the float32 tensor directly instead of first allocating a
# float64 NumPy array of the same shape and converting it.
data_rdkit.x = torch.ones(data_rdkit.x.shape, dtype=torch.float)

## Set up the PyTorch model

In [40]:
class GNN_homogen_with_data(torch.nn.Module):
    '''
    Two-layer GCN encoder with a dot-product decoder for link prediction
    on a homogeneous graph.

    Inspired by the tutorial by Antonio Longa:
    https://antoniolonga.github.io/Pytorch_geometric_tutorials/posts/post12.html

    Parameters
    ----------
    graph : optional
        Graph object providing ``num_features``, ``x`` and ``train_pos_edge_index``.
        When omitted, the module-level variable ``data`` is looked up each time it
        is needed, which is the original behaviour of this class.
    out_channels : int, default 64
        Size of the node embeddings returned by ``encode``.
    '''
    def __init__(self, graph=None, out_channels=64):
        super(GNN_homogen_with_data, self).__init__()
        self._graph = graph
        source = self._resolve_graph()
        # The hidden layer is half as wide as the input features.
        hidden_channels = source.num_features // 2
        self.conv1 = GCNConv(source.num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def _resolve_graph(self):
        '''Return the graph given to the constructor, or the global ``data`` if none was given.'''
        return data if self._graph is None else self._graph

    def encode(self):
        '''Compute node embeddings by message passing over the positive training edges.'''
        graph = self._resolve_graph()
        x = self.conv1(graph.x, graph.train_pos_edge_index)  # first convolution layer
        x = x.relu()  # ReLU non-linearity: negative activations are set to zero
        x = self.conv2(x, graph.train_pos_edge_index)  # second convolution layer
        return x

    def decode(self, z, pos_edge_index, neg_edge_index):
        '''
        Score candidate edges from the node embeddings ``z``.

        The positive edges are followed by the negative edges, and each edge is
        scored with the dot product of the embeddings of its two endpoints.
        Returns a 1-D tensor of raw logits, one per edge, in that order.
        '''
        edge_index = torch.cat([pos_edge_index, neg_edge_index], dim=-1)  # fuse pos and neg edges
        # z[edge_index[0]] / z[edge_index[1]] gather the source / target embeddings of every edge.
        logits = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=-1)
        return logits

    def decode_all(self, z):
        '''
        Return the predicted edge list, shape [2, num_predicted_edges], over all node pairs.

        The scores are raw logits, so the threshold ``> 0`` is equivalent to a
        sigmoid probability ``> 0.5``.

        NOTE: this materialises a dense N x N score matrix, which is only feasible
        for graphs with a small number of nodes.
        '''
        prob_adj = z @ z.t()  # pairwise dot products, N x N
        return (prob_adj > 0).nonzero(as_tuple=False).t()

In [41]:
# Use the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cpu')

In [42]:
# Move the graph to the selected device. The name `data` must be bound before the
# model is created, because GNN_homogen_with_data reads the global `data`.
data = data_rdkit.to(device)
# Instantiate the model and move its parameters to the same device.
model = GNN_homogen_with_data().to(device)
# Adam optimiser over all model parameters with a learning rate of 0.01.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [43]:
def get_link_labels(pos_edge_index, neg_edge_index):
    '''
    Build the target vector for link prediction.

    Returns a 1-D float tensor ``[1, 1, ..., 0, 0, ...]`` in which the number of
    ones equals the number of columns of ``pos_edge_index`` and the number of
    zeros equals the number of columns of ``neg_edge_index``.

    The labels are created on the device of ``pos_edge_index``, so the function
    does not depend on a module-level ``device`` variable.
    '''
    num_pos = pos_edge_index.size(1)
    num_neg = neg_edge_index.size(1)
    link_labels = torch.zeros(num_pos + num_neg, dtype=torch.float,
                              device=pos_edge_index.device)
    link_labels[:num_pos] = 1.  # positives come first, matching the order used by decode()
    return link_labels

In [125]:
# Put the model into training mode before the optimisation step.
model.train()

GNN_homogen_with_data(
  (conv1): GCNConv(208, 104)
  (conv2): GCNConv(104, 64)
)

In [126]:
# Reset gradients left over from any previous backward pass.
optimizer.zero_grad()

In [127]:
# Compute node embeddings with the two GCN layers (encode() reads the global `data` graph).
z=model.encode()

In [128]:
# Inspect the embeddings.
# NOTE(review): every displayed row is identical, which is likely a consequence of the
# all-ones node features assigned to data_rdkit.x earlier - confirm this is intended.
z

tensor([[-0.0627, -0.0699, -0.0100,  ...,  0.0549,  0.0191, -0.0587],
        [-0.0627, -0.0699, -0.0100,  ...,  0.0549,  0.0191, -0.0587],
        [-0.0627, -0.0699, -0.0100,  ...,  0.0549,  0.0191, -0.0587],
        ...,
        [-0.0627, -0.0699, -0.0100,  ...,  0.0549,  0.0191, -0.0587],
        [-0.0627, -0.0699, -0.0100,  ...,  0.0549,  0.0191, -0.0587],
        [-0.0627, -0.0699, -0.0100,  ...,  0.0549,  0.0191, -0.0587]],
       grad_fn=<AddBackward0>)

In [129]:
# Score the positive and negative training edges with the dot product of their endpoint embeddings.
link_logits = model.decode(z, data.train_pos_edge_index, data.train_neg_edge_index)
link_logits

tensor([0.0779, 0.0779, 0.0779,  ..., 0.0779, 0.0779, 0.0779],
       grad_fn=<SumBackward1>)

In [130]:
# Ground-truth labels: 1 for each positive edge followed by 0 for each negative edge,
# in the same order as the logits produced by decode().
link_labels = get_link_labels(data.train_pos_edge_index, data.train_neg_edge_index)
link_labels

tensor([1., 1., 1.,  ..., 0., 0., 0.])

In [131]:
# Binary cross-entropy computed directly on the raw logits.
# NOTE(review): the Data summary shows roughly 67 negative edges per positive edge, so this
# unweighted loss is dominated by the negatives - consider whether that is intended.
loss = F.binary_cross_entropy_with_logits(link_logits, link_labels)
loss

tensor(0.7317, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [123]:
# Backpropagate the loss to populate the parameter gradients.
loss.backward()

In [124]:
# Apply one Adam update to the model parameters.
optimizer.step()