**Modified Attention Mechanism based on the requirements**

In [26]:
import tensorflow as tf
import os
import networkx as nx
import tarfile
import urllib.request
def download_dataset(url, dest_folder="cora"):
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)

    file_path = os.path.join(dest_folder, "cora.tgz")

    # Download the dataset
    urllib.request.urlretrieve(url, file_path)

    # Extract the tar file
    with tarfile.open(file_path, "r:gz") as tar:
        tar.extractall(path=dest_folder)

    print(f"Downloaded and extracted dataset to {dest_folder}")

# URL of the dataset
url = "https://linqs-data.soe.ucsc.edu/public/lbc/cora.tgz"
download_dataset(url)

Downloaded and extracted dataset to cora


In [27]:
import numpy as np
from sklearn.preprocessing import StandardScaler
path = "cora/cora"
nodes_data = np.genfromtxt(os.path.join(path, "cora.content"), dtype=str)
edges = np.genfromtxt(os.path.join(path, "cora.cites"), dtype=int)

G = nx.Graph()
node_features = []
node_id_map = {}
index = 0

#Extrating node features
for node in nodes_data:
  node_id = int(node[0])
  feature = np.array(node[1:-1], dtype=np.float32)

  # Add node to graph with features (without label)
  G.add_node(node_id, feature=feature)
  node_features.append(feature)
  # Mapping node IDs
  node_id_map[node_id] = index
  index += 1


features = np.array(node_features)

row_sum = np.array(features.sum(1))
row_inv = np.power(row_sum, -1).flatten()
row_inv[np.isinf(row_inv)] = 0.
row_mat_inv = sp.diags(row_inv)
features = row_mat_inv.dot(features)
print(features)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [28]:
nb_nodes = features.shape[0]
ft_size = features.shape[1]
print(nb_nodes)
print(ft_size)

2708
1433


In [29]:
batch_size = 1
hid_units = [8] #No of output size
n_heads = [8, 1]
nonlinearity = tf.nn.elu
residual = False

In [30]:
feature_input = tf.keras.Input(shape=(nb_nodes, ft_size), batch_size=batch_size)
bias_input = tf.keras.Input(shape=(nb_nodes, nb_nodes), batch_size=batch_size)
ffd_drop = 0.0
attn_drop = 0.0

In [54]:
from tensorflow.keras import layers
from tensorflow.keras.layers import Add
seq = feature_input


conv1d = layers.Conv1D #layers.Conv1D

# Multi head Attention Mechanism (Average mechanism has been used)
def attn_head(seq, bias_mat, out_sz, activation, in_drop=0.0, coef_drop=0.0, residual=False):
  with tf.name_scope('my_attn'):
          # Apply dropout if needed
          if in_drop != 0.0:
              seq = tf.nn.dropout(seq, 1.0 - in_drop)

          seq_fts = layers.Conv1D(filters=out_sz, kernel_size=1, use_bias=False)(seq)

          # Self-attention mechanism
          f_1 = layers.Conv1D(filters=1, kernel_size=1)(seq_fts)
          f_2 = layers.Conv1D(filters=1, kernel_size=1)(seq_fts)

          f_2_transposed = layers.Lambda(lambda x: tf.transpose(x, perm=[0, 2, 1]))(f_2)
          logits = layers.Add()([f_1, f_2_transposed])

          leaky_relu = layers.LeakyReLU(alpha=0.2)
          logits_with_bias = layers.Add()([logits, bias_mat])
          coefs = layers.Activation('softmax')(leaky_relu(logits_with_bias))

          if coef_drop != 0.0:
              coefs = tf.nn.dropout(coefs, 1.0 - coef_drop)


          return coefs

class CustomActivation(layers.Layer):
    def call(self, inputs):
        return tf.nn.leaky_relu(inputs, alpha=0.2)

length = int(n_heads[0])
attention_weights = []
for i in range(length):
  attention_weights.append(attn_head(seq, bias_mat=bias_input,
                out_sz=hid_units[0], activation=nonlinearity,
                in_drop=ffd_drop, coef_drop=attn_drop, residual=False))

attention_weights_avg = Add()(attention_weights)/n_heads[0]





In [55]:
# Run the model
model = tf.keras.Model(inputs=[feature_input, bias_input], outputs=[attention_weights_avg])
feature_input_pred = tf.random.normal(shape=(batch_size, nb_nodes, ft_size))
bias_input_pred = tf.random.normal(shape=(batch_size, nb_nodes, nb_nodes))
attention_weights  = model.predict([feature_input_pred, bias_input_pred])



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step


In [56]:
# Directly Connected Neighbors
neighbors = edges[(edges[:, 0] == 6213 ) | (edges[:, 1] == 6213 )]

#direct_neighbors = np.unique(neighbors[neighbors != 6213 ])
print(neighbors)

[[    887    6213]
 [   4584    6213]
 [   6151    6213]
 [   6213   10796]
 [   6213 1103315]
 [   6213 1105764]
 [   6213 1106406]
 [   6213 1106547]
 [   6213 1106966]
 [   6213 1107355]
 [   6213 1107455]
 [   6213 1108329]
 [   6213 1109957]
 [   6213 1111052]
 [   6213 1111230]
 [   6213 1111304]
 [   6213 1113182]
 [   6213 1113459]
 [   6213 1113614]
 [   6213 1116347]
 [   6213 1117760]
 [   6213 1117942]
 [   6213 1120020]
 [   6213 1122425]
 [   6213 1123553]
 [   6213 1128267]
 [   6213 1129096]
 [   6213 1129243]
 [   6213 1130567]
 [   6213  124064]
 [   6213   12576]
 [   6213     128]
 [   6213   13193]
 [   6213  134128]
 [   6213    1385]
 [   6213   13960]
 [   6213  153598]
 [   6213  161221]
 [   6213   17208]
 [   6213  193742]
 [   6213  195361]
 [   6213   20526]
 [   6213   20534]
 [   6213  218410]
 [   6213   23774]
 [   6213  241133]
 [   6213   28202]
 [   6213   28227]
 [   6213   28278]
 [   6213   28350]
 [   6213   28471]
 [   6213   28485]
 [   6213  2

In [62]:
# Converting Node_ID to index
node_index_1 = 5348 #neighbors[0][0]
node_index_2 = 2695     #neighbors[0][1]
# From Node_ID to Index
node_id_1 = node_index_1
node_id_2 = node_index_2
if node_id_1 in node_id_map:
    index_1 = node_id_map[node_id_1]
    print(index_1)
else:
    print("Error")
if node_id_2 in node_id_map:
    index_2 = node_id_map[node_id_2]
    print(index_2)
else:
  print("Error")

#Attention Weights
print(f"Attention weights for node(Index value) {index_1} to {index_2}")
print(attention_weights[0, index_1, index_2 ])

359
2498
Attention weights for node(Index value) 359 to 2498
7.578681e-05
