In [None]:
import numpy as np
from scipy.special import softmax
import networkx as nx


# Labels a token can be assigned; the position in this list is the class index
# that the argmax over the predicted distribution selects.
outputs = [
    "arg0",
    "arg1",
    "O",
]

: 

First, we define a helper function that renders our matrices and vectors as LaTeX.

In [None]:
def bmatrix(a):
    """Return a LaTeX ``bmatrix`` environment for a NumPy array.

    The array is converted with NumPy's own string formatting, so the numbers
    look exactly as ``print(a)`` would show them. A 1-D array becomes a single
    matrix row.

    :a: numpy array with at most two dimensions
    :returns: LaTeX bmatrix as a string, one line per matrix row
    :raises ValueError: if ``a`` has more than two dimensions
    """
    import sys  # local import: only needed for the print-option limits below

    if len(a.shape) > 2:
        raise ValueError('bmatrix can at most display two dimensions')
    # By default NumPy wraps rows at `linewidth` characters and abbreviates
    # large arrays with '...'. Either would corrupt the matrix (extra rows,
    # literal dots), so both limits are lifted while the array is formatted.
    with np.printoptions(linewidth=sys.maxsize, threshold=sys.maxsize):
        text = str(a)
    rows = text.replace('[', '').replace(']', '').splitlines()
    rv = [r'\begin{bmatrix}']
    rv += ['  ' + ' & '.join(row.split()) + r'\\' for row in rows]
    rv += [r'\end{bmatrix}']
    return '\n'.join(rv)

We create the sentence and the vocabulary mappings. The vocabulary contains every unique word of the sentence: `vocab_w2i` maps each word to its id, and `vocab_i2w` maps each id back to its word.

In [None]:
# The example sentence, tokenised on whitespace.
sent = "the cat sees the tree with the binoculars".split()

# Vocabulary: every distinct word gets an id following alphabetical order.
# `vocab_w2i` maps word -> id, `vocab_i2w` maps id -> word.
vocab_w2i = {word: idx for idx, word in enumerate(sorted(set(sent)))}
vocab_i2w = {idx: word for word, idx in vocab_w2i.items()}

# The sentence expressed as a list of vocabulary ids.
sent_wordids = [vocab_w2i[word] for word in sent]

# Per token position: the positions it receives an edge from (incoming) and
# the positions it sends an edge to (outgoing).
n_tokens = len(sent)
sent_incoming_nodes = [[] for _ in range(n_tokens)]
sent_outgoing_nodes = [[] for _ in range(n_tokens)]

# Sequence edges: adjacent tokens are connected in both directions, so every
# existing neighbour (previous first, then next) is recorded in both lists.
for pos in range(n_tokens):
    for neighbour in (pos - 1, pos + 1):
        if 0 <= neighbour < n_tokens:
            sent_incoming_nodes[pos].append(neighbour)
            sent_outgoing_nodes[pos].append(neighbour)

# Dependency-tree edges, entered by hand as (dependent, head) token positions.
dependency_arcs = [
    (0, 1),  # 'the' depends on 'cat'
    (1, 2),  # 'cat' depends on 'sees'
    (3, 4),  # 'the' depends on 'tree'
    (4, 2),  # 'tree' depends on 'sees'
    (5, 7),  # 'with' depends on 'binoculars'
    (6, 7),  # 'the' depends on 'binoculars'
    (7, 4),  # 'binoculars' depends on 'tree'
]
for dependent, head in dependency_arcs:
    # The dependent lists its head as incoming; the head lists the dependent
    # as outgoing.
    sent_incoming_nodes[dependent].append(head)
    sent_outgoing_nodes[head].append(dependent)

In [None]:
# Build a directed multigraph with one node per token position (the word is
# stored as a node attribute) and draw it.
graph = nx.MultiDiGraph()
graph.add_nodes_from([(i, {'word': w}) for i, w in enumerate(sent)])
for i, sources in enumerate(sent_incoming_nodes):
    for j in sources:
        # j is listed as an *incoming* neighbour of i, so the edge runs j -> i.
        graph.add_edge(j, i)
nx.draw(graph, with_labels=True, font_weight='bold')

Let's see what we have:

In [None]:
# Show what has been built so far: the id -> word vocabulary, the tokens,
# their vocabulary ids, and the incoming / outgoing neighbour lists.
print("vocab:\t", vocab_i2w)
for structure in (sent, sent_wordids, sent_incoming_nodes, sent_outgoing_nodes):
    print(structure)

First, we create our word embeddings.

In [None]:
# One 2-dimensional embedding per vocabulary entry. Rows are indexed by
# vocabulary id, so they follow the alphabetical order of the words.
embeddings = np.array(
    [
        [-0.5,  0.7],  # binoculars
        [ 1.4,  1.1],  # cat
        [ 0.8, -0.5],  # sees
        [-0.3,  0.1],  # the
        [ 1.3,  0.8],  # tree
        [ 0.8, -0.8],  # with
    ]
)

# Show the matrix first as a plain array, then as LaTeX.
for rendering in (embeddings, bmatrix(embeddings)):
    print(rendering)

Next, we create all the weights and biases.

In [None]:
# Weights and bias applied to the hidden states of incoming neighbours.
w_in = np.array(
    [
        [-0.5,  0.0,  0.6],
        [-2.0,  0.5,  1.0],
        [ 2.0, -0.7, -0.8],
    ]
)
b_in = np.array([0.6, 3.0, -1.0])

# Weights and bias applied to the hidden states of outgoing neighbours.
w_out = np.array(
    [
        [ 0.1, -1.0,  1.0],
        [-0.7, -2.0,  2.0],
        [ 1.0,  0.3, -0.8],
    ]
)
b_out = np.array([-0.7, 1.0, 0.2])

# Weights and bias of the prediction layer.
w_pred = np.array(
    [
        [3.0, -2.0,  0.2],
        [0.1,  2.0, -3.0],
        [0.7, -2.0,  2.0],
    ]
)
b_pred = np.array([1.0, -0.5, -0.5])

# Print every parameter as a plain array followed by its LaTeX form.
parameters = [
    ("w_in", w_in),
    ("w_out", w_out),
    ("w_pred", w_pred),
    ("b_in", b_in),
    ("b_out", b_out),
    ("b_pred", b_pred),
]
for name, value in parameters:
    print(f"{name}:\n", value, "\n", bmatrix(value), "\n")

Now let us create the initial hidden states for all the nodes.

In [None]:
# Look up the embedding of every token: one row per token position.
token_embeddings = embeddings[sent_wordids]
print(token_embeddings)

# Append an indicator column that is 1 at the predicate's position (token
# index 2) and 0 everywhere else.
one_hot = np.zeros((len(sent), 1))
one_hot[2] = 1
h_0 = np.hstack([token_embeddings, one_hot])
h_0

Now we implement the GCN update algorithm.

In [None]:
# Hidden states after one update step, filled in row by row below.
h_1 = np.zeros_like(h_0)

for node, word in enumerate(sent):
    print(f"\n\nupdate h({node}) of word \"{word}\":\n------------")

    # One message per neighbour: its hidden state transformed by the
    # direction-specific weights, plus the direction-specific bias.
    incoming_msgs = [w_in.T @ h_0[src] + b_in for src in sent_incoming_nodes[node]]
    outgoing_msgs = [w_out.T @ h_0[dst] + b_out for dst in sent_outgoing_nodes[node]]
    v_in = np.sum(incoming_msgs, axis=0)
    v_out = np.sum(outgoing_msgs, axis=0)

    # The node's own previous state is added unchanged; negative entries are
    # then clipped to zero (ReLU).
    pre_activation = h_0[node] + v_in + v_out
    h_1[node] = np.where(pre_activation < 0, 0, pre_activation)

    report = [
        ("v_in:\n", v_in),
        ("v_out:\n", v_out),
        ("h_1[i]:\n", pre_activation),
        ("RELU h_1[i]:\n", h_1[node]),
    ]
    # First the plain arrays, then the same values as LaTeX.
    for label, value in report:
        print(label, value)
    for label, value in report:
        print(label, bmatrix(value))

This gives us the following hidden states $h$ at timestep 1:

In [None]:
# Use print() so the LaTeX appears verbatim; a bare expression would display
# the string's repr, with escaped backslashes and literal "\n" sequences.
print(bmatrix(h_1))

Now we make our prediction.

In [None]:
# Class scores: the prediction weights applied to every updated hidden state,
# transposed back to one row per token before the bias is added.
scores_by_class = w_pred.T @ h_1.T
pred = scores_by_class.T + b_pred

# Softmax over the last axis turns each token's scores into a distribution.
pred_dist = softmax(pred, axis=-1)

# Each result is printed as LaTeX first and as a plain array second; the plain
# distribution is rounded to two decimals.
results = [
    ("pred\n", bmatrix(pred), pred),
    ("pred_dist\n", bmatrix(pred_dist), np.round(pred_dist, 2)),
]
for label, latex, plain in results:
    print(label, latex)
    print(label, plain)

In [None]:
print("Predicted role labels for predicate 'see'")
# Index of the most probable class for every token, computed once per row.
predicted_ids = np.argmax(pred_dist, axis=1)
for word, label_id in zip(sent, predicted_ids):
    print(f"{word:10} {label_id} - {outputs[label_id]}")