In [40]:
import numpy as np

In [41]:
# Input matrix X (2 x 2). The cells below right-multiply it by each head's
# weight matrices, so every row of X is projected independently.
X = np.array([
    [1, 2],
    [3, 4],
])

In [42]:
# Head 1 weight matrices (each 2 x 2), applied as X @ W.
W_Q1 = np.array([[1, 0], [0, 1]])   # identity: a row [a, b] stays [a, b]
W_K1 = np.array([[0, 1], [1, 0]])   # column swap: [a, b] -> [b, a]
W_V1 = np.array([[1, 0], [0, -1]])  # negate second component: [a, b] -> [a, -b]

In [43]:
# Head 2 weight matrices (each 2 x 2), applied as X @ W.
W_Q2 = np.array([[0, 1], [1, 0]])   # column swap: a row [a, b] -> [b, a]
W_K2 = np.array([[1, 0], [0, 1]])   # identity: [a, b] stays [a, b]
W_V2 = np.array([[0, 1], [-1, 0]])  # [a, b] -> [-b, a]

In [44]:
# Final projection matrix (4 x 2): maps the 4 concatenated head columns back
# to 2 columns. It is the identity stacked on top of the negated identity, so
# a concatenated row [p, q, r, s] is projected to [p - r, q - s].
W_O = np.array([
    [1, 0],
    [0, 1],
    [-1, 0],
    [0, -1],
])

In [45]:
def self_attention(Q, K, V):
    """Compute scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V.

    Parameters
    ----------
    Q : np.ndarray, shape (..., n_queries, d_k)
        Query matrix (optionally with leading batch/stack dimensions).
    K : np.ndarray, shape (..., n_keys, d_k)
        Key matrix; its last dimension must match that of ``Q``.
    V : np.ndarray, shape (..., n_keys, d_v)
        Value matrix; one row per key.

    Returns
    -------
    np.ndarray, shape (..., n_queries, d_v)
        For each query, the attention-weighted average of the rows of ``V``.
    """
    dk = Q.shape[-1]  # Dimension of the key/query vectors (last axis)
    # Scaled dot-product scores. Swapping the last two axes equals K.T for
    # 2-D input and also works when leading batch dimensions are present.
    scores = np.matmul(Q, np.swapaxes(K, -1, -2)) / np.sqrt(dk)
    # Numerically stable softmax over the key axis: subtracting each row's
    # maximum leaves the result mathematically unchanged but keeps np.exp
    # from overflowing to inf (which would turn the weights into NaN).
    shifted = scores - np.max(scores, axis=-1, keepdims=True)
    exp_scores = np.exp(shifted)  # computed once and reused below
    attention_weights = exp_scores / np.sum(exp_scores, axis=-1, keepdims=True)
    return np.matmul(attention_weights, V)

In [46]:
# Head 1: project the input X through the head's query, key and value weights
Q1 = X @ W_Q1
K1 = X @ W_K1
V1 = X @ W_V1

In [47]:
# Head 2: project the input X through the head's query, key and value weights
Q2 = X @ W_Q2
K2 = X @ W_K2
V2 = X @ W_V2

In [48]:
# Run scaled dot-product attention separately for each head
head1 = self_attention(Q=Q1, K=K1, V=V1)
head2 = self_attention(Q=Q2, K=K2, V=V2)

In [49]:
# Join the two head outputs side by side (column-wise), giving one row per
# input row with head1's columns followed by head2's columns
concat_heads = np.concatenate((head1, head2), axis=1)

In [50]:
# Project the concatenated heads through W_O to get the final output
M = concat_heads @ W_O

In [51]:
# Results: print every intermediate array under its own heading
sections = [
    ("Head 1 Output:", head1),
    ("\nHead 2 Output:", head2),
    ("\nConcatenated Heads:", concat_heads),
    ("\nFinal Multi-Head Self-Attention Output (M):", M),
]
for heading, array in sections:
    print(heading)
    print(array)

Head 1 Output:
[[ 2.97166793 -3.97166793]
 [ 2.9998996  -3.9998996 ]]

Head 2 Output:
[[-3.97166793  2.97166793]
 [-3.9998996   2.9998996 ]]

Concatenated Heads:
[[ 2.97166793 -3.97166793 -3.97166793  2.97166793]
 [ 2.9998996  -3.9998996  -3.9998996   2.9998996 ]]

Final Multi-Head Self-Attention Output (M):
[[ 6.94333586 -6.94333586]
 [ 6.99979921 -6.99979921]]
