# Implementation: Scaled Dot Product Attention on Graphs

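**Goal**: Weighted aggregation — update a node's features with a softmax-weighted sum of its neighbors' features, where the weights come from dot-product attention scores. (For simplicity, the code below uses the raw dot product and omits the 1/sqrt(d) scaling factor.)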

In [None]:
import torch
import torch.nn.functional as F

# 1. Node Features (3 nodes, dim=4)
# Row k of `x` is the feature vector of node k, drawn from a standard normal.
x = torch.randn((3, 4))

# 2. Attention Mechanism (Simplified)
def attention(node_i: torch.Tensor, node_j: torch.Tensor) -> torch.Tensor:
    """Return the raw attention score between two nodes.

    The score is the plain (unscaled) dot product of the two feature
    vectors. Both arguments must be 1-D tensors of equal length, as
    required by ``torch.dot``; the result is a 0-dim tensor.
    """
    similarity = node_i.dot(node_j)
    return similarity

# 3. Calculate Scores for Node 0's neighbors (1 and 2)
# Each score is the raw (unnormalized) similarity between node 0 and one
# of its neighbors.
score_01 = attention(x[0], x[1])
score_02 = attention(x[0], x[2])

# 4. Softmax
# torch.stack joins the two scores into a shape-(2,) tensor while keeping
# them attached to the autograd graph; torch.tensor([...]) would copy the
# values out and silently drop gradient tracking.
weights = F.softmax(torch.stack([score_01, score_02]), dim=0)

print(f"Attention Weights: {weights}")
# Note: an exact tie also falls through to the else branch.
if weights[0] > weights[1]:
    print("Node 0 pays more attention to Node 1.")
else:
    print("Node 0 pays more attention to Node 2.")

# 5. Weighted Sum
# The new representation of node 0 is the attention-weighted combination
# of its neighbors' feature vectors.
h_new = weights[0] * x[1] + weights[1] * x[2]
print("New Feature for Node 0:", h_new)

## Conclusion
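A Graph Attention Network (GAT) layer is essentially Transformer-style attention where the 'Sequence' each node attends over is its 'Neighborhood'. (The original GAT scores neighbors with a small additive attention network rather than a dot product, but the softmax-weighted aggregation is the same.)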