In [4]:
import torch

# Toy output layer: score one hidden state against a three-word vocabulary,
# then turn the scores into a probability distribution.

# Hidden state of a single token, kept 2-D with a leading axis -> shape [1, 4]
h = torch.tensor([[0.3, -0.1, 0.9, 0.05]])

# Output weights: row i holds the vector for word i -> shape [3, 4]
W = torch.tensor(
    [
        [0.2, 0.5, -0.3, 0.1],  # word1
        [-0.4, 0.1, 0.7, 0.2],  # word2
        [0.3, -0.2, 0.6, -0.5],  # word3
    ]
)

# One additive offset per word -> shape [3]
b = torch.tensor([0.1, -0.2, 0.05])

# Score every word at once: multiplying by the transposed weights takes the
# dot product of h with each row of W; b then broadcasts across the leading
# axis, giving a [1, 3] result.
logits = torch.matmul(h, W.T) + b

# Normalise over the last (vocabulary) axis so the scores become
# probabilities that sum to 1.
probs = logits.softmax(dim=-1)

# Report the inputs first, then the derived quantities.
print("Hidden vector h:", h)
print("\nWeight matrix W:\n", W)
print("\nBias b:", b)
print("\nLogits:", logits)
print("\nProbabilities:", probs)
print("Sum of probs:", probs.sum().item())

Hidden vector h: tensor([[ 0.3000, -0.1000,  0.9000,  0.0500]])

Weight matrix W:
 tensor([[ 0.2000,  0.5000, -0.3000,  0.1000],
        [-0.4000,  0.1000,  0.7000,  0.2000],
        [ 0.3000, -0.2000,  0.6000, -0.5000]])

Bias b: tensor([ 0.1000, -0.2000,  0.0500])

Logits: tensor([[-0.1550,  0.3100,  0.6750]])

Probabilities: tensor([[0.2047, 0.3259, 0.4694]])
Sum of probs: 1.0
