# Using pytorch

In [2]:
import torch
import torch.nn as nn

# Single-head scaled dot-product self-attention over one sentence, using PyTorch.

# Seed the global RNG so the randomly initialised embedding and projection
# weights (and therefore every printed tensor) are identical on each run.
torch.manual_seed(0)

sentence = "What are the symptoms of diabetes?"

# Whitespace tokenization of the lower-cased sentence; punctuation stays
# attached to its word (the last token is "diabetes?").
tokens = sentence.lower().split()

# dict.fromkeys de-duplicates while keeping first-occurrence order. Iterating a
# set of strings instead would give a different word -> id mapping from one
# interpreter session to the next.
vocab = {word: idx for idx, word in enumerate(dict.fromkeys(tokens))}
vocab_size = len(vocab)
embed_dim = 8

token_ids = torch.tensor([vocab[word] for word in tokens])

# One learnable embed_dim-sized vector per vocabulary entry.
embedding = nn.Embedding(vocab_size, embed_dim)
embedded_tokens = embedding(token_ids)

# Independent linear projections producing queries, keys and values.
query_weight = nn.Linear(embed_dim, embed_dim)
key_weight = nn.Linear(embed_dim, embed_dim)
value_weight = nn.Linear(embed_dim, embed_dim)

Q = query_weight(embedded_tokens)
K = key_weight(embedded_tokens)
V = value_weight(embedded_tokens)

# Scaled dot-product scores: Q @ K^T / sqrt(embed_dim).
attention_scores = torch.matmul(Q, K.transpose(-2, -1)) / (embed_dim ** 0.5)
# Softmax over the last dimension so each query's weights sum to 1.
attention_weights = torch.nn.functional.softmax(attention_scores, dim=-1)
# Each output row is the attention-weighted sum of the value vectors.
attention_output = torch.matmul(attention_weights, V)

print("Tokens:", tokens)
print("Attention Scores:\n", attention_scores)
print("Attention Weights (after Softmax):\n", attention_weights)
print("Attention Output:\n", attention_output)


Tokens: ['what', 'are', 'the', 'symptoms', 'of', 'diabetes?']
Attention Scores:
 tensor([[-0.1466,  0.2270,  0.0423,  0.3350,  0.4152, -0.1968],
        [-0.0032, -0.1107, -0.4408, -0.0396, -0.4479,  0.0352],
        [-0.7412, -0.4808, -0.2818,  0.0701, -0.3640,  0.0267],
        [-0.2614, -0.4615, -0.1122, -0.2139, -0.3583, -0.0429],
        [-1.3057, -0.7060, -0.4546, -0.2096, -0.8794, -0.1695],
        [ 0.2190,  0.3425,  0.1200,  0.2590,  0.3976,  0.0155]],
       grad_fn=<DivBackward0>)
Attention Weights (after Softmax):
 tensor([[0.1252, 0.1820, 0.1513, 0.2027, 0.2197, 0.1191],
        [0.1927, 0.1731, 0.1244, 0.1859, 0.1236, 0.2003],
        [0.1026, 0.1331, 0.1625, 0.2310, 0.1496, 0.2212],
        [0.1618, 0.1325, 0.1878, 0.1697, 0.1469, 0.2013],
        [0.0781, 0.1423, 0.1829, 0.2337, 0.1196, 0.2433],
        [0.1642, 0.1858, 0.1487, 0.1709, 0.1963, 0.1340]],
       grad_fn=<SoftmaxBackward0>)
Attention Output:
 tensor([[ 0.0796,  0.2296,  0.0008, -0.0764,  0.5766, -0.0350, -

# Using NumPy

In [3]:
import numpy as np

# Single-head scaled dot-product self-attention over one sentence, using NumPy:
# build embeddings, project them to Q/K/V and compute the scaled scores.

sentence = "What are the symptoms of diabetes?"

# Whitespace tokenization of the lower-cased sentence; punctuation stays
# attached to its word (the last token is "diabetes?").
tokens = sentence.lower().split()
num_tokens = len(tokens)
embed_dim = 8

# dict.fromkeys de-duplicates while keeping first-occurrence order. Iterating a
# set of strings instead would change, between interpreter sessions, which word
# receives which random embedding below, defeating the fixed seed.
vocab = {word: idx for idx, word in enumerate(dict.fromkeys(tokens))}

np.random.seed(0)
# One random embed_dim-sized vector per vocabulary word, drawn in vocab order.
embeddings = {word: np.random.randn(embed_dim) for word in vocab}

# Stack the per-token vectors into a (len(tokens), embed_dim) matrix.
embedded_tokens = np.array([embeddings[word] for word in tokens])

# Random projection matrices for queries, keys and values.
W_q = np.random.randn(embed_dim, embed_dim)
W_k = np.random.randn(embed_dim, embed_dim)
W_v = np.random.randn(embed_dim, embed_dim)

Q = np.dot(embedded_tokens, W_q)
K = np.dot(embedded_tokens, W_k)
V = np.dot(embedded_tokens, W_v)

# Scaled dot-product scores: Q @ K^T / sqrt(embed_dim), computed in one
# expression rather than mutating the score matrix in place.
scale_factor = np.sqrt(embed_dim)
attention_scores = np.dot(Q, K.T) / scale_factor

def softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    Args:
        x: Array-like of scores.
        axis: Axis along which the outputs sum to 1. Defaults to the last
            axis, which for 2-D input is the row-wise behaviour.

    Returns:
        np.ndarray with the same shape as ``x``.
    """
    x = np.asarray(x)
    # Subtract the maximum of each slice, not the global maximum. With the
    # global maximum, a slice lying far below it underflows to all zeros and
    # the normalisation becomes 0/0 = NaN.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / exp_x.sum(axis=axis, keepdims=True)

# Turn each row of scaled scores into a probability distribution.
attention_weights = softmax(attention_scores)

# Each output row is the attention-weighted combination of the value vectors.
attention_output = attention_weights.dot(V)

# Report the tokens and every intermediate matrix of the attention computation.
print("Tokens:", tokens)
print("Attention Scores:\n", attention_scores)
print("Attention Weights (after Softmax):\n", attention_weights)
print("Attention Output:\n", attention_output)


Tokens: ['what', 'are', 'the', 'symptoms', 'of', 'diabetes?']
Attention Scores:
 [[ -7.63479725  -1.95047375   3.30412288   2.47112603  -1.73103351
    5.88963206]
 [  2.29426192  -0.54716144   4.87188321  -2.22295171   0.81228273
    2.0491618 ]
 [  0.39667638   5.43517076  16.3123148   -1.78123963 -14.6585921
  -13.18883214]
 [  6.21009767  -1.7565809    9.02426645 -10.49680223  -1.60171342
    3.14603431]
 [-10.75043823  -9.8497515   -8.078799    -0.09564052  11.09850863
   24.86293917]
 [ -4.12186374   6.202978   -22.40118343  13.42445284   7.17639521
  -18.73209584]]
Attention Weights (after Softmax):
 [[1.20637348e-06 3.54937684e-04 6.79507778e-02 2.95412247e-02
  4.42032087e-04 9.01709821e-01]
 [6.55958834e-02 3.82703207e-03 8.63621190e-01 7.16268139e-04
  1.49025710e-02 5.13370556e-02]
 [1.22438442e-07 1.88846136e-05 9.99980979e-01 1.38695027e-08
  3.54403311e-14 1.54101339e-13]
 [5.64119028e-02 1.95652864e-05 9.40911388e-01 3.13080713e-09
  2.28425351e-05 2.63429823e-03]
 [3.4