<a href="https://colab.research.google.com/github/DOLLARDEV05/AI-LAB/blob/main/Attention_is_All_You_Need/Transformers/single_head_attention.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

sentence = "My cat is lazy"

tokens = sentence.lower().split()
print("Tokens:", tokens)

vocab = {word: idx for idx, word in enumerate(set(tokens))}
print("Vocab:", vocab)

token_ids = np.array([vocab[word] for word in tokens])
print("Token IDs:", token_ids)


Tokens: ['my', 'cat', 'is', 'lazy']
Vocab: {'lazy': 0, 'cat': 1, 'is': 2, 'my': 3}
Token IDs: [3 1 2 0]


In [2]:
# let's create some embeddings!

d_model = 4

# this will create a random matrix of small float values with dimension being
# rows = length of the sentence/token list, columns being the dimension we set
# that is the d_model = 4 where this is a hyperparameter, and for single head
# attention this is d_model = Q = K = V
embedding = np.random.rand(len(tokens),d_model)

print(embedding)

[[0.27955275 0.15740985 0.58713083 0.94429896]
 [0.59527577 0.40765788 0.61334109 0.30343168]
 [0.62835687 0.95076768 0.76316043 0.09116549]
 [0.78187572 0.20650533 0.43900491 0.99186558]]


In [3]:
embedding_token = np.array([embedding[vocab[token]] for token in tokens])
print(embedding_token)

[[0.78187572 0.20650533 0.43900491 0.99186558]
 [0.59527577 0.40765788 0.61334109 0.30343168]
 [0.62835687 0.95076768 0.76316043 0.09116549]
 [0.27955275 0.15740985 0.58713083 0.94429896]]


In [4]:
# let's convert the maths of positional encoding into code successful

import math

for embedding in range(len(embedding_token)):
  for value in range(d_model):
    if value % 2 == 0:
      temp = math.sin(embedding/10000**((2*(value//2))/d_model))
      embedding_token[embedding,value]=embedding_token[embedding,value]+temp
    elif value % 2 == 1:
      temp = math.cos(embedding/10000**((2*(value//2))/d_model))
      embedding_token[embedding,value]=embedding_token[embedding,value]+temp
    else:
      print("error")
print(embedding_token)

[[ 0.78187572  1.20650533  0.43900491  1.99186558]
 [ 1.43674676  0.94796019  0.62334093  1.30338168]
 [ 1.5376543   0.53462085  0.7831591   1.0909655 ]
 [ 0.42067275 -0.83258265  0.61712633  1.943849  ]]


In [5]:
x = 0 // 2
y = 1 // 2
z = 2 // 2
p = 3 // 2
q = 4 // 2

# how positional encoding uses floor division to have similar values given to the
# sin and cos fuction to have pi/2 shifted sin and cos waves that also dipicts a
# circle idk if this is necessary to mention here

print(x,y,z,p,q)

# was just showcasing where i was stuck in positional encoding and why it matter

0 0 1 1 2


In [8]:
# let's work on making the weight matrix that will extract features from the
# embedding matrix ! W_Q,W_K,W_V


W_Q = np.random.rand(d_model,d_model)
W_K = np.random.rand(d_model,d_model)
W_V = np.random.rand(d_model,d_model)

print(W_Q)
print(W_K)
print(W_V)

[[0.72873166 0.50975491 0.27944903 0.53790527]
 [0.73771901 0.11702697 0.39326878 0.9302899 ]
 [0.76522779 0.94886028 0.16822934 0.52165922]
 [0.80390318 0.41469672 0.38509278 0.64277087]]
[[0.63029303 0.4728026  0.75799225 0.94956099]
 [0.87635421 0.74522781 0.39662107 0.81721073]
 [0.02541526 0.28001866 0.71364224 0.54610022]
 [0.27095926 0.05362236 0.57941191 0.79893838]]
[[0.40730124 0.11242142 0.26928557 0.17089245]
 [0.94453613 0.49446403 0.41027834 0.46506908]
 [0.08458327 0.22210106 0.48706867 0.15678304]
 [0.91591896 0.26460957 0.57519865 0.42494554]]


In [12]:
# multiplying the weighted matrix with the embedded_token to get Q K V matrices


Q = embedding_token @ W_Q
K = embedding_token @ W_K
V = embedding_token @ W_V

print(Q,"\n\n",K,"\n\n\n",V)

# i used the ** 1/2 or **0.5 which is fine for scalers and one time operations
# but its better to use np.sqrt() which is faster for sqrting whole arrays!
print((Q @ K.T)/d_model**(1/2))


[[3.39704534 1.78233309 1.53388186 3.05229893]
 [3.27112158 1.97529718 1.38108775 2.81765877]
 [2.99126308 2.04192027 1.19181898 2.43424774]
 [1.72725413 1.50867983 0.64250881 1.02311802]] 

 [[ 2.10100869  1.4985323   2.53858424  3.55952631]
 [ 2.10532607  1.63018139  2.66506117  3.52069017]
 [ 1.75320141  1.40322058  2.56858575  3.19629199]
 [ 0.07789811 -0.1445279   1.55534354  1.60908735]] 


 [[3.31956673 1.30904322 2.06509478 1.60998791]
 [2.72708758 1.11358528 1.82913592 1.33799167]
 [2.19673535 0.89983644 1.64238561 1.09779613]
 [1.21754286 0.28703592 1.1903727  0.60746626]]
[[12.28336607 12.44575109 11.07632843  3.6520777 ]
 [11.68411981 11.85382856 10.53010756  3.32562682]
 [10.51742748 10.68639358  9.47570351  2.85425225]
 [ 5.58132983  5.70513089  5.03287895  1.28105585]]
