In [2]:
import torch
import torch.nn as nn

  cpu = _conversion_method_template(device=torch.device("cpu"))


In [29]:
# Toy hyperparameters shared by the embedding demos in this notebook:
# vocabulary size (rows of the lookup table) and embedding width (columns).
vocab_size, n_embd = 65, 4

## Understanding Embeddings

In [21]:
# Build the embedding lookup table: a learnable matrix of shape
# (vocab_size, n_embd) holding one embedding vector (row) per token id.
embedding_layer = nn.Embedding(num_embeddings=vocab_size, embedding_dim=n_embd)

In [6]:
# One sequence of eight token ids (1..8), shaped (1, 8) = (batch_size, seq_length).
x = torch.arange(1, 9).unsqueeze(0)

In [7]:
# Show the token ids: one row (sequence) holding eight ids.
print(x)

tensor([[1, 2, 3, 4, 5, 6, 7, 8]])


In [8]:
# Look up every token id of x in the embedding table. The result has shape
# (batch_size, seq_length, embd_dimension).
embeddings = embedding_layer(x)

In [9]:
# Expect (batch_size, seq_length, embd_dimension) = (1, 8, 4).
embeddings.shape

torch.Size([1, 8, 4])

In [12]:
# Batch size is 1, the sequence length is 8 and the embedding dimension is 4,
# so the printed tensor has shape (1, 8, 4).
print(embeddings)

tensor([[[-1.1264,  0.8589, -1.2485,  0.7766],
         [ 0.0216,  0.3420, -0.0510, -0.1627],
         [ 1.8798, -0.5027,  0.3491,  1.9044],
         [ 1.4022, -0.6931, -1.1434, -2.5329],
         [ 0.0087,  0.8017,  0.7825, -1.3101],
         [ 0.8763,  2.3211,  0.4939,  0.0248],
         [ 0.5575,  0.3543,  0.0396, -0.4927],
         [ 1.5899, -0.3071, -0.1410, -0.1925]]], grad_fn=<EmbeddingBackward0>)


In [13]:
# Grow the batch: two sequences of eight token ids each (1..8 and 9..16).
x = torch.arange(1, 17).reshape(2, 8)

In [14]:
# Show the shape of the batched input, then its contents, one per line.
print(x.shape, x, sep="\n")

torch.Size([2, 8])
tensor([[ 1,  2,  3,  4,  5,  6,  7,  8],
        [ 9, 10, 11, 12, 13, 14, 15, 16]])


In [15]:
# Same lookup table as before, now applied to the two-sequence batch.
embeddings_2_batches = embedding_layer(x)

In [20]:
# Only the leading (batch) dimension changes compared with the single-sequence case.
print(embeddings_2_batches.shape)

torch.Size([2, 8, 4])


In [18]:
# With two sequences in the batch the output has shape (2, 8, 4). The first
# block repeats the earlier single-sequence result, because token ids 1..8
# select the same rows of the same lookup table.
print(embeddings_2_batches)

tensor([[[-1.1264,  0.8589, -1.2485,  0.7766],
         [ 0.0216,  0.3420, -0.0510, -0.1627],
         [ 1.8798, -0.5027,  0.3491,  1.9044],
         [ 1.4022, -0.6931, -1.1434, -2.5329],
         [ 0.0087,  0.8017,  0.7825, -1.3101],
         [ 0.8763,  2.3211,  0.4939,  0.0248],
         [ 0.5575,  0.3543,  0.0396, -0.4927],
         [ 1.5899, -0.3071, -0.1410, -0.1925]],

        [[-1.5421, -0.2371, -0.0450,  1.0562],
         [-0.1198, -0.6220,  0.0757,  1.7773],
         [ 1.8148, -0.1553,  0.0715,  0.4484],
         [ 0.8124, -0.3807, -0.0236, -1.2077],
         [ 0.0345, -0.0710,  0.3022, -1.3491],
         [-0.2698, -0.8774,  0.3046, -1.0447],
         [-0.2172, -0.5591,  1.7421, -0.9243],
         [-0.4596,  1.5519, -1.1228, -0.2404]]], grad_fn=<EmbeddingBackward0>)


In [22]:
## Create an Embedding layer class for our input sequences

In [27]:
class EmbeddingLayer(nn.Module):
    """Token-embedding module: a thin wrapper around ``nn.Embedding``.

    Args:
        vocab_size: Number of rows in the lookup table (one per token id).
        embd_dimension: Length of each embedding vector.
        max_seq_length: Optional value kept on the instance. This class only
            stores it; ``forward`` does not use it.
    """

    def __init__(self, vocab_size, embd_dimension, max_seq_length=None):
        super().__init__()
        # Learnable lookup table of shape (vocab_size, embd_dimension).
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embd_dimension,
        )
        # Keep the constructor arguments on the instance for later inspection.
        self.vocab_size, self.embd_dimension = vocab_size, embd_dimension
        self.max_seq_length = max_seq_length

    def forward(self, x):
        """Return the embedding vectors for the token ids in ``x``.

        The output has the shape of ``x`` with one trailing axis of size
        ``embd_dimension`` appended, e.g. (batch_size, seq_length) ->
        (batch_size, seq_length, embd_dimension).
        """
        return self.embedding(x)

In [30]:
# NOTE: this rebinds `embedding_layer`, which previously held a bare
# nn.Embedding, to an instance of the EmbeddingLayer wrapper. The new instance
# creates its own nn.Embedding, so the values printed below differ from the
# earlier outputs.
embedding_layer = EmbeddingLayer(vocab_size, n_embd)

In [31]:
# The module repr lists the registered submodule: the wrapped Embedding(65, 4).
embedding_layer

EmbeddingLayer(
  (embedding): Embedding(65, 4)
)

In [32]:
# x is still the (2, 8) batch of token ids defined earlier.
embeddings = embedding_layer(x)

In [34]:
# Show the output shape first, then the embedding values, one per line.
print(embeddings.shape, embeddings, sep="\n")

torch.Size([2, 8, 4])
tensor([[[-0.3810,  0.4519,  0.5480, -0.2506],
         [ 0.7409,  0.1382,  1.1884,  0.6978],
         [ 1.0261, -0.0628,  1.0503,  0.2012],
         [ 0.3017, -1.5578, -0.2951,  0.6807],
         [ 1.0435,  0.2109,  0.2883,  0.3007],
         [ 0.4978, -0.2887, -0.5370,  0.5508],
         [ 0.1637,  1.2067, -0.0828,  1.3311],
         [-1.6304,  0.9141, -0.6493, -0.9564]],

        [[-0.8021,  0.0934,  0.2240, -2.1692],
         [-1.2842,  1.6917,  1.3587,  1.5298],
         [-0.4081,  0.6964,  1.4781, -0.0682],
         [ 0.7820, -0.6412,  1.2276, -0.0174],
         [ 0.3995, -1.6388,  0.6334,  0.2647],
         [-0.5701,  1.2844, -0.9699,  0.8745],
         [-1.4509, -1.0548, -0.1155, -1.2794],
         [ 0.3926,  0.6677, -0.5076, -0.7729]]], grad_fn=<EmbeddingBackward0>)


## Understanding Positional Embeddings
#### Positional embeddings are added so that the order of the tokens in the sequence is preserved.