## Embedding Layer (PyTorch)

The `nn.Embedding` layer is a simple lookup table that maps each index value to a row of a weight matrix. The matrix has shape `(vocab_size, vector_size)`, where **vocab_size** is the size of the vocabulary and **vector_size** is the size of each embedding.

Each row represents a single _word embedding_, initialized with values drawn randomly from a standard normal distribution.

-

If we don't want to train our embeddings during model training (when we are using pre-trained embeddings), we can use:

    `emb_layer.weight.requires_grad = False`

-


Lastly, to load weights into our layer from a pre-trained embedding model, we can use:

    `weight = torch.FloatTensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]])`

    `embedding = nn.Embedding.from_pretrained(weight)`

For more information:
https://towardsdatascience.com/the-secret-to-improved-nlp-an-in-depth-look-at-the-nn-embedding-layer-in-pytorch-6e901e193e16

## First Example

In [None]:
import torch
from torch import nn


vocab_size = 30  # number of rows in the lookup table
embed_size = 4   # length of each embedding vector

# Build the lookup table; its weights start out as random values
embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_size)

# Look up indices 0..4 in a single call and show one embedding per line
with torch.inference_mode():
    for row in embedding(torch.arange(5)):
        print(row)

tensor([ 1.2067,  1.3697,  1.0623, -2.3392])
tensor([0.9531, 0.0397, 0.3057, 1.3423])
tensor([-0.1981,  0.1677, -0.4753, -1.6036])
tensor([-1.1397, -0.1794,  0.3362,  0.5088])
tensor([-0.5705,  1.4132, -0.0266,  0.6365])


## Learning the Embedding using a Sentiment task

In [None]:
import torch
from torch import nn, optim
from torch.nn.functional import one_hot, pad

### Setting the Data

In [None]:
# Toy dataset: the first eight reviews are positive, the last eight negative.
# Spelling inside the strings is kept exactly as-is; every distinct token
# simply becomes its own vocabulary entry.
reviews = [
    # positive reviews
    "nice food",
    "amazing restaurant",
    "too good",
    "just loved it",
    "will go again",
    "nice service",
    "great chef",
    "value for money",
    # negative reviews
    "horrible food",
    "never go again",
    "terrible menu",
    "poor quality",
    "awfull service",
    "needs imporvment",
    "poor service",
    "too expensive",
]

# One label per review, in the same order: 1 = positive, 0 = negative
sentiment = [1] * 8 + [0] * 8

In [None]:
# Building the vocabulary: every distinct word that appears in the reviews
vocab = list({word for rev in reviews for word in rev.split()})
# Word -> index lookup; numbering starts at 1, so index 0 is left unused here
vocab_idx = {word: idx for idx, word in enumerate(vocab, start=1)}

print(len(vocab))
print(vocab_idx)

28
{'loved': 1, 'restaurant': 2, 'great': 3, 'it': 4, 'poor': 5, 'never': 6, 'service': 7, 'amazing': 8, 'money': 9, 'terrible': 10, 'too': 11, 'good': 12, 'awfull': 13, 'menu': 14, 'quality': 15, 'expensive': 16, 'nice': 17, 'imporvment': 18, 'for': 19, 'go': 20, 'food': 21, 'again': 22, 'value': 23, 'horrible': 24, 'needs': 25, 'just': 26, 'will': 27, 'chef': 28}


In [None]:
# One-hot table for the vocabulary: row k is the encoding of position k in `vocab`
one_hot_encodings = one_hot(torch.arange(len(vocab)))

# Show the first three rows
print(one_hot_encodings[0:3])

tensor([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0]])


In [None]:
# One-hot encoding of the word "chef".
# `vocab_idx` is 1-based while the rows of `one_hot_encodings` are 0-based,
# hence the shift by one.
chef_row = vocab_idx["chef"] - 1
print(one_hot_encodings[chef_row])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1])


In [None]:
# We have encoded the vocabulary, now we will encode the `reviews`:
# each review becomes the list of its words' vocabulary indices
encoded_reviews = [[vocab_idx[word] for word in rev.split()] for rev in reviews]

# Right-pad every encoded review with 0 up to the length of the longest review.
# Index 0 is free to act as the padding value because `vocab_idx` starts at 1.
# The target length comes from `max_rev` rather than a hard-coded 3, so the
# padding stays correct if longer reviews are added.
max_rev = max(len(rev.split()) for rev in reviews)
padded_encoded_reviews = [enc_rev + [0] * (max_rev - len(enc_rev)) for enc_rev in encoded_reviews]

padded_encoded_reviews

[[17, 21, 0],
 [8, 2, 0],
 [11, 12, 0],
 [26, 1, 4],
 [27, 20, 22],
 [17, 7, 0],
 [3, 28, 0],
 [23, 19, 9],
 [24, 21, 0],
 [6, 20, 22],
 [10, 14, 0],
 [5, 15, 0],
 [13, 7, 0],
 [25, 18, 0],
 [5, 7, 0],
 [11, 16, 0]]

In [None]:
# Same padding as above, this time using PyTorch's function `pad`
max_rev = max(len(rev.split()) for rev in reviews)
for i, enc_rev in enumerate(encoded_reviews):
    # Number of zeros needed to reach the longest review's length
    # (derived from `max_rev` instead of a hard-coded 3)
    missing = max_rev - len(enc_rev)
    if missing > 0:
        # pad=(0, missing): add nothing on the left, `missing` zeros on the right
        encoded_reviews[i] = pad(input=torch.tensor(enc_rev), pad=(0, missing), mode="constant", value=0).tolist()

encoded_reviews

[[17, 21, 0],
 [8, 2, 0],
 [11, 12, 0],
 [26, 1, 4],
 [27, 20, 22],
 [17, 7, 0],
 [3, 28, 0],
 [23, 19, 9],
 [24, 21, 0],
 [6, 20, 22],
 [10, 14, 0],
 [5, 15, 0],
 [13, 7, 0],
 [25, 18, 0],
 [5, 7, 0],
 [11, 16, 0]]

### Creating the Model

In [None]:
class Embedding_Model(nn.Module):
    """Sentiment classifier for a single padded review.

    The review's token indices are looked up in an embedding table, the
    resulting vectors are flattened into one long vector, and a single linear
    layer turns that vector into one raw logit.

    Args:
        vocab_size: Number of rows in the embedding table; every token index
            passed to `forward` must be smaller than this.
        embed_size: Length of each embedding vector.
        max_rev_size: Number of token indices per (padded) review. It fixes
            the input size of the linear layer to `embed_size * max_rev_size`.
    """

    def __init__(self, vocab_size, embed_size, max_rev_size=3):
        super().__init__()

        self.embedding_layer = nn.Embedding(vocab_size, embed_size)
        self.classifier = nn.Sequential(
            # start_dim=0 flattens *everything*, so the model handles exactly
            # one review per call (no batch dimension)
            nn.Flatten(start_dim=0),
            nn.Linear(in_features=embed_size*max_rev_size, out_features=1)
        )

    def forward(self, x):
        """Return the raw logit, shape `(1,)`, for one padded review.

        Args:
            x: The padded encoded review, i.e. `max_rev_size` integer token
                indices, given as a list or as an integer tensor.
        """
        # `as_tensor` accepts plain lists as well as existing integer tensors
        # of any integer dtype, unlike the legacy `torch.LongTensor(...)`
        # constructor
        token_ids = torch.as_tensor(x, dtype=torch.long)
        return self.classifier(self.embedding_layer(token_ids))

### Initializing the Model

In [None]:
# NOTE(review): `vocab_size` and `embed_size` are the globals set in the first
# example, not values derived from `vocab` -- the table must have more rows
# than the largest index in `vocab_idx`; confirm if the reviews change.
model = Embedding_Model(vocab_size=vocab_size, embed_size=embed_size)

print(model)

Embedding_Model(
  (embedding_layer): Embedding(30, 4)
  (classifier): Sequential(
    (0): Flatten(start_dim=0, end_dim=-1)
    (1): Linear(in_features=12, out_features=1, bias=True)
  )
)


In [None]:
# Sanity check: run the untrained model on the first padded review
first_review = encoded_reviews[0]
logits = model(first_review)
logits

tensor([0.1841], grad_fn=<AddBackward0>)

### Creating the Training Loop

In [None]:
def fit(epochs, model, encoded_reviews, sentiment, loss_fn, accuracy_fn, opt):
    """Train `model` one review at a time, printing progress every 10 epochs.

    Args:
        epochs: Number of passes over the whole dataset.
        model: Callable module mapping one encoded review to a logit.
        encoded_reviews: Sequence of encoded reviews, each passed to `model`
            as-is.
        sentiment: Sequence of target labels, one per review, in a form
            `loss_fn` accepts next to the squeezed logit.
        loss_fn: Callable `(logit, label) -> loss tensor`.
        accuracy_fn: Callable `(logit, label) -> truthy/numeric` that is 1
            (True) for a correct prediction and 0 (False) otherwise.
        opt: Optimizer over the parameters of `model`.

    Raises:
        ValueError: If there are no reviews, or if the number of reviews and
            the number of labels differ.
    """
    n_samples = len(encoded_reviews)
    if n_samples == 0:
        # Without this the per-epoch averages would divide by zero
        raise ValueError("encoded_reviews must contain at least one review")
    if len(sentiment) != n_samples:
        raise ValueError("encoded_reviews and sentiment must have the same length")

    for epoch in range(1, epochs + 1):
        model.train()

        set_loss, set_acc = 0.0, 0.0

        for review, label in zip(encoded_reviews, sentiment):
            logit = model(review).squeeze()
            loss = loss_fn(logit, label)

            # Running totals for this epoch (measured before the weight update)
            set_loss += loss.item()
            set_acc += accuracy_fn(logit, label)

            opt.zero_grad()
            loss.backward()
            opt.step()

        set_loss /= n_samples
        # Scale to a percentage so the value matches the "Acc(%)" label
        set_acc = 100 * set_acc / n_samples

        if epoch % 10 == 0:
            print(f"Epoch: {epoch} | Loss: {set_loss} | Acc(%): {set_acc}")

### Creating Accuracy Metric

In [None]:
def accuracy_fn(logits, sentiment):
    """Tell whether a single raw logit predicts the given label.

    The logit is squashed with a sigmoid and rounded to 0 or 1; the result is
    then compared with `sentiment` and returned as a Python value via
    `.item()`.
    """
    probability = logits.sigmoid()
    predicted_label = probability.round()
    is_correct = predicted_label.eq(sentiment)

    return is_correct.item()

### Setting Loss Function and Optimizer

In [None]:
# Binary cross-entropy that takes raw logits (the model applies no sigmoid itself)
loss_fn = nn.BCEWithLogitsLoss()
# Adam over every parameter of the model, embedding table included
opt = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Train for 100 epochs; the labels are converted to a float tensor before
# being handed to the loss function
fit(
    epochs=100,
    model=model,
    encoded_reviews=encoded_reviews,
    sentiment=torch.tensor(sentiment, dtype=torch.float32),
    loss_fn=loss_fn,
    accuracy_fn=accuracy_fn,
    opt=opt,
)

Epoch: 10 | Loss: 0.6899639777839184 | Acc(%): 0.5625
Epoch: 20 | Loss: 0.6037122216075659 | Acc(%): 0.6875
Epoch: 30 | Loss: 0.5316259227693081 | Acc(%): 0.875
Epoch: 40 | Loss: 0.46818832680583 | Acc(%): 1.0
Epoch: 50 | Loss: 0.41084972862154245 | Acc(%): 1.0
Epoch: 60 | Loss: 0.35870243329554796 | Acc(%): 1.0
Epoch: 70 | Loss: 0.3116523493081331 | Acc(%): 1.0
Epoch: 80 | Loss: 0.2697701081633568 | Acc(%): 1.0
Epoch: 90 | Loss: 0.23296113777905703 | Acc(%): 1.0
Epoch: 100 | Loss: 0.20091756968759 | Acc(%): 1.0


### Getting the Word Embeddings

In [None]:
# The trained lookup table: one row per index of the embedding layer
word_embeddings = model.embedding_layer.weight
# Turn off gradient tracking in place. This is the model's own parameter
# (not a copy), so the embedding layer itself stops being trainable too.
word_embeddings.requires_grad_(False)

print(word_embeddings[0:5])

tensor([[-1.7385,  0.3555,  0.9445,  0.5888],
        [-1.9104,  0.9121, -1.6049,  0.9643],
        [-0.8481,  0.3568, -1.6062, -0.7131],
        [ 0.6051, -0.3211, -1.4134, -0.4751],
        [ 0.6058, -0.3648,  1.6964,  0.5499]])


In [None]:
# Comparing the word embedding of 2 similar words like "nice" and "amazing"
# The reviews are fed to the model as raw `vocab_idx` values (1-based, with 0
# used for padding), so a word's embedding is row `vocab_idx[word]` of the
# table. The `- 1` shift only applies to the 0-based one-hot table.

print(word_embeddings[vocab_idx["nice"]])
print(word_embeddings[vocab_idx["amazing"]])

tensor([ 0.4986, -0.8899,  1.6748,  0.2998])
tensor([ 0.4989,  0.8725,  2.4576, -0.6257])
