In [1]:
# !pip install d2l
import torch
from torch import nn
from d2l import torch as d2l
import torch.nn.functional as F

# Settings handed to the d2l PTB loader below, one per line so each is easy to tune.
batch_size = 512
max_window_size = 5
num_noise_words = 5

# The loader returns the minibatch iterator together with the vocabulary.
data_iter, vocab = d2l.load_data_ptb(
    batch_size, max_window_size, num_noise_words)

In [2]:
# Small demo embedding table: 20 indices, each mapped to a 4-dimensional vector.
embed = nn.Embedding(num_embeddings=20, embedding_dim=4)

# Every piece of an implicitly concatenated string needs its own `f` prefix;
# without it the second half printed the literal text "{embed.weight.dtype}".
embed_summary = (f'Parameter embedding_weight ({embed.weight.shape}, '
                 f'dtype={embed.weight.dtype})')
print(embed_summary)

Parameter embedding_weight (torch.Size([20, 4]), dtype={embed.weight.dtype})


In [96]:
# Index batch [[1, 2, 3], [4, 5, 6]]: two rows of three indices each.
x = torch.arange(1, 7).reshape(2, 3)
# Looking up every index adds a trailing embedding dimension to the input shape.
embed(x)

tensor([[[ 0.2175, -2.0603,  1.6071,  0.6256],
         [-0.9642,  1.2068, -1.6545, -1.3753],
         [ 0.0942,  0.9756, -0.9378, -1.1653]],

        [[-0.3458, -0.2209, -0.1117, -1.6890],
         [ 0.6608,  0.5684, -0.0803,  0.1461],
         [ 1.1185,  0.1876, -1.0653,  0.2527]]], grad_fn=<EmbeddingBackward>)

**From skip-gram to CBOW: the skip-gram forward pass first, then a CBOW model**

In [4]:
def skip_gram(center, contexts_and_negatives, embed_v, embed_u):
    """Score each center word against its context and noise words.

    Args:
        center: index tensor looked up through ``embed_v``.
        contexts_and_negatives: index tensor looked up through ``embed_u``.
        embed_v: embedding layer applied to ``center``.
        embed_u: embedding layer applied to ``contexts_and_negatives``.

    Returns:
        The batched matrix product of the center embeddings with the
        transposed context/noise embeddings, i.e. one dot product per
        (center, context-or-noise) pair in each batch element.
    """
    center_vecs = embed_v(center)
    other_vecs = embed_u(contexts_and_negatives)
    # Swap the last two axes so bmm contracts over the embedding dimension.
    return center_vecs.bmm(other_vecs.permute(0, 2, 1))

In [60]:
# Dummy inputs for a smoke test: every entry is index 1.
# One center word per example, batch of 2.
center = torch.ones(2, 1, dtype=torch.long)
# Four context/noise words per example, batch of 2.
cont_neg = torch.ones(2, 4, dtype=torch.long)

In [38]:
# The same embedding table serves as both the center and the context lookup here.
skip_gram(center, cont_neg, embed_v=embed, embed_u=embed)

tensor([[[7.2665, 7.2665, 7.2665, 7.2665]],

        [[7.2665, 7.2665, 7.2665, 7.2665]]], grad_fn=<BmmBackward0>)

In [121]:
class cbow(nn.Module):
    """Feed-forward CBOW-style model over a fixed-size context window.

    The embeddings of all context words are concatenated, passed through one
    ReLU hidden layer, and projected to one score per vocabulary entry.
    """

    def __init__(self, vocab_size, context_size, emb_dim, hidden_size=128):
        """Build the layers.

        Args:
            vocab_size: number of rows in the embedding table and number of
                output scores.
            context_size: number of context indices per example; together with
                ``emb_dim`` it fixes the input width of the first linear layer.
            emb_dim: size of each embedding vector.
            hidden_size: width of the hidden layer.
        """
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim)
        self.fc1 = nn.Linear(context_size * emb_dim, hidden_size)
        self.fc2 = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, batch_size=None):
        """Compute vocabulary scores for a batch of context windows.

        Args:
            x: integer index tensor holding ``context_size`` indices per
                example.
            batch_size: number of examples in ``x``. Optional; when omitted it
                is taken from the leading dimension of ``x`` (or 1 for a 1-D,
                unbatched input), so callers no longer need to repeat it.

        Returns:
            Tensor of shape ``(batch_size, vocab_size)`` with unnormalized
            scores.
        """
        if batch_size is None:
            batch_size = x.shape[0] if x.dim() > 1 else 1
        # Concatenate the context embeddings of each example into one row.
        flat = self.embedding(x).view(batch_size, -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [122]:
# Toy-sized instance: 20-entry vocabulary, 5 context words, 4-dim embeddings.
model = cbow(
    vocab_size=20,
    context_size=5,
    emb_dim=4,
)

In [123]:
# Dummy batch: 2 examples, each with 5 context indices, all equal to 1.
word = torch.ones(2, 5, dtype=torch.long)

In [125]:
# Call the module itself rather than .forward() so nn.Module's __call__
# machinery (registered hooks) runs; the returned scores are the same.
model(word, 2)

tensor([[ 0.1540, -0.1793, -0.3494, -0.0374, -0.0552, -0.1179, -0.0022,  0.2474,
         -0.0559, -0.0038,  0.0825,  0.3538,  0.2691, -0.2226, -0.0300, -0.1146,
          0.2610, -0.1271, -0.1734,  0.1292],
        [ 0.1540, -0.1793, -0.3494, -0.0374, -0.0552, -0.1179, -0.0022,  0.2474,
         -0.0559, -0.0038,  0.0825,  0.3538,  0.2691, -0.2226, -0.0300, -0.1146,
          0.2610, -0.1271, -0.1734,  0.1292]], grad_fn=<AddmmBackward>)

In [126]:
# One row of vocabulary scores per example; invoke the module via __call__
# instead of .forward() so any registered hooks are honored.
model(word, 2).shape

torch.Size([2, 20])