<a href="https://colab.research.google.com/github/DavoodSZ1993/Dive_into_Deep_Learning/blob/main/15_4_pretraining_word2vec.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install d2l==1.0.0-alpha1.post0 --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.0/93.0 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.9/121.9 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.9/84.9 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m37.3 MB/s[0m eta [36m0:00:00[0m
[?25h

## 15.4 Pretraining word2vec

In [2]:
import math
import torch
from torch import nn
from d2l import torch as d2l

In [3]:
# Hyperparameters handed positionally to d2l.load_data_ptb below.
batch_size = 512
max_window_size = 5
num_noise_words = 5

# Load the PTB data; the call is unpacked into an iterator and a vocabulary.
data_iter, vocab = d2l.load_data_ptb(
    batch_size, max_window_size, num_noise_words)

Downloading ../data/ptb.zip from http://d2l-data.s3-accelerate.amazonaws.com/ptb.zip...




### 15.4.1 The Skip-Gram Model

#### Embedding Layer

In [4]:
# Toy embedding table: 20 indices, each mapped to a 4-dimensional vector.
embed = nn.Embedding(20, 4)
# Report the shape and dtype of the layer's single weight matrix.
print('Parameter embedding_weight '
      f'({embed.weight.shape}, dtype={embed.weight.dtype})')

Parameter embedding_weight (torch.Size([20, 4]), dtype=torch.float32)


In [6]:
# A (2, 3) batch of token indices; every index must be below the table size.
x = torch.tensor([[1, 2, 3], [4, 5, 6]])
# Each index is replaced by its 4-dimensional row, giving a (2, 3, 4) result.
embed(x), embed(x).shape

(tensor([[[-0.1920,  0.2009,  0.9424, -0.7605],
          [ 0.5801, -2.1541, -0.3997,  0.6834],
          [ 1.0473, -0.3226,  0.9451, -0.4419]],
 
         [[-1.0552,  0.9272, -0.1512,  1.2081],
          [-0.8601,  0.7703, -1.5981,  0.8459],
          [-0.1257, -1.7021, -1.3471,  0.0515]]], grad_fn=<EmbeddingBackward0>),
 torch.Size([2, 3, 4]))

#### Defining the Forward Propagation

In [7]:
def skip_gram(center, contexts_and_negatives, embed_v, embed_u):
  """Score each center word against its context/noise words.

  Both index tensors are looked up in their own embedding layer, then a
  batched matrix product pairs every center vector with every context/noise
  vector belonging to the same example.

  Args:
    center: index tensor passed to `embed_v`; the example call in this
      notebook uses shape (batch, 1).
    contexts_and_negatives: index tensor passed to `embed_u`; the example
      call in this notebook uses shape (batch, 4).
    embed_v: callable (e.g. `nn.Embedding`) producing the center vectors.
    embed_u: callable producing the context/noise vectors.

  Returns:
    Tensor of dot products; (batch, 1, 4) for the example shapes above.
  """
  center_vecs = embed_v(center)
  candidate_vecs = embed_u(contexts_and_negatives)
  # Swap the last two axes so bmm contracts over the embedding dimension.
  return torch.bmm(center_vecs, candidate_vecs.transpose(1, 2))

In [8]:
# Shape check: two examples, each with one center index and four
# context/noise indices; the toy `embed` layer serves both lookups.
skip_gram(
    torch.ones(2, 1, dtype=torch.long),
    torch.ones(2, 4, dtype=torch.long),
    embed,
    embed,
).shape

torch.Size([2, 1, 4])

### 15.4.2 Training

#### Binary Cross-Entropy Loss

In [11]:
class SigmoidBCELoss(nn.Module):
  """Binary cross-entropy loss on logits, with optional masking.

  `forward` computes the element-wise sigmoid BCE, passes `mask` as the
  per-element `weight` (so a zero entry zeroes that element's loss), and
  averages over dimension 1. The average divides by the full length of
  dimension 1, masked positions included, so callers that want a mean over
  unmasked positions only must rescale the result themselves.
  """

  def forward(self, inputs, target, mask=None):
    per_element = nn.functional.binary_cross_entropy_with_logits(
        inputs, target, weight=mask, reduction="none")
    return per_element.mean(dim=1)


# Shared loss instance used by the cells below.
loss = SigmoidBCELoss()

In [12]:
# Identical logits for both examples; each `label` row marks one positive.
pred = torch.tensor([[1.1, -2.2, 3.3, -4.4],
                     [1.1, -2.2, 3.3, -4.4]])
label = torch.tensor([[1.0, 0.0, 0.0, 0.0],
                      [0.0, 1.0, 0.0, 0.0]])
# A zero in `mask` removes that position from the loss (row 2 keeps two).
mask = torch.tensor([[1, 1, 1, 1],
                     [1, 1, 0, 0]])
# loss() averages over every column, so rescale by (columns / unmasked count)
# to obtain the mean over the unmasked positions only.
loss(pred, label, mask) * mask.size(1) / mask.sum(dim=1)

tensor([0.9352, 1.8462])

In [13]:
def sigmd(x):
  """Return -log(sigmoid(x)) for a Python number, computed stably.

  Uses the identity -log(sigmoid(x)) = log(1 + exp(-x)) and only ever
  exponentiates a non-positive value, so large-magnitude inputs neither
  overflow `math.exp` nor round the result to zero prematurely.

  Args:
    x: a real number (the logit).

  Returns:
    The non-negative float -log(1 / (1 + exp(-x))).
  """
  if x >= 0:
    # exp(-x) <= 1 here; log1p keeps precision when exp(-x) is tiny.
    return math.log1p(math.exp(-x))
  # For x < 0 rewrite log(1 + exp(-x)) as -x + log(1 + exp(x)).
  return -x + math.log1p(math.exp(x))

# Manual check of the masked loss: a positive label on logit z contributes
# sigmd(z) and a negative label contributes sigmd(-z).
# First example: all four positions count, so average over 4 terms.
print(f'{(sigmd(1.1) + sigmd(2.2) + sigmd(-3.3) + sigmd(4.4)) / 4: .4f}')
# Second example: only the first two positions are unmasked, so average over 2.
print(f'{(sigmd(-1.1) + sigmd(-2.2)) / 2:.4f}')

 0.9352
1.8462


#### Initializing Model Parameters