### 构造word_embedding

### 构造position embeddings
$$ PE(pos, 2i) = sin(pos/10000^{2i/d_{model}})  $$
$$ PE(pos, 2i+1) = cos(pos/1000^{2i/d_{model}})  $$

 where $pos$ is the position and $i$ is the dimension. 

In [37]:
import torch
import numpy
import torch.nn as nn
import torch.nn.functional as F
# 关于word embedding，以序列建模为例
# 考虑source sentence 和 target sentence
# 构建序列，序列的词符以其在词表中的索引形式表示

batch_size = 2

# 单词表大小
max_num_src_words = 8
max_num_tgt_words = 8
model_dim = 8 # 原始论文512

# 序列的最大长度
max_src_seq_len = 5
max_tgt_seq_len = 5
max_position_len = 5

# src_len 和 tgt_len 是每个批次的源和目标序列长度
src_len = torch.randint(2, 5, (batch_size,))
tgt_len = torch.randint(2, 5, (batch_size,))

# 单词索引构成源句子和目标句子，并且做了padding，默认值为0
src_seq = torch.cat([torch.unsqueeze(F.pad(torch.randint(1, max_num_src_words, (L,)), (0, max_src_seq_len-L)), 0) for L in src_len])
tgt_seq = torch.cat([torch.unsqueeze(F.pad(torch.randint(1, max_num_tgt_words, (L,)), (0, max_tgt_seq_len-L)), 0) for L in tgt_len])

# 构造word embedding
## 构建embedding table
src_embedding_table = nn.Embedding(max_num_src_words + 1, model_dim) # 当加 +1 时，通常是为了为 填充符号（padding token） 留出一个额外的位置：
tgt_embedding_table = nn.Embedding(max_num_tgt_words + 1, model_dim)
## 依据table构建embedding
src_embdding = src_embedding_table(src_seq)
tgt_embdding = tgt_embedding_table(tgt_seq)

# position embedding 注意pos代表行，i代表列，分别反映位置变化和维度变化
pos_mat = torch.arange(max_position_len).reshape((-1, 1))  
i_mat = torch.pow(10000, torch.arange(0, model_dim, 2).reshape(([1, -1])) / model_dim)
pe_embedding_table = torch.zeros(max_position_len, model_dim)

pe_embedding_table[:, 0::2] = torch.sin(pos_mat / i_mat)  # 偶数维度使用sin
pe_embedding_table[:, 1::2] = torch.cos(pos_mat / i_mat)  # 奇数维度使用cos

pe_embedding = nn.Embedding(max_position_len, model_dim)
pe_embedding.weight = nn.Parameter(pe_embedding_table, requires_grad=False)

# 生成源序列和目标序列的position embedding
src_pos = torch.cat([torch.unsqueeze(torch.arange(max(src_len)), 0) for _ in src_len]).to(torch.int32)
tgt_pos = torch.cat([torch.unsqueeze(torch.arange(max(tgt_len)), 0) for _ in tgt_len]).to(torch.int32)
src_pe_embedding = pe_embedding(src_pos)
tgt_pe_embedding = pe_embedding(tgt_pos)







print(src_len)
print(tgt_len)
print(src_seq)
# print(src_embedding_table.weight) 
# print(src_embdding.size())
print(src_pos)
print(tgt_pos)



tensor([2, 2])
tensor([4, 2])
tensor([[4, 1, 0, 0, 0],
        [2, 6, 0, 0, 0]])
tensor([[0, 1],
        [0, 1]], dtype=torch.int32)
tensor([[0, 1, 2, 3],
        [0, 1, 2, 3]], dtype=torch.int32)
