# Input


## Embedding


In [24]:
import torch
import torch.nn as nn
import math
from torch.autograd import Variable

In [79]:
class Embeddings(nn.Module):

    def __init__(self,d_model,vocab):
        #d_model:dimension of embedding 
        #vocab:size of wordlist
        super(Embeddings,self).__init__()
        self.lut =nn.Embedding(vocab,d_model)
        self.d_model =d_model
    def forward(self,x):
        return self.lut(x)*math.sqrt(self.d_model)
    
d_model =512
vocab = 1000
x=torch.LongTensor([[1,2,3,4],[2,2,3,4]])
emb =Embeddings(d_model,vocab)
emb_rslt=emb(x)
print("embr",emb_rslt)
print("embrshape",emb_rslt.shape)

    

embr tensor([[[-14.8289,  -8.2562,  54.6304,  ...,   8.4305,  -3.6507, -27.6918],
         [-10.6098,  -9.9717,  -2.4429,  ...,   8.3850,  -4.9843,  -6.0591],
         [ -1.2095,  22.6689,   5.8902,  ...,  18.8349,   0.3589,  11.6905],
         [ 38.0961, -13.7467,  36.3315,  ...,  11.5945, -25.4663,  -5.4805]],

        [[-10.6098,  -9.9717,  -2.4429,  ...,   8.3850,  -4.9843,  -6.0591],
         [-10.6098,  -9.9717,  -2.4429,  ...,   8.3850,  -4.9843,  -6.0591],
         [ -1.2095,  22.6689,   5.8902,  ...,  18.8349,   0.3589,  11.6905],
         [ 38.0961, -13.7467,  36.3315,  ...,  11.5945, -25.4663,  -5.4805]]],
       grad_fn=<MulBackward0>)
embrshape torch.Size([2, 4, 512])


In [None]:
#nn.Embedding(vocab,d_model)
#integer in[0,vocab-1]->vector in R^d_model(randomly)
#integer in tensor lut place by place
embedding = nn.Embedding(10,3)
input = torch.LongTensor([[1,2,4,5],[2,3,4,9]])
print(embedding(input))
#10:max integer0-9
#3:each ->3d
#[[1,2,4,5],[2,3,4,9]]
#  v
#[ 1.8997,  0.6557, -0.1424]

tensor([[[ 1.8997,  0.6557, -0.1424],
         [-0.0872,  0.1228, -0.7261],
         [ 0.4782,  0.0221, -1.3902],
         [ 0.5069,  0.9979,  0.0901]],

        [[-0.0872,  0.1228, -0.7261],
         [ 1.5800,  0.5353,  0.6007],
         [ 0.4782,  0.0221, -1.3902],
         [ 0.1910, -1.2970, -0.9452]]], grad_fn=<EmbeddingBackward0>)


In [27]:
embedding = nn.Embedding(10,3,padding_idx=1)
input=torch.LongTensor([1,0])
print(embedding(input))
#[1,0]
# V
#[ 0.0000,  0.0000,  0.0000]
#padding_idx->0

tensor([[ 0.0000,  0.0000,  0.0000],
        [ 1.6208,  0.2372, -1.3009]], grad_fn=<EmbeddingBackward0>)


## Position Encoding


In [None]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model,dropout,max_len=5000):
        #dropout probablity set to 0
        # max length of sentence no more than 5000 supposed
        super(PositionalEncoding,self).__init__()

        self.dropout = nn.Dropout(dropout)

        #position encoding martrix max_len* d_model
        pe = torch.zeros(max_len,d_model)

        #absolut pe
        #arange(0,max_len): [maxlen]
        #arange(0,max_len).unsqueeze(1):[maxlen*1]
        position = torch.arange(0,max_len).unsqueeze(1)

        #div_term diver matrix
        div_term = torch.exp(torch.arange(0,d_model,2)*-(math.log(10000.0)))

        pe[:,0::2]=torch.sin(position*div_term)
        pe[:,1::2]=torch.cos(position*div_term)

        #2d->3d
        pe =pe.unsqueeze(0)
        #rigister as buffer.Is not super parameters.Wont upgrade

        self.register_buffer('pe',pe)

    def forward(self,x):
            #x;seq2vec
            #maxlen is too long shorten to lenth of x
            x = x + Variable(self.pe[:,:x.size(1)],requires_grad=False)
            return self.dropout(x)
        

d_model = 512
dropout = 0.1
max_len = 60
x = emb_rslt
pe = PositionalEncoding(d_model,dropout,max_len)
pe_result = pe(x)
print(pe_result)
print(pe_result.shape)

tensor([[[ -0.0000,  -8.0624,  60.7004,  ...,  10.4784,  -4.0564, -29.6576],
         [-10.8537, -10.4793,  -2.7143,  ...,  10.4278,  -5.5381,  -5.6212],
         [ -0.3336,   0.0000,   6.5446,  ...,  22.0388,   0.0000,  14.1006],
         [ 42.4858, -16.3742,  40.3683,  ...,   0.0000, -28.2959,  -4.9783]],

        [[-11.7887,  -9.9685,  -2.7143,  ...,  10.4278,  -5.5381,  -5.6212],
         [-10.8537, -10.4793,  -2.7143,  ...,  10.4278,  -5.5381,  -5.6212],
         [ -0.3336,  24.7253,   6.5446,  ...,  22.0388,   0.3988,  14.1006],
         [ 42.4858, -16.3742,  40.3683,  ...,   0.0000, -28.2959,  -4.9783]]],
       grad_fn=<MulBackward0>)
torch.Size([2, 4, 512])


In [None]:
#nn.Dropout
#drop nerve at percentage p
# remain all /(1-p)
m = nn.Dropout(0.2)
input = torch.randn(4,5)
print(input)
output = m(input)
print(output)

tensor([[-2.5505e-01, -1.0910e+00, -1.1701e-01, -1.6812e+00, -6.8660e-01],
        [-5.8749e-01,  9.7613e-01,  2.2157e-01, -3.0746e-01, -1.8241e-01],
        [-7.2120e-01,  3.8040e-02, -1.7941e+00, -1.1828e-03,  2.4766e-02],
        [ 1.8616e+00, -1.5868e+00, -1.3467e+00,  5.3304e-01,  1.4029e+00]])
tensor([[-0.3188, -1.3638, -0.0000, -0.0000, -0.8582],
        [-0.7344,  1.2202,  0.0000, -0.3843, -0.2280],
        [-0.9015,  0.0475, -2.2427, -0.0000,  0.0310],
        [ 0.0000, -1.9835, -0.0000,  0.6663,  1.7536]])


In [92]:
#torch.unsqueeze
#add a dimension at position n
x = torch.tensor([1,2,3,4])
print(torch.unsqueeze(x,0))
print(torch.unsqueeze(x,1))
print(x.shape)
print(x.unsqueeze(1).shape)

tensor([[1, 2, 3, 4]])
tensor([[1],
        [2],
        [3],
        [4]])
torch.Size([4])
torch.Size([4, 1])


In [142]:
#demo @ml=4 dm =2
a = torch.zeros(4,2)
print("pe",a)
b = torch.arange(0,4)
print("abs p",b)
b = b.unsqueeze(1)
print("abs p unsqueeze",b)
div_mid = torch.arange(0,2,2)#0,d_model,2
print("div_mid",div_mid)
div_t =torch.exp(div_mid*-(math.log(10000.0)/d_model))
print("dt",div_t)
print("abs p*dt",b*div_t)
# abs p:[4,1]
# dt :[1,1]
# broadcasting:[4,1]
print("size",a[:,0::2].shape)#[4,1]
a[:,0::2]=torch.sin(b*div_t)
a[:,1::2]=torch.cos(b*div_t)
print("pe",a)
a=a.unsqueeze(0)
print("pe unsqueeze",a)


input = torch.LongTensor([[[13,2],[13,2], [13,2], [13,2]]]) 

print("input size:", input.size())  # (3,4,1)

output = input.float() + a[:, :input.size(1)]
print("output:", output)

pe tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])
abs p tensor([0, 1, 2, 3])
abs p unsqueeze tensor([[0],
        [1],
        [2],
        [3]])
div_mid tensor([0])
dt tensor([1.])
abs p*dt tensor([[0.],
        [1.],
        [2.],
        [3.]])
size torch.Size([4, 1])
pe tensor([[ 0.0000,  1.0000],
        [ 0.8415,  0.5403],
        [ 0.9093, -0.4161],
        [ 0.1411, -0.9900]])
pe unsqueeze tensor([[[ 0.0000,  1.0000],
         [ 0.8415,  0.5403],
         [ 0.9093, -0.4161],
         [ 0.1411, -0.9900]]])
input size: torch.Size([1, 4, 2])
output: tensor([[[13.0000,  3.0000],
         [13.8415,  2.5403],
         [13.9093,  1.5839],
         [13.1411,  1.0100]]])
