In [1]:
import torch
import torch.nn

In [2]:
# Token -> integer id lookup table. Ids are assigned by position in the list
# below, starting at 0, and serve as row indices into the embedding matrix.
vocab = {
    token: index
    for index, token in enumerate(['나', '는', '김밥', '과', '라면', '이', '좋다'])
}

In [3]:
# Seed the global RNG before building the layer: nn.Embedding draws its
# initial weights at random, so without a fixed seed the matrix printed below
# (and every lookup made from it later) changes on each kernel restart.
torch.manual_seed(0)

dim = 10  # length of each word vector
# One trainable row of length `dim` per vocabulary entry.
emb_mtx = torch.nn.Embedding(len(vocab), dim)

print(emb_mtx)         # layer summary: Embedding(num_embeddings, embedding_dim)
print(emb_mtx.weight)  # the lookup table itself, one row per vocabulary id

Embedding(7, 10)
Parameter containing:
tensor([[ 0.8255,  0.1371, -0.0986,  0.4993, -0.2734,  2.3330, -1.3574,  1.0892,
          0.6537, -2.0155],
        [-0.5497,  0.7832, -0.7299, -0.1728,  0.6307, -1.3166, -0.3288,  0.5738,
          0.7838, -0.7975],
        [ 0.4045, -0.3912, -0.3746,  0.0188,  0.0469, -0.0848, -0.8266,  0.4567,
          0.5474,  1.4800],
        [-1.9969, -0.9921,  0.8379, -0.2754,  0.5166, -0.3838, -0.9941, -0.2567,
          1.1535,  0.3611],
        [-0.6225, -1.0859,  1.2012,  1.5256,  1.3607, -0.7187,  0.2482,  0.0042,
         -0.0353,  0.5840],
        [ 0.8322, -0.1028,  1.3203, -0.2176, -0.7774, -0.2918, -0.0775, -0.2857,
          1.3952, -1.4729],
        [-1.0791,  1.9437,  0.1480,  1.0292,  0.7533,  1.7348,  2.0790, -2.0058,
          1.0209,  0.5637]], requires_grad=True)


In [10]:
## Practice 1 ##

# Fetch the embedding vector of the single word '라면'.
# Wrapping the vocabulary id in a list yields a 1-D index tensor, so the
# lookup returns a matrix with one row rather than a bare vector.
idx = torch.tensor(
    [vocab['라면']],
    dtype=torch.long,
)
print("Tensor: {}\nShape: {}\n".format(idx, idx.shape))

# Calling the layer on the index tensor selects the matching weight row.
emb = emb_mtx(idx)
print("Tensor:\n{}\nShape: {}".format(emb, emb.shape))

Tensor: tensor([4])
Shape: torch.Size([1])

Tensor:
tensor([[-0.6225, -1.0859,  1.2012,  1.5256,  1.3607, -0.7187,  0.2482,  0.0042,
         -0.0353,  0.5840]], grad_fn=<EmbeddingBackward>)
Shape: torch.Size([1, 10])


In [17]:
## Practice 2 ##

# Embed every word of one tokenized sentence: "김밥 라면 좋다".
sent = ['김밥', '라면', '좋다']

# Map each token to its vocabulary id and pack the ids into a 1-D
# LongTensor with one entry per token.
idxs = torch.tensor([vocab[word] for word in sent], dtype=torch.long)
print("Tensor: {}\nShape: {}\n".format(idxs, idxs.shape))

# The layer returns one embedding row per index, in sentence order.
emb = emb_mtx(idxs)
print("Tensor:\n{}\nShape: {}".format(emb, emb.shape))

Tensor: tensor([2, 4, 6])
Shape: torch.Size([3])

Tensor:
tensor([[ 0.4045, -0.3912, -0.3746,  0.0188,  0.0469, -0.0848, -0.8266,  0.4567,
          0.5474,  1.4800],
        [-0.6225, -1.0859,  1.2012,  1.5256,  1.3607, -0.7187,  0.2482,  0.0042,
         -0.0353,  0.5840],
        [-1.0791,  1.9437,  0.1480,  1.0292,  0.7533,  1.7348,  2.0790, -2.0058,
          1.0209,  0.5637]], grad_fn=<EmbeddingBackward>)
Shape: torch.Size([3, 10])


In [21]:
## Practice 03 ##
# Build word embeddings for a whole batch: ["김밥 라면 좋다", "나 는 라면"]

sents = [['김밥', '라면', '좋다'],
         ['나', '는', '라면']]

# Collect one list of vocabulary ids per sentence. Both sentences have three
# tokens, so the nested list is rectangular and converts to a 2-D tensor.
batch_idxs = []
for sent in sents:
    idxs = [vocab[word] for word in sent]
    batch_idxs.append(idxs)

batch_idxs = torch.tensor(batch_idxs, dtype=torch.long)
print("Tensor:\n{}\nShape: {}".format(batch_idxs, batch_idxs.shape))
print('')

# The lookup keeps the index tensor's layout and appends the embedding axis.
emb = emb_mtx(batch_idxs)
print("Tensor:\n{}\nShape: {}".format(emb, emb.shape))  # two 3x10 matrices, one per sentence

Tensor:
tensor([[2, 4, 6],
        [0, 1, 4]])
Shape: torch.Size([2, 3])

Tensor:
tensor([[[ 0.4045, -0.3912, -0.3746,  0.0188,  0.0469, -0.0848, -0.8266,
           0.4567,  0.5474,  1.4800],
         [-0.6225, -1.0859,  1.2012,  1.5256,  1.3607, -0.7187,  0.2482,
           0.0042, -0.0353,  0.5840],
         [-1.0791,  1.9437,  0.1480,  1.0292,  0.7533,  1.7348,  2.0790,
          -2.0058,  1.0209,  0.5637]],

        [[ 0.8255,  0.1371, -0.0986,  0.4993, -0.2734,  2.3330, -1.3574,
           1.0892,  0.6537, -2.0155],
         [-0.5497,  0.7832, -0.7299, -0.1728,  0.6307, -1.3166, -0.3288,
           0.5738,  0.7838, -0.7975],
         [-0.6225, -1.0859,  1.2012,  1.5256,  1.3607, -0.7187,  0.2482,
           0.0042, -0.0353,  0.5840]]], grad_fn=<EmbeddingBackward>)
Shape: torch.Size([2, 3, 10])
