In [1]:
import torch
import torch.nn

In [2]:
# Toy vocabulary: each token's id is simply its position in the list below,
# so '나' -> 0, '는' -> 1, ... '좋다' -> 6.
vocab = {
    token: token_id
    for token_id, token in enumerate(['나', '는', '김밥', '과', '라면', '이', '좋다'])
}

In [3]:
# Width of each word vector.
dim = 10

# Lookup table with one trainable row of `dim` floats per vocabulary entry.
# No seed is set here, so the initial weights differ between kernel runs.
emb_mtx = torch.nn.Embedding(num_embeddings=len(vocab), embedding_dim=dim)

# Show the module summary first, then the full weight matrix.
print(emb_mtx, emb_mtx.weight, sep="\n")

Embedding(7, 10)
Parameter containing:
tensor([[-0.1693, -0.3529,  1.0629,  0.1398, -0.1607,  0.1683,  0.6768, -2.5792,
          0.5665, -0.5511],
        [-0.3787, -0.2830,  1.1567, -1.0070, -0.5515,  1.9946, -2.5155,  0.0406,
          0.0478, -0.7321],
        [ 0.6929, -0.8344, -0.2919, -0.1401,  1.0639, -0.1878, -0.6071, -2.3223,
         -1.0445, -0.3781],
        [-2.8893, -0.7158,  0.2928, -0.3654, -1.1717,  0.2758, -0.5463, -0.2353,
         -1.1753,  0.7306],
        [ 0.5907, -2.2213,  0.4761,  0.3602,  0.3796, -0.7976,  1.5590, -1.1483,
          0.9510,  1.2343],
        [-0.4532,  0.4466,  0.4170,  0.3523, -0.2217, -0.8274,  0.7790, -0.9496,
         -1.5910,  0.4920],
        [ 0.2023,  0.8373, -1.4675, -0.8615, -0.1627, -0.0912, -0.7329,  0.6762,
         -0.0879,  0.3074]], requires_grad=True)


In [5]:
## Practice 1 ##

# Look up the word embedding of the single token '라면'.
# The id is wrapped in a one-element long tensor before being passed to the layer.
idx = torch.tensor([vocab['라면']], dtype=torch.long)
print("Tensor: {}\nShape: {}\n".format(idx, idx.shape))

# One index in -> one row of the embedding matrix out.
emb = emb_mtx(idx)
print("Tensor:\n{}\nShape: {}".format(emb, emb.shape))

Tensor: tensor([4])
Shape: torch.Size([1])

Tensor:
tensor([[ 0.5907, -2.2213,  0.4761,  0.3602,  0.3796, -0.7976,  1.5590, -1.1483,
          0.9510,  1.2343]], grad_fn=<EmbeddingBackward>)
Shape: torch.Size([1, 10])


In [7]:
## Practice 2 ##

# Embed every token of the sentence "김밥 라면 좋다" with a single lookup.
sent = ['김밥', '라면', '좋다']

# Map each token to its vocabulary id and pack the ids into one long tensor.
idxs = torch.tensor([vocab[word] for word in sent], dtype=torch.long)
print("Tensor: {}\nShape: {}\n".format(idxs, idxs.shape))

# The layer returns one embedding row per id, in sentence order.
emb = emb_mtx(idxs)
print("Tensor:\n{}\nShape: {}".format(emb, emb.shape))

Tensor: tensor([2, 4, 6])
Shape: torch.Size([3])

Tensor:
tensor([[ 0.6929, -0.8344, -0.2919, -0.1401,  1.0639, -0.1878, -0.6071, -2.3223,
         -1.0445, -0.3781],
        [ 0.5907, -2.2213,  0.4761,  0.3602,  0.3796, -0.7976,  1.5590, -1.1483,
          0.9510,  1.2343],
        [ 0.2023,  0.8373, -1.4675, -0.8615, -0.1627, -0.0912, -0.7329,  0.6762,
         -0.0879,  0.3074]], grad_fn=<EmbeddingBackward>)
Shape: torch.Size([3, 10])


In [8]:
## Practice 3 ##
# Build batch-level word embeddings for: ["김밥 라면 좋다", "나 는 라면"]

sents = [['김밥', '라면', '좋다'],
         ['나', '는', '라면']]

# Convert each sentence to a row of vocabulary ids. Both sentences have
# three tokens, so the nested list is rectangular and converts directly
# into a 2-D long tensor (sentences x tokens).
batch_idxs = torch.tensor(
    [[vocab[word] for word in sent] for sent in sents],
    dtype=torch.long,
)
print("Tensor:\n{}\nShape: {}".format(batch_idxs, batch_idxs.shape))
print('')

# The lookup keeps the index tensor's shape and appends the embedding
# dimension as the last axis.
emb = emb_mtx(batch_idxs)
print("Tensor:\n{}\nShape: {}".format(emb, emb.shape))

Tensor:
tensor([[2, 4, 6],
        [0, 1, 4]])
Shape: torch.Size([2, 3])

Tensor:
tensor([[[ 0.6929, -0.8344, -0.2919, -0.1401,  1.0639, -0.1878, -0.6071,
          -2.3223, -1.0445, -0.3781],
         [ 0.5907, -2.2213,  0.4761,  0.3602,  0.3796, -0.7976,  1.5590,
          -1.1483,  0.9510,  1.2343],
         [ 0.2023,  0.8373, -1.4675, -0.8615, -0.1627, -0.0912, -0.7329,
           0.6762, -0.0879,  0.3074]],

        [[-0.1693, -0.3529,  1.0629,  0.1398, -0.1607,  0.1683,  0.6768,
          -2.5792,  0.5665, -0.5511],
         [-0.3787, -0.2830,  1.1567, -1.0070, -0.5515,  1.9946, -2.5155,
           0.0406,  0.0478, -0.7321],
         [ 0.5907, -2.2213,  0.4761,  0.3602,  0.3796, -0.7976,  1.5590,
          -1.1483,  0.9510,  1.2343]]], grad_fn=<EmbeddingBackward>)
Shape: torch.Size([2, 3, 10])
