In [None]:
# Attention 연산을 구현해 보아요!
import tensorflow as tf

# Example text, already split into tokens.
tokens = ['나는', '너를', '사랑해']

# Map every distinct token to an integer id (a real pipeline would look the
# ids up in a pre-built vocabulary). dict.fromkeys() drops repeated tokens
# while keeping first-seen order, so the ids stay contiguous
# (0 .. vocab_size - 1) even if a word occurs more than once in the sentence.
word_to_index = {word: i for i, word in enumerate(dict.fromkeys(tokens))}
print(word_to_index) # {'나는': 0, '너를': 1, '사랑해': 2}

# Turn the sentence into a sequence of ids => [0 1 2]
input_ids = [word_to_index[word] for word in tokens]
print(input_ids) # [0, 1, 2]

# The input is now numeric, so the next step is the embedding lookup,
# done here with the TensorFlow Keras Embedding layer.
# Convert the id list into a TensorFlow tensor first.
input_tensor = tf.constant(input_ids)
print(input_tensor) # tf.Tensor([0 1 2], shape=(3,), dtype=int32)

# Embedding layer. input_dim is the vocabulary size; it is derived from the
# mapping instead of being hard-coded so it always covers every id.
vocab_size = len(word_to_index)
embedding_dim = 4
embedding_layer = tf.keras.layers.Embedding(input_dim=vocab_size,
                                            output_dim=embedding_dim)
embeddings = embedding_layer(input_tensor)
print(embeddings.numpy())
# Sample output from one run (the values depend on the layer's initial
# weights, so they differ between runs):
# [[ 0.03793142 -0.02086505  0.0447511   0.03991923]  '나는'
#  [-0.01349724 -0.03959888  0.00891063 -0.03473145]  '너를'
#  [-0.04243529  0.02169938  0.01712142  0.031848  ]] '사랑해'

# Three separate bias-free Dense layers produce Query, Key and Value
# from the same embeddings.
d_model = 4
W_Q = tf.keras.layers.Dense(units=d_model,
                            use_bias=False)
W_K = tf.keras.layers.Dense(units=d_model,
                            use_bias=False)
W_V = tf.keras.layers.Dense(units=d_model,
                            use_bias=False)

Q = W_Q(embeddings)
K = W_K(embeddings)
V = W_V(embeddings)

# Resulting Query values (sample output from one run)
print(Q.numpy())
# [[ 0.02655182  0.02969113  0.01650857  0.05720312]
#  [ 0.03619953  0.00891391  0.00112747  0.01821991]
#  [-0.0515869   0.03455075 -0.03510647 -0.00033337]]
# Resulting Key values (sample output from the same run)
print(K.numpy())
# [[-0.06212836 -0.00620023 -0.00482773 -0.01429406]
#  [ 0.02001747 -0.03241587 -0.03564681  0.03476292]
#  [ 0.02779969 -0.01811085 -0.0040384  -0.05463289]]
# Resulting Value values (sample output from the same run). W_V has its own
# weights, so these differ from the Key values above.
print(V.numpy())
# [[ 0.04660624  0.02435018 -0.05015815 -0.00396201]
#  [-0.01280775 -0.02385032  0.03465305  0.02537987]
#  [-0.0175597  -0.02598552 -0.00515994  0.00701259]]

{'나는': 0, '너를': 1, '사랑해': 2}
[0, 1, 2]
tf.Tensor([0 1 2], shape=(3,), dtype=int32)
[[ 0.03793142 -0.02086505  0.0447511   0.03991923]
 [-0.01349724 -0.03959888  0.00891063 -0.03473145]
 [-0.04243529  0.02169938  0.01712142  0.031848  ]]
[[ 0.02655182  0.02969113  0.01650857  0.05720312]
 [ 0.03619953  0.00891391  0.00112747  0.01821991]
 [-0.0515869   0.03455075 -0.03510647 -0.00033337]]
[[-0.06212836 -0.00620023 -0.00482773 -0.01429406]
 [ 0.02001747 -0.03241587 -0.03564681  0.03476292]
 [ 0.02779969 -0.01811085 -0.0040384  -0.05463289]]
[[ 0.04660624  0.02435018 -0.05015815 -0.00396201]
 [-0.01280775 -0.02385032  0.03465305  0.02537987]
 [-0.0175597  -0.02598552 -0.00515994  0.00701259]]


In [None]:
# At this point Query, Key and Value exist for the sentence
# ('나는 너를 사랑해'), so the attention computation can be carried out
# step by step.

# Size of the last axis of K, cast to float so it can be used for scaling.
d_k = tf.cast(tf.shape(K)[-1], dtype=tf.float32)
print(d_k) # tf.Tensor(4.0, shape=(), dtype=float32)

# Attention scores: Q multiplied by K transposed, divided by sqrt(d_k).
sqrt_d_k = tf.sqrt(d_k)
scores = tf.linalg.matmul(Q, K, transpose_b=True) / sqrt_d_k
print(scores.numpy())
# Sample output from the recorded run:
# [[-1.3655381e-03  4.8455293e-04 -1.4957197e-03]
#  [-1.2850827e-03  5.1442871e-04 -7.7531178e-05]
#  [ 1.5825183e-03 -4.5639355e-04 -9.4992819e-04]]

# Softmax over the last axis turns each row of scores into weights
# that behave like probabilities.
attention_weight = tf.math.softmax(scores, axis=-1)
print(attention_weight.numpy())
# Sample output from the recorded run:
# [[0.33314216 0.33375907 0.3330988 ]
#  [0.3329993  0.33359906 0.33340165]
#  [0.33384144 0.33316144 0.33299708]]

# Final result: the attention weights applied to V.
output = tf.linalg.matmul(attention_weight, V)
print(output.numpy())
# Sample output from the recorded run:
# [[ 0.00540269 -0.00850394 -0.00686279  0.00948674]
#  [ 0.00539276 -0.00851147 -0.00686273  0.00948536]
#  [ 0.00544472 -0.00847001 -0.00691805  0.00946808]]

tf.Tensor(4.0, shape=(), dtype=float32)
[[-1.3655381e-03  4.8455293e-04 -1.4957197e-03]
 [-1.2850827e-03  5.1442871e-04 -7.7531178e-05]
 [ 1.5825183e-03 -4.5639355e-04 -9.4992819e-04]]
[[0.33314216 0.33375907 0.3330988 ]
 [0.3329993  0.33359906 0.33340165]
 [0.33384144 0.33316144 0.33299708]]
[[ 0.00540269 -0.00850394 -0.00686279  0.00948674]
 [ 0.00539276 -0.00851147 -0.00686273  0.00948536]
 [ 0.00544472 -0.00847001 -0.00691805  0.00946808]]
