# 🚀 GPT

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/rickiepark/Generative_Deep_Learning_2nd_Edition/blob/main/notebooks/09_transformer/gpt/gpt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
  </td>
</table>

이 노트북에서는 와인 리뷰 데이터셋으로 GPT 모델을 훈련합니다.

이 코드는 케라스 웹사이트에 있는 Apoorv Nandan이 만든 훌륭한 [GPT 튜토리얼](https://keras.io/examples/generative/text_generation_with_miniature_gpt/)을 참고했습니다.

In [1]:
import html
import json
import re
import string

import numpy as np
from IPython.display import display, HTML

import tensorflow as tf
from tensorflow.keras import layers, models, losses, callbacks

## 0. 파라미터 <a name="parameters"></a>

In [2]:
# Maximum vocabulary size of the TextVectorization layer; also the input size
# of the token embedding and the width of the final softmax layer.
VOCAB_SIZE = 10000
# Number of tokens in each model input; the vectorizer emits MAX_LEN + 1
# tokens and the position embedding holds MAX_LEN positions.
MAX_LEN = 80
# Size of the token / position embedding vectors.
EMBEDDING_DIM = 256
# key_dim passed to the MultiHeadAttention layer.
KEY_DIM = 256
# Number of attention heads.
N_HEADS = 2
# Units in the first (ReLU) dense layer of the feed-forward sub-block.
FEED_FORWARD_DIM = 256
# NOTE(review): no visible cell reads VALIDATION_SPLIT — confirm whether it
# is still needed.
VALIDATION_SPLIT = 0.2
# Seed value for random operations.
SEED = 42
# When True, a previously saved model is loaded from ./models/gpt.
LOAD_MODEL = False
# Number of reviews per batch in the tf.data pipeline.
BATCH_SIZE = 32
# Number of training epochs passed to gpt.fit().
EPOCHS = 5

## 1. 데이터 로드 <a name="load"></a>

In [3]:
import sys

# 코랩일 경우 노트북에서 celeba 데이터셋을 받습니다.
if 'google.colab' in sys.modules:
    # 캐글-->Setttings-->API-->Create New Token에서
    # kaggle.json 파일을 만들어 코랩에 업로드하세요.
    from google.colab import files
    files.upload()
    !mkdir ~/.kaggle
    !cp kaggle.json ~/.kaggle/
    !chmod 600 ~/.kaggle/kaggle.json
    # celeba 데이터셋을 다운로드하고 압축을 해제합니다.
    !kaggle datasets download -d zynicide/wine-reviews
    !unzip -q wine-reviews.zip
    # model 디렉토리를 만듭니다.
    !mkdir models

Saving kaggle.json to kaggle.json
Downloading wine-reviews.zip to /content
 94% 48.0M/50.9M [00:03<00:00, 24.9MB/s]
100% 50.9M/50.9M [00:03<00:00, 16.3MB/s]


In [4]:
# Load the full dataset.
# The encoding is given explicitly so that non-ASCII text in the reviews
# (e.g. "Cuvée") is decoded the same way on every platform instead of
# depending on the locale's default encoding.
with open("./winemag-data-130k-v2.json", encoding="utf-8") as json_data:
    wine_data = json.load(json_data)

In [5]:
wine_data[10]

{'points': '87',
 'title': 'Kirkland Signature 2011 Mountain Cuvée Cabernet Sauvignon (Napa Valley)',
 'description': 'Soft, supple plum envelopes an oaky structure in this Cabernet, supported by 15% Merlot. Coffee and chocolate complete the picture, finishing strong at the end, resulting in a value-priced wine of attractive flavor and immediate accessibility.',
 'taster_name': 'Virginie Boone',
 'taster_twitter_handle': '@vboone',
 'price': 19,
 'designation': 'Mountain Cuvée',
 'variety': 'Cabernet Sauvignon',
 'region_1': 'Napa Valley',
 'region_2': 'Napa',
 'province': 'California',
 'country': 'US',
 'winery': 'Kirkland Signature'}

In [6]:
# Filter the dataset: keep only reviews whose country, province, variety and
# description are all present, and flatten each one into a single string of
# the form "wine review : <country> : <province> : <variety> : <description>".
filtered_data = [
    "wine review : "
    + " : ".join(
        (
            review["country"],
            review["province"],
            review["variety"],
            review["description"],
        )
    )
    for review in wine_data
    if all(
        review[field] is not None
        for field in ("country", "province", "variety", "description")
    )
]

In [7]:
# Count the wine reviews that passed the filter.
# NOTE(review): the printed message says "레시피" (recipe) although the data
# are wine reviews; the string is left as-is because it is runtime output.
n_wines = len(filtered_data)
print(f"{n_wines}개 레시피 로드")

129907개 레시피 로드


In [8]:
example = filtered_data[25]
print(example)

wine review : US : California : Pinot Noir : Oak and earth intermingle around robust aromas of wet forest floor in this vineyard-designated Pinot that hails from a high-elevation site. Small in production, it offers intense, full-bodied raspberry and blackberry steeped in smoky spice and smooth texture.


## 2. 데이터 토큰화 <a name="tokenize"></a>

In [9]:
# Pad punctuation with spaces so that each mark is treated as a separate 'word'.
def pad_punctuation(s):
    """Surround every punctuation mark and newline in ``s`` with spaces.

    Args:
        s: The text to process.

    Returns:
        The text with a space on each side of every character in
        ``string.punctuation`` (and of every newline), with runs of
        consecutive spaces collapsed to a single space.
    """
    # re.escape keeps characters such as ], \, ^ and - literal inside the
    # character class, so every punctuation mark (backslash included) matches.
    s = re.sub(f"([{re.escape(string.punctuation)}\n])", r" \1 ", s)
    # Padding can create double spaces next to existing ones; collapse them.
    s = re.sub(" +", " ", s)
    return s


# Apply the punctuation padding to every filtered review string.
text_data = [pad_punctuation(x) for x in filtered_data]

In [10]:
# Display an example wine review after punctuation padding.
example_data = text_data[25]
example_data

'wine review : US : California : Pinot Noir : Oak and earth intermingle around robust aromas of wet forest floor in this vineyard - designated Pinot that hails from a high - elevation site . Small in production , it offers intense , full - bodied raspberry and blackberry steeped in smoky spice and smooth texture . '

In [11]:
# Convert the padded review strings to a TensorFlow dataset.
# shuffle() is applied after batch(), so it reorders whole batches (using a
# buffer of 1000 batches) rather than individual reviews. Passing the
# notebook-level SEED makes the shuffle order repeatable between runs.
text_ds = (
    tf.data.Dataset.from_tensor_slices(text_data)
    .batch(BATCH_SIZE)
    .shuffle(1000, seed=SEED)
)

In [12]:
# Create the TextVectorization layer.
vectorize_layer = layers.TextVectorization(
    standardize="lower",
    max_tokens=VOCAB_SIZE,
    output_mode="int",
    # One token longer than MAX_LEN: prepare_inputs() slices this into an
    # input and a one-token-shifted target, each MAX_LEN tokens long.
    output_sequence_length=MAX_LEN + 1,
)

In [13]:
# Fit the layer's vocabulary on the training text, then keep the resulting
# token list (index -> word) for decoding model output later.
vectorize_layer.adapt(text_ds)
vocab = vectorize_layer.get_vocabulary()

In [14]:
# Display the token-id -> word mapping for the first ten vocabulary entries.
for i, word in enumerate(vocab[:10]):
    print(f"{i}: {word}")

0: 
1: [UNK]
2: :
3: ,
4: .
5: and
6: the
7: wine
8: a
9: of


In [15]:
# Display the same example converted to integer token ids.
example_tokenised = vectorize_layer(example_data)
print(example_tokenised.numpy())

[   7   10    2   20    2   29    2   43   62    2   55    5  243 4145
  453  634   26    9  497  499  667   17   12  142   14 2214   43   25
 2484   32    8  223   14 2213  948    4  594   17  987    3   15   75
  237    3   64   14   82   97    5   74 2633   17  198   49    5  125
   77    4    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0]


## 3. 훈련 세트 생성 <a name="create"></a>

In [16]:
# Build the training set: each review's tokens paired with the same tokens
# shifted one position to the left.
def prepare_inputs(text):
    """Turn a batch of review strings into (input, target) token tensors.

    The batch is vectorized with ``vectorize_layer``; the input drops the
    last token of every sequence and the target drops the first, so the
    target at each position is the token that follows the input token.
    """
    tokens = vectorize_layer(tf.expand_dims(text, -1))
    return tokens[:, :-1], tokens[:, 1:]


train_ds = text_ds.map(prepare_inputs)

In [17]:
# Pull a single (input, target) batch out of the training set for inspection.
example_input_output = train_ds.take(1).get_single_element()

In [18]:
# Example input: the first sequence of the input tensor.
example_input_output[0][0]

<tf.Tensor: shape=(80,), dtype=int64, numpy=
array([   7,   10,    2,   20,    2, 3598,    2,   45,  200,    2,  844,
        317,  212,    3, 1481,    5,  675,  971,  383, 1320,   23,    6,
         60,    9,   12,   45,  200,  185, 3509, 2082,  464,   17,    1,
         55,  873,    4,  303,    5,  263,   23,    6,   28,    3, 2362,
         36,    5,   24,   67,  972, 1004,  328,   49,    5,  297,   55,
       2811,    4,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0])>

In [19]:
# Example target: the same sequence shifted by one token.
example_input_output[1][0]

<tf.Tensor: shape=(80,), dtype=int64, numpy=
array([  10,    2,   20,    2, 3598,    2,   45,  200,    2,  844,  317,
        212,    3, 1481,    5,  675,  971,  383, 1320,   23,    6,   60,
          9,   12,   45,  200,  185, 3509, 2082,  464,   17,    1,   55,
        873,    4,  303,    5,  263,   23,    6,   28,    3, 2362,   36,
          5,   24,   67,  972, 1004,  328,   49,    5,  297,   55, 2811,
          4,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0])>

## 5. 코잘 어텐션 마스킹 함수 만들기 <a name="causal"></a>

In [20]:
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    """Build a causal attention mask repeated for every item in the batch.

    Entry ``[b, i, j]`` is set when ``j <= i + (n_src - n_dest)``; for equal
    lengths that means source position ``j`` is not after destination
    position ``i``.

    Args:
        batch_size: Number of copies of the mask along the first axis.
        n_dest: Number of destination (query) positions.
        n_src: Number of source (key) positions.
        dtype: Dtype the mask is cast to.

    Returns:
        A tensor of shape ``[batch_size, n_dest, n_src]``.
    """
    dest_idx = tf.range(n_dest)[:, None]
    src_idx = tf.range(n_src)
    allowed = dest_idx >= src_idx - n_src + n_dest
    single_mask = tf.reshape(tf.cast(allowed, dtype), [1, n_dest, n_src])
    # Tile multiples: [batch_size, 1, 1] repeats the mask along the batch axis only.
    batch_dim = tf.expand_dims(batch_size, -1)
    repeats = tf.concat([batch_dim, tf.constant([1, 1], dtype=tf.int32)], 0)
    return tf.tile(single_mask, repeats)


# Show a 10x10 mask. It is transposed for display, so each column (not row)
# corresponds to one destination position and the 1s form an upper triangle.
np.transpose(causal_attention_mask(1, 10, 10, dtype=tf.int32)[0])

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=int32)

## 6. 트랜스포머 블록 층 만들기 <a name="transformer"></a>

In [21]:
class TransformerBlock(layers.Layer):
    """Transformer block: causal multi-head self-attention plus feed-forward.

    Both sub-blocks are followed by dropout, a residual connection and layer
    normalization.

    Args:
        num_heads: Number of attention heads.
        key_dim: ``key_dim`` passed to ``MultiHeadAttention``.
        embed_dim: Size of the block's input and output vectors.
        ff_dim: Units in the first (ReLU) feed-forward dense layer.
        dropout_rate: Rate used by both dropout layers.
        **kwargs: Standard base-layer options (``name``, ``dtype``, ...).
    """

    def __init__(
        self, num_heads, key_dim, embed_dim, ff_dim, dropout_rate=0.1, **kwargs
    ):
        # Forward base-layer options so that the dict returned by
        # get_config(), which includes the base config, can be passed back
        # to this constructor when the layer is rebuilt.
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate
        self.attn = layers.MultiHeadAttention(
            num_heads, key_dim, output_shape=embed_dim
        )
        self.dropout_1 = layers.Dropout(self.dropout_rate)
        self.ln_1 = layers.LayerNormalization(epsilon=1e-6)
        self.ffn_1 = layers.Dense(self.ff_dim, activation="relu")
        self.ffn_2 = layers.Dense(self.embed_dim)
        self.dropout_2 = layers.Dropout(self.dropout_rate)
        self.ln_2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Apply the block to ``inputs``.

        Returns:
            A tuple ``(output, attention_scores)``: the block output and the
            attention scores returned by the attention layer.
        """
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        # Query and key are the same sequence, so the mask is square.
        causal_mask = causal_attention_mask(
            batch_size, seq_len, seq_len, tf.bool
        )
        attention_output, attention_scores = self.attn(
            inputs,
            inputs,
            attention_mask=causal_mask,
            return_attention_scores=True,
        )
        attention_output = self.dropout_1(attention_output)
        # First residual connection + normalization.
        out1 = self.ln_1(inputs + attention_output)
        ffn_1 = self.ffn_1(out1)
        ffn_2 = self.ffn_2(ffn_1)
        ffn_output = self.dropout_2(ffn_2)
        # Second residual connection + normalization.
        return (self.ln_2(out1 + ffn_output), attention_scores)

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update(
            {
                "key_dim": self.key_dim,
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "dropout_rate": self.dropout_rate,
            }
        )
        return config

## 7. 토큰 임베딩과 위치 인코딩 만들기 <a name="embedder"></a>

In [22]:
class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a token embedding and a learned position embedding.

    Args:
        max_len: Number of positions the position embedding can represent.
        vocab_size: Number of distinct token ids.
        embed_dim: Size of each embedding vector.
        **kwargs: Standard base-layer options (``name``, ``dtype``, ...).
    """

    def __init__(self, max_len, vocab_size, embed_dim, **kwargs):
        # Forward base-layer options so that the dict returned by
        # get_config(), which includes the base config, can be passed back
        # to this constructor when the layer is rebuilt.
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_emb = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.pos_emb = layers.Embedding(input_dim=max_len, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position."""
        # Positions run 0..length-1 along the last axis of x; the position
        # embedding only has max_len rows, so longer inputs are out of range.
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        # The position embeddings broadcast over the batch axis.
        return x + positions

    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        config = super().get_config()
        config.update(
            {
                "max_len": self.max_len,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

## 8. 트랜스포머 모델 만들기 <a name="transformer_decoder"></a>

In [23]:
# Token ids of variable sequence length (shape=(None,)) are the model input.
inputs = layers.Input(shape=(None,), dtype=tf.int32)
x = TokenAndPositionEmbedding(MAX_LEN, VOCAB_SIZE, EMBEDDING_DIM)(inputs)
x, attention_scores = TransformerBlock(
    N_HEADS, KEY_DIM, EMBEDDING_DIM, FEED_FORWARD_DIM
)(x)
# Softmax over the vocabulary at every position.
outputs = layers.Dense(VOCAB_SIZE, activation="softmax")(x)
# The attention scores are exposed as a second output so they can be
# inspected at generation time; their loss entry is None, so only the token
# predictions are trained.
gpt = models.Model(inputs=inputs, outputs=[outputs, attention_scores])
gpt.compile("adam", loss=[losses.SparseCategoricalCrossentropy(), None])

In [24]:
gpt.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 token_and_position_embeddin  (None, None, 256)        2580480   
 g (TokenAndPositionEmbeddin                                     
 g)                                                              
                                                                 
 transformer_block (Transfor  ((None, None, 256),      658688    
 merBlock)                    (None, 2, None, None))             
                                                                 
 dense_2 (Dense)             (None, None, 10000)       2570000   
                                                                 
Total params: 5,809,168
Trainable params: 5,809,168
Non-trainable params: 0
___________________________________________________

In [25]:
if LOAD_MODEL:
    # Replace the freshly built model with the one saved under ./models/gpt.
    # NOTE(review): the model contains custom layers; loading may need
    # custom_objects depending on the Keras version — confirm.
    gpt = models.load_model("./models/gpt", compile=True)

## 9. 트랜스포머 훈련하기 <a name="train"></a>

In [26]:
# Callback that generates a sample of text at the end of every epoch.
class TextGenerator(callbacks.Callback):
    """Generate text from the model being trained.

    Args:
        index_to_word: Sequence mapping token id -> word (the vocabulary).
        top_k: Accepted for interface compatibility; not used by this class.
    """

    def __init__(self, index_to_word, top_k=10):
        # Initialize the base Callback state before adding our own attributes.
        super().__init__()
        self.index_to_word = index_to_word
        self.word_to_index = {
            word: index for index, word in enumerate(index_to_word)
        }

    def sample_from(self, probs, temperature):
        """Sample a token id from ``probs`` rescaled by ``temperature``.

        Each probability is raised to ``1 / temperature`` and the result is
        renormalized, so temperatures below 1 sharpen the distribution and
        temperatures above 1 flatten it.

        Returns:
            A tuple ``(token_id, rescaled_probs)``.
        """
        probs = probs ** (1 / temperature)
        probs = probs / np.sum(probs)
        return np.random.choice(len(probs), p=probs), probs

    def generate(self, start_prompt, max_tokens, temperature):
        """Extend ``start_prompt`` one sampled token at a time and print it.

        Generation stops when the sequence reaches ``max_tokens`` tokens or
        when token id 0 is sampled.

        Args:
            start_prompt: Whitespace-separated prompt; words missing from the
                vocabulary are mapped to id 1.
            max_tokens: Upper bound on the total number of tokens.
            temperature: Sampling temperature passed to ``sample_from``.

        Returns:
            A list with one dict per generated token holding the prompt before
            that token (``"prompt"``), the rescaled next-token distribution
            (``"word_probs"``) and ``att[0, :, -1, :]`` (``"atts"``), i.e. the
            attention scores of the last position of the first batch item.
        """
        start_tokens = [
            self.word_to_index.get(x, 1) for x in start_prompt.split()
        ]
        sample_token = None
        info = []
        while len(start_tokens) < max_tokens and sample_token != 0:
            x = np.array([start_tokens])
            y, att = self.model.predict(x, verbose=0)
            # Only the prediction at the last position is used for sampling.
            sample_token, probs = self.sample_from(y[0][-1], temperature)
            info.append(
                {
                    "prompt": start_prompt,
                    "word_probs": probs,
                    "atts": att[0, :, -1, :],
                }
            )
            start_tokens.append(sample_token)
            start_prompt = start_prompt + " " + self.index_to_word[sample_token]
        print(f"\n생성된 텍스트:\n{start_prompt}\n")
        return info

    def on_epoch_end(self, epoch, logs=None):
        """Print one generated review after every training epoch."""
        self.generate("wine review", max_tokens=80, temperature=1.0)

In [27]:
# Checkpoint callback: saves the model weights (only) at the end of each epoch.
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath="./checkpoint/checkpoint.ckpt",
    save_weights_only=True,
    save_freq="epoch",
    verbose=0,
)

# TensorBoard callback: writes training logs to ./logs.
tensorboard_callback = callbacks.TensorBoard(log_dir="./logs")

# Text generation callback built from the vectorizer's vocabulary.
text_generator = TextGenerator(vocab)

In [28]:
# Train the model; text_generator prints a generated review after each epoch.
gpt.fit(
    train_ds,
    epochs=EPOCHS,
    callbacks=[model_checkpoint_callback, tensorboard_callback, text_generator],
)

Epoch 1/5
생성된 텍스트:
wine review : italy : southern italy : southern italy offers mature yellow rose and blue with aromas of violet , white rose cake spice , underbrush and a whiff of a whiff of bay wet stone . the concentrated palate is still young and austere , [UNK] delivers layers of dark fruit , mint , cooked cherry and licorice . give it time to develop further almond out , the fruit . 

Epoch 2/5
생성된 텍스트:
wine review : us : california : merlot : very direct and enjoyable for the price . this merlot is dry and full bodied , with ripe blackberry and cherry flavors , while the tannins have a mouthwatering structure of acidity . give it 2–3 years to open up . 

Epoch 3/5
생성된 텍스트:
wine review : us : washington : red blend : citrus , cranberry and green , this wine has enticing aromas , showing suggestions of juicy black raspberry and cherry . it is medium bodied , with a good grip to the liveliness of the finish . 

Epoch 4/5
생성된 텍스트:
wine review : italy : tuscany : red blend : meaty a

<keras.callbacks.History at 0x7f5d06453190>

In [29]:
# Save the final model.
gpt.save("./models/gpt")



## 10. 트랜스포머를 사용하여 텍스트 생성

In [30]:
def print_probs(info, vocab, top_k=5):
    """Show, for every generation step, the attention and the top predictions.

    For each step the prompt is rendered as HTML with every word shaded in
    proportion to its head-averaged attention score (relative to the largest
    score of that step), followed by the ``top_k`` most probable next words.

    Args:
        info: List of step dicts as returned by ``TextGenerator.generate``,
            each with the keys ``"prompt"``, ``"atts"`` and ``"word_probs"``.
        vocab: Sequence mapping token id -> word.
        top_k: Number of candidate words printed per step.
    """
    for step in info:
        # Average over the first axis of "atts" once per step instead of
        # recomputing it (and its maximum) for every word.
        mean_atts = np.mean(step["atts"], axis=0)
        max_att = np.max(mean_atts) if np.size(mean_atts) else 1.0
        highlighted_text = " ".join(
            '<span style="background-color:rgba(135,206,250,'
            + str(att_score / max_att)
            + ');">'
            # Tokens such as "<", ">" or "&" must be escaped or they would
            # be interpreted as markup and corrupt the rendered prompt.
            + html.escape(word)
            + "</span>"
            for word, att_score in zip(step["prompt"].split(), mean_atts)
        )
        display(HTML(highlighted_text))

        word_probs = np.asarray(step["word_probs"])
        # Indices of the top_k most probable tokens, most probable first.
        top_indices = np.argsort(word_probs)[::-1][:top_k]
        for token_id in top_indices:
            print(f"{vocab[token_id]}:   \t{np.round(100*word_probs[token_id],2)}%")
        print("--------\n")

In [31]:
# Generate a review for a US wine at temperature 1.0.
info = text_generator.generate(
    "wine review : us", max_tokens=80, temperature=1.0
)


생성된 텍스트:
wine review : us : california : syrah : a powerful acidity , and a leg of lamb to like lamb or duck , as a . with unusual detail and tons of raspberries , bass with ripe fruit , only just enough to balance . the flavors show through the firm , complex tannins and the dryness of them . 



In [32]:
# Generate a review for an Italian wine at the lower temperature 0.5.
info = text_generator.generate(
    "wine review : italy", max_tokens=80, temperature=0.5
)


생성된 텍스트:
wine review : italy : tuscany : sangiovese grosso : here ' s a dense , masculine brunello with loads of sweet fruit and spice . there ' s a sharp , compact quality that leaves a bright , bright cherry and blackberry take on the wine ' s natural acidity . 



In [33]:
# Generate a review for a German wine, then show the attention highlighting
# and the top candidate words for every generated token.
info = text_generator.generate(
    "wine review : germany", max_tokens=80, temperature=0.5
)
print_probs(info, vocab)


생성된 텍스트:
wine review : germany : mosel : riesling : a whiff of waxy lanolin lends a sheen to this intensely fruity wine . it ' s rich and concentrated , with penetrating lime and peach flavors , but the palate is concentrated and full of body and rich , yet finishes long and long , with a long , mouthwatering finish . 



::   	100.0%
-:   	0.0%
grosso:   	0.0%
saar:   	0.0%
hills:   	0.0%
--------



mosel:   	99.29%
rheingau:   	0.44%
rheinhessen:   	0.14%
pfalz:   	0.11%
baden:   	0.02%
--------



::   	99.43%
-:   	0.57%
blanca:   	0.0%
grosso:   	0.0%
blanc:   	0.0%
--------



riesling:   	100.0%
pinot:   	0.0%
weissburgunder:   	0.0%
sparkling:   	0.0%
white:   	0.0%
--------



::   	100.0%
blanc:   	0.0%
-:   	0.0%
grosso:   	0.0%
blanca:   	0.0%
--------



a:   	27.11%
this:   	14.84%
while:   	13.13%
intensely:   	7.64%
[UNK]:   	3.83%
--------



whiff:   	31.99%
hint:   	10.75%
touch:   	9.18%
crush:   	7.56%
wisp:   	4.78%
--------



of:   	100.0%
from:   	0.0%
that:   	0.0%
to:   	0.0%
at:   	0.0%
--------



petrol:   	43.4%
wet:   	15.36%
lanolin:   	6.82%
smoke:   	6.5%
honey:   	4.36%
--------



lanolin:   	88.96%
honeycomb:   	4.56%
apple:   	2.78%
lemon:   	1.02%
minerals:   	0.68%
--------



lends:   	73.48%
and:   	12.77%
adds:   	7.5%
introduces:   	2.87%
on:   	1.14%
--------



a:   	62.66%
complexity:   	24.11%
an:   	7.06%
nuance:   	1.34%
the:   	1.1%
--------



savory:   	45.78%
sheen:   	20.43%
fresh:   	6.67%
lavish:   	5.96%
bright:   	3.43%
--------



to:   	99.96%
of:   	0.02%
and:   	0.01%
on:   	0.01%
,:   	0.01%
--------



this:   	99.9%
the:   	0.09%
a:   	0.01%
pristine:   	0.0%
bright:   	0.0%
--------



intensely:   	66.35%
off:   	9.0%
dry:   	7.76%
otherwise:   	2.42%
lusciously:   	2.37%
--------



fruity:   	43.68%
concentrated:   	26.69%
mineral:   	11.0%
perfumed:   	4.6%
floral:   	3.43%
--------



,:   	54.75%
riesling:   	20.92%
wine:   	17.76%
auslese:   	4.11%
off:   	0.95%
--------



.:   	87.35%
,:   	7.04%
that:   	5.33%
full:   	0.12%
from:   	0.09%
--------



it:   	68.39%
the:   	24.77%
off:   	1.15%
dry:   	0.95%
while:   	0.72%
--------



':   	99.95%
has:   	0.01%
is:   	0.01%
balances:   	0.01%
offers:   	0.0%
--------



s:   	100.0%
ll:   	0.0%
07:   	0.0%
blossomy:   	0.0%
[UNK]:   	0.0%
--------



lusciously:   	45.89%
chock:   	13.86%
intensely:   	11.99%
dry:   	5.33%
forward:   	3.66%
--------



and:   	93.97%
,:   	4.25%
in:   	1.11%
with:   	0.5%
yet:   	0.09%
--------



full:   	30.28%
concentrated:   	19.34%
creamy:   	9.42%
sweet:   	6.81%
lusciously:   	3.78%
--------



,:   	53.33%
with:   	26.99%
in:   	15.69%
on:   	2.52%
yet:   	1.02%
--------



with:   	80.64%
yet:   	14.34%
but:   	4.0%
boasting:   	0.51%
while:   	0.09%
--------



a:   	75.99%
penetrating:   	2.84%
stonefruit:   	2.38%
layers:   	2.33%
intense:   	2.13%
--------



lime:   	46.13%
flavors:   	7.67%
tangerine:   	6.89%
peach:   	6.81%
citrus:   	6.61%
--------



and:   	50.32%
-:   	24.48%
,:   	15.32%
flavors:   	6.23%
acidity:   	3.45%
--------



tangerine:   	41.07%
lemon:   	19.87%
grapefruit:   	11.94%
peach:   	7.45%
lime:   	3.92%
--------



flavors:   	99.74%
notes:   	0.21%
aromas:   	0.02%
nectar:   	0.01%
,:   	0.01%
--------



,:   	68.78%
that:   	14.74%
.:   	13.77%
and:   	1.03%
accented:   	0.75%
--------



but:   	32.62%
with:   	23.55%
yet:   	19.16%
accented:   	6.48%
along:   	5.7%
--------



it:   	25.78%
the:   	21.78%
also:   	20.45%
a:   	13.35%
balanced:   	3.53%
--------



palate:   	70.26%
finish:   	23.13%
wine:   	4.54%
acidity:   	1.06%
mouthfeel:   	0.29%
--------



is:   	99.58%
boasts:   	0.09%
offers:   	0.05%
also:   	0.04%
maintains:   	0.04%
--------



a:   	9.99%
marked:   	9.17%
long:   	9.03%
penetrating:   	6.64%
balanced:   	5.39%
--------



and:   	51.29%
with:   	35.56%
,:   	11.38%
yet:   	1.35%
without:   	0.16%
--------



penetrating:   	19.39%
concentrated:   	9.98%
rich:   	7.75%
full:   	7.68%
intense:   	7.53%
--------



of:   	96.7%
bodied:   	1.39%
in:   	1.12%
,:   	0.53%
-:   	0.13%
--------



flavor:   	28.55%
body:   	20.8%
sweet:   	12.66%
life:   	5.44%
honey:   	3.59%
--------



.:   	46.26%
and:   	36.08%
,:   	17.08%
yet:   	0.17%
without:   	0.1%
--------



a:   	41.35%
penetrating:   	18.87%
lingering:   	5.02%
rich:   	4.19%
lush:   	3.5%
--------



,:   	97.07%
.:   	0.78%
yet:   	0.5%
fruit:   	0.39%
in:   	0.36%
--------



yet:   	49.58%
with:   	21.32%
but:   	18.34%
finishing:   	7.16%
lingering:   	0.83%
--------



finishes:   	42.88%
maintains:   	13.46%
balanced:   	13.12%
it:   	7.04%
lingers:   	4.2%
--------



long:   	99.67%
with:   	0.28%
on:   	0.01%
dry:   	0.01%
a:   	0.01%
--------



and:   	56.82%
,:   	26.85%
with:   	12.97%
.:   	3.23%
on:   	0.12%
--------



long:   	72.68%
lingering:   	6.82%
spicy:   	3.78%
dry:   	3.48%
clean:   	1.88%
--------



.:   	61.38%
,:   	34.4%
with:   	3.45%
on:   	0.52%
and:   	0.18%
--------



with:   	94.53%
lingering:   	3.42%
marked:   	0.39%
as:   	0.23%
ending:   	0.22%
--------



a:   	95.98%
lingering:   	2.22%
penetrating:   	0.69%
an:   	0.28%
just:   	0.26%
--------



long:   	63.81%
lingering:   	25.22%
murmur:   	4.67%
hint:   	1.95%
bristle:   	0.67%
--------



,:   	97.6%
-:   	1.18%
finish:   	1.06%
and:   	0.03%
of:   	0.02%
--------



lingering:   	31.8%
mouthwatering:   	17.83%
spicy:   	11.51%
steely:   	9.81%
mineral:   	6.24%
--------



finish:   	98.2%
note:   	0.67%
sheen:   	0.32%
sensation:   	0.18%
,:   	0.16%
--------



.:   	99.88%
that:   	0.11%
of:   	0.01%
,:   	0.0%
marked:   	0.0%
--------



:   	78.55%
drink:   	20.58%
this:   	0.16%
it:   	0.15%
delicious:   	0.13%
--------

