# 🚀 GPT

In this notebook, we'll walk through the steps required to train your own GPT model on the wine review dataset.

The code is adapted from the excellent [GPT tutorial](https://keras.io/examples/generative/text_generation_with_miniature_gpt/) created by Apoorv Nandan, available on the Keras website.

In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import json
import re
import string
from IPython.display import display, HTML

import tensorflow as tf
from tensorflow.keras import layers, models, losses, callbacks

## 0. Parameters <a name="parameters"></a>

In [3]:
# Tokenizer / sequence settings
VOCAB_SIZE = 10000  # max_tokens of the TextVectorization layer; also the embedding and softmax size
MAX_LEN = 80  # model input length; sequences are vectorized to MAX_LEN + 1 tokens
# Model dimensions
EMBEDDING_DIM = 256  # size of the token and position embeddings
KEY_DIM = 256  # key_dim passed to MultiHeadAttention
N_HEADS = 2  # number of attention heads
FEED_FORWARD_DIM = 256  # width of the hidden Dense layer in the Transformer block
# NOTE(review): VALIDATION_SPLIT and SEED are not referenced by any cell shown in this notebook.
VALIDATION_SPLIT = 0.2
SEED = 42
# Training / persistence settings
LOAD_MODEL = False  # when True, a saved model is loaded before training
BATCH_SIZE = 32  # batch size of the tf.data pipeline
EPOCHS = 5  # number of epochs passed to gpt.fit

## 1. Load the data <a name="load"></a>

In [4]:
# Load the full dataset
with open("data/wine-reviews/winemag-data-130k-v2.json", encoding='utf-8') as json_data:
    wine_data = json.load(json_data)

In [5]:
wine_data[25]

{'points': '87',
 'title': 'Castello di Amorosa 2011 King Ridge Vineyard Pinot Noir (Sonoma Coast)',
 'description': 'Oak and earth intermingle around robust aromas of wet forest floor in this vineyard-designated Pinot that hails from a high-elevation site. Small in production, it offers intense, full-bodied raspberry and blackberry steeped in smoky spice and smooth texture.',
 'taster_name': 'Virginie Boone',
 'taster_twitter_handle': '@vboone',
 'price': 69,
 'designation': 'King Ridge Vineyard',
 'variety': 'Pinot Noir',
 'region_1': 'Sonoma Coast',
 'region_2': 'Sonoma',
 'province': 'California',
 'country': 'US',
 'winery': 'Castello di Amorosa'}

In [6]:
# Filter the dataset
# Build one training string per review in the form
# "wine review : <country> : <province> : <variety> : <description>",
# skipping any record where one of those four fields is None.
filtered_data = [
    "wine review : "
    + x["country"]
    + " : "
    + x["province"]
    + " : "
    + x["variety"]
    + " : "
    + x["description"]
    for x in wine_data
    if x["country"] is not None
    and x["province"] is not None
    and x["variety"] is not None
    and x["description"] is not None
]

In [7]:
# Count the wine reviews that survived filtering
# NOTE(review): the printed message says "recipes", but the items are wine reviews.
n_wines = len(filtered_data)
print(f"{n_wines} recipes loaded")

129907 recipes loaded


In [8]:
example = filtered_data[25]
print(example)

wine review : US : California : Pinot Noir : Oak and earth intermingle around robust aromas of wet forest floor in this vineyard-designated Pinot that hails from a high-elevation site. Small in production, it offers intense, full-bodied raspberry and blackberry steeped in smoky spice and smooth texture.


## 2. Tokenize the data <a name="tokenize"></a>

In [9]:
# Pad the punctuation, to treat them as separate 'words'
def pad_punctuation(s):
    """Surround every punctuation mark and newline in ``s`` with single spaces.

    Whitespace-based tokenization then sees each punctuation character as its
    own token. Runs of spaces created by the padding are collapsed to one.

    Args:
        s: The text to pad.

    Returns:
        The padded text.
    """
    # string.punctuation must be escaped before it goes inside a character
    # class: unescaped, its "\]" sequence is read as an escaped "]" and the
    # backslash itself is never matched (so it was never padded).
    s = re.sub(f"([{re.escape(string.punctuation)}\n])", r" \1 ", s)
    s = re.sub(" +", " ", s)
    return s


text_data = [pad_punctuation(x) for x in filtered_data]

In [10]:
# Display an example
example_data = text_data[25]
example_data

'wine review : US : California : Pinot Noir : Oak and earth intermingle around robust aromas of wet forest floor in this vineyard - designated Pinot that hails from a high - elevation site . Small in production , it offers intense , full - bodied raspberry and blackberry steeped in smoky spice and smooth texture . '

In [11]:
# Convert to a Tensorflow Dataset
# NOTE(review): shuffle() is applied after batch(), so it reorders whole
# batches (buffer of 1000 batches) rather than individual reviews, and no
# seed is passed. Confirm this is intended.
text_ds = (
    tf.data.Dataset.from_tensor_slices(text_data)
    .batch(BATCH_SIZE)
    .shuffle(1000)
)

In [12]:
# Create a vectorisation layer
# Text is lower-cased only (punctuation was already padded into separate
# tokens), mapped to integer ids, and padded/truncated to MAX_LEN + 1 tokens.
# The extra token lets each sequence be split into an input of length MAX_LEN
# and a target shifted by one position.
vectorize_layer = layers.TextVectorization(
    standardize="lower",
    max_tokens=VOCAB_SIZE,
    output_mode="int",
    output_sequence_length=MAX_LEN + 1,
)

In [13]:
# Adapt the layer to the training set
vectorize_layer.adapt(text_ds)
vocab = vectorize_layer.get_vocabulary()

In [14]:
# Display some token:word mappings
for i, word in enumerate(vocab[:10]):
    print(f"{i}: {word}")

0: 
1: [UNK]
2: :
3: ,
4: .
5: and
6: the
7: wine
8: a
9: of


In [15]:
# Display the same example converted to ints
example_tokenised = vectorize_layer(example_data)
print(example_tokenised.numpy())

[   7   10    2   20    2   29    2   43   62    2   55    5  243 4145
  453  634   26    9  497  499  667   17   12  142   14 2214   43   25
 2484   32    8  223   14 2213  948    4  594   17  987    3   15   75
  237    3   64   14   82   97    5   74 2633   17  198   49    5  125
   77    4    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0]


## 3. Create the Training Set <a name="create"></a>

In [16]:
# Create the training set of wine reviews and the same text shifted by one token
def prepare_inputs(text):
    """Turn a batch of strings into (input, target) token-id pairs.

    The batch is vectorized with ``vectorize_layer``; the input is every token
    except the last and the target is every token except the first, so the
    target at each position is the token that follows the input at that position.

    Args:
        text: A batch of strings (a trailing axis is added before vectorizing).

    Returns:
        A tuple ``(x, y)`` of integer tensors taken from the vectorized batch.
    """
    token_ids = vectorize_layer(tf.expand_dims(text, -1))
    return token_ids[:, :-1], token_ids[:, 1:]


train_ds = text_ds.map(prepare_inputs)

In [17]:
example_input_output = train_ds.take(1).get_single_element()

In [18]:
# Example Input
example_input_output[0][0]

<tf.Tensor: shape=(80,), dtype=int64, numpy=
array([   7,   10,    2,   20,    2,   29,    2,  271,    2,  128,   32,
          6, 1448,  331,  271,    3,   12,   13,   76, 1277,   17,  216,
          3,  653,   17,   63,    5,  131,  181,   72,    4,   22,  831,
        877,  241,  116,   23,    6,   28,    5,   31,    3,  928,  889,
          4,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0], dtype=int64)>

In [19]:
# Example Output (shifted by one token)
example_input_output[1][0]

<tf.Tensor: shape=(80,), dtype=int64, numpy=
array([  10,    2,   20,    2,   29,    2,  271,    2,  128,   32,    6,
       1448,  331,  271,    3,   12,   13,   76, 1277,   17,  216,    3,
        653,   17,   63,    5,  131,  181,   72,    4,   22,  831,  877,
        241,  116,   23,    6,   28,    5,   31,    3,  928,  889,    4,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0], dtype=int64)>

## 5. Create the causal attention mask function <a name="causal"></a>

In [20]:
# Causal mask: prevents information leaking from later positions, so a
# position cannot attend to keys that come after it.
def causal_attention_mask(batch_size, n_dest, n_src, dtype):
    """Build a causal attention mask repeated for every item in the batch.

    Args:
        batch_size: Number of copies of the mask along the first axis
            (a Python int or a scalar int32 tensor).
        n_dest: Number of query (destination) positions.
        n_src: Number of key (source) positions.
        dtype: dtype the mask is cast to.

    Returns:
        A tensor of shape ``(batch_size, n_dest, n_src)`` whose entry
        ``[b, i, j]`` is truthy when ``i >= j - n_src + n_dest``.
    """
    query_pos = tf.range(n_dest)[:, None]
    key_pos = tf.range(n_src)
    # With n_dest == n_src this is simply i >= j: a lower-triangular mask.
    visible = query_pos >= key_pos - n_src + n_dest
    single_mask = tf.reshape(tf.cast(visible, dtype), [1, n_dest, n_src])
    # Tile only along the batch axis: multiples are [batch_size, 1, 1].
    tile_multiples = tf.concat(
        [tf.expand_dims(batch_size, -1), tf.constant([1, 1], dtype=tf.int32)],
        0,
    )
    return tf.tile(single_mask, tile_multiples)


np.transpose(causal_attention_mask(1, 10, 10, dtype=tf.int32)[0])

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 1, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 1, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 1, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 1, 1],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

## 6. Create a Transformer Block layer <a name="transformer"></a>

In [21]:
class TransformerBlock(layers.Layer):
    """Single Transformer decoder block with causal self-attention.

    The block applies multi-head self-attention under a causal mask, then
    dropout, a residual connection and layer normalization, followed by a
    two-layer feed-forward network with the same dropout / residual /
    normalization pattern.

    Args:
        num_heads: Number of attention heads.
        key_dim: Size of each attention head for query and key.
        embed_dim: Size of the attention output and of the block output.
        ff_dim: Width of the hidden feed-forward layer.
        dropout_rate: Dropout rate used after attention and after the
            feed-forward network.
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``dtype``),
            forwarded to ``layers.Layer``.
    """

    def __init__(
        self, num_heads, key_dim, embed_dim, ff_dim, dropout_rate=0.1, **kwargs
    ):
        # Forward base-layer kwargs: get_config() includes the base config
        # (name, trainable, dtype), so rebuilding the layer from its config
        # requires __init__ to accept them.
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.embed_dim = embed_dim
        self.ff_dim = ff_dim
        self.dropout_rate = dropout_rate
        # Multi-head attention: number of heads, key (and query) size, and
        # the size of the output vector.
        self.attn = layers.MultiHeadAttention(
            num_heads, key_dim, output_shape=embed_dim
        )
        self.dropout_1 = layers.Dropout(self.dropout_rate)
        self.ln_1 = layers.LayerNormalization(epsilon=1e-6)
        self.ffn_1 = layers.Dense(self.ff_dim, activation="relu")
        self.ffn_2 = layers.Dense(self.embed_dim)
        self.dropout_2 = layers.Dropout(self.dropout_rate)
        self.ln_2 = layers.LayerNormalization(epsilon=1e-6)

    def call(self, inputs):
        """Run the block and return ``(output, attention_scores)``."""
        input_shape = tf.shape(inputs)
        batch_size = input_shape[0]
        seq_len = input_shape[1]
        # Causal mask hides future keys from each query position.
        causal_mask = causal_attention_mask(
            batch_size, seq_len, seq_len, tf.bool
        )
        # Self-attention: the same tensor is used as query and value.
        attention_output, attention_scores = self.attn(
            inputs,
            inputs,
            attention_mask=causal_mask,
            return_attention_scores=True,
        )
        attention_output = self.dropout_1(attention_output)
        # First add & norm.
        out1 = self.ln_1(inputs + attention_output)
        # Feed-forward layers.
        ffn_1 = self.ffn_1(out1)
        ffn_2 = self.ffn_2(ffn_1)
        ffn_output = self.dropout_2(ffn_2)
        # Second add & norm.
        return (self.ln_2(out1 + ffn_output), attention_scores)

    def get_config(self):
        """Return the layer config, including the constructor arguments."""
        config = super().get_config()
        config.update(
            {
                "key_dim": self.key_dim,
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "dropout_rate": self.dropout_rate,
            }
        )
        return config

## 7. Create the Token and Position Embedding <a name="embedder"></a>

In [22]:
# Token-and-position embedding layer: encodes each token and its position,
# so the meaning and the location of every word end up in a single vector.
class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        max_len: Number of distinct positions the position embedding covers.
        vocab_size: Number of distinct token ids the token embedding covers.
        embed_dim: Size of both embeddings.
        **kwargs: Standard Keras layer arguments (e.g. ``name``, ``dtype``),
            forwarded to ``layers.Layer``.
    """

    def __init__(self, max_len, vocab_size, embed_dim, **kwargs):
        # Forward base-layer kwargs: get_config() includes the base config
        # (name, trainable, dtype), so rebuilding the layer from its config
        # requires __init__ to accept them.
        super().__init__(**kwargs)
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        # Embedding for the token ids.
        self.token_emb = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        # Embedding for the token positions.
        self.pos_emb = layers.Embedding(input_dim=max_len, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids ``x`` and add the embedding of each position."""
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        # Token and position embeddings are combined by addition.
        return x + positions

    def get_config(self):
        """Return the layer config, including the constructor arguments."""
        config = super().get_config()
        config.update(
            {
                "max_len": self.max_len,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

## 8. Build the Transformer model <a name="transformer_decoder"></a>

In [23]:
# Integer token ids; shape=(None,) leaves the sequence length unspecified
# (the Input layer itself does not pad anything).
inputs = layers.Input(shape=(None,), dtype=tf.int32)
# Token + position embeddings of the input ids.
x = TokenAndPositionEmbedding(MAX_LEN, VOCAB_SIZE, EMBEDDING_DIM)(inputs)
# Pass the embeddings through the Transformer block, which also returns
# its attention scores.
x, attention_scores = TransformerBlock(
    N_HEADS, KEY_DIM, EMBEDDING_DIM, FEED_FORWARD_DIM
)(x)
# Dense softmax layer over the vocabulary: the predicted next-token distribution.
outputs = layers.Dense(VOCAB_SIZE, activation="softmax")(x)

# The model takes a token sequence and outputs both the next-token
# distributions and the attention scores of the Transformer block.
gpt = models.Model(inputs=inputs, outputs=[outputs, attention_scores])
# Only the first output is trained; the attention-scores output has no loss.
gpt.compile("adam", loss=[losses.SparseCategoricalCrossentropy(), None])




In [24]:
gpt.summary()

In [None]:
if LOAD_MODEL:
    # model.load_weights('./models/model')
    # The model contains custom layers, so they must be supplied through
    # custom_objects for deserialization. The path carries the `.keras`
    # extension because recent Keras versions reject extension-less paths.
    gpt = models.load_model(
        "./models/gpt.keras",
        custom_objects={
            "TransformerBlock": TransformerBlock,
            "TokenAndPositionEmbedding": TokenAndPositionEmbedding,
        },
        compile=True,
    )

## 9. Train the Transformer <a name="train"></a>

In [25]:
# Create a TextGenerator checkpoint
class TextGenerator(callbacks.Callback):
    """Callback that samples text from the model at the end of every epoch.

    Args:
        index_to_word: Vocabulary list; position ``i`` holds the word for
            token id ``i``.
        top_k: Stored on the instance but not used by any method of this class.
    """

    def __init__(self, index_to_word, top_k=10):
        # Initialise the base Callback state before adding our own.
        super().__init__()
        self.index_to_word = index_to_word
        self.top_k = top_k
        self.word_to_index = {
            word: index for index, word in enumerate(index_to_word)
        }

    def sample_from(self, probs, temperature):
        """Sample a token id from ``probs`` rescaled by ``temperature``.

        Returns:
            A tuple ``(token_id, rescaled_probs)``.
        """
        # Raising to 1/temperature sharpens (<1) or flattens (>1) the
        # distribution; it is then renormalized to sum to one.
        probs = probs ** (1 / temperature)
        probs = probs / np.sum(probs)
        return np.random.choice(len(probs), p=probs), probs

    def generate(self, start_prompt, max_tokens, temperature):
        """Extend ``start_prompt`` one sampled token at a time and print it.

        Generation stops once the sequence holds ``max_tokens`` tokens or
        token id 0 is sampled.

        Args:
            start_prompt: Space-separated prompt; words missing from the
                vocabulary are mapped to id 1.
            max_tokens: Maximum total number of tokens, prompt included.
            temperature: Sampling temperature passed to ``sample_from``.

        Returns:
            A list with one dict per generated token, holding the prompt so
            far (``"prompt"``), the rescaled next-token probabilities
            (``"word_probs"``) and the attention scores of the last position
            (``"atts"``).
        """
        start_tokens = [
            self.word_to_index.get(x, 1) for x in start_prompt.split()
        ]
        sample_token = None
        info = []
        while len(start_tokens) < max_tokens and sample_token != 0:
            x = np.array([start_tokens])
            y, att = self.model.predict(x, verbose=0)
            # Only the distribution at the last position predicts the next token.
            sample_token, probs = self.sample_from(y[0][-1], temperature)
            info.append(
                {
                    "prompt": start_prompt,
                    "word_probs": probs,
                    "atts": att[0, :, -1, :],
                }
            )
            start_tokens.append(sample_token)
            start_prompt = start_prompt + " " + self.index_to_word[sample_token]
        print(f"\ngenerated text:\n{start_prompt}\n")
        return info

    def on_epoch_end(self, epoch, logs=None):
        """Print a sample generated from the prompt "wine review"."""
        self.generate("wine review", max_tokens=80, temperature=1.0)

In [27]:
# Create a model save checkpoint
# Saves only the weights, once per epoch, always to the same file.
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath="./checkpoint/checkpoint.weights.h5",
    save_weights_only=True,
    save_freq="epoch",
    verbose=0,
)

# Write training logs for TensorBoard
tensorboard_callback = callbacks.TensorBoard(log_dir="./logs")

# Create the text-generation callback from the fitted vocabulary
text_generator = TextGenerator(vocab)

In [28]:
gpt.fit(
    train_ds,
    epochs=EPOCHS,
    callbacks=[model_checkpoint_callback, tensorboard_callback, text_generator],
)

Epoch 1/5
[1m4060/4060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 298ms/step - loss: 2.5900
generated text:
wine review : us : california : cabernet sauvignon : [UNK] in [UNK] with new oak barrels with that lie to the blackberry and granite soil [UNK] . smoky blackberry and spice flavors , it ' s a tannic wine that leads the exotic character , while the fruit impresses on the authoritative side . it ' s enormous in richness and this wine for six . 7 % petite sirah is also just enough to match the beef jerky

[1m4060/4060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1217s[0m 299ms/step - loss: 2.5899
Epoch 2/5
[1m4060/4060[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 300ms/step - loss: 1.9782
generated text:
wine review : us : new york : riesling : this is a richly grapefruit - flavored chardonnay , rich but offers lemony freshness so many of riesling vintages , northeastern it should be the brisk and succulent on this finger lakes [UNK] . but the finish is lon

<keras.src.callbacks.history.History at 0x249a6fa9b50>

In [None]:
# Save the final model
# The `.keras` extension selects the native Keras format; recent Keras
# versions raise on an extension-less path. Any cell that loads the model
# back must use this same path.
gpt.save("./models/gpt.keras")

## 10. Generate text using the Transformer

In [29]:
def print_probs(info, vocab, top_k=5):
    """Show attention highlights and the top next-token candidates per step.

    For every generation step in ``info`` the prompt is rendered as HTML with
    each word's background opacity set to its head-averaged attention score
    relative to the largest score, and the ``top_k`` most probable next
    tokens are printed with their probabilities.

    Args:
        info: List of dicts with keys ``"prompt"``, ``"atts"`` and
            ``"word_probs"``, as returned by ``TextGenerator.generate``.
        vocab: Vocabulary list mapping token id to word.
        top_k: Number of candidate tokens to print per step.
    """
    from html import escape  # local import: only this helper needs it

    for step in info:
        # Average over the first axis of "atts" (the heads) once per step
        # instead of once per word.
        mean_att = np.mean(step["atts"], axis=0)
        peak_att = max(mean_att, default=1.0)
        spans = [
            '<span style="background-color:rgba(135,206,250,'
            + str(att_score / peak_att)
            + ');">'
            # Tokens can be bare "<", ">" or "&"; escape them so they
            # render as text instead of being parsed as markup.
            + escape(word)
            + "</span>"
            for word, att_score in zip(step["prompt"].split(), mean_att)
        ]
        display(HTML(" ".join(spans)))

        word_probs = step["word_probs"]
        # A single argsort gives both the ids and (by indexing) the values.
        top_ids = np.argsort(word_probs)[::-1][:top_k]
        for token_id in top_ids:
            print(f"{vocab[token_id]}:   \t{np.round(100*word_probs[token_id],2)}%")
        print("--------\n")
        print("--------\n")

In [30]:
info = text_generator.generate(
    "wine review : us", max_tokens=80, temperature=1.0
)


generated text:
wine review : us : oregon : white blend : this pale pink sparkler has the integrated touches of spice and butter as well as a mixer of water . comes through the palate , which adds interest , and is a bit disjointed , with this penetrating bottle age over raw oysters . crab and bread crust make good for choice . 



In [31]:
info = text_generator.generate(
    "wine review : italy", max_tokens=80, temperature=0.5
)


generated text:
wine review : italy : tuscany : sangiovese : there ' s a touch of band - aid on this wine , with a touch of cinnamon or cedar . the mouthfeel is quite nice . 



In [32]:
# See how much attention each prompt word receives (the more opaque the highlight, the more attention)
info = text_generator.generate(
    "wine review : germany", max_tokens=80, temperature=0.5
)
print_probs(info, vocab)


generated text:
wine review : germany : mosel : riesling : [UNK] ' s [UNK] ' s [UNK] estate ' s [UNK] [UNK] ' s lineup of german riesling . this one is a showcase of honey and spice , with a touch of honey and saffron . it ' s lusciously sweet on the palate , but it ' s also elegantly structured , with gorgeous floral notes of honeysuckle and honey , but it ' s balanced by a lingering



::   	100.0%
grosso:   	0.0%
-:   	0.0%
[UNK]:   	0.0%
zealand:   	0.0%
--------



mosel:   	88.75%
rheingau:   	4.79%
rheinhessen:   	3.78%
nahe:   	1.83%
pfalz:   	0.67%
--------



::   	99.78%
-:   	0.22%
[UNK]:   	0.0%
grosso:   	0.0%
blanc:   	0.0%
--------



riesling:   	100.0%
sparkling:   	0.0%
pinot:   	0.0%
gewürztraminer:   	0.0%
grüner:   	0.0%
--------



::   	100.0%
-:   	0.0%
,:   	0.0%
blanc:   	0.0%
grosso:   	0.0%
--------



while:   	23.41%
this:   	21.48%
whiffs:   	14.66%
a:   	8.96%
hints:   	7.23%
--------



':   	79.62%
[UNK]:   	13.23%
is:   	3.52%
and:   	1.57%
,:   	0.9%
--------



s:   	100.0%
[UNK]:   	0.0%
entry:   	0.0%
t:   	0.0%
06:   	0.0%
--------



[UNK]:   	69.64%
entry:   	15.03%
riesling:   	3.13%
lineup:   	1.67%
standard:   	1.05%
--------



':   	67.3%
is:   	21.11%
[UNK]:   	9.2%
,:   	1.05%
-:   	0.34%
--------



s:   	99.25%
[UNK]:   	0.66%
06:   	0.04%
entry:   	0.02%
label:   	0.01%
--------



[UNK]:   	52.1%
lineup:   	17.88%
gg:   	5.53%
riesling:   	4.14%
entry:   	3.84%
--------



':   	46.29%
is:   	21.7%
[UNK]:   	15.14%
bottling:   	2.73%
,:   	2.53%
--------



':   	50.94%
,:   	15.0%
is:   	10.92%
wine:   	5.32%
bottling:   	4.95%
--------



s:   	100.0%
wildcat:   	0.0%
[UNK]:   	0.0%
shaw:   	0.0%
d:   	0.0%
--------



[UNK]:   	51.02%
gg:   	19.8%
lineup:   	7.37%
riesling:   	3.7%
consistently:   	2.25%
--------



':   	55.97%
[UNK]:   	16.23%
is:   	9.27%
,:   	6.82%
bottling:   	1.86%
--------



':   	56.06%
,:   	14.66%
[UNK]:   	12.01%
is:   	8.76%
.:   	2.31%
--------



s:   	99.52%
[UNK]:   	0.47%
label:   	0.0%
lineup:   	0.0%
gg:   	0.0%
--------



[UNK]:   	77.94%
lineup:   	5.49%
gg:   	1.97%
riesling:   	1.76%
label:   	1.52%
--------



of:   	90.83%
,:   	6.82%
.:   	1.96%
is:   	0.26%
from:   	0.04%
--------



[UNK]:   	57.3%
riesling:   	13.47%
wines:   	7.69%
gg:   	7.06%
2014:   	3.79%
--------



riesling:   	99.26%
wines:   	0.56%
[UNK]:   	0.04%
kabinett:   	0.04%
rieslings:   	0.02%
--------



,:   	55.82%
.:   	34.57%
producers:   	4.37%
and:   	1.8%
lineup:   	1.53%
--------



it:   	68.19%
this:   	22.65%
the:   	5.32%
dry:   	0.58%
a:   	0.42%
--------



is:   	71.2%
one:   	8.99%
wine:   	7.99%
dry:   	2.88%
late:   	0.91%
--------



':   	53.74%
is:   	38.45%
-:   	2.28%
has:   	1.87%
offers:   	1.27%
--------



a:   	87.37%
the:   	5.29%
an:   	2.62%
one:   	0.84%
remarkably:   	0.66%
--------



standout:   	15.44%
bit:   	14.33%
showcase:   	10.2%
stunning:   	5.86%
great:   	4.99%
--------



of:   	98.24%
for:   	1.74%
from:   	0.0%
that:   	0.0%
the:   	0.0%
--------



the:   	46.25%
minerality:   	27.37%
a:   	2.9%
mineral:   	2.56%
honeyed:   	2.15%
--------



and:   	72.93%
,:   	22.83%
-:   	4.08%
':   	0.07%
that:   	0.02%
--------



marmalade:   	18.04%
spice:   	17.25%
peach:   	12.29%
saffron:   	10.69%
nectar:   	9.72%
--------



,:   	78.96%
-:   	11.84%
that:   	2.77%
flavors:   	2.2%
as:   	0.96%
--------



but:   	81.95%
with:   	11.22%
yet:   	4.91%
and:   	1.21%
followed:   	0.21%
--------



a:   	93.74%
an:   	1.46%
hints:   	1.33%
loads:   	0.58%
some:   	0.54%
--------



hint:   	34.12%
long:   	11.93%
touch:   	9.6%
honeyed:   	5.55%
delicate:   	3.32%
--------



of:   	100.0%
more:   	0.0%
that:   	0.0%
to:   	0.0%
on:   	0.0%
--------



honey:   	42.28%
sweetness:   	17.51%
sweet:   	11.49%
minerality:   	10.26%
smoke:   	4.38%
--------



and:   	78.86%
,:   	12.52%
on:   	4.05%
sweetness:   	1.74%
that:   	1.11%
--------



spice:   	28.89%
marmalade:   	22.84%
peach:   	9.86%
saffron:   	8.32%
nectar:   	4.44%
--------



.:   	57.12%
,:   	19.66%
on:   	14.07%
that:   	3.01%
flavors:   	1.82%
--------



it:   	87.38%
the:   	8.58%
a:   	0.42%
yet:   	0.34%
finishes:   	0.33%
--------



':   	99.99%
finishes:   	0.0%
is:   	0.0%
should:   	0.0%
has:   	0.0%
--------



s:   	100.0%
ll:   	0.0%
[UNK]:   	0.0%
d:   	0.0%
09:   	0.0%
--------



a:   	42.44%
lusciously:   	14.33%
rich:   	4.27%
full:   	4.21%
intensely:   	3.84%
--------



sweet:   	48.63%
ripe:   	25.16%
textured:   	5.86%
rich:   	5.7%
concentrated:   	5.05%
--------



,:   	61.25%
yet:   	17.08%
on:   	9.39%
and:   	4.54%
in:   	3.78%
--------



the:   	99.95%
its:   	0.03%
a:   	0.01%
but:   	0.0%
entry:   	0.0%
--------



palate:   	98.86%
finish:   	0.52%
midpalate:   	0.26%
attack:   	0.2%
nose:   	0.14%
--------



,:   	83.83%
but:   	9.26%
yet:   	4.58%
with:   	1.51%
and:   	0.41%
--------



but:   	94.15%
with:   	4.17%
yet:   	1.5%
and:   	0.04%
however:   	0.03%
--------



it:   	67.99%
the:   	15.36%
there:   	3.47%
with:   	3.25%
is:   	3.1%
--------



':   	99.72%
finishes:   	0.18%
should:   	0.03%
also:   	0.02%
is:   	0.02%
--------



s:   	100.0%
ll:   	0.0%
[UNK]:   	0.0%
d:   	0.0%
re:   	0.0%
--------



balanced:   	32.29%
a:   	17.13%
also:   	12.12%
chock:   	5.46%
not:   	4.65%
--------



a:   	42.88%
elegantly:   	7.15%
quite:   	6.63%
surprisingly:   	6.27%
rich:   	5.59%
--------



structured:   	93.27%
balanced:   	5.44%
honeyed:   	0.28%
complex:   	0.15%
fruity:   	0.11%
--------



,:   	47.59%
with:   	20.17%
and:   	19.28%
.:   	11.03%
by:   	0.55%
--------



with:   	94.1%
but:   	3.52%
and:   	1.8%
yet:   	0.2%
showing:   	0.11%
--------



a:   	93.26%
hints:   	2.02%
an:   	0.68%
just:   	0.46%
lingering:   	0.36%
--------



layers:   	63.25%
minerality:   	3.95%
floral:   	3.88%
fruit:   	3.8%
acidity:   	3.37%
--------



notes:   	66.72%
,:   	8.7%
tones:   	6.45%
and:   	4.71%
complexities:   	3.83%
--------



of:   	85.54%
and:   	12.26%
that:   	1.44%
.:   	0.34%
,:   	0.24%
--------



honeysuckle:   	80.37%
jasmine:   	9.24%
honey:   	4.4%
saffron:   	2.7%
potpourri:   	1.07%
--------



and:   	88.06%
,:   	11.88%
nectar:   	0.05%
.:   	0.01%
that:   	0.0%
--------



orange:   	37.74%
peach:   	15.79%
jasmine:   	9.31%
spice:   	5.84%
lemon:   	4.64%
--------



.:   	87.52%
,:   	9.47%
that:   	2.27%
nectar:   	0.28%
on:   	0.25%
--------



but:   	74.25%
with:   	12.41%
yet:   	8.35%
and:   	2.09%
finishing:   	0.71%
--------



it:   	56.72%
the:   	33.61%
is:   	3.1%
should:   	1.53%
there:   	1.02%
--------



':   	99.62%
should:   	0.23%
finishes:   	0.06%
also:   	0.03%
is:   	0.01%
--------



s:   	100.0%
ll:   	0.0%
d:   	0.0%
[UNK]:   	0.0%
re:   	0.0%
--------



balanced:   	25.68%
a:   	21.06%
also:   	18.0%
not:   	6.51%
elegantly:   	4.65%
--------



by:   	54.26%
squarely:   	28.61%
with:   	7.69%
neatly:   	2.43%
,:   	2.34%
--------



a:   	94.29%
crisp:   	1.3%
bright:   	0.61%
fresh:   	0.48%
delicate:   	0.41%
--------



long:   	30.43%
reverberating:   	22.36%
crush:   	9.73%
lingering:   	6.27%
strike:   	4.27%
--------

