# Model 2

## Model Overview

Model 2 is an extension of the basic model, 1, with 3 LSTM layers, with dropout and normalization layers included.

Similar to Model 2, this model is trained on the Wine Spectator Data Set only.

## Imports

In [1]:
import tensorflow as tf
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import re
import pickle

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout, LayerNormalization
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

## Functions

In [23]:
def tasting_note(seed_text, length):
    '''Creates a tasting note based on the predictions from the model
    '''
    
    for _ in range(length):
        # Predict next word based on current seed text
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predict_x=model.predict(token_list)
        classes_x=np.argmax(predict_x,axis=-1)
        
        # Check for the right word
        for word, index in tokenizer.word_index.items():
            if index == classes_x:
                output_word = word
                break
        
        if index == 8:
            break        
                
        # Update seed text with the new word
        seed_text += " " + output_word
        
    return seed_text

## Loading  & Pre-Processing Data

In [3]:
wine_df = pd.read_csv('wine_df_small.csv').drop(columns = 'Unnamed: 0')

In [4]:
for row in wine_df.index:
    wine_df['wine_notes'][row] = re.sub('\—[a-zA-Z]+', '', wine_df['wine_notes'][row])

In [5]:
wine_notes = [x.lower() + '<end>' for x in wine_df['wine_notes']] # List of Strings

In [6]:
tokenizer = Tokenizer()

corpus = wine_notes

In [7]:
tokenizer.fit_on_texts(corpus)                 # Create Tokenizer Vocab
total_words = len(tokenizer.word_index) + 1

print(tokenizer.word_index)                    # Tokenizer Word index
print(total_words)

{'and': 1, 'cases': 2, 'the': 3, 'with': 4, 'of': 5, 'a': 6, 'through': 7, 'end': 8, 'made': 9, 'flavors': 10, 'now': 11, 'drink': 12, 'finish': 13, 'imported': 14, 'this': 15, '000': 16, 'notes': 17, 'cherry': 18, 'from': 19, 'on': 20, 'acidity': 21, 'tannins': 22, 'black': 23, 'that': 24, 'to': 25, 'red': 26, 'best': 27, 'fruit': 28, 'by': 29, 'in': 30, 'are': 31, 'currant': 32, '1': 33, '500': 34, 'plum': 35, 'well': 36, 'spice': 37, 'tea': 38, 'accents': 39, 'is': 40, 'white': 41, 'cabernet': 42, 'expressive': 43, 'shows': 44, 'rich': 45, 'dark': 46, 'long': 47, 'vibrant': 48, '2022': 49, 'juicy': 50, '3': 51, 'blackberry': 52, '2': 53, 'fresh': 54, 'raspberry': 55, 'dried': 56, 'aromas': 57, '2023': 58, 'texture': 59, 'fine': 60, 'hints': 61, 'mineral': 62, 'medium': 63, 'polished': 64, 'licorice': 65, '4': 66, '2025': 67, 'but': 68, 'balanced': 69, 'richness': 70, 'violet': 71, 'refined': 72, 'apple': 73, 'details': 74, 'lemon': 75, 'sauvignon': 76, 'light': 77, 'anise': 78, 'liv

In [8]:
# Tokenizing words and setting them back into a list
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

# Pad Sequences 
max_sequence_len = max([len(x) for x in input_sequences])
input_sequences = np.array(pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre'))

# Create Predictors and Labels - Xs are now all the words preceding the next word, Y
xs, labels = input_sequences[:,:-1],input_sequences[:,-1]

# Setting Y to categorical for softmax function later
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

## Tensorflow Model

In [9]:
n_epochs = 20

model = Sequential()
model.add(Embedding(total_words, 2500, input_length=max_sequence_len-1))

model.add(Bidirectional(LSTM(150, return_sequences = True)))
model.add(Dropout(0.1))
model.add(LayerNormalization(epsilon=1e-6))

model.add(Bidirectional(LSTM(150, return_sequences = True)))
model.add(Dropout(0.1))
model.add(LayerNormalization(epsilon=1e-6))

model.add(Bidirectional(LSTM(150)))
model.add(Dropout(0.1))
model.add(LayerNormalization(epsilon=1e-6))

model.add(Dense(total_words, activation='softmax'))
adam = Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
#earlystop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=0, mode='auto')
history = model.fit(xs, ys, epochs=n_epochs, verbose=1)
print(model.summary)
print(model)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
<bound method Model.summary of <keras.engine.sequential.Sequential object at 0x000001CC8816CC88>>
<keras.engine.sequential.Sequential object at 0x000001CC8816CC88>


In [22]:
tokenizer.texts_to_sequences(['<end>'])

[[8]]

In [31]:
tasting_note('Aromatic', 75)

'Aromatic and fresh with a savory that underscores well defined and plush another show finish drink now through 2027 5'

In [25]:
tasting_note('Lush', 75)

'Lush and juicy with a minerally backbone with a minerally backbone of dark of and saline flavors are well in good and saline tart and filled with a savory laced and saline laced and expressive tart backbone with a saline dark of drink now through 2025 9 cases made'

In [26]:
tasting_note('Red Wine', 75)

'Red Wine with mustard violet and rose hip by tannins leading and rose hip and rose hip to leading and rose hip and rose hip well to and rose hip to leading and rose hip and rose hip to the palate and rose hip the finish drink now through 2023 500 cases'

In [27]:
tasting_note('Wine', 75)

'Wine and huckleberry flavors of and harmonious a minerally backbone of dark plum and licorice root drink now through 2028 cases made'

In [28]:
tasting_note('Keyboard', 50)

'Keyboard and purity with a minerally to supple off and green tart flavors matched minerality out note with a spicy of ground and grated red franc show underpinning to the finish drink now through 2027 5 cases made'

In [29]:
tasting_note('Bukit Pasoh', 50)

'Bukit Pasoh and purity with a minerally to supple off and green tart flavors matched minerality out note with a spicy of ground and grated red franc show underpinning to the finish drink now through 2027 5 cases made'

In [30]:
model.save('model_1A.h5')

In [17]:
# model = tf.keras.models.load_model('model_1.h5')
# max_sequence_len = 72

In [18]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 72, 2500)          2380000   
_________________________________________________________________
bidirectional (Bidirectional (None, 72, 300)           3181200   
_________________________________________________________________
dropout (Dropout)            (None, 72, 300)           0         
_________________________________________________________________
layer_normalization (LayerNo (None, 72, 300)           600       
_________________________________________________________________
bidirectional_1 (Bidirection (None, 72, 300)           541200    
_________________________________________________________________
dropout_1 (Dropout)          (None, 72, 300)           0         
_________________________________________________________________
layer_normalization_1 (Layer (None, 72, 300)           6