In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from helpers import *
import numpy as np

In [3]:
data = [ 
    'data/social_cocktail.json',
    'data/liquor.json'
]

descriptions, names = [], []

for d in data:
    descriptions += load_data(d, field='description')
    names += load_data(d, field='name')

print('There are {} recipes in the database.'.format(len(descriptions)))

There are 1153 recipes in the database.


In [4]:
descriptions = [clean_string(x) for x in descriptions]
X_desc = flatten_list(descriptions)

names = [clean_string(x) for x in names]
X_names = flatten_list(names)

In [14]:
t = set(X_desc)
print(len(t))

1266


In [5]:
# Limit the vocabulary size
vocabulary_size=700
X_limit = limit_vocabulary(X_desc, vocabulary_size=vocabulary_size)

# One-hot encode the data
X_limit_transformed, label_encoder, onehot_encoder = encode_categorical(X_limit)
print('There are {} words with a vocabulary size of {}'.format(*X_limit_transformed.shape))

# Transform data into sequences and predictions
seq_length = 15
X, y = sequence_transform(X_limit_transformed, seq_length)
print('The sequence length is {}'.format(seq_length))
print('Observation shape is {}, label shape is {}'.format(X.shape, y.shape))

There are 31638 words with a vocabulary size of 700
The sequence length is 15
Observation shape is (31623, 15, 700), label shape is (31623, 700)


In [6]:
N = X.shape[0]
X_train, y_train = X[:int(N*0.95)], y[:int(N*0.95)]
X_val, y_val = X[int(N*0.95):], y[int(N*0.95):]

print(X_train.shape, y_train.shape, X_val.shape, y_val.shape)

(30041, 15, 700) (30041, 700) (1582, 15, 700) (1582, 700)


### Create the Neural Network Models

In [7]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM, GRU
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [11]:
model = Sequential()
# model.add(Dense(512, input_dim=X.shape[1], activation='relu'))
# model.add(Dense(256, input_dim=X.shape[1], activation='relu'))
# model.add(Dense(128, activation='relu'))
# model.add(Dense(64, activation='relu'))

model.add(GRU(512, input_shape=(X.shape[1], X.shape[2]), activation='relu'))
# model.add(Dropout(0.2))
# model.add(GRU(128, input_shape=(X.shape[1], X.shape[2]), activation='relu'))
# model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', metrics=['categorical_accuracy'], optimizer='adam')
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_1 (GRU)                  (None, 512)               1863168   
_________________________________________________________________
dense_2 (Dense)              (None, 700)               359100    
Total params: 2,222,268
Trainable params: 2,222,268
Non-trainable params: 0
_________________________________________________________________
None


In [12]:
# Create the model
batch_size = 32
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=1, batch_size=batch_size)

Train on 30041 samples, validate on 1582 samples
Epoch 1/1


<keras.callbacks.History at 0x7f21a8cf6b00>

In [375]:
from helpers import *

start = np.random.randint(0, len(X)-1)
seed = ''

for i in range(seq_length):
    x = X[start, i].reshape(1, -1)
    seed += reverse_encoding(x, label_encoder, onehot_encoder)[0] + ' '
    
seed = seed.strip()
observation = X[start]

result, prediction = [], None
str_len = 100

for i in range(str_len):
    prediction = predict_observation_2(
        model, np.array([observation]), 32, label_encoder, onehot_encoder, prediction, raw_prediction=True
    )
    result.append(prediction)    
    observation = np.vstack((observation[1:, :], prediction))
    
result_np = np.array(result).reshape(str_len, vocabulary_size)
txt = reverse_encoding(result_np, label_encoder, onehot_encoder)
print(seed, txt)

fill highball glass with ice and add tequila and salt . squeeze lime half into ['glass' 'along' 'liquid' '.' 'place' 'syrup' 'and' 'lime' 'liqueur'
 '.pour' 'and' 'coat' 'will' 'in' 'rocks' 'glass' '.' 'chill' 'overnight'
 '.' 'add' 'ginger' 'ale' 'and' 'cinnamon' '.' 'stir' 'gently' '.' '|'
 'build' 'all' 'ingredients' 'into' 'glass' '.' 'add' 'ice' '.' 'stir'
 'vigorously' 'for' '20' 'seconds' '.' 'garnish' 'with' 'lemon' 'wedge' '.'
 '|' 'pour' 'gin' 'bitters' 'and' 'grenadine' 'syrup' 'into' 'stout' 'to'
 'churn' 'until' 'any' 'then' 'add' 'vodka' 'sugar' '.' 'taste' '.' '|'
 'in' 'shaker' 'add' 'sugar' 'juice' 'and' 'rum' '.' 'add' 'remaining'
 'ingredients' '.' 'shake' 'well' '.' 'double' 'strain' 'into' 'highball'
 'glass' '.' 'top' 'up' 'with' 'soda' 'water' 'garnish' 'and' 'serve']


In [218]:
from helpers import *

start = np.random.randint(0, len(X)-1)
seed = ''

for i in range(seq_length):
    x = X[start, i].reshape(1, -1)
    seed += reverse_encoding(x, label_encoder, onehot_encoder)[0] + ' '
    
seed = seed.strip()
observation = X[start]

result, prediction = [], None
str_len = 100

for i in range(str_len):
    prediction = predict_observation_2(
        model, np.array([observation]), 32, label_encoder, onehot_encoder, prediction, raw_prediction=True
    )
    result.append(prediction)    
    observation = np.vstack((observation[1:, :], prediction))
    
result_np = np.array(result).reshape(str_len, vocabulary_size)
txt = reverse_encoding(result_np, label_encoder, onehot_encoder)
print(seed, txt)

over ice . pour ginger beer over it . squeeze in lime wedge and drop ['it' 'in' '.' '|' 'pour' 'ingredients' 'into' 'cocktail' 'shaker' 'with'
 'ice' 'cubes' '.' 'shake' 'well' '.' 'strain' 'into' 'chilled' 'cocktail'
 'glass' '.' 'garnish' 'with' 'orange' 'spiral' '.' '|' 'mix' 'tequila'
 'syrup' 'and' 'orange' 'juice' 'into' 'cocktail' 'shaker' 'with' 'ice'
 'cubes' '.' 'stir' 'well' 'for' '20' 'seconds' '.' 'strain' 'into'
 'chilled' 'cocktail' 'glass' '.' '|' 'place' 'lime' 'mixture' 'into' 'old'
 'fashioned' 'glass' 'and' 'then' 'add' '3' 'ice' 'directly' 'at' 'bar'
 'spoon' '.' 'garnish' 'with' '2' 'over' 'sugar' '.' '|' 'shake' 'of' 'cup'
 'ingredients' 'in' 'chilled' 'coffee' 'glass' '.' 'add' 'campari' 'and'
 'then' 'shake' 'well' '.' 'strain' 'into' 'old' 'fashioned' 'glass' '.']


In [29]:
model_file = 'model.json'
weights_file = 'weights.h5'

def save_model_to_json(model, model_file, weights_file):
    model_json = model.to_json()
    with open(model_file, "w") as json_file:
        json_file.write(model_json)
    model.save_weights(weights_file)
    
save_model_to_json(model, model_file, weights_file)