# Milestone II.

Basic training and evaluation process

In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns
import numpy as np
import json
import pprint

## Data preprocessing

In [2]:
def _load_jsonl(path):
    """Parse a JSON Lines file and return a list with one decoded object per line."""
    # Iterate over the file handle instead of calling str.splitlines():
    # splitlines() also breaks on characters such as U+2028 or U+0085, which
    # may appear unescaped inside a JSON string and would cut a record in half.
    # The encoding is stated explicitly so the result does not depend on the
    # platform default.
    with open(path, encoding="utf-8") as jsonl_file:
        # blank lines (e.g. a trailing empty line) are skipped instead of
        # being handed to json.loads
        return [json.loads(line) for line in jsonl_file if line.strip()]


# load train dataset
train = _load_jsonl("data/train.dataflow_dialogues.jsonl")

# load validation data
valid = _load_jsonl("data/valid.dataflow_dialogues.jsonl")

In [3]:
# Wrap every lispress program in <bos>/<eos> markers for the seq2seq model.
# Both splits get the same treatment; the turn dictionaries are changed in place.
for dataset in (train, valid):
    for dialog in dataset:
        for turn in dialog["turns"]:
            turn["lispress_cleaned"] = "<bos> " + turn["lispress"] + " <eos>"

### Tokenization

In [4]:
from tensorflow.keras.preprocessing.text import Tokenizer
from lispress_tokenizer import tokenized_lispress

In [5]:
# BOS and EOS indicate the beginning and ending of sentences
def tokenizeLispressWithEndings(dataset):
    '''
    Store a token list for the lispress program of every turn.

    Each turn of each dialogue in ``dataset`` receives a ``lispress_tokenized``
    entry: the result of ``tokenized_lispress`` for the turn's ``lispress``
    value, with '<bos>' placed in front and '<eos>' placed at the end.

    The dataset is updated in place, so nothing is returned.
    '''
    all_turns = (turn for dialog in dataset for turn in dialog["turns"])
    for turn in all_turns:
        program_tokens = tokenized_lispress(turn['lispress'])
        turn['lispress_tokenized'] = ['<bos>'] + program_tokens + ['<eos>']

In [6]:
# Word tokenizer for the user side, fitted on every user utterance of the train dialogues
tokenizer_user = Tokenizer()
dialogue_sent = []
for dialog in train:
    for turn in dialog['turns']:
        dialogue_sent.append(turn['user_utterance']['original_text'])
tokenizer_user.fit_on_texts(dialogue_sent)
# index of "the"
tokenizer_user.word_index["the"]

1

In [7]:
# create a second tokenizer for the lispress programs (it is fitted on the train programs below)
lispress_tokenizer = Tokenizer()

# Tokenize lispress in train and valid according to a predefined logic
# (this adds the 'lispress_tokenized' token list to every turn, in place)
tokenizeLispressWithEndings(train)
tokenizeLispressWithEndings(valid)

# Tokenizer.fit_on_texts method can take list of list of strings as argument assuming the strings are tokens
# Only the train programs are collected for fitting; valid is tokenized above but not fitted on.
dialogue_sent_lispress = [turn['lispress_tokenized'] for dialog in train for turn in dialog['turns']]
lispress_tokenizer.fit_on_texts(dialogue_sent_lispress)

In [8]:
# Example tokenized lispress: the token list at index 1 of the collected train programs,
# followed by the word_counts entry of '(' and the word_index entry of ')'
print(dialogue_sent_lispress[1])
print(lispress_tokenizer.word_counts['('])
print(lispress_tokenizer.word_index[')'])


['<bos>', '(', 'Yield', ':output', '(', ':dayOfWeek', '(', 'Tomorrow', ')', ')', ')', '<eos>']
1243286
2


In [9]:
def tokenizeDialogues(dataset,tokenizer):
    """
    Convert the user utterance of every turn into token indexes.

    Two entries are written into each turn's ``user_utterance`` dictionary:
    ``tokens2`` (the index sequence produced by ``tokenizer`` for
    ``original_text``) and ``tokens2_readable`` (the same sequence mapped
    back to words through ``tokenizer.index_word``).

    Returns a pair: the index sequences grouped per dialogue, and the
    dataset itself, which has been modified in place.
    """
    tokenizedSet = []
    for dialog in dataset:
        dialog_sequences = []
        for turn in dialog['turns']:
            utterance = turn["user_utterance"]
            seq = tokenizer.texts_to_sequences([utterance["original_text"]])[0]
            # build the readable form first so both keys are written together
            readable = [tokenizer.index_word[index] for index in seq]
            utterance["tokens2"] = seq
            utterance["tokens2_readable"] = readable
            dialog_sequences.append(seq)
        tokenizedSet.append(dialog_sequences)
    return tokenizedSet,dataset

def tokenizeLispress(dataset,tokenizer):
    """
    Convert the tokenized lispress program of every turn into token indexes.

    Two entries are written directly into each turn dictionary:
    ``lispress_tokens`` (the index sequence produced by ``tokenizer`` for the
    turn's ``lispress_tokenized`` list) and ``lispress_tokens_readable`` (the
    same sequence mapped back through ``tokenizer.index_word``).

    Returns a pair: the index sequences grouped per dialogue, and the
    dataset itself, which has been modified in place.
    """
    tokenizedSet = []
    for dialog in dataset:
        dialog_sequences = []
        for turn in dialog['turns']:
            seq = tokenizer.texts_to_sequences([turn["lispress_tokenized"]])[0]
            # build the readable form first so both keys are written together
            readable = [tokenizer.index_word[index] for index in seq]
            turn["lispress_tokens"] = seq
            turn["lispress_tokens_readable"] = readable
            dialog_sequences.append(seq)
        tokenizedSet.append(dialog_sequences)
    return tokenizedSet,dataset

In [10]:
# tokenizeDialogues returns the dataset object it was given as its second value,
# so `train` / `valid` are re-bound to the same (now annotated) lists
train_tokenized,train = tokenizeDialogues(train,tokenizer=tokenizer_user)
valid_tokenized,valid = tokenizeDialogues(valid,tokenizer=tokenizer_user)

In [11]:
# train_lispress / valid_lispress refer to the same objects as train / valid:
# tokenizeLispress returns the dataset it received after annotating it in place
train_tokenized_lispress,train_lispress = tokenizeLispress(train,tokenizer=lispress_tokenizer)
valid_tokenized_lispress,valid_lispress = tokenizeLispress(valid,tokenizer=lispress_tokenizer)

In [13]:
# example: tokens generated from a message (first turn of the first train dialogue)
print("message:",train[0]["turns"][0]["user_utterance"]["original_text"])
print("tokens:",train[0]["turns"][0]["user_utterance"]["tokens2"])
print("readable tokens:",train[0]["turns"][0]["user_utterance"]["tokens2_readable"])
# NOTE: the two lispress prints read different turns of the first validation
# dialogue (index 0 for the indexes, index 2 for the readable tokens)
print("lispress tokens:",valid[0]["turns"][0]["lispress_tokens"])
print("lispress tokens readable:",valid[0]["turns"][2]["lispress_tokens_readable"])


message: what date is tomorrow?
tokens: [18, 101, 12, 22]
readable tokens: ['what', 'date', 'is', 'tomorrow']
lispress tokens: [5, 1, 7, 8, 1, 29, 12, 1, 30, 10, 1, 9, 34, 1, 62, 1, 40, 33, 1, 14, 11, 1, 17, 1, 21, 1, 27, 10, 1, 25, 2, 26, 4, 1, 28, 3, 550, 3, 2, 2, 2, 2, 2, 2, 1, 40, 33, 1, 14, 11, 1, 17, 1, 21, 1, 27, 10, 1, 25, 2, 26, 4, 1, 28, 3, 191, 3, 2, 2, 2, 2, 2, 2, 2, 78, 1, 13, 1, 89, 4, 1, 15, 58, 2, 2, 2, 74, 1, 13, 4, 1, 59, 3, 211, 423, 1095, 3, 2, 2, 18, 1, 13, 1, 45, 16, 1, 44, 41, 4, 1, 38, 3, 122, 3, 2, 2, 22, 1, 42, 32, 4, 1, 15, 99, 2, 2, 2, 2, 19, 1, 13, 4, 1, 20, 3, 500, 3, 2, 2, 2, 2, 2, 2, 6]
lispress tokens readable: ['<bos>', '(', 'yield', ':output', '(', 'createcommiteventwrapper', ':event', '(', 'createpreflighteventwrapper', ':constraint', '(', 'constraint[event]', ':attendees', '(', 'andconstraint', '(', 'andconstraint', '(', 'attendeelisthasrecipient', ':recipient', '(', 'execute', ':intension', '(', 'refer', '(', 'extensionconstraint', '(', 'recipientw

In [15]:
def flatten_nested_list(list0):
    """
    Concatenate the sub-lists of ``list0`` into a single flat list.

    Exactly one level of nesting is removed and the element order is kept.
    Example:
    [[1,2],[3]] => [1,2,3]
    """
    flat = []
    for nested in list0:
        flat.extend(nested)
    return flat

### Create train,validation,test data

In [16]:
# The three values below are used as slice bounds on the flattened turn lists.
MAX_TRAIN_DATA=-1 # slice end for the training turns; note that -1 leaves out the very last turn
MAX_VALID_DATA=-100 # validation keeps everything before the last 100 turns; the test slice starts at this index
MAX_TEST_DATA=99 # length of the test slice taken from the tail of the validation turns (its end index is -100+99 = -1, so the last turn is excluded)

In [18]:
from copy import deepcopy
def create_decoder_input(data):
    """
    Shift the sequences of every dialogue one position to the right.

    Works on a deep copy of ``data`` so the argument stays untouched: in each
    dialogue the last element is dropped and a ``[0]`` placeholder is put in
    front.  The shifted copy is returned.
    """
    shifted = deepcopy(data)
    for dialogue in shifted:
        dialogue.pop()          # discard the final element
        dialogue[:0] = [[0]]    # new first element: the placeholder sequence
    return shifted

In [20]:
# Every flattened list is built once and then cut into its train / valid / test slices.

# the user messages
_valid_user_flat = flatten_nested_list(valid_tokenized)
train_data = flatten_nested_list(train_tokenized)[:MAX_TRAIN_DATA]
valid_data = _valid_user_flat[:MAX_VALID_DATA]
test_data = _valid_user_flat[MAX_VALID_DATA:MAX_VALID_DATA+MAX_TEST_DATA]

#the program stored in the last dialogue turn -> currently they are not taken into account
_valid_shifted_flat = flatten_nested_list(create_decoder_input(valid_tokenized_lispress))
train_data_di = flatten_nested_list(create_decoder_input(train_tokenized_lispress))[:MAX_TRAIN_DATA]
valid_data_di = _valid_shifted_flat[:MAX_VALID_DATA]
test_data_di = _valid_shifted_flat[MAX_VALID_DATA:MAX_VALID_DATA+MAX_TEST_DATA]

#the current program
_valid_program_flat = flatten_nested_list(valid_tokenized_lispress)
train_data_y = flatten_nested_list(train_tokenized_lispress)[:MAX_TRAIN_DATA]
valid_data_y = _valid_program_flat[:MAX_VALID_DATA]
test_data_y = _valid_program_flat[MAX_VALID_DATA:MAX_VALID_DATA+MAX_TEST_DATA]


In [21]:
# the length of the decoder input and output
MAX_LEN_DEC=50

# generate encoder input, decoder input and decoder target sequences
def generate_dataset(encoder_input,previous_program,current_program):
    """
    Expand every program into one sample per proper prefix.

    For the program at position ``i`` and every prefix length ``k`` from 1 to
    ``len(program) - 1`` one sample is emitted:
      * encoder input:  ``encoder_input[i]`` (repeated for each prefix)
      * decoder input:  the first ``k`` tokens of the program
      * decoder target: the same window moved one token to the right
    Programs with fewer than two tokens produce no sample.

    ``previous_program`` is accepted but not used.
    Returns the three sample lists as a tuple.
    """
    data,data_di,data_y = [],[],[]
    for i,program in enumerate(current_program):
        for prefix_len in range(1,len(program)):
            data.append(encoder_input[i])
            data_di.append(program[:prefix_len])
            data_y.append(program[1:prefix_len+1])
    return data,data_di,data_y

In [22]:
# Set all expanded datasets to None before they are (re)built.
# NOTE(review): presumably this lets a re-run of the notebook release the
# previous lists first - confirm the intent.
train_data_new,train_data_new_di,train_data_new_y = None,None,None
valid_data_new,valid_data_new_di,valid_data_new_y = None,None,None
test_data_new,test_data_new_di,test_data_new_y = None,None,None

In [23]:
# Expand each split into (encoder input, decoder input, decoder target) samples.
# The second argument (the *_di lists) is passed as `previous_program`.
train_data_new,train_data_new_di,train_data_new_y = generate_dataset(train_data,train_data_di,train_data_y)
valid_data_new,valid_data_new_di,valid_data_new_y = generate_dataset(valid_data,valid_data_di,valid_data_y)
test_data_new,test_data_new_di,test_data_new_y = generate_dataset(test_data,test_data_di,test_data_y)

### Padding encoder and decoder inputs, and decoder output

In [25]:
from keras.preprocessing.sequence import pad_sequences

In [26]:
MAX_LEN=50 # length of the encoder input

# Encoder inputs are padded and truncated at the end (to MAX_LEN);
# decoder inputs and targets are padded and truncated at the front (to MAX_LEN_DEC).
def _pad_encoder(sequences):
    return pad_sequences(sequences,maxlen=MAX_LEN,dtype="int32",padding='post', truncating='post')

def _pad_decoder(sequences):
    return pad_sequences(sequences,maxlen=MAX_LEN_DEC,dtype="int32",padding='pre', truncating='pre')

encoder_train = _pad_encoder(train_data_new)
decoder_train = _pad_decoder(train_data_new_y)
decoder_train_input = _pad_decoder(train_data_new_di)

encoder_valid = _pad_encoder(valid_data_new)
decoder_valid = _pad_decoder(valid_data_new_y)
decoder_valid_input = _pad_decoder(valid_data_new_di)

encoder_test = _pad_encoder(test_data_new)
decoder_test = _pad_decoder(test_data_new_y)
decoder_test_input = _pad_decoder(test_data_new_di)

### Create embedding matrices for the embedding layers

In [27]:
import os

In [29]:
GLOVE_DIR="glove"
# maps each word of the GloVe file to its coefficient vector (float32 array)
embeddings_index = {}
# The file is opened in binary mode, so bytes.split() separates the fields on
# ASCII whitespace only; the word itself is decoded as UTF-8 afterwards.
with open(os.path.join(GLOVE_DIR,"glove.6B.100d.txt"),'rb') as f:
    # Stream the file line by line; readlines() would first build a list
    # holding every line of the file in memory.
    for line in f:
        values = line.split()
        if not values:
            # an empty line has no word field and would raise IndexError below
            continue
        word = str(values[0],encoding="utf8")
        coefs = np.asanyarray(values[1:], dtype="float32")
        embeddings_index[word] = coefs

In [30]:
# Rows of both matrices are addressed by tokenizer index; a word without an
# entry in embeddings_index keeps its all-zero row.
embedding_dimension = 100
DECODING_DEPTH = len(lispress_tokenizer.word_index)+1

# create embedding matrix for the encoder
encoder_rows = len(tokenizer_user.word_index)+1
embedding_matrix_encoder = np.zeros((encoder_rows,embedding_dimension))
for word, i in tokenizer_user.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is None:
        continue
    embedding_matrix_encoder[i] = embedding_vector

# create embedding matrix for the decoder
embedding_matrix_decoder = np.zeros((DECODING_DEPTH,embedding_dimension))
for word, i in lispress_tokenizer.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is None or i >= DECODING_DEPTH:
        continue
    embedding_matrix_decoder[i] = embedding_vector

In [31]:
def decoder_output_creater(decoder_input_data, num_samples, MAX_LEN, VOCAB_SIZE):
    """
    One-hot encode integer token sequences.

    Returns a float32 array of shape (num_samples, MAX_LEN, VOCAB_SIZE) in
    which ``result[i, j, token]`` is 1 for the token found at position ``j``
    of sequence ``i``.  Position 0 of every sequence and tokens that are not
    smaller than VOCAB_SIZE are left as all-zero rows.
    """
    one_hot = np.zeros(shape=(num_samples, MAX_LEN, VOCAB_SIZE), dtype="float32")

    for sample_idx, token_ids in enumerate(decoder_input_data):
        for step, token_id in enumerate(token_ids):
            # NOTE(review): step 0 is never encoded - confirm this is intended
            # for the sequences the callers pass in.
            if step == 0 or token_id >= VOCAB_SIZE:
                continue
            one_hot[sample_idx, step, token_id] = 1.

    return one_hot

### Create seq2seq model

In [32]:
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Embedding,Dense,Activation
from tensorflow.keras.layers import Input, LSTM, Dense

In [33]:
# Vocabulary sizes: one extra row on top of the tokenizer's word_index entries.
num_encoder_tokens = len(tokenizer_user.word_index)+1
# num_decoder_tokens is not used below; the decoder layers take DECODING_DEPTH instead
num_decoder_tokens = len(lispress_tokenizer.word_index)+1
HIDDEN_DIM = 128

print("encoder tokens",num_encoder_tokens)

# Define an input sequence and process it.
embedding_dimension=100
encoder_inputs = Input(shape=(MAX_LEN, ),dtype="int32")
# frozen embedding layer initialised from embedding_matrix_encoder
encoder_embedding = Embedding(input_dim = num_encoder_tokens,
                            output_dim = embedding_dimension,
                            input_length = MAX_LEN,
                            weights = [embedding_matrix_encoder],
                           trainable = False)(encoder_inputs)

# return_state=True makes the LSTM also return its states;
# encoder_outputs itself is not used further in this cell
encoder_LSTM = LSTM(HIDDEN_DIM, return_state=True)
encoder_outputs, state_h, state_c = encoder_LSTM(encoder_embedding)

# Set up the decoder, using the encoder states (state_h, state_c) as initial state.
decoder_inputs = Input(shape=(MAX_LEN_DEC, ),dtype="int32")
# frozen embedding layer initialised from embedding_matrix_decoder
decoder_embedding = Embedding(input_dim = DECODING_DEPTH,
                            output_dim = embedding_dimension,
                            input_length = MAX_LEN_DEC,
                            weights = [embedding_matrix_decoder],
                           trainable = False)(decoder_inputs)
# return_sequences=True: the dense layer below is applied to every decoder time step
decoder_LSTM = LSTM(HIDDEN_DIM, return_state=True, return_sequences=True)


decoder_outputs, _, _ = decoder_LSTM(decoder_embedding, initial_state=[state_h, state_c])

# softmax over DECODING_DEPTH classes at each time step
outputs = TimeDistributed(Dense(DECODING_DEPTH, activation='softmax'))(decoder_outputs)
# the model takes [encoder_inputs, decoder_inputs] and produces the per-step distributions
model = Model([encoder_inputs, decoder_inputs], outputs)

encoder tokens 16297


In [34]:
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 50)]         0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 50)]         0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 50, 100)      1629700     input_1[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 50, 100)      1260700     input_2[0][0]                    
_______________________________________________________________________________________

In [39]:
# The targets fed during training are the one-hot arrays built by decoder_output_creater.
# NOTE(review): a dense one-hot target per batch is large; integer targets with
# sparse_categorical_crossentropy might reduce memory - confirm before changing.
model.compile(optimizer="adam",loss="categorical_crossentropy",metrics=["accuracy"])

## Training

In [36]:
# data loader:
class DataGenerator(tf.keras.utils.Sequence):
    """
    Batch provider for the seq2seq model.

    ``input`` is a list of three equally long, index-aligned collections:
    ``input[0]`` encoder inputs, ``input[1]`` decoder inputs and ``input[2]``
    decoder targets as integer sequences.  The targets are one-hot encoded
    per batch with ``decoder_output_creater`` so the full one-hot tensor is
    never held in memory at once.

    Only complete batches are produced: trailing samples that do not fill a
    whole batch are left out.  With ``shuffle=True`` the sample order is
    re-drawn at the end of every epoch.  ``num_classes`` is stored but not
    used by the generator itself.
    """

    def __init__(self, input, batch_size=32, num_classes=None, shuffle=False):
        super().__init__()
        self.batch_size = batch_size
        self.input = input
        self.indices = list(range(len(input[0])))
        self.num_classes = num_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Number of complete batches per epoch."""
        return len(self.indices) // self.batch_size

    def __getitem__(self, index):
        """Return the ``index``-th batch as ``([encoder, decoder], one_hot_targets)``."""
        positions = self.index[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.indices[k] for k in positions]
        return self.__get_data(batch)

    def on_epoch_end(self):
        """Reset the sample order and shuffle it when shuffling is enabled."""
        self.index = np.arange(len(self.indices))
        if self.shuffle:
            np.random.shuffle(self.index)

    def __get_data(self, batch):
        """Gather the samples whose indices are listed in ``batch``."""
        # Select exactly the listed rows.  Slicing from batch[0] to batch[-1]
        # is only correct for contiguous ascending indices and returns wrong
        # (or no) samples once the order has been shuffled.
        rows = np.asarray(batch, dtype=int)
        encoder = np.asarray(self.input[0])[rows]
        decoder = np.asarray(self.input[1])[rows]
        targets = np.asarray(self.input[2])[rows]
        X = [encoder, decoder]
        y = decoder_output_creater(targets, len(batch), MAX_LEN_DEC, DECODING_DEPTH)

        return X, y


In [40]:
batch_size = 50

# Only the first 10000 expanded training samples are used here.
train_generator = DataGenerator([encoder_train[:10000],decoder_train_input[:10000],decoder_train[:10000]],batch_size=batch_size)
valid_generator = DataGenerator([encoder_valid,decoder_valid_input,decoder_valid],batch_size=batch_size)

# The generators already yield complete batches, so `batch_size` is not passed
# to fit(): the Keras documentation says not to specify it for Sequence inputs.
model.fit(train_generator,epochs=3,validation_data=valid_generator)

Epoch 1/3


ResourceExhaustedError:  OOM when allocating tensor with shape[50,50,12607] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
	 [[node gradient_tape/categorical_crossentropy/truediv/Neg (defined at <ipython-input-40-00c13aad770e>:4) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_train_function_10104]

Function call stack:
train_function


In [41]:
def extract_lispress_from_pred(pred,tokenizer):
    """
    Turn per-step score vectors into token indexes and readable tokens.

    ``pred`` is an iterable of predictions, each one an iterable of score
    vectors.  For every vector the position of its maximum is taken as the
    token index; indexes greater than 0 are looked up in
    ``tokenizer.index_word`` and index 0 is rendered as "UNKNOWN".

    Returns ``(lines, codes)``: the readable tokens and the indexes, one
    inner list per prediction.
    """
    lines = []
    codes = []
    for p in pred:
        code = [np.argmax(vec) for vec in p]
        line = [tokenizer.index_word[best] if best > 0 else "UNKNOWN" for best in code]
        lines.append(line)
        codes.append(code)
    return lines,codes

In [48]:
def make_prediction(sent,program,user_tokenizer,lispress_tokenizer,model,isText=True):
    """
    Decode a program for one user message, one token per model call.

    ``sent`` is raw text when ``isText`` is true, otherwise an already
    tokenized index sequence.  Decoding starts from the '<bos>' index; in each
    step the model is run on the padded message and the padded tokens decoded
    so far, and the token predicted at the last position is appended.  The
    loop stops after '<eos>' was predicted or after 100 steps.

    The question and the decoded program are printed.  ``program`` is not
    used.  Returns the decoded index sequence (starting with '<bos>').
    """
    # create input data
    seq = user_tokenizer.texts_to_sequences([sent])[0] if isText else sent
    print("Question:", [user_tokenizer.index_word[code] for code in seq])

    MAX_LENGTH = 100
    decoder_seq = [lispress_tokenizer.word_index["<bos>"]]

    def pad_decoder(token_ids):
        # same front padding / front truncation as used for the decoder inputs
        return pad_sequences([token_ids],maxlen=MAX_LEN_DEC,dtype="int32",padding='pre', truncating='pre')[0]

    # padding
    seq_pad = pad_sequences([seq],maxlen=MAX_LEN,dtype="int32",padding='post', truncating='post')[0]
    decoder_pad = pad_decoder(decoder_seq)

    last_word = None
    for _ in range(MAX_LENGTH):
        if last_word == '<eos>':
            break
        pred = model.predict([np.array([seq_pad]),np.array([decoder_pad])])[0]
        line,code = extract_lispress_from_pred([pred],lispress_tokenizer)
        # only the prediction at the final position is kept
        last_word = line[0][-1]
        decoder_seq.append(code[0][-1])
        decoder_pad = pad_decoder(decoder_seq)

    print("Program:",[lispress_tokenizer.index_word[code0] if code0>0 else "UNKNOWN" for code0 in decoder_seq])  

    return decoder_seq

In [44]:
from tensorflow.keras.models import load_model

In [46]:
model = load_model('models/model20201121_newtokenizer_50-50-1M.h5')

## Evaluation

In [47]:
# Exact-match evaluation: TP counts the test messages whose decoded program
# equals the expected program token by token, ALL counts the evaluated messages.
TP=0
ALL=0
for i,test in enumerate(test_data):
    prediction = make_prediction(test,[],user_tokenizer=tokenizer_user,lispress_tokenizer=lispress_tokenizer,model=model,isText=False)
    expected_program = test_data_y[i]
    print("Expected:",[lispress_tokenizer.index_word[code] for code in expected_program])
    ALL+=1
    same_length = len(prediction) == len(expected_program)
    if same_length and all(pred == expected for pred,expected in zip(prediction,expected_program)):
        TP+=1
    # running accuracy after each message
    print("Accuracy:",TP/ALL)

print("Accuracy:",TP/ALL)

Question: ['what', 'are', 'my', 'upcoming', 'appointments']
[5]
Program: ['<bos>', '(', 'yield', ':output', '(', 'findeventwrapperwithdefaults', ':constraint', '(', 'eventondate', ':date', '(', 'constraint[event]', ')', ':range', '(', 'nextweeklist', '#', '(', 'string', '"', 'lunch', '"', ')', ')', ')', ':range', '(', 'nextweeklist', ')', ')', ')', ')', ')', ')', ')', '<eos>']
Expected: ['<bos>', '(', 'yield', ':output', '(', 'findeventwrapperwithdefaults', ':constraint', '(', 'constraint[event]', ')', ')', ')', '<eos>']
Accuracy: 0.0
Question: ['what', 'is', 'planned']
[5]
Program: ['<bos>', '(', 'yield', ':output', '(', 'execute', ':intension', '(', 'choosecreateevent', ':index', '#', '(', 'number', '2', ')', ':intension', '(', 'refer', '(', 'actionintensionconstraint', ')', ')', ')', ')', ')', '<eos>']
Expected: ['<bos>', '(', 'yield', ':output', '(', 'findeventwrapperwithdefaults', ':constraint', '(', 'constraint[event]', ')', ')', ')', '<eos>']
Accuracy: 0.0
Question: ['need', '6p

Program: ['<bos>', '(', 'yield', ':output', '(', 'updatecommiteventwrapper', ':event', '(', 'updatepreflighteventwrapper', ':id', '(', ':id', '(', 'execute', ':intension', '(', 'refer', '(', 'extensionconstraint', '(', 'constraint[event]', ')', ')', ')', ')', ')', ':update', '(', 'constraint[event]', ':start', '(', 'constraint[datetime]', ':time', '(', '?=', '(', 'numberpm', ':number', '#', '(', 'number', '2', ')', ')', ')', ')', ')', ')', ')', ')', ')', '<eos>']
Expected: ['<bos>', '(', 'yield', ':output', '(', 'execute', ':intension', '(', 'reviseconstraint', ':rootlocation', '(', 'roleconstraint', '#', '(', 'path', '"', 'output', '"', ')', ')', ':oldlocation', '(', 'constraint[constraint[event]]', ')', ':new', '(', 'eventalldaystartingdateforperiod', ':event', '(', 'constraint[event]', ')', ':period', '(', 'todays', '#', '(', 'number', '7', ')', ')', ':startdate', '(', 'execute', ':intension', '(', 'refer', '(', 'andconstraint', '(', 'roleconstraint', '(', 'append', '#', '(', 'list[

Program: ['<bos>', '(', 'yield', ':output', '(', 'createcommiteventwrapper', ':event', '(', 'createpreflighteventwrapper', ':constraint', '(', 'constraint[event]', ':start', '(', '?=', '(', 'nexttime', ':date', '(', 'nextdow', ':dow', '#', '(', 'dayofweek', '"', 'friday', '"', ')', ')', ':time', '(', 'numberpm', ':number', '#', '(', 'number', '2', ')', ')', ')', ')', ')', ')', ')', ')', ')', '<eos>']
Expected: ['<bos>', '(', 'let', '(', 'x0', '(', 'md', ':day', '#', '(', 'number', '13', ')', ':month', '#', '(', 'month', '"', 'december', '"', ')', ')', ')', '(', 'yield', ':output', '(', 'createcommiteventwrapper', ':event', '(', 'createpreflighteventwrapper', ':constraint', '(', 'eventalldayfordaterange', ':daterange', '(', 'dateandconstraint', ':date1', 'x0', ':date2', '(', 'nextdayofmonth', 'x0', '#', '(', 'number', '15', ')', ')', ')', ':event', '(', 'constraint[event]', ':subject', '(', '?=', '#', '(', 'string', '"', 'conference', '"', ')', ')', ')', ')', ')', ')', ')', ')', '<eos>'

Program: ['<bos>', '(', 'yield', ':output', '(', 'execute', ':intension', '(', 'reviseconstraint', ':rootlocation', '(', 'roleconstraint', '#', '(', 'path', '"', 'output', '"', ')', ')', ':oldlocation', '(', 'constraint[constraint[event]]', ')', ':new', '(', 'constraint[event]', ':start', '(', 'constraint[datetime]', ':time', '(', '?=', '(', 'numberpm', ':number', '#', '(', 'number', '10.0', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')']
Expected: ['<bos>', '(', 'fencespecify', ')', '<eos>']
Accuracy: 0.1724137931034483
Question: ['and', 'in', 'november']
[5]
Program: ['<bos>', '(', 'yield', ':output', '(', 'execute', ':intension', '(', 'reviseconstraint', ':rootlocation', '(', 'roleconstraint', '#', '(', 'path', '"', 'output', '"',

Program: ['<bos>', '(', 'yield', ':output', '(', 'execute', ':intension', '(', 'confirmandreturnaction', ')', ')', ')', '<eos>']
Expected: ['<bos>', '(', 'yield', ':output', '(', 'execute', ':intension', '(', 'choosecreateevent', ':index', '#', '(', 'number', '1', ')', ':intension', '(', 'refer', '(', 'actionintensionconstraint', ')', ')', ')', ')', ')', '<eos>']
Accuracy: 0.13513513513513514
Question: ['put', 'me', 'down', 'a', 'meeting', 'with', 'tom', 'between', 'lunch', 'and', 'the', 'corporate', 'dinner']
[5]
Program: ['<bos>', '(', 'yield', ':output', '(', 'createcommiteventwrapper', ':event', '(', 'createpreflighteventwrapper', ':constraint', '(', 'constraint[event]', ':attendees', '(', 'attendeelisthasrecipient', ':recipient', '(', 'execute', ':intension', '(', 'refer', '(', 'extensionconstraint', '(', 'recipientwithnamelike', ':constraint', '(', 'constraint[recipient]', ')', ':name', '#', '(', 'personname', '"', 'abby', '"', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')

Program: ['<bos>', '(', 'let', '(', 'x0', '(', 'dateattimewithdefaults', ':date', '(', 'nextdow', ':dow', '#', '(', 'dayofweek', '"', 'friday', '"', ')', ')', ':time', '(', 'numberpm', ':number', '#', '(', 'number', '2', ')', ')', ')', ')', '(', 'yield', ':output', '(', 'createcommiteventwrapper', ':event', '(', 'createpreflighteventwrapper', ':constraint', '(', 'constraint[event]', ':end', '(', 'attendeelisthasrecipient', '(', 'timeafterdatetime', ':datetime', 'x0', ':time', '(', 'numberpm', ':number', '#', '(', 'number', '2', ')', ')', ')', ')', ':start', '(', '?=', ':time', '(', '?=', '(', 'nextdow', ':dow', '#', '(', 'number', '2', ')', ')', ')', ')', ')', ')', ')', ')', '<eos>']
Expected: ['<bos>', '(', 'let', '(', 'x0', '(', 'execute', ':intension', '(', 'refer', '(', 'extensionconstraint', '(', 'recipientwithnamelike', ':constraint', '(', 'constraint[recipient]', ')', ':name', '#', '(', 'personname', '"', 'bob', '"', ')', ')', ')', ')', ')', ')', '(', 'yield', ':output', '(', 'c

Program: ['<bos>', '(', 'yield', ':output', '(', 'updatecommiteventwrapper', ':event', '(', 'updatepreflighteventwrapper', ':id', '(', ':id', '(', 'execute', ':intension', '(', 'refer', '(', 'extensionconstraint', '(', 'constraint[event]', ')', ')', ')', ')', ')', ':update', '(', 'constraint[event]', ':start', '(', 'constraint[datetime]', ':time', '(', '?=', '(', 'numberpm', ':number', '#', '(', 'number', '2', ')', ')', ')', ')', ')', ')', ')', ')', ')', '<eos>']
Expected: ['<bos>', '(', 'yield', ':output', '(', 'execute', ':intension', '(', 'reviseconstraint', ':rootlocation', '(', 'roleconstraint', '#', '(', 'path', '"', 'output', '"', ')', ')', ':oldlocation', '(', 'constraint[constraint[event]]', ')', ':new', '(', 'eventafterdatetime', ':datetime', '(', ':end', '(', 'execute', ':intension', '(', 'refer', '(', 'extensionconstraint', '(', 'constraint[event]', ')', ')', ')', ')', ')', ':event', '(', 'constraint[event]', ')', ')', ')', ')', ')', '<eos>']
Accuracy: 0.125
Question: ['yup

Program: ['<bos>', '(', 'yield', ':output', '(', 'createcommiteventwrapper', ':event', '(', 'createpreflighteventwrapper', ':constraint', '(', 'constraint[event]', ':start', '(', '?=', '(', 'dateattimewithdefaults', ':date', '(', 'nextdow', ':dow', '#', '(', 'dayofweek', '"', 'friday', '"', ')', ')', ':time', '(', 'numberpm', ':number', '#', '(', 'number', '2', ')', ')', ')', ')', ':subject', '(', '?=', '#', '(', 'string', '"', 'lunch', '"', ')', ')', ')', ')', ')', ')', ')', ':update', '(', 'constraint[event]', ':start', '(', '?=', '(', 'tohours', '#', '(', 'number', '1', ')', ')', ')', ':start', '(', '?=', '(', 'dateattimewithdefaults', ':date', '(', 'nextdow', ':dow', '#', '(', 'dayofweek', '"', 'friday', '"', ')', ')', ':time', '(', 'numberpm', ':number', '#', '(', 'number', '2', ')', ')', ')', ')']
Expected: ['<bos>', '(', 'let', '(', 'x0', '(', 'dateattimewithdefaults', ':date', '(', 'nextdow', ':dow', '#', '(', 'dayofweek', '"', 'saturday', '"', ')', ')', ':time', '(', 'numberpm

Program: ['<bos>', '(', 'yield', ':output', '(', '>', '(', 'size', '(', ':results', '(', 'findeventwrapperwithdefaults', ':constraint', '(', 'eventondate', ':event', '(', 'constraint[event]', ':attendees', '(', 'attendeelisthasrecipientconstraint', ':recipientconstraint', '(', 'recipientwithnamelike', ':constraint', '(', 'constraint[recipient]', ')', ':name', '#', '(', 'personname', '"', 'abby', '"', ')', ')', ')', ')', ')', ')', ')', ')', ':update', '(', 'constraint[event]', ':start', '(', 'constraint[datetime]', ':time', '(', '?=', '(', 'numberpm', ':number', '#', '(', 'number', '2', ')', ')', ')', ')', ')', ')', ')', ')', '<eos>']
Expected: ['<bos>', '(', 'yield', ':output', '(', 'isfree', ':eventcandidates', '(', 'recipientavailability', ':eventconstraint', '(', 'constraint[event]', ':attendees', '(', 'attendeelisthasrecipient', ':recipient', '(', 'execute', ':intension', '(', 'refer', '(', 'extensionconstraint', '(', 'recipientwithnamelike', ':constraint', '(', 'constraint[recipie

Program: ['<bos>', '(', 'yield', ':output', '(', 'placehasfeature', ':feature', '(', 'weatherqueryapi', ':event', '(', 'singleton', '(', ':results', '(', 'findeventwrapperwithdefaults', ':constraint', '(', 'constraint[event]', ':subject', '(', '?~=', '#', '(', 'string', '"', 'lunch', '"', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')']
Expected: ['<bos>', '(', 'yield', ':output', '(', 'placehasfeature', ':feature', '#', '(', 'placefeature', '"', 'takeout', '"', ')', ':place', '(', 'singleton', '(', ':results', '(', 'findplacemultiresults', ':place', '#', '(', 'locationkeyphrase', '"', 'in-n-out', 'in', 'carson', 'city', '"', ')', ')', ')', ')', ')', ')', '<eos>']
Accuracy: 0

Program: ['<bos>', '(', 'yield', ':output', '(', 'execute', ':intension', '(', 'confirmandreturnaction', ')', ')', ')', '<eos>']
Expected: ['<bos>', '(', 'yield', ':output', '(', 'execute', ':intension', '(', 'confirmandreturnaction', ')', ')', ')', '<eos>']
Accuracy: 0.10869565217391304
Question: ['change', 'the', 'time', 'to', '2pm']
[5]
Program: ['<bos>', '(', 'yield', ':output', '(', 'updatecommiteventwrapper', ':event', '(', 'updatepreflighteventwrapper', ':id', '(', ':id', '(', 'execute', ':intension', '(', 'refer', '(', 'extensionconstraint', '(', 'constraint[event]', ')', ')', ')', ')', ')', ':update', '(', 'constraint[event]', ':start', '(', 'constraint[datetime]', ':time', '(', '?=', '(', 'numberpm', ':number', '#', '(', 'number', '2', ')', ')', ')', ')', ')', ')', ')', ')', ')', '<eos>']
Expected: ['<bos>', '(', 'yield', ':output', '(', 'createcommiteventwrapper', ':event', '(', 'createpreflighteventwrapper', ':constraint', '(', 'constraint[event]', ':start', '(', 'constrain

In [49]:
def simple_pred(sent):
    """
    Run make_prediction on the raw text ``sent`` with the notebook's
    tokenizers and model.  The result is only printed; nothing is returned.
    """
    make_prediction(sent,[],user_tokenizer=tokenizer_user,lispress_tokenizer=lispress_tokenizer,model=model,isText=True)

In [51]:
simple_pred("Add a task to my calendar")

Question: ['add', 'a', 'task', 'to', 'my', 'calendar']
Program: ['<bos>', '(', 'yield', ':output', '(', 'createcommiteventwrapper', ':event', '(', 'createpreflighteventwrapper', ':constraint', '(', 'constraint[event]', ':start', '(', '?=', '(', 'timeafterdatetime', '(', 'singleton', '(', ':results', '(', 'findeventwrapperwithdefaults', ':constraint', '(', 'constraint[event]', ':subject', '(', '?~=', '#', '(', 'string', '"', 'lunch', '"', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', ')', '<eos>']
