# Persona Intent Parsing using word embeddings

In [1]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from persona.preprocess import (
    prepare_json_data, pad_sequences, integer_encode, 
    one_hot_encode, one_hot_encode_target)
import random

## Preprocess the data

In [3]:
# Sequence length shared by the loading, padding and encoding steps.
MAX_SEQ_LEN = 15

# Load the intents file; yields the input/output word models and the list of
# (sentence, intent) pairs used by the cells below.
(input_word_model,
 output_word_model,
 pairs) = prepare_json_data(
    'input', 'output', './data/persona.intents.json', MAX_SEQ_LEN)

READ 816 sentence pairs
Trimmed to 816 sentence pairs
Counting words...
Counted Words:
input 244
output 53


In [4]:
# Split the pairs into two parallel lists: input sentences and intent labels.
input_seqs = [pair[0] for pair in pairs]
intent = [pair[1] for pair in pairs]

# Print 10 randomly chosen (sentence --> intent) examples as a sanity check.
# randrange() excludes the upper bound; randint(0, len(pairs)) is inclusive
# and could return len(pairs), raising IndexError on the lookups below.
for _ in range(10):
    rando = random.randrange(len(pairs))
    print(input_seqs[rando], "-->", intent[rando])

open {place} in a file manager  --> open.place
app with max mem --> mem.max.process
what s using the mem --> mem.max.process
stop recording --> record.end
program with most mem --> mem.max.process
erase my todo list --> remove.all
what s the forecast --> weather.current
python how to do {action} --> ask
program using max mem --> mem.max.process
what process is using up the cpu --> cpu.max.process


#### Pad the input
This step adds the PAD, SOS, EOS, and UNK tokens to each sequence.

In [5]:
# Pad every input sentence using the shared MAX_SEQ_LEN setting.
padded_input = pad_sequences(input_seqs, MAX_SEQ_LEN)
# Print one padded sequence (index 1) to inspect the result.
print(padded_input[1])

['SOS', 'why', 'did', '{skill}', 'fail', 'EOS', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD', 'PAD']


#### Integer Encode

We encode the sentences as integers so they can be used with embeddings. Each integer serves as an index into what is essentially a lookup table that returns the latent vector for that word.

In [6]:
# Convert the padded token sequences to integer indices via the input word model.
int_encoded_input = integer_encode(padded_input, input_word_model, MAX_SEQ_LEN)

# Show the encoded array's shape, then its first row, one per line.
print(int_encoded_input.shape, int_encoded_input[0], sep="\n")

(816, 15)
[ 1.  4.  5.  6.  7.  8.  9.  2.  0.  0.  0.  0.  0.  0.  0.]


#### One hot Encode
We still one-hot encode the intents so we can use them as classification targets.

In [7]:
# The intent list is wrapped in an outer list before encoding, and [0] takes
# the first (only) element of the result back out.
# NOTE(review): presumably one_hot_encode expects a batch of sequences and
# len(intent) acts as the sequence length -- confirm against persona.preprocess.
one_hot_output = one_hot_encode([intent], output_word_model, len(intent))[0]
# Print the shape of the encoded targets.
print(one_hot_output.shape)

(816, 53)


## Build and Train the Model

In [8]:
from persona.intent.model import IntentModel

Using TensorFlow backend.


In [14]:
# Model dimensions come from the word-model sizes; latent_vec is a fixed setting.
# NOTE(review): the saved summary shows the Embedding layer 53 wide (equal to
# output_dim) and the LSTM 64 wide, so latent_vec seems to size the LSTM only
# -- confirm in IntentModel.
input_dim = input_word_model.n_words
output_dim = output_word_model.n_words
latent_vec = 64

# Create the "embeddings" IntentModel and call it with the training data and
# dimensions; the object returned by that call is the one trained and used later.
model = IntentModel("embeddings")(
    int_encoded_input, one_hot_output, input_dim, output_dim, latent_vec)
model.train(summary=True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, None, 53)          12932     
_________________________________________________________________
lstm_3 (LSTM)                (None, 64)                30208     
_________________________________________________________________
dense_3 (Dense)              (None, 53)                3445      
Total params: 46,585
Trainable params: 46,585
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32

Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [None]:
from persona.preprocess import pad_sequence

# Interactive demo: read a sentence, encode it the same way as the training
# inputs, and print the predicted intent with its confidence.
# Submit an empty line (or interrupt the kernel) to stop.
try:
    while True:
        _input = input("input: ")
        if not _input.strip():
            # A blank line ends the session without needing a kernel interrupt.
            break
        # Use a query-specific name: assigning to `padded_input` here would
        # overwrite the padded training set built earlier in the notebook, so
        # re-running the integer-encode cell would encode one sentence only.
        # NOTE(review): the raw text is not normalized before padding; tokens
        # unseen in training presumably map to UNK -- confirm in pad_sequence.
        padded_query = [pad_sequence(_input, MAX_SEQ_LEN).split()]
        int_encoded = integer_encode(padded_query, input_word_model, MAX_SEQ_LEN)
        print(int_encoded)
        prediction, confidence = model.decode(int_encoded, output_word_model)
        print("intent: ", prediction, confidence)
except (KeyboardInterrupt, EOFError):
    # Stop quietly on a kernel interrupt or when stdin is exhausted.
    pass

input: what time is it
[[  1.  10.  93.  60.  22.   2.   0.   0.   0.   0.   0.   0.   0.   0.
    0.]]
intent:  time 0.978081
input: cpu usage
[[   1.  133.  144.    2.    0.    0.    0.    0.    0.    0.    0.    0.
     0.    0.    0.]]
intent:  cpu.total.usage 0.853811
input: what is the weather like
[[  1.  10.  60.  29.  34.  55.   2.   0.   0.   0.   0.   0.   0.   0.
    0.]]
intent:  weather.current 0.999992
input: how's the weather
[[  1.   3.  29.  34.   2.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.]]
intent:  weather.current 0.968483
