### Loading Imports

In [1]:
import pandas as pd
import numpy as np
import re
import spacy
import random
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D, MaxPooling1D, Dropout, Activation
#from keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

### Loading Data

In [2]:
atis_1_train = pd.read_csv("dados/Atis_1/atis.train.csv")
atis_1_val = pd.read_csv("dados/Atis_1/atis.dev.csv")
atis_1_test = pd.read_csv("dados/Atis_1/atis.test.csv")

atis_2_train = pd.read_csv("dados/Atis_2/atis_intents_train.csv")
atis_2_test = pd.read_csv("dados/Atis_2/atis_intents_test.csv")

In [3]:
print("Atis 1: ",atis_1_train.shape)
print("Atis 1: ",atis_1_val.shape)
print("Atis 1: ",atis_1_test.shape)
print("##############################")
print("Atis 2: ",atis_2_train.shape)
print("Atis 2: ",atis_2_test.shape)

Atis 1:  (4274, 4)
Atis 1:  (572, 4)
Atis 1:  (586, 4)
##############################
Atis 2:  (4833, 2)
Atis 2:  (799, 2)


### Pre processamento dos Dados
* Dataset a ser utilizado: <strong>Atis_1</strong>
* Verificar  <strong>Intents</strong>
* Verificar se existe algum <strong>valor Nan</strong>
* Escolher colunas com <strong>Tokens(sentenças) e Intents</strong>
* <strong>Eliminar</strong> caracter <strong>BOS</strong> do inicio das sentenças
* <strong>Eliminar</strong> caracter <strong>EOS</strong> do final das sentenças

In [4]:
atis_1_train.head(3)

Unnamed: 0,id,tokens,slots,intent
0,train-00001,BOS what is the cost of a round trip flight fr...,O O O O O O O B-round_trip I-round_trip O O B-...,atis_airfare
1,train-00002,BOS now i need a flight leaving fort worth and...,O O O O O O O B-fromloc.city_name I-fromloc.ci...,atis_flight
2,train-00003,BOS i need to fly from kansas city to chicago ...,O O O O O O B-fromloc.city_name I-fromloc.city...,atis_flight


In [5]:
atis_1_train['intent'].value_counts()

atis_flight                 3173
atis_airfare                 351
atis_ground_service          207
atis_airline                 139
atis_abbreviation             94
atis_aircraft                 70
atis_quantity                 41
atis_flight_time              38
atis_capacity                 29
atis_distance                 24
atis_airport                  23
atis_flight#atis_airfare      21
atis_city                     18
atis_ground_fare              18
atis_flight_no                16
atis_meal                      9
atis_restriction               3
Name: intent, dtype: int64

In [6]:
print("Valores Nan em train:\n",atis_1_train.isnull().sum())
print("#########################################################")
print("Valores Nan em val:\n",atis_1_val.isnull().sum())
print("#########################################################")
print("Valores Nan em test:\n",atis_1_test.isnull().sum())

Valores Nan em train:
 id        0
tokens    0
slots     0
intent    0
dtype: int64
#########################################################
Valores Nan em val:
 id        0
tokens    0
slots     0
intent    0
dtype: int64
#########################################################
Valores Nan em test:
 id        0
tokens    0
slots     0
intent    0
dtype: int64


In [7]:
atis_1_train = atis_1_train.loc[:,['tokens','intent']]
atis_1_val = atis_1_val.loc[:,['tokens','intent']]
atis_1_test = atis_1_test.loc[:,['tokens','intent']]

In [8]:
atis_1_train.head(3)

Unnamed: 0,tokens,intent
0,BOS what is the cost of a round trip flight fr...,atis_airfare
1,BOS now i need a flight leaving fort worth and...,atis_flight
2,BOS i need to fly from kansas city to chicago ...,atis_flight


In [9]:
# eliminando caracter indesejado do inicio BOS e final EOS sentenças
atis_1_train['tokens'] = [x.replace('BOS ','') for x in atis_1_train['tokens']]
atis_1_val['tokens'] = [x.replace('BOS ','') for x in atis_1_val['tokens']]
atis_1_test['tokens'] = [x.replace('BOS ','') for x in atis_1_test['tokens']]

atis_1_train['tokens'] = [x.replace(' EOS','') for x in atis_1_train['tokens']]
atis_1_val['tokens'] = [x.replace(' EOS','') for x in atis_1_val['tokens']]
atis_1_test['tokens'] = [x.replace(' EOS','') for x in atis_1_test['tokens']]

In [10]:
dict_Labels = {}
count = 0
for intent in atis_1_train['intent']:
    if intent not in dict_Labels:
        dict_Labels[intent] = count
        count += 1

In [11]:
print(dict_Labels)

{'atis_airfare': 0, 'atis_flight': 1, 'atis_abbreviation': 2, 'atis_ground_service': 3, 'atis_restriction': 4, 'atis_airport': 5, 'atis_quantity': 6, 'atis_meal': 7, 'atis_airline': 8, 'atis_city': 9, 'atis_flight_no': 10, 'atis_ground_fare': 11, 'atis_flight_time': 12, 'atis_flight#atis_airfare': 13, 'atis_distance': 14, 'atis_aircraft': 15, 'atis_capacity': 16}


In [12]:
atis_1_train['Label_intent'] = [dict_Labels[x] for x in atis_1_train['intent']]
atis_1_val['Label_intent'] = [dict_Labels[x] for x in atis_1_val['intent']]
atis_1_test['Label_intent'] = [dict_Labels[x] for x in atis_1_test['intent']]

In [13]:
atis_1_train["tokens"][3]

'what is the meaning of meal code s'

In [14]:
atis_1_train["tokens"][0]

'what is the cost of a round trip flight from pittsburgh to atlanta beginning on april twenty fifth and returning on may sixth'

In [15]:
atis_1_train.head(10)

Unnamed: 0,tokens,intent,Label_intent
0,what is the cost of a round trip flight from p...,atis_airfare,0
1,now i need a flight leaving fort worth and arr...,atis_flight,1
2,i need to fly from kansas city to chicago leav...,atis_flight,1
3,what is the meaning of meal code s,atis_abbreviation,2
4,show me all flights from denver to pittsburgh ...,atis_flight,1
5,show me all us air flights from atlanta to den...,atis_flight,1
6,list the nonstop flights early tuesday morning...,atis_flight,1
7,show me the flights from st. petersburg to tor...,atis_flight,1
8,i need a listing of flights from new york city...,atis_flight,1
9,show me the flights on american airlines which...,atis_flight,1


### Codificação

In [16]:
atis_1_train_label_intent = atis_1_train['Label_intent']
atis_1_train_label_intent = to_categorical(np.asarray(atis_1_train_label_intent))
atis_1_val_label_intent = atis_1_val['Label_intent']
atis_1_val_label_intent = to_categorical(np.asarray(atis_1_val_label_intent))
print(atis_1_train_label_intent.shape)
print(atis_1_val_label_intent.shape)

(4274, 17)
(572, 17)


In [17]:
# Usar apenas as sentencas de treinameto
# OOV para out of vocab
tokenizer = Tokenizer(oov_token='<OOV>')
tokenizer.fit_on_texts(atis_1_train['tokens'])

word_index = tokenizer.word_index
print(word_index)

{'<OOV>': 1, 'to': 2, 'from': 3, 'flights': 4, 'the': 5, 'on': 6, 'flight': 7, 'me': 8, 'what': 9, 'show': 10, 'san': 11, 'boston': 12, 'a': 13, 'denver': 14, 'in': 15, 'francisco': 16, 'and': 17, 'i': 18, 'atlanta': 19, 'is': 20, 'dallas': 21, 'pittsburgh': 22, 'all': 23, 'list': 24, 'baltimore': 25, 'philadelphia': 26, 'like': 27, 'are': 28, 'airlines': 29, 'that': 30, 'washington': 31, 'of': 32, 'between': 33, 'please': 34, 'morning': 35, 'leaving': 36, 'pm': 37, 'would': 38, 'fly': 39, 'city': 40, 'for': 41, 'need': 42, 'fare': 43, 'wednesday': 44, 'first': 45, 'after': 46, 'there': 47, 'oakland': 48, 'trip': 49, 'ground': 50, 'arriving': 51, 'you': 52, 'cheapest': 53, 'which': 54, 'round': 55, 'transportation': 56, "i'd": 57, 'before': 58, 'does': 59, 'class': 60, 'with': 61, 'available': 62, 'st': 63, 'afternoon': 64, 'one': 65, 'milwaukee': 66, 'new': 67, 'have': 68, 'at': 69, 'way': 70, 'fares': 71, 'american': 72, 'dc': 73, 'give': 74, 'york': 75, 'leave': 76, 'thursday': 77, 

In [18]:
def valor_maior_sentenca():
    maior_sentenca_train = atis_1_train['tokens'].str.len().max()
    maior_sentenca_val = atis_1_val['tokens'].str.len().max()
    maior_sentenca_test = atis_1_test['tokens'].str.len().max()
    maior_sentenca = max([maior_sentenca_train,maior_sentenca_test,maior_sentenca_val])
    return maior_sentenca

# tamanho do meu vocabulario voca_size adn 1 to unknow
vocab_size = len(word_index) + 1

# tamanho da maior sentenca dos dados originais
maior_sentenca = valor_maior_sentenca()

print("Tamanho do vocab: ",vocab_size," Tamanho da Sentenca: ",maior_sentenca)

Tamanho do vocab:  874  Tamanho da Sentenca:  259


In [19]:
# Transformar as sentencas dos dados de treinamento e teste para sequencias numericas. ex [[1,2,6],[65,952,15,65],[1,2]]
train_data_sentences = tokenizer.texts_to_sequences(atis_1_train['tokens'])
val_data_sentences = tokenizer.texts_to_sequences(atis_1_val['tokens'])
test_data_sentences = tokenizer.texts_to_sequences(atis_1_test['tokens'])

# normalizar o tamnho das sequencias usando padding. ex [[0,1,2,6],[65,952,15,65],[0,0,1,2]]
train_padded = pad_sequences(train_data_sentences, maxlen=maior_sentenca)
val_padded = pad_sequences(val_data_sentences, maxlen=maior_sentenca)
test_padded = pad_sequences(test_data_sentences, maxlen=maior_sentenca)

print("Conjunto de treinamento: ","\n",train_padded[0:19])
print("Conjunto de validação: ","\n",val_padded[0:19])
print("Conjunto de teste: ","\n",test_padded[0:19])

Conjunto de treinamento:  
 [[  0   0   0 ...   6 168 252]
 [  0   0   0 ...  37 118  79]
 [  0   0   0 ...   5 459 220]
 ...
 [  0   0   0 ...  12  17  48]
 [  0   0   0 ...  32 607 194]
 [  0   0   0 ... 132 305 608]]
Conjunto de validação:  
 [[  0   0   0 ... 350 168 252]
 [  0   0   0 ... 200 263  37]
 [  0   0   0 ...   5 220  46]
 ...
 [  0   0   0 ...  58 191  37]
 [  0   0   0 ...  15  31  73]
 [  0   0   0 ...  25   2  21]]
Conjunto de teste:  
 [[  0   0   0 ... 350 119   1]
 [  0   0   0 ... 200 263  37]
 [  0   0   0 ...   5 459 220]
 ...
 [  0   0   0 ...   2  90 367]
 [  0   0   0 ...  15  31  73]
 [  0   0   0 ...  53 174 258]]


### Spacy para Classificar Intents
* Erro na hora de passar os vetores para o modelo DEep learning

In [20]:
## Load the spacy model: nlp
# nlp = spacy.load('en_core_web_md')
#
##tamnho do vocab
#print(len(nlp.vocab.strings))
#
## dimension vector
#print(nlp.vocab.vectors_length)

In [21]:
#def valor_maior_sentenca():
#    maior_sentenca_train = atis_1_train['tokens'].str.len().max()
#    maior_sentenca_val = atis_1_val['tokens'].str.len().max()
#    maior_sentenca_test = atis_1_test['tokens'].str.len().max()
#    maior_sentenca = max([maior_sentenca_train,maior_sentenca_test,maior_sentenca_val])
#    return maior_sentenca

In [22]:
#count = 0
#for key, vector in nlp.vocab.vectors.items():
#    if nlp.vocab.strings[key] != None:
#        count += 1
#    #print(key, nlp.vocab.strings[key], vector[:3])

In [23]:
## valores para o modelo deep learning
#
##vocab_size = len(nlp.vocab.strings)
#vocab_size = count + 1
#
## tamanho da maior sentença - input_length
#maior_sentenca = valor_maior_sentenca()
#
## embedding
#embedding_dim = nlp.vocab.vectors_length
#print(vocab_size)
#print(embedding_dim)
#

In [24]:
# guardar valor de encoding dos intents
#def vetorizar_sentencas(sentencas,intents):
#    # Calculate the length of sentences
#    n_sentences = len(sentencas)
#
#    # Calculate the dimensionality of nlp
#    embedding_dim = nlp.vocab.vectors_length
#
#    # Initialize the array with zeros: X
#    sentencas_vetorizadas = np.zeros((n_sentences, embedding_dim))
#
#    # Iterate over the sentences
#    for idx, sentence in enumerate(sentencas):
#        # Pass each sentence to the nlp object to create a document
#        doc = nlp(sentence)
#        # Save the document's .vector attribute to the corresponding row in X
#        sentencas_vetorizadas[idx, :] = doc.vector
#
#    # encoding intents
#    count = 0
#    # Iterate over the intents
#    for intent_label in intents:
#        if(intent_label not in dict_Labels):
#            dict_Labels[intent_label] = count
#            count += 1
#
#
#    intents_encode = np.array([dict_Labels[x] for x in intents])
#    
#    return [sentencas_vetorizadas,intents_encode]
#
#train_vectors = vetorizar_sentencas(atis_1_train["tokens"],atis_1_train['intent'])
#val_vectors = vetorizar_sentencas(atis_1_val["tokens"],atis_1_val['intent'])
#test_vectors = vetorizar_sentencas(atis_1_test["tokens"],atis_1_test['intent'])
#
##tf.keras.utils.to_categorical
#train_sentencas_vetorizadas = train_vectors[0]
#train_intent_label = to_categorical(train_vectors[1])
#
#val_sentencas_vetorizadas = val_vectors[0]
#val_intent_label = to_categorical(val_vectors[1])
#
#test_sentencas_vetorizadas = test_vectors[0]
#test_intent_label = to_categorical(test_vectors[1])
#
#print("Shape: ",train_sentencas_vetorizadas.shape," type: ",train_sentencas_vetorizadas.dtype)
#print("Shape: ",train_intent_label.shape," type: ",train_intent_label.dtype)
#print("Shape: ",val_sentencas_vetorizadas.shape," type: ",val_sentencas_vetorizadas.dtype)
#print("Shape: ",val_intent_label.shape," type: ",val_intent_label.dtype)
#print("Shape: ",test_sentencas_vetorizadas.shape," type: ",test_sentencas_vetorizadas.dtype)
#print("Shape: ",test_intent_label.shape," type: ",test_intent_label.dtype)

#### Model

In [25]:
embedding_dim = maior_sentenca

In [26]:
model1 = Sequential()
model1.add(Embedding(vocab_size,embedding_dim, input_length=maior_sentenca))
model1.add(GlobalAveragePooling1D())
model1.add(Dense(150, activation='sigmoid'))
model1.add(Dropout(0.30))
model1.add(Dense(50, activation='relu'))
#model1.add(Dense(25, activation='relu'))
model1.add(Dense(17, activation='softmax'))


#model.compile(loss='categorical_crossentropy', optimizer=sgd)
model1.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [27]:
model1.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 259, 259)          226366    
_________________________________________________________________
global_average_pooling1d (Gl (None, 259)               0         
_________________________________________________________________
dense (Dense)                (None, 150)               39000     
_________________________________________________________________
dropout (Dropout)            (None, 150)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 50)                7550      
_________________________________________________________________
dense_2 (Dense)              (None, 17)                867       
Total params: 273,783
Trainable params: 273,783
Non-trainable params: 0
__________________________________________________

In [28]:
num_epochs = 50
history1 = model1.fit(train_padded, atis_1_train_label_intent,
                      validation_data=(val_padded, atis_1_val_label_intent),
                      epochs=num_epochs,
                      verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


#### Evaluate

In [29]:
model1.evaluate(train_padded,atis_1_train_label_intent)



[0.2138354629278183, 0.9293401837348938]

In [30]:
model1.evaluate(val_padded,atis_1_val_label_intent)



[0.4755301773548126, 0.8968531489372253]

In [31]:
atis_1_test_label_intent = atis_1_test['Label_intent']
atis_1_test_label_intent = to_categorical(np.asarray(atis_1_test_label_intent))

model1.evaluate(test_padded,atis_1_test_label_intent)



[0.3652675151824951, 0.8976109027862549]

#### Saving
* tokenizer
* model

In [32]:
import pickle
# saving
with open('recursos/tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [33]:
#from keras.models import load_model
#
#model1.save('../recursos/my_model.h5') 

In [34]:
#filepath = 'recursos/my_model.h5'
#
#model1.save(model1, filepath)
#
#tf.keras.models.save_model(
#    model1, filepath , overwrite=True, include_optimizer=True, save_format=None,
#    signatures=None
#)
##https://www.tensorflow.org/api_docs/python/tf/keras/models/save_model

In [35]:
#predicao =  model1.predict(test_padded)
#count = 1
#for elemento in predicao[:10]:
#    print("Sentença {} ".format(count),"\nPredição {}: ".format(count),elemento)
#    print("Best value of confidence: {}".format(np.argmax(elemento)))
#    print("Intent da Sentença: {}".format(inv_dict_labels[np.argmax(elemento)]))
#    print("\n")
#    count += 1

#### Avaliando modelo com conjunto de Teste

In [36]:
# Função pare test
def Predicao_Labels(sentenca_codificada):
    # inverter o dicionario dict_labels para retornar o intent pelo seu valor chave inteiro
    inv_dict_labels = {v: k for k, v in dict_Labels.items()}
    #print(inv_dict_labels)
    # passar a sentenca codificada
    predicao = model1.predict([sentenca_codificada])
    #print(predicao)
    
    intent_value = [inv_dict_labels[np.argmax(x)] for x in predicao]
    
    return intent_value

# lista com valores da predicao
Predicao_Labels(test_padded)[:5]

['atis_flight',
 'atis_flight',
 'atis_flight',
 'atis_abbreviation',
 'atis_flight']

In [37]:
test_labels = np.array(atis_1_test['intent'])
# Acurácia nos dados de test
from sklearn.metrics import accuracy_score # importando a biblioteca para calcular a acurácia
acuracia = accuracy_score(test_labels,np.array(Predicao_Labels(test_padded)))
print('Acurácia: %f' % acuracia)

Acurácia: 0.897611


In [38]:
from sklearn.metrics import classification_report
#print(classification_report(test_labels,np.array(Predicao_Labels(test_padded))))
print(classification_report(test_labels, np.array(Predicao_Labels(test_padded)), zero_division=0))

                          precision    recall  f1-score   support

       atis_abbreviation       0.71      0.62      0.67        16
           atis_aircraft       0.25      0.75      0.38         8
            atis_airfare       0.87      0.89      0.88        54
            atis_airline       0.60      0.83      0.70        18
            atis_airport       0.00      0.00      0.00         4
           atis_capacity       0.33      0.75      0.46         4
               atis_city       0.00      0.00      0.00         3
           atis_distance       0.00      0.00      0.00         3
             atis_flight       0.97      0.98      0.97       424
atis_flight#atis_airfare       0.00      0.00      0.00         3
          atis_flight_no       0.00      0.00      0.00         2
        atis_flight_time       0.00      0.00      0.00         7
        atis_ground_fare       1.00      0.50      0.67         4
     atis_ground_service       0.97      0.97      0.97        29
         

#### Get Intent de Senteça

In [39]:
# intent de sentenças
def Intent(sentenca,modelo):
    
    # loading model and tokenizer
    with open('recursos/tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)
        
    # load model not working
    #filepath = '../recursos/my_model.h5'
    #model = tf.saved_model.load(filepath)
    
    # codificar a sentenca

    sentenca = [sentenca]
    sentenca_encoded = tokenizer.texts_to_sequences(sentenca)
    sentenca_padded = pad_sequences(sentenca_encoded)
    #print(sentenca_padded)
    #intent = Predicao_Labels(sentenca_padded)
    
    #Inverter dict_labels para poder pegar o intent 
    inv_dict_labels = {v: k for k, v in dict_Labels.items()}
    
    # Predicao de intent apartir da sentenca codificada e o modelo
    predicao = modelo.predict(sentenca_padded)
    intent_value = [inv_dict_labels[np.argmax(x)] for x in predicao]
    
    return intent_value 

In [40]:
sentenca1='I want a list of the flights from Boston to DC'
sentenca2='Are there any airports nearby?'
sentenca3='fares from DC to NY'
print(Intent(sentenca1,model1))
print(Intent(sentenca2,model1))
print(Intent(sentenca3,model1))

['atis_flight']
['atis_ground_service']
['atis_abbreviation']


### Intent Respostas

In [41]:
# criar dicionarios para guardar as respostas por intent.
# criar alguns intents basicos greetings...

#templates
user_template = "USER : {0}" 
bot_template = "BOT : {0}"

dict_atis = dict_Labels

# keywords para identificar padrões de intents basicos
keywords = {'greet': ['hello', 'hi', 'hey'],
            'goodbye': ['bye', 'farewell'],
            'thankyou': ['thank', 'thx']}

# intents são chaves do dicionario e o valor são as respostas
responses = {'default': 'default message',
             'goodbye': 'goodbye for now',
             'greet': 'Hello you! :)',
             'thankyou': 'you are very welcome'}

# Novo dicionario de respostas do bot
update_responses = {}

for intent, value in dict_Labels.items():
    if (intent not in update_responses):
        update_responses[intent] = []

for intent,resposta in responses.items():
    if intent not in update_responses:
        update_responses[intent] = [resposta]

# Define a dictionary of patterns
patterns = {}

# Iterate over the keywords dictionary
for intent, keys in keywords.items():
    
    # Create regular expressions and compile them into pattern objects
    patterns[intent] = re.compile('|'.join(keys))
    
# Print the dicionarios
print(patterns)
print(responses)
print(update_responses)

{'greet': re.compile('hello|hi|hey'), 'goodbye': re.compile('bye|farewell'), 'thankyou': re.compile('thank|thx')}
{'default': 'default message', 'goodbye': 'goodbye for now', 'greet': 'Hello you! :)', 'thankyou': 'you are very welcome'}
{'atis_airfare': [], 'atis_flight': [], 'atis_abbreviation': [], 'atis_ground_service': [], 'atis_restriction': [], 'atis_airport': [], 'atis_quantity': [], 'atis_meal': [], 'atis_airline': [], 'atis_city': [], 'atis_flight_no': [], 'atis_ground_fare': [], 'atis_flight_time': [], 'atis_flight#atis_airfare': [], 'atis_distance': [], 'atis_aircraft': [], 'atis_capacity': [], 'default': ['default message'], 'goodbye': ['goodbye for now'], 'greet': ['Hello you! :)'], 'thankyou': ['you are very welcome']}


In [42]:
import random
# Define a function to find the intent of a message
def match_intent(message):
    matched_intent = None
    
    # para intents dentro do dicionario de padões
    for intent, pattern in patterns.items():
        # Check if the pattern occurs in the message 
        if re.search(pattern, message) is not None:
            matched_intent = intent

    return matched_intent

# Define a respond function
def respond(message):
    
    intent = None
    # Call the match_intent function
    if(match_intent(message) is not None):
        intent = match_intent(message)
    else:
        listaIntent = Intent(message,model1)
        intent = listaIntent[0]
        #print(intent)
        
    # Fall back to the default response
    key = "default"
    if intent in update_responses:
        if len(update_responses[intent]) != 0:
            key = intent
    resposta = random.choice(update_responses[key])
    
    return resposta

# Define a function that sends a message to the bot: send_message
def send_message(message):
    # Print user_template including the user_message
    print(user_template.format(message))
    # Get the bot's response to the message
    response = respond(message)
    # Print the bot template including the bot's response.
    print(bot_template.format(response))

# Send messages
send_message("hello!")

send_message("Flight from NY")

USER : hello!
BOT : Hello you! :)
USER : Flight from NY
BOT : default message


### Entidades

In [43]:
sentence = atis_1_test['tokens'][0]
print(sentence)

what are the coach flights between dallas and baltimore leaving august tenth and returning august twelve


In [44]:
import spacy
nlp = spacy.load("en_core_web_md")
doc = nlp(sentence)

In [45]:
from spacy import displacy
displacy.render(doc,style='ent',jupyter=True)
displacy.render(doc,style='dep',jupyter=True,options={'distance':100})

In [46]:
def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text+" - "+str(ent.start_char)+" - "+str(ent.end_char)+" - "+ent.label_+" - "+
                 str(spacy.explain(ent.label_)))
    else:
        print("No named entities found.")

In [47]:
show_ents(doc)

dallas - 35 - 41 - GPE - Countries, cities, states
baltimore - 46 - 55 - GPE - Countries, cities, states
august tenth - 64 - 76 - DATE - Absolute or relative dates or periods
august twelve - 91 - 104 - DATE - Absolute or relative dates or periods


In [48]:
entidades = []
#padroes
from spacy.matcher import Matcher
matcher = Matcher(nlp.vocab)
pattern = [{"POS": "DET", "OP": "?"},{"POS": "NOUN"}, {"POS": "NOUN"}]

# Add the pattern to the matcher and apply the matcher to the doc
matcher.add('Entities_PATTERN', [pattern])
matches = matcher(doc)

# Iterate over the matches and print the span text
for match_id, start, end in matches:
    print('Match found:', doc[start:end].text)
    entidade_found = doc[start:end].text
    for ent in doc.ents:
        entidades.append((entidade_found,ent.text))

Match found: the coach flights
Match found: coach flights


In [49]:
print(entidades)

[('the coach flights', 'dallas'), ('the coach flights', 'baltimore'), ('the coach flights', 'august tenth'), ('the coach flights', 'august twelve'), ('coach flights', 'dallas'), ('coach flights', 'baltimore'), ('coach flights', 'august tenth'), ('coach flights', 'august twelve')]


In [50]:
def extract_entities(sentenca):
    #sentenca = sentenca.replace("\n\n",' ')
    doc = nlp(sentenca)
    dict_entidade_values = {}
    
    #padroes
    from spacy.matcher import Matcher
    matcher = Matcher(nlp.vocab)
    pattern1 = [{"POS": "DET", "OP": "?"},{"POS": "NOUN"}, {"POS": "NOUN"}]
    pattern2 = [{"POS": "DET", "OP": "?"},{"POS": "NOUN"}, {"POS": "ADP"}]
    pattern3 = [{"POS": "NOUN"},{"POS": "ADP"},{"POS": "NOUN"}]

    # Add the pattern to the matcher and apply the matcher to the doc
    matcher.add('Entities_PATTERN', [pattern1,pattern2,pattern3])
    matches = matcher(doc)

    # Iterate over the matches and print the span text
    for match_id, start, end in matches:
        #print('Match found:', doc[start:end].text)
        #entidade_found = doc[start:end].text
        pattern_found = doc[start:end].text
        
        entidade_found = "" #pattern_found
        doc2 = nlp(pattern_found)
        #chunks = list(doc2.noun_chunks)
        #print(chunks)
        for token in doc2:
            if token.pos_ == "NOUN":
                entidade_found = entidade_found +" "+ token.text
        
        if entidade_found not in dict_entidade_values:
            
            dict_entidade_values[entidade_found] = []
        
            for ent in doc.ents:
                if ent.text not in dict_entidade_values[entidade_found]:
                    lista_value = dict_entidade_values[entidade_found]
                    lista_value.append([ent.text,ent.label_])
        
        if len(entidade_found) == 0:
            entidade_found = pattern_found
        
        
    return dict_entidade_values

In [51]:
sentence = atis_1_test['tokens'][0]
extract_entities(sentence)

{' coach flights': [['dallas', 'GPE'],
  ['baltimore', 'GPE'],
  ['august tenth', 'DATE'],
  ['august twelve', 'DATE']],
 ' flights': [['dallas', 'GPE'],
  ['baltimore', 'GPE'],
  ['august tenth', 'DATE'],
  ['august twelve', 'DATE']]}

In [52]:
displacy.render(nlp(sentence),style='ent',jupyter=True)
displacy.render(nlp(sentence),style='dep',jupyter=True,options={'distance':100})

In [53]:
sentence2 = "show me the flights from boston to denver"
extract_entities(sentence2)

{' flights': [['boston', 'GPE'], ['denver', 'GPE']]}

In [54]:
displacy.render(nlp(sentence2),style='ent',jupyter=True)
displacy.render(nlp(sentence2),style='dep',jupyter=True,options={'distance':100})

In [55]:
sentence3 = "which aircraft types are used in NYC, Auckland and California"
extract_entities(sentence3)

{' aircraft types': [['NYC', 'GPE'],
  ['Auckland', 'GPE'],
  ['California', 'GPE']]}

In [56]:
displacy.render(nlp(sentence3),style='ent',jupyter=True)
displacy.render(nlp(sentence3),style='dep',jupyter=True,options={'distance':100})

In [57]:
sentence4 = atis_1_test['tokens'][1]
extract_entities(sentence4)

{' flight': [['nashville', 'GPE'],
  ['seattle', 'GPE'],
  ['no later than 3 pm', 'TIME']]}

In [58]:
displacy.render(nlp(sentence4),style='ent',jupyter=True)
displacy.render(nlp(sentence4),style='dep',jupyter=True,options={'distance':100})

In [59]:
#doc = nlp(atis_1_test['tokens'][1])
#pos_sentences = [(token.text,token.pos_) for token in doc]
#print(pos_sentences)

In [60]:
sentence5 =  atis_1_train["tokens"][0]
extract_entities(sentence5)

{' cost': [['pittsburgh', 'GPE'],
  ['atlanta', 'GPE'],
  ['april twenty fifth', 'DATE'],
  ['may sixth', 'DATE']],
 ' trip flight': [['pittsburgh', 'GPE'],
  ['atlanta', 'GPE'],
  ['april twenty fifth', 'DATE'],
  ['may sixth', 'DATE']],
 ' flight': [['pittsburgh', 'GPE'],
  ['atlanta', 'GPE'],
  ['april twenty fifth', 'DATE'],
  ['may sixth', 'DATE']]}

In [61]:
displacy.render(nlp(sentence5),style='ent',jupyter=True)
displacy.render(nlp(sentence5),style='dep',jupyter=True,options={'distance':150})

In [62]:
sentence6 =  atis_1_train["tokens"][3]
extract_entities(sentence6)

{' meaning': [], ' meaning meal': [], ' meal code': [], ' s': []}

In [63]:
#displacy.render(nlp(sentence6),style='ent',jupyter=True)
displacy.render(nlp(sentence6),style='dep',jupyter=True,options={'distance':100})

### Chat

In [106]:
# dicionario com respostas | intent:[respostas]

respostas = {}
for intent,value in dict_Labels.items():
    if intent not in respostas:
        respostas[intent] = []
#print("Criando Dicionario de Respostas: {}".format(respostas))

#print("###########################################################")

# add respostas 
flight_respostas = ["the available flights are {0}", "the available flights are {0} from {1} to {2}",
                   "the available flights are {0} to {1} from {2}"]

if len(respostas['atis_flight']) == 0:
    respostas['atis_flight'] = flight_respostas

respostas["default"] = "default"
#print(respostas['atis_flight'])

intent_basics= {"goodbye": 'goodbye for now',
               'greet': 'Hello you! :)',
               'thankyou': 'you are very welcome'}

for key, value in intent_basics.items():
    if key not in respostas:
        respostas[key] = [value]
        
print(respostas)

{'atis_airfare': [], 'atis_flight': ['the available flights are {0}', 'the available flights are {0} from {1} to {2}', 'the available flights are {0} to {1} from {2}'], 'atis_abbreviation': [], 'atis_ground_service': [], 'atis_restriction': [], 'atis_airport': [], 'atis_quantity': [], 'atis_meal': [], 'atis_airline': [], 'atis_city': [], 'atis_flight_no': [], 'atis_ground_fare': [], 'atis_flight_time': [], 'atis_flight#atis_airfare': [], 'atis_distance': [], 'atis_aircraft': [], 'atis_capacity': [], 'default': 'default', 'goodbye': ['goodbye for now'], 'greet': ['Hello you! :)'], 'thankyou': ['you are very welcome']}


In [107]:
# funçoes para resposta do bot
def Matche_Intent(mensagem):
    matched_intent = None
    
    # intents basicos
    # keywords para identificar padrões de intents basicos
    keywords = {'greet': ['hello', 'hi', 'hey'],
                'goodbye': ['bye', 'farewell'],
                'thankyou': ['thank', 'thx']}

    # Define a dictionary of patterns
    patterns = {}

    # Iterate over the keywords dictionary
    for intent, keys in keywords.items():
        # Create regular expressions and compile them into pattern objects
        patterns[intent] = re.compile('|'.join(keys))
    #print(patterns)
    
    # para intents dentro do dicionario de padões
    for intent, pattern in patterns.items():
        # Check if the pattern occurs in the message 
        if re.search(pattern, mensagem) is not None:
           # print("Mensagem> ",mensagem)
           # print(pattern)
            #print(intent)
            matched_intent = intent
            
    return matched_intent

# funçoes para resposta do bot
def Get_Intent(mensagem):
    # model = Load modelo -- Not working so hard pass the model1
    # Qual intent da sentenca
    intent = Intent(mensagem,model1)[0]
    
    return intent

#print("O intent da sentenca é : {}".format(Matche_Intent("Flights from California")))

def Get_Entidades(mensagem):    
    entidades = extract_entities(mensagem)
    return entidades

#print("################################################################")
#print("As entidades da sentenca é : {}".format(Get_Entidades("Flights from California")))

# Dados do BD exemplo
# {'atis_flight':['companhia area, N do voo,modelo de aviao, Local partida, local destino , data saida, horario da saida]}

def Get_Dados(intencao,entidades):
    # Retornar valores de um banco de dados
    # o-comphia, 1-numero voo , 2-modeloAviao, 3-LocalPartida 4-Destino 5-Data saida, 6-horario da saida  
    # i want a flight from nashville to seattle that arrives no later than 3 pm
    Dados = [
        ['atis_flight',['AZUL','985','Carcará','Recife','Santa Catarina','15 july','15 pm']],
        ['atis_flight',['GOAL','7849','CArameLo','Boston','Recife','19 december','10 am']],
        ['atis_flight',['AZULL','019','Co','Boston','seattle','13 december','4 pm']],
        ['atis_flight',['GOAL','7989','CarYo','Orlando','New Zeland','18 april','3 pm']],
        ['atis_flight',['GOAL','7989','Boing 6549','nashville','seattle','1 may','6 pm']],
        ['atis_flight',['GOAL','7989','U98','Carpina','NY','16 june','3 pm']],
        ['atis_flight',['GOAL','7989','UNYTY7','nashville','California','16 November','3 pm']],
        ['atis_flight',['GOAL','7989','Pipa','DC','seattle','16 december','3 pm']]
    ] 
   # print("Dados do BD",Dados)
    valores_encontrados = []
    for lista in Dados:        
        if lista[0].casefold() == intencao.casefold():
            #print("Intencao igual a chave")
            data = lista[1]
            
            # ex entidade {' Flights': [['California', 'GPE')]]}
            for ent,valor in entidades.items():
                #print("Entidade ",ent ," Valor ",valor)
                
                # val - [['California', 'GPE')]]
                for val in valor:
                    #print(val)
                    # casos de Localização
                    if val[1].casefold() == "GPE".casefold():
                        if val[0].casefold() == data[3].casefold() or val[0].casefold() == data[4].casefold():
                            if(data not in valores_encontrados):
                                valores_encontrados.append(data)
                    
                    # casos Data
                    if val[1].casefold() == "DATE".casefold():
                        if (val[0] in data[5]) or (data[4] in val[0]):
                            if(data not in valores_encontrados):
                                valores_encontrados.append(data)
    
    return valores_encontrados

def Resposta(mensagem):
    resposta_bot = []
    intent = None
    
    # Compor a resposta do bot    
    # Call the match_intent function
    if(Matche_Intent(mensagem) is not None):
        intent = match_intent(mensagem)
        
        # Fall back to the default response
        key = "default"
        if intent in respostas:
            if len(respostas[intent]) != 0:
                key = intent
        resposta_bot = random.choice(respostas[key])
        
    else:
        intent = Intent(mensagem,model1)[0]
        
        # Fall back to the default response
        key = "default"
        if intent in respostas:
            if len(respostas[intent]) != 0:
                key = intent
        temp_rep = random.choice(respostas[key])

        Lista_dados = Get_Dados(intent,extract_entities(mensagem))
        
        #print(Lista_dados)

        possiveis_respostas = []
        
        for dados in Lista_dados:
            available_flight = "Voo "+dados[1]+" from "+dados[0]
            rep = temp_rep.format(available_flight,dados[3],dados[4])
            #print("Rep: ",rep)
            possiveis_respostas.append(rep)
        
        if(len(possiveis_respostas)==0):
            resposta_bot =random.choice(respostas['default'])
        else:
            resposta_bot = random.choice(possiveis_respostas)
       
    return resposta_bot

#print("################################################################")

#print("Resposta do Bot : {}".format(Resposta("Flights from California")))


In [108]:
Get_Dados(Intent("Flight from Recife",model1)[0],extract_entities("Flight from Recife"))

[['AZUL', '985', 'Carcará', 'Recife', 'Santa Catarina', '15 july', '15 pm'],
 ['GOAL', '7849', 'CArameLo', 'Boston', 'Recife', '19 december', '10 am']]

In [109]:
Get_Dados(
    Intent("i want a flight from nashville to seattle that arrives no later than 3 pm",model1)[0],
    extract_entities("i want a flight from nashville to seattle that arrives no later than 3 pm"))

[['AZULL', '019', 'Co', 'Boston', 'seattle', '13 december', '4 pm'],
 ['GOAL', '7989', 'Boing 6549', 'nashville', 'seattle', '1 may', '6 pm'],
 ['GOAL', '7989', 'UNYTY7', 'nashville', 'California', '16 November', '3 pm'],
 ['GOAL', '7989', 'Pipa', 'DC', 'seattle', '16 december', '3 pm']]

In [110]:
# Define a function that sends a message to the bot: send_message
def send_message(message):
    # Print user_template including the user_message
    print(user_template.format(message))
    # Get the bot's response to the message
    response = Resposta(message)
    # Print the bot template including the bot's response.
    print(bot_template.format(response))
    
    


In [111]:
def chatbot():
    while True:
        message = input("Send Message: ")
        print("\n")
        if(message == 'exit'):
            break
        else:
            send_message(message)
chatbot()

Send Message: hello


USER : hello
BOT : Hello you! :)
Send Message: Flights from Recife


USER : Flights from Recife
BOT : the available flights are Voo 7849 from GOAL from Boston to Recife
Send Message: Flights from California


USER : Flights from California
BOT : the available flights are Voo 7989 from GOAL
Send Message: I want the flights of Orlando


USER : I want the flights of Orlando
BOT : the available flights are Voo 7989 from GOAL to Orlando from New Zeland
Send Message: Ok, thank you


USER : Ok, thank you
BOT : you are very welcome
Send Message: bye


USER : bye
BOT : goodbye for now
Send Message: exit


