# Inicializando o projeto (importações)

In [2]:
import json
import nltk
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()
import pickle
import numpy as np
import tflearn
import tensorflow as tf
import random

# Importando o arquivo intents.json

In [3]:
# Load the intent definitions (tags, patterns, responses) used for training and replies.
# JSON text is UTF-8 by specification; passing the encoding explicitly avoids the
# platform default (e.g. cp1252 on Windows) corrupting non-ASCII characters.
with open('intents.json', encoding='utf-8') as json_data:
    intents = json.load(json_data)

# Inicializando variáveis e tokenizando o arquivo intents.json

In [4]:
# Accumulators filled while walking every pattern of every intent.
words = []          # all tokens seen (stemmed and de-duplicated further down)
classes = []        # intent tags that own at least one pattern
documents = []      # (token list, tag) pairs, one per pattern
ignore_words = ['?']

for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        tokens = nltk.word_tokenize(pattern)
        words += tokens
        documents.append((tokens, tag))
        if tag not in classes:
            classes.append(tag)

# Vocabulary: lowercase + stem every token that is not ignored, then sort the unique stems.
stems = {stemmer.stem(token.lower()) for token in words if token not in ignore_words}
words = sorted(stems)

classes = sorted(set(classes))

print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique stemmed words", words)

37 documents
9 classes ['age', 'goodbye', 'greeting', 'hours', 'introduction', 'name', 'payments', 'servings', 'shop']
57 unique stemmed words ['a', 'acceiv', 'ag', 'am', 'anyon', 'ar', 'buy', 'cal', 'card', 'cash', 'could', 'credit', 'cya', 'day', 'do', 'eat', 'get', 'good', 'goodby', 'guy', 'hav', 'hello', 'hi', 'hour', 'how', 'i', 'id', 'is', 'it', 'lat', 'leav', 'lik', 'mastercard', 'menu', 'nam', 'of', 'old', 'on', 'op', 'reccommend', 'see', 'serv', 'shop', 'should', 'someth', 'tak', 'the', 'ther', 'tim', 'to', 'today', 'up', 'what', 'when', 'who', 'yo', 'you']


# Criando nossos dados de treinamento

In [5]:
# Build one (bag-of-words, one-hot tag) pair per document.
training = []
output = []  # not used by the cells shown here; kept in case other code reads it
output_empty = [0] * len(classes)
for doc in documents:
    # Stem the pattern's tokens once; a set makes the membership test below O(1).
    pattern_words = {stemmer.stem(word.lower()) for word in doc[0]}
    # 1 where the vocabulary word occurs in this pattern, 0 otherwise.
    bag = [1 if w in pattern_words else 0 for w in words]

    # One-hot row marking this document's tag.
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

# Shuffle the pairs together so features and labels stay aligned.
random.shuffle(training)

# Split the pairs directly. Going through np.array(training) is unnecessary and, because
# bag and output_row have different lengths, raises ValueError on NumPy >= 1.24
# (ragged nested sequences are no longer converted implicitly).
train_x = [features for features, _ in training]
train_y = [label for _, label in training]


# Start from an empty graph so re-running this cell does not stack layers on a previous one.
tf.reset_default_graph()

n_features = len(train_x[0])   # length of one bag-of-words vector
n_classes = len(train_y[0])    # length of one one-hot label row

# Input -> two 8-unit fully connected layers -> softmax over the classes -> regression head.
net = tflearn.input_data(shape=[None, n_features])
for _ in range(2):
    net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, n_classes, activation='softmax')
net = tflearn.regression(net)



Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor

Instructions for updating:
keep_dims is deprecated, use keepdims instead


# Definindo, treinando e salvando o modelo (com logs para o TensorBoard)

In [6]:
# Wrap the network in a DNN model; training logs are written under ./tflearn_logs.
model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')

# Train on the shuffled bag-of-words data, then write the model to disk.
model.fit(
    train_x,
    train_y,
    n_epoch=3000,
    batch_size=8,
    show_metric=True,
)
model.save('model.tflearn')

Training Step: 14999  | total loss: [1m[32m0.09838[0m[0m | time: 0.006s
| Adam | epoch: 3000 | loss: 0.09838 - acc: 0.9503 -- iter: 32/37
Training Step: 15000  | total loss: [1m[32m0.09134[0m[0m | time: 0.008s
| Adam | epoch: 3000 | loss: 0.09134 - acc: 0.9553 -- iter: 37/37
--
INFO:tensorflow:C:\Users\victo\Documents\chatbot\model.tflearn is not in all_model_checkpoint_paths. Manually adding it.


# Salvando estrutura de dados

In [7]:
# Persist the vocabulary, class list and training matrices so they can be reloaded later.
# The context manager guarantees the file is flushed and closed even if dump() raises.
with open("training_data", "wb") as training_file:
    pickle.dump(
        {'words': words, 'classes': classes, 'train_x': train_x, 'train_y': train_y},
        training_file,
    )

# Carregando estrutura de dados

In [8]:
# Reload the vocabulary, class list and training matrices written by the saving cell.
# NOTE: pickle.load can execute arbitrary code - only open a "training_data" file
# that you produced yourself.
with open("training_data", "rb") as training_file:
    data = pickle.load(training_file)
words = data['words']
classes = data['classes']
train_x = data['train_x']
train_y = data['train_y']

import json  # redundant with the import in the first cell; harmless
# Explicit UTF-8 so the platform default encoding cannot corrupt non-ASCII text.
with open('intents.json', encoding='utf-8') as json_data:
    intents = json.load(json_data)

# Carregando nosso modelo previamente gerado

In [9]:
# Restore the parameters saved by model.save('model.tflearn') into the existing `model`.
# NOTE(review): `model` is created in the training cell, so this cell cannot run on a
# fresh kernel unless the network and DNN wrapper are rebuilt first.
model.load('./model.tflearn')

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from C:\Users\victo\Documents\chatbot\model.tflearn


# Métodos de limpeza de sentença e bag of words

In [10]:
def clean_up_sentence(sentence):
    """Tokenize `sentence` and return its tokens lowercased and stemmed.

    Uses nltk.word_tokenize for splitting and the module-level `stemmer`
    for stemming; the returned list keeps the token order of the sentence.
    """
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

def bow(sentence, words, show_details=False):
    """Encode `sentence` as a bag-of-words vector over the vocabulary `words`.

    Args:
        sentence: raw text; it is tokenized and stemmed by clean_up_sentence.
        words: vocabulary list; position i of the result corresponds to words[i].
        show_details: when true, print each vocabulary word that was matched.

    Returns:
        A numpy array of len(words) entries, 1 where the word occurs in the
        sentence and 0 elsewhere.
    """
    tokens = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            bag[position] = 1
            if show_details:
                print("found in bag: %s" % vocab_word)

    return np.array(bag)

# Métodos de classificação e teste de resposta

In [11]:
# NOTE(review): not referenced by any code shown in this notebook; classify() returns
# every class regardless of score.
ERROR_THRESHOLD = 0.25


def classify(sentence):
    """Rank every class for `sentence`, best score first.

    The sentence is encoded with bow() against the global vocabulary `words`
    and passed to the global `model`.

    Returns:
        A list of (class name, score) tuples, one per model output, sorted by
        score in descending order.
    """
    scores = model.predict([bow(sentence, words)])[0]
    # sorted() is stable, so equal scores keep their class-index order.
    ranked = sorted(enumerate(scores), key=lambda pair: pair[1], reverse=True)
    return [(classes[index], score) for index, score in ranked]

def response(sentence, userID='123', show_details=False):
    """Print a random response for the best-ranked class that has a matching intent.

    Classes are tried in the order returned by classify(); for the first one
    whose tag appears in `intents`, one of that intent's responses is printed.
    `userID` and `show_details` are accepted but not used by this function.

    Returns:
        None in all cases (the reply is printed, not returned).
    """
    for tag, _score in classify(sentence):
        for intent in intents['intents']:
            if intent['tag'] == tag:
                print(random.choice(intent['responses']))
                return None
    return None

# Método chat

In [12]:
def chat(confidence_threshold=0.9):
    """Run an interactive console loop until the user types "quit".

    Each input line is classified once; if the best score is above
    `confidence_threshold` and the winning tag has responses in `intents`,
    a random response is printed, otherwise a fallback message is printed.

    Args:
        confidence_threshold: minimum top score required to answer
            (defaults to 0.9, the value previously hard-coded).
    """
    print("Start talking with the bot (type quit to stop)!")

    while True:
        inp = input("You: ")
        # Ignore surrounding whitespace so " quit " also ends the session.
        if inp.strip().lower() == "quit":
            break

        # classify() already runs the model and ranks the classes, so a second
        # model.predict() call is not needed to find the winning tag.
        ranked = classify(inp)
        if not ranked:
            print("Machine: Sorry I did not understand")
            continue
        tag, score = ranked[0]

        # Look the responses up fresh on every turn (first matching intent, as in
        # response()); None when no intent carries this tag, instead of reusing a
        # stale or undefined `responses` from a previous iteration.
        responses = next(
            (tg['responses'] for tg in intents["intents"] if tg['tag'] == tag),
            None,
        )

        if score > confidence_threshold and responses:
            print('Machine: ' + random.choice(responses))
            print(">>>> Your statement has a score of '" + str(score) + "' and has been put in the following category:'" + str(tag) + "'")
        else:
            print("Machine: Sorry I did not understand")
            print(">>>> Your statement has a score of '" + str(score) + "' and has not been fully understood")
        
        

In [None]:
# Start the interactive session; this cell blocks on input() until "quit" is typed.
chat()

In [13]:
# Example: ranked (tag, score) pairs for a sample sentence.
classify("how are you?")

[('greeting', 0.9458655),
 ('introduction', 0.021517314),
 ('name', 0.013339503),
 ('age', 0.010390227),
 ('goodbye', 0.008864361),
 ('payments', 2.1942116e-05),
 ('hours', 8.594824e-07),
 ('servings', 1.5011919e-07),
 ('shop', 8.705185e-08)]

In [None]:
# Example: ranked (tag, score) pairs for a one-word sentence.
classify("hello")