In [1]:
import tensorflow as tf
import numpy as np
import pickle
import json
import random
import spacy
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
with open("../Data/intents.json","r",encoding='utf-8') as file:
    data = json.load(file)

for i,intent in enumerate(data["intents"]):
    intent["tag"] = "tag"+str(i)

with open("../Data/intents.json","w",encoding='utf-8') as file:
    json.dump(data,file,indent=3,ensure_ascii=False)

In [3]:
nlp = spacy.load('pt_core_news_sm')

In [4]:
words = []
labels = []
docs_x = []
docs_y = []

for intent in data["intents"]:
    for pattern in intent["patterns"]:
        wrds = nlp(pattern.lower())
        wrds = [token.lemma_ for token in wrds if not token.is_stop]
        if wrds == []:
            continue
        words.extend(wrds)
        docs_x.append(wrds)
        docs_y.append(intent["tag"])

    if intent["tag"] not in labels:
        labels.append(intent["tag"])

#from spacy.lang.pt.stop_words import STOP_WORDS

ponctuations = ["?","'",'"',"!",".",","]

words = [w for w in words if w not in ponctuations]
#words = sorted(list(set(words)))
#labels = sorted(labels)
print(len(words))

1012


In [5]:
print(words[0:15])

['oi', 'eae', 'olá', 'dia', 'iai', 'thau', 'ver', 'falar', 'valer', 'util', 'ok', 'agradecer', 'ultimamente', 'tá', 'ir']


In [6]:
training = []
output = []

out_empty = [0 for _ in range(len(labels))]

for x, doc in enumerate(docs_x):
    bag = []
    wrds = []
    doc = nlp(' '.join(doc))
    
    for token in doc:
        if not token.is_stop and token.lemma_ not in ponctuations:
            wrds.append(token.lemma_)

    for w in words:
        if w in wrds:
            bag.append(1)
        else:
            bag.append(0)

    output_row = out_empty[:]
    output_row[labels.index(docs_y[x])] = 1

    training.append(bag)
    output.append(output_row)


training = np.array(training)
output = np.array(output)
N_CLASSES = len(output[0])
SHAPE = len(training[0])

In [7]:
(len(training[0])+len(output[0]))/2

592.0

In [50]:
#entradas+saídas/2 (opcional).
def dense_layers(inputs):
    x = tf.keras.layers.Dense(592,activation='relu')(inputs)
    
    y = tf.keras.layers.Dense(592/2,activation='relu')(inputs)
    y = tf.keras.layers.Dropout(0.2)(y)

    concatted = tf.keras.layers.Concatenate()([x, y])

    x = tf.keras.layers.BatchNormalization()(concatted)
    #x = tf.keras.layers.Dense(128,activation='relu')(x)
    #x = tf.keras.layers.BatchNormalization()(x)
    
    #x = tf.keras.layers.Dense(256,activation='relu')(x)
    #x = tf.keras.layers.BatchNormalization()(x)
    return x

def classfier_layer(x,N_CLASSES):
    x = tf.keras.layers.Dense(N_CLASSES,activation='softmax',name='classification')(x)
    return x

def final_model(inputs,N_CLASSES):
    dense = dense_layers(inputs)
    
    classfier = classfier_layer(dense,N_CLASSES)
    
    model = tf.keras.Model(inputs=inputs,outputs=classfier)
    
    return model
    
def define_and_compile_model(SHAPE,N_CLASSES):
    inputs = tf.keras.layers.Input(shape=(SHAPE,))
    
    # create the model
    model = final_model(inputs,N_CLASSES)
    
    # compile your model
    model.compile(optimizer='adam',loss='binary_crossentropy',metrics = {'classification' : 'accuracy'})

    return model

In [51]:
tf.reset_default_graph()
model = define_and_compile_model(SHAPE,N_CLASSES)
model.summary()

Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_10 (InputLayer)           [(None, 1012)]       0                                            
__________________________________________________________________________________________________
dense_16 (Dense)                (None, 296)          299848      input_10[0][0]                   
__________________________________________________________________________________________________
dense_15 (Dense)                (None, 592)          599696      input_10[0][0]                   
__________________________________________________________________________________________________
dropout (Dropout)               (None, 296)          0           dense_16[0][0]                   
____________________________________________________________________________________________

In [52]:
#%load_ext tensorboard
%reload_ext tensorboard

In [53]:
from datetime import datetime

logdir="logs/fit/" + datetime.now().strftime("%Y-%m-%d:%H:%M:%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

model.fit(training,output,epochs=50,callbacks=[tensorboard_callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f9d5d98d1f0>

In [None]:
#Executar apenas uma vez
%tensorboard --logdir logs/fit;

In [54]:
def bag_of_words(s):
    bag = [0 for _ in range(len(words))]

    ponctuations = ["?","'",'"',"!",".",","]

    doc = nlp(s.lower())
    s_words = []
    for token in doc:
        if not token.is_stop and token.lemma_ not in ponctuations:
            s_words.append(token.lemma_)

    for se in s_words:
        for i, w in enumerate(words):
            if w == se:
                bag[i] = 1
    bag = np.array(bag)
    return bag

In [55]:
while True:
    inp = str(input("Digite: "))
    if inp == "quit":
        break
        
    bag = bag_of_words(inp)
    bag = bag.reshape((1,SHAPE))
    predictions = model.predict(bag)
    results_index = np.argmax(predictions)

    tag = labels[results_index]

    for tg in data['intents']:
        if tg['tag'] == tag:
            responses = tg['responses']
            current_tag = tg

    response = random.choice(responses)
    confidence = predictions[0][results_index]
    print("Model:",response,confidence)

Model: Olá como vai? 0.9939075
Model: Tenho notícias, <news> 0.076253094
Model: <date> 0.9996803
Model: Fico feliz em saber que se sente bem 0.8009597
Model: Vou bem, e você? 0.3914616
Model: Estou bem, e você? 0.3914616
Model: <finalizar> 0.99730563


In [11]:
model.save("model_2.h5")

INFO:tensorflow:Assets written to: model.h6/assets


In [73]:
with open("../Data/intents.json","w",encoding='utf-8') as file:
    json.dump(data,file,indent=3,ensure_ascii=False)

In [12]:
with open("../Data/dataV2.pickle", "wb") as f:
    pickle.dump((words, labels, training, output), f)