In [1]:
import tensorflow as tf
import numpy as np
import pickle
import json
import random
import spacy
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the intent definitions from disk.
with open("../Data/intents.json","r",encoding='utf-8') as file:
    data = json.load(file)

# Give every intent a positional tag ("tag0", "tag1", ...), replacing whatever tag it had.
for position, entry in enumerate(data["intents"]):
    entry["tag"] = f"tag{position}"

# Write the renumbered intents back to the same file, keeping non-ASCII characters readable.
with open("../Data/intents.json","w",encoding='utf-8') as file:
    json.dump(data,file,indent=3,ensure_ascii=False)

print(len(data["intents"]))

In [3]:
# Load the spaCy pipeline 'pt_core_news_sm'; later cells use it to tokenise text and read each token's lemma and stop-word flag.
nlp = spacy.load('pt_core_news_sm')

156


In [4]:
# Accumulators filled from the intents:
#   words  - every kept lemma, in order of appearance
#   labels - intent tags, in first-seen order
#   docs_x - lemma list of each kept pattern
#   docs_y - tag of each kept pattern (parallel to docs_x)
words = []
labels = []
docs_x = []
docs_y = []

for intent in data["intents"]:
    tag = intent["tag"]
    for pattern in intent["patterns"]:
        # Lower-case, tokenise, and keep the lemma of every non-stop-word token.
        lemmas = [tok.lemma_ for tok in nlp(pattern.lower()) if not tok.is_stop]
        if lemmas:
            # Patterns made only of stop words contribute nothing and are skipped.
            words.extend(lemmas)
            docs_x.append(lemmas)
            docs_y.append(tag)

    if tag not in labels:
        labels.append(tag)

# Punctuation marks removed from the vocabulary (docs_x keeps them).
ponctuations = ["?","'",'"',"!",".",","]

# NOTE(review): the vocabulary is neither de-duplicated nor sorted, so a lemma
# that occurs in several patterns appears several times in `words`.
words = [lemma for lemma in words if lemma not in ponctuations]
print(len(words))

891


In [5]:
# Show a slice of the vocabulary as a quick sanity check of the lemmatisation.
print(words[20:35])

['mau', 'sentir', 'mau', 'sentir', 'tô', 'mau', 'gostar', 'gostar', 'gostar', 'algum', 'gostar', 'nome', 'chamar', 'nome', 'chamar']


In [51]:
# Turn every kept pattern into a bag-of-words input and a one-hot target.
#   training - array of shape (n_patterns, 1, len(words), 1)
#   output   - array of shape (n_patterns, len(labels))
training = []
output = []

out_empty = [0 for _ in range(len(labels))]
# Tags in `labels` are unique, so a dict gives the same index as labels.index().
label_position = {label: idx for idx, label in enumerate(labels)}

for x, doc in enumerate(docs_x):
    # docs_x already holds lemmas with stop words removed. Re-parsing them with
    # spaCy would lemmatise and stop-filter a second time, which bag_of_words()
    # does not do at prediction time. Only punctuation still has to be dropped,
    # so training and inference features are built the same way.
    wrds = {lemma for lemma in doc if lemma not in ponctuations}

    # One 0/1 entry per vocabulary position.
    bag = [1 if w in wrds else 0 for w in words]

    output_row = out_empty[:]
    output_row[label_position[docs_y[x]]] = 1

    # Conv2D expects (height, width, channels); use a 1 x len(words) "image" with one channel.
    bag = np.array(bag)
    bag = bag.reshape((1,bag.shape[0],1))

    training.append(bag)
    output.append(output_row)


training = np.array(training)
output = np.array(output)
N_CLASSES = len(output[0])
SHAPE = training[0].shape[1]
print(SHAPE,training[0].shape)

891 (1, 891, 1)


In [52]:
# Rule of thumb for the hidden layer width: mean of input and output sizes.
# len(training[0]) is 1 because each bag is stored as (1, SHAPE, 1), so use SHAPE directly.
(SHAPE + N_CLASSES) / 2

523.5

In [52]:
# Hidden width rule of thumb: (inputs + outputs) / 2 (optional).
def dense_layers(inputs, hidden_units=520):
    """Build the feature-extraction part of the network on top of `inputs`.

    Three Conv2D layers (2 filters, kernel size 3, 'same' padding, ReLU) are
    followed by a Flatten, a Dense layer of `hidden_units` units and a second
    Dense layer of half that width with 20% dropout. The outputs of both Dense
    layers are concatenated and batch-normalised.

    Args:
        inputs: Keras tensor the first convolution is applied to.
        hidden_units: width of the first Dense layer; the second uses
            `hidden_units // 2`. The default of 520 appears to follow the
            rule of thumb in the comment above this function.

    Returns:
        The batch-normalised concatenation of the two Dense outputs.
    """
    # No input_shape here: the shape is taken from `inputs`.
    x = tf.keras.layers.Conv2D(2,3,activation='relu',padding='same')(inputs)
    x = tf.keras.layers.Conv2D(2,3,activation='relu',padding='same')(x)
    x = tf.keras.layers.Conv2D(2,3,activation='relu',padding='same')(x)

    x = tf.keras.layers.Flatten()(x)

    x = tf.keras.layers.Dense(hidden_units,activation='relu')(x)

    # Integer division: `units` must be an int, and 520/2 would pass the float 260.0.
    y = tf.keras.layers.Dense(hidden_units // 2,activation='relu')(x)
    y = tf.keras.layers.Dropout(0.2)(y)

    # Skip-style merge of the wide layer with its narrower, regularised successor.
    concatted = tf.keras.layers.Concatenate()([x, y])

    x = tf.keras.layers.BatchNormalization()(concatted)
    return x

def classfier_layer(x,N_CLASSES):
    """Attach the softmax output layer, named 'classification', to `x`.

    Args:
        x: Keras tensor holding the features to classify.
        N_CLASSES: number of output units (one per class).

    Returns:
        The output tensor of the softmax Dense layer.
    """
    softmax_head = tf.keras.layers.Dense(
        N_CLASSES,
        activation='softmax',
        name='classification',
    )
    return softmax_head(x)

def final_model(inputs,N_CLASSES):
    """Assemble the full network: dense_layers() followed by classfier_layer().

    Args:
        inputs: Keras input tensor of the model.
        N_CLASSES: number of classes predicted by the output layer.

    Returns:
        An uncompiled tf.keras.Model mapping `inputs` to the class probabilities.
    """
    features = dense_layers(inputs)
    probabilities = classfier_layer(features, N_CLASSES)
    return tf.keras.Model(inputs=inputs, outputs=probabilities)
    
def define_and_compile_model(SHAPE,N_CLASSES):
    """Create the classifier for bags of `SHAPE` entries and compile it.

    Args:
        SHAPE: length of one bag-of-words vector; the model input is
            (1, SHAPE, 1) per sample.
        N_CLASSES: number of classes in the softmax output.

    Returns:
        A compiled tf.keras.Model using the Adam optimizer and reporting
        accuracy on the 'classification' output.
    """
    inputs = tf.keras.layers.Input(shape=(1,SHAPE,1))

    # create the model
    model = final_model(inputs,N_CLASSES)

    # compile your model
    # The targets are one-hot rows and the output is a softmax, i.e. exactly one
    # class per sample, so the matching loss is categorical cross-entropy.
    # binary_crossentropy would score every class as an independent yes/no problem.
    model.compile(optimizer='adam',loss='categorical_crossentropy',metrics = {'classification' : 'accuracy'})

    return model

In [60]:
# Build and compile the network for the current bag length and number of classes, then print its layer table.
model = define_and_compile_model(SHAPE,N_CLASSES)
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_10 (InputLayer)           [(None, 1, 891, 1)]  0                                            
__________________________________________________________________________________________________
conv2d_6 (Conv2D)               (None, 1, 891, 2)    20          input_10[0][0]                   
__________________________________________________________________________________________________
conv2d_7 (Conv2D)               (None, 1, 891, 2)    38          conv2d_6[0][0]                   
__________________________________________________________________________________________________
conv2d_8 (Conv2D)               (None, 1, 891, 2)    38          conv2d_7[0][0]                   
____________________________________________________________________________________________

In [64]:
# Shell command: delete the ./logs/ directory (the training cell writes its TensorBoard logs under logs/fit/).
!rm -rf ./logs/

In [66]:
# Reload the TensorBoard notebook extension; `%load_ext tensorboard` is the first-load alternative.
%reload_ext tensorboard

In [67]:
from datetime import datetime

# One log sub-directory per run, named by start time. The timestamp has no ':'
# because that character is not allowed in Windows paths.
logdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

# Train on the whole data set for 20 epochs, logging to TensorBoard.
model.fit(training,output,epochs=20,callbacks=[tensorboard_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f1b1e7679d0>

In [71]:
# Run this cell only once.
# TensorBoard UI: http://localhost:6006/
%tensorboard --logdir './logs/fit'

In [61]:
def bag_of_words(s):
    """Encode the sentence `s` as a 0/1 vector aligned with the global `words` list.

    The sentence is lower-cased and parsed with the global `nlp` pipeline;
    lemmas of tokens that are neither stop words nor punctuation marks are
    collected. Position i of the result is 1 when `words[i]` is one of those
    lemmas and 0 otherwise.

    Args:
        s: input sentence (str).

    Returns:
        1-D numpy array with len(words) entries.
    """
    ponctuations = ["?","'",'"',"!",".",","]

    # Distinct lemmas present in the sentence.
    present = {
        token.lemma_
        for token in nlp(s.lower())
        if not token.is_stop and token.lemma_ not in ponctuations
    }

    return np.array([1 if w in present else 0 for w in words])

In [62]:
# Interactive test loop: read a sentence, classify it, and print a random
# response of the predicted intent with the model's confidence.
# Type "quit" to leave the loop.
while True:
    inp = input("Digite: ")
    if inp == "quit":
        break

    bag = bag_of_words(inp)
    # The model input is (batch, 1, SHAPE, 1), the same layout as the training
    # data; a (1, SHAPE) array does not match it.
    bag = bag.reshape((1,1,SHAPE,1))
    predictions = model.predict(bag)
    results_index = np.argmax(predictions[0])

    tag = labels[results_index]

    # Reset on every turn so a missing tag can never reuse the previous answer.
    responses = []
    for tg in data['intents']:
        if tg['tag'] == tag:
            responses = tg['responses']
            current_tag = tg
            break

    confidence = predictions[0][results_index]
    if not responses:
        # random.choice() raises on an empty sequence; report and keep chatting.
        print("Model: no response available for",tag,confidence)
        continue

    response = random.choice(responses)
    print("Model:",response,confidence)

Model: Olá como vai? 0.9234132
Model: Que bom 0.6732022
Model: Vou bem, e você? 0.1849314
Model: Vou bem, e você? 0.1849314
Model: Fico feliz em saber que se sente bem 0.6732022
Model: Vou bem, e você? 0.1849314
Model: <status> 0.1849314
Model: Te vejo depois 0.87662673


In [63]:
# Persist the model to the file "model_2.h5" in the current working directory.
model.save("model_2.h5")

In [73]:
# Write the in-memory intents (`data`) back to the intents file as UTF-8 JSON, keeping non-ASCII characters unescaped.
with open("../Data/intents.json","w",encoding='utf-8') as file:
    json.dump(data,file,indent=3,ensure_ascii=False)

In [64]:
# Pickle the vocabulary, labels and training arrays together as one tuple
# (words, labels, training, output) - presumably for reuse by the code that serves the model; confirm against the consumer.
with open("../Data/dataV2.pickle", "wb") as f:
    pickle.dump((words, labels, training, output), f)