### Imports

In [1]:
import pickle
import random
import numpy as np
import os
from random import shuffle
from pickle import dump, load
from numpy import array
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout
from keras.optimizers import SGD
from re import sub
from nltk.stem.snowball import SnowballStemmer
from nltk import word_tokenize
import language_tool_python
import pandas as pd

In [2]:
def preprocess_model(dict):
    words = []
    documents = []
    for chave, valor in dict.items():
        for i in valor:
            words.append(i)
            documents.append((i, chave))
    return words, documents

### Model

In [3]:
def train_model(dict):
    classes = []
    classes.extend(list(dict.keys()))
    words,documents = preprocess_model(dict)

    words = sorted(list(set(words)))
    classes = sorted(list(set(classes)))

    words_path = ("words.pkl")
    classes_path = ("classes.pkl")

    dump(words,open(words_path, 'wb'))
    dump(classes,open(classes_path, 'wb'))

    training = []
    output_empty = [0] * len(classes)
    for document in documents:
        bag = []
        pattern_words = document[0]
        for word in words:
            bag.append(1) if word in pattern_words else bag.append(0)
        while len(bag) < len(words):
            bag.append(0)
        output_row = list(output_empty)
        output_row[classes.index(document[1])] = 1
        training.append([bag, output_row])
    shuffle(training)
    training = array(training, dtype=object)

    x = list(training[:, 0])
    y = list(training[:, 1])

    model = Sequential()
    model.add(Dense(128, input_shape=(len(x[0]),), activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(len(y[0]), activation='softmax'))

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',optimizer=sgd, metrics=['accuracy'])

    m = model.fit(array(x), array(y), epochs=200, batch_size=5, verbose=1)

    model_path = ("model.h5")
    model.save(model_path, m)

In [6]:
df = pd.read_excel(r'..\portugues\pt_troubleshooting.xlsx')
df_pattern = pd.read_excel(r'..\portugues\pt_patterns.xlsx')

In [9]:
dict = {}
for r in range(len(df['problem'].unique())):
    dict[str(r)] = df_pattern.loc[r,'patterns']

In [10]:
train_model(dict)

  super().__init__(name, **kwargs)


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [2]:
# retorna 0 ou 1 para cada palavra da bolsa de palavras
def bag_of_words(writing, words):
    sentence_words = writing.split()
    # cria uma matriz de N palavras
    bag = [0]*len(words)
    for setence in sentence_words:
        for i, word in enumerate(words):
            if word == setence:
                # atribui 1 no pacote de palavra se a palavra atual estiver na posição da frase
                bag[i] = 1
    return(array(bag))

def class_prediction(input_user):
    model = load_model('model.h5')
    words = load(open('words.pkl', 'rb'))
    classes = load(open('classes.pkl', 'rb'))
    # filtra as previsões abaixo de um limite 0.25
    prevision = bag_of_words(input_user, words)
    response_prediction = model.predict(array([prevision]))[0]
    results = [[index, response] for index, response in enumerate(response_prediction)]

    results.sort(key=lambda x: x[1], reverse=True)
    return [{"suggestion": classes[r[0]], "probability": str(r[1])} for r in results]

In [3]:
def preprocess_semantic(frase):
    tool = language_tool_python.LanguageTool('pt')
    matches = tool.check(frase)
    for i in matches:
        frase = frase[:i.offset] + i.replacements[0] + frase[i.offset+i.errorLength:]
    tool.close()
    return frase


def preprocess_stem(text):
    stemmer = SnowballStemmer("portuguese")
    tokens = word_tokenize(text)
    stems = [stemmer.stem(token) for token in tokens]
    text = ' '.join([str(element) for element in stems])
    return text

def preprocess_input(text):
    text = preprocess_semantic(text)
    text = sub(r"[!#$%&'()*+,-./:;<=>?@[^_`{|}~]+", ' ',text)
    text = preprocess_stem(text)
    text = text.lower().strip()
    # tirar pontuações, acentos e espaços extras
    text = sub('[áàãâä]', 'a', sub('[éèêë]', 'e', sub('[íìîï]', 'i', sub('[óòõôö]', 'o', sub('[úùûü]', 'u', text)))))
    # tirar espaços em branco
    text = sub(r'\s+', ' ',text)
    return text

In [4]:
texto = 'fortes vibrações durante a operação da bomba'
texto = preprocess_input(texto)
lista = texto.split()
for i in lista:
    response = class_prediction(i)
    print(i)
    print(response)
    print()
# max_value = 0
# classe = ''
# for i in response:
#     for j in i:
#         if 'e' not in i["probability"] or '-' not in i["probability"]:
#             value = float(i["probability"])
#             if value > float(max_value):
#                 max_value = value
#                 classe = i

fort
[{'suggestion': '0', 'probability': '0.02926609'}, {'suggestion': '1', 'probability': '0.028426094'}, {'suggestion': '113', 'probability': '0.025318606'}, {'suggestion': '4', 'probability': '0.022999158'}, {'suggestion': '48', 'probability': '0.019252554'}, {'suggestion': '5', 'probability': '0.016317973'}, {'suggestion': '43', 'probability': '0.016074017'}, {'suggestion': '3', 'probability': '0.015836371'}, {'suggestion': '24', 'probability': '0.015531324'}, {'suggestion': '47', 'probability': '0.013986186'}, {'suggestion': '84', 'probability': '0.01381215'}, {'suggestion': '66', 'probability': '0.013691549'}, {'suggestion': '30', 'probability': '0.013582735'}, {'suggestion': '45', 'probability': '0.013512262'}, {'suggestion': '63', 'probability': '0.0131288655'}, {'suggestion': '44', 'probability': '0.012946453'}, {'suggestion': '2', 'probability': '0.011905325'}, {'suggestion': '138', 'probability': '0.011901854'}, {'suggestion': '111', 'probability': '0.011735263'}, {'suggesti

5 out of the last 5 calls to <function Model.make_predict_function.<locals>.predict_function at 0x00000261D21CB5E0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.


oper
[{'suggestion': '0', 'probability': '0.02926609'}, {'suggestion': '1', 'probability': '0.028426094'}, {'suggestion': '113', 'probability': '0.025318606'}, {'suggestion': '4', 'probability': '0.022999158'}, {'suggestion': '48', 'probability': '0.019252554'}, {'suggestion': '5', 'probability': '0.016317973'}, {'suggestion': '43', 'probability': '0.016074017'}, {'suggestion': '3', 'probability': '0.015836371'}, {'suggestion': '24', 'probability': '0.015531324'}, {'suggestion': '47', 'probability': '0.013986186'}, {'suggestion': '84', 'probability': '0.01381215'}, {'suggestion': '66', 'probability': '0.013691549'}, {'suggestion': '30', 'probability': '0.013582735'}, {'suggestion': '45', 'probability': '0.013512262'}, {'suggestion': '63', 'probability': '0.0131288655'}, {'suggestion': '44', 'probability': '0.012946453'}, {'suggestion': '2', 'probability': '0.011905325'}, {'suggestion': '138', 'probability': '0.011901854'}, {'suggestion': '111', 'probability': '0.011735263'}, {'suggesti

6 out of the last 6 calls to <function Model.make_predict_function.<locals>.predict_function at 0x00000261D21D5820> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.


da
[{'suggestion': '0', 'probability': '0.02926609'}, {'suggestion': '1', 'probability': '0.028426094'}, {'suggestion': '113', 'probability': '0.025318606'}, {'suggestion': '4', 'probability': '0.022999158'}, {'suggestion': '48', 'probability': '0.019252554'}, {'suggestion': '5', 'probability': '0.016317973'}, {'suggestion': '43', 'probability': '0.016074017'}, {'suggestion': '3', 'probability': '0.015836371'}, {'suggestion': '24', 'probability': '0.015531324'}, {'suggestion': '47', 'probability': '0.013986186'}, {'suggestion': '84', 'probability': '0.01381215'}, {'suggestion': '66', 'probability': '0.013691549'}, {'suggestion': '30', 'probability': '0.013582735'}, {'suggestion': '45', 'probability': '0.013512262'}, {'suggestion': '63', 'probability': '0.0131288655'}, {'suggestion': '44', 'probability': '0.012946453'}, {'suggestion': '2', 'probability': '0.011905325'}, {'suggestion': '138', 'probability': '0.011901854'}, {'suggestion': '111', 'probability': '0.011735263'}, {'suggestion