In [17]:
import json 
import numpy as np 
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from russian_filter import filtration

In [18]:
# Read the intent definitions. Every entry under 'patterns' supplies the
# 'questions', 'key' and 'answers' fields consumed below.
with open('patterns (1).json', encoding='utf-8') as file:
    data = json.load(file)

training_sentences, training_labels, responses = [], [], []

for pattern in data['patterns']:
    # filtration() (from russian_filter) yields the pieces that are joined
    # with single spaces into one training sentence.
    training_sentences.append(' '.join(filtration(pattern['questions'])))
    training_labels.append(pattern['key'])
    responses.append(pattern['answers'])

# Distinct keys in first-seen order (dict keys preserve insertion order).
labels = list(dict.fromkeys(training_labels))
num_classes = len(labels)

In [19]:
# Sanity check: number of distinct intent keys and the filtered sentences.
num_classes, training_sentences

(8,
 ['служить платежный карточка',
  'оформлять платежный карточка',
  'делать клиент забывать свой PIN код',
  'депозит',
  'открывать депозит',
  'подавать заявка кредит',
  'узнавать остаток долг кредит',
  'сменить PIN код'])

In [20]:
# Encode the string intent keys as integer class ids used as training targets.
#
# The raw keys are taken from `data` rather than from `training_labels`,
# because this cell replaces `training_labels` with the encoded ids. Fitting
# on that variable would, on a second run of the cell, fit the encoder on
# integers and inverse_transform() would no longer return the intent keys.
# Reading from `data` keeps the cell idempotent.
raw_keys = [entry['key'] for entry in data['patterns']]

lbl_encoder = LabelEncoder()
training_labels = lbl_encoder.fit_transform(raw_keys)

In [21]:
# Inspect the integer class id assigned to each training sentence.
training_labels

array([6, 3, 1, 2, 5, 7, 4, 0])

In [30]:
# Hyper-parameters shared by the tokenizer and the model cells.
vocab_size, embedding_dim, max_len = 1000, 16, 20
oov_token = "<OOV>"

# Build the vocabulary from the training sentences; words not seen here are
# mapped to the OOV token when texts are encoded later.
tokenizer = Tokenizer(oov_token=oov_token, num_words=vocab_size)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Integer-encode every sentence, then bring all of them to length max_len.
# Only the truncation side is set explicitly; the padding side is left at
# the pad_sequences default.
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_len,
    truncating='post',
)

In [31]:
# Inspect the encoded sentences, the learned vocabulary and the padded matrix.
sequences, word_index, padded_sequences

([[8, 2, 3],
  [9, 2, 3],
  [10, 11, 12, 13, 4, 5],
  [6],
  [14, 6],
  [15, 16, 7],
  [17, 18, 19, 7],
  [20, 4, 5]],
 {'<OOV>': 1,
  'платежный': 2,
  'карточка': 3,
  'pin': 4,
  'код': 5,
  'депозит': 6,
  'кредит': 7,
  'служить': 8,
  'оформлять': 9,
  'делать': 10,
  'клиент': 11,
  'забывать': 12,
  'свой': 13,
  'открывать': 14,
  'подавать': 15,
  'заявка': 16,
  'узнавать': 17,
  'остаток': 18,
  'долг': 19,
  'сменить': 20},
 array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  8,  2,  3],
        [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  9,  2,  3],
        [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 10, 11,
         12, 13,  4,  5],
        [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0,  0,  6],
        [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
          0,  0, 14,  6],
        [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,

In [32]:
# Fix TensorFlow's global seed before any layer is created so that a fresh
# "Restart & Run All" starts from the same random state; without it every
# run trains a different model.
SEED = 42
tf.random.set_seed(SEED)

# Small bag-of-embeddings classifier: embed the tokens, average over the
# sequence axis, then two hidden layers and a softmax over the intent classes.
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_len),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(16, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

# The targets are integer class ids, hence the sparse loss variant.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 20, 16)            16000     
                                                                 
 global_average_pooling1d_2   (None, 16)               0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dense_6 (Dense)             (None, 16)                272       
                                                                 
 dense_7 (Dense)             (None, 16)                272       
                                                                 
 dense_8 (Dense)             (None, 8)                 136       
                                                                 
Total params: 16,680
Trainable params: 16,680
Non-trainable params: 0
__________________________________________________

In [33]:
# Fit on the whole training set; the History object is kept for inspection.
epochs = 300
history = model.fit(
    x=padded_sequences,
    y=np.array(training_labels),
    epochs=epochs,
)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

In [34]:
import pickle

# Persist the trained network under "chat_model".
model.save("chat_model")

# Pickle the fitted preprocessing objects so that chat() can reload exactly
# the tokenizer and label encoder the model was trained with.
for pickle_path, fitted_object in (
    ('tokenizer.pickle', tokenizer),
    ('label_encoder.pickle', lbl_encoder),
):
    with open(pickle_path, 'wb') as handle:
        pickle.dump(fitted_object, handle, protocol=pickle.HIGHEST_PROTOCOL)

INFO:tensorflow:Assets written to: chat_model\assets


In [35]:
# Debug check that the tokenizer object is still in scope after saving.
print(tokenizer)

<keras_preprocessing.text.Tokenizer object at 0x00000213AEBCD730>


In [36]:
# Debug check that the label encoder object is still in scope after saving.
lbl_encoder

LabelEncoder()

In [37]:
import json 
import numpy as np
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder

# import colorama 
# colorama.init()
# from colorama import Fore, Style, Back

import pickle

# Re-read the intent definitions; chat() looks up answers in this `data` dict.
with open("patterns (1).json", encoding='utf-8') as file:
    data = json.load(file)


def chat(model_path='chat_model', tokenizer_path='tokenizer.pickle',
         encoder_path='label_encoder.pickle', max_len=20):
    """Run an interactive console loop that answers typed questions.

    Each line read from stdin is passed through ``filtration``, encoded with
    the saved tokenizer, padded to ``max_len`` and classified by the saved
    model. The answers of the first entry in ``data['patterns']`` whose
    ``'key'`` equals the predicted label are printed. The loop ends when the
    user types ``quit``.

    Relies on the notebook-level names ``data``, ``filtration``, ``keras``,
    ``np`` and ``pickle``.

    Args:
        model_path: Location of the saved Keras model.
        tokenizer_path: Pickle file holding the fitted tokenizer.
        encoder_path: Pickle file holding the fitted label encoder.
        max_len: Length every input sequence is padded/truncated to; it must
            match the value used when the training data was padded.
    """
    # load trained model
    model = keras.models.load_model(model_path)

    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file. Only load pickles that were produced by this notebook.
    # load tokenizer object
    with open(tokenizer_path, 'rb') as token:
        tokenizer = pickle.load(token)

    # load label encoder object
    with open(encoder_path, 'rb') as encode:
        lbl_encoder = pickle.load(encode)

    while True:
        print("User: ", end="")
        raw = input()

        # Test the exit command on the raw text first, so that it does not
        # depend on how filtration() treats the word "quit".
        if raw.strip().lower() == "quit":
            break

        inp = ' '.join(filtration(raw))
        # Kept from the original: also stop when the filtered text is "quit".
        if inp.lower() == "quit":
            break

        if not inp:
            # Nothing is left after filtering. An all-padding input would
            # still yield a prediction, but a meaningless one, so skip it.
            print("ChatBot:", "Please type a question.")
            continue

        padded = keras.preprocessing.sequence.pad_sequences(
            tokenizer.texts_to_sequences([inp]),
            truncating='post', maxlen=max_len)
        result = model.predict(padded)
        print(result)

        tag = lbl_encoder.inverse_transform([np.argmax(result)])
        print(tag)

        # inverse_transform returns a one-element array; compare the scalar
        # key instead of relying on the truth value of an array comparison.
        predicted_key = tag[0]
        for i in data['patterns']:
            if i['key'] == predicted_key:
                print("ChatBot:" , i['answers'])
                break

        # print(Fore.GREEN + "ChatBot:" + Style.RESET_ALL,random.choice(responses))

# Greet the user, then hand control to the interactive loop.
print("Start messaging with the bot (type quit to stop)!")
chat()


Start messaging with the bot (type quit to stop)!
User: [[0.01146608 0.00091355 0.16090055 0.06247212 0.06317221 0.07199345
  0.12030651 0.5087755 ]]
['submitapplicationcredit']
ChatBot: Зайдите на главную страницу → сразу под Вашими картами (ниже) размещен блок «Популярные услуги», где Вы можете нажать на кнопку «Оформить кредит». Или на главной странице кликните на кнопку «Открыть онлайн» → «Оформить кредит».
User: [[0.01214113 0.00106918 0.15028678 0.0662832  0.06945278 0.06788336
  0.1213639  0.5115198 ]]
['submitapplicationcredit']
ChatBot: Зайдите на главную страницу → сразу под Вашими картами (ниже) размещен блок «Популярные услуги», где Вы можете нажать на кнопку «Оформить кредит». Или на главной странице кликните на кнопку «Открыть онлайн» → «Оформить кредит».
User: [[5.0556993e-01 1.3174309e-01 1.4852874e-01 5.5037588e-05 4.5980360e-05
  2.0042363e-01 1.3027141e-03 1.2330898e-02]]
['changepincode']
ChatBot: В мобильном приложении выберите карту на главной странице → «Дополнит