In [None]:
import json
import random

import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D, LSTM, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Load the intents dataset. The encoding is given explicitly so the file is
# decoded the same way on every platform instead of using the locale default.
with open('data.txt', 'r', encoding='utf-8') as f:
  data = json.load(f)

# Flatten the intents into parallel lists: one example sentence and one tag
# per pattern. `labels` keeps the distinct tags in first-seen order and
# `responses` maps each tag to its list of candidate replies.
training_sentences = []
training_labels = []
labels = []
responses = {}

for intent in data['intents']:
  tag = intent['tag']
  for pattern in intent['patterns']:
    training_sentences.append(pattern)
    training_labels.append(tag)
  responses[tag] = intent['responses']
  if tag not in labels:
    labels.append(tag)

# Convert the string tags to integer class indices (and keep the reverse
# mapping so predictions can be turned back into tags).
num_classes = len(labels)
label_to_index = {label: index for index, label in enumerate(labels)}
index_to_label = {index: label for label, index in label_to_index.items()}
training_labels = [label_to_index[label] for label in training_labels]

# Tokenize the text: fit the vocabulary on the training sentences, encode each
# sentence as a list of word indices, then pad/truncate every list to 20 tokens.
tokenizer = Tokenizer(oov_token='<OOV>', num_words=10000)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(sequences, maxlen=20, truncating='post')

# Split the data into training, validation and test sets: hold out 20% first,
# then cut that hold-out in half for validation and test.
holdout_split = train_test_split(
    padded_sequences, np.array(training_labels), test_size=0.2, random_state=42
)
train_sequences, temp_sequences, train_labels, temp_labels = holdout_split

val_test_split = train_test_split(
    temp_sequences, temp_labels, test_size=0.5, random_state=42
)
val_sequences, test_sequences, val_labels, test_labels = val_test_split

# Build the model: embedding -> two stacked LSTMs -> dense head with a softmax
# over the intent classes. The embedding's vocabulary size (10000) and input
# length (20) match the tokenizer and padding settings used for the data.
model = Sequential([
    Embedding(10000, 32, input_length=20),
    LSTM(64, return_sequences=True),  # emit the full sequence for the next LSTM
    Dropout(0.2),
    LSTM(64),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

# Labels are integer class indices, hence the sparse cross-entropy loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train the model, reporting validation metrics after every epoch
history = model.fit(
    train_sequences,
    train_labels,
    validation_data=(val_sequences, val_labels),
    epochs=1000,
    batch_size=32,
    verbose=2,
)

# Evaluate the model on the held-out test set
test_metrics = model.evaluate(test_sequences, test_labels, verbose=0)
loss, accuracy = test_metrics
print(f'Loss: {loss:.4f}')
print(f'Accuracy: {accuracy:.4f}')


Epoch 1/1000
2/2 - 5s - loss: 2.7097 - accuracy: 0.0333 - val_loss: 2.7049 - val_accuracy: 0.0000e+00 - 5s/epoch - 2s/step
Epoch 2/1000
2/2 - 0s - loss: 2.7072 - accuracy: 0.0833 - val_loss: 2.7079 - val_accuracy: 0.0000e+00 - 83ms/epoch - 42ms/step
Epoch 3/1000
2/2 - 0s - loss: 2.7068 - accuracy: 0.0833 - val_loss: 2.7102 - val_accuracy: 0.0000e+00 - 87ms/epoch - 43ms/step
Epoch 4/1000
2/2 - 0s - loss: 2.7053 - accuracy: 0.1000 - val_loss: 2.7093 - val_accuracy: 0.0000e+00 - 81ms/epoch - 40ms/step
Epoch 5/1000
2/2 - 0s - loss: 2.7057 - accuracy: 0.0667 - val_loss: 2.7092 - val_accuracy: 0.0000e+00 - 83ms/epoch - 42ms/step
Epoch 6/1000
2/2 - 0s - loss: 2.7043 - accuracy: 0.0667 - val_loss: 2.7089 - val_accuracy: 0.0000e+00 - 111ms/epoch - 55ms/step
Epoch 7/1000
2/2 - 0s - loss: 2.7020 - accuracy: 0.1000 - val_loss: 2.7087 - val_accuracy: 0.0000e+00 - 156ms/epoch - 78ms/step
Epoch 8/1000
2/2 - 0s - loss: 2.6979 - accuracy: 0.0833 - val_loss: 2.7087 - val_accuracy: 0.0000e+00 - 139ms/epo

In [None]:
def chatbot():
  """Run an interactive console chat loop backed by the trained intent model.

  Each user message is encoded with the module-level ``tokenizer``, padded to
  20 tokens (the same setting used for the training data), classified by
  ``model``, and answered with a random entry from ``responses`` for the
  predicted tag.

  The loop ends when the user types "quit" (case-insensitive, surrounding
  whitespace ignored) or when input is interrupted (Ctrl+C / kernel interrupt /
  end of input). Blank messages are skipped.

  Returns:
    None.
  """
  while True:
    try:
      user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
      # Leave the loop quietly instead of ending the cell with a traceback.
      print()
      break

    message = user_input.strip()
    if message.lower() == "quit":
      break
    if not message:
      # An empty message would be encoded as pure padding; ask again instead.
      continue

    # Tokenize and pad the input text
    new_sequences = tokenizer.texts_to_sequences([message])
    new_padded_sequences = pad_sequences(new_sequences, truncating='post', maxlen=20)

    # Predict the label; verbose=0 keeps Keras' progress bar out of the chat
    predicted_probabilities = model.predict(new_padded_sequences, verbose=0)
    predicted_class_index = int(np.argmax(predicted_probabilities))
    predicted_label = index_to_label[predicted_class_index]

    # Print the reply
    print("Chatbot:", random.choice(responses[predicted_label]))

# Start the chatbot
chatbot()

You: classic book
Chatbot: Have you read 'To Kill a Mockingbird' by Harper Lee? It's a wonderful book.
You: comedy film
Chatbot: You might enjoy 'Superbad'. It's a hilarious movie!
You: biography
Chatbot: You might enjoy 'Steve Jobs' by Walter Isaacson.


KeyboardInterrupt: Interrupted by user