In [1]:
! pip install python-telegram-bot

Collecting python-telegram-bot
  Downloading python_telegram_bot-13.13-py3-none-any.whl (513 kB)
     -------------------------------------- 513.4/513.4 kB 3.2 MB/s eta 0:00:00
Collecting cachetools==4.2.2
  Downloading cachetools-4.2.2-py3-none-any.whl (11 kB)
Collecting certifi
  Downloading certifi-2022.6.15-py3-none-any.whl (160 kB)
     -------------------------------------- 160.2/160.2 kB 9.4 MB/s eta 0:00:00
Collecting APScheduler==3.6.3
  Downloading APScheduler-3.6.3-py2.py3-none-any.whl (58 kB)
     ---------------------------------------- 58.9/58.9 kB ? eta 0:00:00
Collecting tzlocal>=1.2
  Downloading tzlocal-4.2-py3-none-any.whl (19 kB)
Collecting tzdata
  Downloading tzdata-2022.1-py2.py3-none-any.whl (339 kB)
     -------------------------------------- 339.5/339.5 kB 5.3 MB/s eta 0:00:00
Collecting pytz-deprecation-shim
  Downloading pytz_deprecation_shim-0.1.0.post0-py2.py3-none-any.whl (15 kB)
Installing collected packages: tzdata, certifi, cachetools, pytz-deprecation

In [2]:
! pip install nltk

Collecting nltk
  Downloading nltk-3.7-py3-none-any.whl (1.5 MB)
     ---------------------------------------- 1.5/1.5 MB 4.0 MB/s eta 0:00:00
Collecting click
  Downloading click-8.1.3-py3-none-any.whl (96 kB)
     ---------------------------------------- 96.6/96.6 kB 5.8 MB/s eta 0:00:00
Collecting joblib
  Downloading joblib-1.1.0-py2.py3-none-any.whl (306 kB)
     -------------------------------------- 307.0/307.0 kB 6.3 MB/s eta 0:00:00
Collecting regex>=2021.8.3
  Downloading regex-2022.6.2-cp39-cp39-win_amd64.whl (262 kB)
     -------------------------------------- 262.1/262.1 kB 2.3 MB/s eta 0:00:00
Collecting tqdm
  Downloading tqdm-4.64.0-py2.py3-none-any.whl (78 kB)
     ---------------------------------------- 78.4/78.4 kB 4.3 MB/s eta 0:00:00
Installing collected packages: tqdm, regex, joblib, click, nltk
Successfully installed click-8.1.3 joblib-1.1.0 nltk-3.7 regex-2022.6.2 tqdm-4.64.0


In [3]:
! pip install sklearn

Collecting sklearn
  Downloading sklearn-0.0.tar.gz (1.1 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting scikit-learn
  Downloading scikit_learn-1.1.1-cp39-cp39-win_amd64.whl (7.4 MB)
     ---------------------------------------- 7.4/7.4 MB 4.3 MB/s eta 0:00:00
Collecting scipy>=1.3.2
  Downloading scipy-1.8.1-cp39-cp39-win_amd64.whl (36.9 MB)
     ---------------------------------------- 36.9/36.9 MB 5.4 MB/s eta 0:00:00
Collecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)
Using legacy 'setup.py install' for sklearn, since package 'wheel' is not installed.
Installing collected packages: threadpoolctl, scipy, scikit-learn, sklearn
  Running setup.py install for sklearn: started
  Running setup.py install for sklearn: finished with status 'done'
Successfully installed scikit-learn-1.1.1 scipy-1.8.1 sklearn-0.0 threadpoolctl-3.1.0


In [1]:
import json

import re
import nltk
import random

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.linear_model import LogisticRegression

import pickle
import telegram
import os

<div class="alert alert-block alert-info">
<b> Разговорная часть </b>
</div>

In [2]:
# Load the bot configuration from JSON. The context manager closes the file
# handle as soon as parsing is done, and UTF-8 is requested explicitly so the
# decoded text does not depend on the platform's default encoding.
with open("big_bot_config.json", "r", encoding="utf-8") as config_file:
    BOT_CONFIG = json.load(config_file)  # Converting from JSON to data structure

In [3]:
def normalize(text):
    """
    text - str
    Lowercase the text and drop every character that is neither a word
    character nor whitespace (i.e. punctuation marks).

    Return the cleaned text (str).
    """
    lowered = text.lower()
    return re.sub(r"[^\w\s]", "", lowered)

def isMatching(text1, text2, threshold=0.3):
    """
    text1 - str
    text2 - str
    threshold - float, upper bound (exclusive) for the relative edit
                distance at which the texts still count as the same
                (default 0.3).

    Function decides whether two texts are (almost) the same. Both texts are
    normalized, then their edit distance is divided by their average length.

    Return True if the ratio is below `threshold`, otherwise False (bool).
    If both texts are empty after normalization there is nothing to compare
    and False is returned.
    """
    text1 = normalize(text1)
    text2 = normalize(text2)
    average_length = (len(text1) + len(text2)) / 2
    if average_length == 0:
        # Both texts consisted only of punctuation (or were empty): dividing
        # by the average length would raise ZeroDivisionError.
        return False
    distance = nltk.edit_distance(text1, text2)
    return distance / average_length < threshold

def getIntent(text):
    """
    text - str

    Function walks through the intents in BOT_CONFIG in order and compares
    the text with each intent's examples using isMatching.

    Return the name of the first intent that has a matching example (str),
    or None if no example matches.
    """
    for intent_name, intent_data in BOT_CONFIG["intents"].items():
        if any(isMatching(text, sample) for sample in intent_data["examples"]):
            return intent_name
    return None

def getAnswer(intent):
    """
    intent - str, a key of BOT_CONFIG["intents"]

    Function picks one random entry from the intent's "responses" list.
    Return the chosen answer (str).
    """
    intent_data = BOT_CONFIG["intents"][intent]
    return random.choice(intent_data["responses"])

In [4]:
def bot(text, min_confidence=0.0):
    """
    text - str
    min_confidence - float, minimum predicted probability required to accept
                     the classifier's intent (default 0.0).

    Function builds the reply to a user message. The intent is first looked
    up by comparing the text with the configured examples (getIntent). If
    nothing matches, the trained model predicts the intent. When an intent is
    available, a random answer of that intent is returned; otherwise a random
    phrase from 'failure_phrases'.

    With the default min_confidence of 0.0 every model prediction is
    accepted, so the failure phrases are practically never used. Pass a
    positive value to fall back to them on low-confidence predictions.

    Return answer (str).
    """
    intent = getIntent(text)

    if not intent:
        features = vectorizer.transform([text])
        predicted = model.predict(features)[0]
        # Only ask for probabilities when a threshold was actually requested,
        # so the default path stays identical to a plain predict().
        if min_confidence <= 0 or model.predict_proba(features).max() >= min_confidence:
            intent = predicted

    print("Intent =", intent)

    if intent:
        return getAnswer(intent)

    failure_phrases = BOT_CONFIG['failure_phrases']
    return random.choice(failure_phrases)

<div class="alert alert-block alert-success">
<b> Обучение модели </b>
</div>

In [5]:
# Training data: X holds every example phrase, y holds the intent name that
# the phrase at the same position belongs to.
X, y = [], []
for name, data in BOT_CONFIG["intents"].items():
    examples = data['examples']
    X.extend(examples)  # texts
    y.extend([name] * len(examples))  # one category label per text

In [6]:
# Fit a TF-IDF vectorizer on the example phrases; the same fitted object is
# later used by bot() to vectorize incoming messages.
vectorizer = TfidfVectorizer()
vectorizer.fit(X)

In [7]:
# Convert the example phrases into the feature matrix used for training.
X_vectorized = vectorizer.transform(X)

In [8]:
# Train the intent classifier: inputs are the vectorized examples,
# labels are the intent names collected in y.
model = LogisticRegression()
model.fit(X_vectorized, y)

In [9]:
# Sanity check: classify a single phrase with the freshly trained model.
test = vectorizer.transform(["Как дела"])
model.predict(test)

array(['mood'], dtype='<U29')

In [10]:
# Score on the very same examples the model was fitted on, i.e. a
# training-set score rather than an estimate on held-out data.
model.score(X_vectorized, y)

0.2671552298467688

In [11]:
# Persist the trained classifier. The context manager flushes and closes the
# file, so the data is fully on disk before the file is opened again.
# NOTE: only `model` is written; the fitted `vectorizer` that bot() uses to
# build features is not stored in this file.
with open("bot_model_speech.bin", "wb") as f:
    pickle.dump(model, f)

<div class="alert alert-block alert-success">
<b> Загрузка </b>
</div>

In [12]:
# Read the pickled classifier back; the context manager closes the file
# handle once loading is done.
# NOTE(security): pickle.load can execute arbitrary code, so only load files
# that were produced by a trusted source.
with open("bot_model_speech.bin", "rb") as f:
    loaded_model = pickle.load(f)

In [13]:
# Display the unpickled object to confirm that loading worked.
loaded_model

In [14]:
# Try the whole reply pipeline on a sample phrase.
bot("Доброго вечерочка")

Intent = hello


'И вам не болеть!'

<div class="alert alert-block alert-info">
<b> Запуск бота с разговорной частью </b>
</div>

In [15]:
# BotFather
# The bot token is read from the TELEGRAM_BOT_KEY environment variable so the
# secret does not have to be stored in the notebook. The 'str' placeholder is
# kept as a fallback and must be replaced if the variable is not set.
BOT_KEY = os.environ.get("TELEGRAM_BOT_KEY") or 'str'  # enter your bot key

In [17]:
from telegram import Update
from telegram.ext import Updater, CallbackContext, MessageHandler, Filters

# The function will be called when a message is received.
def botMessage(update: Update, context: CallbackContext):
    """
    Handler for incoming text updates: logs the user's text, asks bot() for
    an answer and sends it back as a reply.

    update - telegram.Update, the incoming update
    context - telegram.ext.CallbackContext, not used here

    `update.effective_message` is used instead of `update.message` because
    the latter is None for edited messages and channel posts, which would
    make the handler fail with AttributeError.
    """
    message = update.effective_message
    if message is None or message.text is None:
        # Nothing textual to answer.
        return
    text = message.text  # user text
    print(f"Message: {text}")
    reply = bot(text)  # answer
    message.reply_text(reply)

# Create the updater for the configured bot token.
updater = Updater(BOT_KEY)

# Every incoming text message is routed to 'botMessage'.
text_handler = MessageHandler(Filters.text, botMessage)
updater.dispatcher.add_handler(text_handler)

# Start fetching updates, then keep the cell running until it is interrupted.
updater.start_polling()
updater.idle()

Message: Привет
Intent = hello
Message: Кто ты
Intent = name_questions
Message: Какой фильм посмотреть
Intent = films
