In [1]:
import random
import numpy as np
import regex
import re
from os import listdir
from os.path import isfile, join
import requests

# !pip install python_telegram_bot

import logging
import soundfile as sf

from telegram import ReplyKeyboardMarkup, ReplyKeyboardRemove, Update
from telegram.ext import (
    Updater,
    CommandHandler,
    MessageHandler,
    Filters,
    ConversationHandler,
    CallbackContext,
    CallbackQueryHandler
)
from telegram import InlineKeyboardButton, InlineKeyboardMarkup

import sys
import os
import pandas as pd
import subprocess
import json
import codecs
import unidecode
import noisereduce as nr

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline, Conversation

# Load the dialogue checkpoint once. `tokenizer` and `model` are module-level
# globals that the helper functions in later cells read directly.
MODEL_NAME = "Grossmend/rudialogpt3_medium_based_on_gpt2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
def get_length_param(text: str) -> str:
    """Return the length-bucket code for ``text``.

    The text is encoded with the module-level ``tokenizer`` and the number of
    resulting tokens is mapped to a one-character code:

    * ``'1'`` for at most 15 tokens,
    * ``'2'`` for at most 50 tokens,
    * ``'3'`` for at most 256 tokens,
    * ``'-'`` for anything longer.
    """
    n_tokens = len(tokenizer.encode(text))
    # Buckets are checked in ascending order; the first upper bound that fits wins.
    for upper_bound, code in ((15, '1'), (50, '2'), (256, '3')):
        if n_tokens <= upper_bound:
            return code
    return '-'


def conversation(input_user, chat_history_ids=None):
    """Generate one bot reply to ``input_user`` with the module-level model.

    Args:
        input_user: The user's message text.
        chat_history_ids: Optional tensor of token ids from earlier turns (the
            second value returned by a previous call). When given, the new
            prompt is appended to it along the last dimension; when ``None``
            the prompt alone is used.

    Returns:
        A ``(decoded, chat_history_ids)`` tuple: the reply text decoded with
        special tokens skipped, and the full id tensor returned by
        ``model.generate`` (input ids followed by the generated reply).
    """
    print(f"===> User: {input_user}")

    # Prompt layout: "|0|<length bucket>|" + user text + EOS + "|1|1|".
    # NOTE(review): the trailing "|1|1|" presumably asks for a short bot reply
    # (speaker 1, length bucket 1) -- confirm against the model card.
    prompt = f"|0|{get_length_param(input_user)}|" + input_user + tokenizer.eos_token + "|1|1|"
    new_user_input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Append the new user input tokens to the chat history, if there is one.
    if chat_history_ids is None:
        bot_input_ids = new_user_input_ids
    else:
        bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1)

    # Only real generation options are passed. The previous `device`,
    # `mask_token_id` and `unk_token_id` arguments are neither generation
    # options nor model inputs; recent transformers releases reject such
    # arguments, while older releases silently ignored them.
    chat_history_ids = model.generate(
        bot_input_ids,
        num_return_sequences=1,
        max_length=512,
        no_repeat_ngram_size=3,
        do_sample=True,
        top_k=50,
        top_p=0.9,
        temperature=0.6,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Decode only the tokens generated after the prompt (first sequence).
    reply_ids = chat_history_ids[:, bot_input_ids.shape[-1]:][0]
    decoded = tokenizer.decode(reply_ids, skip_special_tokens=True)
    print(f"===> RuDialoGPT: {decoded}")

    return decoded, chat_history_ids

In [4]:
# Smoke test: a single-turn exchange with no prior chat history.
conversation('Привет')

===> User: Привет
===> RuDialoGPT: И тебе привет.


('И тебе привет.',
 tensor([[   96,    20,    96,    21,    96, 37954,     2,    96,    21,    96,
             21,    96,   732,  1490,  6129,    18,     2]]))

In [5]:
# Path of the file whose first line holds the Telegram bot token. The
# TELEGRAM_TOKEN_FILE environment variable overrides the original default, so
# the notebook can run on machines where that absolute path does not exist.
TOKEN_FILE = os.environ.get('TELEGRAM_TOKEN_FILE', '/media/boris/F/token.txt')

with open(TOKEN_FILE, encoding='utf-8') as token_file:
    # strip() removes the trailing newline and also '\r' or stray spaces,
    # which would otherwise end up inside the token.
    bot_token = token_file.readline().strip()

# Fail here with a clear message instead of later inside the Telegram client.
if not bot_token:
    raise ValueError(f'No bot token found on the first line of {TOKEN_FILE}')

In [None]:
# Enable logging: INFO and above, formatted as "time - logger - level - message".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

logger = logging.getLogger(__name__)

# Integer ids of the conversation states, one per menu action.
(
    TRANSLATE_EN_RU,
    TRANSLATE_RU_EN,
    SYNTHESIZE_EN,
    SYNTHESIZE_RU,
    EXTRA,
) = range(5)

def normalize_str(txt) -> str:
    """Lower-case ``txt``, trim surrounding whitespace and pass it through ``unidecode``.

    Returns the string produced by ``unidecode.unidecode``.
    """
    # TODO: REPLACE WITH YOUR OWN NORMALIZATION LOGIC HERE!!!!
    # Character whitelist for the filter that is currently commented out
    # below this function; nothing reads it yet.
    valid_chars = tuple(" '!.?&abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
    lowered = txt.lower()
    trimmed = lowered.strip()
    return unidecode.unidecode(trimmed)
#     res_arr = []
#     for c in new_txt:
#         if c in valid_chars:
#             res_arr.append(c)
#         else:
#             res_arr.append(' ')
#     res = ''.join(res_arr).strip()    
#     return ' '.join(res.split())

def start(update: Update, context: CallbackContext) -> int:
    """Show the main menu and return whatever state ``menu`` returns."""
    next_state = menu(update, context)
    return next_state


def menu(update: Update, context: CallbackContext) -> int:
    """Reply with the inline-keyboard menu and end the current conversation.

    The keyboard has four rows: the two translation directions, the two
    synthesis languages, the EXTRA action and a cancel button. Every button
    uses its caption as ``callback_data``.

    Returns:
        ``ConversationHandler.END``.
    """
    button_rows = (
        ("Translate en->ru", "Translate ru->en"),
        ("Synthesize en", "Synthesize ru"),
        ("EXTRA",),  ## REMOVE LATER
        ("cancel",),
    )
    keyboard = [
        [InlineKeyboardButton(caption, callback_data=caption) for caption in row]
        for row in button_rows
    ]

    update.message.reply_text('Please choose:', reply_markup=InlineKeyboardMarkup(keyboard))

    return ConversationHandler.END

def button(update: Update, context: CallbackContext) -> int:
    """Acknowledge a menu button press and return the matching conversation state.

    The callback query is answered, the menu message is replaced with
    "Selected option: <data>", and the state that corresponds to the button's
    callback data is returned. ``cancel`` maps to ``ConversationHandler.END``;
    unrecognised data yields ``None``.
    """
    query = update.callback_query

    # CallbackQueries need to be answered, even if no notification to the user is needed
    # Some clients may have trouble otherwise. See https://core.telegram.org/bots/api#callbackquery
    query.answer()

    query.edit_message_text(text=f"Selected option: {query.data}")

    next_state_by_option = {
        "Translate en->ru": TRANSLATE_EN_RU,
        "Translate ru->en": TRANSLATE_RU_EN,
        "Synthesize en": SYNTHESIZE_EN,
        "Synthesize ru": SYNTHESIZE_RU,
        "cancel": ConversationHandler.END,
        "EXTRA": EXTRA,
    }
    return next_state_by_option.get(query.data)


def help_command(update: Update, context: CallbackContext) -> None:
    """Reply to the user with a one-line usage hint."""
    usage_hint = "Use /start to test this bot."
    update.message.reply_text(usage_hint)

    
def get_message(update: Update) -> str:
    """Return the incoming message text with every '?' replaced by '&quest'.

    Callers interpolate the result into a request URL path, where a literal
    '?' would start the query string.
    """
    return '&quest'.join(update.message.text.split('?'))
    
def translate_en_ru(update: Update, context: CallbackContext) -> None:
    """Conversation handler: translate the incoming message from English to Russian."""
    _translate_handler(update, context, 'en', 'ru')


def translate_ru_en(update: Update, context: CallbackContext) -> None:
    """Conversation handler: translate the incoming message from Russian to English."""
    _translate_handler(update, context, 'ru', 'en')


def _translate_handler(update: Update, context: CallbackContext, src_lang, trg_lang) -> None:
    """Translate the incoming message and reply with the result.

    This used to be a first ``translate`` definition that the text-level
    ``translate`` below shadowed, so the two wrappers above raised TypeError
    (four arguments passed to a three-parameter function). It now has its own
    name and delegates the HTTP call to ``translate``.
    """
    user = update.message.from_user
    logger.info("Bio of %s: %s", user.first_name, update.message.text)

    update.message.reply_text(translate(get_message(update), src_lang, trg_lang))


def translate(text, src_lang, trg_lang) -> str:
    """Ask the local translation service to translate ``text``.

    Args:
        text: Text placed verbatim into the request URL path.
        src_lang: Source language code sent as ``src_lang``.
        trg_lang: Target language code sent as ``trg_lang``.

    Returns:
        The first element of the ``translation`` field of the JSON response.
    """
    request = requests.get(f'http://127.0.0.1:8000/translate/{text}?src_lang={src_lang}&trg_lang={trg_lang}')
    translation = request.json()['translation']
    return translation[0]


def synthesize_en(update: Update, context: CallbackContext) -> None:
    """Conversation handler: synthesize the incoming message as English speech."""
    _synthesize_handler(update, context, 'en')


def synthesize_ru(update: Update, context: CallbackContext) -> None:
    """Conversation handler: synthesize the incoming message as Russian speech."""
    _synthesize_handler(update, context, 'ru')


def _request_audio(text, src_lang):
    """Return the ``audio`` field of the local synthesis service's JSON response."""
    request = requests.get(f'http://127.0.0.1:8000/synthesize/{text}?src_lang={src_lang}')
    return request.json()['audio']


def _synthesize_handler(update: Update, context: CallbackContext, src_lang) -> None:
    """Synthesize the incoming message and send it back as an audio file.

    This used to be a first ``synthesize`` definition that the text-level
    ``synthesize`` below shadowed, so the two wrappers above raised TypeError
    (three arguments passed to a two-parameter function). The audio is
    written as returned by the service, without noise reduction, as in the
    original handler.
    """
    user = update.message.from_user
    logger.info("Bio of %s: %s", user.first_name, update.message.text)

    message = normalize_str(get_message(update))
    audio = _request_audio(message, src_lang)

    # The file name comes from user text: replace anything that is not a word
    # character, dash, dot or space so a '/' cannot point outside 'audio/'.
    stem = re.sub(r'[^\w\-. ]+', '_', message[:12]) or 'message'
    audio_name = os.path.join('audio', stem + '.wav')

    sf.write(audio_name, audio, 22050)

    # Close the handle once the upload call returns instead of leaking it.
    with open(audio_name, 'rb') as audio_file:
        context.bot.send_audio(chat_id=update.message.chat_id, audio=audio_file)


def synthesize(text, src_lang):
    """Request speech for ``text`` and return it after noise reduction.

    Args:
        text: Text placed verbatim into the request URL path.
        src_lang: Language code sent as ``src_lang``.

    Returns:
        The result of ``nr.reduce_noise`` applied to the service's ``audio``
        payload with a sample rate of 22050.
    """
    audio = _request_audio(text, src_lang)
    reduced_noise = nr.reduce_noise(y=audio, sr=22050)

    return reduced_noise
    
# def extra(update: Update, context: CallbackContext) -> None:
#     user = update.message.from_user
#     logger.info("Bio of %s: %s", user.first_name, update.message.text)
        
#     message = update.message.text
#     print('Original message', message)
#     message = translate(message.replace('?', '&quest'), 'ru', 'en')
#     print('En translated', message)
    
#     conv = Conversation(message)
#     conversational_pipeline(conv)
#     response = conv.generated_responses[0]
    
#     message = translate(response.replace('?', '&quest'), 'en', 'ru')
#     print('Ru translated', message)
#     normalized = normalize_str(message)
#     print(normalized)            
                               
#     if normalized == '':
#         normalized = '.'
    
#     audio = synthesize(normalized, 'ru')
#     audio_name = 'audio/' + response + '.wav'
#     sf.write(audio_name, audio, 22050)
    
#     context.bot.send_audio(chat_id=update.message.chat_id, audio=open(audio_name, 'rb'))
    
def extra(update: Update, context: CallbackContext) -> None:
    """Answer the incoming message with the dialogue model and send the reply as speech.

    The message text is passed to ``conversation``; the reply is normalized,
    sent to the local synthesis service with ``src_lang=ru``, noise-reduced,
    written to a WAV file under ``audio/`` and sent back to the chat.
    """
    user = update.message.from_user
    logger.info("Bio of %s: %s", user.first_name, update.message.text)

    message = update.message.text

    # The returned history is discarded, so every message starts a fresh dialogue.
    response, _ = conversation(message)
    normalized_response = normalize_str(response.replace('?', '&quest'))

    # Keep the URL path segment non-empty when normalization leaves nothing.
    if normalized_response == '':
        normalized_response = '.'

    request = requests.get(f'http://127.0.0.1:8000/synthesize/{normalized_response}?src_lang=ru')

    audio = request.json()['audio']
    reduced_noise = nr.reduce_noise(y=audio, sr=22050)

    # The file name comes from raw model output: replace anything that is not
    # a word character, dash, dot or space (a '/' would point into a missing
    # sub-directory) and cap the length so a long reply cannot exceed the
    # file-system's name limit.
    safe_stem = re.sub(r'[^\w\-. ]+', '_', response).strip()[:48] or 'response'
    audio_name = os.path.join('audio', safe_stem + '.wav')
    sf.write(audio_name, reduced_noise, 22050)

    # Close the handle once the upload call returns instead of leaking it.
    with open(audio_name, 'rb') as audio_file:
        context.bot.send_audio(chat_id=update.message.chat_id, audio=audio_file)
    
    
def cancel(update: Update, context: CallbackContext) -> int:
    """Log that the user cancelled, then show the menu and return its state."""
    logger.info("User %s canceled the conversation.", update.message.from_user.first_name)
    return menu(update, context)

    
def main() -> None:
    """Build the updater, register all handlers and poll until interrupted."""
    # Create the Updater and pass it your bot's token.
    updater = Updater(bot_token)
    dispatcher = updater.dispatcher

    # Plain text that is not a command is what every conversation state accepts.
    plain_text = Filters.text & ~Filters.command
    callbacks_by_state = {
        TRANSLATE_EN_RU: translate_en_ru,
        TRANSLATE_RU_EN: translate_ru_en,
        SYNTHESIZE_EN: synthesize_en,
        SYNTHESIZE_RU: synthesize_ru,
        EXTRA: extra,
    }
    dialogue = ConversationHandler(
        entry_points=[CallbackQueryHandler(button)],
        states={
            state: [MessageHandler(plain_text, callback)]
            for state, callback in callbacks_by_state.items()
        },
        fallbacks=[CommandHandler('menu', menu)],
    )

    for handler in (
        CommandHandler('start', start),
        CommandHandler('help', help_command),
        dialogue,
    ):
        dispatcher.add_handler(handler)

    # Start the Bot
    updater.start_polling()

    # Run the bot until the user presses Ctrl-C or the process receives SIGINT,
    # SIGTERM or SIGABRT
    updater.idle()


if __name__ == '__main__':
    main()

  "If 'per_message=False', 'CallbackQueryHandler' will not be "
2022-01-31 18:38:46,655 - apscheduler.scheduler - INFO - Scheduler started
2022-01-31 18:39:33,833 - __main__ - INFO - Bio of Boris: Привет


===> User: Привет
===> RuDialoGPT: Здарова, сосед)


2022-01-31 18:40:13,713 - __main__ - INFO - Bio of Boris: Меня зовут Саша


===> User: Меня зовут Саша
===> RuDialoGPT: Идиот.


2022-01-31 18:41:01,097 - __main__ - INFO - Bio of Boris: Анекдот


===> User: Анекдот
===> RuDialoGPT: Ну и кто тут анекдот?


2022-01-31 18:41:20,259 - __main__ - INFO - Bio of Boris: Ты робот


===> User: Ты робот
===> RuDialoGPT: А ты - нет
