<a href="https://colab.research.google.com/github/ReynaQuita/NLP/blob/main/Tr_Chinese_English_Telegram_Bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Driven Telegram Bot

In [1]:
# Install transformers (imported further down) and sentencepiece into the runtime.
! pip install transformers sentencepiece



### Defining the Greetings

In [6]:
# Canned phrases used by bot_initialize() for small talk that never reaches the models.

# Greetings the bot recognises and also picks from when greeting back.
greetings = [
    'Hey',
    'Hello',
    'Hi',
    'It’s great to see you',
    'Nice to see you',
    'Good to see you',
]

# Farewell keywords; the bot answers with a random entry from this same list.
bye = [
    'Bye',
    'Bye-Bye',
    'Goodbye',
    'Have a good day',
    'Stop',
]

# Ways a user may say thanks.
thank_you = [
    'Thanks',
    'Thank you',
    'Thanks a bunch',
    'Thanks a lot.',
    'Thank you very much',
    'Thanks so much',
    'Thank you so much',
]

# Replies to a thank-you. These are sent verbatim, so they carry no stray
# leading/trailing whitespace.
thank_response = [
    "You're welcome.",
    'No problem.',
    'No worries.',
    'My pleasure.',
    'It was the least I could do.',
    'Glad to help.',
]

### Import The Model and The Tokenizer

In [14]:
# Checkpoint identifiers handed to from_pretrained() in the loading cell.
model_translation = 'facebook/mbart-large-50-many-to-many-mmt'   # translation checkpoint
model_dialogue = 'microsoft/DialoGPT-medium'                     # dialogue checkpoint

In [15]:
from transformers import AutoTokenizer, MBart50TokenizerFast, MBartForConditionalGeneration, AutoModelForCausalLM

In [16]:
# model_translation / model_dialogue start out as checkpoint-id strings and are
# rebound to the loaded model objects below. Each load is guarded so that
# re-running this cell does not pass an already-loaded model object back into
# from_pretrained().
if isinstance(model_translation, str):
    tokenizer_translation = MBart50TokenizerFast.from_pretrained(model_translation)
    model_translation = MBartForConditionalGeneration.from_pretrained(model_translation)

if isinstance(model_dialogue, str):
    tokenizer_dialogue = AutoTokenizer.from_pretrained(model_dialogue)
    model_dialogue = AutoModelForCausalLM.from_pretrained(model_dialogue)

### Generating Response

In [18]:
def _translate(text, src_lang, tgt_lang, tokenizer, model, max_length):
  """Translate ``text`` from ``src_lang`` to ``tgt_lang`` and return the decoded string."""
  # The tokenizer is stateful: the source language must be set before encoding.
  tokenizer.src_lang = src_lang
  encoded = tokenizer(text, return_tensors="pt")
  generated_tokens = model.generate(
    **encoded,
    # Force the first generated token to be the target-language code.
    forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
    max_length=max_length,
  )
  return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]


def response(article_zh, tokenizer_translation = tokenizer_translation, model_translation = model_translation, tokenizer_dialogue = tokenizer_dialogue, model_dialogue = model_dialogue, max_length = 100, stride = 5):
  """Answer a Chinese message with a Chinese reply.

  Pipeline: translate zh_CN -> en_XX, generate an English dialogue turn,
  translate that turn en_XX -> zh_CN.

  Args:
    article_zh: The user's message (Chinese text).
    tokenizer_translation: Tokenizer for the translation model.
    model_translation: Translation model.
    tokenizer_dialogue: Tokenizer for the dialogue model.
    model_dialogue: Causal language model that produces the reply.
    max_length: ``max_length`` passed to every ``generate`` call. For the
      dialogue model the prompt tokens are part of the generated sequence.
    stride: Unused; accepted only so existing callers keep working.

  Returns:
    The generated reply translated to Chinese, as a string.
  """
  # Chinese -> English
  translation_zh2en = _translate(
    article_zh, "zh_CN", "en_XX", tokenizer_translation, model_translation, max_length
  )

  # Generate the English reply; the prompt is the translated text plus EOS.
  encoded_dialogue = tokenizer_dialogue.encode(translation_zh2en + tokenizer_dialogue.eos_token, return_tensors='pt')
  generated_tokens_dialogue = model_dialogue.generate(
    encoded_dialogue,
    max_length=max_length,
    pad_token_id=tokenizer_dialogue.eos_token_id,
  )
  # Skip the first encoded_dialogue.shape[-1] positions so that only the
  # tokens after the prompt are decoded.
  reply_tokens = generated_tokens_dialogue[:, encoded_dialogue.shape[-1]:][0]
  generated_dialogue = tokenizer_dialogue.decode(reply_tokens, skip_special_tokens=True)

  # English -> Chinese
  translation_en2zh = _translate(
    generated_dialogue, "en_XX", "zh_CN", tokenizer_translation, model_translation, max_length
  )

  return translation_en2zh

In [19]:
# Try the whole response() pipeline on a single Chinese sentence.
response("你要去哪裡？")

'我要去巴哈马的海滩。'

In [27]:
import random

def bot_initialize(user_msg):
    """Choose the bot's reply for one incoming text message.

    Keyword matching ignores surrounding whitespace and letter case, so
    "bye", "Bye" and " BYE " are all treated as a farewell.

    Args:
        user_msg: The text the user sent (must be a string).

    Returns:
        - the welcome text for ``/start``;
        - a random farewell when the message is one of ``bye``;
        - a random entry of ``thank_response`` when it is one of ``thank_you``;
        - a random greeting plus a prompt when it is one of ``greetings``;
        - otherwise whatever ``response()`` returns for the lower-cased message.
    """
    normalized = user_msg.strip().casefold()

    def _is_one_of(phrases):
        # Compare against each known phrase under the same normalisation.
        return any(normalized == phrase.strip().casefold() for phrase in phrases)

    if normalized == '/start':
        return """Hi! There. Please Write something in Traditional Chinese. \nType Bye to Exit."""
    if _is_one_of(bye):
        return random.choice(bye)
    if _is_one_of(thank_you):
        return random.choice(thank_response)
    if _is_one_of(greetings):
        return random.choice(greetings) + ", please type something in Traditional Chinese"

    # Anything else goes through the translate -> dialogue -> translate pipeline.
    return response(user_msg.lower())

### Activating Data Driven Telegram Bot

In [28]:
import requests
import json

class telegram_bot():
    """Small Telegram Bot API client built on ``getUpdates`` and ``sendMessage``."""

    def __init__(self, token=None):
        """Create the client.

        Args:
            token: Bot token. When omitted, the ``TELEGRAM_BOT_TOKEN``
                environment variable is used, so the secret never has to be
                written into the notebook.

        Raises:
            ValueError: If no token is given and the environment variable is
                missing or empty.
        """
        import os  # local import: only needed to resolve the token

        resolved_token = token or os.environ.get("TELEGRAM_BOT_TOKEN")
        if not resolved_token:
            raise ValueError(
                "Telegram bot token missing: pass token=... or set TELEGRAM_BOT_TOKEN"
            )
        self.token = resolved_token
        self.url = f"https://api.telegram.org/bot{self.token}"

    def get_updates(self,offset=None):
        """Fetch pending updates and return the decoded JSON response.

        Args:
            offset: ``update_id`` of the last update already handled; the
                request then asks for ``offset + 1`` onwards. A falsy value
                sends no offset.
        """
        # timeout=100 is sent to the server as the getUpdates query parameter.
        params = {"timeout": 100}
        if offset:
            params["offset"] = offset + 1
        url_info = requests.get(self.url + "/getUpdates", params=params)
        return json.loads(url_info.content)

    def send_message(self,msg,chat_id):
        """Send ``msg`` to ``chat_id``; does nothing when ``msg`` is None."""
        if msg is None:
            return
        # Passing the values via params lets requests URL-encode them, so text
        # containing '&', '#', '+' or spaces is sent intact.
        requests.get(
            self.url + "/sendMessage",
            params={"chat_id": chat_id, "text": msg},
        )

    def grab_token(self):
        """Return the bot token this client was created with."""
        return self.token

In [31]:
# Client used by the polling loop below.
tbot = telegram_bot()
# Placeholder; the polling loop rebinds this on every iteration.
updates = []
# update_id of the last update seen; None means the first poll sends no offset.
update_id = None

def make_reply(msg):     # user input will go here
    """Return the bot's reply for ``msg``, or None when there is no text to answer.

    Args:
        msg: Text of the incoming message, or None.

    Returns:
        The result of ``bot_initialize(msg)``, or None when ``msg`` is None.
    """
    if msg is None:
        # Nothing to answer; returning None avoids referencing an unbound local.
        return None
    return bot_initialize(msg)     # user input will start processing from bot_initialize function
       
# Poll Telegram forever and answer every text message; stop with a kernel interrupt.
while True:
    print("...")
    updates = tbot.get_updates(offset=update_id)
    updates = updates['result']
    print(updates)
    for item in updates:
        # Remember the id first so this update is not fetched again,
        # even when it is skipped below.
        update_id = item["update_id"]
        print(update_id)

        # An update may carry something other than a message (e.g. an edited
        # message), and a message may have no text (e.g. a photo). Skip those
        # instead of failing on a missing key.
        incoming = item.get("message")
        if incoming is None:
            continue
        message = incoming.get("text")
        if message is None:
            continue
        print(message)

        from_ = incoming["from"]["id"]
        print(from_)

        reply = make_reply(message)
        tbot.send_message(reply,from_)

...
[{'update_id': 908997979, 'message': {'message_id': 69, 'from': {'id': 1187390433, 'is_bot': False, 'first_name': 'Reyna', 'last_name': 'Quita', 'username': 'reynaquita', 'language_code': 'en'}, 'chat': {'id': 1187390433, 'first_name': 'Reyna', 'last_name': 'Quita', 'username': 'reynaquita', 'type': 'private'}, 'date': 1632726123, 'text': 'Hi'}}]
908997979
Hi
1187390433
...
[{'update_id': 908997980, 'message': {'message_id': 71, 'from': {'id': 1187390433, 'is_bot': False, 'first_name': 'Reyna', 'last_name': 'Quita', 'username': 'reynaquita', 'language_code': 'en'}, 'chat': {'id': 1187390433, 'first_name': 'Reyna', 'last_name': 'Quita', 'username': 'reynaquita', 'type': 'private'}, 'date': 1632726133, 'text': 'Bye'}}]
908997980
Bye
1187390433
...
[{'update_id': 908997981, 'message': {'message_id': 73, 'from': {'id': 1187390433, 'is_bot': False, 'first_name': 'Reyna', 'last_name': 'Quita', 'username': 'reynaquita', 'language_code': 'en'}, 'chat': {'id': 1187390433, 'first_name': 'Rey

KeyboardInterrupt: ignored