# Data Driven Telegram Bot

### Importing Libraries

In [1]:
import os
import string
import warnings

import nltk
import numpy as np
warnings.filterwarnings("ignore")

### Opening and Defining the Corpus

In [32]:
# Read the whole corpus into memory. The context manager closes the file handle
# as soon as the read finishes instead of leaving it open for the kernel's lifetime.
# Undecodable bytes are skipped (errors='ignore') rather than raising.
with open('content.txt', 'r', errors='ignore', encoding='utf-8') as f:
    paragraph = f.read()

In [33]:
paragraph

'COVID-19 affects different people in different ways. Most infected people will develop mild to moderate illness and recover without hospitalization.\nMost common symptoms are fever, dry cough, tiredness. Less common symptoms are aches and pains,sore throat, diarrhoea, conjunctivitis, headache, loss of taste or smell, \na rash on skin, or discolouration of fingers or toes. Serious symptoms includes difficulty breathing or shortness of breath, chest pain or pressure, loss of speech or movement. \nSeek immediate medical attention if you have serious symptoms. Always call before visiting your doctor or health facility. People who have have mild symptoms and are otherwise healthy, \nself-isolate and contact your medical provider or a COVID-19 information line for advice. On average it takes 5–6 days from when someone is infected with the virus for symptoms to show, however it can take up to 14 days. \n\nThe first case of COVID-19 in India was reported on 30 January 2020. As of 9 July 2020,

### Defining the Greetings

In [34]:
# Phrase lists the bot matches incoming messages against and draws canned replies from.

# Matched against the user's message and also used as the opening of the bot's greeting.
greetings = [
    'Hey',
    'Hello',
    'Hi',
    'It’s great to see you',
    'Nice to see you',
    'Good to see you',
]

# Matched against the user's message and also used as the bot's farewell.
bye = [
    'Bye',
    'Bye-Bye',
    'Goodbye',
    'Have a good day',
    'Stop',
]

# User messages that count as a "thank you".
thank_you = [
    'Thanks',
    'Thank you',
    'Thanks a bunch',
    'Thanks a lot.',
    'Thank you very much',
    'Thanks so much',
    'Thank you so much',
]

# Replies the bot picks from when it has been thanked.
thank_response = [
    "You're welcome.",
    'No problem.',
    'No worries.',
    ' My pleasure.',
    'It was the least I could do.',
    'Glad to help.',
]

### Pre-processing the raw text

In [35]:
# Fetch the NLTK resources this notebook relies on (a no-op once they are installed).
nltk.download('punkt')   # for first-time use only
nltk.download('wordnet')    # for first-time use only


# Tokenize the corpus two ways: sentence by sentence and word by word.
# sent_tokens is the list the bot later searches for an answer.
sent_tokens = nltk.sent_tokenize(paragraph)
word_tokens = nltk.word_tokenize(paragraph)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [36]:
# Sanity check: show the first sentence produced by the sentence tokenizer.
sent_tokens[:1]

['COVID-19 affects different people in different ways.']

In [37]:
# Sanity check: show the first seven word tokens.
word_tokens[:7]

['COVID-19', 'affects', 'different', 'people', 'in', 'different', 'ways']

In [38]:
# Lemmatization

# Lemmatizer instance used by LemTokens to reduce each token to its lemma.
lemmer = nltk.stem.WordNetLemmatizer()

In [39]:
def LemTokens(tokens):
    """Lemmatize every token and return the results as a list, in input order.

    Each token is passed through the module-level ``lemmer``.
    """
    return list(map(lemmer.lemmatize, tokens))

In [40]:
# Translation table for str.translate(): every character in string.punctuation
# is keyed by its code point (ord) and mapped to None, which makes translate()
# delete that character from the text.
remove_punct_dict = dict.fromkeys(map(ord, string.punctuation))

# remove_punct_dict

In [41]:
def Normalize(text):
    """Turn raw text into a list of lemmatized tokens.

    Steps, in order: lower-case the text, delete every punctuation character
    listed in ``remove_punct_dict``, word-tokenize the result, and lemmatize
    each token with ``LemTokens``.
    """
    cleaned = text.lower().translate(remove_punct_dict)
    words = nltk.word_tokenize(cleaned)
    return LemTokens(words)


### Generating Response

In [42]:
from sklearn.feature_extraction.text import TfidfVectorizer   # For Tfid Vectorizer
from sklearn.metrics.pairwise import cosine_similarity   # For cosine similarity

In [None]:
def response(user_response):
    """Return the corpus sentence most similar to ``user_response``.

    The query is appended to the global ``sent_tokens`` so that it is vectorized
    with the same TF-IDF vocabulary as the corpus sentences. It is intentionally
    still the last element of ``sent_tokens`` when this function returns: the
    caller (``bot_initialize``) is responsible for removing it again.

    Parameters
    ----------
    user_response : str
        The user's question.

    Returns
    -------
    str
        The best-matching corpus sentence, or an apology message when no corpus
        sentence has any similarity to the query (or the corpus is empty).
    """
    fallback = "I am sorry! I don't understand you. Please rephrase you query."

    sent_tokens.append(user_response)   # the query becomes the last "document"
    if len(sent_tokens) < 2:
        # Nothing but the query itself: there is no corpus sentence to return.
        return fallback

    # The custom tokenizer applies the notebook's pre-processing; English stop words are dropped.
    TfidfVec = TfidfVectorizer(tokenizer=Normalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(sent_tokens)

    # Compare the query (last row) with the corpus rows only. Leaving the query out
    # of the candidates means we never depend on it sorting last, so a corpus
    # sentence that ties with it can no longer cause the query to be echoed back.
    scores = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()
    best = int(scores.argmax())

    if scores[best] == 0:    # 0 means no term is shared with any corpus sentence
        return fallback
    return sent_tokens[best]

In [None]:
import random

def bot_initialize(user_msg):
    """Produce the bot's reply to a single incoming message.

    Dispatch order: farewell phrases, the ``/start`` command, thank-you phrases,
    greetings, and finally a similarity search over the corpus via ``response``.
    Phrase matching ignores letter case and surrounding whitespace, so "bye" and
    " Hi " are recognised as well as the exact spellings in the phrase lists.

    Parameters
    ----------
    user_msg : str
        Text sent by the user.

    Returns
    -------
    str
        The reply to send back.
    """
    text = user_msg.strip()
    key = text.casefold()

    def _is_one_of(phrases):
        # Case-insensitive membership test against one of the phrase lists.
        return key in {phrase.casefold() for phrase in phrases}

    if _is_one_of(bye):
        return random.choice(bye)

    if text == '/start':
        return """Hi! There. I am your Corona Protector. I can tell you all the Facts and Figures, Signs and Symptoms related to spread of Covid-19 in India. \nType Bye to Exit."""

    if _is_one_of(thank_you):
        return random.choice(thank_response)

    if _is_one_of(greetings):
        return random.choice(greetings) + ", What information you what related to Covid-19 in India"

    query = user_msg.lower()
    try:
        return response(query)
    finally:
        # response() leaves the query as the last element of sent_tokens. Pop exactly
        # that element, even if response() raised, so the corpus never accumulates
        # user questions. list.remove() would delete the first equal item instead,
        # which could be a genuine corpus sentence.
        if sent_tokens and sent_tokens[-1] == query:
            sent_tokens.pop()

### Activating Data Driven Telegram Bot

In [43]:
import requests
import json

class telegram_bot():
    """Minimal Telegram Bot API client: long-polls for updates and sends text messages.

    The bot token is never stored in the notebook. Pass it to the constructor or
    export it in the ``TELEGRAM_BOT_TOKEN`` environment variable before starting
    the kernel. A token that was ever committed in source should be revoked.
    """

    TOKEN_ENV_VAR = "TELEGRAM_BOT_TOKEN"
    POLL_TIMEOUT = 100    # seconds the server may hold a getUpdates call open (long polling)
    SEND_TIMEOUT = 30     # seconds to wait for sendMessage before giving up

    def __init__(self, token=None):
        """Create a client.

        Parameters
        ----------
        token : str, optional
            Bot token. When omitted it is read from ``TELEGRAM_BOT_TOKEN``.

        Raises
        ------
        RuntimeError
            If no token was passed and the environment variable is unset or empty.
        """
        if token is None:
            token = os.environ.get(self.TOKEN_ENV_VAR)
        if not token:
            raise RuntimeError(
                f"No Telegram bot token: pass token=... or set {self.TOKEN_ENV_VAR}."
            )
        self.token = token
        self.url = f"https://api.telegram.org/bot{self.token}"

    def get_updates(self, offset=None):
        """Long-poll ``getUpdates`` and return the decoded JSON response.

        Parameters
        ----------
        offset : int, optional
            ``update_id`` of the last update already handled; the request asks for
            updates starting at ``offset + 1``.

        Raises
        ------
        requests.exceptions.RequestException
            On network failure, or when no response arrives within the client timeout.
        """
        params = {"timeout": self.POLL_TIMEOUT}
        if offset:
            params["offset"] = offset + 1
        # The client-side timeout is slightly longer than the server-side long-poll
        # window, so a healthy idle poll returns normally while a dead connection
        # cannot block the loop forever.
        url_info = requests.get(
            self.url + "/getUpdates",
            params=params,
            timeout=self.POLL_TIMEOUT + 10,
        )
        return json.loads(url_info.content)

    def send_message(self, msg, chat_id):
        """Send ``msg`` to ``chat_id``; do nothing when ``msg`` is None.

        The text is passed via ``params`` so that requests URL-encodes it; characters
        such as ``&``, ``#`` or newlines would otherwise corrupt the query string.
        """
        if msg is None:
            return
        requests.get(
            self.url + "/sendMessage",
            params={"chat_id": chat_id, "text": msg},
            timeout=self.SEND_TIMEOUT,
        )

    def grab_token(self):
        """Return the bot token this client was configured with."""
        return self.token

In [None]:
# Telegram client used by the polling loop below.
tbot = telegram_bot()

# update_id of the most recently seen update; None until the first update arrives.
# It is passed to get_updates() as the offset on the next poll.
update_id = None

def make_reply(msg):
    """Return the bot's reply to ``msg``, or None when there is no text to answer.

    Parameters
    ----------
    msg : str or None
        Text of the incoming message; None when the update carried no text.

    Returns
    -------
    str or None
        The reply produced by ``bot_initialize``, or None for a None input
        (previously this case raised UnboundLocalError).
    """
    if msg is None:
        return None
    return bot_initialize(msg)     # user input starts processing in bot_initialize
       
# Main polling loop: fetch new updates, answer each text message, repeat forever
# (interrupt the kernel to stop the bot).
while True:
    print("...")
    updates = tbot.get_updates(offset=update_id)
    # An unsuccessful API call carries no "result" field; treat it as "no updates".
    updates = updates.get('result', [])
    print(updates)
    for item in updates:
        # Record the id first so the next poll does not deliver this update again,
        # even when the update is skipped below.
        update_id = item["update_id"]
        print(update_id)

        # Not every update is a chat message (e.g. edited messages arrive under a
        # different key), so look the fields up defensively instead of hiding
        # failures behind a bare except.
        incoming = item.get("message")
        if incoming is None:
            continue
        message = incoming.get("text")
        sender = incoming.get("from")
        if message is None or sender is None:
            # Nothing to answer (photo, sticker, ...) or nobody to answer to.
            continue
        print(message)

        # NOTE(review): replies are addressed to the sender's user id, as before;
        # for group chats the chat id may be the intended target - confirm.
        from_ = sender["id"]
        print(from_)

        reply = make_reply(message)
        tbot.send_message(reply, from_)

...
[{'update_id': 998276145, 'message': {'message_id': 553, 'from': {'id': 779810422, 'is_bot': False, 'first_name': 'ຸ', 'username': 'shSood', 'language_code': 'en'}, 'chat': {'id': 779810422, 'first_name': 'ຸ', 'username': 'shSood', 'type': 'private'}, 'date': 1596554996, 'text': 'Hi'}}]
998276145
Hi
779810422
...
...
[]
...
[]
...
[]
...
