# Chatbot Development

## 1. Importing the required libraries

In [4]:
import numpy as np
# importing natural language toolkit
import nltk 
nltk.download('all')
import string
import random

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     C:\Users\ameen\AppData\Roaming\nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     C:\Users\ameen\AppData\Roaming\nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     C:\Users\ameen\AppData\Roaming\nltk_data...
[nltk_data]    |   Package biocreative_ppi is already up-to-date!
[nltk_data]    | Downloading package brown to
[nltk_data]    |     C:\Users\ameen\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown is already up-to-date!
[nltk_data]    | Downloading package brown_tei to
[nltk_data]    |     C:\Users\ameen\AppData\Roaming\nltk_data...
[nltk_data]    |   Package brown_tei is already up-to-date!
[nltk_data]    | Downloading package cess_cat to
[nltk_data]    |     C:\Users\a

## 2. Importing and reading the corpus

In [5]:
# Read the whole corpus in one go. The context manager closes the file handle
# as soon as the text is in memory, even if reading raises.
# errors='ignore' is passed so undecodable bytes are skipped instead of raising.
with open('chatbot.txt', 'r', errors='ignore') as f:
    raw_doc = f.read().lower()  # convert the text to lowercase
nltk.download('punkt')  # using the punkt tokenizer
nltk.download('wordnet')  # using the wordnet dictionary
sent_tokens = nltk.sent_tokenize(raw_doc)  # converts the doc to a list of sentences
word_tokens = nltk.word_tokenize(raw_doc)  # converts the doc to a list of words

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ameen\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ameen\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


### 2.1 Example of sentence tokens

In [6]:
# Show only the first two sentence tokens as a quick sanity check.
sent_tokens[:2]

['data science\nfrom wikipedia, the free encyclopedia\njump to navigationjump to search\nnot to be confused with information science.',
 'the existence of comet neowise (here depicted as a series of red dots) was discovered by analyzing astronomical survey data acquired by a space telescope, the wide-field infrared survey explorer.']

### 2.2 Example of word tokens

In [7]:
# Show only the first two word tokens as a quick sanity check.
word_tokens[:2]

['data', 'science']

## 3. Text Pre-processing

In [8]:
# Single module-level lemmatizer instance; LemTokens calls lemmer.lemmatize on each token.
lemmer = nltk.stem.WordNetLemmatizer()

# WordNet is a semantically oriented dictionary of English included in NLTK

def LemTokens(tokens):
    """Return a list holding the lemma of each item in `tokens`, in order.

    Each token is passed through the module-level `lemmer.lemmatize`.
    """
    lemmas = []
    for tok in tokens:
        lemmas.append(lemmer.lemmatize(tok))
    return lemmas
# Translation table for str.translate: every ASCII punctuation code point maps
# to None, which deletes that character from the translated string.
remove_punct_dict = str.maketrans('', '', string.punctuation)

def LemNormalize(text):
    """Lowercase `text`, delete punctuation, tokenize, and lemmatize.

    Returns the list produced by `LemTokens` for the cleaned-up tokens.
    """
    lowered = text.lower()
    without_punct = lowered.translate(remove_punct_dict)
    tokens = nltk.word_tokenize(without_punct)
    return LemTokens(tokens)

## 4. Defining the greeting function

In [9]:
# Lowercase greeting phrases the bot recognises; some entries span several words.
greet_input = ('hi', 'greetings', 'hey', 'hello', 'hello there', "what's up")
greet_response = ['🙏 Welcome to SalFix Bot, How can I help you?']

def greet(sentence):
    """Return a greeting reply if `sentence` contains a known greeting, else None.

    Matching is case-insensitive and done on whole words, so 'history' does
    not trigger on 'hi'. Punctuation attached to a word ("hi!", "hey,") is
    ignored, and multi-word entries of `greet_input` such as "what's up" are
    matched as phrases.

    Args:
        sentence: The raw user utterance.

    Returns:
        A randomly chosen element of `greet_response`, or None when no
        greeting is found.
    """
    # Strip punctuation only from the edges of each word so the apostrophe
    # inside "what's" survives.
    words = (word.strip(string.punctuation) for word in sentence.lower().split())
    # Pad with spaces so a plain substring test respects word boundaries.
    padded = ' ' + ' '.join(word for word in words if word) + ' '
    for phrase in greet_input:
        if ' ' + phrase + ' ' in padded:
            return random.choice(greet_response)
    return None

## 5. Response generation

In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
def response(user_response):
    """Return the stored sentence most similar to the user's query.

    The query is read from the last element of the module-level `sent_tokens`
    list, so the caller must append the query to `sent_tokens` before calling.
    A TF-IDF model is fit on all of `sent_tokens` on every call, and the
    query is compared against every other sentence by cosine similarity.

    Args:
        user_response: The user's utterance. Not read directly here; kept for
            the existing call signature.

    Returns:
        The best-matching sentence from `sent_tokens`, or an apology string
        when nothing shares any TF-IDF term with the query.
    """
    fallback = "I am sorry! I don't understand you"
    # Need the query plus at least one candidate sentence to compare against.
    if len(sent_tokens) < 2:
        return fallback
    TfidVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidVec.fit_transform(sent_tokens)
    similarities = cosine_similarity(tfidf[-1], tfidf).flatten()
    # Drop the final entry (the query compared with itself) so the query can
    # never be returned as its own best match, even when scores tie.
    idx = similarities[:-1].argmax()
    # Score of the chosen sentence itself; a zero score means no overlap at all.
    req_tfidf = similarities[idx]
    if req_tfidf == 0:
        return fallback
    return sent_tokens[idx]

## 6. Defining conversation start & end protocols

In [12]:
# Interactive chat loop: runs until the user types 'bye' or thanks the bot.
flag = True
print("Business Bot: My name is SalfixBot. Let's build a conversation. Also, if you want to exit or leave any time, just type bye")
while flag:
    # Normalise case and surrounding whitespace so "Bye " still ends the chat.
    user_response = input().lower().strip()
    if user_response == 'bye':
        flag = False
        print("BOT: Goodbye! Take care of your business")
    elif user_response in ('thanks', 'thank you'):
        flag = False
        print("BOT: You are welcome")
    else:
        # Call greet() once and reuse the result for both the check and the reply.
        greeting = greet(user_response)
        if greeting is not None:
            print("BOT: " + greeting)
        else:
            # response() reads the query from the end of sent_tokens.
            sent_tokens.append(user_response)
            try:
                word_tokens = word_tokens + nltk.word_tokenize(user_response)
                # NOTE(review): final_words is not read anywhere in the visible
                # notebook; kept in case other cells rely on it.
                final_words = list(set(word_tokens))
                print("BOT: ", end="")
                print(response(user_response))
            finally:
                # Remove exactly the entry appended above. list.remove() would
                # drop the first equal element, which could be a real sentence.
                sent_tokens.pop()
            

Business Bot: My name is SalfixBot. Let's build a conversation. Also, if you want to exit or leave any time, just type bye
BOT: 🙏 Welcome to SalFix Bot, How can I help you?
BOT: 🙏 Welcome to SalFix Bot, How can I help you?
BOT: Goodbye! Take care of your business
