<a href="https://colab.research.google.com/github/Taibah-10/Chatbot-using-nltk-library-in-Python/blob/main/chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [109]:
import numpy as np
import nltk
import random
import string
import warnings
warnings.filterwarnings('ignore')

In [110]:
# Load the text the bot answers from. The context manager closes the file
# handle as soon as the text has been read (the original left it open).
# errors='ignore' silently drops bytes that cannot be decoded.
with open('chatbot.txt', 'r', errors='ignore') as file:
  raw = file.read().lower() # lowercase everything so later matching is case-insensitive

In [111]:
# Download the NLTK data packages needed by the cells below.
# 'punkt': pre-trained Punkt tokenizer for English from the NLTK data package.
# NOTE(review): newer NLTK releases may look for a 'punkt_tab' resource instead - confirm
# against the installed NLTK version.
nltk.download('punkt')
# 'wordnet': large lexical database of English nouns, adjectives, adverbs and verbs,
# grouped into sets of cognitive synonyms (synsets); presumably the data behind
# the WordNetLemmatizer created later in this notebook.
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [112]:
# Tokenization: split the lowercased corpus into word tokens and into sentences.
word_token = nltk.word_tokenize(raw) # flat list of word tokens
sent_token = nltk.sent_tokenize(raw) # list of sentences; response() picks its answers from here

In [113]:
# Sanity check: show the first two entries of each token list.
for preview in (sent_token, word_token):
  print(preview[:2])

['a chatbot or chatterbot is a software application used to conduct an on-line chat conversation via text or text-to-speech, in lieu of providing direct contact with a live human agent.', '[1][2]a chatbot is a type of software that can help customers by automating conversations and interact with them through messaging platforms.']
['a', 'chatbot']


In [114]:
# Preprocessing helpers shared by the TF-IDF tokenizer.
lemmer = nltk.stem.WordNetLemmatizer()
def lemmatize_tokens(tokens):
  """Return a list holding the WordNet lemma of every token, in input order."""
  return list(map(lemmer.lemmatize, tokens))

# Translation table for str.translate(): each punctuation code point maps to None,
# so translating a string with this table deletes every punctuation character.
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}

def lemmatization(text):
  """Turn raw text into a list of lemmas.

  The text is lowercased, punctuation is deleted, the result is split into
  word tokens and every token is lemmatized. Passed to TfidfVectorizer as its
  tokenizer callback.
  """
  cleaned = text.lower().translate(remove_punct_dict)
  words = nltk.word_tokenize(cleaned)
  return lemmatize_tokens(words)


In [115]:
#greeting by bot
greeting_input = ["hello", "hi", "hey", "heyya", "heyy", "greetings", "what's up", "sup"]
greeting_response = ["hello", "hi", "hey", "hi there!", "I'm glad! You are talking to me."]

# Private punctuation-deleting table so greet() normalizes text the same way
# whether or not the caller already stripped punctuation.
_greeting_punct_table = {ord(punct): None for punct in string.punctuation}

def _normalize_greeting(text):
  """Lowercase text, delete punctuation and collapse whitespace runs to single spaces."""
  return " ".join(text.lower().translate(_greeting_punct_table).split())

def greet(sentence):
  """Return a random greeting reply if sentence contains a known greeting, else None.

  Both the sentence and every entry of greeting_input are normalized
  (lowercased, punctuation removed) before comparison, and entries are matched
  as whole words or whole phrases. This lets the multi-word entry "what's up"
  match "whats up" / "What's up?", which a word-by-word lookup never could,
  and lets "hi!" match "hi".

  Args:
    sentence: the user's message.

  Returns:
    A random element of greeting_response, or None when no greeting is found.
  """
  # Pad with spaces so that "hi" matches the word "hi" but not "this" or "high".
  padded = " " + _normalize_greeting(sentence) + " "
  for phrase in greeting_input:
    key = _normalize_greeting(phrase)
    # Skip entries that normalize to nothing; they would match every input.
    if key and (" " + key + " ") in padded:
      return random.choice(greeting_response)
  return None

In [118]:
#generating response
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def response(user_response):
    """Return the corpus sentence most similar to user_response.

    The sentences in sent_token plus the user's message are vectorized with
    TF-IDF (using lemmatization as tokenizer and English stop words removed),
    and the message is compared with every corpus sentence by cosine
    similarity.

    Unlike the original implementation, the global sent_token list is never
    mutated: the message is added to a local copy only. This keeps the corpus
    intact if vectorization raises, and avoids list.remove() deleting a corpus
    sentence that happens to equal the message. The message is also scored
    against corpus rows only, so it can never be returned as its own answer.

    Args:
        user_response: the user's message.

    Returns:
        The best-matching sentence from sent_token, or an apology string when
        the corpus is empty or no sentence shares any term with the message.
    """
    fallback = "I am sorry, I don't understand you"
    if not sent_token:
        # Nothing to search; the original raised IndexError in this case.
        return fallback
    documents = sent_token + [user_response]  # local copy; query is the last row
    TfidfVec = TfidfVectorizer(tokenizer=lemmatization, stop_words='english')
    tfidf = TfidfVec.fit_transform(documents)
    # Similarity of the query row against the corpus rows only.
    scores = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()
    best_idx = scores.argmax()
    if scores[best_idx] <= 0:
        # No shared vocabulary between the query and any corpus sentence.
        return fallback
    return sent_token[best_idx]

In [119]:
# Interactive chat loop: runs until the user says bye or thanks the bot.
flag=True
print("Bot: Hello, I'm here to solve all your queries about chatbots. If you want to exit type - 'Bye' ")
while(flag):
  user_response=input("You: ")
  # Normalize: lowercase, delete punctuation, and trim surrounding whitespace so
  # that inputs such as "Bye!" or "bye " still hit the exit commands below.
  user_response=user_response.lower().translate(remove_punct_dict).strip()
  if(user_response == 'bye'):
    flag=False
    print("Bot: Bye! take care...")
  elif (user_response == 'thanks' or user_response == 'thank you'):
    flag=False
    print("Bot: You're Welcome!")
  else:
    # Call greet() once and reuse the result; calling it twice performed two
    # independent random draws and discarded the first.
    greeting = greet(user_response)
    if greeting is not None:
      print("Bot: " + greeting)
    else:
      print("Bot: " + response(user_response))

Bot: Hello, I'm here to solve all your queries about chatbots. If you want to exit type - 'Bye' 
You: hi
Bot: hey
You: Can you tell me what is chatbot
Bot: [1][2]a chatbot is a type of software that can help customers by automating conversations and interact with them through messaging platforms.
You: thanks
Bot: You're Welcome!
