#Chatbot

##Importing the Libraries

In [34]:
import numpy as np
import nltk
import string
import random

##Reading the Corpus of Text

In [35]:
f = open('/content/data.txt','r',errors = 'ignore')
raw_doc = f.read()

In [36]:
raw_doc = raw_doc.lower() #Converting entire text to lowercase
nltk.download('punkt') #Using the Punkt tonkenizer
nltk.download('wordnet') #Using the wordnet dictionary
nltk.download('omw-1.4') # Downloads the Open Multilingual WordNet 1.4 dataset from NLTK's data repository.

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [37]:
sentence_tokens = nltk.sent_tokenize(raw_doc) # Splits the raw document into individual sentences.
word_tokens = nltk.word_tokenize(raw_doc)  # Splits the raw document into individual words.

##After Tokenization

In [38]:
sentence_tokens[:5]

['hi, how are you doing?',
 "i'm fine.",
 'how about yourself?',
 "i'm fine.",
 'how about yourself?']

In [39]:
word_tokens[:5]

['hi', ',', 'how', 'are', 'you']

##Performing Text-PreProcessing Steps


In [40]:
lemmer = nltk.stem.WordNetLemmatizer()                                          # Creates a WordNetLemmatizer object to reduce words to their base form.
def LemTokens (tokens):                                                         # Function to lemmatize a list of tokens.
  return [lemmer.lemmatize(token) for token in tokens]
remove_punc_dict= dict((ord(punct), None) for punct in string.punctuation)      # Creates a dictionary to remove punctuation from text.
def LemNormalize(text):                                                         # Function to normalize text by converting to lowercase, removing punctuation, and lemmatizing.
  return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punc_dict)))

##Define Greeting functions


In [41]:
greet_inputs = ('hello', 'hi', 'whassup', 'how are you?')                  # List of possible greetings from the user.
greet_responses = ('hi', 'Hey', 'Hey There!', 'There there!!')             # List of possible responses to greetings.
def greet(sentence):                                                       # Function to respond to a user's greeting.
  for word in sentence.split():
    if word.lower() in greet_inputs:
      return random.choice(greet_responses)                                # Returns a random response if the user's sentence contains a greeting.

##Response Generation by the Bot


In [42]:
from sklearn.feature_extraction.text import TfidfVectorizer  # Imports a module to convert text data into a matrix of TF-IDF features.
from sklearn.metrics.pairwise import cosine_similarity        # Imports a function to calculate the cosine similarity between vectors.

In [43]:
def response(user_response):         # Function to generate a response from the chatbot based on the user's input using TF-IDF vectorization and cosine similarity.
  robo1_response = ''                                 # Initializes an empty string to store the chatbot's response.
  TfidfVec = TfidfVectorizer(tokenizer = LemNormalize, stop_words = 'english', token_pattern = None)
  tfidf = TfidfVec.fit_transform(sentence_tokens)                               # Converts the sentence tokens into a TF-IDF matrix.
  vals = cosine_similarity(tfidf[-1], tfidf)                                    # Calculates the cosine similarity between the last sentence in the matrix and all other sentences.
  idx = vals.argsort()[0][-2]                                                   # Gets the index of the second-highest similarity score.
  flat = vals.flatten()                                                         # Flattens the similarity scores into a 1D array.
  flat.sort()                                                                   # Sorts the flattened array in ascending order.
  req_tfidf = flat[-2]                                                          # Gets the second-highest similarity score.
  if (req_tfidf == 0):                                                          # Checks if the second-highest similarity score is zero.
    robo1_response = robo1_response + "I am sorry. Unable to understand you!"   # If the score is zero, the chatbot responds with an appropriate message.
    return robo1_response                                                      # Returns the chatbot's response.
  else:                                                                         # If the score is not zero, the chatbot responds with the second-highest similarity sentence.
    robo1_response = robo1_response + sentence_tokens[idx]
    return robo1_response                                                      # Returns the chatbot's response.

##Defining the ChatFlow

In [None]:
flag = True                                                             # Initializes a flag variable to True
print('Hello! I am the Retreival Learning Bot. Start typing your text after greeting to talk to me. For ending convo type bye!') # Prints a greeting message
while(flag == True):                                                    # Starts a while loop to continue the conversation until the flag is set to False
  user_response = input()                                                # Takes input from the user
  user_response = user_response.lower()                                  # Converts the user's input to lowercase
  if(user_response != 'bye'):                                            # Checks if the user's input is not 'bye'
    if(user_response == 'thank you' or user_response == 'thanks'):       # Checks if the user's input is 'thank you' or 'thanks'
      flag = False                                                      # Sets the flag to False to end the conversation
      print('Bot: You are Welcome..')                                  # Prints a response
    else:                                                              # If the user's input is not 'thank you' or 'thanks'
      if(greet(user_response) != None):                                 # Checks if the user's input is a greeting
        print('Bot '+ greet(user_response))                              # Prints the response to the greeting
      else:
        sentence_tokens.append(user_response)                             # Appends the user's input to the sentence tokens
        word_tokens = word_tokens + nltk.word_tokenize(user_response)
        final_words = list(set(word_tokens))
        print('Bot: ', end = '')                                           # Prints a response
        print(response(user_response))                                     # Prints the response to the user's input
        sentence_tokens.remove(user_response)                              # Removes the user's input from the sentence tokens
  else:                                                                  # If the user's input is 'bye'
    flag = False                                                          # Sets the flag to False to end the conversation
    print('Bot: Goodbye!')                                                    # Prints a goodbye message