# This code implements a chatbot in Python using the Natural Language Toolkit (NLTK) library. The chatbot answers user queries about Hugging Face, a company known for its popular natural language processing (NLP) tools. It uses tokenization, lemmatization, and TF-IDF (Term Frequency-Inverse Document Frequency) to process user input and select an appropriate response.

# The chatbot begins by greeting the user and asking for their queries. It handles various types of user interactions, including greetings, expressions of gratitude, and general questions. The chatbot employs cosine similarity to determine the most relevant response from a pre-defined set of sentences. It calculates the similarity scores between the user's input and the available sentences and selects the response with the highest similarity score.

# The code also includes functionality to visualize the chatbot responses using Matplotlib. The user's input and corresponding chatbot responses are stored in a DataFrame, and a bar plot is generated to display the chatbot responses over the course of the conversation. 

In [None]:
import nltk  # Importing the Natural Language Toolkit library
import io  # Importing the io module for input/output operations
import numpy as np  # Importing the NumPy library for array operations and computations
import random  # Importing the random module for generating random numbers and making random selections
import string  # Importing the string module for string manipulation functions and constants
import warnings  # Importing the warnings module for controlling warning messages
import matplotlib.pyplot as plt
import pandas as pd
warnings.filterwarnings("ignore")  # Ignoring any warning messages

In [None]:
# Load the corpus text. The context manager guarantees the file handle is
# closed as soon as the contents have been read (decoding errors are ignored).
with open('/content/huggingface.txt', 'r', errors='ignore') as f:
    raw = f.read()  # Read the contents of the file into the variable 'raw'
raw = raw.lower()  # Convert the contents of 'raw' to lowercase
nltk.download('punkt')  # Download the necessary resources for tokenization
nltk.download('wordnet')  # Download the necessary resources for lemmatization
sent_tokens = nltk.sent_tokenize(raw)  # Tokenize 'raw' into a list of sentences using NLTK's sentence tokenizer
word_tokens = nltk.word_tokenize(raw)  # Tokenize 'raw' into a list of words using NLTK's word tokenizer

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


In [None]:
word_tokens[:5]  # Show the first five elements of the list 'word_tokens'

['hugging', 'face', 'article', 'talk', 'read']

In [None]:
sent_tokens[:10]  # Show the first ten elements of the list 'sent_tokens'

In [None]:
# Single WordNet lemmatizer instance shared by every call to LemTokens
lemmer = nltk.stem.WordNetLemmatizer()


def LemTokens(tokens):
    """Return a list with the WordNet lemma of each token, in input order."""
    return list(map(lemmer.lemmatize, tokens))

# Translation table for str.translate(): each character of string.punctuation
# (keyed by its code point) maps to None, which deletes it from the text.
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}

def LemNormalize(text):
    """Lowercase *text*, delete punctuation, tokenize into words and lemmatize.

    Returns the list produced by LemTokens for the word tokens of the
    cleaned-up text.
    """
    cleaned = text.lower().translate(remove_punct_dict)
    words = nltk.word_tokenize(cleaned)
    return LemTokens(words)


In [None]:
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up", "hey")  # Common greeting inputs (may be multi-word)
GREETING_RESPONSES = ["hi", "hey", "hi there", "hello", "I am glad! You are talking to me"]  # Possible greeting replies


def greeting(sentence):
    """Return a random greeting reply if *sentence* contains a greeting.

    Matching is case-insensitive and works on whole words: punctuation
    attached to the edges of a word is ignored ("Hello!" matches "hello"),
    and multi-word greetings such as "what's up" are matched as a phrase.

    Returns:
        A string chosen at random from GREETING_RESPONSES, or None when no
        greeting from GREETING_INPUTS occurs in the sentence.
    """
    # Strip punctuation only from word edges so the apostrophe inside
    # "what's" is preserved.
    words = [word.strip(string.punctuation) for word in sentence.lower().split()]
    # Pad with spaces so the substring test below only hits whole words
    # (e.g. "sup" must not match inside "support").
    padded = " " + " ".join(word for word in words if word) + " "
    if any(" " + phrase + " " in padded for phrase in GREETING_INPUTS):
        return random.choice(GREETING_RESPONSES)
    return None

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer  # Importing TfidfVectorizer from scikit-learn library

from sklearn.metrics.pairwise import cosine_similarity  # Importing cosine_similarity from scikit-learn library

In [None]:
def response(user_response):
    """Return the corpus sentence most similar to *user_response*.

    The query is appended to the module-level ``sent_tokens`` list, a TF-IDF
    matrix is fitted over corpus plus query, and the corpus sentence with the
    highest cosine similarity to the query is returned.

    Side effect:
        ``user_response`` is left as the last element of ``sent_tokens``;
        the caller is responsible for removing it afterwards.

    Returns:
        The best-matching sentence from ``sent_tokens``, or an apology
        message when no corpus sentence has a non-zero similarity to the
        query (or when there is no corpus to search).
    """
    not_understood = "I am sorry! I don't understand you"
    sent_tokens.append(user_response)  # The query becomes the last "document"
    if len(sent_tokens) < 2:
        # Only the query is present: nothing to compare against.
        return not_understood

    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words="english")
    tfidf = TfidfVec.fit_transform(sent_tokens)

    # Score the query (last row) against the corpus rows only. Leaving the
    # query out of the comparison means it can never be returned as its own
    # best match, even when a corpus sentence ties with it at similarity 1.
    scores = cosine_similarity(tfidf[-1], tfidf[:-1]).ravel()
    best = int(scores.argmax())
    if scores[best] <= 0:
        # No vocabulary overlap with any corpus sentence.
        return not_understood
    return sent_tokens[best]


In [None]:
# Conversation log used for the plot below. Only turns answered through
# response() are recorded; greeting, thanks and exit turns are not.
user_responses = []
chatbot_responses = []

n = True  # Loop flag: set to False to end the conversation
print("Chatbot: Hi, my name is Chatbot. I will answer your queries about Hugging face. If you want to exit, type 'Bye!'")

while n:
    user_response = input().lower()
    # Compare commands without surrounding whitespace/punctuation so that the
    # advertised "Bye!" (and e.g. "thanks!") is actually recognised.
    command = user_response.strip(string.whitespace + string.punctuation)
    if command in ("bye", "goodbye"):
        n = False
        print("Chatbot: Bye! Take care.")
    elif command in ("thanks", "thank you"):
        n = False
        print("Chatbot: You're welcome.")
    else:
        greeting_result = greeting(user_response)
        if greeting_result is not None:
            print("Chatbot: " + greeting_result)
        else:
            print("Chatbot: ", end="")
            try:
                chatbot_response = response(user_response)
            finally:
                # response() leaves the query as the last entry of
                # sent_tokens. Drop exactly that trailing entry (rather than
                # the first equal element, which could be a genuine corpus
                # sentence), even if response() raised.
                if sent_tokens and sent_tokens[-1] == user_response:
                    sent_tokens.pop()
            print(chatbot_response)

            # Append the user and chatbot responses to the respective lists
            user_responses.append(user_response)
            chatbot_responses.append(chatbot_response)

# Create a DataFrame using the collected responses
data = {'user_response': user_responses, 'chatbot_response': chatbot_responses}
df = pd.DataFrame(data)

# Plot the data
# NOTE(review): the bar heights passed here are the response strings, not
# numbers; confirm this is the intended visualisation.
plt.figure(figsize=(8, 6))
plt.bar(range(len(df)), df['chatbot_response'])
plt.xlabel('Interaction')
plt.ylabel('Chatbot Response')
plt.title('Chatbot Responses')

# Display the plot
plt.show()

Chatbot: Hi, my name is Chatbot. I will answer your queries about Hugging face. If you want to exit, type 'Bye!'
hi
Chatbot: hi
year
Chatbot: find sources: "hugging face" – news · newspapers · books · scholar · jstor (february 2023) (learn how and when to remove this template message)
hugging face, inc.
hugging face logo.png
type	private
industry	artificial intelligence, machine learning, software development
founded	2016; 7 years ago in new york city
headquarters	new york city, u.s.
area served	worldwide
key people	
clément delangue (ceo)
julien chaumond (cto)
thomas wolf (cso)
products	transformers, datasets, spaces
website	huggingface.co
hugging face, inc. is an american company that develops tools for building applications using machine learning.
hhhtyhg
Chatbot:  I am sorry! I don't understand you
