In [104]:
import numpy as np
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.naive_bayes import GaussianNB

In [105]:
# Labelled training utterances. "text" and "intent" are parallel lists:
# entry i of one belongs with entry i of the other.
data = {
    "text": [
        # greetings
        "hello", "hi", "how are you", "good morning", "goodnight",
        # farewell
        "bye", "goodbye", "see you later",
        # gratitude
        "thank you", "thanks",
        # about_bot
        "what is your name", "who are you", "what do you",
    ],
    "intent": (
        ["greetings"] * 5
        + ["farewell"] * 3
        + ["gratitude"] * 2
        + ["about_bot"] * 3
    ),
}

In [106]:
# Tabulate the labelled utterances: one row per example, with the columns
# "text" and "intent"; then preview the first five rows.
df = pd.DataFrame(data, columns=["text", "intent"])
df.head(n=5)

Unnamed: 0,text,intent
0,hello,greetings
1,hi,greetings
2,how are you,greetings
3,good morning,greetings
4,goodnight,greetings


In [107]:
# Fetch the NLTK resources this notebook relies on; when a package is already
# present NLTK only reports that it is up to date.
# "stopwords" is the data for nltk.corpus.stopwords, imported at the top.
# "punkt_tab" is presumably the tokenizer data behind word_tokenize — confirm
# against the installed NLTK version.
nltk.download('stopwords')
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [108]:
def preprocess_text(text):
    """Lower-case ``text`` and keep only its alphanumeric tokens.

    The text is lower-cased and tokenized with NLTK's ``word_tokenize``.
    Tokens that are not purely alphanumeric (e.g. punctuation) are dropped
    and the remaining tokens are re-joined with single spaces. Stop words
    are not removed.

    Args:
        text (str): Raw utterance.

    Returns:
        str: Space-separated alphanumeric tokens; an empty string when no
        token survives the filter.
    """
    tokens = word_tokenize(text.lower())
    # isalnum must be *called*: the bare attribute `word.isalnum` is a bound
    # method object, which is always truthy, so nothing was ever filtered out.
    return " ".join(token for token in tokens if token.isalnum())
# Add the normalised form of every utterance as a new "clean_text" column.
df["clean_text"] = df["text"].map(preprocess_text)


In [109]:
# Preview the last five rows, including the new "clean_text" column.
df.tail(n=5)

Unnamed: 0,text,intent,clean_text
8,thank you,gratitude,thank you
9,thanks,gratitude,thanks
10,what is your name,about_bot,what is your name
11,who are you,about_bot,who are you
12,what do you,about_bot,what do you


In [110]:
# Learn the vocabulary and IDF weights from the cleaned utterances, then
# encode those same utterances as the TF-IDF training matrix.
vectorizer = TfidfVectorizer()
vectorizer.fit(df["clean_text"])
x_train = vectorizer.transform(df["clean_text"])

In [111]:
# Dense, column-labelled copy of the TF-IDF matrix: one row per training
# utterance, one column per vocabulary term.
x_train_df = pd.DataFrame(
    data=x_train.toarray(),
    columns=vectorizer.get_feature_names_out(),
)

In [112]:
# Gaussian Naive Bayes intent classifier, trained on the dense TF-IDF
# features with the "intent" label of each utterance as the target.
# GaussianNB is imported with the other dependencies in the notebook's
# import cell rather than mid-notebook.
model = GaussianNB()
model.fit(x_train_df, df["intent"])

In [120]:
def chatbot_response(user_input, similarity_threshold=0.2):
    """Return the bot's reply for one line of user text.

    The input is cleaned with ``preprocess_text`` and transformed with the
    fitted ``vectorizer``. If its highest cosine similarity to any row of
    ``x_train`` is below ``similarity_threshold``, the fallback reply is
    returned. Otherwise ``model`` predicts the intent and the canned reply
    for that intent is returned.

    Args:
        user_input (str): Raw text typed by the user.
        similarity_threshold (float): Minimum best cosine similarity to the
            training utterances required before the classifier is consulted.
            Defaults to 0.2.

    Returns:
        str: The canned reply for the predicted intent, or the fallback
        reply when the input is too dissimilar or the intent has no reply.
    """
    fallback = "I'm sorry, I didn't understand that."
    responses = {
        "greetings": "Hello! How are you",
        "farewell": "Goodbye! Have a great day",
        "gratitude": "You're welcome",
        "about_bot": "I'm a chatbot using Naive Bayes and cosine similarity",
    }

    cleaned_input = preprocess_text(user_input)
    query_vector = vectorizer.transform([cleaned_input])

    # Gate on similarity first so unrelated input is not forced into one of
    # the known intents by the classifier.
    max_similarity = cosine_similarity(query_vector, x_train).max()
    if max_similarity < similarity_threshold:
        return fallback

    # The model was fitted on a DataFrame with named feature columns, so the
    # query is wrapped the same way before predicting.
    query_features = pd.DataFrame(
        query_vector.toarray(),
        columns=vectorizer.get_feature_names_out(),
    )
    intent = model.predict(query_features)[0]
    return responses.get(intent, fallback)

In [121]:
# Interactive chat loop: read a line, answer it, repeat until the user quits.
print('ChatBot is ready, Type quit/exit to exit')
while True:
    try:
        # Strip surrounding whitespace so input such as "exit " still quits.
        user_input = input("You : ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted kernel ends the chat like "exit"
        # instead of surfacing a traceback.
        user_input = "exit"
    if user_input.lower() in ('exit', 'quit'):
        print("Chatbot : Goodbye!")
        break
    response = chatbot_response(user_input)
    print("Chatbot : ", response)

ChatBot is ready, Type quit/exit to exit
You : Hi
Chatbot :  Hello! How are you
You : Hello
Chatbot :  Hello! How are you
You : What is your name
Chatbot :  I'm a chatbot using Naive Bayes and cosine similarity
You : exit
Chatbot : Goodbye!
