# 🤖 Chatbot using NLTK, TF-IDF & Cosine Similarity

A simple retrieval-based chatbot (TF-IDF + cosine similarity over a text corpus, with rule-based greetings) implemented in a Colab notebook.

In [None]:
!pip install gradio nltk google-cloud-dialogflow

In [None]:
import numpy as np
import nltk
import string
import random
# Fetch the NLTK resources this notebook relies on: the Punkt tokenizer models
# (sentence/word tokenization) and the WordNet data used by the lemmatizer.
nltk.download('punkt')
# Recent NLTK releases (3.9+) load the tokenizer from 'punkt_tab' instead of the
# pickled 'punkt' models; without it sent_tokenize/word_tokenize raise LookupError.
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('omw-1.4')

## 📚 Reading the Corpus

In [None]:
# Load the whole corpus as lowercase text. The context manager guarantees the
# file handle is closed even if reading fails; undecodable bytes are skipped.
with open('chatbot.txt', 'r', errors='ignore') as f:
    raw_doc = f.read().lower()

# Sentence-level tokens are the candidate replies; word-level tokens are kept
# for inspection of the corpus vocabulary.
sentence_tokens = nltk.sent_tokenize(raw_doc)
word_tokens = nltk.word_tokenize(raw_doc)

## 🔄 Preprocessing: Lemmatization and Normalization

In [None]:
# Single WordNet lemmatizer instance shared by every call to Lemtokens.
lemmer = nltk.stem.WordNetLemmatizer()


def Lemtokens(tokens):
    """Return a list holding the lemmatized form of each token in *tokens*."""
    lemmatize = lemmer.lemmatize
    return list(map(lemmatize, tokens))

def LemNormalize(text):
    """Lowercase *text*, drop punctuation, tokenize it and lemmatize the tokens.

    Returns the list produced by ``Lemtokens`` for the cleaned-up text.
    """
    # Translation table that deletes every character in string.punctuation.
    punctuation_remover = str.maketrans('', '', string.punctuation)
    cleaned = text.lower().translate(punctuation_remover)
    words = nltk.word_tokenize(cleaned)
    return Lemtokens(words)

# Quick demonstration: print the normalized tokens of a sample sentence.
text = "The cats are running faster than the dogs."
print(LemNormalize(text))

## 🙋‍♂️ Greeting Inputs

In [None]:
# Words/phrases recognised as a greeting, and the replies to choose from.
greet_inputs = ('hello', 'hi', 'wassup', 'how are you')
greet_response = ('hi', 'hey', 'Hey there !', 'wassup buddy  !!')


def greet(sentence):
    """Return a random greeting reply if *sentence* contains a greeting.

    Matching is case-insensitive and done on whole words, so 'this' does not
    match 'hi'. Punctuation attached to a word is ignored ('hello!' matches),
    and multi-word entries of ``greet_inputs`` such as 'how are you' are
    matched as consecutive words.

    Returns:
        One element of ``greet_response``, or ``None`` when no greeting is
        found.
    """
    # Strip punctuation from the edges of every word and drop tokens that were
    # punctuation only.
    words = [word.strip(string.punctuation) for word in sentence.lower().split()]
    words = [word for word in words if word]

    # Pad with spaces so a substring test only succeeds on whole-word
    # boundaries; this handles single words and multi-word phrases alike.
    padded = ' ' + ' '.join(words) + ' '
    for phrase in greet_inputs:
        if ' ' + phrase + ' ' in padded:
            return random.choice(greet_response)
    return None

## 🤖 Bot Response Generator

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def response(user_response):
    """Return the corpus sentence most similar to *user_response*.

    The corpus sentences plus the user's message are vectorized with TF-IDF
    (tokenized by ``LemNormalize``, English stop words removed) and the
    message is compared with every corpus sentence by cosine similarity.

    Returns:
        The best-matching sentence from ``sentence_tokens``, or an apology
        string when the corpus is empty or nothing has a non-zero similarity.
    """
    fallback = "I am sorry. Unable to understand you!"

    # Work on a copy instead of appending to / popping from the shared global
    # list, so an exception during vectorization cannot leave the user's
    # message stuck in the corpus.
    corpus = list(sentence_tokens)
    if not corpus:
        return fallback

    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(corpus + [user_response])

    # Compare the query (last row) only against the corpus rows. Excluding the
    # query itself means the selected index always refers to a corpus sentence,
    # even when a corpus sentence ties with the query's self-similarity.
    scores = cosine_similarity(tfidf[-1], tfidf[:-1]).ravel()
    idx = scores.argsort()[-1]

    if scores[idx] == 0:
        return fallback
    return corpus[idx]

## 💬 Chat Loop

In [None]:
# Interactive loop: read a line, answer it, and stop on 'bye' or a thank-you.
flag = True
print("Hello, I am the Retrieval Learning Bot. Start chatting with me (type 'bye' to exit)")

while flag:
    # Trim surrounding whitespace so inputs such as "bye " are still recognised.
    user_response = input().strip().lower()
    if user_response == 'bye':
        flag = False
        print("Bot: Goodbye!")
    elif user_response in ('thanks', 'thank you'):
        # A thank-you also ends the conversation.
        flag = False
        print("Bot: You're welcome!")
    else:
        # Call greet() once and reuse its result, so the reply that is printed
        # is the same one that was checked.
        greeting = greet(user_response)
        if greeting is not None:
            print("Bot:", greeting)
        else:
            print("Bot:", response(user_response))