<a href="https://colab.research.google.com/github/Sandli07goyal/Cantilever/blob/main/Chatbot_for_FAQs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install nltk spacy scikit-learn
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [2]:
import nltk
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load SpaCy model
nlp = spacy.load('en_core_web_sm')

In [3]:
# Example FAQ dataset
faq_data = {
    "What is your return policy?": "You can return any product within 30 days of purchase.",
    "What are your shipping options?": "We offer standard and expedited shipping options.",
    "How can I track my order?": "You will receive a tracking number via email once your order is shipped.",
    "Do you offer international shipping?": "Yes, we offer international shipping to selected countries.",
    "What payment methods do you accept?": "We accept credit cards, PayPal, and Apple Pay.",
}

In [4]:
questions = list(faq_data.keys())

# Vectorize the questions using TF-IDF
vectorizer = TfidfVectorizer()
question_vectors = vectorizer.fit_transform(questions)

In [5]:
def get_most_similar_question(user_query):
    # Preprocess user query
    query_vector = vectorizer.transform([user_query])

    # Compute cosine similarity between user query and FAQ questions
    similarities = cosine_similarity(query_vector, question_vectors)

    # Get the index of the most similar question
    most_similar_idx = similarities.argmax()

    return questions[most_similar_idx], faq_data[questions[most_similar_idx]]

In [6]:
def chatbot():
    print("Hi! I'm your FAQ chatbot. Ask me anything about our products or services.")
    print("Type 'exit' to end the conversation.")

    while True:
        user_query = input("\nYou: ")

        if user_query.lower() == "exit":
            print("Chatbot: Goodbye!")
            break

        # Get the most similar FAQ
        question, answer = get_most_similar_question(user_query)

        print(f"Chatbot: {answer}")

In [7]:
chatbot()

Hi! I'm your FAQ chatbot. Ask me anything about our products or services.
Type 'exit' to end the conversation.

You: what is the dependency
Chatbot: You can return any product within 30 days of purchase.

You: exit
Chatbot: Goodbye!


In [8]:
!pip install transformers



In [11]:
from transformers import BertTokenizer, BertModel
import torch
import numpy as np

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def encode_question(question):
    inputs = tokenizer(question, return_tensors="pt")
    outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).detach().numpy()

# Encode all FAQs
question_embeddings = np.vstack([encode_question(q) for q in questions])

# Similarity function using BERT embeddings
def get_most_similar_question_bert(user_query):
    query_embedding = encode_question(user_query)
    similarities = cosine_similarity(query_embedding, question_embeddings)
    most_similar_idx = similarities.argmax()
    return questions[most_similar_idx], faq_data[questions[most_similar_idx]]

