In [2]:
# Install required libraries
!pip install nltk scikit-learn




In [3]:
# Preparing the Data
# Create a list of FAQs and their corresponding answers. For simple conversation, include greetings and common responses.
# Knowledge base for the bot: each (question, answer) pair below becomes one
# {"question": ..., "answer": ...} record, in the order listed.
faq_data = [
    {"question": prompt, "answer": reply}
    for prompt, reply in (
        # Small talk
        ("Hi", "Hello! How can I help you today?"),
        ("Hello", "Hi there! What can I do for you?"),
        ("Bye", "Goodbye! Have a great day!"),
        # Business information
        ("What are your hours?", "We are open from 9 AM to 5 PM, Monday to Friday."),
        ("Where are you located?", "We are located at 123 Main Street."),
        ("How can I contact support?", "You can contact support at support@example.com."),
        # Questions about the chatbot itself
        (
            "Can I talk to a human agent?",
            "Yes. Just type “talk to a human” or “live support,” and the chatbot will transfer your request if support is available.",
        ),
        (
            "Is the chatbot collecting my personal data?",
            "The chatbot may collect information to provide better support. However, your data is handled according to our [privacy policy]. We do not sell or misuse personal data.",
        ),
        (
            "Is this chatbot multilingual?",
            "Currently, it supports [English, French, Spanish]. We’re working to support more languages in the future.",
        ),
    )
]


In [4]:
# Text Preprocessing
# The preprocess function cleans the text by:

# Converting to lowercase.

# Tokenizing into words.

# Removing punctuation.

# Removing common stopwords (like "the", "is", "and") that do not add much meaning.

# This helps the model focus on important words for matching.
import nltk
import string
from nltk.corpus import stopwords

# Fetch the NLTK resources used by preprocess().
# Newer NLTK releases load the 'punkt_tab' tokenizer data in word_tokenize,
# while older releases load 'punkt'; requesting both keeps this cell working
# regardless of which NLTK version the unpinned install above resolved to.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

def preprocess(text):
    """Normalize raw text before it is vectorized.

    The text is lowercased and tokenized with ``nltk.word_tokenize``; tokens
    made up solely of ASCII punctuation characters and English stopwords are
    dropped, and the remaining tokens are joined with single spaces.

    Args:
        text: The raw string to clean.

    Returns:
        The surviving tokens joined by spaces, or an empty string when no
        token survives.
    """
    # Build both lookup sets once per call instead of once per token:
    # the stopword list is loop-invariant, and set membership is O(1).
    punctuation_chars = set(string.punctuation)
    english_stopwords = set(stopwords.words('english'))

    kept = []
    for token in nltk.word_tokenize(text.lower()):
        # Check character by character: a substring test against
        # string.punctuation lets multi-character tokens such as "..." or
        # "``" (the tokenizer's form of an opening quote) slip through.
        if all(ch in punctuation_chars for ch in token):
            continue
        if token in english_stopwords:
            continue
        kept.append(token)
    return " ".join(kept)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\santa\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\santa\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [5]:
# Vectorizing Questions
# The TfidfVectorizer converts text into numerical vectors based on word importance.

# It uses the preprocess function to clean questions before vectorization.

# X is a matrix where each row corresponds to a question vector.

# This allows us to compare user input with all questions numerically.
from sklearn.feature_extraction.text import TfidfVectorizer

# Split the FAQ records into two parallel lists: position i in `questions`
# lines up with position i in `answers`.
questions = [entry['question'] for entry in faq_data]
answers = [entry['answer'] for entry in faq_data]

# Learn the TF-IDF vocabulary from the cleaned questions and encode them;
# row i of X is the vector for questions[i].
vectorizer = TfidfVectorizer(preprocessor=preprocess)
X = vectorizer.fit_transform(questions)


In [6]:
# Setting Up the Chatbot Logic
# For each user input, compute similarity with all FAQ questions and return the answer with the highest similarity.
# User input is vectorized using the same vectorizer.

# cosine_similarity measures how similar the user input is to each question.

# The highest similarity score's index (idx) is found.

# If the similarity is above 0.2 (a chosen threshold), the corresponding answer is returned.

# Otherwise, a fallback message is given.
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def get_response(user_input, threshold=0.2):
    """Return the FAQ answer that best matches ``user_input``.

    The input is encoded with the fitted ``vectorizer`` and compared against
    every FAQ question vector in ``X`` using cosine similarity.

    Args:
        user_input: The message typed by the user.
        threshold: Minimum cosine similarity (exclusive) the best match must
            exceed to be accepted. Defaults to 0.2.

    Returns:
        The answer paired with the most similar question when its score is
        above ``threshold``; otherwise a fallback message asking the user to
        rephrase.
    """
    user_vec = vectorizer.transform([user_input])
    # cosine_similarity returns one row per input; only one input was passed,
    # so take row 0 to get one score per FAQ question.
    scores = cosine_similarity(user_vec, X)[0]
    best = int(np.argmax(scores))
    if scores[best] > threshold:
        return answers[best]
    return "I'm sorry, I don't understand. Can you please rephrase?"


In [7]:
# Chat Loop
# This loop continuously takes user input.

# If the user types "exit", "quit", or "bye", the chatbot says goodbye and stops.

# Otherwise, it calls get_response and prints the answer.
# Words that end the conversation (compared case-insensitively, ignoring
# surrounding whitespace).
EXIT_COMMANDS = {"exit", "quit", "bye"}

while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # End of input or an interrupt while waiting at the prompt:
        # finish the chat cleanly instead of leaving a traceback.
        print("\nBot: Goodbye! Have a nice day!")
        break
    # Strip first so that inputs like " bye " still end the chat.
    if user_input.strip().lower() in EXIT_COMMANDS:
        print("Bot: Goodbye! Have a nice day!")
        break
    response = get_response(user_input)
    print("Bot:", response)


You:  hi


Bot: Hello! How can I help you today?


You:  hello


Bot: Hi there! What can I do for you?


You:  can i talk to a human agent ?


Bot: Yes. Just type “talk to a human” or “live support,” and the chatbot will transfer your request if support is available.


You:  who are you ?


Bot: I'm sorry, I don't understand. Can you please rephrase?


You:  bye


Bot: Goodbye! Have a nice day!
