In [10]:
# Step 1: Install and import required packages

# Installing NLTK (for NLP preprocessing)
!pip install nltk

# Importing all necessary libraries
import nltk                                        # NLP preprocessing tools
from nltk.corpus import stopwords                  # stopwords list
from nltk.tokenize import word_tokenize            # word tokenization
import string                                       # punctuation list
from sklearn.feature_extraction.text import TfidfVectorizer   # text → vector conversion
from sklearn.metrics.pairwise import cosine_similarity         # similarity score

# Downloading essential NLTK datasets
nltk.download('punkt')
nltk.download('stopwords')




[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [11]:
# Step 2: Creating FAQ dataset (questions + answers)

faqs = {
    "What is AI?": "Artificial Intelligence allows machines to perform tasks that normally require human intelligence.",
    "What is Machine Learning?": "Machine learning enables computers to learn from data without being explicitly programmed.",
    "What is Deep Learning?": "Deep learning uses neural networks with multiple layers to learn complex patterns.",
    "What is Data Science?": "Data science involves collecting, cleaning, analyzing, and interpreting large amounts of data.",
    "What is Python used for?": "Python is used for AI, data science, automation, web development, and more.",
    "What is NLP?": "Natural Language Processing allows computers to understand and generate human language.",
}


In [12]:
# Step 3: Preprocessing text using NLTK


stop_words = set(stopwords.words('english'))        # stopwords set for faster lookup
punct = string.punctuation                          # punctuation list

def preprocess(text):
    text = text.lower()                             # convert to lowercase
    tokens = word_tokenize(text)                    # tokenize into words
    tokens = [t for t in tokens if t not in punct]  # remove punctuation
    tokens = [t for t in tokens if t not in stop_words]  # remove stopwords
    return " ".join(tokens)                         # rejoin tokens as clean text


In [14]:
# Step 4: Preprocessing all FAQ questions

questions = list(faqs.keys())              # extracting all FAQ questions
answers = list(faqs.values())              # extracting all FAQ answers

clean_questions = [preprocess(q) for q in questions]   # preprocessed questions
clean_questions


['ai',
 'machine learning',
 'deep learning',
 'data science',
 'python used',
 'nlp']

In [15]:
# Step 5: Converting preprocessed FAQ questions to TF-IDF vectors

vectorizer = TfidfVectorizer()             # transformer to convert text → vectors
X = vectorizer.fit_transform(clean_questions)  # vectorizing FAQ questions


In [16]:
# Step 6: Matching user query with most similar FAQ

def chatbot_response(user_query):
    # Preprocess user question
    clean_query = preprocess(user_query)
    
    # Convert to TF-IDF vector
    query_vec = vectorizer.transform([clean_query])
    
    # Calculate cosine similarity
    scores = cosine_similarity(query_vec, X).flatten()
    
    # Get best match index
    best_match = scores.argmax()
    
    # Return answer for closest matching FAQ
    return answers[best_match]


In [18]:
# Step 7: Chatbot interaction loop

print("Chatbot is ready! Ask your questions (type 'quit' to stop)")

while True:
    user = input("You: ")
    if user.lower() == "quit":
        print("Chatbot: Goodbye!")
        break

    response = chatbot_response(user)
    print("Chatbot:", response)


Chatbot is ready! Ask your questions (type 'quit' to stop)


You:  What does AI do?


Chatbot: Artificial Intelligence allows machines to perform tasks that normally require human intelligence.


You:  What does a Deep leaning model use? 


Chatbot: Deep learning uses neural networks with multiple layers to learn complex patterns.


You:  quit


Chatbot: Goodbye!
