In [3]:
import pandas as pd
import numpy as np
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import random
import re


In [6]:
# Load the dataset
df = pd.read_csv("chatbot_dataset_extended.csv")

# Display the first few rows
print(df.head())

             question                                             answer
0               Hello  Hi there! How can I assist you today? I can an...
1        How are you?  I'm just a bot, but I'm doing great! How about...
2  What is your name?  I'm a chatbot designed to answer your queries....
3    Who created you?  I was created by a developer who loves AI and ...
4      Tell me a joke  Sure! Here's a joke for you: Why don’t scienti...


In [7]:
def clean_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\[.*?\]', '', text)  # Remove text inside brackets
    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # Remove URLs
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    return text.strip()

# Apply text cleaning to dataset
df["question"] = df["question"].apply(clean_text)
df["answer"] = df["answer"].apply(clean_text)


In [8]:
# Initialize TF-IDF Vectorizer
vectorizer = TfidfVectorizer()

# Fit and transform questions
X = vectorizer.fit_transform(df["question"])


In [9]:
def chatbot_response(user_input):
    user_input = clean_text(user_input)  # Clean user input
    user_vec = vectorizer.transform([user_input])  # Convert input to vector

    # Compute cosine similarity
    similarity_scores = cosine_similarity(user_vec, X)
    
    # Find the most similar question
    best_match_index = np.argmax(similarity_scores)

    # Check if similarity is above threshold
    if similarity_scores[0, best_match_index] > 0.2:  # Adjust threshold as needed
        return df.iloc[best_match_index]["answer"]
    else:
        return "I'm sorry, I don't understand. Can you rephrase your question?"


In [None]:
print("Chatbot: Hello! Ask me anything. Type 'exit' to end.")

while True:
    user_input = input("You: ")
    
    if user_input.lower() == "exit":
        print("Chatbot: Goodbye! Have a great day!")
        break
    
    response = chatbot_response(user_input)
    print("Chatbot:", response)


Chatbot: Hello! Ask me anything. Type 'exit' to end.


You:  how are you


Chatbot: im just a bot but im doing great how about you if you need any help feel free to ask


You:  please suggest some songs


Chatbot: I'm sorry, I don't understand. Can you rephrase your question?
