In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re

# Sample dataset, written as (question, answer) pairs so each reply sits next
# to the prompt it belongs to. Extend the list with your own pairs.
qa_pairs = [
    ('hello', 'Hello! How can I help you today?'),
    ('hi', 'Hi there! What can I do for you?'),
    ('how are you', 'I am just a chatbot, but I am functioning well! How about you?'),
    ('what is your name', 'I am a chatbot created to help answer your questions!'),
    ('who are you', 'I am an AI chatbot designed to assist with information and conversations.'),
    ('what can you do', 'I can answer questions, have conversations, and provide information on various topics.'),
    ('tell me a joke', 'Why did the chatbot cross the road? To get to the other website!'),
    ('what is machine learning', 'Machine learning is a subset of AI that allows computers to learn without being explicitly programmed.'),
    ('how does python work', 'Python is an interpreted programming language that executes code line by line.'),
    ('what is artificial intelligence', 'Artificial intelligence is the simulation of human intelligence in machines.'),
    ('goodbye', 'Goodbye! Have a great day!'),
    ('bye', 'Bye! Come back if you have more questions!'),
    ('see you later', 'See you later! Take care!'),
    ('thanks', 'You are welcome! Happy to help!'),
    ('thank you', 'My pleasure! Let me know if you need anything else.'),
    ('what time is it', 'I am a chatbot, I do not have access to real-time clock information.'),
    ('how old are you', 'I was just created, so I am brand new!'),
    ('where are you from', 'I exist in the digital world to help users like you!'),
    ('what is your purpose', 'My purpose is to assist you with information and answer your questions.'),
    ('do you like music', 'I do not have personal preferences, but I can talk about music!'),
]

# Column-oriented view of the pairs: one list of questions, one of answers
data = {
    'question': [question for question, _ in qa_pairs],
    'answer': [answer for _, answer in qa_pairs],
}

df = pd.DataFrame(data)
print("Dataset created with", len(df), "Q&A pairs")

Dataset created with 20 Q&A pairs


In [None]:
class SimpleChatbot:
    """Retrieval chatbot backed by TF-IDF vectors and cosine similarity.

    The bot stores question/answer pairs, vectorizes the (preprocessed)
    questions, and replies with the answer whose question is most similar to
    the user's input.
    """

    def __init__(self):
        self.vectorizer = TfidfVectorizer()
        self.questions = []
        self.answers = []
        self.X = None  # vectorized questions; stays None until train()/add_qna()

    def _fit(self):
        """Re-fit the vectorizer on the stored questions and cache the result."""
        processed_questions = [self.preprocess_text(q) for q in self.questions]
        self.X = self.vectorizer.fit_transform(processed_questions)

    def train(self, questions, answers):
        """Train the chatbot with questions and answers.

        Args:
            questions: iterable of question strings.
            answers: iterable of answer strings, aligned with ``questions``.

        Raises:
            ValueError: if the two inputs differ in length.
        """
        # Take private list copies: add_qna() appends to these, and must not
        # mutate the caller's data (or fail on tuples / pandas Series).
        questions = list(questions)
        answers = list(answers)
        if len(questions) != len(answers):
            raise ValueError(
                f"questions and answers must have the same length "
                f"(got {len(questions)} and {len(answers)})"
            )

        self.questions = questions
        self.answers = answers
        self._fit()
        print(f"Chatbot trained with {len(questions)} Q&A pairs")

    def preprocess_text(self, text):
        """Lower-case ``text`` and strip every character that is neither a
        word character nor whitespace (i.e. punctuation)."""
        text = text.lower()
        text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
        return text

    def get_response(self, user_input, threshold=0.3):
        """Return the stored answer that best matches ``user_input``.

        Args:
            user_input: raw text typed by the user.
            threshold: the best cosine similarity must be strictly greater
                than this value, otherwise a fallback message is returned.

        Returns:
            The matched answer, the fallback message, or a "not trained"
            message when no data has been fitted yet.
        """
        if self.X is None:
            return "Chatbot not trained yet. Please train with data first."

        # Same preprocessing as the stored questions, then project the input
        # into the fitted TF-IDF space.
        processed_input = self.preprocess_text(user_input)
        input_vector = self.vectorizer.transform([processed_input])

        # One similarity score per stored question.
        # NOTE(review): the score seems to reflect only words seen during
        # fitting, so inputs sharing just common words such as "what is" can
        # still clear the threshold - confirm against the vectorizer docs.
        scores = cosine_similarity(input_vector, self.X)[0]
        best_match_idx = int(np.argmax(scores))
        best_similarity = scores[best_match_idx]

        # Return answer if similarity is above threshold
        if best_similarity > threshold:
            return self.answers[best_match_idx]
        return "I'm not sure how to answer that. Can you try rephrasing your question?"

    def add_qna(self, question, answer):
        """Add a new Q&A pair and re-fit the vectorizer on all questions."""
        self.questions.append(question)
        self.answers.append(answer)

        # The vocabulary may have grown, so the whole matrix is rebuilt
        self._fit()
        print(f"Added new Q&A pair. Total pairs: {len(self.questions)}")

In [None]:
# Instantiate the basic bot and fit it on the sample Q&A table
chatbot = SimpleChatbot()
chatbot.train(
    questions=df['question'].tolist(),
    answers=df['answer'].tolist(),
)

# Test the chatbot
def chat_with_bot(bot=None):
    """Run an interactive console chat loop.

    Args:
        bot: object exposing ``get_response(text)``. When omitted, the
            module-level ``chatbot`` is used.

    The loop ends when the user types 'quit', 'exit' or 'bye' (case and
    surrounding whitespace are ignored), or when input ends or is
    interrupted (EOF / Ctrl-C / kernel interrupt).
    """
    if bot is None:
        bot = chatbot  # default to the bot trained earlier in the notebook

    exit_commands = ('quit', 'exit', 'bye')
    print("Chatbot: Hello! I'm your assistant. Type 'quit' to exit.")

    while True:
        try:
            user_input = input("\nYou: ")
        except (EOFError, KeyboardInterrupt):
            # No more input available or the user interrupted: leave cleanly
            # instead of surfacing a traceback.
            print("\nChatbot: Goodbye!")
            break

        # Strip so that e.g. "quit " still exits
        if user_input.strip().lower() in exit_commands:
            print("Chatbot: Goodbye!")
            break

        response = bot.get_response(user_input)
        print(f"Chatbot: {response}")

# Start chatting.
# Note: this call blocks waiting for keyboard input until 'quit', 'exit' or
# 'bye' is entered, so the cell needs an interactive session to finish.
chat_with_bot()

Chatbot trained with 20 Q&A pairs
Chatbot: Hello! I'm your assistant. Type 'quit' to exit.

You: quit
Chatbot: Goodbye!


In [None]:
class EnhancedChatbot(SimpleChatbot):
    """SimpleChatbot with canned replies for greetings, farewells and thanks.

    Keyword checks match whole words only, so a greeting such as 'hi' is not
    triggered by words that merely contain it (e.g. "machine").
    """

    def __init__(self):
        super().__init__()
        # Words / phrases that short-circuit the similarity lookup
        self.greetings = ['hello', 'hi', 'hey', 'hola']
        self.goodbyes = ['bye', 'goodbye', 'see you', 'farewell']

    @staticmethod
    def _has_phrase(text, phrases):
        """Return True if any of ``phrases`` occurs in ``text`` as whole word(s)."""
        for phrase in phrases:
            words = phrase.split()
            if not words:
                continue  # an empty phrase must not match everything
            # Word-boundary lookarounds on both sides; any run of whitespace
            # is allowed between the words of a multi-word phrase.
            pattern = (
                r'(?<!\w)'
                + r'\s+'.join(re.escape(word) for word in words)
                + r'(?!\w)'
            )
            if re.search(pattern, text):
                return True
        return False

    def get_response(self, user_input, threshold=0.3):
        """Enhanced response method with special cases.

        Greetings, goodbyes and thanks (checked in that order) get a fixed
        reply; everything else is delegated to ``SimpleChatbot.get_response``.

        Args:
            user_input: raw text typed by the user.
            threshold: similarity threshold forwarded to the parent method.
        """
        processed_input = self.preprocess_text(user_input)

        # Check for greetings
        if self._has_phrase(processed_input, self.greetings):
            return "Hello! How can I assist you today?"

        # Check for goodbyes
        if self._has_phrase(processed_input, self.goodbyes):
            return "Goodbye! Feel free to come back if you have more questions!"

        # Check for thanks: any word starting with "thank" (thanks, thank you, ...)
        if re.search(r'(?<!\w)thank', processed_input):
            return "You're welcome! Happy to help!"

        # Use parent class method for other queries
        return super().get_response(user_input, threshold)

    def interactive_learning(self):
        """Allow the chatbot to learn new Q&A pairs interactively.

        Prompts for a question and an answer in a loop; typing 'stop'
        (case and surrounding whitespace ignored) at either prompt exits.
        Blank entries are skipped rather than stored.
        """
        print("\nInteractive Learning Mode - Type 'stop' to exit learning")

        while True:
            question = input("\nEnter a new question: ").strip()
            if question.lower() == 'stop':
                break

            answer = input("Enter the answer: ").strip()
            if answer.lower() == 'stop':
                break

            if not question or not answer:
                # An empty question or answer would only pollute the dataset
                print("Empty question or answer ignored.")
                continue

            self.add_qna(question, answer)
            print("New Q&A pair added successfully!")

# Build an enhanced bot on the same sample Q&A table
enhanced_bot = EnhancedChatbot()
enhanced_bot.train(
    questions=df['question'].tolist(),
    answers=df['answer'].tolist(),
)

# Smoke-test the enhanced bot with a greeting, dataset questions, thanks,
# a farewell and an unrelated topic
print("Testing enhanced chatbot:")
test_questions = [
    "hello there",
    "what is your name",
    "what is machine learning",
    "thank you very much",
    "goodbye my friend",
    "what is the meaning of life",  # unrelated topic, meant to exercise the fallback reply
]

for question in test_questions:
    print(f"Q: {question}")
    response = enhanced_bot.get_response(question)
    print(f"A: {response}\n")

Chatbot trained with 20 Q&A pairs
Testing enhanced chatbot:
Q: hello there
A: Hello! How can I assist you today?

Q: what is your name
A: I am a chatbot created to help answer your questions!

Q: what is machine learning
A: Hello! How can I assist you today?

Q: thank you very much
A: You're welcome! Happy to help!

Q: goodbye my friend
A: Goodbye! Feel free to come back if you have more questions!

Q: what is the meaning of life
A: I am a chatbot created to help answer your questions!



In [None]:
# Persist the Q&A table to disk; the row index is not written
df.to_csv(path_or_buf='chatbot_dataset.csv', index=False)
print("Dataset saved to 'chatbot_dataset.csv'")

# Load dataset from CSV
def load_dataset_from_csv(file_path):
    """Load question/answer lists from a CSV file.

    Args:
        file_path: path to a CSV file with 'question' and 'answer' columns.

    Returns:
        A ``(questions, answers)`` tuple of lists of strings. If the file
        does not exist, a message is printed and the lists come from the
        notebook-level ``df`` instead.
    """
    try:
        # Read every cell as plain text: by default pandas would turn texts
        # such as "NA" or "null" into NaN and numeric-looking cells into
        # numbers, neither of which the chatbot's text preprocessing accepts.
        df_loaded = pd.read_csv(file_path, dtype=str, keep_default_na=False)
    except FileNotFoundError:
        print("Dataset file not found. Using default dataset.")
        return df['question'].tolist(), df['answer'].tolist()
    return df_loaded['question'].tolist(), df_loaded['answer'].tolist()

# Train a separate bot from the CSV written above
chatbot_from_file = SimpleChatbot()
questions, answers = load_dataset_from_csv('chatbot_dataset.csv')
chatbot_from_file.train(questions=questions, answers=answers)

Dataset saved to 'chatbot_dataset.csv'
Chatbot trained with 20 Q&A pairs
