In [None]:
# Step 1: Import Libraries
import re
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 2: Dataset
# Lookup table for the bot: each key is a known user prompt and each value is
# the canned reply for it. get_response() compares the user's text against the
# keys and returns the value stored under the most similar key.
dataset = {
    # Greeting
    "hello": "Hi there! How can I help you?",
    # Small talk
    "how are you": "I'm just a bot, but I'm doing great! How about you?",
    # Definition question
    "what is AI": "AI stands for Artificial Intelligence, the simulation of human intelligence in machines.",
    # Farewell
    "bye": "Goodbye! Have a great day!"
}

# Step 3: Preprocess Input
def preprocess(text):
    """Return ``text`` lowercased with punctuation removed.

    Every character that is neither a regex word character nor whitespace
    is deleted; whitespace itself is left exactly as it was.
    """
    non_word_or_space = re.compile(r'[^\w\s]')
    return non_word_or_space.sub('', text.lower())

# Step 4: Get Response
def get_response(user_input, dataset, fallback="Sorry, I didn't understand that."):
    """Return the reply whose prompt is most similar to ``user_input``.

    The input is normalized with ``preprocess`` and, together with the keys
    of ``dataset``, turned into bag-of-words count vectors. The key with the
    highest cosine similarity to the input selects the reply.

    Args:
        user_input: Raw text typed by the user.
        dataset: Mapping of known prompt -> reply text.
        fallback: Reply returned when nothing in ``dataset`` matches.

    Returns:
        The reply for the best-matching prompt, or ``fallback`` when the
        dataset is empty, no vocabulary can be built, or the input shares no
        token with any prompt.
    """
    if not dataset:
        # Nothing to compare against; similarity on zero rows would raise.
        return fallback

    query = preprocess(user_input)
    prompts = list(dataset)
    replies = list(dataset.values())

    # Fit on prompts and query together so they share one vocabulary.
    try:
        counts = CountVectorizer().fit_transform(prompts + [query]).toarray()
    except ValueError:
        # CountVectorizer raises ValueError when no document yields a token
        # (empty vocabulary); treat that as "no match".
        return fallback

    # Last row is the query; all earlier rows are the known prompts.
    scores = cosine_similarity(counts[-1:], counts[:-1])[0]
    best_match_index = int(np.argmax(scores))

    # A top score of 0 means no shared token at all. argmax would still
    # return index 0, which would wrongly select the first reply.
    if scores[best_match_index] <= 0:
        return fallback

    return replies[best_match_index]

# Step 5: Chatbot Function
def chatbot():
    """Run an interactive console chat loop until the user quits.

    Reads lines from standard input, answers each one via ``get_response``
    using the module-level ``dataset``, and prints the reply. The loop ends
    when the user types ``exit`` (case-insensitive, surrounding whitespace
    ignored) or when input is closed or interrupted (EOF / Ctrl-C).
    """
    print("Chatbot: Hello! Type 'exit' to end the chat.")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave politely instead of a traceback.
            print("\nChatbot: Goodbye!")
            break

        if not user_input:
            # Nothing typed; prompt again rather than matching empty text.
            continue

        if user_input.lower() == 'exit':
            print("Chatbot: Goodbye!")
            break

        response = get_response(user_input, dataset)
        print(f"Chatbot: {response}")

# Step 6: Run Chatbot
# Start the interactive loop only when executed directly (script or notebook
# cell), so importing this file for its functions does not block on input().
if __name__ == "__main__":
    chatbot()

Chatbot: Hello! Type 'exit' to end the chat.


You:  Hi


Chatbot: Hi there! How can I help you?
