In [None]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity



In [None]:
# Read example_data.csv into a DataFrame; the cells below use its
# 'question' and 'response' columns.
data = pd.read_csv(filepath_or_buffer='example_data.csv')


In [None]:
# Normalise case: overwrite both text columns with their lowercase form
for text_col in ('question', 'response'):
    data[text_col] = data[text_col].str.lower()

In [None]:
# Split each text into a list of NLTK word tokens, stored in new *_tokens columns.
# word_tokenize is passed directly; no wrapper lambda is needed.
data['question_tokens'] = data['question'].apply(word_tokenize)
data['response_tokens'] = data['response'].apply(word_tokenize)

In [None]:
# Filter NLTK's English stop words out of both token columns.
# Only stop words are removed; any other token is kept unchanged.
stop_words = set(stopwords.words('english'))


def _without_stop_words(tokens):
    """Return the tokens from ``tokens`` that are not in ``stop_words``."""
    return [token for token in tokens if token not in stop_words]


data['question_tokens'] = data['question_tokens'].apply(_without_stop_words)
data['response_tokens'] = data['response_tokens'].apply(_without_stop_words)

In [None]:
# Replace every token in both token columns with its Porter stem
ps = PorterStemmer()
for token_col in ('question_tokens', 'response_tokens'):
    data[token_col] = data[token_col].apply(lambda tokens: list(map(ps.stem, tokens)))

In [None]:
# Run the WordNet lemmatizer over both token columns.
# NOTE(review): these columns already hold Porter stems written by the
# stemming cell, so the lemmatizer sees stems rather than the original words.
# Confirm that applying both steps in sequence is intended.
lemmatizer = WordNetLemmatizer()


def _lemmatize_tokens(tokens):
    """Lemmatize each token in ``tokens`` with the shared ``lemmatizer``."""
    lemmas = []
    for token in tokens:
        lemmas.append(lemmatizer.lemmatize(token))
    return lemmas


data['question_tokens'] = data['question_tokens'].apply(_lemmatize_tokens)
data['response_tokens'] = data['response_tokens'].apply(_lemmatize_tokens)

In [None]:
# Fit a TF-IDF vocabulary on the responses, then encode the questions with
# that same fitted vectorizer.
# NOTE(review): the inputs are the lowercased text columns, not the
# *_tokens columns produced by the preprocessing cells. Confirm whether the
# stop-word / stemming / lemmatization output was meant to feed this step.
# NOTE(review): the vocabulary is learned from responses only, so terms that
# occur only in questions do not contribute to tfidf_question_matrix.
tfidf_vectorizer = TfidfVectorizer()
response_texts, question_texts = data['response'], data['question']
tfidf_matrix = tfidf_vectorizer.fit_transform(response_texts)
tfidf_question_matrix = tfidf_vectorizer.transform(question_texts)

In [None]:
# Evaluate chatbot performance
def evaluate_chatbot(chatbot_function, test_data):
    """Score a chatbot by exact-match accuracy over a labelled test set.

    Each value in the ``question`` column is passed to ``chatbot_function``.
    The reply is compared with the matching ``response`` value after
    surrounding whitespace is stripped from both. Every mismatch is printed
    (input, expected output, chatbot output), followed by the overall
    accuracy as a percentage.

    Parameters
    ----------
    chatbot_function : callable
        Called with a single question; its return value is the reply.
    test_data : pandas.DataFrame
        Must provide ``question`` and ``response`` columns.

    Returns
    -------
    float
        Fraction of rows answered correctly, in ``[0, 1]``. Returns ``0.0``
        when ``test_data`` has no rows.

    Raises
    ------
    KeyError
        If ``test_data`` lacks a ``question`` or ``response`` column.
    """
    num_total = len(test_data)
    if num_total == 0:
        # An empty test set would otherwise divide by zero below.
        print("Accuracy: n/a (no test data)")
        return 0.0

    num_correct = 0
    # Iterate the two columns directly; the row index is not needed.
    for user_input, expected_response in zip(test_data['question'], test_data['response']):
        chatbot_response = chatbot_function(user_input)

        # Coerce to str so a non-string reply or label (e.g. None, NaN) is
        # counted as a mismatch instead of raising on .strip() / "+".
        expected_text = str(expected_response)
        reply_text = str(chatbot_response)

        if reply_text.strip() == expected_text.strip():
            num_correct += 1
        else:
            print("Input: " + str(user_input))
            print("Expected output: " + expected_text)
            print("Chatbot output: " + reply_text)
            print()

    accuracy = num_correct / num_total
    print("Accuracy: {:.2f}%".format(accuracy * 100))
    return accuracy

In [None]:
# Read the held-out evaluation set from its CSV file
test_csv_path = 'example_test_data.csv'
test_data = pd.read_csv(test_csv_path)

In [None]:
# Score the chatbot against the held-out test set.
# NOTE(review): no visible cell defines `chatbot_response` at module level;
# the name appears only as a local variable inside evaluate_chatbot. Unless
# it is defined elsewhere, this call raises NameError on a fresh kernel.
# Confirm where the chatbot callable comes from.
evaluate_chatbot(chatbot_function=chatbot_response, test_data=test_data)