In [None]:
import pandas as pd
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load data into a pandas dataframe.
# Rows with a missing question or response are dropped up front: the later
# cells call .str.lower() and word_tokenize() on every value of these columns,
# and word_tokenize expects a string, not the NaN that read_csv produces for
# empty cells.
data = (
    pd.read_csv('example_data.csv')
    .dropna(subset=['question', 'response'])
    .reset_index(drop=True)  # keep labels contiguous after dropping rows
)

In [None]:
# Normalise both text columns to lowercase (question first, then response)
for text_col in ('question', 'response'):
    data[text_col] = data[text_col].str.lower()

In [None]:
# Split each text column into a list of word tokens, stored in a new column
token_columns = {'question': 'question_tokens', 'response': 'response_tokens'}
for text_col, token_col in token_columns.items():
    data[token_col] = data[text_col].apply(word_tokenize)

In [None]:
# Filter English stop words out of both token columns.
# `stop_words` is kept as a set so each membership test is a hash lookup.
stop_words = set(stopwords.words('english'))
for token_col in ('question_tokens', 'response_tokens'):
    data[token_col] = data[token_col].apply(
        lambda tokens: [tok for tok in tokens if tok not in stop_words]
    )

In [None]:
# Replace every token with its Porter stem
ps = PorterStemmer()
for token_col in ('question_tokens', 'response_tokens'):
    data[token_col] = data[token_col].apply(
        lambda tokens: list(map(ps.stem, tokens))
    )

In [None]:
# Run the WordNet lemmatizer (default part of speech) over every token
lemmatizer = WordNetLemmatizer()
for token_col in ('question_tokens', 'response_tokens'):
    data[token_col] = data[token_col].apply(
        lambda tokens: [lemmatizer.lemmatize(tok) for tok in tokens]
    )

In [None]:
# Use TF-IDF to vectorize the text.
# The vectorizer is fitted on the *preprocessed* tokens (stop words removed,
# stemmed, lemmatized) re-joined into strings, not on the raw text columns.
# chatbot_response() applies that same preprocessing to the user input before
# calling tfidf_vectorizer.transform(), so the corpus vocabulary must be built
# from the same kind of tokens or stemmed query words will not match it.
response_docs = data['response_tokens'].apply(' '.join)
question_docs = data['question_tokens'].apply(' '.join)

tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(response_docs)
# NOTE(review): tfidf_question_matrix is not read by any code visible in this
# notebook; confirm whether matching was meant to run against the questions.
tfidf_question_matrix = tfidf_vectorizer.transform(question_docs)

In [None]:
# Initialize chatbot
def chatbot_response(user_input, min_similarity=0.0):
    """Return the stored response that best matches ``user_input``.

    The input is lowercased, tokenized, stripped of stop words, stemmed and
    lemmatized, then vectorized with the already-fitted ``tfidf_vectorizer``
    and compared by cosine similarity against every row of ``tfidf_matrix``.

    Parameters
    ----------
    user_input : str
        The text typed by the user.
    min_similarity : float, default 0.0
        A best-match score at or below this value counts as "no match".
        With the default, only a zero score triggers the fallback message.

    Returns
    -------
    The ``response`` value of the best-matching row of ``data``, or a fixed
    "please rephrase" message when nothing scores above ``min_similarity``.
    """
    fallback = "I'm sorry, I don't understand. Can you please rephrase your question?"

    # Preprocess the user input with the same steps used on the dataset columns
    tokens = word_tokenize(user_input.lower())
    tokens = [tok for tok in tokens if tok not in stop_words]
    tokens = [ps.stem(tok) for tok in tokens]
    tokens = [lemmatizer.lemmatize(tok) for tok in tokens]
    user_input_tfidf = tfidf_vectorizer.transform([' '.join(tokens)])

    # Calculate cosine similarity between user input and preprocessed data
    similarities = cosine_similarity(user_input_tfidf, tfidf_matrix).flatten()

    # Only the single best row is needed, so take the argmax instead of sorting
    # the whole array; on ties argmax returns the first (lowest-index) row.
    best_index = int(similarities.argmax())
    if similarities[best_index] <= min_similarity:
        return fallback

    return data.iloc[best_index]['response']

In [None]:
# Greet the user and explain how to leave the chat before the loop starts
for banner_line in (
    "Welcome to the Telecommunication Company Chatbot!",
    "Please type your question or type 'quit' to exit.",
):
    print(banner_line)

In [None]:
while True:
    user_input = input("You: ")
    # Ignore surrounding whitespace so an input such as "quit " still exits
    if user_input.strip().lower() == 'quit':
        break
    # Keep the reply in its own variable: assigning it to `chatbot_response`
    # would overwrite the function with a string and the next iteration would
    # fail with "TypeError: 'str' object is not callable".
    reply = chatbot_response(user_input)
    print("Chatbot: " + reply)