In [129]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_distances
from nltk.stem import WordNetLemmatizer
import nltk
import joblib

In [130]:
# Fetch the WordNet corpus through NLTK's downloader
nltk.download('wordnet')

# Load the raw transcript, one utterance per line
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm it matches the encoding of the data file.
with open("../data/human_chat.txt", "r") as file:
    conversation = file.readlines()

# Speaker labels are removed by fixed-width slicing
CHATBOT_PREFIX_LEN = len("Chatbot: ")    # 9 characters
EMPLOYEE_PREFIX_LEN = len("Employee: ")  # 10 characters

# Even-numbered lines are treated as chatbot turns, odd-numbered lines as the
# employee replies; zip() pairs them and ignores a trailing unpaired line.
chatbot_lines = conversation[0::2]
employee_lines = conversation[1::2]

chatbot_responses = [
    bot_line[CHATBOT_PREFIX_LEN:].strip()
    for bot_line, _ in zip(chatbot_lines, employee_lines)
]
employee_responses = [
    emp_line[EMPLOYEE_PREFIX_LEN:].strip()
    for _, emp_line in zip(chatbot_lines, employee_lines)
]

# An odd line count means the final chatbot line has no reply to pair with
if len(conversation) % 2 == 1:
    print("Warning: Incomplete conversation data at index", len(conversation) - 1)  # Debugging statement

# Assemble the paired turns into a two-column DataFrame
data = {
    'Chatbot Response': chatbot_responses,
    'Employee Response': employee_responses
}
df = pd.DataFrame(data)

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\chama\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [131]:
# Sentiment Analysis
# The VADER analyzer loads its lexicon from the NLTK data directory; fetch it
# here so the cell also works on a machine where it was never downloaded.
nltk.download('vader_lexicon', quiet=True)

Text = df['Employee Response']
sid = SentimentIntensityAnalyzer()

# polarity_scores() returns one dict per response; build the score columns in a
# single DataFrame aligned on df's index instead of expanding row by row.
sentiment_scores = pd.DataFrame(
    [sid.polarity_scores(text) for text in Text],
    index=df.index,
)

# Drop score columns left over from a previous execution before concatenating,
# so re-running this cell does not produce duplicate column names (which would
# turn later scalar lookups such as row['pos'] into Series).
df = pd.concat(
    [
        df.drop(columns=['rating', *sentiment_scores.columns], errors='ignore'),
        sentiment_scores,
    ],
    axis=1,
)


In [132]:
# TF-IDF Vectorization
# Fit the vectorizer on the employee responses and keep the resulting
# document-term matrix as an array with one row per DataFrame row.
tfidf = TfidfVectorizer()
employee_tfidf = tfidf.fit_transform(df['Employee Response'])
factors = employee_tfidf.toarray()



In [133]:
# Initialize the WordNet lemmatizer; chatbot() applies it to the incoming
# query before the query is vectorized.
lemmatizer = WordNetLemmatizer()

In [134]:

def chatbot(query):
    """Look up the stored employee response most similar to ``query``.

    The query is vectorized with the fitted ``tfidf`` model and compared, by
    cosine similarity, against every row of ``factors`` (the TF-IDF matrix of
    the 'Employee Response' column). The best-scoring row of ``df`` is returned
    together with its sentiment scores.

    Parameters
    ----------
    query : str
        Free-text input to match against the corpus.

    Returns
    -------
    dict
        'match'    - the 'Chatbot Response' value of the best row,
        'response' - the 'Employee Response' value of the best row,
        'score'    - cosine similarity between the query and that row,
        'pos', 'neg', 'neu' - sentiment scores stored for that row.

    Notes
    -----
    The first row with the highest similarity always wins, so a result is
    returned even for a poor match; callers should inspect 'score'
    (presumably 0.0 when the query shares no vocabulary with the corpus —
    verify).
    """
    # step-1: clean
    # NOTE(review): lemmatize() receives the whole query string rather than
    # individual tokens, so multi-word queries likely pass through unchanged.
    # Left as-is because the corpus behind `factors` is not lemmatized either.
    query = lemmatizer.lemmatize(query)
    # step-2: word embedding - transform with the already-fitted vectorizer
    query_vector = tfidf.transform([query]).toarray()
    # step-3: cosine similarity, one value per corpus row (single column)
    similar_score = 1 - cosine_distances(factors, query_vector)
    index = similar_score.argmax()  # position of the best-matching row
    # argmax() yields a position in `factors`, whose rows follow df's row
    # order, so select by position (iloc) rather than by index label (loc).
    # Fetch the row once instead of repeating the lookup per field.
    best_row = df.iloc[index]
    chat_dict = {'match': best_row['Chatbot Response'],
                 'response': best_row['Employee Response'],
                 'score': similar_score[index][0],
                 'pos': best_row['pos'],
                 'neg': best_row['neg'],
                 'neu': best_row['neu']}
    return chat_dict

In [135]:
# Example query: run it through chatbot() and print the returned dict
# (matched rows, similarity score and sentiment scores).
query = "Amazing to think about, isn't it."
response = chatbot(query)
print(response)

{'match': 'Yep! I believe there is a day for that', 'response': "Amazing to think about, isn't it.", 'score': 1.0, 'pos': 0.432, 'neg': 0.0, 'neu': 0.568}
