# Importing necessary libraries:

In [1]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Loading the dataset:

In [2]:
# Location of the Counsel Chat CSV export. This is a machine-specific absolute
# path; point it at your own copy of the file when running elsewhere.
DATA_PATH = r"C:\Users\Magda\Documents\sy\counsel-chat-master\counsel-chat-master\data\20200325_counsel_chat.csv"

# Read the raw table and preview its first rows.
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0.1,Unnamed: 0,questionID,questionTitle,questionText,questionLink,topic,therapistInfo,therapistURL,answerText,upvotes,views,split
0,0,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,"Sherry Katz, LCSWCouples and Family Therapist,...",https://counselchat.com/therapists/sherry-katz...,"If everyone thinks you're worthless, then mayb...",1,2899,train
1,1,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,"Robin Landwehr, DBH, LPCC, NCCMental Health in...",https://counselchat.com/therapists/robin-landw...,"Hello, and thank you for your question and see...",1,3514,train
2,2,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,Lee KingI use an integrative approach to treat...,https://counselchat.com/therapists/lee-king,First thing I'd suggest is getting the sleep y...,0,5,train
3,3,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,"Shauntai Davis-YearginPersonalized, private on...",https://counselchat.com/therapists/shauntai-da...,Therapy is essential for those that are feelin...,0,31,train
4,4,0,Can I change my feeling of being worthless to ...,I'm going through some things with my feelings...,https://counselchat.com/questions/can-i-change...,depression,Jordan WhiteLicensed Social Worker at Oak Root...,https://counselchat.com/therapists/jordan-white,I first want to let you know that you are not ...,0,620,train


# Cleaning the dataset:

Columns that are not needed for matching questions to answers are removed with the DataFrame's drop() method, leaving only questionText and answerText.

In [3]:
# Remove every column except the question and answer text; none of the dropped
# columns is read by the later cells shown in this notebook.
# `errors='ignore'` keeps the cell re-runnable: `df` is rebound to the result,
# so on a second execution the columns are already gone and a plain drop()
# would raise KeyError.
df = df.drop(
    columns=['Unnamed: 0', 'questionID', 'questionTitle',
             'questionLink', 'therapistInfo', 'therapistURL',
             'upvotes', 'views', 'split', 'topic'],
    errors='ignore',
)
df.head()

Unnamed: 0,questionText,answerText
0,I'm going through some things with my feelings...,"If everyone thinks you're worthless, then mayb..."
1,I'm going through some things with my feelings...,"Hello, and thank you for your question and see..."
2,I'm going through some things with my feelings...,First thing I'd suggest is getting the sleep y...
3,I'm going through some things with my feelings...,Therapy is essential for those that are feelin...
4,I'm going through some things with my feelings...,I first want to let you know that you are not ...


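The text data is cleaned with the clean_text() function defined below: every non-letter character (punctuation, digits, other symbols) is replaced with a space, the text is lower-cased, stopwords are removed, and the remaining words are lemmatized. The NLTK stopword list and WordNet data that this relies on are downloaded first.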


In [4]:
# Fetch the NLTK resources used by the cleaning step: the stopword lists and
# the WordNet data behind the lemmatizer.
for _resource in ('stopwords', 'wordnet'):
    nltk.download(_resource)

# Shared helpers for clean_text(): a WordNet lemmatizer and the English
# stopwords held in a set for fast membership tests.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Magda\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Magda\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [5]:
def clean_text(text):
    """Normalize raw text into a space-separated string of lemmatized tokens.

    Every character other than an ASCII letter is replaced with a space, the
    text is lower-cased and split on whitespace, tokens found in the
    module-level ``stop_words`` set are discarded, and the remaining tokens are
    lemmatized with the module-level ``lemmatizer`` and re-joined with single
    spaces.

    Parameters
    ----------
    text : str
        Raw text to clean. Any non-string value (for example ``None`` or the
        float NaN pandas uses for a missing cell) is treated as empty text.

    Returns
    -------
    str
        The cleaned text; an empty string when no token survives cleaning.
    """
    # re.sub() raises TypeError on non-strings, so a single missing cell would
    # otherwise abort a whole Series.apply(clean_text) call.
    if not isinstance(text, str):
        return ''
    tokens = re.sub('[^a-zA-Z]', ' ', text).lower().split()
    return ' '.join(
        lemmatizer.lemmatize(word) for word in tokens if word not in stop_words
    )

In [6]:
# Add cleaned companions of the question and answer columns in a single step.
df = df.assign(
    cleaned_question=df['questionText'].apply(clean_text),
    cleaned_answer=df['answerText'].apply(clean_text),
)

# Vectorizing the text:

The TfidfVectorizer object is used to convert the cleaned text data into a matrix of TF-IDF features


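Two matrices are generated: one for the cleaned question text and one for the cleaned answer text. The vectorizer is fitted on the questions only; the answers are transformed with that same vocabulary.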

In [7]:
# Build the TF-IDF representation of the corpus.
# The vocabulary and IDF weights are learned from the cleaned questions; the
# cleaned answers are then projected into that same feature space.
question_corpus = df['cleaned_question']
answer_corpus = df['cleaned_answer']

vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(question_corpus)
y = vectorizer.transform(answer_corpus)

# Training the chatbot:


The generate_response() function takes a user input, cleans it with clean_text(), vectorizes it with the fitted TF-IDF vectorizer, computes the cosine similarity between the input and every question in the dataset, and returns the answer paired with the most similar question.

In [8]:
def generate_response(user_input):
    """Return the dataset answer whose question best matches ``user_input``.

    The input is cleaned with ``clean_text`` and vectorized with the fitted
    ``vectorizer``. Its cosine similarity to every row of the question matrix
    ``X`` is computed, and the answer stored on the best-matching row of ``df``
    is returned.

    Parameters
    ----------
    user_input : str
        Free-text message from the user.

    Returns
    -------
    str
        The original ``answerText`` of the most similar question, or a fallback
        message when the input has zero similarity to every question.
    """
    query_vector = vectorizer.transform([clean_text(user_input)])
    similarities = cosine_similarity(query_vector, X).flatten()
    best = int(np.argmax(similarities))

    # When the input is empty, contains only stopwords, or uses only words the
    # vectorizer has never seen, every similarity is 0 and argmax would pick
    # row 0, i.e. an arbitrary, unrelated answer.
    if similarities[best] <= 0:
        return ("I'm sorry, I couldn't find a relevant answer. "
                "Could you rephrase or give a bit more detail?")

    # Reply with the readable original answer rather than 'cleaned_answer',
    # which is lower-cased, stopword-stripped and lemmatized. argmax yields a
    # row position, so index positionally with .iloc instead of by label.
    return df['answerText'].iloc[best]

# Testing the chatbot:

A while loop continuously prompts the user for input until the user enters "quit" (in any letter case).
For every other input, the generate_response() function is called with the user input as its argument, and the response is printed to the console.

In [None]:
# Interactive session: read a message, answer it, and repeat until the user
# types "quit" (compared case-insensitively).
user_input = input('You: ')
while user_input.lower() != 'quit':
    response = generate_response(user_input)
    print('Chatbot:', response)
    user_input = input('You: ')

You: My husband and I are in a terrible place. Part of me wants to fix it, but then I'm caught up with not knowing how and not being able to communicate my feelings to him because he always feels I'm blaming him. Sometimes I am because the feelings of distrust are so strong. I feel they must be coming from something he's doing, but sometimes I know I'm being irrational. Still, that doesn
