In [1]:
# importing required libraries
import numpy as np
import pandas as pd
import json
import re
import nltk
from nltk.tokenize import word_tokenize

In [2]:
# Import JSON file
data = pd.read_json(r'intents.json')

Unnamed: 0,intents
0,"{'tag': 'greeting', 'patterns': ['Hi', 'Hey', ..."
1,"{'tag': 'morning', 'patterns': ['Good morning'..."
2,"{'tag': 'afternoon', 'patterns': ['Good aftern..."
3,"{'tag': 'evening', 'patterns': ['Good evening'..."
4,"{'tag': 'night', 'patterns': ['Good night'], '..."
...,...
79,"{'tag': 'spirituality', 'patterns': ['I don't ..."
80,"{'tag': 'stress', 'patterns': ['I need help de..."
81,"{'tag': 'substance-abuse', 'patterns': ['I'm a..."
82,"{'tag': 'trauma', 'patterns': ['There are issu..."


In [3]:
# Loading data from another csv
data2 = pd.read_csv('Conversation.csv')
data2.drop(['Unnamed: 0'],axis=1,inplace=True)
data2.rename(columns = {'question':'questions','answer':'responses'},inplace=True)

Unnamed: 0,questions,responses
0,"hi, how are you doing?",i'm fine. how about yourself?
1,i'm fine. how about yourself?,i'm pretty good. thanks for asking.
2,i'm pretty good. thanks for asking.,no problem. so how have you been?
3,no problem. so how have you been?,i've been great. what about you?
4,i've been great. what about you?,i've been good. i'm in school right now.
...,...,...
3720,that's a good question. maybe it's not old age.,are you right-handed?
3721,are you right-handed?,yes. all my life.
3722,yes. all my life.,you're wearing out your right hand. stop using...
3723,you're wearing out your right hand. stop using...,but i do all my writing with my right hand.


In [4]:
questions=[]
responses=[]

# taking data from 1st file
for intent in data['intents']:
    for lines in intent['patterns']:
        questions.append(lines)
    for lines in intent['responses']:
        responses.append(lines)
        
# taking data from 2nd file
for i in data2['questions']:
    questions.append(i)
for i in data2['responses']:
    responses.append(i)

In [5]:
# creating new dataframe
data_new=pd.DataFrame({"questions":questions,"responses":responses})
data_new

Unnamed: 0,questions,responses
0,Hi,Hello there. Tell me how are you feeling today?
1,Hey,Hi there. What brings you here today?
2,Is anyone there?,Hi there. How are you feeling today?
3,Hi there,Great to see you. How do you feel currently?
4,Hello,Hello there. Glad to see you're back. What's g...
...,...,...
4822,that's a good question. maybe it's not old age.,are you right-handed?
4823,are you right-handed?,yes. all my life.
4824,yes. all my life.,you're wearing out your right hand. stop using...
4825,you're wearing out your right hand. stop using...,but i do all my writing with my right hand.


In [6]:
# Remove duplicates
data_new = data_new.drop_duplicates()

# Remove non-alphabetic characters
data_new['questions'] = data_new['questions'].apply(lambda x: re.sub(r'[^a-zA-Z\s]', '', x))

# Convert all text to lowercase
data_new['questions'] = data_new['questions'].apply(lambda x: x.lower())

# Tokenize the text
data_new['questions'] = data_new['questions'].apply(lambda x: word_tokenize(x))

# Convert the text back to a string
data_new['questions'] = data_new['questions'].apply(lambda x: ' '.join(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_new['questions'] = data_new['questions'].apply(lambda x: re.sub(r'[^a-zA-Z\s]', '', x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_new['questions'] = data_new['questions'].apply(lambda x: x.lower())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_new['questions'] = data_new['quest

In [7]:
# Tokenizing the data
sentences = data_new['questions']
tokenized_sent = []
for s in sentences:
    tokenized_sent.append(word_tokenize(s.lower()))

# Implementing Doc2Vec model and training it 
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

tagged_data = [TaggedDocument(d, [i]) for i, d in enumerate(tokenized_sent)]
tagged_data
model = Doc2Vec(tagged_data, vector_size = 30, window = 10, min_count = 1, epochs = 500)

In [9]:
# Testing the model
import random
from tkinter import *

# Creating an interface for interacting with user
BG_GRAY="#ABB2B9"
BG_COLOR="#17202A"
TEXT_COLOR="#EAECEE"
BUTTON_COLOR="#FFFDD0"

FONT="Helvetica 14"
FONT_BOLD="Helvetica 13 bold"

class ChatbotGUI:
    # Function called once class object is created
    def __init__(self):
        self.window = Tk()
        self._setup_main_window()
    
    # Displaying the main window to user
    def _setup_main_window(self):
        # Giving basic details about window
        self.window.title("Mental Healthcare Chatbot")
        self.window.resizable(width=False,height=False)
        self.window.configure(width=600, height=700, bg=BG_COLOR)
        
        # Displaying the window with labels
        head_label=Label(self.window,bg=BG_COLOR,fg=TEXT_COLOR,text="Welcome",font=FONT_BOLD,pady=10)
        head_label.place(relwidth=1)
        
        line=Label(self.window,width=450,bg=BG_GRAY)
        line.place(relwidth=1,rely=0.07,relheight=0.012)
        
        # Giving text and window width and height
        self.text_widget=Text(self.window,width=20,height=2,bg=BG_COLOR,fg=TEXT_COLOR,font=FONT,padx=5,pady=5)
        self.text_widget.place(relwidth=1,rely=0.08,relheight=0.745)
        self.text_widget.configure(cursor="arrow",state=DISABLED)
        
        # Defining scrollbar for the interface
        scrollbar=Scrollbar(self.text_widget)
        scrollbar.place(relheight=1,relx=0.974)
        scrollbar.configure(command=self.text_widget.yview)
        
        bottom_label=Label(self.window,bg=BG_GRAY,height=60)
        bottom_label.place(relwidth=1,rely=0.825)
        
        # Path for messages
        self.msg_entry=Entry(bottom_label,bg="#2C3E50",fg=TEXT_COLOR,font=FONT)
        self.msg_entry.place(relwidth=0.74,relheight=0.06,rely=0.008,relx=0.011)
        self.msg_entry.focus()
        self.msg_entry.bind("<Return>",self._on_entry_pressed)
        
        send_button=Button(bottom_label,text="Send",bg=BUTTON_COLOR,font=FONT_BOLD,width=20, command=lambda:self._on_entry_pressed(None))
        send_button.place(relx=0.77,rely=0.008,relheight=0.06,relwidth=0.22)
        
    # Message entry function
    def _on_entry_pressed(self,entry):
        msg=self.msg_entry.get()
        self._insert_message(msg,"You")

    # Add message in interface   
    def _insert_message(self,msg,sender):
        # If user didn't wrote anything return
        if not msg:
            return
        
        self.msg_entry.delete(0,END)
        msg1=f"{sender}: {msg}\n\n"
        self.text_widget.configure(state=NORMAL)
        self.text_widget.insert(END,msg1)
        self.text_widget.configure(state=DISABLED)
        
        # Otherwise take input data and predict the output from model
        test_doc = word_tokenize(msg.lower())
        test_doc_vector = model.infer_vector(test_doc)
        index=model.docvecs.most_similar(positive = [test_doc_vector],topn=1)[0][0]  
        response = responses[index]
        
        # Display the output got from model
        bot_name="Saarthi"
        msg2=f"{bot_name}: {response}\n\n"
        self.text_widget.configure(state=NORMAL)
        self.text_widget.insert(END,msg2)
        self.text_widget.configure(state=DISABLED)
        
        self.text_widget.see(END)

    # Repeat the process again  
    def start(self):
        self.window.mainloop() # start the GUI

# Main function
if __name__ == "__main__":
    gui = ChatbotGUI()
    gui.start()