# Importing necessary libraries

In [1]:
import numpy as np
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Loading the dataset

In [2]:
df = pd.read_csv("C:\\Users\\Mazen\\Desktop\\Mental_Health_FAQ.csv")
df.head()

Unnamed: 0,Question_ID,Questions,Answers
0,1590140,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...
1,2110618,Who does mental illness affect?,It is estimated that mental illness affects 1 ...
2,6361820,What causes mental illness?,It is estimated that mental illness affects 1 ...
3,9434130,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...
4,7657263,Can people with mental illness recover?,"When healing from mental illness, early identi..."


# Cleaning/preprocessing

In [3]:
df = df.drop('Question_ID', axis=1)
df.head()

Unnamed: 0,Questions,Answers
0,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...
1,Who does mental illness affect?,It is estimated that mental illness affects 1 ...
2,What causes mental illness?,It is estimated that mental illness affects 1 ...
3,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...
4,Can people with mental illness recover?,"When healing from mental illness, early identi..."


In [4]:
nltk.download('stopwords')
nltk.download('wordnet')

stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()


def preprocess(corpus):
    corpus = corpus.lower()
    corpus = re.sub('[^a-zA-Z]', ' ', corpus)
    corpus = corpus.split()
    corpus = [lemmatizer.lemmatize(word) for word in corpus if not word in stop_words]
    corpus = ' '.join(corpus)
    return corpus

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Mazen\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Mazen\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [5]:
df['Questions'] = df['Questions'].apply(preprocess)
df['Answers'] = df['Answers'].apply(preprocess)

df.head()

Unnamed: 0,Questions,Answers
0,mean mental illness,mental illness health condition disrupt person...
1,mental illness affect,estimated mental illness affect adult america ...
2,cause mental illness,estimated mental illness affect adult america ...
3,warning sign mental illness,symptom mental health disorder vary depending ...
4,people mental illness recover,healing mental illness early identification tr...


# Text Vectorization

In [6]:
vectorizer = TfidfVectorizer()

X = vectorizer.fit_transform(df['Questions'])
y = vectorizer.transform(df['Answers'])

# Model Training

In [7]:
def respond(input_text):
    input_text = preprocess(input_text)
    input_text_vector = vectorizer.transform([input_text])
    sim = cosine_similarity(input_text_vector, X).flatten()
    index = np.argmax(sim)
    return df['Answers'][index]

# Model Evaluation

In [8]:
while True:
    input_text = input('You: ')
    if (input_text.lower() == 'quit') or (input_text.lower() == 'end'):
        break
    answer = respond(input_text)
    print('Bot:', answer)

You: people with mental illness, can they recover?
Bot: healing mental illness early identification treatment vital importance based nature illness range effective treatment available type treatment essential person affected proactive fully engaged recovery process many people mental illness diagnosed treated respond well although might experience return symptom even case careful monitoring management disorder still quite possible live fulfilled productive life
You: mental illness affects whom?
Bot: estimated mental illness affect adult america adult serious mental illness mental illness discriminate affect anyone regardless gender age income social status ethnicity religion sexual orientation background although mental illness affect anyone certain condition may common different population instance eating disorder tend occur often female disorder attention deficit hyperactivity disorder prevalent child additionally age susceptible young old especially vulnerable mental illness usually