In [1]:
import nltk
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import warnings
# Silence library warnings so they do not clutter the notebook output.
warnings.filterwarnings('ignore')

# nltk.sent_tokenize / nltk.word_tokenize and WordNetLemmatizer rely on NLTK
# data packages that are not installed with the library itself. Fetch them up
# front so the notebook also runs on a fresh environment instead of failing
# with LookupError on first use. 'punkt_tab' is the tokenizer package used by
# newer NLTK releases and 'punkt' by older ones; with the default settings a
# package that cannot be fetched is reported as a failed download, not raised.
for nltk_package in ('punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(nltk_package, quiet=True)

# Single lemmatizer instance shared by preprocess_text.
lemmatizer = nltk.stem.WordNetLemmatizer()

In [2]:
# Load the FAQ table from the CSV file in the current working directory.
data = pd.read_csv(filepath_or_buffer='Mental_Health_FAQ.csv')
# Preview the first five rows.
data.head(n=5)

Unnamed: 0,Question_ID,Questions,Answers
0,1590140,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...
1,2110618,Who does mental illness affect?,It is estimated that mental illness affects 1 ...
2,6361820,What causes mental illness?,It is estimated that mental illness affects 1 ...
3,9434130,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...
4,7657263,Can people with mental illness recover?,"When healing from mental illness, early identi..."


In [3]:
# Question_ID is not referenced by any later cell shown here, so discard it.
# Rebinding `data` (rather than inplace=True) together with errors='ignore'
# keeps this cell idempotent: re-running it once the column is already gone
# no longer raises KeyError.
data = data.drop(columns='Question_ID', errors='ignore')

In [6]:
def preprocess_text(text):
    """Normalise raw text into a lower-cased, lemmatised, space-separated string.

    The text is split into sentences. Within each sentence only tokens that
    are purely alphanumeric are kept (which drops punctuation tokens); each
    kept token is lower-cased and lemmatised. Tokens are joined with single
    spaces per sentence, and the sentence strings are then joined with single
    spaces as well.
    """
    def clean_sentence(sentence):
        # Keep only alphanumeric tokens, lower-cased and lemmatised.
        kept = []
        for word in nltk.word_tokenize(sentence):
            if not word.isalnum():
                continue
            kept.append(lemmatizer.lemmatize(word.lower()))
        return ' '.join(kept)

    return ' '.join(map(clean_sentence, nltk.sent_tokenize(text)))


# Store a normalised copy of every question alongside the original text;
# these strings are what the TF-IDF model is fitted on further down.
data['tokenized Questions'] = [preprocess_text(question) for question in data['Questions']]
data.head(n=5)

Unnamed: 0,Questions,Answers,tokenized Questions
0,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...,what doe it mean to have a mental illness
1,Who does mental illness affect?,It is estimated that mental illness affects 1 ...,who doe mental illness affect
2,What causes mental illness?,It is estimated that mental illness affects 1 ...,what cause mental illness
3,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...,what are some of the warning sign of mental il...
4,Can people with mental illness recover?,"When healing from mental illness, early identi...",can people with mental illness recover


In [7]:
# Plain Python list of the normalised questions, in row order.
y = list(data['tokenized Questions'])
y

['what doe it mean to have a mental illness',
 'who doe mental illness affect',
 'what cause mental illness',
 'can people with mental illness recover',
 'what should i do if i know someone who appears to have the symptom of a mental disorder',
 'how can i find a mental health professional for myself or my child',
 'what treatment option are available',
 'if i become involved in treatment what do i need to know',
 'what is the difference between mental health professional',
 'how can i find a mental health professional right for my child or myself',
 'if i become involved in treatment what do i need to know',
 'where else can i get help',
 'what should i know before starting a new medication',
 'if i feel better after taking medication doe this mean i am cured and can stop taking it',
 'how can i get help paying for my medication',
 'where can i go to find therapy',
 'where can i learn about type of mental health treatment',
 'what are the different type of mental health professional',

In [8]:
# Learn the vocabulary and IDF weights from the normalised FAQ questions,
# then encode those same questions as the TF-IDF matrix that user queries
# are compared against.
tfidf_vectorizer = TfidfVectorizer().fit(y)
corpus = tfidf_vectorizer.transform(y)

In [9]:
# Ask for a sample question interactively, then display what was typed.
user = input('Please ask your question: ')
user

'tell me about drugs'

In [12]:
# Run the query through the same normalisation that was applied to the FAQ
# questions, so both sides are compared in the same form.
preprocessed_user = preprocess_text(user)
preprocessed_user

'tell me about drug'

In [13]:
# Encode the query with the already-fitted vectorizer (transform only, no
# refit) so it shares the feature space of `corpus`.
vector_user = tfidf_vectorizer.transform([preprocessed_user])
print(vector_user)

  (0, 206)	0.5181515985764668
  (0, 125)	0.5648750591870182
  (0, 74)	0.5181515985764668
  (0, 1)	0.37941271630477386


In [14]:
# Cosine similarity between the query vector and every row of `corpus`;
# the result has a single row because there is a single query.
similarity_scores = cosine_similarity(vector_user, corpus)
similarity_scores

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.12526852, 0.        , 0.        ,
        0.        , 0.        , 0.11697337, 0.09928855, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.13016531,
        0.        , 0.11476618, 0.22974659, 0.        , 0.21097123,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.11798986, 0.10328864, 0.        , 0.        ,
        0.        , 0.14955221, 0.09262957, 0.  

In [15]:
# Position of the highest similarity score, i.e. the best-matching FAQ question.
most_similar_index = similarity_scores.argmax()
most_similar_index

32

In [18]:
# Look up, by position, the answer stored for the best-matching question.
data['Answers'].iloc[most_similar_index]

"Some people may advise you on good evidence of what works with the best of intentions, but it's important to find out what works best for you."

In [20]:
def get_response(user_input, min_similarity=0.0):
    """Return the FAQ answer whose question best matches ``user_input``.

    The input is normalised with ``preprocess_text``, encoded with the fitted
    ``tfidf_vectorizer`` and compared against every row of ``corpus`` using
    cosine similarity.

    Parameters
    ----------
    user_input : str
        The question typed by the user.
    min_similarity : float, optional
        The best match must score strictly above this value to be returned.
        The default of 0.0 only rejects queries that share no vocabulary
        term with any FAQ question.

    Returns
    -------
    str
        The matching entry of ``data['Answers']``, or a fallback message
        when nothing matches well enough.
    """
    fallback = "Sorry, I don't have an answer for that yet. Could you rephrase your question?"

    user_input_processed = preprocess_text(user_input)
    if not user_input_processed.strip():
        # Nothing usable is left after normalisation (blank or punctuation only).
        return fallback

    user_input_vector = tfidf_vectorizer.transform([user_input_processed])
    similarity_scores = cosine_similarity(user_input_vector, corpus)

    # When no query term is in the fitted vocabulary every score is 0 and
    # argmax() would return position 0, i.e. an unrelated first answer.
    if similarity_scores.max() <= min_similarity:
        return fallback

    most_similar_index = similarity_scores.argmax()
    return data['Answers'].iloc[most_similar_index]

# Canned replies the chatbot picks from at random.
# Note: every entry needs its trailing comma; without it Python silently
# concatenates two adjacent string literals into a single greeting.
greetings = ["Hi human! How are you doing today",
            "Hello.... How can I help",
            'Twale baba nla, wetin dey happen nah',
            'How far Alaye, wetin happen',
            "Good Day .... How can I help",
            "Hello There... How can I be useful to you today",
            "Hi beautiful/handsome.... How can I be of use"]
# User messages that end the conversation.
exits = ['thanks bye', 'bye', 'quit', 'exit', 'bye bye', 'close']
farewell = ['Thanks....see you soon', 'Babye, See you soon', 'Bye... See you later', 'take care', 'Enjoy', 'Bye... come back soon']
# User messages that are answered with a greeting instead of an FAQ lookup.
greeting_inputs = ['hi', 'hello', 'hey', 'hi there']

# Interactive chat loop: runs until the user types one of the `exits` phrases.
while True:
    user_input = input("You: ")
    # Compare case-insensitively and ignore surrounding whitespace so that
    # inputs such as "Bye " or " hello" are still recognised.
    normalized_input = user_input.strip().lower()
    if not normalized_input:
        # Blank line: nothing to answer, prompt again.
        continue
    if normalized_input in exits:
        # Pick the reply at the moment it is needed so it varies between runs.
        print(f"\nChatbot: {random.choice(farewell)}!")
        break
    if normalized_input in greeting_inputs:
        # Chosen per turn; a value picked once before the loop would repeat
        # the same greeting for the whole session.
        print(f"\nChatbot: {random.choice(greetings)}!")
    else:
        response = get_response(user_input)
        print(f"\nChatbot: {response}")


Chatbot: Sorting out if you are drinking too much can be complicated. You are unique and your relationship with alcohol is unique. No one has the same combination of life experiences and influences that you do. So even though you and your friend may choose to drink, how and why you use alcohol may be different. 
 Those of us who drink, seek benefits from alcohol, not difficulties. But sometimes we start using more, and more often, than makes sense. As a general rule, drinking alcohol becomes a problem when it negatively affects our life or the lives of others. Many people imagine this refers to people who consume alcohol “all day every day.” But even drinking too much on a single occasion can lead to a problem, for instance making a poor decision such as driving while impaired. 
 What’s also important to recognize is the potential for negative consequences related to drinking in various contexts (i.e., places, times and activities) and over time (e.g., coping with chronic problems). Y