In [1]:
import nltk
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import warnings
# Silence every Python warning for the rest of the session (a blanket filter, not limited to one library).
warnings.filterwarnings('ignore')
# Shared WordNet lemmatizer instance; preprocess_text() further down calls it for every token.
lemmatizer = nltk.stem.WordNetLemmatizer()

# Fetch the NLTK resources used by the tokenizers and the lemmatizer.
# NOTE(review): 'stopwords' is downloaded but no stop-word filtering is visible in this notebook - confirm it is needed.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to C:\Users\Fareedah
[nltk_data]     Ajao\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to C:\Users\Fareedah
[nltk_data]     Ajao\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\Fareedah
[nltk_data]     Ajao\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
# Read the FAQ export and discard the Question_ID column in one chained
# expression, then preview the first rows.
data = pd.read_csv('Mental_Health_FAQ.csv').drop(columns='Question_ID')
data.head()

Unnamed: 0,Questions,Answers
0,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...
1,Who does mental illness affect?,It is estimated that mental illness affects 1 ...
2,What causes mental illness?,It is estimated that mental illness affects 1 ...
3,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...
4,Can people with mental illness recover?,"When healing from mental illness, early identi..."


In [6]:
def preprocess_text(text):
    """Normalise ``text`` into a single space-separated string of lemmas.

    The text is split into sentences; within each sentence only tokens that
    are purely alphanumeric are kept (so punctuation is dropped), each kept
    token is lower-cased and lemmatized, and the results are joined back
    together with single spaces.

    Args:
        text: the raw string to normalise.

    Returns:
        str: the cleaned sentences joined by a space.
    """
    def _clean_sentence(sentence):
        # str.isalnum rejects any token containing a non-alphanumeric character.
        kept_tokens = filter(str.isalnum, nltk.word_tokenize(sentence))
        # The lemmatizer is called without a part-of-speech hint, which is why
        # "does" shows up as "doe" in the processed questions.
        return ' '.join(lemmatizer.lemmatize(token.lower()) for token in kept_tokens)

    return ' '.join(map(_clean_sentence, nltk.sent_tokenize(text)))

# Store the normalised form of every question next to the original text.
data['tokenized Questions'] = data['Questions'].map(preprocess_text)

data.head()

Unnamed: 0,Questions,Answers,tokenized Questions
0,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...,what doe it mean to have a mental illness
1,Who does mental illness affect?,It is estimated that mental illness affects 1 ...,who doe mental illness affect
2,What causes mental illness?,It is estimated that mental illness affects 1 ...,what cause mental illness
3,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...,what are some of the warning sign of mental il...
4,Can people with mental illness recover?,"When healing from mental illness, early identi...",can people with mental illness recover


In [7]:
# Plain Python list of the preprocessed questions, in DataFrame row order;
# this is the text the TF-IDF vectorizer is fitted on in the next cell.
x = data['tokenized Questions'].to_list()
x

['what doe it mean to have a mental illness',
 'who doe mental illness affect',
 'what cause mental illness',
 'can people with mental illness recover',
 'what should i do if i know someone who appears to have the symptom of a mental disorder',
 'how can i find a mental health professional for myself or my child',
 'what treatment option are available',
 'if i become involved in treatment what do i need to know',
 'what is the difference between mental health professional',
 'how can i find a mental health professional right for my child or myself',
 'if i become involved in treatment what do i need to know',
 'where else can i get help',
 'what should i know before starting a new medication',
 'if i feel better after taking medication doe this mean i am cured and can stop taking it',
 'how can i get help paying for my medication',
 'where can i go to find therapy',
 'where can i learn about type of mental health treatment',
 'what are the different type of mental health professional',

In [8]:
# Vectorize corpus: learn the TF-IDF vocabulary from the preprocessed questions
# and encode each question as one row of the matrix `corpus`.
tfidf_vectorizer = TfidfVectorizer()
corpus = tfidf_vectorizer.fit_transform(x)

In [9]:
# Inspect the first question's row: each printed line is (row, vocabulary column) followed by its TF-IDF weight.
print(corpus[0])

  (0, 101)	0.36441743462193266
  (0, 128)	0.2595599997967565
  (0, 93)	0.4131356340148716
  (0, 216)	0.3064104920291124
  (0, 126)	0.4362655661041626
  (0, 110)	0.3782586235113424
  (0, 70)	0.4131356340148716
  (0, 228)	0.1765554179540623


In [10]:
# Read one question from the keyboard; the bare `user` echoes it as the cell output.
user = input('Pls ask your question: ')
user

'who does mental illness affect'

In [11]:
# Run the query through the same preprocessing as the FAQ questions so its tokens match the fitted vocabulary.
pre_user = preprocess_text(user)
pre_user

'who doe mental illness affect'

In [12]:
# Encode the query with the already-fitted vectorizer (transform only, no refit);
# the input is wrapped in a list because transform() takes a collection of documents.
vect_user = tfidf_vectorizer.transform([pre_user])
print(vect_user)

  (0, 231)	0.5066540480622876
  (0, 128)	0.2821525222397721
  (0, 101)	0.39613691788888045
  (0, 70)	0.44909562820041293
  (0, 6)	0.5523407361335997


In [13]:
# Cosine similarity between the single query row and every question row:
# the result is a 2-D array with one row holding one score per question.
similarity_scores = cosine_similarity(vect_user, corpus)
similarity_scores

array([[0.40313212, 1.        , 0.3248845 , 0.16652142, 0.22896776,
        0.22024482, 0.0679568 , 0.        , 0.        , 0.08796003,
        0.06237331, 0.        , 0.        , 0.        , 0.0970682 ,
        0.        , 0.        , 0.07084862, 0.06821274, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.14570482,
        0.        , 0.08006947, 0.07356154, 0.0998961 , 0.07361812,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.07205246, 0.06708053, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.08483111, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.16386476, 0.        , 0.        , 0.31

In [14]:
# Position of the highest score. argmax() indexes the flattened array, which
# for a one-row result is the row position of the best-matching question.
most_similar_index = similarity_scores.argmax()
most_similar_index

1

In [16]:
# Positional lookup of the answer paired with the best-matching question
# (corpus rows were built from the DataFrame column, so they share its row order).
data['Answers'].iloc[most_similar_index]

'It is estimated that mental illness affects 1 in 5 adults in America, and that 1 in 24 adults have a serious mental illness. Mental illness does not discriminate; it can affect anyone, regardless of gender, age, income, social status, ethnicity, religion, sexual orientation, or background.\nAlthough mental illness can affect anyone, certain conditions may be more common in different populations. For instance, eating disorders tend to occur more often in females, while disorders such as attention deficit/hyperactivity disorder is more prevalent in children.\nAdditionally, all ages are susceptible, but the young and the old are especially vulnerable. Mental illnesses usually strike individuals in the prime of their lives, with 75 percent of mental health conditions developing by the age of 24. This makes identification and treatment of mental disorders particularly difficult, because the normal personality and behavioral changes of adolescence may mask symptoms of a mental health condit

In [17]:
def collector():
    """Prompt for one question and return the answer of the closest FAQ entry.

    The typed question is normalised with ``preprocess_text``, encoded with
    the fitted ``tfidf_vectorizer`` and compared against every row of
    ``corpus`` by cosine similarity.

    Returns:
        str: the ``Answers`` value at the position of the best-scoring
        question, or a short "no match" message when the input has no
        similarity with any question.
    """
    user = input('Pls ask your question: ')
    pre_user = preprocess_text(user)
    vect_user = tfidf_vectorizer.transform([pre_user])
    # One query row against the corpus -> one row of scores, one per question.
    similarity_scores = cosine_similarity(vect_user, corpus)
    most_similar_index = similarity_scores.argmax()
    # argmax() of an all-zero row is 0, which would silently return the first
    # FAQ answer for blank or completely unrelated input; report no match instead.
    if similarity_scores.flat[most_similar_index] <= 0:
        return "Sorry, I could not find an answer to that. Could you rephrase your question?"
    return data['Answers'].iloc[most_similar_index]

collector()

'It is estimated that mental illness affects 1 in 5 adults in America, and that 1 in 24 adults have a serious mental illness. Mental illness does not discriminate; it can affect anyone, regardless of gender, age, income, social status, ethnicity, religion, sexual orientation, or background. Although mental illness can affect anyone, certain conditions may be more common in different populations. For instance, eating disorders tend to occur more often in females, while disorders such as attention deficit/hyperactivity disorder is more prevalent in children. Additionally, all ages are susceptible, but the young and the old are especially vulnerable. Mental illnesses usually strike individuals in the prime of their lives, with 75 percent of mental health conditions developing by the age of 24. This makes identification and treatment of mental disorders particularly difficult, because the normal personality and behavioral changes of adolescence may mask symptoms of a mental health conditio

In [18]:
def get_response(user_input, min_similarity=0.0,
                 fallback="Sorry, I could not find an answer to that. Could you rephrase your question?"):
    """Return the FAQ answer whose question is most similar to ``user_input``.

    Args:
        user_input: the raw question typed by the user.
        min_similarity: the best cosine similarity must be strictly greater
            than this value for an answer to be returned. The default of 0.0
            only rejects input that has no similarity with any question.
        fallback: message returned when no question clears ``min_similarity``.

    Returns:
        str: the ``Answers`` value at the position of the best-scoring
        question, or ``fallback`` when nothing matches well enough.
    """
    # Preprocess the query exactly like the FAQ questions were preprocessed.
    user_input_processed = preprocess_text(user_input)

    # Encode it with the already-fitted TF-IDF vectorizer (no refit).
    user_input_vector = tfidf_vectorizer.transform([user_input_processed])

    # One query row against the corpus; flatten to one score per question.
    similarity_scores = cosine_similarity(user_input_vector, corpus).ravel()

    most_similar_index = similarity_scores.argmax()

    # argmax() of an all-zero score vector is 0, so without this guard blank or
    # unrelated input would always be answered with the first FAQ entry.
    if similarity_scores[most_similar_index] <= min_similarity:
        return fallback

    # Positional lookup: corpus rows follow the DataFrame's row order.
    return data['Answers'].iloc[most_similar_index]

# Canned greetings; one is drawn at random each time the user says hello.
greetings = ["Hey There.... I am a creation of Ehiz Danny Agba Coder.... How can I help",
            "Hi Human.... How can I help",
            'Twale baba nla, wetin dey happen nah',
            'How far Alaye, wetin happen',  # the comma here was missing, which fused this entry with the next one
            "Good Day .... How can I help", 
            "Hello There... How can I be useful to you today",
            "Hi GomyCode Student.... How can I be of use"]

# Inputs that end the conversation, and the goodbyes the bot can answer with.
exits = ['thanks bye', 'bye', 'quit', 'exit', 'bye bye', 'close']
farewell = ['Thanks....see you soon', 'Babye, See you soon', 'Bye... See you later', 'Bye... come back soon']

random_farewell = random.choice(farewell) # ---------------- Randomly select a farewell message from the list
# Initial pick keeps the name defined at module level; the loop re-draws it for every greeting.
random_greetings = random.choice(greetings)

# Test your chatbot
while True:
    user_input = input("You: ")
    # Normalise once so stray spaces or capitals ("Bye ", " HI") still match.
    normalized_input = user_input.strip().lower()
    if not normalized_input:
        # Nothing typed: prompt again instead of running an empty query.
        continue
    if normalized_input in exits:
        print(f"\nChatbot: {random_farewell}!")
        break
    if normalized_input in ['hi', 'hello', 'hey', 'hi there']:
        # Draw a fresh greeting each time rather than repeating one fixed pick.
        random_greetings = random.choice(greetings)
        print(f"\nChatbot: {random_greetings}!")
    else:   
        response = get_response(user_input)
        print(f"\nChatbot: {response}")


Chatbot: Where you go for help will depend on the nature of the problem and/or symptoms and what best fits you. Often, the best place to start is by talking with someone you trust about your concerns, such as a family member, friend, clergy, healthcare provider, or other professionals. Having this social support is essential in healing from mental illness, and you will be able to ask them for referrals or recommendations for trusted mental health practitioners. Search for mental health resources in your area.
Secondly, there are people and places throughout Nebraska that provide services to talk, to listen, and to help you on your journey to recovery. Thirdly, many people find peer support a helpful tool that can aid in their recovery. There are a variety of organizations that offer support groups for consumers, their family members, and friends. Some support groups are peer led while others may be led by a mental health professional.

Chatbot: We can all suffer from mental health cha