In [1]:
import nltk
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
#import spacy
lemmatizer = nltk.stem.WordNetLemmatizer()



[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ajayi\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ajayi\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ajayi\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [2]:
df = pd.read_csv('Mental_Health_FAQ.csv')
df.head(10)

Unnamed: 0,Question_ID,Questions,Answers
0,1590140,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...
1,2110618,Who does mental illness affect?,It is estimated that mental illness affects 1 ...
2,6361820,What causes mental illness?,It is estimated that mental illness affects 1 ...
3,9434130,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...
4,7657263,Can people with mental illness recover?,"When healing from mental illness, early identi..."
5,1619387,What should I do if I know someone who appears...,Although this website cannot substitute for pr...
6,1030153,How can I find a mental health professional fo...,Feeling comfortable with the professional you ...
7,8022026,What treatment options are available?,Just as there are different types of medicatio...
8,1155199,"If I become involved in treatment, what do I n...",Since beginning treatment is a big step for in...
9,7760466,What is the difference between mental health p...,There are many types of mental health professi...


In [3]:
sent = 'I bought some phones two years back when it rained last years eve'
tokenized = nltk.word_tokenize(sent)

In [4]:
[lemmatizer.lemmatize(word.lower()) for word in tokenized]

['i',
 'bought',
 'some',
 'phone',
 'two',
 'year',
 'back',
 'when',
 'it',
 'rained',
 'last',
 'year',
 'eve']

In [5]:

# Define a function for text preprocessing (including lemmatization)
def preprocess_text(text):
    # Identifies all sentences in the data
    sentences = nltk.sent_tokenize(text)

    # Tokenize and lemmatize each word in each sentence
    preprocessed_sentences = []
    for sentence in sentences:
        tokens = [lemmatizer.lemmatize(word.lower()) for word in nltk.word_tokenize(sentence) if word.isalnum()]
        # Turns to basic root - each word in the tokenized word found in the tokenized sentence - if they are all alphanumeric
        # The code above does the following:
        # Identifies every word in the sentence
        # Turns it to a lower case
        # Lemmatizes it if the word is alphanumeric

        preprocessed_sentence = ' '.join(tokens)
        preprocessed_sentences.append(preprocessed_sentence)

    return ' '.join(preprocessed_sentences)


df['tokenized Questions'] = df['Questions'].apply(preprocess_text)
df.head()

Unnamed: 0,Question_ID,Questions,Answers,tokenized Questions
0,1590140,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...,what doe it mean to have a mental illness
1,2110618,Who does mental illness affect?,It is estimated that mental illness affects 1 ...,who doe mental illness affect
2,6361820,What causes mental illness?,It is estimated that mental illness affects 1 ...,what cause mental illness
3,9434130,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...,what are some of the warning sign of mental il...
4,7657263,Can people with mental illness recover?,"When healing from mental illness, early identi...",can people with mental illness recover


In [6]:
corpus = df['tokenized Questions'].to_list()
corpus

['what doe it mean to have a mental illness',
 'who doe mental illness affect',
 'what cause mental illness',
 'can people with mental illness recover',
 'what should i do if i know someone who appears to have the symptom of a mental disorder',
 'how can i find a mental health professional for myself or my child',
 'what treatment option are available',
 'if i become involved in treatment what do i need to know',
 'what is the difference between mental health professional',
 'how can i find a mental health professional right for my child or myself',
 'if i become involved in treatment what do i need to know',
 'where else can i get help',
 'what should i know before starting a new medication',
 'if i feel better after taking medication doe this mean i am cured and can stop taking it',
 'how can i get help paying for my medication',
 'where can i go to find therapy',
 'where can i learn about type of mental health treatment',
 'what are the different type of mental health professional',

In [7]:
# Vectorize corpus
tfidf_vectorizer = TfidfVectorizer()

vectorised_corpus = tfidf_vectorizer.fit_transform(corpus)
# TDIDF is a numerical statistic used to evaluate how important a word is to a document in a collection or corpus.
# The TfidfVectorizer calculates the Tfidf values for each word in the corpus and uses them to create a matrix where each row represents a document and each column represents a word.
# The cell values in the matrix correspond to the importance of each word in each document.
print(vectorised_corpus)

  (0, 101)	0.36441743462193266
  (0, 128)	0.2595599997967565
  (0, 93)	0.4131356340148716
  (0, 216)	0.3064104920291124
  (0, 126)	0.4362655661041626
  (0, 110)	0.3782586235113424
  (0, 70)	0.4131356340148716
  (0, 228)	0.1765554179540623
  (1, 6)	0.5523407361335997
  (1, 231)	0.5066540480622876
  (1, 101)	0.39613691788888045
  (1, 128)	0.2821525222397721
  (1, 70)	0.44909562820041293
  (2, 36)	0.6958986765070116
  (2, 101)	0.5441013605412818
  (2, 128)	0.3875416915165623
  (2, 228)	0.2636098989594234
  (3, 189)	0.3888505175982014
  (3, 226)	0.3888505175982014
  (3, 208)	0.20619723379229024
  (3, 142)	0.5390782138680418
  (3, 192)	0.3888505175982014
  (3, 20)	0.27888228313399066
  (3, 101)	0.27888228313399066
  (3, 128)	0.19863672392259624
  :	:
  (95, 141)	0.42484474614924805
  (95, 112)	0.42484474614924805
  (95, 45)	0.42484474614924805
  (95, 74)	0.38970384828260646
  (95, 223)	0.3162701372700484
  (95, 134)	0.27709743719530117
  (95, 97)	0.20594459776752813
  (95, 33)	0.14762176669

In [10]:
user_input = input('Pls ask your mental health related question: ')


In [11]:
pre_input = preprocess_text(user_input)
pre_input

'what cause mental illness'

In [12]:
trans_input = tfidf_vectorizer.transform([pre_input])
print(trans_input)

  (0, 228)	0.2636098989594234
  (0, 128)	0.3875416915165623
  (0, 101)	0.5441013605412818
  (0, 36)	0.6958986765070116


In [13]:
# find the similarity between vectorized user question and vectorized corpus
sim_score = cosine_similarity(trans_input, vectorised_corpus)
sim_score

array([[0.3454121 , 0.3248845 , 1.        , 0.26433783, 0.31449144,
        0.10474217, 0.09333992, 0.0504516 , 0.0392834 , 0.17671403,
        0.08567089, 0.0392834 , 0.        , 0.04146869, 0.        ,
        0.        , 0.        , 0.09731188, 0.1370412 , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.29272485,
        0.0455938 , 0.10997689, 0.10103813, 0.6431174 , 0.14790075,
        0.        , 0.04125004, 0.03725448, 0.        , 0.        ,
        0.        , 0.03842815, 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.09896538, 0.09213635, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.1165171 , 0.        , 0.07705311, 0.07705311, 0.04631514,
        0.05058916, 0.04940822, 0.06464678, 0.03665274, 0.02827324,
        0.03268224, 0.        , 0.        , 0.        , 0.        ,
        0.26012063, 0.02687654, 0.05355395, 0.48

In [14]:
import numpy as np
np.argmax(sim_score)

2

In [15]:
df['Answers'].iloc[4]

'When healing from mental illness, early identification and treatment are of vital importance. Based on the nature of the illness, there are a range of effective treatments available. For any type of treatment, it is essential that the person affected is proactive and fully engaged in their own recovery process.\nMany people with mental illnesses who are diagnosed and treated respond well, although some might experience a return of symptoms. Even in such cases, with careful monitoring and management of the disorder, it is still quite possible to live a fulfilled and productive life.'

In [16]:
def get_response(user_input):
    user_input_processed = preprocess_text(user_input) # ....................... Preprocess the user's input using the preprocess_text function

    user_input_vector = tfidf_vectorizer.transform([user_input_processed])# .... Vectorize the preprocessed user input using the TF-IDF vectorizer

    similarity_scores = cosine_similarity(user_input_vector, vectorised_corpus) # .. Calculate the score of similarity between the user input vector and the corpus (df) vector

    most_similar_index = similarity_scores.argmax() # ..... Find the index of the most similar question in the corpus (df) based on cosine similarity

    return df['Answers'].iloc[most_similar_index] # ... Retrieve the corresponding answer from the df DataFrame and return it as the chatbot's response


# create greeting list
greetings = ["Hey There.... I am a creation of Ehiz Danny Agba Coder.... How can I help",
            "Hi Human.... How can I help",
            'Twale baba nla, wetin dey happen nah',
            'How far Alaye, wetin happen'
            "Good Day .... How can I help",
            "Hello There... How can I be useful to you today",
            "Hi Student.... How can I be of use"]

exits = ['thanks bye', 'bye', 'quit', 'exit', 'bye bye', 'close']
farewell = ['Thanks....see you soon', 'Babye, See you soon', 'Bye... See you later', 'Bye... come back soon']

random_farewell = random.choice(farewell) # ---------------- Randomly select a farewell message from the list
random_greetings = random.choice(greetings) # -------- Randomly select greeting message from the list

# Test your chatbot
while True:
    user_input = input("You: ")
    if user_input.lower() in exits:
        print(f"\nChatbot: {random_farewell}!")
        break
    if user_input.lower() in ['hi', 'hello', 'hey', 'hi there']:
        print(f"\nChatbot: {random_greetings}!")
    else:
        response = get_response(user_input)
        print(f"\nChatbot: {response}")


Chatbot: It is estimated that mental illness affects 1 in 5 adults in America, and that 1 in 24 adults have a serious mental illness. Mental illness does not discriminate; it can affect anyone, regardless of gender, age, income, social status, ethnicity, religion, sexual orientation, or background. Although mental illness can affect anyone, certain conditions may be more common in different populations. For instance, eating disorders tend to occur more often in females, while disorders such as attention deficit/hyperactivity disorder is more prevalent in children. Additionally, all ages are susceptible, but the young and the old are especially vulnerable. Mental illnesses usually strike individuals in the prime of their lives, with 75 percent of mental health conditions developing by the age of 24. This makes identification and treatment of mental disorders particularly difficult, because the normal personality and behavioral changes of adolescence may mask symptoms of a mental health