In [60]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#Load Sentiment Model


In [61]:
import pickle
import numpy as np
import nltk
from nltk.corpus import RegexpTokenizer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

In [62]:
# Load other tools for text preprocessing

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

##Define object classes

In [63]:
class MajorityVoteClassifier(object):
    def __init__(self, classifiers):
        self.classifiers = classifiers

    def predict(self, X):
        predictions = []
        for clf in self.classifiers:
            predictions.append(clf.predict(X))

        # Perform majority vote
        majority_vote = np.apply_along_axis(lambda x: np.argmax(np.bincount(x)), axis=0, arr=predictions)
        return majority_vote

    @classmethod
    def load(cls, filename):
        with open(filename, 'rb') as f:
            return pickle.load(f)

In [64]:
class TfidfEmbeddingVectorizer(object):

    def __init__(self, word_model):
        self.word_model = word_model
         #word_model represents the pre-trained word embedding model
        self.word_idf_weight = None
        #This attribute will later store the IDF (Inverse Document Frequency) weights of the words.
        self.vector_size = word_model.vector_size
        #The vector_size attribute is set to the dimensionality of the word vectors in the word_model
        self.is_fitted = False

    def default_idf(self):
      return self.max_idf

    def fit(self, docs): # comply with scikit-learn transformer requirement
        """
        Fit in a list of docs, which had been preprocessed and tokenized,
        such as word bi-grammed, stop-words removed, lemmatized, part of speech filtered.
        Then build up a tfidf model to compute each word's idf as its weight.
        Noted that tf weight is already involved when constructing average word vectors, and thus omitted.
        :param: pre_processed_docs: list of docs, which are tokenized
        :return: self
        """
        text_docs = []
        for doc in docs:
            text_docs.append(" ".join(doc))

        tfidf = TfidfVectorizer()  # default 1-gram 
        tfidf.fit(text_docs) # must be list of text string

        # if a word was never seen - it must be at least as infrequent
        # as any of the known words - so the default idf is the max of
        # known idf's
        max_idf = max(tfidf.idf_)    # used as default value for defaultdict

        # Create the word_idf_weight defaultdict using a lambda function
        # self.word_idf_weight = defaultdict(lambda: max_idf)

        # Create the word_idf_weight defaultdict using the default_idf function
        # self.word_idf_weight = defaultdict(default_idf)
        self.max_idf = max(tfidf.idf_)
        self.word_idf_weight = defaultdict(self.default_idf)


        # Populate the word_idf_weight defaultdict with the word and idf pairs
        for word, i in tfidf.vocabulary_.items():
            self.word_idf_weight[word] = tfidf.idf_[i]

        self.is_fitted = True  # Set the flag to True after fitting

        return self

    def transform(self, docs):       # comply with scikit-learn transformer requirement
        doc_word_vector = self.word_average_list(docs)
        return doc_word_vector

    def word_average(self, sent):
        """
        Compute average word vector for a single doc/sentence.
        :param sent: list of sentence tokens
        :return:
            mean: float of averaging word vectors
        """

        mean = []
        for word in sent:
            if word in self.word_model.key_to_index:
                vector = self.word_model.get_vector(word)
                mean.append(vector * self.word_idf_weight[word])  # idf weighted

        if not mean:
            return np.zeros(self.vector_size)
        else:
            mean = np.array(mean).mean(axis=0)
            return mean

    def word_average_list(self, docs):
        """
        Compute average word vector for multiple docs, where docs had been tokenized.
        :param docs: list of sentence in list of separated tokens
        :return:
            array of average word vector in shape (len(docs),)
        """
        return np.vstack([self.word_average(sent) for sent in docs])

    
    def save(self, filename):
        """
        Save the vectorizer to a file using pickle.
        :param filename: Name of the file to save the vectorizer.
        """
        with open(filename, 'wb') as f:
            pickle.dump(self, f)


    @classmethod
    def load(cls, filename):
        """
        Load the vectorizer from a file.
        :param filename: Name of the file to load the vectorizer from.
        :return: An instance of the TfidfEmbeddingVectorizer class.
        """
        with open(filename, 'rb') as f:
            return pickle.load(f)


            
#In summary, this class combines the TF-IDF vectorization technique with pre-trained word embeddings 
#to transform preprocessed and tokenized documents into average word vector representations that are weighted by IDF. 
#The resulting vectors can be used as input features for various machine learning models or downstream tasks.        


##Load Trained Models

In [65]:
# Load the vectorizer from a file
loaded_vectorizer = TfidfEmbeddingVectorizer.load('/content/drive/MyDrive/chatbot_data/tfidf_vec_tr.pkl')

# Load the weights from a file
with open('/content/drive/MyDrive/chatbot_data/word_idf_weights.pkl', 'rb') as file:
    loaded_word_idf_weights = pickle.load(file)

# Create an instance of TfidfEmbeddingVectorizer using the loaded vectorizer
tfidf_vec_tr = TfidfEmbeddingVectorizer(loaded_vectorizer.word_model)

tfidf_vec_tr.word_idf_weight = loaded_word_idf_weights

In [66]:
# Load the model
model = MajorityVoteClassifier.load('/content/drive/MyDrive/chatbot_data/majority_vote_model.pkl')

#Load Chatbot Model

In [67]:
!pip -q install transformers

In [68]:
saved_model_dir = '/content/drive/MyDrive/chatbot_data/version1' # path to the model file

In [69]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

tokenizer = AutoTokenizer.from_pretrained('microsoft/DialoGPT-medium', padding_side="left")
chatbot = AutoModelForCausalLM.from_pretrained(saved_model_dir)

#Load Roberta Model

In [70]:
import re
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

In [113]:
model_rob_name = "deepset/roberta-base-squad2"
model_rob = AutoModelForQuestionAnswering.from_pretrained(model_rob_name)
tokenizer_model_rob = AutoTokenizer.from_pretrained(model_rob_name)
# Creating the Q&A pipeline
nlp = pipeline('question-answering', model_rob=model_rob_name, tokenizer_model_rob=model_rob_name)

No model was supplied, defaulted to distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [72]:
#Contextual Text / corpus

text_rob = """
Argentina Helpline: 08003451435

Australia Helpline: 13 11 14 

Austria Helpline: 142

Bangladesh Helpline: 09612-119911

Barbados Helpline: (246) 4299999

Belarus Helpline: 375(29)101-73-73

Belgium Helpline: 107

Bosnia and Herzegovina Helpline: 080050305

Brazil Helpline: 188 

Bulgaria Helpline: 116111

Canada Helpline: (855)242-3310, Specyfic or kids: 1-800-6686868

Chile Helpline: 1515 (for youth), 67613603

Colombia Helpline:106

Costa Rica Helpline:(506) 2272–3774

Croatia Helpline: 014828888

Cyprus Helpline: For kids/parents 1466

Denmark	Helpline: 116111

Ecuador	Helpline: (593)997196911

Estonia	Helpline: 6558088

Fiji	Helpline: 1543

Finland	Helpline: 020391920

France	Helpline: 3114

Germany	Helpline: 08001110111

Gibraltar	Helpline: 116123

Greece Helpline: 1018 

Guyana Helpline: (592)600-7896

Hong Kong	 Helpline: 28960000

Hungary  Helpline: 116123

Iceland  Helpline: 1717	

India  Helpline:	1800121203040

Iran	Helpline: https://irancrisisline.org/

Ireland	 Helpline: 116123

Israel Helpline: 1201

Italy	 Helpline: (022)327-2327

Japan	 Helpline: 03-5774-0992

Kenya	 Helpline: (072)217-8177

Latvia Helpline: 116123

Lithuania	 Helpline: 116123

Luxembourg  Helpline: 454545

Malaysia Helpline: 0327806803

Malta Helpline:	179

Mauritius	Helpline: 800-1111

Mexico	Helpline: 55-259-8121

New Zealand	Helpline: 4357

Norway Helpline:	116123

Pakistan Helpline: 03041111741

Peru Helpline: 113

Poland Helpline:	800702222

Portugal Helpline: 225506070

Romania	Helpline: 0374456420

Singapore Helpline:	1767

Slovakia Helpline:	0800800566

Slovenia Helpline: 116123

South Africa Helpline: 0117811852

South Korea	Helpline: 1566-2525

Spain	Helpline: 900525100

Sri Lanka	Helpline: 1333

Sweden Helpline: 116111

Switzerland	Helpline: 147

Thailand	Helpline: 02-113-6789

Netherlands	Helpline: 09001450

China	Helpline: 4008960960

Philippines	Helpline: 1800-1888-1553

Taiwan Helpline: 1995

United Kingdom Helpline:	0800 689 5652

United States Helpline:	1-800-662-4357

Ukraine	Helpline: 7333

Serbia Helpline:	0800-300-303

If these helplines do not meet your needs, you can visit https://findahelpline.com/ for additional options.

Symptoms of Stress:
- Anxiety
- Skin problems
- Menstrual problems
- Headaches
- Trouble sleeping
- Jaw pain
- Changes in appetite
- Frequent mood swings
- Difficulty concentrating
- Feeling overwhelmed
- Increased use of tobacco, alcohol, or drugs
- Worsening of chronic health or mental health problems

Ways to Reduce Stress:
1. Accept your needs: Recognize what your triggers are. What situations make you feel physically and mentally agitated? Once you know this, you can avoid them when it's reasonable to, and cope when you can't.
2. Manage your time: Prioritizing your activities can help you use your time well. Making a day-to-day schedule helps ensure you don't feel overwhelmed by everyday tasks and deadlines.
3. Practice relaxation: Deep breathing, meditation, and progressive muscle relaxation are good ways to calm yourself. Taking a break to refocus can have benefits beyond the immediate moment.
4. Exercise daily: Schedule time to walk outside, bike, or join a dance class. Whatever you do, make sure it's fun. Daily exercise naturally produces stress-relieving hormones in your body and improves your overall physical health.
5. Set aside time for yourself: Schedule something that makes you feel good. It might be reading a book, going to the movies, getting a massage, or taking your dog for a walk.
6. Eat well: Eating unprocessed foods, like whole grains, vegetables, and fresh fruit, is the foundation for a healthy body and mind. Eating well can also help stabilize your mood.
7. Get enough sleep: Symptoms of some mental health conditions, like mania in bipolar disorder, can be triggered by getting too little sleep.
8. Avoid alcohol and drugs: They don't actually reduce stress; in fact, they often worsen it. If you're struggling with substance abuse, educate yourself and get help.
9. Talk to someone: Whether to friends, family, a counselor, or a support group, airing out and talking can help.

"""

#Define functions

In [73]:
# functions to get user's name and location

import random

# Function to ask for user's name
def ask_user_name():
    name_variations = [
      "May I know your name?",
      "Could you please tell me your name?",
      "I'd love to know what your name is.",
      "Do you mind sharing your name with me?",
      "What should I call you?",
      "Could you introduce yourself by telling me your name?",
      "May I inquire about your name?",
      "What name do you go by?",
      "I'm curious, what's your name?",
      "Would you be kind enough to let me know your name?"]

    random_name_variation = random.choice(name_variations)
    print(">> Pandora: {}\n".format(random_name_variation))
    print('[Please enter only your first name.]\n')
    user_name = input()
    while len(user_name.split()) > 1:
        print(">> Pandora: Please enter only your first name.")
        user_name = input()
    return user_name

# Function to ask for user's country
def ask_user_country(user_name):

    country_questions = [
      "Which country are you from?",
      "May I know the country you call home?",
      "Could you kindly share your country of residence?",
      "In which country do you currently reside?",
      "What country do you hail from?",
      "What's the name of your home country?",
      "I'm curious, what's your country?",
      "Could you let me know the country you belong to?",
      "From which country do you originate?",
      "What is the country of your citizenship?"]

    random_question = random.choice(country_questions)
    print(">> Pandora: Nice to meet you, {}.".format(user_name))
    print(">> Pandora: {}[Please enter only your country.]\n".format(random_question))
    user_country = input()
    while len(user_country.split()) > 2:
        print(">> Pandora: Please enter only your country.")
        user_country = input()
    return user_country

In [74]:
# functions to perform sentiment analysis on user's answers

def preprocess_text(text):

  # tokenization
  tokenizer = RegexpTokenizer(r'[a-zA-Z]{2,}') #remove number and words has only one character
  ttext = tokenizer.tokenize(text.lower())

  # filter stopwords
  words = [w for w in ttext if not w in stopwords.words('english')]

  lemmatizer = WordNetLemmatizer()
  lemma_text = [lemmatizer.lemmatize(i) for i in words]

  return tfidf_vec_tr.transform([lemma_text])
  

def sentiment_analysis(text, model):
    # Preprocess the input text if required
    processed_text = preprocess_text(text)
    
    # Make predictions using the loaded model
    predictions = model.predict(processed_text)

    return predictions

In [75]:
# function initiates conversation with the trained chatbot

def make_conversation(turns):
  
  user_inputs = []

  for x in range(turns):
      # Simulate a single turn of User-Bot conversation without chat history
      generated_responses = []  # List to store generated responses
      # Simulate a single turn of User-Bot conversation without chat history
      user_input = input(">> User:")

      # save user inputs
      user_inputs.append(user_input)
      
      user_input_ids = tokenizer.encode(user_input + tokenizer.eos_token, return_tensors='pt')

      # Generate a response from the model
      bot_response_ids = chatbot.generate(user_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)

      # Decode the bot's response
      bot_response = tokenizer.decode(bot_response_ids[:, user_input_ids.shape[-1]:][0], skip_special_tokens=True)

      # Check if the response has been generated before
      while bot_response in generated_responses:
          # Generate a new response
          bot_response_ids = chatbot.generate(user_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
          bot_response = tokenizer.decode(bot_response_ids[:, user_input_ids.shape[-1]:][0], skip_special_tokens=True)

      # Print the bot's response (excluding the last response in the loop)
      if x < turns - 1:
          print("Pandora: {}".format(bot_response))

      # Add the response to the list of generated responses
      generated_responses.append(bot_response)

  return user_inputs

In [162]:
# Main function to run the chatbot
def run_chatbot(turns, sentiment_model, model_rob):
    print(">> Pandora: Hello there! I need to take some information from you before we start!")

    user_name = ask_user_name()
    user_country = ask_user_country(user_name)

    user_inputs = []
    print(">> Pandora: Hello {}!. Let's get started!\nTell me what's bothering you.\n".format(user_name))

    while True:
        answers = make_conversation(turns)

        analysis_text = '. '.join(answers)
        prediction = sentiment_analysis(analysis_text, sentiment_model)

        if prediction[0] == 0:
            print(">> Pandora: It seems like you're not stressed anymore. If you or someone else need assistance, feel free to reach out anytime. Have a great day!")
            print("\n>> Pandora: What would you like to do next?")
            print("[If you want to restart the conversation, please enter 'restart'. If you want to exit, please enter 'exit'.]\n")
            user_input = input()

            if user_input.lower() == "restart":
               print("\n>> Pandora: Sure! Let's start over.")
               user_name = ask_user_name()
               user_country = ask_user_country(user_name)
               print(">> Pandora: Hello {}! Let's get started!\nTell me what's bothering you.\n".format(user_name))
               continue
            elif user_input.lower() == "exit":
               print("\n>> Pandora: Goodbye! If you or someone else need any assistance in the future, feel free to reach out. Take care!")
               return user_name, user_country, None
               # Stop execution of the loop and exit the program

        else:
            print(">> Pandora: {}, I sense that you might be feeling stressed.\nIf the feelings of stress persist, I suggest you seek help from a qualified mental health professional.\nIn the meantime, I'm here to help! Would you be interested in learning more about the symptoms of stress so you can assess if that resonates with your own experience?".format(user_name))
            print("[Please answer with only yes/no.]\n")
            proceed = input()
            all_yes = True

            if proceed.lower() == "yes":
                # Define the question1 using the user's name
                question1 = "{}, would you like to know the symptoms of stress?".format(user_name)
                # Extract the symptoms of stress based on the context
                symptoms = re.findall(r"- (.+)", text_rob)
                # Print the symptoms of stress
                print(">> Pandora: Symptoms of stress:")
                for symptom in symptoms:
                    print(symptom.strip())

                # Add 2nd question
                print("\n>> Pandora: {}, would you also like some tips on how to reduce stress?".format(user_name))
                print("[Please answer with only yes/no.]\n")
                proceed2 = input()

                if proceed2.lower() == "yes":
                    # Define the question2
                    question2 = "What are some ways to reduce stress?"
                    # Extract the ways to reduce stress based on the context
                    ways = text_rob.split("Ways to Reduce Stress:")[1].strip().split("\n")
                    # Print the question and ways to reduce stress
                    print(">> Pandora: Ways to reduce stress:")
                    for way in ways:
                        print(way.strip())

                    # Add 3rd question
                    print("\n>> Pandora: In addition to this information, would you like a helpline in {} that can offer support to help you cope with stress?".format(user_country))
                    print("[Please answer with only yes/no.]\n")
                    proceed3 = input()

                    if proceed3.lower() == "yes":
                        # Define the question3
                        question3 = "Would you like a helpline in {} that can offer support to help you cope with stress?".format(user_country)
                        # Extract the helpline for Country based on the context
                        helpline = re.search(r"{}\s+Helpline:\s*([\d()+\- ]+)".format(user_country), text_rob, re.IGNORECASE)
                        # Print the helpline for Country
                        print("\n>> Pandora: Helpline in {} :".format(user_country))
                        if helpline:
                            print(helpline.group(1))
                        else:
                            print("\n>> Pandora: No helpline found for {}.".format(user_country))
                        # Add last question
                        print("\n>> Pandora: If this helpline does not meet your needs, would you like a site that you can visit for additional options?")
                        print("[Please answer with only yes/no.]\n")
                        proceed4 = input()
                        if proceed4.lower() == "yes":
                            QA_input = {
                                'question': 'If this helpline does not meet your needs, would you like a site that you can visit for additional options?',
                                'context': text_rob
                            }
                            answer = nlp(QA_input)
                            # Get the additional option
                            add_option = answer['answer'].split('\n')
                            # Print the Site
                            print("\n>> Pandora: Site for additional options:")
                            for option in add_option:
                                print(option.strip())
                        elif proceed4.lower() == "no":
                            all_yes = False
                            print(">> Pandora: Alright, if you change your mind or need any assistance, feel free to let me know. Take care!")
                        else:
                            print(">> Pandora: Sorry, I couldn't understand your response. Please answer with 'yes' or 'no'.")

                    elif proceed3.lower() == "no":
                        all_yes = False
                        print(">> Pandora: Alright, if you change your mind or need any assistance, feel free to let me know. Take care!")
                    else:
                        print(">> Pandora: Sorry, I couldn't understand your response. Please answer with 'yes' or 'no'.")

                elif proceed2.lower() == "no":
                    all_yes = False
                    print(">> Pandora: Alright, if you change your mind or need any assistance, feel free to let me know. Take care!")
                else:
                    print(">> Pandora: Sorry, I couldn't understand your response. Please answer with 'yes' or 'no'.")

            elif proceed.lower() == "no":
                all_yes = False
                print(">> Pandora: Alright, if you change your mind or need any assistance, feel free to let me know. Take care!")
            else:
                print(">> Pandora: Sorry, I couldn't understand your response. Please answer with 'yes' or 'no'.")

            if all_yes:
                print(">> Pandora: Thank you for answering all the questions! I hope the information provided is helpful. If you have any more questions or need further assistance, feel free to ask. Take care!")

        print("\n>> Pandora: What else would you like to discuss?")
        print("[If you want to restart the conversation please enter restart, if you want to close the chatbot press exit, if you want to continue making conversation just skip]\n")
        user_input = input()

        if user_input.lower() == "restart":
            print("\n>> Pandora:  Sure! Let's start over.")
            user_name = ask_user_name()
            user_country = ask_user_country(user_name)
            print(">> Pandora: Hello {}! Let's get started!\nTell me what's bothering you.\n".format(user_name))
            continue
        elif user_input.lower() == "exit":
            print("\n>> Pandora: Goodbye! If you or someone else need any assistance in the future, feel free to reach out. Take care!")
            return user_name, user_country, proceed  # Stop execution of the loop and exit the program 

        # Conversation continues here based on user input


#App

In [163]:
turns = 3

user_name, user_counrty, proceed = run_chatbot(turns, model, model_rob) # model parameter is the model for sentiment analysis

>> Pandora: Hello there! I need to take some information from you before we start!
>> Pandora: I'm curious, what's your name?

[Please enter only your first name.]

m
>> Pandora: Nice to meet you, m.
>> Pandora: From which country do you originate?[Please enter only your country.]

greece
>> Pandora: Hello m!. Let's get started!
Tell me what's bothering you.

>> User:i am stressed


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Pandora: I am sorry to hear that. What is the reason behind this?
>> User:i feel anxious for my first day at work


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Pandora: I'm sorry to hear that. What is the reason behind this?
>> User:i m bad at making conversation so i have anxiety about today


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


>> Pandora: m, I sense that you might be feeling stressed.
If the feelings of stress persist, I suggest you seek help from a qualified mental health professional.
In the meantime, I'm here to help! Would you be interested in learning more about the symptoms of stress so you can assess if that resonates with your own experience?
[Please answer with only yes/no.]

yes
>> Pandora: Symptoms of stress:
Anxiety
Skin problems
Menstrual problems
Headaches
Trouble sleeping
Jaw pain
Changes in appetite
Frequent mood swings
Difficulty concentrating
Feeling overwhelmed
Increased use of tobacco, alcohol, or drugs
Worsening of chronic health or mental health problems

>> Pandora: m, would you also like some tips on how to reduce stress?
[Please answer with only yes/no.]

yes
>> Pandora: Ways to reduce stress:
1. Accept your needs: Recognize what your triggers are. What situations make you feel physically and mentally agitated? Once you know this, you can avoid them when it's reasonable to, and cope 

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Pandora: I'm here for you. Could you tell me why you're feeling this way?
>> User:i failed one exan


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


Pandora: I'm sorry to hear that. What's the reason behind this?
>> User:i was anxious and i didn't make it in time


A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


>> Pandora: margarita, I sense that you might be feeling stressed.
If the feelings of stress persist, I suggest you seek help from a qualified mental health professional.
In the meantime, I'm here to help! Would you be interested in learning more about the symptoms of stress so you can assess if that resonates with your own experience?
[Please answer with only yes/no.]

no
>> Pandora: Alright, if you change your mind or need any assistance, feel free to let me know. Take care!

>> Pandora: What else would you like to discuss?
[If you want to restart the conversation please enter restart, if you want to close the chatbot press exit, if you want to continue making conversation just skip]

exit

>> Pandora: Goodbye! If you or someone else need any assistance in the future, feel free to reach out. Take care!
