# Mental Health Sentiment Chatbot Application

## Installing libraries, dependencies, and data

In [None]:
# Import the required libraries and dependencies
import pandas as pd
from sklearn.model_selection import train_test_split
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

from dotenv import load_dotenv
import os
from langchain_openai import ChatOpenAI
from langchain import PromptTemplate
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain

# Set the column width to view the statements.
pd.set_option('max_colwidth', 200)

In [None]:
# Load the dataset, using the CSV column named "Unnamed: 0" as the row index.
df = pd.read_csv("Combined Data.csv", index_col="Unnamed: 0")
# Display five randomly sampled rows as a quick look at the data.
df.sample(5)

## Data Cleanup and Preparation

In [None]:
# Check for missing values. 
df.info()

In [None]:
# Count non-null (True) versus null (False) entries in the statement column.
df['statement'].notnull().value_counts()

In [None]:
# Drop every row that contains a null value in any column.
df = df.dropna()
df.info()

In [None]:
# Get the number of different statuses in the status column:
df['status'].value_counts()

In [None]:
# Turning the statement column to a list.
statements = df['statement'].to_list()
statements

In [None]:
# Score each statement with VADER, then bucket the compound score into a label:
# compound >= 0.05 -> "Positive", compound <= -0.05 -> "Negative", else "Neutral".
analyzer = SentimentIntensityAnalyzer()
score = [analyzer.polarity_scores(text)['compound'] for text in statements]
sentiment = [
    "Positive" if compound >= 0.05
    else "Negative" if compound <= -0.05
    else "Neutral"
    for compound in score
]

In [None]:
# Creating sentiment and score columns.
df['sentiment'] = sentiment
df['score'] = score
df.head()

In [None]:
# Displaying a sample of the new DataFrame.
df.sample(10)

In [None]:
# Get the number of different results in the sentiment column:
df['sentiment'].value_counts()

In [None]:
# Set the features variable.
X = df['statement']
# Set the target variables.
y_status = df['status']
y_sentiment = df['sentiment']

In [None]:
# Hold out 30% of the statements for evaluating the status classifier.
# NOTE: `y_stauts_test` is misspelled, but later cells reference that exact
# name, so it is kept unchanged here.
X_status_train, X_status_test, y_status_train, y_stauts_test = train_test_split(
    X,
    y_status,
    test_size=0.30,
    random_state=1,
)

In [None]:
# Hold out 30% of the statements for evaluating the sentiment classifier.
X_sentiment_train, X_sentiment_test, y_sentiment_train, y_sentiment_test = train_test_split(
    X,
    y_sentiment,
    test_size=0.30,
    random_state=1,
)

## ML Model

Title: y_status_train

In [None]:
# Text classifier for the status labels: TF-IDF features fed into a linear SVM.
status_pipeline = Pipeline(
    steps=[
        ('tfidf', TfidfVectorizer(stop_words=None)),
        ('classifier', LinearSVC()),
    ]
)
status_pipeline.fit(X_status_train, y_status_train)

# Predict the status of the held-out statements and show the raw label array.
status_predictions = status_pipeline.predict(X_status_test)
print(status_predictions)


In [None]:
# Predict a status label for every held-out statement.
status_predictions = status_pipeline.predict(X_status_test)

# Line up each test statement with its predicted and actual label.
stauts_test_results = pd.DataFrame(
    {
        'X_test': X_status_test,
        'Predicted_y': status_predictions,
        'Actual_y': y_stauts_test,
    }
)
print(stauts_test_results)

In [None]:
# Validate the status model by comparing its accuracy on the training and
# testing splits.
training_accuracy = status_pipeline.score(X_status_train, y_status_train)
testing_accuracy = status_pipeline.score(X_status_test, y_stauts_test)

# The scores were previously computed but never shown; print them so the
# validation result is actually visible.
print(f"Status training accuracy: {training_accuracy:.4f}")
print(f"Status testing accuracy: {testing_accuracy:.4f}")

In [None]:
# Evaluate the status predictions against the held-out labels.
confusion_mat = confusion_matrix(y_stauts_test, status_predictions)
accuracy = accuracy_score(y_stauts_test, status_predictions)

# Report the confusion matrix, the per-class metrics, and the overall accuracy.
print("Status Confusion Matrix:", confusion_mat, sep="\n")
print(
    "Status Classification Report:",
    classification_report(y_stauts_test, status_predictions),
    sep="\n",
)
print("Overall Accuracy:", accuracy)

Title: y_sentiment_train

In [None]:
# Text classifier for the sentiment labels: TF-IDF features fed into a linear SVM.
sentiment_pipeline = Pipeline(
    steps=[
        ('tfidf', TfidfVectorizer(stop_words=None)),
        ('classifier', LinearSVC()),
    ]
)
sentiment_pipeline.fit(X_sentiment_train, y_sentiment_train)

# Predict the sentiment of the held-out statements and show the raw label array.
sentiment_predictions = sentiment_pipeline.predict(X_sentiment_test)
print(sentiment_predictions)


In [None]:
# Predict a sentiment label for every held-out statement.
sentiment_predictions = sentiment_pipeline.predict(X_sentiment_test)

# Line up each test statement with its predicted and actual label.
sentiment_test_results = pd.DataFrame(
    {
        'X_test': X_sentiment_test,
        'Predicted_y': sentiment_predictions,
        'Actual_y': y_sentiment_test,
    }
)
print(sentiment_test_results)

In [None]:
# Validate the sentiment model by comparing its accuracy on the training and
# testing splits.
training_accuracy = sentiment_pipeline.score(X_sentiment_train, y_sentiment_train)
testing_accuracy = sentiment_pipeline.score(X_sentiment_test, y_sentiment_test)

# The scores were previously computed but never shown; print them so the
# validation result is actually visible.
print(f"Sentiment training accuracy: {training_accuracy:.4f}")
print(f"Sentiment testing accuracy: {testing_accuracy:.4f}")

In [None]:
# Evaluate the sentiment predictions against the held-out labels.
confusion_mat = confusion_matrix(y_sentiment_test, sentiment_predictions)
accuracy = accuracy_score(y_sentiment_test, sentiment_predictions)

# Report the confusion matrix, the per-class metrics, and the overall accuracy.
print("Sentiment Confusion Matrix:", confusion_mat, sep="\n")
print(
    "Sentiment Classification Report:",
    classification_report(y_sentiment_test, sentiment_predictions),
    sep="\n",
)
print("Overall Accuracy:", accuracy)

## OpenAI

In [None]:
# Load environment variables.
load_dotenv()

# Set the model name for our LLMs.
OPENAI_MODEL = "gpt-3.5-turbo"

# Store the API key in a variable.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Print only the type so the key itself is never displayed. os.getenv returns
# None when the variable is unset, so NoneType here means the key was not found.
print(type(OPENAI_API_KEY))

In [None]:
# Create the chat model client that mental_health_chatbot passes to its chain.
llm=ChatOpenAI(openai_api_key=OPENAI_API_KEY, model_name=OPENAI_MODEL, temperature=0.5)
# Prompt that consists solely of the {query} placeholder.
# NOTE(review): not referenced by any other cell visible here — confirm it is still needed.
simple_prompt = ChatPromptTemplate.from_template("{query}")

In [None]:
def mental_health_chatbot(statement):
    """Ask the LLM for mental-health guidance about a user's statement.

    The statement is classified by ``status_pipeline`` and
    ``sentiment_pipeline``; the statement and both predicted labels are then
    embedded in a question that is sent to ``llm`` through an ``LLMChain``.

    Args:
        statement: The user's free-text statement.

    Returns:
        The value stored under the ``"text"`` key of the chain's result.
    """
    # Named `template` rather than `format` so the builtin is not shadowed.
    template = """
    You are a clinical psychologist. Answer only questions that would be relevant to mental health.
    If you don't know the answer, say you don't know
    If the human asks questions not related to mental health, remind them that your job is to help
    them understand their mental health status, and ask them for a question on that topic. If they ask a question which
    there is not enough information to answer, tell them you don't know and don't make up an
    answer.

    Question: {query}
    Answer:
    """

    # Construct the prompt template.
    prompt_template = PromptTemplate(
        input_variables=["query"],
        template=template,
    )

    # Construct a chain using this template.
    chain = LLMChain(llm=llm, prompt=prompt_template)

    # predict() returns an array with one label per input. Take the single
    # label so the prompt contains the bare label text (e.g. Anxiety) instead
    # of the array's repr (e.g. ['Anxiety']).
    status = status_pipeline.predict([statement])[0]
    sentiment = sentiment_pipeline.predict([statement])[0]
    query = {"query":f'The statement from the user is:{statement}\n The mental health status of the user is/has:{status}\n The sentiment of the statement is:{sentiment}\n Does the user require any assistance? If so what would you suggest?'}

    # Run the chain.
    result = chain.invoke(query)
    return result["text"]

## Gradio App

This section of code involves a user interface where users input statements about their mental state. The code then processes these inputs and returns corresponding mental health statuses using two display textbox components. The purpose is to predict and provide insights into the user's mental state based on their statements.

The data source consolidates information from various Kaggle datasets centered on different facets of mental health. It draws from diverse platforms such as social media, Reddit, Twitter, and more. Each entry is labeled with a specific mental health status, making it an invaluable resource for in-depth analyses, insights into mental health trends, patterns, and predictive modeling.
https://www.kaggle.com/datasets/suchintikasarkar/sentiment-analysis-for-mental-health

In [None]:
# Install transformers and Gradio (needed on Google Colab); comment these lines out if the packages are already installed.
! pip install transformers
! pip install gradio

In [None]:
# Import transformers pipeline
from transformers import pipeline
# Import Gradio
import gradio as gr

In [None]:
# Initialize the pipeline to generate questions and answers using the distilbert-base-cased-distilled-squad model.
# question_answerer = pipeline("question-answering", model='distilbert-base-cased-distilled-squad')

In [None]:
# !pip install --upgrade gradio

import pandas as pd
import gradio as gr

# Step 1: Load your CSV into a Pandas DataFrame
# Use the same relative path and index column as the initial load at the top of
# the notebook; the previous '/Combined Data.csv' pointed at the filesystem root.
df = pd.read_csv("Combined Data.csv", index_col="Unnamed: 0")

# Step 2: Define your Gradio interface and processing function
# Keywords are checked in this order; the first one found in the statement wins.
_STATUS_KEYWORDS = (
    ('depression', 'Depression'),
    ('suicidal', 'Suicidal'),
    ('anxiety', 'Anxiety'),
    ('stress', 'Stress'),
    ('bi-polar', 'Bi-Polar'),
    ('bipolar', 'Bi-Polar'),
    ('personality disorder', 'Personality Disorder'),
)


def predict_mental_health_status(statement):
    """Return a mental-health status label based on keywords in *statement*.

    This is a simple placeholder: it does a case-insensitive substring search
    for a fixed list of keywords and does not perform any real text
    classification.

    Args:
        statement: The user's free-text statement. ``None`` or an empty
            string is treated as containing no keywords.

    Returns:
        The label of the first matching keyword (in the order of
        ``_STATUS_KEYWORDS``), or ``'Normal'`` when nothing matches.
    """
    # Guard against a missing/empty value instead of failing on .lower().
    if not statement:
        return 'Normal'

    # Lower-case once rather than once per keyword comparison.
    lowered = statement.lower()
    for keyword, status in _STATUS_KEYWORDS:
        if keyword in lowered:
            return status
    return 'Normal'

# Step 3: Build the Gradio interface: one input textbox for the statement and
# one output textbox for the predicted status.
statement_input = gr.Textbox(label="Enter your statement")
status_output = gr.Textbox(label="Predicted Mental Health Status")
app = gr.Interface(
    fn=predict_mental_health_status,
    inputs=statement_input,
    outputs=status_output,
)

# Step 4: Start the app with show_error enabled.
app.launch(show_error=True)
