In [None]:
import pandas as pd
import numpy as np
import re

import matplotlib.pyplot as plt
from wordcloud import WordCloud
import collections

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import SnowballStemmer

from sklearn.model_selection import train_test_split

import tensorflow as tf
from keras.layers import Embedding, Bidirectional, LSTM, Dense
from keras.models import Sequential
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
df = pd.read_csv("/content/drive/MyDrive/Datasets/mental health corpus/mental_health.csv")
df.head()

Unnamed: 0,text,label
0,dear american teens question dutch person hear...,0
1,nothing look forward lifei dont many reasons k...,1
2,music recommendations im looking expand playli...,0
3,im done trying feel betterthe reason im still ...,1
4,worried year old girl subject domestic physic...,1


In [None]:
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
max_words = 10000
max_len = 200

tokenizer = Tokenizer(num_words = max_words)

In [None]:
def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    # Tokenize text
    tokens = nltk.word_tokenize(text)
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word not in stop_words]
    # Join filtered tokens back to text
    preprocessed_text = ' '.join(filtered_tokens)
    return preprocessed_text

In [None]:
data = df['text'].apply(preprocess_text)

In [None]:
tokenizer.fit_on_texts(data)
sequence = tokenizer.texts_to_sequences(data)
data = pad_sequences(sequence, maxlen = max_len)

In [None]:
labels = tf.keras.utils.to_categorical(df['label'])

In [None]:
model = Sequential()
model.add(Embedding(max_words, 128, input_length = max_len))
model.add(Bidirectional(LSTM(64, dropout = 0.3, recurrent_dropout = 0.2)))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(2, activation = 'softmax'))

model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 128)          1280000   
                                                                 
 bidirectional (Bidirectiona  (None, 128)              98816     
 l)                                                              
                                                                 
 dense (Dense)               (None, 32)                4128      
                                                                 
 dense_1 (Dense)             (None, 2)                 66        
                                                                 
Total params: 1,383,010
Trainable params: 1,383,010
Non-trainable params: 0
_________________________________________________________________


In [None]:
history = model.fit(data, labels, validation_split = 0.3, epochs = 4, batch_size = 32)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [None]:
model.save("Sentiment.h5")

In [None]:
text_array = [["I feel like killing my self"],
              ["I feel like hanging myself"],
              ["I feel great"]]

column_name = ["text"]

df1 = pd.DataFrame(text_array, columns=column_name)

In [None]:
data1 = df1["text"].apply(preprocess_text)

sequence1 = tokenizer.texts_to_sequences(data1)
data1 = pad_sequences(sequence1, maxlen = max_len)

In [None]:
predictions = model.predict(data1)



In [None]:
predictions

array([[0.0717564 , 0.9282436 ],
       [0.37671974, 0.6232802 ],
       [0.7323508 , 0.26764917]], dtype=float32)

In [None]:
# After training save model weights first to avoid having to retrain again
# Then use model to predict sentiment classes for conversation data
# Use new conversation data to train a

In [None]:
sentiment_scores = predictions[:, 1] - predictions[:, 0]

In [None]:
sentiment_scores

array([ 0.85648715,  0.24656048, -0.46470165], dtype=float32)

In [None]:
!pip install scikit-fuzzy

Collecting scikit-fuzzy
  Downloading scikit-fuzzy-0.4.2.tar.gz (993 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m994.0/994.0 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-fuzzy
  Building wheel for scikit-fuzzy (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-fuzzy: filename=scikit_fuzzy-0.4.2-py3-none-any.whl size=894079 sha256=5503e395fd27f23f3a403e48bf7c4b0fa4146f655b6f49c6950b37f79aea9475
  Stored in directory: /root/.cache/pip/wheels/4f/86/1b/dfd97134a2c8313e519bcebd95d3fedc7be7944db022094bc8
Successfully built scikit-fuzzy
Installing collected packages: scikit-fuzzy
Successfully installed scikit-fuzzy-0.4.2


In [None]:
import numpy as np
import skfuzzy as fuzz
from skfuzzy import control as ctrl

# Define linguistic terms for sentiment
sentiment = ctrl.Antecedent(np.arange(-1, 1.01, 0.01), 'sentiment')

# Define linguistic terms for sentiment categories
labels = ['very_negative', 'negative', 'neutral', 'positive', 'very_positive']
sentiment.automf(names=labels)

# Define linguistic terms for consequent (labels)
labels_consequent = ['very_negative', 'negative', 'neutral', 'positive', 'very_positive']
consequent = ctrl.Consequent(np.arange(0, 101, 1), 'label')
consequent.automf(names=labels_consequent)

# Define fuzzy rule base
rules = [
    ctrl.Rule(sentiment['very_negative'], consequent['very_negative']),
    ctrl.Rule(sentiment['negative'], consequent['negative']),
    ctrl.Rule(sentiment['neutral'], consequent['neutral']),
    ctrl.Rule(sentiment['positive'], consequent['positive']),
    ctrl.Rule(sentiment['very_positive'], consequent['very_positive'])
]

# Create a FIS with the defined antecedent, consequent, and rules
sentiment_ctrl = ctrl.ControlSystem(rules)
sentiment_prediction = ctrl.ControlSystemSimulation(sentiment_ctrl)

def fuzzy_layer(sentiment):
    lstm_sentiment_predictions = sentiment

    # Initialize an empty array for defuzzified sentiment labels
    defuzzified_labels = np.empty_like(lstm_sentiment_predictions)

    # Create a FIS with the defined antecedent, consequent, and rules
    sentiment_ctrl = ctrl.ControlSystem(rules)

    # Loop over each LSTM sentiment prediction
    for i, lstm_sentiment_prediction in enumerate(lstm_sentiment_predictions):
        sentiment_prediction = ctrl.ControlSystemSimulation(sentiment_ctrl)
        sentiment_prediction.input['sentiment'] = lstm_sentiment_prediction
        sentiment_prediction.compute()
        defuzzified_labels[i] = sentiment_prediction.output['label']

    labels = []

    for label in defuzzified_labels:
        if label <= 12.4:
            labels.append("Very-negative")
        elif 12.5 <= label <= 37.4:
            labels.append("Negative")
        elif 37.5 <= label <= 62.4:
            labels.append("Neutral")
        elif 62.5 <= label <= 87.4:
            labels.append("Positive")
        else:
            labels.append("Very-positive")

    return labels

# Defuzzified Labels: [81.494934 62.362434 27.42874 ]

In [None]:
labels = fuzzy_layer(sentiment_scores)

In [None]:
labels

['Positive', 'Neutral', 'Negative']

In [None]:
df = pd.read_json("/content/drive/MyDrive/Datasets/mental health corpus/combined_dataset.json", lines=True)

In [None]:
df.head()

Unnamed: 0,Context,Response
0,I'm going through some things with my feelings...,"If everyone thinks you're worthless, then mayb..."
1,I'm going through some things with my feelings...,"Hello, and thank you for your question and see..."
2,I'm going through some things with my feelings...,First thing I'd suggest is getting the sleep y...
3,I'm going through some things with my feelings...,Therapy is essential for those that are feelin...
4,I'm going through some things with my feelings...,I first want to let you know that you are not ...


In [None]:
data = df['Context'].apply(preprocess_text)

In [None]:
sequence = tokenizer.texts_to_sequences(data)
data = pad_sequences(sequence, maxlen = max_len)

In [None]:
from tensorflow.keras.models import load_model

model = load_model("/content/Sentiment.h5")



In [None]:
predictions = model.predict(data)



In [None]:
sentiment_scores = predictions[:, 1] - predictions[:, 0]

In [None]:
df['scores'] = sentiment_scores

In [None]:
labels = fuzzy_layer(sentiment_scores)

In [None]:
df['labels'] = labels

In [None]:
df.head()

Unnamed: 0,Context,Response,scores,labels
0,I'm going through some things with my feelings...,"If everyone thinks you're worthless, then mayb...",0.968614,Very-positive
1,I'm going through some things with my feelings...,"Hello, and thank you for your question and see...",0.968614,Very-positive
2,I'm going through some things with my feelings...,First thing I'd suggest is getting the sleep y...,0.968614,Very-positive
3,I'm going through some things with my feelings...,Therapy is essential for those that are feelin...,0.968614,Very-positive
4,I'm going through some things with my feelings...,I first want to let you know that you are not ...,0.968614,Very-positive


In [None]:
df['labels'].unique()

array(['Very-positive', 'Positive', 'Negative', 'Neutral',
       'Very-negative'], dtype=object)

In [None]:
df.to_csv("/content/drive/MyDrive/Datasets/mental health corpus/mental_health_data.csv", index=False)

In [None]:
df.shape

(3512, 4)