In [75]:
#Importing library

import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

In [76]:
# load dataset
df = pd.read_csv('mental-health2.csv')

# Column 'text' for sentences and column 'label' for label
texts = df['text'].values
labels = df['label'].values
df.head()

Unnamed: 0,text,label
0,This seroquel withdrawal sucks Problems sleepi...,1
1,Mental/emotional pain What do you do when you ...,1
2,Bipolar and different perspectives? Hi everyon...,1
3,2019 resolution: be S.T.A.B.L.E. Hey friends! ...,1
4,"Enjoy life with this one weird trick. Yes, you...",1


In [77]:
# Calculate class weights
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(labels), y=labels)
class_weights_dict = {i: class_weights[i] for i in range(len(class_weights))}

print("Class Weights: ", class_weights_dict)

Class Weights:  {0: 1.8856749311294765, 1: 5.014652014652015, 2: 0.44047619047619047}


In [78]:
def weighted_loss(weights):
    def loss(y_true, y_pred):
        y_true = tf.cast(y_true, tf.int32)
        sample_weights = tf.gather(weights, y_true)
        unweighted_losses = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
        weighted_losses = unweighted_losses * sample_weights
        return tf.reduce_mean(weighted_losses)
    return loss

In [79]:
# Preprocessing
vocab_size = 1000
embedding_dim = 16
max_length = 20
trunc_type = 'post'
padding_type = 'post'
oov_tok = "<OOV>"

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(texts)
padded = pad_sequences(sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

In [80]:
# separating data into training and testing
train_sequences, test_sequences, train_labels, test_labels = train_test_split(padded, labels, test_size=0.2, random_state=42)

train_labels = train_labels.astype(int)
test_labels = test_labels.astype(int)

In [81]:
# making model
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_length),
    GlobalAveragePooling1D(),
    Dense(24, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    Dropout(0.5),
    Dense(3, activation='softmax')
])
weights = tf.constant(class_weights, dtype=tf.float32)
model.compile(loss=weighted_loss(weights), optimizer='adam', metrics=['accuracy'])

In [82]:
# EarlyStopping callback
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    verbose=1,
    mode='max',
    restore_best_weights=True,
    baseline=0.9
)

# Melatih model
num_epochs = 50
history = model.fit(
    train_sequences,
    train_labels,
    epochs=num_epochs,
    validation_data=(test_sequences, test_labels),
    callbacks=[early_stopping]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 8: early stopping


In [83]:
# model evaluation
loss, accuracy = model.evaluate(test_sequences, test_labels)
print(f'Loss: {loss}')
print(f'Accuracy: {accuracy}')

Loss: 0.508139967918396
Accuracy: 0.8327246308326721


In [84]:
# prediction using model
new_texts = '''diee
'''
new_sequences = tokenizer.texts_to_sequences(new_texts)
new_padded = pad_sequences(new_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

In [85]:
predictions = model.predict(new_padded)
print(predictions)

[[0.37325472 0.16358851 0.46315676]
 [0.36204356 0.15821508 0.47974136]
 [0.37325472 0.16358851 0.46315676]
 [0.37325472 0.16358851 0.46315676]
 [0.36810002 0.16096973 0.4709303 ]]


In [86]:
# convert prediction into percentage
predictions_percent = predictions * 100
for i, prediction in enumerate(predictions_percent[0]):
    print(f"Class {i}: {prediction:.2f}%")

# predict result interpretation
predicted_class = predictions.argmax(axis=1)
print(f'Predicted class: {predicted_class[0]}')

Class 0: 37.33%
Class 1: 16.36%
Class 2: 46.32%
Predicted class: 2
