In [40]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [41]:
df = pd.read_csv('dataset.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,text,title,target
0,0,Welcome to /r/depression's check-in post - a p...,"Regular check-in post, with information about ...",1
1,1,We understand that most people who reply immed...,Our most-broken and least-understood rules is ...,1
2,2,Anyone else just miss physical touch? I crave ...,"I haven’t been touched, or even hugged, in so ...",1
3,3,I’m just so ashamed. Everyone and everything f...,Being Depressed is Embarrassing,1
4,4,I really need a friend. I don't even have a si...,I'm desperate for a friend and to feel loved b...,1


In [42]:
df = df.drop(columns='Unnamed: 0',axis=1)
df.head()

Unnamed: 0,text,title,target
0,Welcome to /r/depression's check-in post - a p...,"Regular check-in post, with information about ...",1
1,We understand that most people who reply immed...,Our most-broken and least-understood rules is ...,1
2,Anyone else just miss physical touch? I crave ...,"I haven’t been touched, or even hugged, in so ...",1
3,I’m just so ashamed. Everyone and everything f...,Being Depressed is Embarrassing,1
4,I really need a friend. I don't even have a si...,I'm desperate for a friend and to feel loved b...,1


In [43]:
df['title_text']=df['title']+" "+df['text']
df.head()

Unnamed: 0,text,title,target,title_text
0,Welcome to /r/depression's check-in post - a p...,"Regular check-in post, with information about ...",1,"Regular check-in post, with information about ..."
1,We understand that most people who reply immed...,Our most-broken and least-understood rules is ...,1,Our most-broken and least-understood rules is ...
2,Anyone else just miss physical touch? I crave ...,"I haven’t been touched, or even hugged, in so ...",1,"I haven’t been touched, or even hugged, in so ..."
3,I’m just so ashamed. Everyone and everything f...,Being Depressed is Embarrassing,1,Being Depressed is Embarrassing I’m just so as...
4,I really need a friend. I don't even have a si...,I'm desperate for a friend and to feel loved b...,1,I'm desperate for a friend and to feel loved b...


In [44]:
X = df['title_text'].astype(str).values
y = df['target'].values

In [45]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [46]:
vocab_size = 10000
embedding_dim =200
max_len =200


In [47]:
tokenizer = Tokenizer(num_words=vocab_size,oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

In [48]:
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

In [49]:
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len, padding='post', truncating='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len, padding='post', truncating='post')



In [50]:
embeddings_index = {}
embedding_dim = 200  
with open('glove.6B.200d.txt', encoding='utf-8') as f: 
    for line in f:
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

In [51]:
embedding_matrix =np.zeros((vocab_size, embedding_dim))

In [52]:
embedding_matrix = np.zeros((vocab_size, embedding_dim))
for word, i in tokenizer.word_index.items():
    if i < vocab_size:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

In [53]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Dropout

In [54]:
model=Sequential([
    Embedding(input_dim=vocab_size, output_dim=200, input_length=max_len,
    weights=[embedding_matrix], trainable=False),
    Bidirectional(LSTM(128,return_sequences=False)),
    Dropout(0.3),
    Dense(64,activation='relu'),
    Dropout(0.3),
    Dense(5,activation='softmax')
])

In [55]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 200, 200)          2000000   
                                                                 
 bidirectional_2 (Bidirecti  (None, 256)               336896    
 onal)                                                           
                                                                 
 dropout_4 (Dropout)         (None, 256)               0         
                                                                 
 dense_4 (Dense)             (None, 64)                16448     
                                                                 
 dropout_5 (Dropout)         (None, 64)                0         
                                                                 
 dense_5 (Dense)             (None, 5)                 325       
                                                      

In [56]:
model.fit(X_train_pad,y_train,
          epochs=13,validation_split=0.1,
          batch_size=32)

Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13


<keras.src.callbacks.History at 0x212c20ffa60>

In [57]:
loss, accuracy = model.evaluate(X_test_pad,y_test)
print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.7475


In [58]:
model.save("model.h5")

  saving_api.save_model(


In [59]:
import pickle
with open('tokenizer.pkl','wb') as file:
    pickle.dump(tokenizer,file)