<a href="https://colab.research.google.com/github/Hafeeplay/Animation-Nation/blob/master/sp2try2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tensorflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
from keras.models import Model
from keras.layers import Input, Dense, Embedding, LSTM, Bidirectional, Dropout, concatenate
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# from keras_preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE
import pandas as pd
import numpy as np
from imblearn.under_sampling import RandomUnderSampler

In [3]:

# Load the dataset
df = pd.read_csv("/content/drive/MyDrive/Specialproject2/depression_dataset_reddit_cleaned.csv")
X=df.iloc[:, 0].values
y=df.iloc[:, 1].values

In [4]:
# Resample the dataset to balance the classes
df_majority = df[df["is_depression"]== 0]
df_minority = df[df["is_depression"]== 1]
df_minority_upsampled = resample(df_minority, replace=True, n_samples=len(df_majority), random_state=42)
df_upsampled = pd.concat([df_majority, df_minority_upsampled])
df_upsampled["is_depression"].value_counts()

0    3900
1    3900
Name: is_depression, dtype: int64

In [5]:
# Split the dataset into train and test sets
X = df_upsampled["clean_text"].values
y = df_upsampled["is_depression"].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, train_size=0.7, random_state=42)

In [6]:
# Preprocess the text data
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)
vocab_size = len(tokenizer.word_index) + 1
max_length = 100
X_train_pad = pad_sequences(X_train_seq, maxlen=max_length, padding='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=max_length, padding='post')

In [7]:
# Apply SMOTE to balance the classes in the train set
smote = SMOTE()
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_pad, y_train)

In [8]:
# Build the model
inputs = Input(shape=(max_length,))
embedding_layer = Embedding(vocab_size, 100, input_length=max_length)(inputs)
dropout_layer = Dropout(0.2)(embedding_layer)
rnn_layer = Bidirectional(LSTM(128))(dropout_layer)
lstm_layer = LSTM(128)(dropout_layer)
bilstm_layer = Bidirectional(LSTM(128))(dropout_layer)
concat_layer = concatenate([rnn_layer, lstm_layer, bilstm_layer])
dense_layer = Dense(128, activation='relu')(concat_layer)
outputs = Dense(1, activation='sigmoid')(dense_layer)
model = Model(inputs=inputs, outputs=outputs)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [9]:
# Train the model
history = model.fit(X_train_resampled, y_train_resampled, epochs=10, batch_size=32, validation_data=(X_test_pad, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test_pad, y_test, verbose=0)
print('Accuracy: %f' % (accuracy*100))

Accuracy: 96.367520
