In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import StackingClassifier

In [2]:
# Read the dataset, then pull out the text column (features) and the label
# column (target) used by the rest of the notebook.
data = pd.read_csv('depression_dataset_reddit_cleaned.csv')
X, y = data['Message'], data['Type']

# Preview the first rows as this cell's output.
data.head()

Unnamed: 0,Message,Type
0,we understand that most people who reply immed...,1
1,welcome to r depression s check in post a plac...,1
2,anyone else instead of sleeping more when depr...,1
3,i ve kind of stuffed around a lot in my life d...,1
4,sleep is my greatest and most comforting escap...,1


In [3]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# TF-IDF features capped at 1000 terms. The vectorizer is fitted on the
# training messages only and then applied unchanged to both splits.
tfidf_vectorizer = TfidfVectorizer(max_features=1000)
tfidf_vectorizer.fit(X_train)

X_train_tfidf = tfidf_vectorizer.transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

In [5]:
# Base learners for the stacking ensemble, as (name, estimator) pairs.
# Every estimator gets the same fixed seed so repeated runs are comparable.
base_models = [
    ('Random Forest', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('Gradient Boosting', GradientBoostingClassifier(n_estimators=100, random_state=42)),
    # probability=True matches the SVC configuration shown in the fitted
    # StackingClassifier repr recorded in this notebook. With it, the stacker
    # can take predict_proba from the SVM like it does from the tree models;
    # without it, predict_proba is unavailable on SVC. SVC's random_state is
    # also only used for the probability estimates.
    ('SVM', SVC(kernel='linear', C=1.0, probability=True, random_state=42)),
]

In [6]:
# Train each base learner on its own and report its accuracy on the test split.
for name, model in base_models:
    # fit() returns the estimator itself, so prediction can be chained onto it.
    y_pred = model.fit(X_train_tfidf, y_train).predict(X_test_tfidf)
    accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
    print(f"Accuracy of {name}: {accuracy}")

Accuracy of Random Forest: 0.9573367808661927
Accuracy of Gradient Boosting: 0.9508726567550096
Accuracy of SVM: 0.9592760180995475


In [7]:
# Stack the base learners under a logistic-regression meta-model.
meta_model = LogisticRegression()
stacked_model = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
)

# fit() is left as the last expression so the fitted ensemble is displayed.
stacked_model.fit(X_train_tfidf, y_train)

StackingClassifier(estimators=[('Random Forest',
                                RandomForestClassifier(random_state=42)),
                               ('Gradient Boosting',
                                GradientBoostingClassifier(random_state=42)),
                               ('SVM',
                                SVC(kernel='linear', probability=True,
                                    random_state=42))],
                   final_estimator=LogisticRegression())

In [8]:
# Score the fitted stacked ensemble on the test split.
y_pred_stacked = stacked_model.predict(X_test_tfidf)
accuracy_stacked = accuracy_score(y_true=y_test, y_pred=y_pred_stacked)
print(f"Accuracy of Stacked Model: {accuracy_stacked}")

Accuracy of Stacked Model: 0.9657401422107305


In [9]:
def classify_message(message):
    """Label one message with the fitted stacked model.

    Parameters
    ----------
    message : str
        Raw text typed by the user.

    Returns
    -------
    str or None
        "Depressed" when the model predicts class 1, "Normal Text" for any
        other prediction, and None when the message is empty or only
        whitespace (there is nothing to classify).
    """
    if not message.strip():
        return None
    features = tfidf_vectorizer.transform([message])
    prediction = stacked_model.predict(features)
    return "Depressed" if prediction[0] == 1 else "Normal Text"


# Interactive demo: classify messages typed by the user until they enter 'exit'.
while True:
    try:
        user_input = input("Enter a message or 'exit' to quit: ")
    except (EOFError, KeyboardInterrupt):
        # No more input is available, or the user interrupted: stop cleanly
        # instead of ending the cell with a traceback.
        break
    # Surrounding whitespace is ignored so that e.g. "exit " also quits.
    if user_input.strip().lower() == 'exit':
        break
    label = classify_message(user_input)
    if label is None:
        # A blank line previously went through the model and was reported as
        # "Normal Text"; ask for real input instead.
        print("Please enter a non-empty message.")
    else:
        print(label)
    print("")

Enter a message or 'exit' to quit: i have never felt this low and that is why i want to kick the bucket
Normal Text

Enter a message or 'exit' to quit: i sometimes feel like none of this makes any sense
Normal Text

Enter a message or 'exit' to quit: i want to die
Depressed

Enter a message or 'exit' to quit: i killed a man
Normal Text

Enter a message or 'exit' to quit: i hate how i feel about myself. sometimes i wish i could end all the pain
Depressed

Enter a message or 'exit' to quit: sometimes i feel so shitty about how people make me feel
Depressed

Enter a message or 'exit' to quit: i wonder if my life has any purpose
Normal Text

Enter a message or 'exit' to quit: my does not have any purpose and thats why i should end it
Normal Text

Enter a message or 'exit' to quit: people don't care about me like i care about them and that makes me feel shitty
Depressed

Enter a message or 'exit' to quit: i dont feel that hungry 
Normal Text

Enter a message or 'exit' to quit: 
Normal Text
