In [1]:
import pickle
import re
from functools import lru_cache

import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Read the raw CSV, then replace every missing cell with an empty string so
# the text-cleaning step below only ever receives str values.
DATASET_PATH = "depression_dataset.csv"
data = pd.read_csv(DATASET_PATH)
df = data.fillna(value="")

# Text normalisation helpers
@lru_cache(maxsize=1)
def _stemming_resources():
    """Return ``(stemmer, stop_words)``, built once and reused by every call.

    Constructing a ``PorterStemmer`` and re-reading the English stop-word
    corpus for each token is pure overhead; caching them here makes
    ``stemming`` linear in the number of tokens. The stop words are stored in
    a frozenset so membership tests are O(1) instead of a list scan.
    """
    return PorterStemmer(), frozenset(stopwords.words("english"))


def stemming(content):
    """Normalise a piece of text into a space-joined string of word stems.

    Steps, in order:
      1. replace every character that is not an ASCII letter with a space,
      2. lowercase the result and split it on whitespace,
      3. drop English stop words,
      4. Porter-stem the remaining tokens and join them with single spaces.

    Parameters
    ----------
    content : str
        Raw text to clean.

    Returns
    -------
    str
        The stemmed tokens joined by single spaces ("" if nothing survives).
    """
    stemmer, stop_words = _stemming_resources()
    tokens = re.sub("[^a-zA-Z]", " ", content).lower().split()
    return " ".join(stemmer.stem(token) for token in tokens if token not in stop_words)

# Fixed seed so the train/test split, the fitted tree, and therefore the
# reported metrics are identical on every Restart & Run All.
SEED = 42

# Clean and stem the text column (the column is overwritten in place)
df["clean_text"] = df["clean_text"].apply(stemming)

# Hold out 25% of the rows for evaluation
X_train, X_test, Y_train, Y_test = train_test_split(
    df["clean_text"],
    df["is_depression"],
    test_size=0.25,
    random_state=SEED,
)

# Learn the TF-IDF vocabulary on the training text only; the test text is
# transformed with that same fitted vocabulary.
tfidf_vectorizer = TfidfVectorizer()
x_tfidf = tfidf_vectorizer.fit_transform(X_train)
x_tfidft = tfidf_vectorizer.transform(X_test)

# Initialize and train the Decision Tree model (seeded: tie-breaking between
# equally good splits is otherwise random)
model = DecisionTreeClassifier(random_state=SEED)
model.fit(x_tfidf, Y_train)

# Make predictions on test set
predictions = model.predict(x_tfidft)

# Evaluate the model
accuracy = accuracy_score(Y_test, predictions)
conf_matrix = confusion_matrix(Y_test, predictions)
classification_rep = classification_report(Y_test, predictions)

print(f"Accuracy: {accuracy}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(classification_rep)


Accuracy: 0.9177444386963269

Confusion Matrix:
[[890  83]
 [ 76 884]]

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.91      0.92       973
           1       0.91      0.92      0.92       960

    accuracy                           0.92      1933
   macro avg       0.92      0.92      0.92      1933
weighted avg       0.92      0.92      0.92      1933



In [2]:
# Save the fitted vectorizer and model using pickle. The context managers
# guarantee each file is flushed and closed before it is read back below.
with open("vector.pkl", "wb") as vector_file:
    pickle.dump(tfidf_vectorizer, vector_file)
with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

# Load the model and vectorizer back from disk.
# NOTE: pickle.load can execute arbitrary code; only load files you created
# yourself (as here) or otherwise trust.
with open("vector.pkl", "rb") as vector_file:
    vectorizer = pickle.load(vector_file)
with open("model.pkl", "rb") as model_file:
    loaded_model = pickle.load(model_file)

user_input = input("Enter your text: ")

# Apply the same cleaning/stemming that was applied to the training text
user_input_stemmed = stemming(user_input)

# Vectorize the single input with the reloaded vectorizer and classify it
input_data = [user_input_stemmed]
vector_form1 = vectorizer.transform(input_data)
prediction = loaded_model.predict(vector_form1)

# NOTE(review): the model is trained on the "is_depression" column, but the
# messages below say "Suicidal"/"Non-suicidal" -- confirm the intended wording.
if prediction[0] == 1:
    print('Suicidal')
else:
    print('Non-suicidal')


Enter your text: i want to die
Suicidal
