# Import Modules

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
import warnings

# Load the dataset, encode categorical features, split into train/test sets, then train and evaluate the model

In [70]:
# Opt in to pandas' future behaviour where `replace` no longer silently
# downcasts object columns; the conversion is done explicitly below.
pd.set_option('future.no_silent_downcasting', True)
# NOTE(review): this silences every warning for the rest of the session.
# It is kept so later cells behave as before, but consider narrowing it.
warnings.filterwarnings('ignore')

data = pd.read_csv('depression.csv')

# Two-valued text columns are turned into integer codes. LabelEncoder
# numbers the labels in sorted order, so 'Female'/'No' become 0 and
# 'Male'/'Yes' become 1.
binary_columns = [
    'Gender',
    'Have you ever had suicidal thoughts ?',
    'Family History of Mental Illness',
]
encoder = LabelEncoder()
for column in binary_columns:
    data[column] = encoder.fit_transform(data[column])

# Multi-valued text columns are mapped to hand-picked integer codes.
category_codes = {
    'Dietary Habits': {
        'Moderate': 1,
        'Healthy': 2,
        'Unhealthy': 3,
    },
    'Sleep Duration': {
        'Less than 5 hours': 0,
        '5-6 hours': 1,
        '7-8 hours': 2,
        'More than 8 hours': 3,
    },
}
for column, codes in category_codes.items():
    # Assign the result back instead of `data[column].replace(..., inplace=True)`:
    # the inplace form operates on a temporary Series and does not update
    # `data` once pandas Copy-on-Write is active. `infer_objects` then turns
    # the object column into a numeric dtype when every value was mapped.
    data[column] = data[column].replace(codes).infer_objects()

# Features are every column except the target.
X = data.drop('Depression', axis=1)
y = data['Depression']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the forest as well as the split so that Restart & Run All reproduces
# the same accuracy and the same predicted probabilities.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 95.05%


# Use the trained model to predict whether a student is likely to have depression

In [110]:
"""
Scales to use when entering data

  1. Academic Pressure: 1-5
  2. Study Satisfaction: 1-5

  3. Sleep Duration: {
       'Less than 5 hours': 0,
       '5-6 hours': 1,
       '7-8 hours': 2,
       'More than 8 hours': 3
     }

  4. Dietary Habits: {
       'Moderate': 1,
       'Healthy': 2,
       'Unhealthy': 3
     }

  5. Have you ever had suicidal thoughts ?: {'Yes': 1, 'No': 0}

  6. Study Hours: Any number (usually 0-12)
  7. Financial Stress: 1-5

  8. Family History of Mental Illness: {'Yes': 1, 'No': 0}
  9. Gender: {'Male': 1, 'Female': 0}
 10. Age: Any number (e.g. 17)
"""

# Probability cut-offs that decide which message is printed.
# NOTE(review): predict_proba reports how confident the model is that the
# label is positive; it does not measure severity, so the "mild"/"major"
# wording is a presentation choice — confirm it is intended.
MILD_THRESHOLD = 0.50
MAJOR_THRESHOLD = 0.79

# One student to score. Values are entered as numbers (not strings) so the
# frame has the same numeric dtypes as the encoded training data.
person = pd.DataFrame({
    'Gender': [1],
    'Age': [17],
    'Academic Pressure': [3],
    'Study Satisfaction': [1],
    'Sleep Duration': [2],
    'Dietary Habits': [1],
    'Have you ever had suicidal thoughts ?': [1],
    'Study Hours': [3],
    'Financial Stress': [1],
    'Family History of Mental Illness': [0]
}, index=[0])

# Put the columns in the order the model was fitted with; scikit-learn
# rejects a frame whose feature names are ordered differently.
person = person[list(model.feature_names_in_)]

prediction = model.predict(person)
probability = model.predict_proba(person)

# Column 1 of predict_proba corresponds to model.classes_[1].
# Assumes the 'Depression' target is coded 0/1 — TODO confirm.
depression_probability = probability[0][1]

if depression_probability > MAJOR_THRESHOLD:
    print("The person is likely to have major depression.\n")
elif depression_probability >= MILD_THRESHOLD:
    print("The person is likely to have mild depression.\n")
else:
    print("The person is unlikely to have depression.\n")

print(f"Probability of depression: {depression_probability*100:.2f}%")

The person is likely to have mild depression.

Probability of depression: 79.00%
