In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import joblib


In [2]:
data_classification = pd.read_csv('resources/preprocessed_student_depression_data.csv')
data_classification = data_classification.drop(columns=['Family History of Mental Illness'])

In [3]:
X_class = data_classification.drop(columns=['Depression'])
y_class = data_classification['Depression']
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)
random_forest_model = RandomForestClassifier(n_estimators=300, random_state=42, max_depth=10, min_samples_split=5)
random_forest_model.fit(X_train_class, y_train_class)
y_pred_class = random_forest_model.predict(X_test_class)
print(f'RF: Classification Report:\n{random_forest_model.score(X_test_class, y_test_class)}')
print(f'RF: Confusion Matrix:\n{pd.crosstab(y_test_class, y_pred_class, rownames=["Actual"], colnames=["Predicted"])}')
print(f'RF: Accuracy: {random_forest_model.score(X_test_class, y_test_class)}')
print(f'RF: F1 Score: {random_forest_model.score(X_test_class, y_test_class)}')
print(f'RF: Precision: {random_forest_model.score(X_test_class, y_test_class)}')
print(f'RF: Recall: {random_forest_model.score(X_test_class, y_test_class)}')
print(f'RF: ROC AUC: {random_forest_model.score(X_test_class, y_test_class)}')

RF: Classification Report:
0.8349758107865974
RF: Confusion Matrix:
Predicted     0     1
Actual               
0          1824   519
1           402  2836
RF: Accuracy: 0.8349758107865974
RF: F1 Score: 0.8349758107865974
RF: Precision: 0.8349758107865974
RF: Recall: 0.8349758107865974
RF: ROC AUC: 0.8349758107865974


In [4]:
model_tree = DecisionTreeClassifier(random_state=42, max_depth=10, min_samples_split=5)
model_tree.fit(X_train_class, y_train_class)
y_pred_tree = model_tree.predict(X_test_class)
print(f'Decision Tree Classification Report:\n{model_tree.score(X_test_class, y_test_class)}')
print(f'Decision Tree Confusion Matrix:\n{pd.crosstab(y_test_class, y_pred_tree, rownames=["Actual"], colnames=["Predicted"])}')
print(f'Decision Tree Accuracy: {model_tree.score(X_test_class, y_test_class)}')
print(f'Decision Tree F1 Score: {model_tree.score(X_test_class, y_test_class)}')
print(f'Decision Tree Precision: {model_tree.score(X_test_class, y_test_class)}')
print(f'Decision Tree Recall: {model_tree.score(X_test_class, y_test_class)}')
print(f'Decision Tree ROC AUC: {model_tree.score(X_test_class, y_test_class)}')

Decision Tree Classification Report:
0.811503314818133
Decision Tree Confusion Matrix:
Predicted     0     1
Actual               
0          1781   562
1           490  2748
Decision Tree Accuracy: 0.811503314818133
Decision Tree F1 Score: 0.811503314818133
Decision Tree Precision: 0.811503314818133
Decision Tree Recall: 0.811503314818133
Decision Tree ROC AUC: 0.811503314818133


In [5]:
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train_class, y_train_class)
y_pred_knn = knn_model.predict(X_test_class)
print(f'KNN Classification Report:\n{knn_model.score(X_test_class, y_test_class)}')
print(f'KNN Confusion Matrix:\n{pd.crosstab(y_test_class, y_pred_knn, rownames=["Actual"], colnames=["Predicted"])}')
print(f'KNN Accuracy: {knn_model.score(X_test_class, y_test_class)}')
print(f'KNN F1 Score: {knn_model.score(X_test_class, y_test_class)}')
print(f'KNN Precision: {knn_model.score(X_test_class, y_test_class)}')
print(f'KNN Recall: {knn_model.score(X_test_class, y_test_class)}')
print(f'KNN ROC AUC: {knn_model.score(X_test_class, y_test_class)}')

KNN Classification Report:
0.7882099982082064
KNN Confusion Matrix:
Predicted     0     1
Actual               
0          1636   707
1           475  2763
KNN Accuracy: 0.7882099982082064
KNN F1 Score: 0.7882099982082064
KNN Precision: 0.7882099982082064
KNN Recall: 0.7882099982082064
KNN ROC AUC: 0.7882099982082064


In [6]:
joblib.dump(random_forest_model, 'resources/rf_model.pkl')

['resources/rf_model.pkl']