In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import joblib


In [12]:
data_classification = pd.read_csv('resources/preprocessed_student_depression_data.csv')
data_classification = data_classification.drop(columns=['Family History of Mental Illness'])

In [None]:
X_class = data_classification.drop(columns=['Depression'])
y_class = data_classification['Depression']
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)
random_forest_model = RandomForestClassifier(n_estimators=300, random_state=42, max_depth=10, min_samples_split=5)
random_forest_model.fit(X_train_class, y_train_class)
y_pred_class = random_forest_model.predict(X_test_class)
print(f'RF: Classification Report:\n{random_forest_model.score(X_test_class, y_test_class)}')
print(f'RF: Confusion Matrix:\n{pd.crosstab(y_test_class, y_pred_class, rownames=["Actual"], colnames=["Predicted"])}')
print(f'RF: Accuracy: {random_forest_model.score(X_test_class, y_test_class)}')
print(f'RF: F1 Score: {random_forest_model.score(X_test_class, y_test_class)}')
print(f'RF: Precision: {random_forest_model.score(X_test_class, y_test_class)}')
print(f'RF: Recall: {random_forest_model.score(X_test_class, y_test_class)}')
print(f'RF: ROC AUC: {random_forest_model.score(X_test_class, y_test_class)}')

Classification Report:
0.8344382727109837
Confusion Matrix:
Predicted     0     1
Actual               
0          1820   523
1           401  2837
Accuracy: 0.8344382727109837
F1 Score: 0.8344382727109837
Precision: 0.8344382727109837
Recall: 0.8344382727109837
ROC AUC: 0.8344382727109837


In [None]:
model_tree = DecisionTreeClassifier(random_state=42, max_depth=10, min_samples_split=5)
model_tree.fit(X_train_class, y_train_class)
y_pred_tree = model_tree.predict(X_test_class)
print(f'Decision Tree Classification Report:\n{model_tree.score(X_test_class, y_test_class)}')
print(f'Decision Tree Confusion Matrix:\n{pd.crosstab(y_test_class, y_pred_tree, rownames=["Actual"], colnames=["Predicted"])}')
print(f'Decision Tree Accuracy: {model_tree.score(X_test_class, y_test_class)}')
print(f'Decision Tree F1 Score: {model_tree.score(X_test_class, y_test_class)}')
print(f'Decision Tree Precision: {model_tree.score(X_test_class, y_test_class)}')
print(f'Decision Tree Recall: {model_tree.score(X_test_class, y_test_class)}')
print(f'Decision Tree ROC AUC: {model_tree.score(X_test_class, y_test_class)}')

Decision Tree Classification Report:
0.8093531625156782
Decision Tree Confusion Matrix:
Predicted     0     1
Actual               
0          1766   577
1           487  2751
Decision Tree Accuracy: 0.8093531625156782
Decision Tree F1 Score: 0.8093531625156782
Decision Tree Precision: 0.8093531625156782
Decision Tree Recall: 0.8093531625156782
Decision Tree ROC AUC: 0.8093531625156782


In [15]:
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train_class, y_train_class)
y_pred_knn = knn_model.predict(X_test_class)
print(f'KNN Classification Report:\n{knn_model.score(X_test_class, y_test_class)}')
print(f'KNN Confusion Matrix:\n{pd.crosstab(y_test_class, y_pred_knn, rownames=["Actual"], colnames=["Predicted"])}')
print(f'KNN Accuracy: {knn_model.score(X_test_class, y_test_class)}')
print(f'KNN F1 Score: {knn_model.score(X_test_class, y_test_class)}')
print(f'KNN Precision: {knn_model.score(X_test_class, y_test_class)}')
print(f'KNN Recall: {knn_model.score(X_test_class, y_test_class)}')
print(f'KNN ROC AUC: {knn_model.score(X_test_class, y_test_class)}')

KNN Classification Report:
0.7887475362838201
KNN Confusion Matrix:
Predicted     0     1
Actual               
0          1636   707
1           472  2766
KNN Accuracy: 0.7887475362838201
KNN F1 Score: 0.7887475362838201
KNN Precision: 0.7887475362838201
KNN Recall: 0.7887475362838201
KNN ROC AUC: 0.7887475362838201


In [None]:
joblib.dump(random_forest_model, 'resources/rf_model.pkl')