In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns
import time


In [None]:

# Load the dataset from a CSV file resolved relative to the working directory.
df = pd.read_csv(filepath_or_buffer="EDA_Student_Depression_Dataset.csv")

# Preview the first five rows as the cell's rich output.
df.head(n=5)


In [None]:

# Feature names: every column of the frame except the target 'Depression'.
columns = df.columns.tolist()
# Raises ValueError if the target column is absent, as list.remove would.
del columns[columns.index('Depression')]

# X holds the feature columns, y holds the target column.
X = df[columns]
y = df['Depression']


In [None]:

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable
# across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)


In [None]:

# Random forest tuned through GridSearchCV (3-fold CV on the training split).
# The forest is seeded so that Restart & Run All reproduces the same trees and
# the same scores; 1234 matches the seed used for the train/test split.
rf = RandomForestClassifier(random_state=1234)
param_grid = {
    'n_estimators': [100],
    'max_depth': [10]
}
# Score the folds with ROC AUC, the metric reported below, rather than the
# default accuracy, so that candidate selection and reporting use the same
# criterion if the grid is ever extended.
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=3, scoring='roc_auc')

# Time the whole search (all CV fits plus the final refit of the best model).
start_time_cv = time.time()
grid_search.fit(X_train, y_train)
end_time_cv = time.time()

print(f"Tempo cross-validation: {end_time_cv - start_time_cv:.2f} secondi")

# best_estimator_ is the best candidate refit on the full training split.
best_model = grid_search.best_estimator_
# Probability of the positive class (second column of predict_proba).
y_proba_cv = best_model.predict_proba(X_test)[:, 1]
# NOTE: despite the label, this AUC is measured on the held-out test set using
# the cross-validation-selected model; it is not the mean of the CV fold scores.
auc_cv = roc_auc_score(y_test, y_proba_cv)
print(f"AUC (Cross-Validated): {auc_cv}")


In [None]:

# Random forest trained directly (no grid search) with 200 trees of depth <= 10.
# Seeded so the fitted model, and every metric derived from it, is
# reproducible; 1234 matches the seed used for the train/test split.
rf_direct = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=1234)

# Time only the fit on the training split.
start_time = time.time()
rf_direct.fit(X_train, y_train)
end_time = time.time()

training_time = end_time - start_time
print(f"Tempo di addestramento: {training_time:.2f} secondi")

# Hard class predictions on the held-out test split, used by the evaluation cell.
y_pred = rf_direct.predict(X_test)


In [None]:

# Evaluate the directly trained forest on the held-out test split.
# AUC uses the predicted probability of the positive class (column 1).
print("AUC:", roc_auc_score(y_test, rf_direct.predict_proba(X_test)[:, 1]))

print(f"\nAccuracy score on the test set: {accuracy_score(y_test, y_pred)}")
print(f"\nClassification Report:\n{classification_report(y_test, y_pred)}")

# confusion_matrix(y_true, y_pred): rows are true classes, columns are
# predicted classes, so the heatmap axes are labelled accordingly.
cm = confusion_matrix(y_test, y_pred)
# Explicit figure/axes instead of the implicit pyplot state machine.
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', ax=ax)
ax.set_title('Random Forest Confusion Matrix')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.show()
