In [112]:
import pandas as pd

In [113]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [114]:
# Load the raw survey responses into a DataFrame.
# NOTE(review): '/content/...' is an absolute path (looks like a Colab session
# directory) -- the file must exist there before this cell runs.
survey_csv = '/content/mental_health_workplace_survey.csv'
df = pd.read_csv(survey_csv)

In [115]:
# Separate the label column from the predictors.
target_col = 'BurnoutRisk'
y = df[target_col]

# 'BurnoutLevel' is removed from the predictors along with the target;
# errors='ignore' makes the drop a no-op for any listed column that is absent.
# NOTE(review): presumably 'BurnoutLevel' is a direct proxy for the target -- confirm.
feature_frame = df.drop(columns=[target_col, 'BurnoutLevel'], errors='ignore')

# One-hot encode the remaining predictors with pandas' default settings.
X = pd.get_dummies(feature_frame)

In [116]:
# Split BEFORE fitting the scaler: fitting StandardScaler on the full dataset
# lets the held-out rows influence the mean/std used for preprocessing
# (data leakage), which biases the test-set evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the scaling statistics from the training rows only.
scaler = StandardScaler()
num_cols = X.select_dtypes(include=['int64', 'float64']).columns
scaler.fit(X_train[num_cols])

# Apply the training-set statistics to every row of X, so that later cells
# which slice X directly see the same scaling as the partitions below.
X[num_cols] = scaler.transform(X[num_cols])

# Re-slice the partitions from the scaled frame. The row labels come from the
# split above, so the train/test membership (and row order) is unchanged.
# Assumes X has a unique row index, which holds for the default index
# produced by pd.read_csv.
X_train, X_test = X.loc[X_train.index], X.loc[X_test.index]

In [118]:
# Baseline 1: decision tree (default hyperparameters, fixed seed) on all features.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Score on the held-out split.
dt_pred = dt.predict(X_test)
dt_acc = accuracy_score(y_test, dt_pred)
print(f"Decision Tree Accuracy (all features): {dt_acc:.2f}")

Decision Tree Accuracy (all features): 0.53


In [107]:
# Baseline 2: random forest (default hyperparameters, fixed seed) on all features.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Score on the held-out split.
rf_pred = rf.predict(X_test)
rf_acc = accuracy_score(y_test, rf_pred)
print(f"Random Forest Accuracy (all features): {rf_acc:.2f}")

Random Forest Accuracy (all features): 0.67


In [119]:
# Baseline 3: k-nearest neighbours with scikit-learn's default settings, on all features.
knn = KNeighborsClassifier().fit(X_train, y_train)

# Score on the held-out split.
knn_pred = knn.predict(X_test)
knn_acc = accuracy_score(y_test, knn_pred)
print(f"k-NN Accuracy (all features): {knn_acc:.2f}")

k-NN Accuracy (all features): 0.60


In [120]:
# Pair each importance reported by the fitted random forest with its column
# name. This requires `rf` to have been fit on all columns of X, otherwise the
# lengths of the values and the index will not match.
importances = rf.feature_importances_
feature_series = pd.Series(data=importances, index=X.columns)

# Keep the names of the three highest-ranked features.
ranked = feature_series.sort_values(ascending=False)
top3 = ranked.index[:3].tolist()
print("Top 3 important features:", top3)

Top 3 important features: ['ProductivityScore', 'ManagerSupportScore', 'StressLevel']


In [121]:
# Restrict the feature matrix to the three selected columns.
X_reduced = X.loc[:, top3]

# Same test fraction and seed as the all-features split.
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
    X_reduced,
    y,
    test_size=0.2,
    random_state=42,
)

In [122]:
# Train a fresh tree on the reduced feature set instead of refitting `dt`.
# Refitting in place would overwrite the all-features model, leaving `dt`
# unable to predict on X_test and making the notebook's state depend on which
# cell ran last. Same hyperparameters and seed as the all-features tree, so
# the score is identical to what an in-place refit would produce.
dt_r = DecisionTreeClassifier(random_state=42)
dt_r.fit(X_train_r, y_train_r)
dt_acc_r = accuracy_score(y_test_r, dt_r.predict(X_test_r))
print(f"Decision Tree Accuracy (top 3 features): {dt_acc_r:.2f}")

Decision Tree Accuracy (top 3 features): 0.54


In [123]:
# Train a fresh forest on the reduced feature set instead of refitting `rf`.
# Refitting in place would leave `rf` with only three importances, so
# re-running the feature-ranking cell (which indexes rf.feature_importances_
# by all of X.columns) would fail with a length mismatch. Same hyperparameters
# and seed as the all-features forest, so the score is identical to what an
# in-place refit would produce.
rf_r = RandomForestClassifier(random_state=42)
rf_r.fit(X_train_r, y_train_r)
rf_acc_r = accuracy_score(y_test_r, rf_r.predict(X_test_r))
print(f"Random Forest Accuracy (top 3 features): {rf_acc_r:.2f}")

Random Forest Accuracy (top 3 features): 0.63


In [124]:
# Train a fresh k-NN model on the reduced feature set instead of refitting
# `knn`. Refitting in place would overwrite the all-features model, leaving
# `knn` unable to predict on X_test and making the notebook's state depend on
# which cell ran last. Default settings, as for the all-features model, so the
# score is identical to what an in-place refit would produce.
knn_r = KNeighborsClassifier()
knn_r.fit(X_train_r, y_train_r)
knn_acc_r = accuracy_score(y_test_r, knn_r.predict(X_test_r))
print(f"k-NN Accuracy (top 3 features): {knn_acc_r:.2f}")

k-NN Accuracy (top 3 features): 0.58
