In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

**Data Preprocessing**

In [None]:
# Load the raw student-depression CSV into `df`.
# NOTE(review): absolute path (presumably a Colab upload dir); a later cell reads the
# same file name through a relative path — confirm which location is intended.
df = pd.read_csv('/content/Student Depression Dataset.csv')

In [None]:
# Column names, dtypes and non-null counts of the raw frame.
df.info()

In [None]:
# Distinct values present in the 'Profession' column.
df['Profession'].unique()

In [None]:
# Preview the first rows of the raw frame.
df.head()

In [None]:
# Number of missing values per column.
df.isnull().sum()

In [None]:
# Discard every row that contains at least one missing value.
df = df.dropna()

In [None]:
# One-hot encode the frame with pandas' default column selection and display it.
# NOTE(review): `df_dummies` is not referenced again in the visible cells; the
# modelling path below label-encodes `df` instead.
df_dummies = pd.get_dummies(df)
df_dummies

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Bar chart of row counts per 'Depression' class, to check class balance.
ax = sns.countplot(data=df, x='Depression')
ax.set_title("Class Distribution")
plt.show()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Separate the target from the feature matrix.
y = df['Depression']
X = df.drop(columns=['Depression'])

# Keep one fitted encoder per text column so encoded values can be decoded later.
label_encoders = {}
categorical_columns = X.select_dtypes(include=['object']).columns

for col in categorical_columns:
    encoder = LabelEncoder()
    X[col] = encoder.fit_transform(X[col])
    label_encoders[col] = encoder

print(X)

In [None]:
# Display the label-encoded feature matrix.
X

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample with SMOTE (fixed seed) to produce X_resampled / y_resampled.
# NOTE(review): resampling runs on the full dataset, and the train/test split is taken
# from X_resampled afterwards, so synthetic rows interpolated from eventual test rows
# can end up in the training split. Consider splitting first and resampling only the
# training portion.
# NOTE(review): the text columns were label-encoded to integers above; SMOTE treats
# them as ordinary numeric features — confirm that is acceptable.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every int64/float64 column of the resampled features in place.
# NOTE(review): the scaler is fit on all of X_resampled before the train/test split,
# so test rows contribute to the fitted mean/variance.
scaler = StandardScaler()
# Columns are selected by dtype, so label-encoded columns with these dtypes are scaled too.
numerical_features = X_resampled.select_dtypes(include=['int64','float64']).columns

X_resampled[numerical_features] = scaler.fit_transform(X_resampled[numerical_features])

print(X_resampled)

In [None]:


import matplotlib.pyplot as plt
import seaborn as sns


# Pairwise correlations of the resampled features, shown as an annotated heatmap.
fig, ax = plt.subplots(figsize=(12, 10))
correlation_matrix = X_resampled.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title('Correlation Matrix of Features')
plt.show()

# Scatter/histogram grid over every column of X_resampled.
sns.pairplot(X_resampled)
plt.suptitle('Pairplot of Selected Numerical Features', y=1.02)
plt.show()


# One histogram (with KDE overlay) per feature.
for col in X_resampled.columns:
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.histplot(X_resampled[col], kde=True, ax=ax)
    ax.set_title(f'Distribution of {col}')
    ax.set_xlabel(col)
    ax.set_ylabel('Frequency')
    plt.show()


# One box plot per feature.
for col in X_resampled.columns:
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.boxplot(x=X_resampled[col], ax=ax)
    ax.set_title(f'Box Plot of {col}')
    plt.show()


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
import xgboost as xgb
from sklearn.neural_network import MLPClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout
from tensorflow.keras.utils import to_categorical


In [None]:
# Hold out 20% of the resampled, scaled data as the test split (fixed seed).
# NOTE(review): X_resampled was already oversampled and scaled as a whole in earlier
# cells, so this test split is not independent of the training data.
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)


**Logistic Regression**

In [None]:
# Baseline: logistic regression fit on the training split, scored on the test split.
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)
print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=log_reg.classes_)

# Draw on an explicit axes: ConfusionMatrixDisplay.plot() opens its own figure when no
# `ax` is passed, which left the separately created 8x6 figure blank.
fig, ax = plt.subplots(figsize=(8, 6))
cm_display.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix for Depression Prediction')
plt.show()

# For more detailed analysis, you can also print the raw numbers
print("\nConfusion Matrix:")
print(cm)


**K-Nearest Neighbors**

In [None]:
# k-nearest neighbours (k=5) fit on the training split, scored on the test split.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print("KNN Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
# Label the matrix with this model's own classes rather than borrowing them from log_reg.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=knn.classes_)

# Draw on an explicit axes: ConfusionMatrixDisplay.plot() opens its own figure when no
# `ax` is passed, which left the separately created 8x6 figure blank.
fig, ax = plt.subplots(figsize=(8, 6))
cm_display.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix for Depression Prediction')
plt.show()

# For more detailed analysis, you can also print the raw numbers
print("\nConfusion Matrix:")
print(cm)


**Decision Tree Classifier**

In [None]:
# Decision tree fit on the training split; seeded so reruns give the same tree.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred = dt.predict(X_test)
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
# Label the matrix with this model's own classes rather than borrowing them from log_reg.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=dt.classes_)

# Draw on an explicit axes: ConfusionMatrixDisplay.plot() opens its own figure when no
# `ax` is passed, which left the separately created 8x6 figure blank.
fig, ax = plt.subplots(figsize=(8, 6))
cm_display.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix for Depression Prediction')
plt.show()

# For more detailed analysis, you can also print the raw numbers
print("\nConfusion Matrix:")
print(cm)


**RandomForestClassifier**

In [None]:
# Random forest (100 trees) fit on the training split; seeded for reproducible results.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
# Label the matrix with this model's own classes rather than borrowing them from log_reg.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=rf.classes_)

# Draw on an explicit axes: ConfusionMatrixDisplay.plot() opens its own figure when no
# `ax` is passed, which left the separately created 8x6 figure blank.
fig, ax = plt.subplots(figsize=(8, 6))
cm_display.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix for Depression Prediction')
plt.show()

# For more detailed analysis, you can also print the raw numbers
print("\nConfusion Matrix:")
print(cm)


**XGBoost**

In [None]:
# Gradient-boosted trees (XGBoost) fit on the training split, scored on the test split.
xgb_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
# Label the matrix with this model's own classes rather than borrowing them from log_reg.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=xgb_model.classes_)

# Draw on an explicit axes: ConfusionMatrixDisplay.plot() opens its own figure when no
# `ax` is passed, which left the separately created 8x6 figure blank.
fig, ax = plt.subplots(figsize=(8, 6))
cm_display.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix for Depression Prediction')
plt.show()

# For more detailed analysis, you can also print the raw numbers
print("\nConfusion Matrix:")
print(cm)


**AdaBoostClassifier**

In [None]:
# AdaBoost (50 estimators) fit on the training split; seeded for reproducible results.
adaboost = AdaBoostClassifier(n_estimators=50, random_state=42)
adaboost.fit(X_train, y_train)
y_pred = adaboost.predict(X_test)
print("AdaBoost Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
# Label the matrix with this model's own classes rather than borrowing them from log_reg.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=adaboost.classes_)

# Draw on an explicit axes: ConfusionMatrixDisplay.plot() opens its own figure when no
# `ax` is passed, which left the separately created 8x6 figure blank.
fig, ax = plt.subplots(figsize=(8, 6))
cm_display.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix for Depression Prediction')
plt.show()

# For more detailed analysis, you can also print the raw numbers
print("\nConfusion Matrix:")
print(cm)


**MLPClassifier**

In [None]:
# Two-hidden-layer MLP (100, 50) fit on the training split; seeded so weight
# initialisation and shuffling are reproducible.
mlp = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)
print("MLP Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
# Label the matrix with this model's own classes rather than borrowing them from log_reg.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=mlp.classes_)

# Draw on an explicit axes: ConfusionMatrixDisplay.plot() opens its own figure when no
# `ax` is passed, which left the separately created 8x6 figure blank.
fig, ax = plt.subplots(figsize=(8, 6))
cm_display.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix for Depression Prediction')
plt.show()

# For more detailed analysis, you can also print the raw numbers
print("\nConfusion Matrix:")
print(cm)


**CNN**

In [None]:

# Conv1D takes 3-D input, so append a trailing axis of size 1 to the feature matrix.
X_train_cnn = np.expand_dims(X_train, axis=-1)
X_test_cnn = np.expand_dims(X_test, axis=-1)

# One-hot encode the targets to match the 2-unit softmax output layer.
y_train_cnn = to_categorical(y_train, num_classes=2)
y_test_cnn = to_categorical(y_test, num_classes=2)


cnn_model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    Flatten(),
    Dense(50, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# NOTE: the test split doubles as validation data here, so the per-epoch validation
# metrics are computed on the same rows as the final report below.
cnn_model.fit(X_train_cnn, y_train_cnn, epochs=10, batch_size=32, validation_data=(X_test_cnn, y_test_cnn))

# Collapse softmax outputs / one-hot targets back to class indices.
y_pred_cnn = np.argmax(cnn_model.predict(X_test_cnn), axis=1)
y_test_cnn_labels = np.argmax(y_test_cnn, axis=1)

print("CNN Accuracy:", accuracy_score(y_test_cnn_labels, y_pred_cnn))
print(classification_report(y_test_cnn_labels, y_pred_cnn))

# Build the matrix from the CNN's own predictions. It was previously computed from
# `y_pred`, which still held the predictions of whichever model cell ran before.
class_labels = [0, 1]
cm = confusion_matrix(y_test_cnn_labels, y_pred_cnn, labels=class_labels)
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)

# Draw on an explicit axes: ConfusionMatrixDisplay.plot() opens its own figure when no
# `ax` is passed, which left the separately created 8x6 figure blank.
fig, ax = plt.subplots(figsize=(8, 6))
cm_display.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix for Depression Prediction')
plt.show()

# For more detailed analysis, you can also print the raw numbers
print("\nConfusion Matrix:")
print(cm)


**Support Vector Machine**

In [None]:
# Linear-kernel SVM with probability estimates enabled (predict_proba is used by the
# ROC cell). Seeded because the probability calibration is randomised.
svm_model = SVC(kernel='linear', probability=True, random_state=42)
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
print("SVM Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
# Label the matrix with this model's own classes rather than borrowing them from log_reg.
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=svm_model.classes_)

# Draw on an explicit axes: ConfusionMatrixDisplay.plot() opens its own figure when no
# `ax` is passed, which left the separately created 8x6 figure blank.
fig, ax = plt.subplots(figsize=(8, 6))
cm_display.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix for Depression Prediction')
plt.show()

# For more detailed analysis, you can also print the raw numbers
print("\nConfusion Matrix:")
print(cm)


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
import xgboost as xgb

# Search space per model family. Each entry is a GridSearchCV param_grid for a
# one-step Pipeline whose step is named "model": the "model" key supplies the
# estimator for that step and the "model__<param>" keys tune its hyperparameters.
param_grids = {
    "LogisticRegression": {
        "model": [LogisticRegression()],
        "model__C": [0.01, 0.1, 1, 10]
    },
    "SVC": {
        "model": [SVC()],
        "model__C": [0.1, 1, 10],
        "model__kernel": ["linear", "rbf"]
    },
    "KNN": {
        "model": [KNeighborsClassifier()],
        "model__n_neighbors": [3, 5, 7]
    },
    "DecisionTree": {
        "model": [DecisionTreeClassifier()],
        "model__max_depth": [3, 5, 10],
        "model__criterion": ["gini", "entropy"]
    },
    "RandomForest": {
        "model": [RandomForestClassifier()],
        "model__n_estimators": [50, 100, 200],
        "model__max_depth": [None, 10, 20]
    },
    "AdaBoost": {
        "model": [AdaBoostClassifier()],
        "model__n_estimators": [50, 100, 200],
        "model__learning_rate": [0.01, 0.1, 1]
    },
    "XGBoost": {
        "model": [xgb.XGBClassifier()],
        "model__n_estimators": [50, 100, 200],
        "model__learning_rate": [0.01, 0.1, 0.2],
        "model__max_depth": [3, 5, 10]
    },
    "MLPClassifier": {
        "model": [MLPClassifier()],
        "model__hidden_layer_sizes": [(50,), (100,), (50,50)],
        "model__activation": ["relu", "tanh"],
        "model__solver": ["adam", "sgd"]
    }
}

# Best fitted pipeline per model family, keyed by the names used in `param_grids`.
best_models = {}

for model_name, param_grid in param_grids.items():
    print(f"Tuning {model_name}...")
    # Wrap the family's estimator in a one-step pipeline so the "model__" keys resolve.
    base_estimator = param_grid["model"][0]
    pipeline = Pipeline(steps=[("model", base_estimator)])
    grid_search = GridSearchCV(
        estimator=pipeline,
        param_grid=param_grid,
        cv=3,
        scoring='accuracy',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)
    best_models[model_name] = grid_search.best_estimator_
    print(f"Best parameters for {model_name}: {grid_search.best_params_}")


In [None]:
!pip install keras-tuner -q

In [None]:

from tensorflow import keras
import keras_tuner as kt

def build_model(hp):
    """Build and compile a one-hidden-layer binary classifier for KerasTuner.

    Args:
        hp: hyperparameter container supplied by the tuner. Three values are drawn
            from it: 'units' (32-128 in steps of 32), 'dropout' (0.1-0.5 in steps
            of 0.1) and 'learning_rate' (0.01, 0.001 or 0.0001).

    Returns:
        A compiled Keras model with a sigmoid output, binary cross-entropy loss and
        accuracy as its metric.
    """
    # Draw the hyperparameters in the same order as they are used in the network.
    hidden_units = hp.Int('units', min_value=32, max_value=128, step=32)
    dropout_rate = hp.Float('dropout', 0.1, 0.5, step=0.1)
    learning_rate = hp.Choice('learning_rate', [0.01, 0.001, 0.0001])

    model = keras.Sequential()
    for layer in (
        Dense(hidden_units, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid'),
    ):
        model.add(layer)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Keras tuner
# Carve a validation split out of the training data so hyperparameters are not
# selected on the test split (the search previously validated on X_test / y_test).
X_tune_train, X_tune_val, y_tune_train, y_tune_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)

tuner = kt.Hyperband(build_model, objective='val_accuracy', max_epochs=10, factor=3, directory='my_dir', project_name='depression_detection')
tuner.search(X_tune_train, y_tune_train, epochs=10, validation_data=(X_tune_val, y_tune_val))

# Report the single best hyperparameter set found by the search.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f"Best Hyperparameters for Neural Network: {best_hps.values}")


**XAI Techniques - SHAP**

In [None]:
import shap

In [None]:
# Explain the logistic-regression model with SHAP's linear explainer, using the
# training split as background data.
explainer = shap.LinearExplainer(
    log_reg,
    X_train,
    feature_perturbation='correlation_dependent',
)

# One SHAP value per (test row, feature); wrapped in a frame for inspection.
shap_values = explainer.shap_values(X_test)
shap_df = pd.DataFrame(shap_values, columns=X_test.columns)
shap_df

In [None]:
# Summary plot of the SHAP values over the test split, one row per feature.
shap.summary_plot(shap_values,X_test,feature_names=X_test.columns)

In [None]:
# Dependence plot for the 'Financial Stress' feature over the test split.
shap.dependence_plot('Financial Stress',shap_values,X_test,feature_names=X_test.columns)

In [None]:
# Bar chart of the SHAP values, limited to 15 features.
# The title is set on the current pyplot axes before SHAP draws.
# NOTE(review): confirm the title still shows on the rendered bar plot.
plt.title('Feature Importance')
shap.plots.bar(shap.Explanation(shap_values, data=X_test, feature_names=X_test.columns), max_display=15)

**LightGBM**

In [None]:
import lightgbm as lgb
# LightGBM classifier with default settings, fit on the training split.
gbm_model = lgb.LGBMClassifier()
gbm_model.fit(X_train, y_train)
y_pred_gbm = gbm_model.predict(X_test)
print("GBM Accuracy:", accuracy_score(y_test, y_pred_gbm))
print(classification_report(y_test, y_pred_gbm))

# Build the matrix from LightGBM's own predictions. It was previously computed from
# `y_pred`, which still held the predictions of whichever model cell ran before.
cm = confusion_matrix(y_test, y_pred_gbm)
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=gbm_model.classes_)

# Draw on an explicit axes: ConfusionMatrixDisplay.plot() opens its own figure when no
# `ax` is passed, which left the separately created 8x6 figure blank.
fig, ax = plt.subplots(figsize=(8, 6))
cm_display.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix for Depression Prediction')
plt.show()

# For more detailed analysis, you can also print the raw numbers
print("\nConfusion Matrix:")
print(cm)


**LSTM**

In [None]:
from tensorflow.keras.layers import LSTM

# Reshape to (samples, 1, features): each row becomes a length-1 sequence for the LSTM.
X_train_rnn = X_train.values.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test_rnn = X_test.values.reshape(X_test.shape[0], 1, X_test.shape[1])

rnn_model = Sequential([
    LSTM(units=64, activation='relu', input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])),
    Dense(1, activation='sigmoid')
])

rnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# NOTE: the test split doubles as validation data here, so the per-epoch validation
# metrics are computed on the same rows as the final report below.
rnn_model.fit(X_train_rnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_rnn, y_test))

# Threshold the sigmoid output at 0.5 and flatten (n, 1) -> (n,) to match y_test.
y_pred_rnn = (rnn_model.predict(X_test_rnn) > 0.5).astype(int).ravel()
print("RNN Accuracy:", accuracy_score(y_test, y_pred_rnn))
print(classification_report(y_test, y_pred_rnn))

# Build the matrix from this network's own predictions. It was previously computed
# from `y_pred`, which still held the predictions of whichever model cell ran before.
class_labels = [0, 1]
cm = confusion_matrix(y_test, y_pred_rnn, labels=class_labels)
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)

# Draw on an explicit axes: ConfusionMatrixDisplay.plot() opens its own figure when no
# `ax` is passed, which left the separately created 8x6 figure blank.
fig, ax = plt.subplots(figsize=(8, 6))
cm_display.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix for Depression Prediction')
plt.show()

# For more detailed analysis, you can also print the raw numbers
print("\nConfusion Matrix:")
print(cm)

In [None]:
from tensorflow.keras.layers import Bidirectional, LSTM

# Reshape to (samples, 1, features): each row becomes a length-1 sequence for the LSTM.
X_train_rnn = X_train.values.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test_rnn = X_test.values.reshape(X_test.shape[0], 1, X_test.shape[1])

# Same architecture as the plain LSTM cell, with the recurrent layer made bidirectional.
# Note that this rebinds `rnn_model`.
rnn_model = Sequential([
    Bidirectional(LSTM(units=64, activation='relu'), input_shape=(X_train_rnn.shape[1], X_train_rnn.shape[2])),
    Dense(1, activation='sigmoid')
])

rnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# NOTE: the test split doubles as validation data here, so the per-epoch validation
# metrics are computed on the same rows as the final report below.
rnn_model.fit(X_train_rnn, y_train, epochs=10, batch_size=32, validation_data=(X_test_rnn, y_test))

# Threshold the sigmoid output at 0.5 and flatten (n, 1) -> (n,) to match y_test.
y_pred_rnn = (rnn_model.predict(X_test_rnn) > 0.5).astype(int).ravel()
print("RNN Accuracy:", accuracy_score(y_test, y_pred_rnn))
print(classification_report(y_test, y_pred_rnn))

# Build the matrix from this network's own predictions. It was previously computed
# from `y_pred`, which still held the predictions of whichever model cell ran before.
class_labels = [0, 1]
cm = confusion_matrix(y_test, y_pred_rnn, labels=class_labels)
cm_display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)

# Draw on an explicit axes: ConfusionMatrixDisplay.plot() opens its own figure when no
# `ax` is passed, which left the separately created 8x6 figure blank.
fig, ax = plt.subplots(figsize=(8, 6))
cm_display.plot(ax=ax, cmap='Blues', values_format='d')
ax.set_title('Confusion Matrix for Depression Prediction')
plt.show()

# For more detailed analysis, you can also print the raw numbers
print("\nConfusion Matrix:")
print(cm)

**XAI Techniques - LIME**

In [None]:
!pip install lime
import lime
import lime.lime_tabular


# Explain a single logistic-regression prediction with LIME.
# `model` is a plain alias for log_reg here; later cells rebind the name.
model=log_reg
# Tabular explainer built from the training split's values and column names.
explainer = lime.lime_tabular.LimeTabularExplainer(
    training_data=X_train.values,
    feature_names=X_train.columns,
    class_names=['No Depression', 'Depression'],
    mode='classification'
)


# Row of the test split to explain (positional index).
instance_idx = 0
instance = X_test.iloc[instance_idx]


# Explain the chosen row using the model's class probabilities, keeping 10 features.
explanation = explainer.explain_instance(
    data_row=instance.values,
    predict_fn=model.predict_proba,
    num_features=10
)


explanation.show_in_notebook()

In [None]:
import pandas as pd
import numpy as np
# Record the pandas and NumPy versions in use.
print(pd.__version__)
print(np.__version__)

**Counterfactual Explanations**

In [None]:
!pip install dice-ml -q
import dice_ml
from dice_ml import Dice
import pandas as pd
import numpy as np


# Candidate continuous feature names; only those that are actual columns of
# X_resampled survive the intersection below.
continuous_feature_names = [
    'Age', 'Year of study', 'CGPA', 'Financial Stress', 'Living expenses',
    'Academic performance', 'Social support', 'Sleep quality', 'Health issues',
    'Extra-curricular activities'
]
# Set intersection, so the order of the resulting list is not guaranteed.
continuous_features_present = list(set(continuous_feature_names) & set(X_resampled.columns))


# Combine features and outcome into one frame for dice_ml.Data.
df_resampled = pd.DataFrame(X_resampled, columns=X.columns)
df_resampled['Depression'] = y_resampled

d = dice_ml.Data(dataframe=df_resampled,
                 continuous_features=continuous_features_present,
                 outcome_name='Depression')

# Wrap the fitted logistic regression for DiCE.
m = dice_ml.Model(model=log_reg, backend='sklearn')


exp = Dice(d, m, method="random")


# Explain the first resampled row; the drop is a no-op if 'Depression' is absent.
query_instance_index = 0
query_instance = X_resampled.iloc[[query_instance_index]].drop(columns=['Depression'], errors='ignore')

# Ask for 5 counterfactuals that flip the predicted class.
e = exp.generate_counterfactuals(query_instance,
                              total_CFs=5,
                              desired_class="opposite")

cf_df = e.cf_examples_list[0].final_cfs_df


# Map the counterfactuals and the query row back to the original feature scale
# and decode label-encoded columns.
all_features = X.columns
# Dense array in X's column order; cf_df columns that are not features are skipped.
dummy_array = np.zeros((len(cf_df), len(all_features)))


for feature in cf_df.columns:
    if feature in all_features:
        col_idx = list(all_features).index(feature)
        dummy_array[:, col_idx] = cf_df[feature]

# NOTE(review): assumes `scaler` was fit on exactly these columns in this order —
# it was fit on the dtype-selected `numerical_features`; confirm they coincide.
dummy_array = scaler.inverse_transform(dummy_array)

cf_df_original = pd.DataFrame(dummy_array, columns=all_features)


# Decode each label-encoded column: round to the nearest integer code first.
for col in X.select_dtypes(include=['object']).columns:
    if col in label_encoders:

        cf_df_original[col] = label_encoders[col].inverse_transform(
            cf_df_original[col].round().astype(int)
        )

# Same un-scaling and decoding for the single query row.
original_instance = query_instance.copy()
dummy_original = np.zeros((1, len(all_features)))
for i, feature in enumerate(all_features):
    if feature in original_instance.columns:
        dummy_original[0, i] = original_instance[feature].values[0]
dummy_original = scaler.inverse_transform(dummy_original)
original_instance_original = pd.DataFrame(dummy_original, columns=all_features)


for col in X.select_dtypes(include=['object']).columns:
    if col in label_encoders:
        original_instance_original[col] = label_encoders[col].inverse_transform(
            original_instance_original[col].round().astype(int)
        )

# For each counterfactual, list the columns whose value differs from the query row.
# NOTE(review): `changed_features_list` is not read again in the visible cells; the
# recommendations loop below recomputes the same comparison.
changed_features_list = []
for i in range(len(cf_df_original)):
    changed_features = []
    for col in cf_df_original.columns:
        original_val = original_instance_original[col].values[0]
        cf_val = cf_df_original.iloc[i][col]


        # Continuous features: treat values within 5% relative tolerance as unchanged.
        if col in continuous_features_present:
            if not np.isclose(original_val, cf_val, rtol=0.05):
                changed_features.append(col)

        # Everything else: exact comparison.
        else:
            if original_val != cf_val:
                changed_features.append(col)
    changed_features_list.append(changed_features)


# Print a compact table of selected fields of the (un-scaled, decoded) query row.
print("=== Complete Analysis ===")
print(f"{'':<4}{'Age':<6}{'CGPA':<6}{'Study Satisfaction':<20}{'Sleep Duration':<15}")
for i, row in original_instance_original.iterrows():
    sleep_duration = row['Sleep Duration']
    # 'Sleep Duration' is decoded back to its original labels when it was an object
    # column. A float format spec on a string raises ValueError, so the one-decimal
    # format is applied only to real numbers; numeric output is unchanged.
    if isinstance(sleep_duration, (int, float, np.integer, np.floating)):
        sleep_text = f"{sleep_duration:.1f}"
    else:
        sleep_text = str(sleep_duration)
    print(f"{i:<4}{row['Age']:<6.1f}{row['CGPA']:<6.1f}{row['Study Satisfaction']:<20}{sleep_text:<15}")


# Direction in which a change counts as an actionable recommendation.
# 'neutral' features (and features missing from this map) are reported in either direction.
feature_direction = {
    'CGPA': 'increase',
    'Sleep quality': 'increase',
    'Financial Stress': 'decrease',
    'Living expenses': 'decrease',
    'Social support': 'increase',
    'Academic performance': 'increase',
    'Health issues': 'decrease',
    'Extra-curricular activities': 'increase',
    'Age': 'neutral',
    'Year of study': 'neutral',
    'Course': 'neutral',
    'Gender': 'neutral',
    'City': 'neutral',
    'Academic Pressure': 'decrease',
}

print("\n=== Clear Recommendations ===")

# One "scenario" per counterfactual row; scenarios with no qualifying change are skipped.
for i, cf_row in cf_df_original.iterrows():
    changed_features = []
    for col in cf_df_original.columns:
        original_val = original_instance_original[col].values[0]
        cf_val = cf_row[col]


        # Continuous features use a 5% relative tolerance; all others compare exactly.
        is_changed = False
        if col in continuous_features_present:
            is_changed = not np.isclose(original_val, cf_val, rtol=0.05)
        else:
            is_changed = original_val != cf_val

        if not is_changed:
            continue

        # Drop changes that move against the desired direction for this feature.
        direction = feature_direction.get(col, 'neutral')
        if direction == 'increase' and cf_val <= original_val:
            continue
        if direction == 'decrease' and cf_val >= original_val:
            continue

        changed_features.append((col, original_val, cf_val))

    if len(changed_features) > 0:
        print(f"\nScenario {i+1}:")
        for feature, original_val, cf_val in changed_features:

            # Feature-specific wording; unknown features fall through to the generic message.
            if feature == 'CGPA':
                recommendation = f"  • Improve academic performance: Aim for a CGPA of around {cf_val:.2f} (currently {original_val:.2f}). Seek tutoring, study groups, or time management strategies."
            elif feature == 'Sleep quality':
                recommendation = f"  • Prioritize better sleep: Aim for a sleep quality of {cf_val:.1f} (currently {original_val:.1f}). Establish a bedtime routine or consult a sleep specialist if needed."
            elif feature == 'Financial Stress':
                recommendation = f"  • Reduce financial stress: Try bringing stress levels down to {cf_val:.1f} (currently {original_val:.1f}). Look into financial aid, budgeting tools, or part-time jobs."
            elif feature == 'Living expenses':
                recommendation = f"  • Lower your living expenses: Consider reducing costs to around {cf_val:.1f} (currently {original_val:.1f}) by exploring affordable housing or shared accommodations."
            elif feature == 'Social support':
                recommendation = f"  • Strengthen your social support: Improve social interactions to a level of {cf_val:.1f} (currently {original_val:.1f}). Join groups, stay connected with friends/family."
            elif feature == 'Academic performance':
                recommendation = f"  • Boost academic performance: Aim for {cf_val:.1f} (currently {original_val:.1f}). Use goal-setting, time blocking, and feedback sessions to help."
            elif feature == 'Health issues':
                recommendation = f"  • Address health concerns: Reduce health-related issues to {cf_val:.1f} (currently {original_val:.1f}). Consult health professionals and prioritize self-care."
            elif feature == 'Extra-curricular activities':
                recommendation = f"  • Get involved: Increase participation in extracurricular activities to {cf_val:.1f} (currently {original_val:.1f}) for balance and personal growth."
            elif feature == 'Academic Pressure':
                recommendation = f"  • Manage academic pressure: Consider lowering academic pressure to {cf_val:.1f} (currently {original_val:.1f}). Use stress-reduction techniques and manage expectations."
            elif feature == 'Year of study':
                recommendation = f"  • Year-specific support: You may benefit from resources tailored to Year {int(cf_val)} (currently Year {int(original_val)})."
            elif feature == 'Age':
                recommendation = f"  • Age-related context: At {cf_val:.0f} years (currently {original_val:.0f}), consider age-appropriate wellness and mental health resources."
            elif feature == 'Course':
                recommendation = f"  • Reconsider your course: Explore if the course change from '{original_val}' to '{cf_val}' aligns more with your interests and well-being."
            elif feature == 'Gender':
                recommendation = f"  • Gender-specific well-being: If gender has changed from '{original_val}' to '{cf_val}', consider tailored support groups or safe spaces."
            elif feature == 'City':
                recommendation = f"  • Relocation consideration: A change in city from '{original_val}' to '{cf_val}' might reflect environmental or lifestyle influences on well-being."
            else:
                recommendation = f"  • Adjust {feature}: Consider changing '{feature}' from '{original_val}' to '{cf_val}'."

            print(recommendation)


**ROC Curve**

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc


# Fitted classifiers to compare, keyed by the label shown in the legend.
models = {
    'Logistic Regression': log_reg,
    'KNN': knn,
    'Decision Tree': dt,
    'Random Forest': rf,
    'XGBoost': xgb_model,
    'AdaBoost': adaboost,
    'MLP': mlp,
    'CNN': cnn_model,
    'SVM': svm_model
}

fig, ax = plt.subplots(figsize=(10, 8))

for model_name, model in models.items():
    # The Keras CNN has no predict_proba: its predict() output on the 3-D test array
    # already has one column per class. Column 1 is the positive class in both cases.
    if model_name == 'CNN':
        class_scores = model.predict(X_test_cnn)
    else:
        class_scores = model.predict_proba(X_test)
    y_pred_prob = class_scores[:, 1]

    fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
    roc_auc = auc(fpr, tpr)
    ax.plot(fpr, tpr, label=f'{model_name} (AUC = {roc_auc:.2f})')

# Diagonal reference line for a random classifier.
ax.plot([0, 1], [0, 1], 'k--', label='Random Guess')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC) Curve')
ax.legend(loc='lower right')
plt.show()

In [None]:
!pip install datasets transformers -q

import pandas as pd

import torch

import numpy as np

from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, EvalPrediction

from datasets import Dataset

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

import os

# Set WANDB_DISABLED before the Trainer is created (presumably to turn off
# Weights & Biases logging — confirm for the installed transformers version).
os.environ["WANDB_DISABLED"] = "true"

# Load your CSV
# NOTE(review): this rebinds `df`, and uses a relative path whereas the first load
# cell uses an absolute one. Rows with missing values are not dropped here.

df = pd.read_csv("Student Depression Dataset.csv")

# Reduce the sample size to 100

df = df.sample(n=100, random_state=42)  # Take a random sample of 100 rows

# Transform each row into a sentence

def row_to_sentence(row):
    """Render one dataset row as an English description for the text classifier.

    The 'Depression' target is deliberately NOT written into the sentence: it is the
    label the model is trained to predict, and including it in the input text lets
    the model read the answer directly.

    Args:
        row: mapping-like record (e.g. a pandas Series) holding the feature columns
            referenced below.

    Returns:
        str: a four-sentence description built only from feature columns.
    """
    return (
        f"A {int(row['Age'])}-year-old {row['Gender']} from {row['City']}, studying {row['Degree']}, "
        f"experiences academic pressure of level {row['Academic Pressure']} and work pressure of level {row['Work Pressure']}. "
        f"They have a CGPA of {row['CGPA']}, study satisfaction level {row['Study Satisfaction']}, and job satisfaction level {row['Job Satisfaction']}. "
        f"They sleep {row['Sleep Duration']}, follow a {row['Dietary Habits']} diet, and spend {row['Work/Study Hours']} hours per day on work or study. "
        f"Suicidal thoughts: {row['Have you ever had suicidal thoughts ?']}, Financial stress level: {row['Financial Stress']}, "
        f"Family history of mental illness: {row['Family History of Mental Illness']}."
    )

# Turn every sampled row into its sentence form.
df['text'] = df.apply(row_to_sentence, axis=1)

# Create Hugging Face Dataset

dataset = Dataset.from_pandas(df[['text', 'Depression']])

# Load tokenizer and model
# Note: this rebinds `model`, which earlier cells used for scikit-learn estimators.

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

# Preprocessing function for tokenization

def preprocess_function(examples):
    """Tokenize the 'text' field of a batch, padding to max length and truncating."""

    return tokenizer(examples['text'], padding='max_length', truncation=True)

# Apply preprocessing

dataset = dataset.map(preprocess_function, batched=True)

# Rename 'Depression' column to 'labels'

dataset = dataset.rename_column('Depression', 'labels')

# Set format to 'torch'

dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

# Split dataset into train and test

# Use 'seed' instead of 'random_state' for Hugging Face Datasets

dataset = dataset.train_test_split(test_size=0.2, seed=42)

# Define compute_metrics function for evaluation

def compute_metrics(pred):
    """Compute classification metrics for one Trainer evaluation pass.

    Args:
        pred: evaluation output exposing `label_ids` (true labels) and `predictions`
            (a 2-D array with one score column per class, as returned by the
            sequence-classification head).

    Returns:
        dict with 'accuracy', 'precision', 'recall', 'f1' and 'auc'. 'auc' is NaN
        when the evaluated labels contain a single class, because ROC-AUC is
        undefined in that case.
    """
    labels = pred.label_ids
    logits = np.asarray(pred.predictions)
    preds = logits.argmax(-1)

    # The model emits raw logits, not probabilities. Apply a numerically stable softmax
    # so the AUC is ranked by P(class 1); ranking by the class-1 logit alone ignores
    # the class-0 logit and can order examples differently.
    shifted = logits - logits.max(axis=-1, keepdims=True)
    exp_logits = np.exp(shifted)
    probs = exp_logits[:, 1] / exp_logits.sum(axis=-1)

    acc = accuracy_score(labels, preds)
    # zero_division=0 keeps the value sklearn already returns for an empty denominator
    # and silences the accompanying warning on small evaluation sets.
    prec = precision_score(labels, preds, zero_division=0)
    rec = recall_score(labels, preds, zero_division=0)
    f1 = f1_score(labels, preds, zero_division=0)

    # roc_auc_score raises when only one class is present in the labels.
    if len(np.unique(labels)) > 1:
        auc = roc_auc_score(labels, probs)
    else:
        auc = float('nan')

    return {
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1': f1,
        'auc': auc
    }

# Training arguments

# Trainer configuration: 3 epochs, batch size 8, no checkpoints written.
# NOTE(review): `load_best_model_at_end` / `metric_for_best_model` are set, but
# save_strategy is "no" and no evaluation strategy is configured here, so there appear
# to be no checkpoints or intermediate evaluations to pick a "best" model from —
# confirm against the installed transformers version.
training_args = TrainingArguments(

    output_dir="./bert_results",

    per_device_train_batch_size=8,

    num_train_epochs=3,

    logging_dir="./logs",

    save_strategy="no",

    load_best_model_at_end=True,  # Load the best model based on evaluation metric

    metric_for_best_model="f1"    # Use F1-score as the metric for selecting the best model

)

# Trainer

# Wire the model, arguments, both dataset splits and the metric function together.
trainer = Trainer(

    model=model,

    args=training_args,

    train_dataset=dataset['train'],

    eval_dataset=dataset['test'],

    compute_metrics=compute_metrics

)

# Train the model

trainer.train()

# Evaluate the model on the test set

eval_results = trainer.evaluate()

# Print evaluation results
# Every value is formatted with 4 decimals, so all entries are expected to be numeric.

for metric, value in eval_results.items():

    print(f"{metric}: {value:.4f}")
