In [19]:
import kagglehub

# Download a pinned version of the dataset rather than "latest", so that a
# fresh Restart & Run All keeps analysing the same files even if the Kaggle
# dataset is revised later. Version 1 is the version this notebook was
# originally executed against (see the cache path printed below).
path = kagglehub.dataset_download("ikynahidwin/depression-student-dataset/versions/1")

print("Path to dataset files:", path)

Path to dataset files: /home/morning/.cache/kagglehub/datasets/ikynahidwin/depression-student-dataset/versions/1


In [20]:
import os
# List the files in the downloaded dataset folder to find the CSV name.
os.listdir(path)

['Depression Student Dataset.csv']

In [21]:
import pandas as pd
import fairtl_statisticaltest as fl
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder

In [22]:
# Load the dataset's only CSV file from the kagglehub cache folder.
csv_file = f"{path}/Depression Student Dataset.csv"
df = pd.read_csv(csv_file)

In [23]:
# Dimensions of the loaded dataset as (rows, columns).
df.shape

(502, 11)

In [24]:
# Column names, non-null counts and dtypes; shows which columns still hold text labels.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 502 entries, 0 to 501
Data columns (total 11 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Gender                                 502 non-null    object 
 1   Age                                    502 non-null    int64  
 2   Academic Pressure                      502 non-null    float64
 3   Study Satisfaction                     502 non-null    float64
 4   Sleep Duration                         502 non-null    object 
 5   Dietary Habits                         502 non-null    object 
 6   Have you ever had suicidal thoughts ?  502 non-null    object 
 7   Study Hours                            502 non-null    int64  
 8   Financial Stress                       502 non-null    int64  
 9   Family History of Mental Illness       502 non-null    object 
 10  Depression                             502 non-null    object 
dtypes: flo

In [25]:
# Preview the first rows while the categorical columns are still text labels.
df.head()

Unnamed: 0,Gender,Age,Academic Pressure,Study Satisfaction,Sleep Duration,Dietary Habits,Have you ever had suicidal thoughts ?,Study Hours,Financial Stress,Family History of Mental Illness,Depression
0,Male,28,2.0,4.0,7-8 hours,Moderate,Yes,9,2,Yes,No
1,Male,28,4.0,5.0,5-6 hours,Healthy,Yes,7,1,Yes,No
2,Male,25,1.0,3.0,5-6 hours,Unhealthy,Yes,10,4,No,Yes
3,Male,23,1.0,4.0,More than 8 hours,Unhealthy,Yes,7,2,Yes,No
4,Female,31,1.0,5.0,More than 8 hours,Healthy,Yes,4,2,Yes,No


In [26]:
# Integer codes for every text-valued column. The ordered categories
# (sleep duration, dietary habits) are numbered in their listed order.
CATEGORY_CODES = {
    'Gender': {'Female': 0, 'Male': 1},
    'Sleep Duration': {
        'Less than 5 hours': 0,
        '5-6 hours': 1,
        '7-8 hours': 2,
        'More than 8 hours': 3,
    },
    'Dietary Habits': {'Healthy': 0, 'Moderate': 1, 'Unhealthy': 2},
    'Have you ever had suicidal thoughts ?': {'No': 0, 'Yes': 1},
    'Family History of Mental Illness': {'No': 0, 'Yes': 1},
    'Depression': {'No': 0, 'Yes': 1},
}

for column, codes in CATEGORY_CODES.items():
    present = df[column].dropna()

    # Idempotence guard: Series.map on an already-encoded column would turn
    # every value into NaN, so skip columns that only hold valid codes.
    if present.isin(list(codes.values())).all():
        continue

    # Series.map silently yields NaN for labels missing from the mapping;
    # fail loudly instead so bad data cannot leak into the models.
    unexpected = set(present.unique()) - set(codes)
    if unexpected:
        raise ValueError(
            f"Column {column!r} contains labels with no code: {sorted(map(str, unexpected))}"
        )

    df[column] = df[column].map(codes)

In [27]:
# Preview again to confirm the categorical columns now hold integer codes.
df.head()

Unnamed: 0,Gender,Age,Academic Pressure,Study Satisfaction,Sleep Duration,Dietary Habits,Have you ever had suicidal thoughts ?,Study Hours,Financial Stress,Family History of Mental Illness,Depression
0,1,28,2.0,4.0,2,1,1,9,2,1,0
1,1,28,4.0,5.0,1,0,1,7,1,1,0
2,1,25,1.0,3.0,1,2,1,10,4,0,1
3,1,23,1.0,4.0,3,2,1,7,2,1,0
4,0,31,1.0,5.0,3,0,1,4,2,1,0


In [28]:
# Separate the encoded target column from the predictor columns.
target_column = 'Depression'
X = df.drop(columns=[target_column])
y = df[target_column].values

In [29]:
# Filter out low-variance features (threshold 0.1), printing the feature
# matrix shape before and after so any dropped column is visible.
print(X.shape)
selector = VarianceThreshold(threshold=0.1)
selector.fit(X)
X = selector.transform(X)
print(X.shape)

(502, 10)
(502, 10)


In [30]:
# Boolean row selectors per gender group (encoded earlier as 0 = Female, 1 = Male).
gender_0_mask = df['Gender'].eq(0)
gender_1_mask = df['Gender'].eq(1)

# Group sizes, reported so the balance between groups is visible.
count_gender_0 = gender_0_mask.sum()
count_gender_1 = gender_1_mask.sum()

for caption, group_size in (("Female: ", count_gender_0), ("Male: ", count_gender_1)):
    print(caption, group_size)

# Standardise all features once over the whole dataset.
# NOTE(review): the scaler is fitted on every row before the K-fold split
# defined later, so held-out rows contribute to the scaling statistics.
# Consider fitting the scaler inside each training fold instead.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Slice the scaled features and the labels into the two gender groups.
X_scaled_Gender_0, y_Gender_0 = X_scaled[gender_0_mask], y[gender_0_mask]
X_scaled_Gender_1, y_Gender_1 = X_scaled[gender_1_mask], y[gender_1_mask]

Female:  235
Male:  267


In [31]:
seed = 42  # one seed shared by the model factory and the fold shuffling

# Models to compare, built by the project helper module.
models = fl.build_models(seed)

# Shuffled 5-fold splitter; the same splitter object is reused for both groups.
kf = KFold(n_splits=5, shuffle=True, random_state=seed)

# Handed to fl.run_experiment in the next cell and concatenated afterwards.
results_list = []

In [32]:
# One entry per gender group: banner text, features, labels, group name.
group_runs = (
    ("Starting experiments for Female(0)", X_scaled_Gender_0, y_Gender_0, 'Female'),
    ("Starting experiments for Male(1)", X_scaled_Gender_1, y_Gender_1, 'Male'),
)

# Run the same cross-validated experiment for each group, in order.
for banner, X_group, y_group, group_name in group_runs:
    print(banner)
    fl.run_experiment(kf, models, X_group, y_group, group_name, results_list)

# Combine everything gathered in results_list into a single table and show it.
final_results_df = pd.concat(results_list, ignore_index=True)
print(final_results_df)

Starting experiments for Female(0)
Processing fold 1 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF
Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 2 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 3 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 4 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 5 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Starting experiments for Male(1)
Processing fold 1 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 2 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 3 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 4 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 5 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
   Fold   Group   SVM_TPR   SVM_TNR   SVM_FPR   SVM_FNR  SVM_TP  SVM_TN  \
0     1  Female  0.923077  1.000000  0.000000  0.076923      24      21   
1     2  Female  0.950000  1.000000  0.000000  0.050000      19      27   
2     3  Female  0.826087  0.916667  0.083333  0.173913      19      22   
3     4  Female  0.909091  0.880000  0.120000  0.090909      20      22   
4     5  Female  0.913043  0.958333  0.041667  0.086957      21      23   
5     1    Male  0.964286  0.884615  0.115385  0.035714      27      23   
6     2    Male  0.928571  0.923077  0.076923  0.071429      26      24   
7     3    Male  0.958333  0.827586  0.172414  0.041667      23      24   
8     4    Male  0.925926  0.961538  0.038462  0.074074      25      25   
9     5    Male  0.903226  0.954545  0.045455  0.096774      28      21   

   SVM_FP  SVM_FN  ...  ANN_FP  ANN_FN    NB_TPR    NB_TNR    NB_FPR  \



In [33]:
# Concatenate the frames in results_list into one table with a fresh 0..n-1 index.
# This repeats the concat that produced final_results_df in the previous cell.
results_df = pd.concat(results_list, ignore_index=True)

In [34]:
result_path = './results/K54_result.xlsx'

# DataFrame.to_excel does not create missing parent folders, so make sure
# ./results exists; otherwise a run on a fresh checkout fails here.
os.makedirs(os.path.dirname(result_path), exist_ok=True)

# Persist the combined per-fold results without the positional index column.
results_df.to_excel(result_path, index=False)

In [35]:
# Reload the results workbook that was just written and preview its first rows.
# NOTE(review): this rebinds `df`, which held the student dataset until now;
# earlier cells that read `df` will see the results table if re-run after this point.
df = pd.read_excel(result_path)
df.head()

Unnamed: 0,Fold,Group,SVM_TPR,SVM_TNR,SVM_FPR,SVM_FNR,SVM_TP,SVM_TN,SVM_FP,SVM_FN,...,ANN_FP,ANN_FN,NB_TPR,NB_TNR,NB_FPR,NB_FNR,NB_TP,NB_TN,NB_FP,NB_FN
0,1,Female,0.923077,1.0,0.0,0.076923,24,21,0,2,...,0,2,0.923077,0.857143,0.142857,0.076923,24,18,3,2
1,2,Female,0.95,1.0,0.0,0.05,19,27,0,1,...,0,1,1.0,0.962963,0.037037,0.0,20,26,1,0
2,3,Female,0.826087,0.916667,0.083333,0.173913,19,22,2,4,...,2,1,0.913043,1.0,0.0,0.086957,21,24,0,2
3,4,Female,0.909091,0.88,0.12,0.090909,20,22,3,2,...,2,1,0.863636,0.76,0.24,0.136364,19,19,6,3
4,5,Female,0.913043,0.958333,0.041667,0.086957,21,23,1,2,...,0,2,0.782609,0.958333,0.041667,0.217391,18,23,1,5


In [36]:
# Group label forwarded to every statistical test below.
label = 'Female'

# Run the same battery of tests for each model, in the original order.
for model_name in ('SVM', 'DT', 'RF', 'LR', 'KNN', 'ANN', 'NB'):
    fl.perform_t_tests(df, model_name, label)


SVM -TPR: TtestResult(statistic=np.float64(-1.3449239435783462), pvalue=np.float64(0.21553055850655786), df=np.float64(8.0))
SVM - FPR: TtestResult(statistic=np.float64(-1.1926407441646698), pvalue=np.float64(0.2671810116983668), df=np.float64(8.0))
SVM - FN/FP: MannwhitneyuResult(statistic=np.float64(19.5), pvalue=np.float64(0.1732171126447002))
DT -TPR: TtestResult(statistic=np.float64(-1.1815643155513884), pvalue=np.float64(0.27130881894285214), df=np.float64(8.0))
DT - FPR: TtestResult(statistic=np.float64(0.01515473650784359), pvalue=np.float64(0.9882798611539035), df=np.float64(8.0))
DT - FN/FP: TtestResult(statistic=np.float64(-0.5386764143691267), pvalue=np.float64(0.6047711702959234), df=np.float64(8.0))
RF -TPR: TtestResult(statistic=np.float64(-2.855981847984476), pvalue=np.float64(0.0212824379402325), df=np.float64(8.0))
RF - FPR: TtestResult(statistic=np.float64(0.7189430011489745), pvalue=np.float64(0.49263838566993734), df=np.float64(8.0))
RF - FN/FP: MannwhitneyuResult(