In [1]:
import kagglehub

# Kaggle handle ("owner/dataset") of the data analysed in this notebook.
DATASET_HANDLE = "shashwatwork/dementia-prediction-dataset"

# Download latest version; `path` holds whatever location kagglehub reports
# and is reused by the following cells to find the CSV.
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: /home/morning/.cache/kagglehub/datasets/shashwatwork/dementia-prediction-dataset/versions/2


In [2]:
import os
# Show which files the download produced so the CSV name used later can be checked.
os.listdir(path)

['dementia_dataset.csv']

In [3]:
import pandas as pd
import fairtl as fl
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder
from sklearn.preprocessing import OneHotEncoder

In [4]:
# Load the single CSV shipped with the dataset; the path is assembled with
# os.path.join instead of formatting the separator by hand.
df = pd.read_csv(os.path.join(path, "dementia_dataset.csv"))

In [5]:
# (rows, columns) of the CSV exactly as loaded, before any cleaning.
df.shape

(373, 15)

In [6]:
# Per-column dtype and non-null count; used to spot columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 373 entries, 0 to 372
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Subject ID  373 non-null    object 
 1   MRI ID      373 non-null    object 
 2   Group       373 non-null    object 
 3   Visit       373 non-null    int64  
 4   MR Delay    373 non-null    int64  
 5   M/F         373 non-null    object 
 6   Hand        373 non-null    object 
 7   Age         373 non-null    int64  
 8   EDUC        373 non-null    int64  
 9   SES         354 non-null    float64
 10  MMSE        371 non-null    float64
 11  CDR         373 non-null    float64
 12  eTIV        373 non-null    int64  
 13  nWBV        373 non-null    float64
 14  ASF         373 non-null    float64
dtypes: float64(5), int64(5), object(5)
memory usage: 43.8+ KB


In [7]:
# Preview the first rows of the raw table.
df.head()

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034


In [8]:
# Remove every row that has a missing value, renumber the index from 0, and
# drop the two identifier columns.
#
# Written as one chained re-assignment instead of three in-place mutations:
# errors='ignore' lets the cell be re-run after the ID columns are already
# gone, where the in-place version raised KeyError on the second run.
df = (
    df.dropna()
      .reset_index(drop=True)
      .drop(columns=['Subject ID', 'MRI ID'], errors='ignore')
)
df.head()

Unnamed: 0,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,Nondemented,1,0,F,R,88,18,3.0,28.0,0.0,1215,0.71,1.444
3,Nondemented,2,538,F,R,90,18,3.0,27.0,0.0,1200,0.718,1.462
4,Nondemented,1,0,M,R,80,12,4.0,28.0,0.0,1689,0.712,1.039


In [9]:
# (rows, columns) after the missing-value rows and identifier columns were removed.
df.shape

(354, 13)

In [10]:
# Distinct values of the 'Group' column, inspected before they are encoded as integers.
df['Group'].unique()

array(['Nondemented', 'Demented', 'Converted'], dtype=object)

In [11]:
# Integer codes for the three labels found in the 'Group' column.
GROUP_CODES = {'Nondemented': 0, 'Converted': 1, 'Demented': 2}

# Series.map turns any value that is not a key of the mapping into NaN, so
# mapping an already-encoded column would wipe it. Encode only while the
# column still holds the raw text labels; this keeps the cell safe to re-run.
if not pd.api.types.is_numeric_dtype(df['Group']):
    df['Group'] = df['Group'].map(GROUP_CODES)

# Fail fast on an unexpected label instead of carrying NaN into the target.
assert df['Group'].notna().all(), "unmapped value in 'Group'"

In [12]:
# Remove the 'Hand' column. Re-assigning with errors='ignore' (rather than an
# in-place drop) means a second run of this cell is a no-op instead of a KeyError.
df = df.drop(columns='Hand', errors='ignore')
df.head()

Unnamed: 0,Group,Visit,MR Delay,M/F,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,0,1,0,M,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,0,2,457,M,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,0,1,0,F,88,18,3.0,28.0,0.0,1215,0.71,1.444
3,0,2,538,F,90,18,3.0,27.0,0.0,1200,0.718,1.462
4,0,1,0,M,80,12,4.0,28.0,0.0,1689,0.712,1.039


In [13]:
# Integer codes for the 'M/F' column: female -> 0, male -> 1.
SEX_CODES = {'F': 0, 'M': 1}

# Series.map yields NaN for values missing from the mapping, so applying it to
# an already-encoded column would erase it. Encode only while the column still
# holds the raw letters; this keeps the cell safe to re-run.
if not pd.api.types.is_numeric_dtype(df['M/F']):
    df['M/F'] = df['M/F'].map(SEX_CODES)

# Fail fast on an unexpected value instead of producing empty gender masks later.
assert df['M/F'].notna().all(), "unmapped value in 'M/F'"

In [14]:
# Target vector: the encoded 'Group' column as a NumPy array.
y = df['Group'].to_numpy()
# Feature table: every remaining column.
X = df.drop(columns='Group')

In [15]:
# Drop features whose variance does not exceed 0.1, printing the shape before
# and after. X is rebound to the transformer's output from here on.
# NOTE(review): the threshold is applied to unscaled columns, so which columns
# survive depends on their units; the two removed are presumably the
# small-magnitude nWBV and ASF — confirm this is intended.
print(X.shape)
selector = VarianceThreshold(threshold=0.1).fit(X)
X = selector.transform(X)
print(X.shape)

(354, 11)
(354, 9)


In [16]:
# Standardise all retained features in one pass.
# NOTE(review): the scaler is fitted on every row before any fold is formed,
# so rows later held out for testing contribute to the scaling statistics —
# confirm this is acceptable for the comparison.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Boolean row selectors taken from the encoded 'M/F' column (0 / 1).
gender_0_mask = df['M/F'].eq(0)
gender_1_mask = df['M/F'].eq(1)

# Group sizes, reported with the labels used in the rest of the notebook.
count_gender_0 = gender_0_mask.sum()
count_gender_1 = gender_1_mask.sum()

print("Female: ", count_gender_0)
print("Male: ", count_gender_1)

# Per-group features and targets, selected with the same mask so rows stay paired.
X_scaled_Gender_0, y_Gender_0 = X_scaled[gender_0_mask], y[gender_0_mask]
X_scaled_Gender_1, y_Gender_1 = X_scaled[gender_1_mask], y[gender_1_mask]

Female:  204
Male:  150


In [17]:
# A single seed is used for both the model factory and the fold shuffling.
seed = 42

# Five shuffled folds, reproducible through the seed.
# NOTE(review): the raw table has several visits per subject (see the 'Visit'
# column), and the subject identifier was dropped earlier. A row-level KFold
# can therefore place visits of one subject in both train and test — consider
# a grouped splitter; confirm how fl.run_experiment consumes `kf`.
kf = KFold(shuffle=True, n_splits=5, random_state=seed)

# Collector that is handed to the experiment runner in the next cell.
results_list = list()

models = fl.build_models(seed)

In [18]:
# results_list is passed to fl.run_experiment and concatenated below, so it is
# presumably appended to on each call. Empty it first: otherwise re-running
# this cell would stack a second copy of every fold's rows into the results.
results_list.clear()

print("Starting experiments for Female(0)")
fl.run_experiment(kf, models, X_scaled_Gender_0, y_Gender_0, 'Female', results_list)

print("Starting experiments for Male(1)")
fl.run_experiment(kf, models, X_scaled_Gender_1, y_Gender_1, 'Male', results_list)

# Combine the collected results into one table and show it as the cell's
# output (rich display) rather than through print().
final_results_df = pd.concat(results_list, ignore_index=True)
final_results_df

Starting experiments for Female(0)
Processing fold 1 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF
Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 2 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 3 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 4 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 5 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Starting experiments for Male(1)
Processing fold 1 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 2 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 3 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 4 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 5 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
   Fold   Group  SVM_TPR  SVM_TNR  SVM_FPR  SVM_FNR  SVM_TP  SVM_TN  SVM_FP  \
0     1  Female     0.00      1.0      0.0     1.00       0      33       0   
1     2  Female     0.25      1.0      0.0     0.75       1      20       0   
2     3  Female     0.00      1.0      0.0     1.00       0      27       0   
3     4  Female     0.00      1.0      0.0     1.00       0      25       0   
4     5  Female     0.50      1.0      0.0     0.50       2      24       0   
5     1    Male     0.00      1.0      0.0     1.00       0       9       0   
6     2    Male     0.00      1.0      0.0     1.00       0      14       0   
7     3    Male     0.50      1.0      0.0     0.50       1       8       0   
8     4    Male     0.00      1.0      0.0     0.00       0      15       0   
9     5    Male     0.00      1.0      0.0     1.00       0      13       0   

   SVM_FN  ...  ANN_FP  ANN



In [19]:
# Combine everything collected in results_list into one table; this is the frame written to disk next.
results_df = pd.concat(results_list, ignore_index=True)

In [20]:
result_path = './results/k128_result.xlsx'

# to_excel does not create missing parent folders, so make sure ./results
# exists before writing; exist_ok keeps re-runs from failing.
os.makedirs(os.path.dirname(result_path), exist_ok=True)

# Persist the per-fold results without the DataFrame index column.
results_df.to_excel(result_path, index=False)

In [21]:
# Read the saved results back from disk for the statistical tests below.
# NOTE: this rebinds `df`, which earlier cells use for the dementia dataset;
# re-running those earlier cells after this one would operate on the results
# table instead.
df = pd.read_excel(result_path)
df.head()

Unnamed: 0,Fold,Group,SVM_TPR,SVM_TNR,SVM_FPR,SVM_FNR,SVM_TP,SVM_TN,SVM_FP,SVM_FN,...,ANN_FP,ANN_FN,NB_TPR,NB_TNR,NB_FPR,NB_FNR,NB_TP,NB_TN,NB_FP,NB_FN
0,1,Female,0.0,1,0,1.0,0,33,0,2,...,0,2,0.0,1.0,0.0,1.0,0,33,0,1
1,2,Female,0.25,1,0,0.75,1,20,0,3,...,0,3,0.25,1.0,0.0,0.75,1,20,0,3
2,3,Female,0.0,1,0,1.0,0,27,0,5,...,0,5,0.428571,1.0,0.0,0.571429,3,27,0,4
3,4,Female,0.0,1,0,1.0,0,25,0,2,...,0,2,0.0,1.0,0.0,1.0,0,25,0,2
4,5,Female,0.5,1,0,0.5,2,24,0,2,...,0,2,0.5,1.0,0.0,0.5,2,24,0,2


In [22]:
# Group label passed to every test call below.
# NOTE(review): how fl.perform_t_tests uses this label is not visible here — confirm.
label = 'Female'

# Run the same comparison for each classifier, in the original reporting
# order, instead of repeating the call once per model.
for model_name in ('SVM', 'DT', 'RF', 'LR', 'KNN', 'ANN', 'NB'):
    fl.perform_t_tests(df, model_name, label)


SVM -TPR: MannwhitneyuResult(statistic=np.float64(14.5), pvalue=np.float64(0.6985353583033387))
SVM - FPR: MannwhitneyuResult(statistic=np.float64(12.5), pvalue=np.float64(1.0))
SVM - FN/FP: MannwhitneyuResult(statistic=np.float64(22.0), pvalue=np.float64(0.0441713449084426))
DT -TPR: TtestResult(statistic=np.float64(-0.931266147332835), pvalue=np.float64(0.378962141918589), df=np.float64(8.0))
DT - FPR: TtestResult(statistic=np.float64(-0.6759082873637976), pvalue=np.float64(0.5181570224458656), df=np.float64(8.0))
DT - FN/FP: MannwhitneyuResult(statistic=np.float64(19.5), pvalue=np.float64(0.1679384709880123))
RF -TPR: MannwhitneyuResult(statistic=np.float64(6.0), pvalue=np.float64(0.1973957318444992))
RF - FPR: MannwhitneyuResult(statistic=np.float64(15.0), pvalue=np.float64(0.4237107971667934))
RF - FN/FP: MannwhitneyuResult(statistic=np.float64(20.0), pvalue=np.float64(0.11369260618551544))
LR -TPR: TtestResult(statistic=np.float64(-1.8772326393844898), pvalue=np.float64(0.0973188