In [1]:
import kagglehub

# Kaggle handle of the heart-failure clinical-records dataset.
DATASET_HANDLE = "andrewmvd/heart-failure-clinical-data"

# Download latest version; the returned value is the local directory holding the files.
path = kagglehub.dataset_download(DATASET_HANDLE)
print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: /home/morning/.cache/kagglehub/datasets/andrewmvd/heart-failure-clinical-data/versions/1


In [2]:
import os
# Show what the download directory contains so the data file name can be read off.
os.listdir(path)

['heart_failure_clinical_records_dataset.csv']

In [3]:
import fairtl as fl
import pandas as pd
from sklearn.feature_selection import VarianceThreshold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold

In [4]:
# Build the CSV location inside the download directory, load it, and preview the first rows.
csv_path = f'{path}/heart_failure_clinical_records_dataset.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [5]:
# Check which outcome labels occur in the target column.
df['DEATH_EVENT'].unique()

array([1, 0])

In [6]:
# (rows, columns) of the loaded table, before duplicate rows are dropped.
df.shape

(299, 13)

In [7]:
# Remove exact duplicate rows and renumber the index 0..n-1 in one call.
df = df.drop_duplicates(ignore_index=True)

In [8]:
# (rows, columns) after de-duplication; an unchanged row count means no duplicates were found.
df.shape

(299, 13)

In [9]:
# Separate the outcome column from the predictor columns.
target_col = 'DEATH_EVENT'
X = df.drop(columns=[target_col])
y = df[target_col].to_numpy()

In [10]:
# Low-variance feature filter (threshold 0.1); shapes are printed before and after
# so any removed columns are visible.
# NOTE(review): the filter runs on the unscaled features, so which columns pass
# depends on each column's units — confirm this is intended.
selector = VarianceThreshold(threshold=0.1)
print(X.shape)
selector.fit(X)
X = selector.transform(X)  # X is rebound to the selector's output
print(X.shape)

(299, 12)
(299, 12)


In [11]:
# Boolean row selectors, one per value of the `sex` column.
gender_0_mask = df['sex'].eq(0)
gender_1_mask = df['sex'].eq(1)

# Group sizes (number of True entries in each mask).
count_gender_0, count_gender_1 = gender_0_mask.sum(), gender_1_mask.sum()

print("Female: ", count_gender_0)
print("Male: ", count_gender_1)

# Standardise all features using statistics from the full sample.
# NOTE(review): the scaler is fitted on every row before any cross-validation split,
# so rows later used for evaluation contribute to the scaling statistics — confirm
# this is acceptable, or move scaling inside the per-fold training step.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Per-group feature matrices and label vectors, selected with the masks above.
X_scaled_Gender_0, y_Gender_0 = X_scaled[gender_0_mask], y[gender_0_mask]
X_scaled_Gender_1, y_Gender_1 = X_scaled[gender_1_mask], y[gender_1_mask]

Female:  105
Male:  194


In [12]:
# One seed shared by the model factory and the fold shuffling.
seed = 42

# 5-fold cross-validation with shuffled rows.
# NOTE(review): KFold does not stratify on the label, so the number of positive
# cases can differ a lot between folds in small groups — consider StratifiedKFold
# if fl.run_experiment passes y to split(); confirm against the fairtl helper.
kf = KFold(n_splits=5, shuffle=True, random_state=seed)

# Model collection supplied by the project helper.
models = fl.build_models(seed)

# Accumulator handed to fl.run_experiment and concatenated afterwards.
results_list = []

In [13]:
# results_list is created in an earlier cell and only ever added to here, so
# re-running this cell used to duplicate every row. Empty it first to make the
# cell idempotent (same list object, so later cells still see the results).
results_list.clear()

# (group name, value of `sex`, features, labels) for each subgroup run.
group_runs = [
    ('Female', 0, X_scaled_Gender_0, y_Gender_0),
    ('Male', 1, X_scaled_Gender_1, y_Gender_1),
]

for group_name, sex_code, X_group, y_group in group_runs:
    print(f"Starting experiments for {group_name}({sex_code})")
    # results_list is passed in and concatenated below, so run_experiment is
    # expected to append its result frame(s) to it in place.
    fl.run_experiment(kf, models, X_group, y_group, group_name, results_list)

# One table for both groups with a fresh 0..n-1 index; leaving it as the last
# expression gives the notebook's rich table display instead of wrapped text.
final_results_df = pd.concat(results_list, ignore_index=True)
final_results_df

Starting experiments for Female(0)
Processing fold 1 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF
Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 2 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 3 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 4 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 5 for group Female
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Starting experiments for Male(1)
Processing fold 1 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 2 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 3 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 4 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
Processing fold 5 for group Male
Training and evaluating model: SVM
Training and evaluating model: LR
Training and evaluating model: KNN
Training and evaluating model: RF




Training and evaluating model: DT
Training and evaluating model: ANN
Training and evaluating model: NB
   Fold   Group   SVM_TPR   SVM_TNR   SVM_FPR   SVM_FNR  SVM_TP  SVM_TN  \
0     1  Female  1.000000  0.823529  0.176471  0.000000       4      14   
1     2  Female  0.333333  0.750000  0.250000  0.666667       3       9   
2     3  Female  0.444444  1.000000  0.000000  0.555556       4      12   
3     4  Female  0.142857  0.928571  0.071429  0.857143       1      13   
4     5  Female  0.200000  0.937500  0.062500  0.800000       1      15   
5     1    Male  0.500000  0.827586  0.172414  0.500000       5      24   
6     2    Male  0.636364  0.892857  0.107143  0.363636       7      25   
7     3    Male  0.500000  0.952381  0.047619  0.500000       9      20   
8     4    Male  0.615385  0.923077  0.076923  0.384615       8      24   
9     5    Male  0.600000  0.928571  0.071429  0.400000       6      26   

   SVM_FP  SVM_FN  ...  ANN_FP  ANN_FN    NB_TPR    NB_TNR    NB_FPR  \



In [14]:
# Combine the frames collected in results_list into one table with a fresh 0..n-1 index.
# NOTE(review): this repeats the concat that produced final_results_df in the previous cell.
results_df = pd.concat(results_list, ignore_index=True)

In [15]:
result_path = './results/k1_result.xlsx'
# to_excel does not create missing parent directories, so make sure ./results
# exists first; otherwise this cell fails on a fresh checkout.
os.makedirs(os.path.dirname(result_path), exist_ok=True)
# Write the combined results without the index column.
results_df.to_excel(result_path, index=False)

In [16]:
# Read the exported workbook back in and preview the first rows.
# NOTE(review): this rebinds `df`, which earlier cells use for the raw clinical
# dataset; re-running those cells afterwards would operate on the results table.
df = pd.read_excel(result_path)
df.head()

Unnamed: 0,Fold,Group,SVM_TPR,SVM_TNR,SVM_FPR,SVM_FNR,SVM_TP,SVM_TN,SVM_FP,SVM_FN,...,ANN_FP,ANN_FN,NB_TPR,NB_TNR,NB_FPR,NB_FNR,NB_TP,NB_TN,NB_FP,NB_FN
0,1,Female,1.0,0.823529,0.176471,0.0,4,14,3,0,...,3,1,0.75,1.0,0.0,0.25,3,17,0,1
1,2,Female,0.333333,0.75,0.25,0.666667,3,9,3,6,...,4,5,0.444444,0.75,0.25,0.555556,4,9,3,5
2,3,Female,0.444444,1.0,0.0,0.555556,4,12,0,5,...,2,1,0.333333,0.916667,0.083333,0.666667,3,11,1,6
3,4,Female,0.142857,0.928571,0.071429,0.857143,1,13,1,6,...,1,6,0.142857,1.0,0.0,0.857143,1,14,0,6
4,5,Female,0.2,0.9375,0.0625,0.8,1,15,1,4,...,1,4,0.0,1.0,0.0,1.0,0,16,0,5


In [17]:
# NOTE(review): how `label` is interpreted (e.g. which group it designates) is
# decided inside fl.perform_t_tests — confirm against the fairtl helper.
label = 'Female'

# Model prefixes used in the results table columns (e.g. SVM_TPR, NB_FPR).
model_names = ['SVM', 'DT', 'RF', 'LR', 'KNN', 'ANN', 'NB']

# One call per model instead of seven copy-pasted lines; perform_t_tests prints
# its own per-metric summary lines.
for model_name in model_names:
    fl.perform_t_tests(df, model_name, label)


SVM -TPR: TtestResult(statistic=np.float64(-0.9372526422199056), pvalue=np.float64(0.3760567908491588), df=np.float64(8.0))
SVM - FPR: TtestResult(statistic=np.float64(0.34255540129556866), pvalue=np.float64(0.7407594198183941), df=np.float64(8.0))
SVM - FN/FP: MannwhitneyuResult(statistic=np.float64(16.0), pvalue=np.float64(0.5476190476190477))
DT -TPR: TtestResult(statistic=np.float64(0.7989743991205137), pvalue=np.float64(0.44737458893969184), df=np.float64(8.0))
DT - FPR: TtestResult(statistic=np.float64(-0.39882747612105834), pvalue=np.float64(0.7004542336636175), df=np.float64(8.0))
DT - FN/FP: MannwhitneyuResult(statistic=np.float64(10.0), pvalue=np.float64(0.6904761904761905))
RF -TPR: TtestResult(statistic=np.float64(0.0723209861651795), pvalue=np.float64(0.9441218844150362), df=np.float64(8.0))
RF - FPR: TtestResult(statistic=np.float64(-0.24914911649715576), pvalue=np.float64(0.8095229459181925), df=np.float64(8.0))
RF - FN/FP: MannwhitneyuResult(statistic=np.float64(13.5), 