In [None]:
#import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import wilcoxon #Wilcoxon signed-rank test for paired samples
from scipy.stats import ranksums #Wilcoxon rank-sum statistic for two independent unequal-sized samples

#load factuals and counterfactuals from their tab-separated text files
#(both paths are relative to the working directory the notebook runs in)
factuals, counterfactuals = (
    pd.read_csv(path, sep="\t")
    for path in ("factuals_FNR_red.txt", "counterfactuals_FNR_red.txt")
)

In [None]:
#keep only factuals with admissible counterfactuals
#A counterfactual row whose sBP is 0 is dropped together with its factual.
#The mask is a plain array (.values), so rows are matched by position, not by index label;
#it is computed once, before either frame is reassigned.
has_counterfactual = counterfactuals["sBP"].values != 0
factuals = factuals[has_counterfactual]
counterfactuals = counterfactuals[has_counterfactual]

In [None]:
#consider four different subgroups: F_HTN, F_noHTN, M_HTN, M_noHTN
#Sex == 0 selects the F_* groups and Sex == 1 the M_* groups; HTN == 1 / HTN == 0 select HTN / noHTN.
#Every mask is built from the factuals and reused on the counterfactuals (.loc aligns it on the index),
#so each counterfactual is assigned to the subgroup of its own factual.
female = factuals['Sex'] == 0
male = factuals['Sex'] == 1
htn = factuals['HTN'] == 1
no_htn = factuals['HTN'] == 0

factuals_f_HTN, counter_f_HTN = factuals.loc[female & htn], counterfactuals.loc[female & htn]
factuals_f_noHTN, counter_f_noHTN = factuals.loc[female & no_htn], counterfactuals.loc[female & no_htn]
factuals_m_HTN, counter_m_HTN = factuals.loc[male & htn], counterfactuals.loc[male & htn]
factuals_m_noHTN, counter_m_noHTN = factuals.loc[male & no_htn], counterfactuals.loc[male & no_htn]

In [None]:
#compare differences between factuals and counterfactuals, for each controllable input feature, in the four different groups
#Paired test (Wilcoxon signed-rank): every factual is compared with its own counterfactual.

#(printed label, column name) for each tested feature, in print order
paired_features = (
    ('FBS:', 'FBS'),
    ('SBP:', 'sBP'),
    ('BMI:', 'BMI'),
    ('LDL:', 'LDL'),
    ('HDL:', 'HDL'),
    ('TG', 'TG'),
    ('Total Cholesterol:', 'Total Cholesterol'),
)

#(printed header, factual rows, counterfactual rows) for each subgroup, in print order
paired_groups = (
    ('F_HTN', factuals_f_HTN, counter_f_HTN),
    ('F_noHTN', factuals_f_noHTN, counter_f_noHTN),
    ('m_HTN', factuals_m_HTN, counter_m_HTN),
    ('m_noHTN', factuals_m_noHTN, counter_m_noHTN),
)

for group_header, factual_rows, counter_rows in paired_groups:
    print(group_header)
    for feature_label, column in paired_features:
        print(feature_label, wilcoxon(factual_rows[column], counter_rows[column]))

In [None]:
#evaluate median, 25th and 75th percentiles of the difference between factuals and counterfactuals
#difference = counterfactual - factual, taken column by column on index-aligned rows
diff_f_HTN = counter_f_HTN.sub(factuals_f_HTN)
diff_m_HTN = counter_m_HTN.sub(factuals_m_HTN)
diff_f_noHTN = counter_f_noHTN.sub(factuals_f_noHTN)
diff_m_noHTN = counter_m_noHTN.sub(factuals_m_noHTN)

#per subgroup: header, per-column median, then per-column first and third quartile
for group_header, group_diff in (
    ('F HTN', diff_f_HTN),
    ('F noHTN', diff_f_noHTN),
    ('M HTN', diff_m_HTN),
    ('M noHTN', diff_m_noHTN),
):
    print(group_header)
    print(group_diff.median())
    for quartile_label, q in (('q1:', 0.25), ('q3:', 0.75)):
        print('')
        print(quartile_label + str(group_diff.quantile(q)))


In [None]:
#compare changes required for hypertensive and non-hypertensive patients, for each controllable input feature
#Wilcoxon rank-sum test (two independent, possibly unequal-sized samples) on the
#counterfactual - factual differences, run separately within the F groups and within the M groups.
#Looping over the features keeps the four diff_* names spelled in one place only.
for feature in ('FBS', 'sBP', 'BMI', 'HDL', 'LDL', 'TG', 'Total Cholesterol'):
    print(feature)
    print('F HTN vs F no HTN')
    print(ranksums(diff_f_HTN[feature], diff_f_noHTN[feature]))
    print('M HTN vs M no HTN')
    print(ranksums(diff_m_HTN[feature], diff_m_noHTN[feature]))
