In [8]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [9]:
# Load the table and discard rows whose CDR or MMSE entry is the literal
# string "None".
raw_data = (
    pd.read_csv("ADNI_dataset.csv")
    .pipe(lambda table: table[table["CDR"] != "None"])
    .pipe(lambda table: table[table["MMSE"] != "None"])
)

# Assemble the analysis table: the "Group" label first, then CDR, MMSE and
# every column from "BrainSeg" onward, all cast to float. Finally keep only
# rows with CDR >= 0 (this also drops rows whose CDR is NaN).
roi_data = pd.concat(
    [
        raw_data["Group"],
        pd.concat(
            [raw_data[["CDR", "MMSE"]], raw_data.loc[:, "BrainSeg":]],
            axis=1,
        ).astype(float),
    ],
    axis=1,
).pipe(lambda table: table[table["CDR"] >= 0])

In [10]:
# Split the table by column position: everything from "BrainSeg" onward is
# treated as a measurement, everything up to and including "MMSE" as
# clinical information.
volume_data = roi_data.loc[:, "BrainSeg":]
clinical_data = roi_data.loc[:, :"MMSE"]

# NOTE: a per-column rescaling of volume_data by the order of magnitude of
# its mean used to run here; it is currently disabled.

# Re-join both halves side by side for the per-group selections.
data = pd.concat((clinical_data, volume_data), axis="columns")

In [11]:
# One sub-table per diagnostic group. The first group is labelled "CN" in
# the "Group" column but is called NC throughout this notebook.
NC, MCI, AD = (
    data[data["Group"] == label] for label in ("CN", "MCI", "AD")
)

In [None]:
from scipy.stats import f_oneway
from scipy.stats import ttest_ind

# One box plot per measurement column (NC vs. MCI vs. AD), annotated with:
#   * title stars - one-way ANOVA across the three groups
#                   (p < 0.05 -> "*", p < 0.005 -> "**", p < 0.001 -> "***")
#   * bars        - pairwise t-tests with p < 0.05, drawn between the two
#                   groups being compared (NC-MCI, MCI-AD, NC-AD)
fig = plt.figure(figsize=(30, 200))
ax = {}

# Vertical gap between stacked significance bars, as a fraction of each
# panel's y-range. The columns span very different magnitudes, so a fixed
# offset in data units would either collapse the bars onto each other or
# push them far into the plot.
# NOTE(review): 2% per step should keep all three bars inside matplotlib's
# default autoscale margin above the data - confirm visually.
BAR_STEP_FRACTION = 0.02

for index, column in enumerate(volume_data.columns):
    groups = [NC[column], MCI[column], AD[column]]

    # The 23 x 3 grid has room for at most 69 panels.
    panel = ax[index] = fig.add_subplot(23, 3, index + 1)
    panel.boxplot(groups)
    panel.set_xticklabels(["NC", "MCI", "AD"])

    # Read the limits before drawing the bars so they are positioned
    # relative to the box plot's own autoscaled range.
    y_min, y_max = panel.get_ylim()
    bar_step = BAR_STEP_FRACTION * (y_max - y_min)

    _, p = f_oneway(*groups)
    p_NM = ttest_ind(NC[column], MCI[column]).pvalue
    p_MA = ttest_ind(MCI[column], AD[column]).pvalue
    p_NA = ttest_ind(NC[column], AD[column]).pvalue

    # One star per threshold passed; a NaN p-value passes none.
    stars = "*" * int(sum(p < cutoff for cutoff in (0.05, 0.005, 0.001)))
    panel.set_title(column + stars)

    # Bars are stacked downward from the top of the panel, one step apart.
    if p_NM < 0.05:
        panel.plot([1, 2], [y_max, y_max])
    if p_MA < 0.05:
        panel.plot([2, 3], [y_max - bar_step, y_max - bar_step])
    if p_NA < 0.05:
        panel.plot([1, 3], [y_max - 2 * bar_step, y_max - 2 * bar_step])

#fig.savefig("temp_result.png")

In [16]:
from scipy.stats import f_oneway
from scipy.stats import ttest_ind

# Per-measurement summary table: group means and standard deviations
# (pandas' default Series.std) plus the statistic and p-value of each
# pairwise t-test (scipy defaults): NM = NC vs MCI, MA = MCI vs AD,
# NA = NC vs AD.
header = ["Name", "NC_mean", "NC_std", "MCI_mean", "MCI_std", "AD_mean", "AD_std", "NM_t", "NM_p", "MA_t", "MA_p", "NA_t", "NA_p"]

# Collect one record per column and build the frame once at the end.
# Appending row by row through .loc reallocates the frame on every step
# and depends on index.max() of an empty frame being NaN.
records = []
for column in volume_data.columns:
    nc, mci, ad = NC[column], MCI[column], AD[column]
    NM_t, NM_p = ttest_ind(nc, mci)
    MA_t, MA_p = ttest_ind(mci, ad)
    NA_t, NA_p = ttest_ind(nc, ad)
    records.append([
        column,
        nc.mean(), nc.std(),
        mci.mean(), mci.std(),
        ad.mean(), ad.std(),
        NM_t, NM_p, MA_t, MA_p, NA_t, NA_p,
    ])

# Rows are indexed 0..n-1 in column order; an empty input still yields a
# frame with the expected columns.
dataset = pd.DataFrame(records, columns=header)
#dataset.to_csv("file2.csv", mode="w")

In [22]:
# List every measurement whose group means change strictly monotonically
# along NC -> MCI -> AD. The printed prefix is "증가" (increase) when the
# means rise and "감소" (decrease) when they fall; anything else is skipped.
trend_columns = zip(
    dataset["Name"], dataset["NC_mean"], dataset["MCI_mean"], dataset["AD_mean"]
)
for name, nc_mean, mci_mean, ad_mean in trend_columns:
    if nc_mean < mci_mean < ad_mean:
        print("증가: " + name)
    elif nc_mean > mci_mean > ad_mean:
        print("감소: " + name)

증가: VentricleChoroidVol
감소: lhCortex
감소: rhCortex
감소: Cortex
감소: SubCortGray
감소: TotalGray
감소: BrainSegVol-to-eTIV
감소: MaskVol-to-eTIV
증가: lhSurfaceHoles
증가: SurfaceHoles
증가: Left-Lateral-Ventricle
증가: Left-Inf-Lat-Vent
감소: Left-Thalamus-Proper
감소: Left-Putamen
증가: 3rd-Ventricle
증가: 4th-Ventricle
감소: Left-Hippocampus
감소: Left-Amygdala
증가: CSF
감소: Left-Accumbens-area
감소: Left-VentralDC
증가: Left-choroid-plexus
증가: Right-Lateral-Ventricle
증가: Right-Inf-Lat-Vent
감소: Right-Putamen
감소: Right-Hippocampus
감소: Right-Amygdala
감소: Right-Accumbens-area
감소: Right-VentralDC
증가: Right-choroid-plexus
증가: WM-hypointensities
감소: non-WM-hypointensities
증가: CC_Mid_Posterior
감소: CC_Mid_Anterior
