# Import Modules

In [1]:
# General modules & loading data
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from IPython.display import display

from tqdm import trange

import pingouin as pg

In [2]:
pd.options.display.max_rows = 20

# Load Data

## Load feature set

In [3]:
root = os.path.split(os.getcwd())[0]
image_feats = {'t1': pd.read_csv(root + '\\0.2 outputs\T1\Merged_Features_T1.csv'),
               't2': pd.read_csv(root + '\\0.2 outputs\T2\Merged_Features_T2.csv')}


## Test for normality in remission and progression distributions

In [10]:
# print(t1_feats.iloc[:,1:2].columns[0])
# print(pg.normality(t1_feats, dv=t1_feats.iloc[:,1:2].columns[0], group='Class'))


normality_test_df = pd.DataFrame()

for i in trange(1,image_feats['t1'].shape[1]-1):
    ntest = pg.normality(image_feats['t1'], dv=image_feats['t1'].iloc[:,i:i+1].columns[0], group='Class')
    normality_test_df = normality_test_df.append({
                    'Variable':image_feats['t1'].iloc[:,i:i+1].columns[0],
                    'Remission_Normality': str(ntest['normal']['Remission']),
                    'Remission_p_value': ntest['pval']['Remission'],
                    'Progression_Normality': str(ntest['normal']['Progression']),
                    'Progression_p_value': ntest['pval']['Progression']
                    }, 
                   ignore_index=True)

normality_test_df = normality_test_df.reindex(columns = ['Variable',
                                                         'Remission_Normality',
                                                         'Progression_Normality',
                                                         'Remission_p_value',
                                                         'Progression_p_value'])

100%|██████████| 1702/1702 [00:10<00:00, 160.27it/s]


In [11]:
display(normality_test_df)

Unnamed: 0,Variable,Remission_Normality,Progression_Normality,Remission_p_value,Progression_p_value
0,CT_original_shape_Elongation,True,False,1.086067e-01,0.026658
1,CT_original_shape_Flatness,True,True,2.000505e-01,0.139970
2,CT_original_shape_LeastAxisLength,True,True,5.146951e-02,0.088881
3,CT_original_shape_MajorAxisLength,False,True,3.000878e-03,0.050864
4,CT_original_shape_Maximum2DDiameterColumn,True,False,1.061043e-01,0.016650
...,...,...,...,...,...
1697,PET_wavelet-LLL_gldm_LargeDependenceLowGrayLev...,False,False,9.597299e-12,0.001602
1698,PET_wavelet-LLL_gldm_LowGrayLevelEmphasis,False,False,5.664038e-10,0.003519
1699,PET_wavelet-LLL_gldm_SmallDependenceEmphasis,False,True,4.510042e-09,0.084581
1700,PET_wavelet-LLL_gldm_SmallDependenceHighGrayLe...,False,False,7.924935e-08,0.002455


# Test for Significant Features

In [12]:
alpha = 0.05

ttest_df = pd.DataFrame()

for i in trange(1,image_feats['t1'].shape[1]-1):
    res = pg.pairwise_ttests(data=image_feats['t1'], 
                             dv=image_feats['t1'].iloc[:,i:i+1].columns[0], 
                             between='Class', 
                             parametric=False, 
                             correction='auto',
                             alpha=alpha)
    
    res['Feature'] = image_feats['t1'].iloc[:,i:i+1].columns[0]
    res['Significance'] = str((res['p-unc']<alpha)[0])
    ttest_df=ttest_df.append(res)
    
ttest_df = ttest_df.reindex(columns=['Feature',
                                   'Significance',
                                   'p-unc',
                                   'hedges',
                                   'Contrast', 
                                   'A', 
                                   'B', 
                                   'Paired', 
                                   'Parametric', 
                                   'U-val', 
                                   'Tail'])

ttest_df.reset_index(drop=True, inplace=True)
ttest_df.sort_values(by='p-unc', inplace=True)
ttest_df.sort_values(by='hedges', inplace=True)

100%|██████████| 1702/1702 [00:17<00:00, 98.26it/s]


In [13]:
with pd.option_context('display.max_rows', 100, 'display.max_columns', 10):
    display(ttest_df.loc[ttest_df['Significance']=='True'])

Unnamed: 0,Feature,Significance,p-unc,hedges,Contrast,...,B,Paired,Parametric,U-val,Tail
12,CT_original_shape_SurfaceVolumeRatio,True,0.002744,-1.007454,Class,...,Remission,False,False,109.0,two-sided
1269,PET_wavelet-HLL_glcm_Imc2,True,0.029790,-0.998480,Class,...,Remission,False,False,158.0,two-sided
1646,PET_wavelet-LLL_glcm_MCC,True,0.031077,-0.981215,Class,...,Remission,False,False,159.0,two-sided
863,PET_original_shape_SurfaceVolumeRatio,True,0.003233,-0.979647,Class,...,Remission,False,False,112.0,two-sided
463,CT_wavelet-HLL_ngtdm_Contrast,True,0.002457,-0.909553,Class,...,Remission,False,False,107.0,two-sided
...,...,...,...,...,...,...,...,...,...,...,...
1134,PET_wavelet-LHL_gldm_GrayLevelNonUniformity,True,0.022994,1.995668,Class,...,Remission,False,False,424.0,two-sided
1093,PET_wavelet-LHL_glrlm_GrayLevelNonUniformity,True,0.022994,2.017001,Class,...,Remission,False,False,424.0,two-sided
1465,PET_wavelet-HHL_glrlm_GrayLevelNonUniformity,True,0.009533,2.038394,Class,...,Remission,False,False,443.0,two-sided
1506,PET_wavelet-HHL_gldm_GrayLevelNonUniformity,True,0.011562,2.112945,Class,...,Remission,False,False,439.0,two-sided


In [14]:
alpha = 0.01

ttest_df = pd.DataFrame()

for i in trange(1,image_feats['t1'].shape[1]-1):
    res = pg.pairwise_ttests(data=image_feats['t1'], 
                             dv=image_feats['t1'].iloc[:,i:i+1].columns[0], 
                             between='Class', 
                             parametric=False, 
                             correction='auto',
                             alpha=alpha)
    
    res['Feature'] = image_feats['t1'].iloc[:,i:i+1].columns[0]
    res['Significance'] = str((res['p-unc']<alpha)[0])
    ttest_df=ttest_df.append(res)
    
ttest_df = ttest_df.reindex(columns=['Feature',
                                   'Significance',
                                   'p-unc',
                                   'hedges',
                                   'Contrast', 
                                   'A', 
                                   'B', 
                                   'Paired', 
                                   'Parametric', 
                                   'U-val', 
                                   'Tail'])

ttest_df.reset_index(drop=True, inplace=True)
ttest_df.sort_values(by='p-unc', inplace=True)
ttest_df.sort_values(by='hedges', inplace=True)

100%|██████████| 1702/1702 [00:17<00:00, 98.42it/s]


In [15]:
with pd.option_context('display.max_rows', 100, 'display.max_columns', 10):
    display(ttest_df.loc[ttest_df['Significance']=='True'])


Unnamed: 0,Feature,Significance,p-unc,hedges,Contrast,...,B,Paired,Parametric,U-val,Tail
12,CT_original_shape_SurfaceVolumeRatio,True,0.002744,-1.007454,Class,...,Remission,False,False,109.0,two-sided
863,PET_original_shape_SurfaceVolumeRatio,True,0.003233,-0.979647,Class,...,Remission,False,False,112.0,two-sided
463,CT_wavelet-HLL_ngtdm_Contrast,True,0.002457,-0.909553,Class,...,Remission,False,False,107.0,two-sided
757,CT_wavelet-HHH_gldm_SmallDependenceLowGrayLeve...,True,0.003233,-0.830975,Class,...,Remission,False,False,112.0,two-sided
1033,PET_wavelet-LLH_ngtdm_Coarseness,True,0.004455,-0.819385,Class,...,Remission,False,False,118.0,two-sided
...,...,...,...,...,...,...,...,...,...,...,...
77,CT_original_glszm_LargeAreaHighGrayLevelEmphasis,True,0.001562,1.720679,Class,...,Remission,False,False,477.0,two-sided
821,CT_wavelet-LLL_glszm_LargeAreaHighGrayLevelEmp...,True,0.001313,1.745490,Class,...,Remission,False,False,480.0,two-sided
800,CT_wavelet-LLL_glrlm_GrayLevelNonUniformity,True,0.003601,1.851233,Class,...,Remission,False,False,462.0,two-sided
56,CT_original_glrlm_GrayLevelNonUniformity,True,0.003412,1.892895,Class,...,Remission,False,False,463.0,two-sided
