In [None]:
import matplotlib.pyplot as plt
import statistics
import seaborn as sns
import pandas as pd
import numpy as np
from glob import glob
import os
import nibabel as nib

In [None]:

# Derive the patient -> split assignment from the directory layout
# ./data/ct_scans/fold_0/<split>/<patient file> and export it as a LaTeX table.
ct_path = './data/ct_scans/fold_0/*'
# glob yields entries in arbitrary order: sort for a reproducible table and keep
# only directories so stray files do not show up as empty "splits".
directories = sorted(d for d in glob(ct_path) if os.path.isdir(d))

rows = []      # wide format: one record per split holding the list of its patients
rows_alt = []  # long format: one record per (split, patient) pair
for directory in directories:
    dir_name = os.path.basename(directory)
    files = sorted(glob(os.path.join(directory, '*')))
    print(dir_name, len(files))
    row = {'split': dir_name, 'PatientNumber': []}

    for f in files:
        file_name = os.path.basename(f)
        # Patient id is the text before the first '.' of the file name.
        name_only = file_name.split('.')[0]
        # Drop *all* leading zeros (not just the first one) so the id matches the
        # CSV-derived PatientNumber, which is cast to str later in the notebook
        # (presumably from an integer - confirm). An all-zero name collapses to '0'.
        name_only = name_only.lstrip('0') or '0'
        row['PatientNumber'].append(name_only)
        rows_alt.append({'split': dir_name, 'PatientNumber': name_only})

    rows.append(row)

# Widen the column limit before exporting so the long patient lists are not cut off.
pd.set_option('display.max_colwidth', 1000)
tex_name = './results/ct_ich_data_split.tex'
# Make sure the output directory exists on a fresh checkout.
os.makedirs(os.path.dirname(tex_name), exist_ok=True)
# The wide table is only used for the export; give it its own name so
# `split_data` always refers to the long-format frame used downstream.
split_table = pd.DataFrame(rows)
split_table.to_latex(tex_name, index=False, escape=False)

split_data = pd.DataFrame(rows_alt)


In [None]:
# Long hemorrhage column names in the CSV -> abbreviations used in the rest of
# the notebook.
name_map = dict(
    Epidural='EDH',
    Subdural='SDH',
    Intraventricular='IVH',
    Intraparenchymal='IPH',
    Subarachnoid='SAH',
)

# Load the diagnosis table and rename in one chained step.
raw_diagnosis = pd.read_csv('./data/hemorrhage_diagnosis_raw_ct.csv')
data = raw_diagnosis.rename(columns=name_map)
data.head()

In [None]:
# Per-row sum over the columns at positions 2-6 (presumably the five hemorrhage
# sub-type flags - confirm against the CSV layout).
label_block = data.iloc[:, 2:7]
data['Total'] = label_block.sum(axis='columns')

# Rows with at least one positive label vs. rows with none.
N_slice_sick = int((data['Total'] > 0).sum())
N_slice_healthy = int((data['Total'] == 0).sum())

print(N_slice_sick, N_slice_healthy)

In [None]:
# Collapse the rows of `data` to one row per patient by summing each sub-type column.
subtype_columns = ['IVH', 'IPH', 'SAH', 'EDH', 'SDH']
per_patient_sums = data.groupby(['PatientNumber']).agg({name: 'sum' for name in subtype_columns})
summed = per_patient_sums.reset_index()

# Cast the id to str before selecting numeric columns, so it is left out of the
# clipping below.
summed['PatientNumber'] = summed['PatientNumber'].astype('str')
int_columns = summed.select_dtypes(np.number).columns
print(int_columns)

# Cap every per-patient sum at 1, turning counts into "has this sub-type" flags.
summed[int_columns] = summed[int_columns].clip(upper=1)
summed.head(10)

In [None]:
# Patient-level counts: how many patients have at least one sub-type flagged.
N_patients = len(pd.unique(summed['PatientNumber']))

# Columns at positions 1-5 are the five sub-type columns that follow PatientNumber.
summed['Total'] = summed.iloc[:, 1:6].sum(axis='columns')
is_affected = summed['Total'] > 0
is_unaffected = summed['Total'] == 0
N_sick = int(is_affected.sum())
N_healthy = int(is_unaffected.sum())

# Sanity check: the two groups together must account for every patient.
assert N_sick + N_healthy == N_patients
print(f'Number of patients: {N_patients}. Affected patients: {N_sick}. Healthy patients: {N_healthy}. Ratio (sick): {(N_sick*1.0)/N_patients}. Ratio (healthy): {(N_healthy* 1.0)/N_patients}')

In [None]:

def barplot(data, save_path):
    """Plot, save and show the share of affected patients per ICH sub-type.

    Parameters
    ----------
    data : pandas.DataFrame
        Patient-level table. The columns at positions 1-5 are summed to get the
        per-sub-type counts (presumably 0/1 flags - confirm against the caller),
        and a ``Total`` column is read to count affected patients
        (``Total > 0``).
    save_path : str
        Output path without extension; ``<save_path>.pdf`` and
        ``<save_path>.png`` are written. A missing parent directory is created.

    Returns
    -------
    None
    """
    only_hemorrhages = data.iloc[:, 1:6].sum(axis=0) * 1.0
    print(only_hemorrhages.min(), only_hemorrhages.max())
    N_sick_in_data = len(data[data.Total > 0])

    if N_sick_in_data > 0:
        percentages = (only_hemorrhages / N_sick_in_data) * 100
    else:
        # Dividing by zero would give NaN bar lengths; with no affected
        # patients every share is reported as 0 %.
        print('No affected patients in data; plotting 0 % for every sub-type.')
        percentages = only_hemorrhages * 0.0

    # Explicit figure/axes instead of implicit pyplot state, so the saved figure
    # is always the one drawn here.
    fig, ax = plt.subplots()
    sns.barplot(x=percentages, y=only_hemorrhages.index, color='lightblue', edgecolor='k', ax=ax)
    ax.set_xlabel('Portion of affected patients (%)')
    ax.set_ylabel('ICH sub-type')

    # Create the target directory on demand so saving works on a fresh checkout.
    out_dir = os.path.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    fig.savefig(f'{save_path}.pdf', bbox_inches='tight')
    fig.savefig(f'{save_path}.png', bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

In [None]:
# Print both table sizes before joining so they can be compared with the size of
# the merged result.
print(summed.shape, split_data.shape)
# Inner join (the default) on the columns the two frames have in common.
merged = pd.merge(summed, split_data, how='inner')

In [None]:
# Size of the joined table; compare with the shapes printed before the merge to
# spot patients that did not find a match in both frames.
merged.shape

In [None]:
import matplotlib

# Use STIX fonts for both math text and regular text in every figure drawn from here on.
matplotlib.rcParams.update({
    'mathtext.fontset': 'stix',
    'font.family': 'STIXGeneral',
})

# Sub-type distribution for the patients assigned to the training split.
train_patients = merged[merged['split'] == 'train']
barplot(train_patients, './results/ct_ich_train_bleeding_types')

In [None]:
# Sub-type distribution for the patients assigned to the test split.
test_patients = merged.loc[merged['split'] == 'test']
barplot(test_patients, './results/ct_ich_test_bleeding_types')

In [None]:
# Sub-type distribution for the patients assigned to the validation split.
validation_patients = merged.loc[merged['split'] == 'validation']
barplot(validation_patients, './results/ct_ich_validation_bleeding_types')

In [None]:
# Work on a new frame so `data` itself is left untouched. The per-row label count
# sums the columns at positions 2-6 (presumably the five sub-type flags - confirm
# against the CSV layout).
slice_data = data.assign(TotalPerSlice=data.iloc[:, 2:7].sum(axis='columns'))

# "Ambiguous" rows carry more than one positive label, "unambiguous" rows exactly one.
labels_per_slice = slice_data['TotalPerSlice']
ambiguous = slice_data.loc[labels_per_slice > 1]
unambiguous = slice_data.loc[labels_per_slice == 1]

print(f'Number of unambiguous slices: {unambiguous.shape[0]}')
print(f'Number of ambiguous slices: {ambiguous.shape[0]}')
print(f'Patients with ambiguous slices: {ambiguous.PatientNumber.unique()}')
print(f'Number of patients with ambiguous slices: {len(ambiguous.PatientNumber.unique())}')
# Ratio of ambiguous to unambiguous rows, expressed in percent.
print(f'Reduction effect: {((ambiguous.shape[0]*1.0)/(unambiguous.shape[0]))*100}%')