In [1]:
import matplotlib.pyplot as plt
import nibabel as nib
import random
import numpy as np
from skimage import transform
from collections import OrderedDict
import json
from pathlib import Path
from torchvision import transforms
from sklearn.model_selection import train_test_split
import skimage.transform as skTrans
import pandas as pd
import torch

In [2]:
# Read the two JSON configuration files used by the rest of the notebook:
# the encoder settings and the list of datasets to preprocess.
config_encoder = json.loads(Path('configs/config_encoder.json').read_text())
config_datasets = json.loads(Path('configs/preprocessing_datasets.json').read_text())

In [12]:
# Count, for every training mask of every configured dataset, how many voxels
# carry each label value. The result is one row per (mask file, label value).

# Label names per dataset, indexed by the integer value stored in the mask.
# Note that ACDC and CIMAS assign opposite ids to LV and RV.
LABELS_BY_DATASET = {
    'ACDC': ['background', 'RV', 'Myo', 'LV'],
    'CIMAS': ['background', 'LV', 'Myo', 'RV'],
    'Abide': ['background', "cerebellum GM", "cerebellum WM", "cerebral GM", "cerebral WM",
              "thalamus ", "hippocampus", "amygdala", "lateral ventricles",
              "caudate", "putamen", "pallidum", "ventral Diencephalon",
              "third ventricle", "brain stem"],
}

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copying the whole frame
# for every appended row is quadratic in the number of rows.
count_records = []
for dataset in config_datasets:
    labels = LABELS_BY_DATASET.get(dataset['Data'])
    if labels is None:
        # No label table for this dataset: skip all of its masks.
        continue
    for path in Path(dataset['savedir'] + '/train/').rglob("mask.nii.gz"):
        mask_data = nib.load(path)
        mask = mask_data.get_fdata()
        # get_fdata() returns floats, hence the int() casts below.
        unique, count = np.unique(mask, return_counts=True)
        for value, n_voxels in zip(unique, count):
            label_id = int(value)
            count_records.append({'dataset': dataset['Data'],
                                  'label': str(label_id) + ' ' + labels[label_id],
                                  'count': int(n_voxels)})

df_count = pd.DataFrame(count_records, columns=['dataset', 'label', 'count'])
# Keep counts as float64 (they are displayed as floats): a float32 downcast
# stops representing integers exactly above 2**24, which per-label sums over
# many volumes can exceed.
df_count["count"] = pd.to_numeric(df_count["count"]).astype("float64")
df_count

Unnamed: 0,dataset,label,count
0,CIMAS,0 background,2003983.0
0,CIMAS,1 LV,15188.0
0,CIMAS,2 Myo,21947.0
0,CIMAS,3 RV,23266.0
0,CIMAS,0 background,1985141.0
...,...,...,...
0,ACDC,3 LV,8925.0
0,ACDC,0 background,341140.0
0,ACDC,1 RV,9262.0
0,ACDC,2 Myo,10147.0


In [16]:
# Per-label voxel statistics for the CIMAS masks: mean and total voxel count
# per label, plus each foreground label's share of all foreground voxels.
# NOTE(review): the frame is stored under the name df_ACDC although it holds
# CIMAS rows; the name is kept so the notebook's namespace is unchanged.
is_cimas = df_count['dataset'] == 'CIMAS'
df_ACDC = df_count.loc[is_cimas, ['label', 'count']].groupby(['label']).agg(['mean', 'sum'])
df_ACDC['count'] = df_ACDC['count'].round()
# Background is excluded so frequencies are relative to foreground only.
df_ACDC = df_ACDC.drop(index='0 background')
total = df_ACDC[('count', 'sum')].sum()
df_ACDC['freq'] = df_ACDC[('count', 'sum')] / total
df_ACDC = df_ACDC.sort_values('freq')
df_ACDC

Unnamed: 0_level_0,count,count,freq
Unnamed: 0_level_1,mean,sum,Unnamed: 3_level_1
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1 LV,20215.0,262801.0,0.245095
3 RV,29823.0,387704.0,0.361583
2 Myo,32441.0,421735.0,0.393321


In [17]:
# Per-label voxel statistics for the ACDC masks: mean and total voxel count
# per label, plus each foreground label's share of all foreground voxels.
# Only 'label' and 'count' are kept before aggregating: the string-valued
# 'dataset' column cannot be averaged, and pandas >= 2.0 raises a TypeError
# for it instead of silently dropping it from the result.
df_ACDC = (
    df_count.loc[df_count['dataset'] == 'ACDC', ['label', 'count']]
    .groupby(['label'])
    .agg(['mean', 'sum'])
)
df_ACDC['count'] = np.round(df_ACDC['count'])
# Background is excluded so frequencies are relative to foreground only.
df_ACDC = df_ACDC.drop('0 background')
total = df_ACDC[('count', 'sum')].sum()
df_ACDC['freq'] = df_ACDC[('count', 'sum')] / total
df_ACDC = df_ACDC.sort_values('freq', ascending=True)
df_ACDC

Unnamed: 0_level_0,count,count,freq
Unnamed: 0_level_1,mean,sum,Unnamed: 3_level_1
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2 Myo,7355.0,507519.0,0.280136
1 RV,9357.0,645611.0,0.356359
3 LV,9544.0,658557.0,0.363505


In [25]:
# Per-label voxel statistics and class weights for the Abide masks.
# NOTE(review): the frame is stored under the name df_ACDC although it holds
# Abide rows; the name is kept so the notebook's namespace is unchanged.
# Only 'label' and 'count' are kept before aggregating: the string-valued
# 'dataset' column cannot be averaged, and pandas >= 2.0 raises a TypeError
# for it instead of silently dropping it from the result.
df_ACDC = (
    df_count.loc[df_count['dataset'] == 'Abide', ['label', 'count']]
    .groupby(['label'])
    .agg(['mean', 'sum'])
)
df_ACDC['count'] = np.round(df_ACDC['count'])
# Background is excluded so frequencies are relative to foreground only.
df_ACDC = df_ACDC.drop('0 background')
total = df_ACDC[('count', 'sum')].sum()
df_ACDC['freq'] = df_ACDC[('count', 'sum')] / total
# Complement of the frequency, so rarer labels get larger values.
df_ACDC['1-freq'] = 1 - df_ACDC[('count', 'sum')] / total
df_ACDC = df_ACDC.sort_values('1-freq', ascending=False)
# Normalise the complements so that the weights sum to 1.
df_ACDC['weight'] = df_ACDC['1-freq'] / (df_ACDC['1-freq'].sum())
df_ACDC

Unnamed: 0_level_0,count,count,freq,1-freq,weight
Unnamed: 0_level_1,mean,sum,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
label,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
13 third ventricle,1136.0,27274.0,0.000844,0.999156,0.076858
7 amygdala,3670.0,88091.0,0.002727,0.997273,0.076713
11 pallidum,4626.0,111032.0,0.003437,0.996563,0.076659
9 caudate,8175.0,196211.0,0.006073,0.993927,0.076456
6 hippocampus,9123.0,218959.0,0.006777,0.993223,0.076402
12 ventral Diencephalon,9433.0,226388.0,0.007007,0.992993,0.076384
10 putamen,10228.0,245474.0,0.007598,0.992402,0.076339
5 thalamus,18355.0,440526.0,0.013635,0.986365,0.075874
8 lateral ventricles,18796.0,451106.0,0.013962,0.986038,0.075849
14 brain stem,22748.0,545943.0,0.016898,0.983102,0.075623
