
- EchoTime
- RepetitionTime
- Voxel Volume (SliceThickness)
- 3 x 3 x 3 Voxel Volume (DistanceBetweenSlices)
- PixelSpacing

In [37]:
import pandas as pd
from dicom_csv.spatial import get_slice_spacing, order_series
from tqdm import tqdm
import numpy as np

In [38]:
# Locations of the meta.csv tables for the three datasets.
GBM_META_CSV = '/anvar/public_datasets/preproc_study/gbm/meta.csv'
LGG_META_CSV = '/anvar/public_datasets/preproc_study/lgg/meta.csv'
BGPD_META_CSV = '/anvar/private_datasets/anvar_work/BURDENKO_TCIA_SUBMIT/baseline/meta.csv'

# GBM and LGG keep only the first row seen for every SOPInstanceUID.
df_gbm = pd.read_csv(GBM_META_CSV).drop_duplicates(subset='SOPInstanceUID')
df_lgg = pd.read_csv(LGG_META_CSV).drop_duplicates(subset='SOPInstanceUID')
# NOTE(review): BGPD is loaded without the SOPInstanceUID de-duplication used
# for the other two tables -- confirm that this is intentional.
df_bgpd = pd.read_csv(BGPD_META_CSV)

In [43]:
# Number of GBM rows per Manufacturer value.
df_gbm['Manufacturer'].value_counts()

GE MEDICAL SYSTEMS         8308
SIEMENS                    3794
Philips Medical Systems    1316
QIICR                       796
Name: Manufacturer, dtype: int64

In [44]:
# Number of LGG rows per Manufacturer value.
df_lgg['Manufacturer'].value_counts()

GE MEDICAL SYSTEMS         6108
General Electric           2755
Philips Medical Systems    1530
SIEMENS                     994
QIICR                       304
Name: Manufacturer, dtype: int64

In [45]:
# Number of BGPD rows per Manufacturer value.
df_bgpd['Manufacturer'].value_counts()

GE MEDICAL SYSTEMS         80354
SIEMENS                    12678
Philips Medical Systems     6643
TOSHIBA_MEC                 2578
TOSHIBA                      578
Varian Medical Systems       386
Medical Physics Center       273
ЗАО НПФ 'АЗ'                  80
Name: Manufacturer, dtype: int64

In [19]:
# MR_sequence is the last '/'-separated component of PathToFolder.
for meta in (df_gbm, df_lgg, df_bgpd):
    meta['MR_sequence'] = meta['PathToFolder'].apply(lambda folder: folder.rsplit('/', 1)[-1])

In [20]:
# Attach to every row the count of non-null FileName values found for its
# SeriesInstanceUID.
for meta in (df_gbm, df_lgg, df_bgpd):
    files_per_series = meta.groupby('SeriesInstanceUID')['FileName'].count()
    meta['slices_number'] = meta['SeriesInstanceUID'].map(files_per_series)

In [21]:
def _slice_spacing_by_series(meta, max_delta=0.2):
    """Compute ``get_slice_spacing`` for every multi-slice series in ``meta``.

    Parameters
    ----------
    meta : pandas.DataFrame
        Table with ``SeriesInstanceUID`` and ``slices_number`` columns plus
        whatever ``order_series`` / ``get_slice_spacing`` read from it.
    max_delta : float
        Forwarded unchanged to ``get_slice_spacing``.

    Returns
    -------
    (spacing, failed) : tuple of dict
        ``spacing`` maps SeriesInstanceUID -> value returned by
        ``get_slice_spacing``; ``failed`` maps SeriesInstanceUID -> repr of the
        exception raised for that series.
    """
    spacing, failed = {}, {}
    multi_slice = meta.query('slices_number>1')
    # A single groupby pass yields each series' rows without re-scanning the
    # whole table once per SeriesInstanceUID.
    for uid, sub in tqdm(multi_slice.groupby('SeriesInstanceUID', sort=False)):
        try:
            spacing[uid] = get_slice_spacing(order_series(sub), max_delta=max_delta)
        except Exception as e:
            # Still best-effort (the series simply gets no spacing), but the
            # reason is kept instead of being silently discarded.
            failed[uid] = repr(e)
    return spacing, failed


# Per-dataset record of the series whose spacing could not be computed.
spacing_failures = {}
for dataset_name, meta in (('gbm', df_gbm), ('lgg', df_lgg), ('bgpd', df_bgpd)):
    spacing, failed = _slice_spacing_by_series(meta)
    # Series missing from `spacing` (single-slice or failed) map to NaN.
    meta['PixelSpacing2'] = meta['SeriesInstanceUID'].map(spacing)
    spacing_failures[dataset_name] = failed
    if failed:
        print(f'{dataset_name}: slice spacing unavailable for {len(failed)} series '
              f'(PixelSpacing2 left as NaN); details in spacing_failures[{dataset_name!r}]')

# get_slice_spacing(df_gbm.query('SeriesInstanceUID=="1.3.6.1.4.1.14519.5.2.1.1706.4001.300182168244910270765145894367"'))

100%|██████████| 408/408 [00:25<00:00, 16.32it/s]
100%|██████████| 263/263 [00:13<00:00, 19.18it/s]
100%|██████████| 901/901 [02:27<00:00,  6.10it/s]


In [23]:
# sorted(df_gbm.columns)

In [24]:
# Columns kept in the one-row-per-series tables.
columns = [
    'PatientID', 'SeriesInstanceUID', 'slices_number',
    'EchoTime', 'RepetitionTime', 'InversionTime',
    'PixelSpacing0', 'PixelSpacing1', 'PixelSpacing2',
    'MR_sequence', 'SliceThickness',
]


def _series_table(meta):
    """Reduce ``meta`` to the first row of each SeriesInstanceUID.

    Only ``columns`` are kept, only rows with ``slices_number > 1`` survive,
    and the result is sorted by ``MR_sequence`` (index not yet reset).
    """
    first_rows = meta.drop_duplicates(['SeriesInstanceUID'])
    multi_slice = first_rows[columns].query('slices_number>1')
    return multi_slice.sort_values('MR_sequence')


df_gbm = _series_table(df_gbm).reset_index(drop=True)
df_lgg = _series_table(df_lgg).reset_index(drop=True)
# BGPD additionally drops the rows whose MR_sequence is "CT" or "RTSTRUCT".
df_bgpd = (
    _series_table(df_bgpd)
    .query('MR_sequence!="CT"')
    .query('MR_sequence!="RTSTRUCT"')
    .reset_index(drop=True)
)

In [25]:
# Add the same derived volume columns to each of the three tables.
# NOTE(review): the notebook text calls this a "3 x 3 x 3" voxel volume, but the
# factor applied here is 9 -- confirm which one is intended.
for table in (df_bgpd, df_gbm, df_lgg):
    in_plane = table['PixelSpacing0'] * table['PixelSpacing1']
    nine_in_plane = 9 * table['PixelSpacing0'] * table['PixelSpacing1']

    table['VoxelVolume'] = in_plane * table['SliceThickness']
    # Two candidates for the third dimension: SliceThickness and PixelSpacing2.
    table['3VoxelVolume_1'] = nine_in_plane * table['SliceThickness']
    table['3VoxelVolume_2'] = nine_in_plane * table['PixelSpacing2']
    # Row-wise maximum of the two candidates.
    table['3VoxelVolume'] = table[['3VoxelVolume_1', '3VoxelVolume_2']].max(axis=1)

In [26]:
# df_bgpd.to_csv('~/df_bgpd_stats_mri.csv')
# df_gbm.to_csv('~/df_gbm_stats_mri.csv')
# df_lgg.to_csv('~/df_lgg_stats_mri.csv')

In [27]:
# Replace the in-memory tables with the CSV snapshots from the home directory.
# NOTE(review): the cell that writes these files is commented out, so they must
# already exist on disk for this cell to run.
df_bgpd, df_gbm, df_lgg = map(
    pd.read_csv,
    (
        '~/df_bgpd_stats_mri.csv',
        '~/df_gbm_stats_mri.csv',
        '~/df_lgg_stats_mri.csv',
    ),
)


- EchoTime
- RepetitionTime
- Voxel Volume (SliceThickness)
- 3 x 3 x 3 Voxel Volume (DistanceBetweenSlices)
- PixelSpacing


### todo:
> Дисперсия между T1 разных пациентов по сравнению с дисперсией T1-T2 для одного пациента (гистограммы)

### EchoTime

In [28]:
def get_min_max_nunique(df, col, modality):
    """Summarise column ``col`` over the rows whose MR_sequence equals ``modality``.

    Despite the name, four values are returned, in this order:
    ``(min, median, max, nunique)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with an ``MR_sequence`` column and the column named by ``col``.
    col : str
        Name of the column to summarise.
    modality : str
        Value of ``MR_sequence`` selecting the rows.

    Returns
    -------
    tuple
        Minimum, median and maximum rounded to one decimal place, followed by
        the number of distinct non-null values. When no row matches, the count
        is 0 (and, for a numeric column, the three statistics are NaN).
    """
    # Select the rows once instead of re-running the same query per statistic.
    values = df.query('MR_sequence==@modality')[col]
    return (np.round(values.min(), 1),
            np.round(values.median(), 1),
            np.round(values.max(), 1),
            # np.round is kept on the count so the returned type is unchanged.
            np.round(values.nunique(), 1))

In [36]:
# Print (min, median, max, nunique) per dataset, modality and column.
datasets_by_name = {'GBM': df_gbm, 'LGG': df_lgg, 'BGPD': df_bgpd}
modalities = ('T1', 'T2')  # 'CT1' and 'FLAIR' are currently left out
summary_columns = ('EchoTime', 'RepetitionTime', 'VoxelVolume')

for name, dataset in datasets_by_name.items():
    print(name)
    for modality in modalities:
        for col in summary_columns:
            stats = get_min_max_nunique(dataset, col, modality)
            print(modality, col, stats)
        print('---')
    print('====')

GBM
T1 EchoTime (2.1, 8.3, 19.0, 28)
T1 RepetitionTime (5.0, 500.0, 3379.6, 56)
T1 VoxelVolume (0.5, 3.1, 5.2, 32)
---
T2 EchoTime (20.0, 99.3, 120.0, 38)
T2 RepetitionTime (2020.0, 3500.0, 6650.0, 36)
T2 VoxelVolume (0.2, 1.5, 5.2, 32)
---
====
LGG
T1 EchoTime (3.7, 10.0, 15.0, 11)
T1 RepetitionTime (8.0, 550.0, 3232.0, 38)
T1 VoxelVolume (0.6, 3.4, 13.2, 17)
---
T2 EchoTime (16.1, 103.5, 120.0, 17)
T2 RepetitionTime (897.0, 3500.0, 10000.0, 18)
T2 VoxelVolume (0.5, 1.1, 35.2, 19)
---
====
BGPD
T1 EchoTime (1.8, 9.7, 23.0, 51)
T1 RepetitionTime (7.4, 500.0, 3119.2, 50)
T1 VoxelVolume (0.1, 1.1, 5.3, 53)
---
T2 EchoTime (18.4, 99.6, 120.0, 67)
T2 RepetitionTime (567.0, 4420.0, 8200.0, 57)
T2 VoxelVolume (0.1, 1.1, 4.8, 60)
---
====
