In [1]:
import os
print(os.getcwd())
# change working directory to project root
import platform
if platform.system() is "Windows":
    os.chdir('D:\\git\\cardio')
else:
    os.chdir('/Users/Code/Git/cardio')
print(os.getcwd())
from src.utils.utils_io import Console_and_file_logger
from src.utils.file_wrapper.app.Files import Basefile, Nrrdfile
import logging
Console_and_file_logger('ahf_export_5', logging.INFO)

import re
import nrrd
import numpy as np
from scipy import stats
from collections import OrderedDict
import json
import SimpleITK as sitk


D:\git\cardio\notebooks
D:\git\cardio


  mpl.use('TkAgg')
2018-10-27 20:01:21,942 INFO -------------------- Start --------------------
2018-10-27 20:01:21,942 INFO Filename: ahf_export_5
2018-10-27 20:01:21,942 INFO Log directory: ./logs/


Working directory: D:\git\cardio.
Log dir: ./logs/


In [2]:
class ComplexEncoder(json.JSONEncoder):
    def default(self, obj):
        if hasattr(obj,'reprJSON'):
            return obj.reprJSON()
        else:
            return json.JSONEncoder.default(self, obj)

In [3]:
class Serie:
    
    def __init__(self, se_name):
        logging.debug('Serie created: {}'.format(se_name))
        self.se_name = se_name    
        self.se_name_short = os.path.split(self.se_name)[1]
        self.img, self.header = self.load_serie(self.se_name)
        logging.debug('Serie {} loaded.'.format(self.se_name_short))
        
    def load_serie(self, se_name):
        """
        This function uses the pynnrd library for nrrd image loading
        or Simpleitk for dcm loading
        :param filename:
        :return: img_numpy_arr, img_header_dict
        """
        filename, file_extension = os.path.splitext(se_name)

        if file_extension == ".dcm":

            sitk_img = sitk.ReadImage(se_name)
            img = sitk.GetArrayFromImage(sitk_img)
            header={}
            header['sizes'] = sitk_img.GetSize()
            header['dimension'] = sitk_img.GetDimension()
            try:
                header['0008|103e'] = sitk_img.GetMetaData('0008|103e')
            except Exception as e:
                header['0008|103e'] = 'not_given'
                pass
            header['space directions'] = sitk_img.GetDirection()
            header['space origin'] = sitk_img.GetOrigin()

        elif file_extension == "nrrd":
            img, header = nrrd.read(se_name)

        return img, header
    
    #def reprJSON(self):
    #    return self.to_json()
    
    def to_json(self):
        json_representation = {}
        json_representation['se_name'] = self.se_name_short
        d = stats.describe(self.img.flatten())
        json_representation['img_min'] = int(d.minmax[0])
        json_representation['img_max'] = int(d.minmax[1])
        json_representation['img_mean'] = int(d.mean)
        json_representation['img_variance'] = int(d.variance)
        json_representation['sizes'] = str(self.header.get('sizes', [0,0,0]))
        json_representation['x'] = int(self.header['sizes'][0])
        json_representation['y'] = int(self.header['sizes'][1])
        json_representation['dimension'] = int(self.header.get('dimension',0))
        json_representation['0008|103e'] = str(self.header.get('0008|103e', 'not_given'))
        return json_representation
        
    def get_serie_name(self, se_name):
        # pattern does fit to series from patient 0001 but not to 0706 for example
        # pattern = '^.+(\d\.\d\.\d{2}\.\d\.\d{4}\.\d\.\d{2}\.\d\.\d{4}\.\d{29}\.\w+)$'
        pattern = '^.+(\\|\/)((\d|\.)+)$'
        se_name_short = re.search(pattern, se_name).group(1)
        return se_name_short
        
    

In [4]:

class Study:
    
    def __init__(self, st_name):
        logging.debug('Study created; {}'.format(st_name))
        self.st_name = st_name 
        self.st_name_short = os.path.basename(os.path.normpath(self.st_name))
        #self.series_description = self.get_series_description(st_name)
        self.series = self.describe_study(st_name)

            
    def reprJSON(self):
        return vars(self)
    
    
    def get_series_description(self, directory):
        series = []
        series = [Serie(os.path.join(directory, serie_name)).to_json() for serie_name in os.listdir(directory) if self.validate(serie_name)]
        return series
    
    def get_series_full(self, directory):
        series = []
        series = [Serie(os.path.join(directory, serie_name)) for serie_name in os.listdir(directory) if self.validate(serie_name)]
        return series
    
    def describe_study(self, directory):
        description = {}
        # no exception handling in list comprehension
        # better to loop only once
        #series =  [Serie(os.path.join(directory, serie_name)).to_json() for serie_name in os.listdir(directory) if self.validate(serie_name)]
        series = []
        description['img_min'] = 100
        description['img_max'] = 0
        description['img_mean'] = 0
        description['img_variance'] = 0
        
        for serie_name in os.listdir(directory):
            if self.validate(serie_name):
                try:
                    serie = Serie(os.path.join(directory, serie_name)).to_json()
                    description['img_min'] = min(description['img_min'], serie['img_min'])
                    description['img_max'] = max(description['img_max'], serie['img_max'])
                    description['img_mean'] += serie['img_mean']
                    description['img_variance'] += serie['img_variance']
                    
                    series.append(serie)
                except Exception as e:
                    logging.error('Faild to get infos for study: {}'.format(serie_name))
                    logging.error('Error: {}'.format(str(e)))
                    continue
                    
        
        description['num_series'] = len(series)
        description['img_mean'] = description['img_mean'] / description['num_series']
        description['img_variance'] = description['img_variance'] / description['num_series']
        description['sizes'] = series[0].get('sizes')
        description['x'] = series[0].get('x')
        description['y'] = series[0].get('y')
        description['dimension'] = series[0].get('dimension')
        description['0008|103e'] = series[0].get('0008|103e')
        return description
        
        
        
    def validate(self, se_name):
        if se_name.startswith('.'):
            return False
        
        file, extension = os.path.splitext(se_name)
        
        if extension =='.dcm':
            return True
        
        if extension =='.nrrd':
            return True
        
        return False
    

    
        

In [5]:
# sumarize all series descriptions to one Study description to lower the json file size
# test Study
study = Study('D:\\small\\dcm\\0001-02Z1L14N-2006-11-09\\1.3.12.2.1107.5.99.2.1013.30000008030612131778100062857')
print(json.dumps(study.reprJSON(), cls=ComplexEncoder, indent=1))


{
 "st_name": "D:\\small\\dcm\\0001-02Z1L14N-2006-11-09\\1.3.12.2.1107.5.99.2.1013.30000008030612131778100062857",
 "st_name_short": "1.3.12.2.1107.5.99.2.1013.30000008030612131778100062857",
 "series": {
  "img_min": 0,
  "img_max": 2741,
  "img_mean": 160.11111111111111,
  "img_variance": 81096.22222222222,
  "num_series": 18,
  "sizes": "(192, 168, 1)",
  "x": 192,
  "y": 168,
  "dimension": 3,
  "0008|103e": "axi6mm"
 }
}


In [6]:

class Patient:
    
    
    def __init__(self, p_name):
        
        logging.debug('patient created: {}'.format(p_name))
        # directory name = patient name
        self.p_name = p_name
        self.p_name_short = os.path.basename(os.path.normpath(self.p_name))
        try:
            self.studies = self.get_studies(self.p_name)
            self.num_studies = len(self.studies)
            self.num_series = sum([study.series.get('num_series') for study in self.studies])
            logging.info('Patient {} with {} studies and {} series.'.format(self.p_name_short, len(self.studies), self.num_series))
            logging.info('Series: {}'.format([study.series.get('num_series') for study in self.studies]))
        except Exception as e:
            logging.error('Loading patient {} failed: {}'.format(self.p_name_short, str(e)))
            
    def to_json(self):
        json_repr = vars(self)
        json_repr['studies'] = [study.to_json() for study in self.studies]
        return json_repr
    
    def reprJSON(self):
        return vars(self)
        
    def get_studies(self, directory):
        studies = []
        studies = [Study(os.path.join(directory, study_name)) for study_name in os.listdir(directory) if not study_name.startswith('.')]
        return studies

        

In [7]:
# test patient

patient = Patient('D:\\small\\dcm\\0001-02Z1L14N-2006-11-09\\')

2018-10-27 20:01:50,638 INFO Patient 0001-02Z1L14N-2006-11-09 with 52 studies and 1258 series.
2018-10-27 20:01:50,638 INFO Series: [18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 25, 25, 25, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 25, 25, 30, 30, 13, 27, 88, 88, 1, 88, 88, 14, 66]


In [16]:
class Datamanager:
    
    def __init__(self, sc_name, root_dir):
        logging.info('Datamanager created: {}'.format(sc_name))
        self.sc_name = sc_name
        self.root_dir = root_dir
        self.patients = self.get_patients(root_dir)
    
    def reprJSON(self):
        return dict(sc_name=self.sc_name, root_dir=self.root_dir, patients=self.patients)
    
    def get_patients(self, directory):
        patients = []
        # hack for ignoring all files starting with a dot
        # create one patient per directory
        patients = [Patient(os.path.join(directory, patient_name)) for patient_name in os.listdir(directory) if not patient_name.startswith('.')]
        return patients
        
        
        

In [15]:
experiment = 'example_small'
data_manager = Datamanager(sc_name=experiment, root_dir='D:\\small\\dcm\\')
logging.info('datamanager done, writing now json file')   
with open(experiment + '.json', 'w') as outfile:
    json.dump(data_manager.reprJSON(), outfile, cls=ComplexEncoder, indent=1)
logging.info('json dump done, everything fine!') 

2018-10-27 20:10:11,555 INFO Scan created: example_small
2018-10-27 20:10:25,160 INFO Patient 0001-02Z1L14N-2006-11-09 with 52 studies and 1258 series.
2018-10-27 20:10:25,160 INFO Series: [18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 25, 25, 25, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 25, 25, 30, 30, 13, 27, 88, 88, 1, 88, 88, 14, 66]
2018-10-27 20:10:34,767 INFO Patient 0002-02Z1L14N-2006-11-09 with 51 studies and 1075 series.
2018-10-27 20:10:34,767 INFO Series: [18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 25, 25, 25, 25, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 25, 25, 30, 30, 30, 88, 88, 1, 31, 13, 14]
2018-10-27 20:10:34,767 INFO datamanager done, writing now json file
2018-10-27 20:10:34,785 INFO json dump done, everything fine!


In [None]:


print(os.getcwd())
print('test')
experiment= 'ahf_export_dcm_all'
data_manager = Datamanager(sc_name=experiment, root_dir='D:\\ahf_export\\')

logging.info('datamanager done, writing now json file')   
with open(experiment + '.json', 'w') as outfile:
    json.dump(data_manager.reprJSON(), outfile, cls=ComplexEncoder, indent=1)
logging.info('json dump done, everything fine!')    
    
json_repr= json.dumps(scan.reprJSON(), cls=ComplexEncoder, indent=1)




2018-10-27 16:15:31,470 INFO Scan created: ahf_export_dcm_all


D:\git\cardio
test


2018-10-27 16:15:54,737 INFO Patient 0001-02Z1L14N-2006-11-09 with 52 studies and 1258 series.
2018-10-27 16:15:54,737 INFO Series: [18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 25, 25, 25, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 25, 25, 30, 30, 13, 27, 88, 88, 1, 88, 88, 14, 66]
2018-10-27 16:16:15,397 INFO Patient 0002-02Z1L14N-2006-11-09 with 51 studies and 1075 series.
2018-10-27 16:16:15,397 INFO Series: [18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 25, 25, 25, 25, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 25, 25, 30, 30, 30, 88, 88, 1, 31, 13, 14]
2018-10-27 16:16:51,101 INFO Patient 0003-04NEJQUZ-2007-03-13 with 9 studies and 1629 series.
2018-10-27 16:16:51,102 INFO Series: [475, 400, 105, 25, 105, 105, 105, 300, 9]
2018-10-27 16:17:25,740 INFO Patient 0004-04NEJQUZ-2008-03-14 with 9 studies and 1439 series.
2018-10-27 16:17:25,740 INFO Series: [450, 105, 125, 75,

In [24]:
import json
from pandas.io.json import json_normalize


#json_data = json.dumps(scan.reprJSON(), cls=ComplexEncoder, indent=1)

#print(json_data['sc_name'])
print(json_data)




IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

