In [1]:
# importing required modules
import os
import time
import numpy as np
import matplotlib.pyplot as plt
import csv
from zipfile import ZipFile
import shutil
import scipy.misc
import math
import re
import cv2
import pydicom
from pydicom.data import get_testdata_files
from skimage import exposure
import glob

In [2]:
# Directory that is searched for the input .zip archives.
data_path = "/scratch1/06568/joyce_w/field_20259/"
# Directory that receives the processed images.
output_path = (
    "/work2/06568/joyce_w/frontera/Abdominal_imaging/data/processed/field_20259_processed/"
)

In [3]:
class BadShape(Exception):
    """Raised when a loaded image does not have the expected shape."""

In [4]:
class NoFile(Exception):
    """Raised when the image being looked for cannot be found."""

In [5]:
class Pancreas:
    """
    Process one participant archive: unzip it, select the DICOM image whose
    SeriesDescription contains 'T1MAP', crop and contrast-enhance it, and
    write both versions to disk as image files.

    Only one shot per participant is handled.
    """

    # Defaults used when the constructor is not given explicit locations.
    DEFAULT_DATA_PATH = '/scratch1/06568/joyce_w/field_20259/'
    DEFAULT_OUTPUT_PATH = '/work2/06568/joyce_w/frontera/Abdominal_imaging/data/processed/field_20259_processed/'
    DEFAULT_RAW_OUTPUT_PATH = '/scratch1/06568/joyce_w/field_20259_unzip/'

    EXPECTED_SHAPE = (288, 384)   # shape the selected image must have before cropping
    CROP_WIDTH = 350              # number of leading columns kept by the crop
    SERIES_KEYWORD = 'T1MAP'      # substring searched for in SeriesDescription
    FAST_PATH_INDEX = 14          # position (in sorted file order) tried first

    def __init__(self, file, ext='.jpg', data_path=None, output_path=None, raw_output_path=None):
        """
        :param file: string, ID _ data_ID _ instance _ test_number (+ potentially .zip),
                     e.g. 2016212_20227_2_0 (+ potentially .zip). A directory prefix is
                     accepted; it is only used to locate the archive.
        :param ext: string, extension (and therefore format) of the saved images
        :param data_path: directory holding the .zip archives (default: DEFAULT_DATA_PATH)
        :param output_path: directory for the enhanced images and the temporary
                            unzip folder (default: DEFAULT_OUTPUT_PATH)
        :param raw_output_path: directory for the non-enhanced images
                                (default: DEFAULT_RAW_OUTPUT_PATH)
        :raises ValueError: if the file name does not follow the expected pattern
        """
        directory, filename = os.path.split(file)
        # Keep only the bare name: joining an absolute path onto output_path would
        # silently discard output_path and unzip next to the input archives.
        self.file = os.path.splitext(filename)[0]
        self.data_path = self.DEFAULT_DATA_PATH if data_path is None else data_path
        self.output_path = self.DEFAULT_OUTPUT_PATH if output_path is None else output_path
        self.raw_output_path = self.DEFAULT_RAW_OUTPUT_PATH if raw_output_path is None else raw_output_path
        # An absolute `directory` overrides data_path; an empty or relative one is
        # resolved below data_path, so a bare name is read from data_path itself.
        self.zip_path = os.path.join(self.data_path, directory, self.file + '.zip')
        self.unzip_folder = os.path.join(self.output_path, self.file)
        self.ext = ext
        self.img_computed = False  # True once compute_right_img has completed
        self.dicoms = None
        self.extract_info_from_file()

    def extract_info_from_file(self):
        """
        Extract each information from file name: patient_id, field_id, instance and shot.

        :raises ValueError: if the name contains no '<digits>_<digits>_<digits>_<digits>' part
        """
        match = re.search(r'(\d+)_(\d+)_(\d+)_(\d+)', self.file)
        if match is None:
            raise ValueError(
                "File name %r does not match '<id>_<field>_<instance>_<shot>'." % self.file
            )
        self.patient_id, self.field_id, self.instance, self.shot = match.groups()

    def unzip(self):
        """
        Extract the content of the zip archive into self.unzip_folder.
        """
        os.makedirs(self.unzip_folder, exist_ok=True)
        with ZipFile(self.zip_path, 'r') as archive:
            archive.extractall(self.unzip_folder)

    def _list_dicom_files(self):
        """
        Return the sorted names of the '.dcm' files found in the unzip folder.
        """
        return [f for f in sorted(os.listdir(self.unzip_folder)) if f.endswith('.dcm')]

    def _read_dicom(self, filename):
        """
        Read one DICOM file of the unzip folder.
        """
        return pydicom.filereader.dcmread(os.path.join(self.unzip_folder, filename))

    def extract_info_from_unzip(self):
        """
        Extract relevant information from unzipped files.

        Reads every .dcm file and stores, for each of them, ProtocolName,
        PerformedProcedureStepDescription, InstanceNumber and SeriesDescription.
        """
        # store all .dcm names in a list
        self.files = self._list_dicom_files()
        # store all .dcm files in a list
        self.dicoms = [self._read_dicom(f) for f in self.files]
        # store the values of the items of interest in lists
        self.ProtocolNames = self.ItemToValues('ProtocolName')
        self.PerformedProcedureStepDescription = self.ItemToValues('PerformedProcedureStepDescription')
        self.InstanceNumber = self.ItemToValues('InstanceNumber')
        self.SeriesDescription = self.ItemToValues('SeriesDescription')

    def extract_dcom14(self):
        """
        Fast path: read only the file at position FAST_PATH_INDEX of the sorted
        .dcm names and store its SeriesDescription.

        Raises IndexError when the archive holds too few .dcm files.
        """
        self.files = self._list_dicom_files()
        self.dicoms = [self._read_dicom(self.files[self.FAST_PATH_INDEX])]
        self.SeriesDescription = self.ItemToValues('SeriesDescription')

    def ItemToValues(self, item):
        """
        Return, for every loaded DICOM, the utf-8 decoded value of element `item`.

        :param item: string, DICOM keyword (e.g. 'SeriesDescription')
        :raises Exception: if no DICOM has been loaded yet
        """
        if self.dicoms is None:
            raise Exception('self.dicoms has not been instantiated.')
        # NOTE(review): relies on get_item returning an element whose value is bytes.
        return [dcm.get_item(item).value.decode("utf-8") for dcm in self.dicoms]

    def get_ProtocolNames(self):
        """
        Return the list of Protocol Names (set by extract_info_from_unzip).
        """
        return self.ProtocolNames

    def get_pixels(self):
        """
        Return list images
        """
        return [dcm.pixel_array for dcm in self.dicoms]

    def NameToPixels(self, name):
        """
        Return array corresponding to a name.
        TO COMPLETE FOR FULL BODY

        Raises IndexError when no Protocol Name equals `name`.
        """
        indexes = [i for i, x in enumerate(self.ProtocolNames) if x == name]
        return self.dicoms[indexes[0]].pixel_array

    def get_array(self, img_nb):
        """
        Return array corresponding to image number img_nb.
        """
        return self.dicoms[img_nb].pixel_array

    def plot_imgs(self, img_nb=None, order=None):
        """
        Plot images. If img_nb is not None, plot the image number img_nb in self.dicoms.

        :param img_nb: int, index of the single image to plot
        :param order: iterable, order in which images are displayed
        """
        if img_nb is not None:
            selected = [self.dicoms[img_nb]]
        elif order is not None:
            # generator: each index is resolved right before its image is drawn
            selected = (self.dicoms[k] for k in order)
        else:
            selected = self.dicoms

        for dcm in selected:
            plt.figure()
            plt.imshow(dcm.pixel_array, cmap=plt.cm.bone)
            plt.show()

    def img_index(self):
        """
        Return index of the selected img, i.e. the first one whose
        SeriesDescription contains SERIES_KEYWORD.

        :raises NoFile: if no SeriesDescription contains the keyword
        """
        for index, description in enumerate(self.SeriesDescription):
            if self.SERIES_KEYWORD in description:
                return index
        raise NoFile

    def check_shape(self):
        """
        Check the shape of the selected image.
        If this shape is not right, it raises the error BadShape
        """
        if np.array(self.img_ar).shape != self.EXPECTED_SHAPE:
            raise BadShape

    @staticmethod
    def _convert_for_jpg(img):
        """
        Rescale `img` so that its maximum becomes 255 and return it as uint8.

        Fractions are truncated and values are clipped to [0, 255]. An image whose
        maximum is 0 is returned as all zeros instead of dividing by zero.
        """
        img = np.asarray(img, dtype=np.float64)
        peak = np.max(img) if img.size else 0
        if peak == 0:
            return np.zeros(img.shape, dtype=np.uint8)
        scaled = img * (255 / peak)
        # uint8 is the depth an 8-bit image file stores; clip so that out-of-range
        # values saturate instead of wrapping around in the cast.
        return np.clip(scaled, 0, 255).astype(np.uint8)

    def compute_right_img(self, unzip=True):
        """
        Compute right image.

        Sets self.img_ar (cropped image) and self.img_ar_enhanced (cropped image
        after adaptive histogram equalization), both converted to uint8.

        :param unzip: bool, extract the archive first when True
        :raises NoFile: if no image with the expected SeriesDescription exists
        :raises BadShape: if the selected image does not have EXPECTED_SHAPE
        """
        if unzip:
            self.unzip()
        try:
            # fast path: look at a single, fixed position
            self.extract_dcom14()
            self.img_ar = self.get_array(self.img_index())
        except Exception:
            # fall back to reading every file; KeyboardInterrupt / SystemExit
            # are deliberately not intercepted
            self.extract_info_from_unzip()
            self.img_ar = self.get_array(self.img_index())
        self.check_shape()
        # crop the image
        self.img_ar = self.img_ar[:, :self.CROP_WIDTH]
        # contrast enhancement
        self.img_ar_enhanced = exposure.equalize_adapthist(np.copy(self.img_ar), clip_limit=0.03)
        # preprocess arrays for image conversion
        self.img_ar = self._convert_for_jpg(self.img_ar)
        self.img_ar_enhanced = self._convert_for_jpg(self.img_ar_enhanced)
        self.img_computed = True

    def save_img(self):
        """
        Save images in the right folder.

        The cropped image goes to raw_output_path and the contrast-enhanced one to
        output_path, both named '<patient_id>_<instance><ext>'. Archives that raise
        BadShape or NoFile are skipped. The unzip folder is removed in every case.
        """
        name = self.patient_id + '_' + self.instance + self.ext
        try:
            # compute images
            self.compute_right_img()
            # save raw img
            cv2.imwrite(os.path.join(self.raw_output_path, name), self.img_ar)
            # save img with enhanced contrast
            cv2.imwrite(os.path.join(self.output_path, name), self.img_ar_enhanced)
        except BadShape:
            pass
        except NoFile:
            pass
        finally:
            # always clean up, and never let the cleanup hide the original error
            shutil.rmtree(self.unzip_folder, ignore_errors=True)

In [6]:
# Every archive whose name ends in '_2_0.zip' (instance 2, test number 0).
files = glob.glob(data_path + '*_2_0.zip')
n_files = len(files)

for i, zip_file in enumerate(files):
    # progress message every 100 archives
    if i % 100 == 0:
        print(i, "out of", n_files)
    pancreas = Pancreas(file=zip_file)
    pancreas.save_img()

0 out of 59444
100 out of 59444
200 out of 59444
300 out of 59444
400 out of 59444
500 out of 59444
600 out of 59444
700 out of 59444
800 out of 59444
900 out of 59444
1000 out of 59444
1100 out of 59444
1200 out of 59444
1300 out of 59444
1400 out of 59444
1500 out of 59444
1600 out of 59444
1700 out of 59444
1800 out of 59444
1900 out of 59444
2000 out of 59444
2100 out of 59444
2200 out of 59444
2300 out of 59444
2400 out of 59444
2500 out of 59444
2600 out of 59444
2700 out of 59444
2800 out of 59444
2900 out of 59444
3000 out of 59444
3100 out of 59444
3200 out of 59444
3300 out of 59444
3400 out of 59444
3500 out of 59444
3600 out of 59444
3700 out of 59444
3800 out of 59444
3900 out of 59444
4000 out of 59444
4100 out of 59444
4200 out of 59444
4300 out of 59444
4400 out of 59444
4500 out of 59444
4600 out of 59444
4700 out of 59444
4800 out of 59444
4900 out of 59444
5000 out of 59444
5100 out of 59444
5200 out of 59444
5300 out of 59444
5400 out of 59444
5500 out of 59444
5600

In [14]:
# Every archive whose name ends in '_3_0.zip' (instance 3, test number 0).
files = glob.glob(data_path + '*_3_0.zip')
n_files = len(files)

for i, zip_file in enumerate(files):
    # progress message every 100 archives
    if i % 100 == 0:
        print(i, "out of", n_files)
    pancreas = Pancreas(file=zip_file)
    pancreas.save_img()

0 out of 5017
100 out of 5017
200 out of 5017
300 out of 5017
400 out of 5017
500 out of 5017
600 out of 5017
700 out of 5017
800 out of 5017
900 out of 5017
1000 out of 5017
1100 out of 5017
1200 out of 5017
1300 out of 5017
1400 out of 5017
1500 out of 5017
1600 out of 5017
1700 out of 5017
1800 out of 5017
1900 out of 5017
2000 out of 5017
2100 out of 5017
2200 out of 5017
2300 out of 5017
2400 out of 5017
2500 out of 5017
2600 out of 5017
2700 out of 5017
2800 out of 5017
2900 out of 5017
3000 out of 5017
3100 out of 5017
3200 out of 5017
3300 out of 5017
3400 out of 5017
3500 out of 5017
3600 out of 5017
3700 out of 5017
3800 out of 5017
3900 out of 5017
4000 out of 5017
4100 out of 5017
4200 out of 5017
4300 out of 5017
4400 out of 5017
4500 out of 5017
4600 out of 5017
4700 out of 5017
4800 out of 5017
4900 out of 5017
5000 out of 5017
