In [None]:
import sys
import os
import random
os.nice(10)
os.environ["OPENBLAS_NUM_THREADS"] = "10"
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import astra
import numpy as np
import pydicom 
import pandas as pd
import matplotlib.pyplot as plt
import glob
from multiprocessing import Pool
import tensorflow as tf

## Construct dataset for U-Net training

In [None]:

#define the astra projection function 


def astra_project(src_dir, file_list, n_projections, savedir):
    
    """
    Forward project CT images and reconstruct them with filtered backprojection.

    Every DICOM file in file_list is read from src_dir, rescaled with
    (RescaleSlope * pixel_array + RescaleIntercept + 1024) / 4095, forward
    projected on the GPU with a parallel-beam geometry (768 detectors,
    n_projections angles evenly spaced over [0, 2*pi)) and reconstructed with
    'FBP_CUDA' using the 'Ram-Lak' filter. Each reconstruction is written as
    float16 to <savedir>/<n_projections>/<original basename>.npy.
    Images whose shape is not (512, 512) are skipped with a message.

    Parameters
    -----------------
    src_dir : string
        directory where base images are located
    file_list : list of strings
        list of filenames which shall be processed
    n_projections : int
        number of projections to be used for the reconstruction
    savedir : str
        directory where the output files are saved

    Returns
    -----------------
    data : numpy array or None
        rescaled input image of the last file that was read
    sinogram : numpy array or None
        sinogram of the last image that was reconstructed
    rec : numpy array or None
        reconstruction of the last image that was reconstructed

    Any of the returned values is None if no file reached that stage
    (e.g. file_list is empty or no image had the expected shape).
    """
    
    #define geometry
    geometry = 'parallel'
    pixel_spacing = 1
    n_detectors = 768
    image_shape = (512, 512)

    # pre-bind the results so the return statement is valid even if nothing is processed
    data = sinogram = rec = None

    n_files = len(file_list)
    if n_files == 0:
        return data, sinogram, rec

    # the output folder is derived from the parameter, not from a notebook global
    savepath = os.path.join(savedir, str(n_projections))

    #setup geometries (identical for every file, so built only once)
    vol_geom = astra.create_vol_geom(*image_shape)
    projection_angles = np.linspace(0, 2*np.pi, n_projections, False)
    proj_geom = astra.create_proj_geom(geometry, pixel_spacing, n_detectors, projection_angles)
    proj_id = astra.create_projector('cuda', proj_geom, vol_geom)

    try:
        #iterate through list
        for i, file in enumerate(file_list):
            sys.stdout.write('\r'+f'{i}/{n_files}')

            #load file
            pyd = pydicom.dcmread(os.path.join(src_dir, file))
            data = (pyd.RescaleSlope*pyd.pixel_array + pyd.RescaleIntercept + 1024)/4095

            if data.shape != image_shape:
                print('invalid shape, skip this image')
                continue

            sinogram_id, sinogram = astra.create_sino(data, proj_id, returnData=True, gpuIndex=0)

            # Create a data object for the reconstruction
            rec_id = astra.data2d.create('-vol', vol_geom)
            alg_id = None
            try:
                # create configuration
                cfg = astra.astra_dict('FBP_CUDA')
                cfg['ReconstructionDataId'] = rec_id
                cfg['ProjectionDataId'] = sinogram_id
                cfg['option'] = {'FilterType': 'Ram-Lak'}
                alg_id = astra.algorithm.create(cfg)
                astra.algorithm.run(alg_id)
                rec = astra.data2d.get(rec_id)
            finally:
                # Clean up, also when the reconstruction fails.
                if alg_id is not None:
                    astra.algorithm.delete(alg_id)
                astra.data2d.delete(rec_id)
                astra.data2d.delete(sinogram_id)

            #save data
            name = os.path.splitext(os.path.basename(file))[0]
            if not os.path.isdir(savepath):
                os.makedirs(savepath, exist_ok=True)
                print("\ncreated folder: ", savepath)
            np.save(os.path.join(savepath, f'{name}.npy'), rec.astype('float16'))
    finally:
        # release the projector exactly once, matching its single creation above
        astra.projector.delete(proj_id)

    return data, sinogram, rec

In [None]:
#make sure that the base dicom files are located in ./Data/stage_2_train/
#get list of training and test data
df_train = pd.read_csv(os.path.abspath("./train_UNet.csv"))
df_test = pd.read_csv(os.path.abspath("./test_data.csv"))

# filenames to process: test set first, followed by the U-Net training set
file_list = df_test['filename'].tolist() + df_train['filename'].tolist()

# source folder with the DICOM files and root folder for the outputs
src_dir = os.path.abspath("./Data/stage_2_train/")
save_dir = os.path.abspath("./Data/")

# projection counts to simulate, one output folder per entry
angles = [4096, 2048, 1024, 512, 256, 128, 64]

In [None]:
#create data
# one full pass over file_list per projection count
for angle in angles:
    print("\n", angle)
    data, sino, rec = astra_project(
        src_dir=src_dir,
        file_list=file_list,
        n_projections=angle,
        savedir=save_dir,
    )

In [None]:
#check results
# pick one file from the 4096-projection folder and show it for every projection count

reference_dir = save_dir + "/4096/"
name = os.listdir(reference_dir)[0]
print(name)

# display settings shared by all figures
window = dict(vmin=0, vmax=80, cmap='gray')
for angle in angles:
    img = np.load(f"{save_dir}/{angle}/{name}")
    plt.figure(f"{angle}")
    plt.imshow(img*4095-1024, **window)
    

## Construct dataset for EfficientNet training

In [None]:
#for the following functions, path, df_meta and save_path have to be defined outside the functions in order for multiprocessing to work
#this is probably not the best practice but multiprocessing.starmap and functools.partial got stuck

def load_rescale(filename):
    
    """
    Load dicom file -> rescale with slope and intercept -> 
    clip to brain window -> scale to [0, 255] -> resize to (260, 260)
    
    The file is read from the module-level directory `path`.
    
    Parameters
    -----------------
    filename : str
        filename of dicom file, relative to `path`
        
    Returns
    -----------------
    numpy array
        uint8 image with the singleton batch and channel axes removed
    """    
    # `path` may lack a trailing separator, so join the parts instead of concatenating;
    # a leading "/" on filename is dropped so that join does not discard `path`
    file = pydicom.dcmread(os.path.join(path, filename.lstrip("/")))
    array = file.RescaleSlope*file.pixel_array + file.RescaleIntercept
    # clip to [0, 80] and stretch that range to [0, 255]
    array = array.clip(0, 80)/80.* 255.
    # tf.image.resize is given a batch axis in front and a channel axis at the end
    array = tf.image.resize(array[tf.newaxis, ..., tf.newaxis], [260, 260])
    return np.array(array).astype('uint8').squeeze()
    
def load_neighbouring_slices(slice_id):
    
    """
    Load a slice together with its two neighbouring slices and stack them as channels.
    
    The slice id is split on '_': the second field (prefixed with 'ID_') is the
    study uid and the third field is the slice index within the study. The rows
    of the module-level `df_meta` belonging to that study are used to look up the
    filenames of the slices index - 1, index and index + 1.
    
    The first slice (index 0) and the last slice (index == number of rows of the
    study - 1) are moved one step inwards before the lookup, so for those ids the
    returned image, label and filename belong to the adjacent inner slice.
    
    Parameters
    -----------------
    slice_id : string
        slice id of file
        
    Returns
    -----------------
    image_cat : array
        the slices (index + 1, index, index - 1) stacked along axis 2
    label : numpy array
        values of the columns 'any' through 'subdural' for the centre slice
    filename : str
        filename of the centre slice
        
    Raises
    -----------------
    IndexError
        if one of the three required slice ids is not present in `df_meta`
    """
    parts = slice_id.split('_')
    index = int(parts[2])
    study_uid = 'ID_' + parts[1]
    # boolean selection already returns a new frame, so df_meta is not copied first
    uid_df = df_meta.loc[df_meta['study_instance_uid'] == study_uid]

    # edge slices have only one neighbour: step one slice inwards
    if index == 0:
        index = 1  
    if index == (uid_df.shape[0] -1):
        index = index-1

    def _filename_at(position):
        # filename of the slice at `position` within the current study
        matches = uid_df.loc[uid_df['slice_id'] == study_uid + f'_{position}']['filename'].values
        if len(matches) == 0:
            raise IndexError(
                f"slice id '{study_uid}_{position}' not found in df_meta "
                f"(needed for '{slice_id}')"
            )
        return matches[0]

    filename_down = _filename_at(index - 1)
    filename = _filename_at(index)
    filename_up = _filename_at(index + 1)

    image_down = load_rescale(filename_down)
    image_up = load_rescale(filename_up)
    image = load_rescale(filename)
    # channel order: upper neighbour, centre slice, lower neighbour
    image_cat = np.concatenate([image_up[:,:,np.newaxis], image[:,:,np.newaxis], image_down[:,:,np.newaxis]], 2)
    label = uid_df.loc[uid_df['filename']==filename].loc[:, 'any': 'subdural'].values
    return image_cat, label, filename

def save_file(slice_id):
    """
    Build the stacked image for slice_id and store it as .npy in save_path.

    The output file is named after the returned dicom filename with the
    '.dcm' extension replaced by '.npy'. The label is not stored.
    """
    stacked, _, dcm_name = load_neighbouring_slices(slice_id)
    target = save_path + dcm_name.replace('.dcm', '.npy')
    np.save(target, stacked)

In [None]:
#define path, load csv
# these names are read as globals by load_rescale, load_neighbouring_slices and save_file
path = os.path.abspath("./Data/stage_2_train/")
df_meta = pd.read_csv(os.path.abspath("./train_EfficientNet.csv"))
slice_ids = df_meta["slice_id"]
# keep the trailing separator: save_file appends the filename directly to save_path
save_path = os.path.abspath("./Data/") + "/dataset_neighbouring/"
if not os.path.isdir(save_path):
    os.makedirs(save_path, exist_ok=True)
    print("\ncreated folder: ", save_path)

In [None]:
#let loop run and save files
# five worker processes; map returns once every slice id has been handled
with Pool(processes=5) as pool:
    pool.map(save_file, slice_ids)

In [None]:
#check results
# look at the directory entries 1..3 of the generated dataset
for i in range(1, 4):
    sample_name = os.listdir(save_path)[i]
    load = np.load(save_path + sample_name)
    print("min:", load.min(), "mean:", load.mean(), "max:", load.max())
    plt.figure()
    plt.imshow(load)