<a href="https://colab.research.google.com/github/MiHess/cxr-bse/blob/master/prepare_input_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import glob
import os

import numpy as np
import PIL
from PIL import Image, ImageOps
from tqdm import tqdm


In [0]:
from google.colab import drive

# Mount Google Drive at /content/drive/; the data root configured in this
# notebook ("/content/drive/My Drive/...") lives underneath this mount point.
drive.mount('/content/drive/')


In [0]:
# Root data directory on the mounted Google Drive.
bse_data_path = "/content/drive/My Drive/dev/bse/data"

# Each image set has a "raw_<name>" directory (globbed for input files) and a
# "<name>" directory (passed to process_raw_image as the output location).
raw_jsrt_bse_path, target_jsrt_bse_path = (
    os.path.join(bse_data_path, prefix + "jsrt_bse") for prefix in ("raw_", "")
)

raw_jsrt_path, target_jsrt_path = (
    os.path.join(bse_data_path, prefix + "jsrt") for prefix in ("raw_", "")
)


In [0]:
def load_raw_jsrt_image(filepath, shape=(2048, 2048)):
    """ Reads a raw JSRT image file and returns it as an 8-bit numpy array.

    The file content is interpreted as big-endian unsigned 16-bit integers,
    arranged into `shape`, divided by 16 and truncated to `uint8`.

    Args:
        filepath: Path of the raw image file.
        shape: Shape the flat pixel stream is reshaped to.

    Returns:
        A numpy array of dtype `uint8` with the given shape.
    """
    big_endian_u16 = np.dtype('>u2')

    with open(filepath, "rb") as raw_file:
        pixels = np.fromfile(raw_file, big_endian_u16)

    scaled = pixels.reshape(shape) / 16
    return scaled.astype(np.uint8)


def load_raw_jsrt_bse_image(filepath):
    """ Loads a JSRT BSE image from disk and returns it as an 8-bit numpy array.

    The file is opened with Pillow's `Image.open`; its pixel values are divided
    by 256 and truncated to `uint8` (presumably the source files are 16-bit --
    TODO confirm against the data set).

    Args:
        filepath: Path of the image file.

    Returns:
        A numpy array of dtype `uint8`.
    """
    # Image.open is lazy and keeps the file open; the context manager closes it
    # deterministically, and np.array() reads the pixel data before that happens.
    with Image.open(filepath) as img:
        img_array = np.array(img) / 256

    return img_array.astype(np.uint8)


In [0]:
def _downsample_image_array(img_array, factor=4):
    """ Downsamples an image array with predefined scaling factor.
    
    On original shape of (2048, 2048) a factor 4 yields an image array of size (512, 512).
    """
    assert isinstance(factor, int), "factor needs to be an integer"
    assert factor >= 1, "factor needs to be greater or equal 1"
    
    img_array = img_array.copy()
    
    return img_array[::factor, ::factor]


def _get_file_id(filepath):
    assert os.path.exists(filepath), "file not found"
    
    filename = os.path.basename(filepath)
    if filename.split("."):
        return filename.split(".")[0].upper()
    else:
        raise ValueError("invalid filename")
        
    
def _save_image(img, output_path, output_file_id, suffix="png"):
    """ Saves to disk.
    """
    assert os.path.isdir(output_path), "output directory does not exist"
    
    img.save(os.path.join(output_path, output_file_id) + "." + suffix)


In [0]:
def process_raw_image(filepath, image_loader_function, output_path):
    """ Loads, converts, downsamples, inverts, and saves an input image from either of the two sources.

    Args:
        filepath: Path of an existing raw image file.
        image_loader_function: Callable that takes `filepath` and returns the
            image as an array (the loaders in this notebook return `uint8` arrays).
        output_path: Directory the result is written to; created if missing.
            The file is named after the upper-cased file id of `filepath` and
            saved with the default suffix of `_save_image`.

    Raises:
        AssertionError: If `filepath` does not exist.
    """
    assert os.path.exists(filepath), "file not found"

    # exist_ok avoids the check-then-create race; it still raises if the path
    # exists but is not a directory.
    os.makedirs(output_path, exist_ok=True)

    img_array = image_loader_function(filepath)
    img_array = _downsample_image_array(img_array)

    img = PIL.Image.fromarray(img_array)
    # Invert the grey values before saving.
    img = PIL.ImageOps.invert(img)

    _save_image(img, output_path, _get_file_id(filepath))


In [163]:
raw_jsrt_bse_filepaths = glob.glob(os.path.join(raw_jsrt_bse_path, "*.png"))

for raw_jsrt_filepath in tqdm(raw_jsrt_filepaths):
    process_raw_image(raw_jsrt_filepath, load_raw_jsrt_image, target_jsrt_path)
    

100%|██████████| 247/247 [02:41<00:00,  1.56it/s]


In [164]:
raw_jsrt_filepaths = glob.glob(os.path.join(raw_jsrt_path, "*.IMG"))

for raw_jsrt_bse_filepath in tqdm(raw_jsrt_bse_filepaths):
    process_raw_image(raw_jsrt_bse_filepath, load_raw_jsrt_bse_image, target_jsrt_bse_path)
    

100%|██████████| 247/247 [03:21<00:00,  1.65it/s]
