In [10]:
import shutil
import os
from Llibreries.grid_library import * 
import cv2

Get the original dataset of lung images classified as "normal" / "effusion"

In [11]:
# Root folder of the original dataset; the image paths below are built relative to it
original_ds_path = '../../../v_imatgesPulmons/Datasets/1ROI_resized/'
# Fail early with a readable message if the dataset folder is missing.
# The message formats the path string itself (a str has no `data_dir` attribute).
assert os.path.isdir(original_ds_path), "Couldn't find the dataset at {}".format(original_ds_path)


In [12]:
def getImgs(setName, labelType):
    """Collect the paths of the '.png' files stored for one set and one label.

    setName: sub-path of the set inside `original_ds_path`
             (e.g. 'test', or a nested path such as 'train_val/train')
    labelType: name of the label subfolder ('normal' or 'effusion')
    Returns a list with the path of every file of that folder whose name ends with '.png',
    in the order given by os.listdir.
    """
    folder = os.path.join(original_ds_path, setName, labelType)
    pngPaths = []
    for fileName in os.listdir(folder):
        # Keep only the PNG images; any other file of the folder is skipped
        if fileName.endswith('.png'):
            pngPaths.append(os.path.join(folder, fileName))
    return pngPaths

In [13]:
# For each set ('train'/'valid'/'test'), store a tuple with the paths of its images:
# position 0 holds the 'normal' images and position 1 holds the 'effusion' images.
# 'train' and 'valid' are read from the 'train_val' folder of the original dataset.
imgsPaths = {
    name: tuple(getImgs(folder, label) for label in ('normal', 'effusion'))
    for name, folder in (('train', 'train_val/train'),
                         ('valid', 'train_val/valid'),
                         ('test', 'test'))
}

Extract the grids of the detected lungs where pleural effusion can be located and create a new dataset with these grids

In [14]:
def extractPleuralGrids(imgPath):
    """Return the grid of the left lung and the grid of the right lung where pleural effusion can be located

    imgPath: path of the image to grid
    Returns the list [effusion_grid_left_lung, effusion_grid_right_lung], both taken
    from the grids returned by gridImg.
    Raises OSError if OpenCV can't read the image at `imgPath`.
    """

    imgOri = cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE)

    imgRes = cv2.imread(imgPath)

    # cv2.imread returns None (it doesn't raise) when the file is missing or can't be decoded,
    # so stop here with a clear message instead of failing later inside gridImg
    if imgOri is None or imgRes is None:
        raise OSError("Couldn't read the image at {}".format(imgPath))

    # First, grid the image with nRows x nCols grids
    nRows = 3
    nCols = 6
    grids = gridImg(imgRes, imgOri, nRows, nCols) # grid bbox left in the X-ray image / right in the body

    # Then, get the grids where pleural effusion can be located
    nGrids = nRows * nCols

    # The effusion grid of the left lung is the first grid of the last row of grids:
    # nGrids - nCols, in this case grid #12
    effusion_grid_left_lung = grids[nGrids - nCols]

    # The effusion grid of the right lung is the last grid of the image: nGrids - 1, in this case grid #17
    effusion_grid_right_lung = grids[nGrids - 1]

    # For more details and to understand why I use each grid number,
    # check "grid_image.ipynb" on folder "Algorisme_quadricules" on the root of this project.

    return [effusion_grid_left_lung, effusion_grid_right_lung]

In [15]:
size = 64 # width and height given to every resized grid

def resizeGrids(gridLeftLung, gridRightLung):
    """Resize both grids to `size` x `size` so that they have the same size

    Returns the list [left lung grid resized, right lung grid resized].
    """

    # INTER_AREA interpolation is requested explicitly instead of relying on the default of cv2.resize
    targetSize = (size, size)
    return [cv2.resize(grid, targetSize, interpolation = cv2.INTER_AREA)
            for grid in (gridLeftLung, gridRightLung)]


def saveGrids(grids, output_dir, caseName):
    """Save to the `output_dir` the img of the pleural effusion grids related to case "caseName"

    grids: image handed to cv2.imwrite
    output_dir: folder where the image is written
    caseName: file name given to the saved image
    Raises OSError if OpenCV reports that the image couldn't be written.
    """

    outputPath = os.path.join(output_dir, caseName)

    # cv2.imwrite reports a failed write through its return value, so check it:
    # otherwise a wrong output path would silently produce an incomplete dataset
    if not cv2.imwrite(outputPath, grids):
        raise OSError("Couldn't save the grids to {}".format(outputPath))

Main

In [16]:
grids_ds_path = '../Datasets/gridsCombined/'

# Process train, valid and test sets
grids_train_val_dir = os.path.join(grids_ds_path, "train_val")
grids_test_dir = os.path.join(grids_ds_path, "test")

# The loop variable is not called `set` so that the builtin `set` is not shadowed in the notebook
for setName in ['train', 'valid', 'test']:
    # Get the path of 'train'/'valid'/'test' sets on the new dataset (grids_ds)
    # train and valid sets will be stored inside 'train_val' folder
    if setName == 'test':
        output_dir_set = grids_test_dir
    else:
        output_dir_set = os.path.join(grids_train_val_dir, setName)

    # Get the paths of 'normal' and 'effusion' subfolders
    normal_dir = os.path.join(output_dir_set, 'normal')
    effusion_dir = os.path.join(output_dir_set, 'effusion')

    # Delete if possible the folders 'normal' and 'effusion' from previous executions of this notebook
    shutil.rmtree(normal_dir, ignore_errors=True) # making ignore_errors = True will not raise a FileNotFoundError in case directory doesn't exist
    shutil.rmtree(effusion_dir, ignore_errors=True)

    # Create folders named 'normal' and 'effusion' and their parent folders named 'train'/'valid'/'test'
    os.makedirs(normal_dir, exist_ok=True)
    os.makedirs(effusion_dir, exist_ok=True)

    print("Processing {} data, saving grids to {}".format(setName, output_dir_set))
    # Process the images of each set (train/valid/test)
    # imgsPaths[setName][0] are the normal images paths of a set; imgsPaths[setName][1] are the effusion images paths
    normalImgsPaths, effusionImgsPaths = imgsPaths[setName]
    # Process normal imgs first and then process the effusion imgs
    for label_dir, labelImgsPaths in [(normal_dir, normalImgsPaths), (effusion_dir, effusionImgsPaths)]:
        for imgPath in labelImgsPaths:
            # Process each normal/effusion image from the corresponding set (train/val/test)
            # os.path.basename uses the separators of the current OS, unlike splitting on '\\' which only works on Windows
            imgName = os.path.basename(imgPath) # for example, if imgPath = '[...] train\normal\normal1.png'; imgName = normal1.png
            gridLeftLung, gridRightLung = extractPleuralGrids(imgPath)
            # Before combining each grid we need to make them have the same size
            resizedGrids = resizeGrids(gridLeftLung, gridRightLung) # resizedGrids is a list that contains both grids resized to the same size
            # Horizontally concatenate the grid of the left lung with the grid of the right lung
            grids = cv2.hconcat(resizedGrids)
            # Save the image of the combined grids
            saveGrids(grids, label_dir, imgName)
print("Done building dataset")

Processing train data, saving grids to ../Datasets/gridsCombined/train_val\train
Processing valid data, saving grids to ../Datasets/gridsCombined/train_val\valid
Processing test data, saving grids to ../Datasets/gridsCombined/test
Done building dataset
