In [1]:
import shutil
import os
from Llibreries.grid_library import * 
from Llibreries.lungs_detection_library import *
import cv2

Due to IPython and Windows limitation, python multiprocessing isn't available now.
So `number_workers` is changed to 0 to avoid getting stuck


torch.linalg.solve has its arguments reversed and does not return the LU factorization.
To get the LU factorization see torch.lu, which can be used with torch.lu_solve or torch.lu_unpack.
X = torch.solve(B, A).solution
should be replaced with
X = torch.linalg.solve(A, B) (Triggered internally at  C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\aten\src\ATen\native\BatchLinearAlgebra.cpp:859.)
  ret = func(*args, **kwargs)


Get the lungs detector model 

In [2]:
lungsDetectorModel = setupLungsDetectionLearner()

Get the original dataset of pleural effusion/normal images with their full size 

In [3]:
original_ds_path = '../../../Datasets_originals/RepoGithub_ref/original_ds/' 
assert os.path.isdir(original_ds_path), "Couldn't find the dataset at {}".format(original_ds_path.data_dir)


In [4]:
def getImgs(setName, labelType): #setName can be 'train'/'valid'/'test'; labelType can be 'normal'/'effusion'
    data_dir = os.path.join(original_ds_path, setName, labelType)
    imgs = os.listdir(data_dir)
    imgs = [os.path.join(data_dir, f) for f in imgs if f.endswith('.png')]
    return imgs

In [5]:
# Get the images paths of each set (train/valid/test) and type (normal/effusion)
imgsPaths = {'train': (getImgs('train', 'normal'), getImgs('train', 'effusion')),
                 'valid': (getImgs('valid', 'normal'), getImgs('valid', 'effusion')),
                 'test': (getImgs('test', 'normal'), getImgs('test', 'effusion'))}

Extract the grids of the detected lungs where pleural effusion can be located and create a new dataset with these grids

In [6]:
def extractPleuralGrids(imgPath):
    """Return the grid of the left lung and the grid of the right lung where pleural effusion can be located"""
    
    imgOri = cv2.imread(imgPath, cv2.IMREAD_GRAYSCALE) 

    imgRes = cv2.imread(imgPath) 
    
    predictedBboxes = lungsDetectorModel.predict(imgPath) # get a TensorBBox of size 2x4 with the bboxes coordinates
    ''' Example of predictedBboxes:
    (TensorBBox([[  7.9246,  13.2136,  74.8705, 100.0172],
        [ 78.8447,   8.7797, 146.1627,  99.2884]]), TensorBase([-0.9009, -0.7798, -0.0641,  0.6670, -0.0144, -0.8537,  0.8270,  0.6548]), 
        TensorBase([-0.9009, -0.7798, -0.0641,  0.6670, -0.0144, -0.8537,  0.8270,  0.6548]))
    '''
    predictedBboxes = ((predictedBboxes[1] + 1) / 2).numpy() # predictedBboxes[1] is a TensorBase that contains the "real" predicted bboxes coordinates 

    leftLung = predictedBboxes[0:4] * [imgOri.shape[0], imgOri.shape[1], imgOri.shape[0], imgOri.shape[1]]
    # convert a TensorBBox to a numpy array, so that, we can access each coord as a flot
    rightLung = predictedBboxes[4:8] * [imgOri.shape[0], imgOri.shape[1], imgOri.shape[0], imgOri.shape[1]]

    gridsLungLeft = gridBbox(leftLung, imgRes, imgOri, 3, 3) # grid bbox left in the X-ray image / right in the body
    gridsLungRight = gridBbox(rightLung, imgRes, imgOri, 3, 3) # grid bbox right

    return gridsLungLeft[6], gridsLungRight[8]

In [7]:
size = 64

def resizeGrids(gridLeftLung, gridRightLung):
    """Resize each grid so that they have the same size"""
    
    # Use bilinear interpolation instead of the default "nearest neighbor" method
    gridLeftLungResized = cv2.resize(gridLeftLung, (size, size), interpolation = cv2.INTER_AREA)
    gridRightLungResized = cv2.resize(gridRightLung, (size, size), interpolation = cv2.INTER_AREA)
    return [gridLeftLungResized, gridRightLungResized]


def saveGrids(grids, output_dir, caseName):
    """Save to the `output_dir` the img of the pleural effusion grids related to case "caseName" """

    # Save the grids
    cv2.imwrite(os.path.join(output_dir,caseName), grids)

Main

In [8]:
grids_ds_path = '../Classificador/v2_gridsCombined/Dataset/'

# Process train, valid and test sets
grids_train_val_dir = os.path.join(grids_ds_path, "train_val") 
grids_test_dir = os.path.join(grids_ds_path, "test") 

for set in ['train', 'valid', 'test']:
    # Get the path of 'train'/'valid'/'test' sets on the new dataset (grids_ds)
    # train and valid sets will be stored inside 'train_val' folder 
    if set == 'test':
        output_dir_set = grids_test_dir
    else:
        output_dir_set = os.path.join(grids_train_val_dir, set) 

    # Get the paths of 'normal' and 'effusion' subfolders 
    normal_dir = os.path.join(output_dir_set, 'normal')
    effusion_dir = os.path.join(output_dir_set, 'effusion')

    # Delete if possible the folders 'normal' and 'effusion' from previous executions of this notebook
    shutil.rmtree(normal_dir, ignore_errors=True) # making ignore_errors = True will not raise a FileNotFoundError in case directory doesn't exist
    shutil.rmtree(effusion_dir, ignore_errors=True)

    # Create folders named 'normal' and 'effusion' and their parent folders named 'train'/'valid'/'test'
    os.makedirs(normal_dir, exist_ok=True)
    os.makedirs(effusion_dir, exist_ok=True)

    print("Processing {} data, saving grids to {}".format(set, output_dir_set))
    # Process the images of each set (train/valid/test)
    # Each set has the same number of normal and "pleural effusion" images  
    # Process normal imgs first and then process the effusion imgs
    for i in range(2): 
        grids_dir = [normal_dir, effusion_dir] 
        for j in range(len(imgsPaths[set][i])): # imgsPaths[set][0] are the normal images paths of a set; imgsPaths[set][1] are the effusion images paths of a set
            # Process each normal/effusion image from the corresponding set (train/val/test)
            imgPath = imgsPaths[set][i][j] 
            imgName = imgPath.split('\\')[-1] # for example, if imgPath = '[...] train\normal\normal1.png'; imgName = normal1.png 
            gridLeftLung, gridRightLung = extractPleuralGrids(imgPath)
            # Before combining each grid we need to make them have the same size
            resizedGrids = resizeGrids(gridLeftLung, gridRightLung) # resizedGrids is a list that contains both grids resized to the same size
            # Horizontally concatenate the grid of the left lung with the grid of the right lung
            grids = cv2.hconcat(resizedGrids)
            # Save the image of the combined grids  
            saveGrids(grids, grids_dir[i], imgName)
print("Done building dataset")

Processing train data, saving grids to ../Datasets/RepoGithub_ref/grids_ds\train_val\train
normal1.png


normal10.png


normal100.png


normal101.png


normal102.png


normal103.png


normal104.png


normal106.png


normal107.png


normal108.png


normal110.png


normal111.png


normal112.png


normal113.png


normal114.png


normal116.png


normal117.png


normal118.png


normal119.png


normal12.png


normal120.png


normal121.png


normal122.png


normal123.png


normal124.png


normal125.png


normal126.png


normal127.png


normal128.png


normal129.png


normal13.png


normal130.png


normal131.png


normal132.png


normal133.png


normal134.png


normal135.png


normal136.png


normal137.png


normal138.png


normal139.png


normal14.png


normal140.png


normal141.png


normal142.png


normal143.png


normal144.png


normal145.png


normal146.png


normal147.png


normal148.png


normal149.png


normal15.png


normal150.png


normal151.png


normal152.png


normal153.png


normal154.png


normal155.png


normal156.png


normal158.png


normal159.png


normal16.png


normal160.png


normal161.png


normal162.png


normal163.png


normal164.png


normal165.png


normal166.png


normal167.png


normal168.png


normal169.png


normal17.png


normal170.png


normal171.png


normal173.png


normal174.png


normal175.png


normal176.png


normal177.png


normal178.png


normal18.png


normal180.png


normal181.png


normal182.png


normal183.png


normal184.png


normal186.png


normal187.png


normal188.png


IndexError: index 512 is out of bounds for axis 0 with size 512