# Lung Tumor Segmentation 2:
## Preprocessing

In [1]:
%matplotlib notebook
from pathlib import Path
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

In [2]:
!pip install celluloid

Collecting celluloid
  Downloading celluloid-0.2.0-py3-none-any.whl.metadata (4.8 kB)
Downloading celluloid-0.2.0-py3-none-any.whl (5.4 kB)
Installing collected packages: celluloid
Successfully installed celluloid-0.2.0


In [3]:
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import cv2

In [4]:
root = Path("/kaggle/input/medical-segmentation-decathlon-lung/imagesTr")
label = Path("/kaggle/input/medical-segmentation-decathlon-lung/imagesTs")

In [5]:
def change_img_to_label_path(path):
    """
    Replaces imagesTr with labelsTr
    """
    parts = list(path.parts)  # get all directories whithin the path
    parts[parts.index("imagesTr")] = "labelsTr"  # Replace imagesTr with labelsTr
    return Path(*parts)  # Combine list back into a Path object

In [6]:
sample_path = list(root.glob("lung*/*"))[2]  # Choose a subject
sample_path_label = change_img_to_label_path(sample_path)

In [7]:
sample_path, sample_path_label

(PosixPath('/kaggle/input/medical-segmentation-decathlon-lung/imagesTr/lung_051.nii/lung_051.nii'),
 PosixPath('/kaggle/input/medical-segmentation-decathlon-lung/labelsTr/lung_051.nii/lung_051.nii'))

In [8]:
data = nib.load(sample_path)
label = nib.load(sample_path_label)

ct = data.get_fdata()
mask = label.get_fdata()

In [9]:
nib.aff2axcodes(data.affine)

('L', 'A', 'S')

In [10]:
all_files = list(root.glob("lung_*/*"))  # Get all subjects

In [11]:
len(all_files)

63

In [12]:
save_root = Path("/kaggle/working/Preprocessed")

for counter, path_to_ct_data in enumerate(tqdm(all_files)):
        
    path_to_label = change_img_to_label_path(path_to_ct_data)  # Get path to ground truth
    
    # Load and extract corresponding data
    ct_data = nib.load(path_to_ct_data).get_fdata()
    label_data = nib.load(path_to_label).get_fdata()
    
    # Crop volume and label. Remove the first 30 slices  
    ct_data = ct_data[:,:,30:] / 3071
    label_data = label_data[:,:,30:]
        
    # Check if train or val data and create corresponding path
    if counter < 57:
        current_path = save_root/"train"/str(counter)
    else:
        current_path = save_root/"val"/str(counter)
    
    # Loop over the slices in the full volume and store the data and labels in the data/masks directory
    for i in range(ct_data.shape[-1]):
        slice = ct_data[:,:,i]
        mask = label_data[:,:,i]
        
        # Resize slice and label to common resolution to reduce training time
        slice = cv2.resize(slice, (256, 256))
        mask = cv2.resize(mask, (256, 256), interpolation=cv2.INTER_NEAREST)
        
        slice_path = current_path/"data"
        mask_path = current_path/"masks"
        slice_path.mkdir(parents=True, exist_ok=True)
        mask_path.mkdir(parents=True, exist_ok=True)
        
        np.save(slice_path/str(i), slice)
        np.save(mask_path/str(i), mask)

  0%|          | 0/63 [00:00<?, ?it/s]

In [13]:
!zip -r lung_seg_decathalon.zip /kaggle/working/Preprocessed

  adding: kaggle/working/Preprocessed/ (stored 0%)
  adding: kaggle/working/Preprocessed/val/ (stored 0%)
  adding: kaggle/working/Preprocessed/val/59/ (stored 0%)
  adding: kaggle/working/Preprocessed/val/59/data/ (stored 0%)
  adding: kaggle/working/Preprocessed/val/59/data/387.npy (deflated 72%)
  adding: kaggle/working/Preprocessed/val/59/data/205.npy (deflated 72%)
  adding: kaggle/working/Preprocessed/val/59/data/355.npy (deflated 72%)
  adding: kaggle/working/Preprocessed/val/59/data/463.npy (deflated 72%)
  adding: kaggle/working/Preprocessed/val/59/data/132.npy (deflated 74%)
  adding: kaggle/working/Preprocessed/val/59/data/187.npy (deflated 72%)
  adding: kaggle/working/Preprocessed/val/59/data/271.npy (deflated 72%)
  adding: kaggle/working/Preprocessed/val/59/data/224.npy (deflated 72%)
  adding: kaggle/working/Preprocessed/val/59/data/382.npy (deflated 72%)
  adding: kaggle/working/Preprocessed/val/59/data/347.npy (deflated 72%)
  adding: kaggle/working/Prep