In [None]:
# -*- coding: utf-8 -*-
import sys; print('Python %s on %s' % (sys.version, sys.platform))
import os
import time
import json
from glob import glob, iglob
from tqdm import tqdm
import matplotlib.pyplot as plt

import numpy as np; print('numpy', np.__version__)
import pandas as pd; print('pandas', pd.__version__)
import cv2; print('opencv2', cv2.__version__)

import settings
import helper
import visual

# 1. Load Annotation

In [None]:
# Read the preprocessed annotation table, indexed by series UID.
df_annotation = pd.read_csv(settings.PREPROCESS_ANNOTATION_FILE, index_col=['seriesuid'])
# Series UIDs are used as string keys everywhere below, so normalise the index to str.
df_annotation.index = df_annotation.index.astype(str)
print(
    'annotation:', df_annotation.shape,
    'distinct lung:', len(df_annotation.index.unique()),
)

In [None]:
# Preview up to 10 random annotation rows.
# - min(...) keeps the cell working when the table has fewer than 10 rows
#   (DataFrame.sample raises ValueError when n exceeds the row count).
# - random_state makes the preview identical across Restart & Run All.
df_annotation.sample(n=min(10, len(df_annotation)), random_state=0)

In [None]:
# Distinct series UIDs, in order of first appearance in the annotation table.
# Index.unique() is used instead of list(set(...)) because iterating a set of
# strings gives a different order on every interpreter run (hash randomisation),
# which made the extraction order below non-reproducible.
lungs = df_annotation.index.unique().tolist()
print('distinct lungs in annotation:', len(lungs))

# 2. Positive Samples Extraction

In [None]:
# Gate for the extraction cell below: that cell only does work when this is True.
IS_EXTRACTION = True

# Make sure every directory the extraction cell writes into exists.
# os.makedirs(..., exist_ok=True) is idempotent: it creates missing parents and
# is a no-op for directories that are already there, so a partially created
# output tree (e.g. the parent exists but 'medi/' does not) is repaired as well.
# The segmentation-label directories are included because the extraction cell
# saves into '<PREPROCESS_SEG_DIR><wtype>_label/'.
for out_dir in (
    settings.PREPROCESS_POS_DIR + 'lung/',
    settings.PREPROCESS_POS_DIR + 'medi/',
    settings.PREPROCESS_SEG_DIR + 'lung_label/',
    settings.PREPROCESS_SEG_DIR + 'medi_label/',
):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Annotation label -> window type. The window type selects both the source
# volume ('lung' or 'medi') and the output sub-directory.
LABEL_TO_WTYPE = {1: 'lung', 5: 'lung', 31: 'medi', 32: 'medi'}


def _sample_filename(idx, item, flipped=False):
    """Build the PNG file name that encodes one annotation row.

    Args:
        idx: Index value of the annotation row (the series UID).
        item: Annotation row providing vcoordX/Y/Z, diameterX/Y/Z and label.
        flipped: When True, the '_filp' marker is inserted after ``idx``.
            The spelling 'filp' is kept on purpose so that file names stay
            identical to the ones written by earlier runs.

    Returns:
        The file name (no directory part), ending in '.png'.
    """
    marker = '_filp' if flipped else ''
    return (
        f'{idx}{marker}'
        f'_x{int(item.vcoordX)}_y{int(item.vcoordY)}_z{int(item.vcoordZ)}'
        f'_dx{int(round(item.diameterX))}_dy{int(round(item.diameterY))}_dz{int(round(item.diameterZ,0))}'
        f'_l{int(item.label)}.png'
    )


if IS_EXTRACTION:
    n_skipped = 0
    for uid in tqdm(lungs):
        labels = df_annotation.loc[[uid]]
        if len(labels) <= 0:
            continue

        # Volume dimensions are read from the first annotation row of the series.
        width = int(labels['width'].values[0])
        height = int(labels['height'].values[0])
        n_slices = int(labels['slice'].values[0])

        lung_l, mask_l = helper.load_lung_array(uid, width, height, n_slices, wtype='lung')
        lung_m, mask_m = helper.load_lung_array(uid, width, height, n_slices, wtype='medi')

        # Zero out every voxel whose mask value is not positive.
        lung_l = lung_l*(mask_l>0)
        lung_m = lung_m*(mask_m>0)
        volumes = {'lung': lung_l, 'medi': lung_m}

        for idx, item in labels.iterrows():
            wtype = LABEL_TO_WTYPE.get(int(item.label))
            if wtype is None:
                # Unknown label: skip it. Previously such a row fell through the
                # if/elif and silently reused the volume and directory chosen for
                # the previous row (or raised NameError on the very first row).
                n_skipped += 1
                continue
            lung = volumes[wtype]

            cube = helper.get_cube_from_lung_array(lung, item.vcoordX, item.vcoordY, item.vcoordZ, block_size=settings.CUBE_POS_SIZE)
            if np.sum(cube) > settings.THRESHOLD_VALID_CUBE:
                seg_label = helper.create_seg_label(diameter=np.array([item.diameterZ, item.diameterY, item.diameterX]), offset=np.array([0,0,0]), block_size=settings.CUBE_POS_SIZE)

                # Save the cube and its segmentation label, first as-is and then
                # flipped along axis 2; each pair shares one file name.
                for flipped in (False, True):
                    filename = _sample_filename(idx, item, flipped=flipped)
                    cube_out = np.flip(cube.copy(), axis=2) if flipped else cube
                    seg_out = np.flip(seg_label.copy(), axis=2) if flipped else seg_label

                    helper.save_cube_img(
                        f'{settings.PREPROCESS_POS_DIR}{wtype}/{filename}',
                        cube_out, rows=8, cols=8)
                    helper.save_cube_img(
                        f'{settings.PREPROCESS_SEG_DIR}{wtype}_label/{filename}',
                        seg_out, rows=8, cols=8)

    if n_skipped:
        print('skipped annotations with unsupported label:', n_skipped)

# 3. Validate the Positive Samples

In [None]:
# Number of annotation rows vs. number of PNGs found one level below the
# positive-sample directory.
n_annotation_rows = len(df_annotation)
n_positive_pngs = len(glob(settings.PREPROCESS_POS_DIR + '*/*.png'))
print(n_annotation_rows, n_positive_pngs)

In [None]:
# Series UID whose annotation rows are looked up and validated in the next cells.
EXAMPLE_SERIESUID = '364920'
# Sub-directory of the positive-sample directory the cubes are loaded from.
WTYPE = 'lung'

In [None]:
# Select the annotation rows of the example series with a boolean mask.
# Unlike df_annotation.loc[[uid]], which raises KeyError for a UID that is not
# in the index, the mask returns an empty frame, so the count below prints 0
# and the following cell simply has nothing to iterate over.
labels = df_annotation.loc[df_annotation.index == EXAMPLE_SERIESUID]
print('labels:', len(labels))

In [None]:
# Re-load the saved cubes of the example series and check their shape.
for idx, item in labels.iterrows():
    # The extraction step stores labels 1/5 under 'lung/' and 31/32 under 'medi/',
    # so only rows whose label belongs to WTYPE can have a file in that directory.
    label = int(item.label)
    if label in {1, 5}:
        expected_wtype = 'lung'
    elif label in {31, 32}:
        expected_wtype = 'medi'
    else:
        expected_wtype = None
    if expected_wtype != WTYPE:
        continue

    filename = f'{idx}_x{int(item.vcoordX)}_y{int(item.vcoordY)}_z{int(item.vcoordZ)}_dx{int(round(item.diameterX))}_dy{int(round(item.diameterY))}_dz{int(round(item.diameterZ,0))}_l{int(item.label)}.png'
    filepath = settings.PREPROCESS_POS_DIR + WTYPE + '/' + filename

    # Extraction only saves cubes whose sum exceeds settings.THRESHOLD_VALID_CUBE,
    # so a missing file is reported instead of being treated as an error.
    if not os.path.exists(filepath):
        print(filename, 'not found (no cube was saved for this annotation)')
        print('--'*30)
        continue

    cube = helper.load_cube_img(filepath, rows=8, cols=8)

    assert cube.shape == (64, 64, 64)
    print(filename, '\n', cube)
    print('--'*30)

# 4. Visual Samples By Labels

In [None]:
# Series UID whose saved samples are displayed in the next cell.
EXAMPLE_SERIESUID = '364920'
# Sub-directory of the positive-sample directory to read images from; the
# matching segmentation labels are looked up under '<WTYPE>_label'.
WTYPE = 'lung'

In [None]:
# Show every saved sample of the example series next to the same image masked
# by its segmentation label.
img_dir = settings.PREPROCESS_POS_DIR + WTYPE + '/'
seg_dir = settings.PREPROCESS_SEG_DIR + WTYPE + '_label/'

# The '_' after the UID keeps the pattern from also matching longer UIDs that
# merely start with EXAMPLE_SERIESUID (saved names are '<uid>_x...' / '<uid>_filp_x...').
for img_file in iglob(img_dir + f'{EXAMPLE_SERIESUID}_*.png'):
    # Build the label path from its directory and the shared file name. Replacing
    # WTYPE inside the full path would also rewrite any other occurrence of e.g.
    # 'lung' in a parent directory name.
    seg_file = seg_dir + os.path.basename(img_file)
    print(img_file, '\n', seg_file)

    img = cv2.imread(img_file, cv2.IMREAD_GRAYSCALE)
    seg = cv2.imread(seg_file, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None (no exception) when a file is missing or unreadable.
    if img is None or seg is None:
        print('skipped: could not read the image and/or its segmentation label')
        continue

    fig, axs = plt.subplots(1, 2, figsize=(32, 16))
    axs[0].imshow(img, cmap='gray')
    axs[0].set_title('sample')
    axs[1].imshow(img*(seg>0), cmap='gray')
    axs[1].set_title('sample masked by segmentation label')
    plt.show()
    # Release the figure so that looping over many files does not accumulate them.
    plt.close(fig)