In [None]:
# -*- coding: utf-8 -*-
import sys; print('Python %s on %s' % (sys.version, sys.platform))
import os
import time
import json
from glob import glob, iglob
from tqdm import tqdm
import matplotlib.pyplot as plt

import numpy as np; print('numpy', np.__version__)
import pandas as pd; print('pandas', pd.__version__)
import cv2; print('opencv2', cv2.__version__)
import SimpleITK as itk; print(itk.Version())

import settings
import visual

# 1. Annotation

In [None]:
# Load the raw annotation table and key it by series id (forced to str so that
# lookups by file-derived ids compare like with like).
df_anno = (
    pd.read_csv(settings.ANNOTATION_FILE)
    .astype({'seriesuid': str})
    .set_index('seriesuid')
)
df_anno.head(10)

In [None]:
# Summary statistics (pandas `describe`) of the annotation table.
df_anno.describe()

In [None]:
# Number of distinct series ids that carry at least one annotation row.
len(set(df_anno.index))

# 2. MHD Files

In [None]:
# Every raw training scan header (.mhd). os.path.join keeps the pattern valid
# whether or not RAW_TRAIN_DIR ends with a path separator, and sorting makes
# the order reproducible because glob returns matches in arbitrary order.
all_mhd = sorted(glob(os.path.join(settings.RAW_TRAIN_DIR, '*.mhd')))

In [None]:
# Series that have a scan on disk but no row in the annotation table.
# The id is the file name without directory or extension. os.path is used
# instead of split('/') + strip('.mhd') because str.strip removes any of the
# characters '.', 'm', 'h', 'd' from BOTH ends of the name (it is not a suffix
# removal), which silently corrupts ids that begin or end with those
# characters; split('/') also fails on paths using another separator.
mhd_ids = {os.path.splitext(os.path.basename(path))[0] for path in all_mhd}
no_label_mhd = mhd_ids - set(df_anno.index)
print(len(no_label_mhd), no_label_mhd)
# Written sorted so the JSON file is identical from run to run (set iteration
# order is arbitrary).
with open(os.path.join(settings.PREPROCESS_DIR, 'no_label_mhd.json'), "w") as json_file:
    json.dump(sorted(no_label_mhd), json_file)

In [None]:
# Build one flat record per annotated nodule, combining the annotation row
# with the geometry (resampled size, origin, spacing) of the scan it belongs to.
list_anno = []
# The set of annotated series ids does not change while scanning the files,
# so it is built once here rather than once per file.
annotated_ids = set(df_anno.index)
for src_path in iglob(os.path.join(settings.RAW_TRAIN_DIR, '*.mhd')):
    # load lung and get property, convert to numpy.ndarray
    itk_img = itk.ReadImage(src_path)
    # File name without directory or extension. str.strip('.mhd') must not be
    # used for this: it strips the characters '.', 'm', 'h', 'd' from both
    # ends of the name, corrupting ids that start or end with any of them.
    lung_id = os.path.splitext(os.path.basename(src_path))[0]

    origin = np.array(itk_img.GetOrigin())
    spacing = np.array(itk_img.GetSpacing())
    # Per-axis factor from the scan's own voxel size to the target voxel size.
    rescale = spacing / settings.TARGET_VOXEL_MM

    lung_array = itk.GetArrayFromImage(itk_img)
    num_z, height, width = lung_array.shape
    # Volume size after resampling to the target voxel size, rounded to
    # whole voxels.
    width_res = int(np.round(width * rescale[0]))
    height_res = int(np.round(height * rescale[1]))
    slice_res = int(np.round(num_z * rescale[2]))
    resample = np.array([width_res, height_res, slice_res])

    print('lung:', lung_id, type(lung_array), lung_array.shape, '(z,y,x)')
    print('resample:', type(resample), resample, '(x,y,z)')
    print('origin:', type(origin), origin, '(x,y,z)')
    print('spacing:', type(spacing), spacing, '(x,y,z)')
    print('rescale:', type(rescale), rescale, '(x,y,z)')

    # write down in annotation
    if lung_id in annotated_ids:
        # Double brackets force a DataFrame even when a single row matches.
        records = df_anno.loc[[lung_id]]
        print('label records:', type(records), len(records))
        for no, (_, item) in enumerate(records.iterrows(), start=1):
            # world coor to voxel coor
            # NOTE(review): only the origin is subtracted; the offset is never
            # divided by a voxel size, yet it is bounds-checked against the
            # resampled size below. That lines up only if TARGET_VOXEL_MM is 1
            # (and the image axes are not flipped) -- confirm.
            vCoordX, vCoordY, vCoordZ = np.array([item.coordX, item.coordY, item.coordZ]) - origin
            vCoordZ = np.round(vCoordZ)
            print('vcoord:', vCoordX, vCoordY, vCoordZ, '(x,y,z)')
            assert 0 < vCoordX <= width_res and 0 < vCoordY <= height_res and 0 <= vCoordZ < slice_res, (
                'annotation %d of %s lies outside the resampled volume' % (no, lung_id)
            )

            diameter = np.array([item.diameterX,  item.diameterY,  item.diameterZ])

            anno = {
                'seriesuid': lung_id,
                'width': width_res,
                'height': height_res,
                'slice': slice_res,
                'vcoordX': vCoordX,
                'vcoordY': vCoordY,
                'vcoordZ': vCoordZ,
                'diameterX': diameter[0],
                'diameterY': diameter[1],
                'diameterZ': diameter[2],
                'originX': origin[0],
                'originY': origin[1],
                'originZ': origin[2],
                'spacingX': spacing[0],
                'spacingY': spacing[1],
                'spacingZ': spacing[2],
                'label': item.label
            }
            print(no, anno)
            print('-'*50)
            list_anno.append(anno)
    else:
        print('no label found', lung_id)
    print('-'*100, lung_id, 'done', '\n')

In [None]:
# Every annotation row must have produced exactly one record. The usual cause
# of a mismatch is an annotated series whose scan is missing from the raw
# directory, so those ids are named in the failure message.
missing_ids = set(df_anno.index) - {anno['seriesuid'] for anno in list_anno}
assert len(list_anno) == len(df_anno), (
    'built %d records for %d annotation rows; annotated series without a scan: %s'
    % (len(list_anno), len(df_anno), sorted(missing_ids))
)

In [None]:
# Column order for the output table: the key order of the first record.
columns = list(list_anno[0])
columns

In [None]:
# Assemble the per-nodule records into a table keyed by series id, then
# persist it as the preprocessed annotation file.
df_anno_new = pd.DataFrame(list_anno, columns=columns).set_index('seriesuid')
df_anno_new.to_csv(settings.PREPROCESS_ANNOTATION_FILE, encoding='utf-8')


# 3. Annotation Analysis

In [None]:
# Reload the preprocessed annotations from disk, keyed by series id as a
# string, so this section can run from the saved file.
df_anno_new = (
    pd.read_csv(settings.PREPROCESS_ANNOTATION_FILE)
    .astype({'seriesuid': str})
    .set_index('seriesuid')
)
df_anno_new.head(10)

In [None]:
# Plot only the annotation rows whose label equals 31.
is_label_31 = df_anno_new['label'] == 31.
visual.plot_annotation(df_anno_new[is_label_31], title='')

In [None]:
# Plot the annotation rows whose label equals 1 or 5.
is_label_1 = df_anno_new['label'] == 1.
is_label_5 = df_anno_new['label'] == 5.
visual.plot_annotation(df_anno_new[is_label_1 | is_label_5], title='')