In [1]:
import os
import pandas as pd
import numpy as np
import cv2
from PIL import Image

In [2]:
class CFG:
    """Notebook-wide settings: input locations, groups to convert, normalisation."""

    # --- inputs ---------------------------------------------------------
    # Root directory that the path columns of the ground-truth CSV are joined onto.
    data_root = "/kaggle/input"
    # CSV index read with pd.read_csv; the notebook uses its
    # "group", "img_path" and "msk_path" columns.
    gt_df = "/kaggle/input/sennet-hoa-gt-data/gt.csv"

    # --- groups to convert ----------------------------------------------
    # Other group names that were listed here earlier and can be re-enabled:
    # 'kidney_1_voi', 'kidney_2', 'kidney_3_sparse'.
    train_groups = ["kidney_1_dense", "kidney_3_dense"]

    # --- intensity normalisation ----------------------------------------
    # Percentiles of the whole volume that are mapped to 0 and 1 respectively.
    low = 10
    high = 99.8
    # Scale factor applied to the part of a value that falls outside [0, 1]
    # after normalisation (compresses the tails before the final clip).
    alpha = 0.01

In [3]:
def save_as_png(array, file_path):
    """Write ``array`` to ``file_path`` as a PNG image.

    Args:
        array: Array accepted by ``PIL.Image.fromarray``. The callers in this
            notebook pass 2-D ``uint8`` slices.
        file_path: Destination path. Missing parent directories are created.
    """
    # A bare file name has an empty dirname and os.makedirs('') raises,
    # so only create the directory tree when there is one to create.
    file_dir = os.path.dirname(file_path)
    if file_dir:
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(file_dir, exist_ok=True)

    img = Image.fromarray(array)
    img.save(file_path, format='PNG')

In [4]:
# Load the ground-truth index, then prefix both path columns with the data root.
gt_df = pd.read_csv(CFG.gt_df)
for path_col in ("img_path", "msk_path"):
    gt_df[path_col] = gt_df[path_col].map(
        lambda rel_path: os.path.join(CFG.data_root, rel_path)
    )

In [5]:
# Input prefix that is swapped for '.' so that outputs are written relative
# to the current working directory, mirroring the input directory layout.
_SRC_PREFIX = '/kaggle/input/blood-vessel-segmentation/train'


def _to_png_path(src_path):
    """Map an input .tif path to the output .png path under the working directory."""
    return src_path.replace(_SRC_PREFIX, '.').replace('.tif', '.png')


def _read_or_fail(path, flags):
    """Read an image with OpenCV, raising instead of silently returning None."""
    data = cv2.imread(path, flags)
    if data is None:
        # cv2.imread signals missing/undecodable files by returning None.
        raise OSError(f"cv2.imread could not read {path!r}")
    return data


for group in CFG.train_groups:
    print('doing', group)
    # Explicit equality instead of query("group in @group"), which only works
    # because pandas rewrites a string right-hand side into a one-item list.
    df = gt_df[gt_df["group"] == group].reset_index(drop=True)
    if df.empty:
        raise ValueError(f"no rows for group {group!r} in {CFG.gt_df}")
    img_paths = sorted(df["img_path"].tolist())
    msk_paths = sorted(df["msk_path"].tolist())

    # Stack all slices so the percentiles are computed over the whole volume.
    volume = np.stack([_read_or_fail(f, cv2.IMREAD_UNCHANGED) for f in img_paths])
    print('loaded volume')

    # Normalize the entire volume: CFG.low percentile -> 0, CFG.high percentile -> 1
    xmin = np.percentile(volume, CFG.low)
    xmax = np.percentile(volume, CFG.high)
    if xmax == xmin:
        # A constant intensity range would divide by zero and write garbage.
        raise ValueError(
            f"degenerate intensity range for group {group!r}: "
            f"percentiles {CFG.low} and {CFG.high} are both {xmin}"
        )
    volume = (volume - xmin) / (xmax - xmin)
    print('part one done')

    # Adjusting extremes: shrink the out-of-range part by CFG.alpha.
    # Each full-volume boolean mask is computed once and released right away.
    above = volume > 1
    volume[above] = (volume[above] - 1) * CFG.alpha + 1
    del above
    below = volume < 0
    volume[below] = volume[below] * CFG.alpha
    del below
    print('part two done')

    print('normalized, now saving')

    # Save each slice of the volume. Scaling to 8-bit is done per slice so no
    # additional full-volume temporary is allocated.
    for img_path, img in zip(img_paths, volume):
        img = np.clip(img * 255, 0, 255).astype(np.uint8)
        save_as_png(img, _to_png_path(img_path))

    del volume

    print('done saving')

    # save masks: re-encoded as PNG without any intensity change
    for label_path in msk_paths:
        label = _read_or_fail(label_path, cv2.IMREAD_GRAYSCALE)
        save_as_png(label, _to_png_path(label_path))

doing kidney_1_dense
loaded volume
part one done
part two done
normalized, now saving
done saving
doing kidney_3_dense
loaded volume
part one done
part two done
normalized, now saving
done saving


In [6]:
# sanity check: reload one saved slice/mask pair and summarise what was written

image_path = '/kaggle/working/kidney_1_dense/images/0154.png'
mask_path = '/kaggle/working/kidney_1_dense/labels/0154.png'

# Image.open is lazy and keeps the file open; the context managers release
# the handles as soon as the pixel data has been copied into numpy arrays.
# format/mode/size stay available on the objects afterwards.
with Image.open(image_path) as image, Image.open(mask_path) as mask:
    image_array = np.array(image)
    mask_array = np.array(mask)

# A slice and its label must cover the same pixel grid.
assert image_array.shape == mask_array.shape, (
    f"image {image_array.shape} and mask {mask_array.shape} shapes differ"
)

image_info = {
    'format': image.format,
    'mode': image.mode,
    'size': image.size,
    'dtype': image_array.dtype,
    'max_value': np.max(image_array),
    'min_value': np.min(image_array)
}

mask_info = {
    'format': mask.format,
    'mode': mask.mode,
    'size': mask.size,
    'dtype': mask_array.dtype,
    'unique_values': np.unique(mask_array)
}

image_info, mask_info

({'format': 'PNG',
  'mode': 'L',
  'size': (912, 1303),
  'dtype': dtype('uint8'),
  'max_value': 255,
  'min_value': 0},
 {'format': 'PNG',
  'mode': 'L',
  'size': (912, 1303),
  'dtype': dtype('uint8'),
  'unique_values': array([  0, 255], dtype=uint8)})