In [1]:
import numpy as np

import modin.pandas as pd

import os
os.environ["MODIN_ENGINE"] = "ray"  # Modin will use Ray

import skimage

from matplotlib import pyplot as plt

In [2]:
# Dataset tag embedded in every data directory name built in this notebook.
DATASET_NAME = "BRATS2013"
# Side length of the source patches; also the window shape used when a patch
# is extracted from an image.
PATCH_SIZE = 65
# String tag (not a boolean) embedded in the data directory names.
IS_BALANCED = "balanced"
# Split identifier and split name; both are path components.
SPLIT_ID = 0
SPLIT_NAME = "train"
# JSON index of the source patches for the selected dataset / split.
DATASET_PATH = f"../data/{DATASET_NAME}_patches_{PATCH_SIZE}_{IS_BALANCED}/{SPLIT_ID}/{SPLIT_NAME}_df.json"

In [3]:
# Load the patch index for the selected split. export_patches later reads the
# 'img_path', 'patch_id' and 'label' fields of each row.
df = pd.read_json(DATASET_PATH)

2023-01-02 19:45:26,411	INFO worker.py:1538 -- Started a local Ray instance.


In [4]:
# Side lengths of the patches to export; export_patches writes one patch per
# entry and centre-crops the smaller ones from the extracted source patch.
PATCH_SIZES = [65, 53, 33]
# Number of label classes; the exported frame shows labels from 0 to 5.
NUM_CLASSES = 6

In [5]:
def crop_center(img, cropx, cropy):
  """Return the centred ``cropy`` x ``cropx`` region of a 2-D array.

  Args:
    img: 2-D array indexed as (row, column).
    cropx: Width of the crop, in columns.
    cropy: Height of the crop, in rows.

  Returns:
    The slice of ``img`` with shape ``(cropy, cropx)`` taken around its centre.

  Raises:
    ValueError: If ``img`` is not 2-D (shape unpacking fails), or if a crop
      size is negative or larger than the matching image dimension.
  """
  y, x = img.shape

  # An oversized crop would make the start index negative, and the slice below
  # would then silently return a wrong (often empty) region instead of failing.
  if not (0 <= cropx <= x and 0 <= cropy <= y):
    raise ValueError(
      f"crop size ({cropy}, {cropx}) does not fit inside image of shape ({y}, {x})"
    )

  startx = x // 2 - (cropx // 2)
  starty = y // 2 - (cropy // 2)
  return img[starty:starty + cropy, startx:startx + cropx]

In [6]:
def export_patches(df_row):
  """Export one patch per size in PATCH_SIZES for a single index row.

  Loads ``<img_path>/img.npy``, takes from every channel the
  PATCH_SIZE x PATCH_SIZE window addressed by ``patch_id``, centre-crops it to
  each size in PATCH_SIZES and writes each result with ``np.save``.

  Args:
    df_row: Row providing 'img_path' (directory holding ``img.npy``),
      'patch_id' (read at positions 0 and 1 as the window's row and column
      index) and 'label'.
      # NOTE(review): assumes img.npy is (channels, height, width) - confirm.

  Returns:
    dict with one ``patch_{size}_x_{size}_img_path`` entry per patch size,
    holding the path passed to ``np.save``, followed by ``patch_label``.
  """
  # Everything that does not depend on the target size is done once per row:
  # loading the image and extracting the full-size source patch.
  img_np = np.load(f"{df_row['img_path']}/img.npy")
  num_channels = img_np.shape[0]
  patch_row, patch_col = df_row['patch_id'][0], df_row['patch_id'][1]

  source_patch_np = np.empty((num_channels, PATCH_SIZE, PATCH_SIZE))
  for channel in range(num_channels):
    patches = skimage.util.view_as_windows(
      arr_in=img_np[channel, ...], window_shape=(PATCH_SIZE, PATCH_SIZE),
      step=1
    )
    source_patch_np[channel, ...] = patches[patch_row, patch_col]

  patch_export_name = df_row['img_path'].split("/")[-1].split(".")[0]

  new_df_row = {}

  for patch_size in PATCH_SIZES:
    patch_np = np.empty((num_channels, patch_size, patch_size))

    for channel in range(num_channels):
      patch = source_patch_np[channel]

      # Only sizes other than the source size need cropping.
      if patch_size != PATCH_SIZE:
        patch = crop_center(patch, patch_size, patch_size)

      patch_np[channel, ...] = patch

    patch_export_dir = f"../data/{DATASET_NAME}_patches_{patch_size}_{IS_BALANCED}/{SPLIT_ID}/{SPLIT_NAME}"
    # exist_ok avoids a FileExistsError when several workers process rows
    # concurrently and race to create the same directory.
    os.makedirs(patch_export_dir, exist_ok=True)

    # np.save appends ".npy" when the name lacks it, so the file on disk is
    # "<patch_export_path>.npy" while the recorded path has no extension.
    patch_export_path = f"{patch_export_dir}/{patch_export_name}_{patch_row}_{patch_col}"
    np.save(patch_export_path, patch_np)

    new_df_row[f"patch_{patch_size}_x_{patch_size}_img_path"] = patch_export_path

  new_df_row["patch_label"] = df_row["label"]

  return new_df_row

In [7]:
# Run export_patches on every row (axis=1); result_type="expand" turns each
# returned dict into columns.
# NOTE: this rebinds `df` to the expanded frame, so re-running this cell
# without first re-running the load cell applies export_patches to rows that
# no longer carry 'img_path' / 'patch_id' / 'label'.
df = df.apply(
  export_patches, axis=1, result_type="expand"
)

[2m[33m(raylet)[0m Error processing line 1 of /home/dansolombrino/.local/lib/python3.10/site-packages/modin-autoimport-pandas.pth:
[2m[33m(raylet)[0m 
[2m[33m(raylet)[0m   Traceback (most recent call last):
[2m[33m(raylet)[0m     File "/usr/lib/python3.10/site.py", line 192, in addpackage
[2m[33m(raylet)[0m       exec(line)
[2m[33m(raylet)[0m     File "<string>", line 1, in <module>
[2m[33m(raylet)[0m     File "/home/dansolombrino/.local/lib/python3.10/site-packages/pandas/__init__.py", line 16, in <module>
[2m[33m(raylet)[0m       raise ImportError(
[2m[33m(raylet)[0m   ImportError: Unable to import required dependencies:
[2m[33m(raylet)[0m   pytz: No module named 'pytz'
[2m[33m(raylet)[0m 
[2m[33m(raylet)[0m Remainder of file ignored
[2m[33m(raylet)[0m Error processing line 1 of /home/dansolombrino/.local/lib/python3.10/site-packages/modin-autoimport-pandas.pth:
[2m[33m(raylet)[0m 
[2m[33m(raylet)[0m   Traceback (most recent call last):
[

In [8]:
# Inspect the result: one path column per patch size plus 'patch_label'.
df

Unnamed: 0,patch_65_x_65_img_path,patch_53_x_53_img_path,patch_33_x_33_img_path,patch_label
7901401,../data/BRATS2013_patches_65_balanced/0/train/...,../data/BRATS2013_patches_53_balanced/0/train/...,../data/BRATS2013_patches_33_balanced/0/train/...,0
2028150,../data/BRATS2013_patches_65_balanced/0/train/...,../data/BRATS2013_patches_53_balanced/0/train/...,../data/BRATS2013_patches_33_balanced/0/train/...,0
15177203,../data/BRATS2013_patches_65_balanced/0/train/...,../data/BRATS2013_patches_53_balanced/0/train/...,../data/BRATS2013_patches_33_balanced/0/train/...,0
25204945,../data/BRATS2013_patches_65_balanced/0/train/...,../data/BRATS2013_patches_53_balanced/0/train/...,../data/BRATS2013_patches_33_balanced/0/train/...,0
17604224,../data/BRATS2013_patches_65_balanced/0/train/...,../data/BRATS2013_patches_53_balanced/0/train/...,../data/BRATS2013_patches_33_balanced/0/train/...,0
...,...,...,...,...
19492581,../data/BRATS2013_patches_65_balanced/0/train/...,../data/BRATS2013_patches_53_balanced/0/train/...,../data/BRATS2013_patches_33_balanced/0/train/...,5
25020124,../data/BRATS2013_patches_65_balanced/0/train/...,../data/BRATS2013_patches_53_balanced/0/train/...,../data/BRATS2013_patches_33_balanced/0/train/...,5
1887788,../data/BRATS2013_patches_65_balanced/0/train/...,../data/BRATS2013_patches_53_balanced/0/train/...,../data/BRATS2013_patches_33_balanced/0/train/...,5
9780033,../data/BRATS2013_patches_65_balanced/0/train/...,../data/BRATS2013_patches_53_balanced/0/train/...,../data/BRATS2013_patches_33_balanced/0/train/...,5


In [9]:
# Write one labels index per patch size: the path column for that size paired
# with the shared 'patch_label' column.
for patch_size in PATCH_SIZES:
  df_patch_size = df[[f"patch_{patch_size}_x_{patch_size}_img_path", "patch_label"]]

  # The JSON file is written to the same "<...>/{SPLIT_ID}" directory that
  # contains the exported patch directory for this size.
  df_export_path = f"../data/{DATASET_NAME}_patches_{patch_size}_{IS_BALANCED}/{SPLIT_ID}/{SPLIT_NAME}_labels_df.json"
  df_patch_size.to_json(df_export_path)
  