In [None]:
# Mount the user's Google Drive inside the Colab VM at /content/drive so that
# later cells can read and write files stored there.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Install the `rar` command-line archiver; the last cell of this notebook uses
# it to pack the generated HDF5 file.
!apt install rar

In [None]:
%matplotlib inline

import numpy as np
import os
import cv2
import scipy.ndimage
import matplotlib.pyplot as plt

from random import seed
from random import random
from scipy.ndimage import rotate
from skimage import measure, morphology
from mpl_toolkits.mplot3d.art3d import Poly3DCollection

In [None]:
def augment_ct(ct_scan, rot_angle, zoom_factor):
  """
  Apply the same rotation and centre zoom to every slice of a scan.

  Args:
      ct_scan : array indexed as ct_scan[slice]; each slice is handed to
                `rotate` and then to `cv2_clipped_zoom`
      rot_angle : rotation angle passed to `rotate` (the slice shape is kept,
                  reshape=False)
      zoom_factor : zoom ratio passed to `cv2_clipped_zoom`

  Returns:
      A new float64 array (allocated with np.zeros) of the same shape as
      ct_scan; the input is not modified.
  """
  n_slices = ct_scan.shape[0]
  augmented = np.zeros(ct_scan.shape)

  for k in range(n_slices):
    # Rotation: the result is stored in the float64 buffer first, so the zoom
    # step below always receives float64 data regardless of the input dtype.
    rotated = rotate(ct_scan[k], rot_angle, reshape=False)
    augmented[k] = rotated
    # Zoom, applied to the rotated slice read back from the buffer
    zoomed = cv2_clipped_zoom(augmented[k], zoom_factor)
    augmented[k] = zoomed

  return augmented

In [None]:
def cv2_clipped_zoom(img, zoom_factor):
    """
    Center zoom in/out of the given image, returning an enlarged/shrunk view
    of the image without changing its dimensions.

    For zoom_factor > 1 the central region is cropped and resized up to the
    original size. For zoom_factor < 1 the whole image is resized down and
    padded with zeros back to the original size.

    Args:
        img : image array whose first two axes are (height, width)
        zoom_factor : amount of zoom as a ratio (must be > 0)

    Returns:
        Array with the same height and width as `img`.

    Raises:
        ValueError: if zoom_factor is not strictly positive.
    """
    if zoom_factor <= 0:
        raise ValueError("zoom_factor must be > 0, got {!r}".format(zoom_factor))

    height, width = img.shape[:2] # it's also the final desired shape
    new_height, new_width = int(height * zoom_factor), int(width * zoom_factor)

    # Crop only the part that will remain in the result (more efficient)
    # Centered bbox of the final desired size in resized (larger/smaller) image coordinates
    y1, x1 = max(0, new_height - height) // 2, max(0, new_width - width) // 2
    y2, x2 = y1 + height, x1 + width
    bbox = np.array([y1,x1,y2,x2])
    # Map back to original image coordinates.
    # The builtin `int` replaces the `np.int` alias, which no longer exists in
    # current NumPy releases.
    bbox = (bbox / zoom_factor).astype(int)
    y1, x1, y2, x2 = bbox
    # When zooming out the bbox exceeds the image; slicing clamps it to the full image
    cropped_img = img[y1:y2, x1:x2]

    # Handle padding when downscaling
    resize_height, resize_width = min(new_height, height), min(new_width, width)
    pad_height1, pad_width1 = (height - resize_height) // 2, (width - resize_width) // 2
    pad_height2, pad_width2 = (height - resize_height) - pad_height1, (width - resize_width) - pad_width1
    # Only the two spatial axes are padded; any trailing axes get (0, 0)
    pad_spec = [(pad_height1, pad_height2), (pad_width1, pad_width2)] + [(0,0)] * (img.ndim - 2)

    # cv2.resize takes the target size as (width, height)
    result = cv2.resize(cropped_img, (resize_width, resize_height))
    result = np.pad(result, pad_spec, mode='constant')
    assert result.shape[0] == height and result.shape[1] == width

    return result

In [None]:
%cd ... # <--- INSERT HERE (...) THE PATH OF THE FOLDER

In [None]:
# Count the lines (one entry per line) in each sample list: `grep "" -c` matches every line.
!grep "" -c dimentionally-uniform-preprocessed-LUAD-samples.txt # a .txt file containing the list of dimensionally uniform, preprocessed LUAD scans
!grep "" -c dimentionally-uniform-preprocessed-LUSC-samples.txt # a .txt file containing the list of dimensionally uniform, preprocessed LUSC scans

!grep "" -c train-samples.txt # a .txt file containing the list of training samples
!grep "" -c val-samples.txt # a .txt file containing the list of validation samples
!grep "" -c test-samples.txt # a .txt file containing the list of test samples

In [None]:
# Build the training set: every scan listed in train-samples.txt is stored
# together with one randomly rotated/zoomed copy, plus a one-hot label
# (column 0 = LUAD, column 1 = LUSC).
path_train_list = "./train-samples.txt"
# Close the list file as soon as it is read, and skip blank lines so that an
# empty trailing line is not passed to np.load.
with open(path_train_list, 'r') as path_train_file:
  train_cts = [line.strip() for line in path_train_file if line.strip()]

if not train_cts:
  raise ValueError("No training samples listed in " + path_train_list)

N_SLICES = 250
RES_Y = 190
RES_X = 270

# Data augmentation: each scan contributes the original plus one augmented copy.
# The sample count must match the number of rows actually written in the loop,
# otherwise the extra rows stay all-zero (data and label) in the saved dataset.
COPIES_PER_SCAN = 2
N_SAMPLES = len(train_cts) * COPIES_PER_SCAN

# NOTE(review): no dtype is given, so np.memmap uses its default (uint8 per the
# NumPy docs); the float output of augment_ct is cast on assignment. Confirm
# the preprocessed scans fit that range.
ds_train_fp = np.memmap("./train-samples-memmap.dat", mode='w+', shape=(N_SAMPLES, N_SLICES, RES_Y, RES_X))
ds_train_lab = np.zeros((N_SAMPLES, 2))

# The random draws below come from np.random, so NumPy's generator has to be
# seeded for the augmentation to be reproducible; seeding only Python's
# `random` module has no effect on them.
seed(29)
np.random.seed(29)

mid_slice = N_SLICES // 2  # slice shown in the preview plots

print("N. of train samples: ",N_SAMPLES)
for scan_idx, path in enumerate(train_cts):
  # i counts the samples written so far (2, 4, 6, ...); rows i-2 and i-1
  # belong to the current scan.
  i = (scan_idx + 1) * COPIES_PER_SCAN

  print(i)
  #print(path)

  scans = np.load(path)
  if scans.shape != (N_SLICES, RES_Y, RES_X):
    raise ValueError("Unexpected scan shape {} in {}".format(scans.shape, path))

  rand_angle = np.random.uniform(-15,15)
  rand_zoom = np.random.uniform(0.8,1.2)
  aug_scans = augment_ct(scans, rand_angle, rand_zoom)

  ds_train_fp[i-2] = scans
  ds_train_fp[i-1] = aug_scans

  # The class is taken from the file path; anything without "LUAD" is LUSC.
  if "LUAD" in path:
    print("LUAD")
    ds_train_lab[i-2, 0] = 1
    ds_train_lab[i-1, 0] = 1
  else:
    print("LUSC")
    ds_train_lab[i-2, 1] = 1
    ds_train_lab[i-1, 1] = 1

  print(ds_train_lab[i-2])

  # Show the middle slice of the original and of the augmented scan
  plt.imshow(scans[mid_slice, :, :], cmap=plt.cm.gray, vmin=0, vmax=80)
  plt.show()
  plt.imshow(aug_scans[mid_slice, :, :], cmap=plt.cm.gray, vmin=0, vmax=80)
  plt.show()

# Push the written samples from memory to the backing .dat file
ds_train_fp.flush()

Output hidden; open in https://colab.research.google.com to view.

In [None]:
import h5py

# Write the training samples and their labels to one HDF5 file.
# The file is opened, filled and closed in a single cell through a context
# manager, so it is closed even when one of the writes fails and no handle is
# left open between cells.
with h5py.File('./train-zyx-250x190x270.h5', 'w') as ds_train_h5f:
  ds_train_h5f.create_dataset('train_X', data=ds_train_fp)
  ds_train_h5f.create_dataset('train_Y', data=ds_train_lab)

In [None]:
# Pack the HDF5 file into a RAR archive. Per the RAR manual: `a` adds files to
# an archive, `-m1` selects the fastest compression method and `-v5g` splits
# the archive into 5 GB volumes.
!rar a -m1 -v5g train-zyx-250x190x270.rar "./train-zyx-250x190x270.h5"