In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# NOTE(review): presumably the scan folders referenced by the `path` variables
# further down live on this drive — confirm before running.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import numpy as np
import os
import cv2
import scipy.ndimage
import matplotlib.pyplot as plt

from scipy.ndimage import zoom

In [None]:
def crop(img,num_pixel=0):
    """Strip a border of ``num_pixel`` pixels from every side of an image.

    The border is removed along the first two axes of ``img``. The default of
    0 returns the full image: the automatic crop of black pixels has already
    been performed upstream, so no additional trimming is needed here.

    Parameters
    ----------
    img : array-like supporting ``.shape`` and 2-axis slicing
        Image (slice) to trim.
    num_pixel : int, optional
        Border width removed from the top, bottom, left and right.

    Returns
    -------
    The sliced region of ``img``.
    """
    row_end = img.shape[0] - num_pixel
    col_end = img.shape[1] - num_pixel
    return img[num_pixel:row_end, num_pixel:col_end]

In [None]:
def resize(img,new_dim=(190,270)):
    """Resample a 2-D image to ``new_dim`` using cubic spline interpolation.

    Parameters
    ----------
    img : numpy.ndarray
        Image to resample; only its first two dimensions are used to derive
        the zoom factors.
    new_dim : sequence of two ints, optional
        Desired (rows, columns) of the output.

    Returns
    -------
    numpy.ndarray
        The output of ``scipy.ndimage.zoom`` with ``order=3``.
    """
    # Per-axis scale factor = target size / current size.
    factors = (new_dim[0] / img.shape[0], new_dim[1] / img.shape[1])
    return zoom(img, factors, order=3)

In [None]:
def sample_stack(stack, rows=6, cols=6, start_with=10, show_every=2):
    """Show a ``rows`` x ``cols`` grid of grayscale slices taken from ``stack``.

    Panel ``i`` (row-major order) displays ``stack[start_with + i*show_every]``.

    Parameters
    ----------
    stack : sequence or array of 2-D images
        Volume to preview; indexed along its first axis.
    rows, cols : int, optional
        Grid dimensions of the figure.
    start_with : int, optional
        Index of the first slice shown.
    show_every : int, optional
        Step between consecutive slices shown.

    Notes
    -----
    Panels whose slice index falls past the end of ``stack`` are left blank
    instead of raising ``IndexError``.
    """
    # squeeze=False keeps `ax` two-dimensional even when rows == 1 or cols == 1,
    # so the [row, col] indexing below is always valid.
    fig,ax = plt.subplots(rows,cols,figsize=[12,12],squeeze=False)

    for i in range(rows*cols):
        ind = start_with + i*show_every
        # Row-major placement: the stride is the number of columns. Using the
        # number of rows here only works for square grids.
        grid_row, grid_col = divmod(i, cols)
        panel = ax[grid_row, grid_col]
        panel.axis('off')
        if ind >= len(stack):
            # Not enough slices to fill this panel: leave it empty.
            continue
        panel.set_title('slice %d' % ind)
        panel.imshow(stack[ind],cmap='gray')

    plt.show()

# LUAD (also called ADC)

### NSCLC-Radiogenomics

In [None]:
# Collect every .npy volume found under `path` (sub-folders included).
path = '.../' # <--- INSERT HERE (...) THE PATH OF CROP&CUT LUAD SCANS
# os.walk yields entries in arbitrary, filesystem-dependent order; sorting makes
# the scan -> output-number mapping used when saving reproducible between runs.
adeno_scans = sorted(
  os.path.join(root, name)
  for root, _dirs, names in os.walk(path)
  for name in names
  if name.endswith('.npy')
)

In [None]:
# Bring every scan to a uniform (250, 190, 270) volume (z, y, x), preview it and save it.
TARGET_SLICES = 250 # number of slices every output volume must have

# `i` is the 1-based scan number used in the output file name.
for i, scan_path in enumerate(adeno_scans, start=1):
  scan = np.load(scan_path)
  scan = scan.astype(int)

  n_slices = scan.shape[0]
  if n_slices > TARGET_SLICES:
    # Too many slices: initial and final slices are removed in (near-)equal number
    # in order to center the volume of the lungs; when the surplus is odd, the
    # extra slice is dropped from the end.
    first = (n_slices - TARGET_SLICES) // 2
    ct_slices = scan[first:first + TARGET_SLICES]
  else:
    # Fewer than (or exactly) 250 slices: zero padding is appended after the last slice.
    ct_slices = np.pad(scan,((0,TARGET_SLICES - n_slices),(0,0),(0,0)),'constant')

  # Standardize the in-plane resolution slice by slice.
  ct_resized = np.zeros((TARGET_SLICES,190,270))
  for j in range(ct_slices.shape[0]):
    ct_resized[j] = resize(crop(ct_slices[j]))

  sample_stack(ct_resized)

  np.save(".../NSCLC-Radiogenomics-LUAD-"+str(i),ct_resized) # <--- INSERT HERE (...) THE PATH WHERE TO SAVE DIMENSIONALLY-UNIFORM LUAD SCANS

### NSCLC-Radiomics

In [None]:
# Collect every .npy volume found under `path` (sub-folders included).
path = '.../'
# os.walk yields entries in arbitrary, filesystem-dependent order; sorting makes
# the scan -> output-number mapping used when saving reproducible between runs.
adeno_scans = sorted(
  os.path.join(root, name)
  for root, _dirs, names in os.walk(path)
  for name in names
  if name.endswith('.npy')
)

In [None]:
# Bring every scan to a uniform (250, 190, 270) volume (z, y, x), preview it and save it.
TARGET_SLICES = 250 # number of slices every output volume must have

# `i` is the 1-based scan number used in the output file name.
for i, scan_path in enumerate(adeno_scans, start=1):
  scan = np.load(scan_path)
  scan = scan.astype(int)

  n_slices = scan.shape[0]
  if n_slices > TARGET_SLICES:
    # Too many slices: initial and final slices are removed in (near-)equal number
    # in order to center the volume; when the surplus is odd, the extra slice is
    # dropped from the end.
    first = (n_slices - TARGET_SLICES) // 2
    ct_slices = scan[first:first + TARGET_SLICES]
  else:
    # Fewer than (or exactly) 250 slices: zero padding is appended after the last slice.
    ct_slices = np.pad(scan,((0,TARGET_SLICES - n_slices),(0,0),(0,0)),'constant')

  # Standardize the in-plane resolution slice by slice.
  ct_resized = np.zeros((TARGET_SLICES,190,270))
  for j in range(ct_slices.shape[0]):
    ct_resized[j] = resize(crop(ct_slices[j]))

  sample_stack(ct_resized)

  np.save(".../NSCLC-Radiomics-LUAD-"+str(i),ct_resized)

### NSCLC-Radiomics-Genomics

In [None]:
# Collect every .npy volume found under `path` (sub-folders included).
path = '.../'
# os.walk yields entries in arbitrary, filesystem-dependent order; sorting makes
# the scan -> output-number mapping used when saving reproducible between runs.
adeno_scans = sorted(
  os.path.join(root, name)
  for root, _dirs, names in os.walk(path)
  for name in names
  if name.endswith('.npy')
)

In [None]:
# Bring every scan to a uniform (250, 190, 270) volume (z, y, x), preview it and save it.
TARGET_SLICES = 250 # number of slices every output volume must have

# `i` is the 1-based scan number used in the output file name.
for i, scan_path in enumerate(adeno_scans, start=1):
  scan = np.load(scan_path)
  scan = scan.astype(int)

  n_slices = scan.shape[0]
  if n_slices > TARGET_SLICES:
    # Too many slices: initial and final slices are removed in (near-)equal number
    # in order to center the volume; when the surplus is odd, the extra slice is
    # dropped from the end.
    first = (n_slices - TARGET_SLICES) // 2
    ct_slices = scan[first:first + TARGET_SLICES]
  else:
    # Fewer than (or exactly) 250 slices: zero padding is appended after the last slice.
    ct_slices = np.pad(scan,((0,TARGET_SLICES - n_slices),(0,0),(0,0)),'constant')

  # Standardize the in-plane resolution slice by slice.
  ct_resized = np.zeros((TARGET_SLICES,190,270))
  for j in range(ct_slices.shape[0]):
    ct_resized[j] = resize(crop(ct_slices[j]))

  sample_stack(ct_resized)

  np.save(".../NSCLC-Radiomics-Genomics-LUAD-"+str(i),ct_resized)

### TCGA-LUAD

In [None]:
# Collect every .npy volume found under `path` (sub-folders included).
path = '.../'
# os.walk yields entries in arbitrary, filesystem-dependent order; sorting makes
# the scan -> output-number mapping used when saving reproducible between runs.
adeno_scans = sorted(
  os.path.join(root, name)
  for root, _dirs, names in os.walk(path)
  for name in names
  if name.endswith('.npy')
)

In [None]:
# Bring every scan to a uniform (250, 190, 270) volume (z, y, x), preview it and save it.
TARGET_SLICES = 250 # number of slices every output volume must have

# `i` is the 1-based scan number used in the output file name.
for i, scan_path in enumerate(adeno_scans, start=1):
  scan = np.load(scan_path)
  scan = scan.astype(int)

  n_slices = scan.shape[0]
  if n_slices > TARGET_SLICES:
    # Too many slices: initial and final slices are removed in (near-)equal number
    # in order to center the volume; when the surplus is odd, the extra slice is
    # dropped from the end.
    first = (n_slices - TARGET_SLICES) // 2
    ct_slices = scan[first:first + TARGET_SLICES]
  else:
    # Fewer than (or exactly) 250 slices: zero padding is appended after the last slice.
    ct_slices = np.pad(scan,((0,TARGET_SLICES - n_slices),(0,0),(0,0)),'constant')

  # Standardize the in-plane resolution slice by slice.
  ct_resized = np.zeros((TARGET_SLICES,190,270))
  for j in range(ct_slices.shape[0]):
    ct_resized[j] = resize(crop(ct_slices[j]))

  sample_stack(ct_resized)

  np.save(".../TCGA-LUAD-LUAD-"+str(i),ct_resized)

# LUSC (also called SCC)

### NSCLC-Radiomics

In [None]:
# Collect every .npy volume found under `path` (sub-folders included).
path = '.../' # <--- INSERT HERE (...) THE PATH OF CROP&CUT LUSC SCANS
# os.walk yields entries in arbitrary, filesystem-dependent order; sorting makes
# the scan -> output-number mapping used when saving reproducible between runs.
squamo_scans = sorted(
  os.path.join(root, name)
  for root, _dirs, names in os.walk(path)
  for name in names
  if name.endswith('.npy')
)

In [None]:
# Bring every scan to a uniform (250, 190, 270) volume (z, y, x), preview it and save it.
TARGET_SLICES = 250 # number of slices every output volume must have

# `i` is the 1-based scan number used in the output file name.
for i, scan_path in enumerate(squamo_scans, start=1):
  scan = np.load(scan_path)
  scan = scan.astype(int)

  n_slices = scan.shape[0]
  if n_slices > TARGET_SLICES:
    # Too many slices: initial and final slices are removed in (near-)equal number
    # in order to center the volume of the lungs; when the surplus is odd, the
    # extra slice is dropped from the end.
    first = (n_slices - TARGET_SLICES) // 2
    ct_slices = scan[first:first + TARGET_SLICES]
  else:
    # Fewer than (or exactly) 250 slices: zero padding is appended after the last slice.
    ct_slices = np.pad(scan,((0,TARGET_SLICES - n_slices),(0,0),(0,0)),'constant')

  # Standardize the in-plane resolution slice by slice.
  ct_resized = np.zeros((TARGET_SLICES,190,270))
  for j in range(ct_slices.shape[0]):
    ct_resized[j] = resize(crop(ct_slices[j]))

  sample_stack(ct_resized)

  np.save(".../NSCLC-Radiomics-LUSC-"+str(i),ct_resized) # <--- INSERT HERE (...) THE PATH WHERE TO SAVE DIMENSIONALLY-UNIFORM LUSC SCANS

### NSCLC-Radiomics-Genomics

In [None]:
# Collect every .npy volume found under `path` (sub-folders included).
path = '.../'
# os.walk yields entries in arbitrary, filesystem-dependent order; sorting makes
# the scan -> output-number mapping used when saving reproducible between runs.
squamo_scans = sorted(
  os.path.join(root, name)
  for root, _dirs, names in os.walk(path)
  for name in names
  if name.endswith('.npy')
)

In [None]:
# Bring every scan to a uniform (250, 190, 270) volume (z, y, x), preview it and save it.
TARGET_SLICES = 250 # number of slices every output volume must have

# `i` is the 1-based scan number used in the output file name.
for i, scan_path in enumerate(squamo_scans, start=1):
  scan = np.load(scan_path)
  scan = scan.astype(int)

  n_slices = scan.shape[0]
  if n_slices > TARGET_SLICES:
    # Too many slices: initial and final slices are removed in (near-)equal number
    # in order to center the volume; when the surplus is odd, the extra slice is
    # dropped from the end.
    first = (n_slices - TARGET_SLICES) // 2
    ct_slices = scan[first:first + TARGET_SLICES]
  else:
    # Fewer than (or exactly) 250 slices: zero padding is appended after the last slice.
    ct_slices = np.pad(scan,((0,TARGET_SLICES - n_slices),(0,0),(0,0)),'constant')

  # Standardize the in-plane resolution slice by slice.
  ct_resized = np.zeros((TARGET_SLICES,190,270))
  for j in range(ct_slices.shape[0]):
    ct_resized[j] = resize(crop(ct_slices[j]))

  sample_stack(ct_resized)

  np.save(".../NSCLC-Radiomics-Genomics-LUSC-"+str(i),ct_resized)