# Connect to google drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Dependencies

In [2]:
#Dependencies to load the dataset in tf.data format
!pip install med_dataloader==0.1.12 -q

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.7/52.7 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for med_dataloader (setup.py) ... [?25l[?25hdone


In [3]:
#Link colab to the directories of a given google drive account
import os
from google.colab import drive
ROOT_PATH = os.path.join(os.sep, 'content' , 'gdrive')
drive.mount(ROOT_PATH)

Mounted at /content/gdrive


In [4]:
ROOT_PATH

'/content/gdrive'

In [5]:
#Possible usefull dependencies
import med_dataloader as mdl
import numpy as np

import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
from tensorflow.keras import Model
import keras

from datetime import datetime
from dateutil.tz import gettz
import SimpleITK as sitk

import scipy
import cv2

import SimpleITK as sitk
import imageio

from sklearn.utils import shuffle
import re

In [6]:
WD_PATH = os.path.join(ROOT_PATH, 'MyDrive', 'PROJECT_NEURO')

DATASET_PATH = os.path.join(WD_PATH, 'Dataset_GS') #Move the folder containig dataset (e.g. Dataset_SP) into WD_PATH

MODELS_PATH = os.path.join(WD_PATH, 'Models') #Folder where best trained models will be saved

if not os.path.exists(MODELS_PATH): #si può togliere
    os.mkdir(MODELS_PATH)


Uncached Data

In [7]:
# lisa di unchached images' file name
images = os.listdir('/content/gdrive/MyDrive/PROJECT_NEURO/GS_images')

In [8]:
# exctract the subjects
subjects = set([i[:6] for i in images])
subjects = sorted(list(subjects))
subjects

['s00001', 's00002', 's00004', 's00005']

In [9]:
# numero di immagini per ciascun paziente
# Inizializza un dizionario per tenere traccia del numero di immagini per soggetto
images_for_subjects_dict = {}

# Conta il numero di immagini per ciascun soggetto
for image in images:
    subject = image[:6]
    images_for_subjects_dict[subject] = images_for_subjects_dict.get(subject, 0) + 1

# Se necessario, converte il dizionario in una lista ordinata di conteggi
images_for_subjects = [images_for_subjects_dict.get(subject, 0) for subject in subjects]

print(images_for_subjects)


[280, 240, 150, 150]


# Load data

In [10]:
NUM_CLASSES=7

X_train_denoised=np.load('/content/gdrive/MyDrive/PROJECT_NEURO/Processed_X/X_train_denoised.npy')

y_train=np.load('/content/gdrive/MyDrive/PROJECT_NEURO/y/y_train.npy')


# Data Augmentation

In [11]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Assuming you have x_train and y_train as your training data
# x_train.shape = (500, 256, 256, 1)
# y_train.shape = (500, 256, 256, 7)
seed=42
np.random.seed(seed)
# Create an instance of ImageDataGenerator with desired augmentation settings
data_gen = ImageDataGenerator(
    rotation_range=10, #Degree range for random rotations
    width_shift_range=5, #if >=1 lateral shift of the image
    height_shift_range=5,  #if >=1 vertical shift of the image
    zoom_range=0.1, #Range for random zoom.
    horizontal_flip=False,
    vertical_flip=False,
    fill_mode='nearest'
)

# Fit the ImageDataGenerator on your data
data_gen.fit(tf.expand_dims(X_train_denoised, axis=-1))


## Visualize augmented data

In [None]:
# num_da_vedere = 100
num_da_vedere = 50
num_samples = X_train_denoised.shape[0]

sample_indices = np.random.choice(num_samples, num_da_vedere, replace=False)

selected_samples_x = X_train_denoised[sample_indices]
selected_samples_y = y_train[sample_indices]
selected_samples_y=np.argmax(selected_samples_y,axis=-1)

c = np.stack([selected_samples_x, selected_samples_y], axis=-1)

aug_c = data_gen.flow(c, batch_size=10, shuffle=True)
aug_c=aug_c[0]

aug_x=aug_c[:,:,:,0]
aug_y=aug_c[:,:,:,1]

plt.figure(figsize=(15, 5 * num_da_vedere))
for i in range(num_da_vedere):
  plt.subplot(num_da_vedere,4,4*i+1)
  plt.imshow(selected_samples_x[i, :, :], cmap='gray')
  plt.title(f'Original - Sample {[i]}')

  plt.subplot(num_da_vedere,4,4*i+2)
  plt.imshow(aug_x[i, :, :], cmap='gray')
  plt.title(f'Augmented - Sample {[i]}')

  plt.subplot(num_da_vedere,4,4*i+3)
  plt.imshow(selected_samples_y[i, :, :])
  plt.title(f'Original mask - Sample {[i]}')

  plt.subplot(num_da_vedere,4,4*i+4)
  plt.imshow(aug_y[i, :, :])
  plt.title(f'Augmented Mask - Sample {[i]}')


## Apply augmentation to train set

In [None]:
num_augmented_items = 500 #maximum number of augmented samples in order to not saturate ram

c = np.stack([X_train_denoised,np.argmax(y_train,axis=-1)], axis=-1)

#data_gen.fit(c) #fit the ImageDataGenerator

aug_c = data_gen.flow(c, batch_size=num_augmented_items, shuffle=True) #apply augmentation
aug_c=aug_c[0]

aug_x=aug_c[:,:,:,0]
aug_y=aug_c[:,:,:,1]

del c
del aug_c

aug_y = np.stack([(aug_y == level).astype(int) for level in np.linspace(0, 6, 7)], axis=-1) #return to shape (N,256,256,7)


#X_train_denoised=np.concatenate([X_train_denoised,aug_x],axis=0)
#y_train=np.concatenate([y_train,aug_y],axis=0)
(X_train_denoised,y_train)=shuffle(X_train_denoised,y_train,random_state=seed)



In [None]:
print('X_train_denoised_augmented.shape: ',X_train_denoised.shape)
print('y_train_augmented.shape: ',y_train.shape)

X_train_denoised_augmented.shape:  (574, 256, 256)
y_train_augmented.shape:  (574, 256, 256, 7)


# Save data

In [None]:
np.save('/content/gdrive/MyDrive/PROJECT_NEURO/Processed_X/X_test_denoised_augmented.npy', X_train_denoised)
np.save('/content/gdrive/MyDrive/PROJECT_NEURO/y/y_train_augmented.npy', y_train)