<a href="https://colab.research.google.com/github/Niccolo-Rocchi/Medical_Imaging_project/blob/main/data_upload.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [42]:
%%capture 
!pip install pydicom

In [43]:
# For reading images
import numpy as np
from pydicom import dcmread 
import glob
from google.colab import drive
# For dealing with datasets
import pandas as pd
# For plots
import matplotlib.pyplot as plt
# For nets utils
import tensorflow as tf

In [44]:
# Mount Drive data
drive.mount('/content/drive/')
# Make the dataset folder the working directory: later cells use paths that are
# relative to it ('./encoded_pixels.csv' and './dicom_files/').
%cd /content/drive/MyDrive/pneumotorax_data

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).
/content/drive/MyDrive/DS Lab in Medicine - projects/Medical Imaging - project/pneumotorax_data


In [45]:
# Table read from the working directory; DataGenerator.data_generator unpacks
# each row positionally as (ImageId, encoded pixels).
encoded_pixels = pd.read_csv('./encoded_pixels.csv')

## Data upload

In [46]:
# RLE to mask conversion provided by competition organizers with the dataset.
def rle2mask(rle, width, height):
    """Decode a relative run-length-encoded string into a 2-D mask.

    Args:
        rle: Whitespace-separated integers read as (start, length) pairs.
            Each start is an offset measured from the end of the previous
            run (from position 0 for the first run). An empty string, or the
            single token "-1", yields an all-zero mask.
            NOTE(review): "-1" is assumed to be the dataset's "no mask"
            marker -- confirm against encoded_pixels.csv.
        width: Size of the first axis of the returned mask.
        height: Size of the second axis of the returned mask.

    Returns:
        A float64 array of shape (width, height), filled in column-major
        order, with 255 inside the runs and 0 elsewhere.

    Raises:
        IndexError: If a start value has no matching length.
    """
    mask = np.zeros(width * height)
    tokens = rle.split()

    # A lone "-1" cannot be split into (start, length) pairs and previously
    # crashed with IndexError; treat it as "nothing to draw".
    if tokens == ["-1"]:
        return mask.reshape(width, height, order='F')

    array = np.asarray([int(x) for x in tokens])
    starts = array[0::2]
    lengths = array[1::2]

    current_position = 0
    for index, start in enumerate(starts):
        # Starts are relative, so accumulate them into an absolute position.
        current_position += start
        mask[current_position:current_position + lengths[index]] = 255
        current_position += lengths[index]

    return mask.reshape(width, height, order='F')

In [47]:
# Create input for Keras' fit_generator function
class DataGenerator:
  """Streams (image, mask) pairs from disk and groups them into batches.

  Relies on names defined at notebook level: the `encoded_pixels` DataFrame,
  `rle2mask`, `dcmread`, and a working directory containing `./dicom_files/`.
  """

  # Method that yields (image, mask) tuple
  def data_generator(self, total_items):
    """Yield one (image, mask) tuple per readable row of `encoded_pixels`.

    Args:
      total_items: Number of leading rows of `encoded_pixels` to visit.
        Values larger than the table are clamped to its length.

    Yields:
      (image, mask): `image` is the DICOM pixel array as float64 divided by
      65536; `mask` is the output of `rle2mask(rle, 1024, 1024)`.
      Rows whose DICOM file cannot be read are skipped, so fewer than
      `total_items` tuples may be produced.
    """
    # Never index past the end of the table.
    n_rows = min(total_items, len(encoded_pixels))
    for i in range(n_rows):
      # Extract ImageId and encoded pixels
      image_id, rle = tuple(encoded_pixels.iloc[i])
      # Read the image associated with ImageId. Skipping is best-effort, but
      # the loop always advances, so an unreadable file cannot cause an
      # endless retry of the same row.
      try:
        dcm_file = dcmread(f"./dicom_files/{image_id}.dcm")
      except Exception:
        continue
      dcm_image = dcm_file.pixel_array
      # Convert encoded pixels to mask
      mask = rle2mask(rle, 1024, 1024)
      # Rescale image. tf.keras.layers.Rescaling(...) only constructs a layer
      # (scale, offset); the division is applied directly instead.
      # NOTE(review): 65536 presumably targets 16-bit pixel data -- confirm
      # the bit depth of the DICOM files.
      image = dcm_image.astype(np.float64) / 65536
      yield (image, mask)

  # Method used to feed the net
  def net_input(self, total_items, batch_size, epochs):
    """Yield shuffled (batch_images, batch_masks) tensors for `epochs` passes.

    Args:
      total_items: Forwarded to `data_generator`.
      batch_size: Number of (image, mask) pairs per yielded batch.
      epochs: Number of passes over the data before the generator ends.

    Yields:
      (batch_images, batch_masks): tensors of dtype float64 and int64.
    """
    # from_generator needs a callable that returns a fresh generator, so the
    # data can be replayed for every repetition.
    dataset = tf.data.Dataset.from_generator(
        lambda: self.data_generator(total_items),
        output_types=(tf.float64, tf.int64))
    # Shuffle data
    # NOTE(review): the buffer can hold up to 10000 pairs in memory and each
    # mask alone is 1024x1024 -- consider a smaller buffer.
    dataset = dataset.shuffle(buffer_size=10000, seed=123)
    # Create epochs
    dataset = dataset.repeat(epochs)
    # Create batches
    dataset = dataset.batch(batch_size)
    # Iterate the dataset directly (TF2 style); the loop ends cleanly once
    # all epochs have been consumed.
    for batch_images, batch_masks in dataset:
      yield (batch_images, batch_masks)

## References

1. https://towardsdatascience.com/medical-image-dataloaders-in-tensorflow-2-x-ee5327a4398f
2. https://stackoverflow.com/questions/55375416/tensorflow-model-fit-using-a-dataset-generator
3. https://faroit.com/keras-docs/1.2.0/models/model/
4. https://www.tensorflow.org/api_docs/python/tf/keras/layers/Rescaling
5. https://www.tensorflow.org/api_docs/python/tf/data/Dataset?version=nightly#from_generator