<a href="https://colab.research.google.com/github/Angelvj/TFG/blob/main/code/read_tfrecords.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import numpy as np, os
import tensorflow as tf
import nibabel as nib

## Set up the distributed strategy
https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/TPUStrategy

https://www.tensorflow.org/guide/tpu#train_a_model_using_keras_high_level_apis

**NOTE**: when using a TPU, it is better to run this notebook on **Kaggle**

In [9]:
# Requested accelerator. Only the value "TPU" triggers TPU discovery; any
# other value selects TensorFlow's default strategy further down.
DEVICE = "GPU"

if DEVICE == "TPU":
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU', tpu.master())
    except ValueError:
        print('Could not connect to TPU')
        tpu = None

    if tpu:
        try:
            print("initializing  TPU ...")
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            strategy = tf.distribute.experimental.TPUStrategy(tpu)
            print("TPU initialized")
        except Exception as err:
            # Report the failure and fall back to the default strategy so that
            # `strategy` is always defined by the time REPLICAS is computed.
            print("failed to initialize TPU:", err)
            DEVICE = "GPU"
    else:
        DEVICE = "GPU"

if DEVICE != "TPU":
    print("Using default strategy for CPU and single GPU")
    strategy = tf.distribute.get_strategy()

if DEVICE == "GPU":
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

# Shared tf.data tuning constant and the number of replicas in the strategy.
AUTO     = tf.data.experimental.AUTOTUNE
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

Using default strategy for CPU and single GPU
Num GPUs Available:  1
REPLICAS: 1


In [10]:
def parse_example(example):
    """Parse one serialized example into an ``(image, name, label)`` tuple.

    The flat ``image`` float values are reshaped using the dimensions stored
    in the record's own ``shape`` feature.

    Args:
        example: a serialized example, as passed to
            ``tf.io.parse_single_example``.

    Returns:
        Tuple ``(image, name, label)``: the reshaped float32 image tensor,
        the string name, and the int64 label.
    """
    feature_spec = {
        # Variable-length features: the flattened voxels and their dimensions
        "image": tf.io.FixedLenSequenceFeature(shape=(), dtype=tf.float32, allow_missing=True),
        "shape": tf.io.FixedLenSequenceFeature(shape=(), dtype=tf.int64, allow_missing=True),
        # Scalar features
        "name": tf.io.FixedLenFeature([], tf.string),
        "label": tf.io.FixedLenFeature([], tf.int64)
    }

    parsed = tf.io.parse_single_example(example, feature_spec)
    volume = tf.reshape(parsed['image'], parsed['shape'])

    return volume, parsed['name'], parsed['label']

def get_dataset(files, repeat = False, shuffle = False, batch_size = 1):
    """Build a ``tf.data`` pipeline that reads and parses TFRecord files.

    Args:
        files: list of TFRecord file paths. Its length is also used as the
            shuffle buffer size when ``shuffle`` is True.
        repeat: if True, the dataset is repeated indefinitely.
        shuffle: if True, records are shuffled and non-deterministic output
            order is allowed.
        batch_size: currently unused; batching is still commented out below.

    Returns:
        An unbatched dataset of ``(image, name, label)`` tuples as produced
        by ``parse_example``.
    """

    raw_ds = tf.data.TFRecordDataset(files, num_parallel_reads=AUTO)
    raw_ds = raw_ds.cache() # TODO: investigate what happens with big datasets

    if repeat:
        # Dataset transformations return a new dataset instead of modifying
        # the receiver, so the result has to be re-assigned.
        raw_ds = raw_ds.repeat()

    if shuffle:
        # NOTE(review): a perfect shuffle needs a buffer as large as the number
        # of records; len(files) only matches that if each file holds a single
        # record -- confirm.
        raw_ds = raw_ds.shuffle(len(files), reshuffle_each_iteration=None)
        opt = tf.data.Options()
        opt.experimental_deterministic = False # outputs need not be produced in deterministic order (faster)
        raw_ds = raw_ds.with_options(opt)

    dataset = raw_ds.map(parse_example, num_parallel_calls=AUTO)

    # Add data augmentation here
    # dataset = dataset.map(lambda img, imgname_or_label: (prepare_image(img, augment=augment, dim=dim), 
    #                                            imgname_or_label), num_parallel_calls=AUTO)

    # dataset = dataset.batch(batch_size * REPLICAS) # experimental_distribute_dataset uses the global batch size
    # dataset = dataset.prefetch(AUTO)
    return dataset

# Testing functions above 

In [None]:
import matplotlib.pyplot as plt

from google.colab import drive
drive.mount('/content/drive')

# Root data folder on the mounted Drive
ROOT = '/content/drive/My Drive/Machine learning/data'

# Brain data, split into preprocessed and raw trees
BRAIN = f'{ROOT}/alzheimer_pet&mri'
BRAIN_PREPROCESSED = f'{BRAIN}/preprocessed'
BRAIN_RAW = f'{BRAIN}/raw'

# One folder per imaging modality inside each tree
PET_PREPROCESSED = f'{BRAIN_PREPROCESSED}/PET'
MRI_PREPROCESSED = f'{BRAIN_PREPROCESSED}/MRI'
PET_RAW = f'{BRAIN_RAW}/PET'
MRI_RAW = f'{BRAIN_RAW}/MRI'

# Preprocessed MRI subtypes (divided into grey and white matter)
MRI_MATTER = ['GREY', 'WHITE']

# Class subfolders; each class label is its position in this list
CLASSES = ['ppNOR', 'ppAD', 'ppMCI']
LABELS = {class_name: index for index, class_name in enumerate(CLASSES)}

# ============== tfrecord folders ==============

PET_PREPROCESSED_TFREC = f'{PET_PREPROCESSED}/tfrecords'
MRI_PREPROCESSED_TFREC = f'{MRI_PREPROCESSED}/tfrecords'
PET_RAW_TFREC = f'{PET_RAW}/tfrecords'
MRI_RAW_TFREC = f'{MRI_RAW}/tfrecords'

# =============== Get tfrecord filenames ===============

def get_tfrec(path):
    """Return the paths of all ``.tfrec`` files directly inside ``path``.

    Args:
        path: directory to scan, with or without a trailing path separator.

    Returns:
        Sorted list of ``path`` joined with each file name whose extension is
        exactly ``.tfrec``.
    """
    # os.path.join copes with a missing trailing separator, and sorting makes
    # the result independent of the order in which os.listdir reports entries.
    return sorted(
        os.path.join(path, f)
        for f in os.listdir(path)
        if os.path.splitext(f)[1] == '.tfrec'
    )

# Train/test TFRecord file lists for preprocessed PET
pet_prep_train, pet_prep_test = (
    get_tfrec(PET_PREPROCESSED_TFREC + split) for split in ('/train/', '/test/')
)

# Train/test TFRecord file lists for preprocessed MRI, grey matter
mri_prep_grey_train, mri_prep_grey_test = (
    get_tfrec(MRI_PREPROCESSED_TFREC + split) for split in ('/GREY/train/', '/GREY/test/')
)

# Train/test TFRecord file lists for preprocessed MRI, white matter
mri_prep_white_train, mri_prep_white_test = (
    get_tfrec(MRI_PREPROCESSED_TFREC + split) for split in ('/WHITE/train/', '/WHITE/test/')
)

def show_slices(slices):
    """Display a row of image slices in grayscale, one subplot per slice.

    Args:
        slices: sequence of arrays to plot; each one is transposed before
            being drawn with its origin in the lower-left corner.
    """
    # squeeze=False always returns a 2-D array of Axes, so a single slice
    # works as well as several.
    _, axes = plt.subplots(1, len(slices), squeeze=False)
    for ax, img in zip(axes[0], slices):
        ax.imshow(img.T, cmap="gray", origin="lower")
    plt.show()

dataset = get_dataset(pet_prep_train)

# Preview the first 10 records: take the image (first tuple element) and plot
# one fixed slice along each of its first three axes, using channel 0.
for record in dataset.take(10):
    volume = record[0].numpy()
    show_slices([
        volume[26, :, :, 0],
        volume[:, 30, :, 0],
        volume[:, :, 16, 0],
    ])