# Libraries

In [1]:
import os
import tensorflow as tf
import random
from glob import glob

# Files

In [2]:
data_dir = "./Records/TFrecords"

In [3]:
files = glob(data_dir+"/*.tfrecord")
print(f"Files count: {len(files)}")

Files count: 30


# Split the data into train/val records (80% for training, 20% for validation)

## Shuffle the training set 

In [4]:
random.seed(123)
random.shuffle(files)

## Define the parameters 

In [5]:
length = len(files)
training_length = int(0.8*length)

## Split the data

In [6]:
training_files = files[0:training_length]
validation_files = files[training_length:]

## Verify

In [7]:
print(f"Training dataset length: {len(training_files)}")
print(f"Validation dataset length: {len(validation_files)}")

Training dataset length: 24
Validation dataset length: 6


# Generate the TF.records training and validation files

## Training TF.record

In [8]:
training_tfrecord = tf.data.TFRecordDataset(filenames=training_files, num_parallel_reads=os.cpu_count())

In [9]:
validation_tfrecord = tf.data.TFRecordDataset(filenames=validation_files, num_parallel_reads=os.cpu_count())

# Save the files

## File names

In [10]:
filename_training = "training.tfrecord"
filename_validation = "validation.tfrecord"

## Write the files

### Training

In [11]:
writer_training = tf.data.experimental.TFRecordWriter(filename_training)
writer_training.write(training_tfrecord)

Instructions for updating:
To write TFRecords to disk, use `tf.io.TFRecordWriter`. To save and load the contents of a dataset, use `tf.data.experimental.save` and `tf.data.experimental.load`


### Validation

In [12]:
writer_validation = tf.data.experimental.TFRecordWriter(filename_validation)
writer_validation.write(validation_tfrecord)