This notebook shows how to convert train and test datasets to TFRecord file format.

Steps to follow:

1.   Sign in to the competition and download *Test data* and *Train data* from: https://platform.ai4eo.eu/seeing-beyond-the-visible-permanent/data
2.  Unzip *test_data.zip* into `hyperspectral-cnn-soil-estimation/dataset/test_data` and *train_data.zip* into `hyperspectral-cnn-soil-estimation/dataset/train_data`.
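3. Run this notebook: the resulting TFRecord files will be saved in `hyperspectral-cnn-soil-estimation/dataset`. Note that the `%cd` cell below expects the `hyperspectral-cnn-soil-estimation` repository to be located directly in your home directory.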




In [1]:
#Import all required packages
import os, logging, io
# Disable every Python `logging` call of severity WARNING and below.
logging.disable(logging.WARNING)
# Set before `import tensorflow` below so the variable is already present in the
# environment when TensorFlow is loaded.
# NOTE(review): "3" is presumably the most restrictive native-log level — confirm
# against the TensorFlow documentation.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
import pandas as pd
import tensorflow as tf
# NOTE(review): this wildcard import is presumably what provides
# create_tf_example_test / create_tf_example_train used by the cells below;
# consider importing those names explicitly.
from tfrecord_utilities import *
from PIL import Image
import numpy as np

In [2]:
# Move into the dataset folder so that the relative paths defined in the
# following cells resolve correctly.
# A bare `%cd` first switches to the user's home directory, so the second
# line only works when the repository is located directly under it.
%cd
%cd hyperspectral-cnn-soil-estimation/dataset

/home/microsat
/home/microsat/hyperspectral-cnn-soil-estimation/dataset


# Test set

Define directories

In [3]:
# Locations for the test split; all paths are relative to the current working directory.
# Folder handed to create_tf_example_test together with each CSV row.
images_dir = 'test_data/test_data'
# Despite its name this is the path of the CSV file itself (it is read with pd.read_csv).
csv_dir = 'test_gt_v2.csv'
# Output file name template; '{}' is replaced by the file index through str.format.
output_path = 'test_tfrecords{}.record'

Run the following cell to create the TFRecord file

In [4]:
# Define the desired number of TFRecords files
number_of_tfrecords_files = 1
if number_of_tfrecords_files < 1:
    # Fail early with a clear message instead of a ZeroDivisionError (0)
    # or a loop that silently writes nothing (negative values).
    raise ValueError('number_of_tfrecords_files must be at least 1')

# Read csv file content; each row is later handed to create_tf_example_test
csv = pd.read_csv(csv_dir).values

# Sequentially read the dataset and write TFRecord(s).
# Every file receives floor(len(csv) / number_of_tfrecords_files) rows; the
# last file additionally takes the remainder so that no row is left out.
number_of_images_per_file = len(csv) // number_of_tfrecords_files
images_processed = 0

for i in range(number_of_tfrecords_files):
    # Slice bounds of the rows that go into file number i.
    images_index_start = i * number_of_images_per_file
    if i == number_of_tfrecords_files - 1:
        images_index_end = len(csv)
    else:
        images_index_end = images_index_start + number_of_images_per_file

    # The context manager closes the writer even when building or writing an
    # example raises, so the output file handle is never leaked.
    with tf.io.TFRecordWriter(output_path.format(i)) as writer:
        for row in csv[images_index_start:images_index_end]:
            tf_example = create_tf_example_test(row, images_dir)
            writer.write(tf_example.SerializeToString())
            images_processed += 1

    print('Successfully created the TFRecord file: {}'.format(output_path.format(i)))

Successfully created the TFRecord file: test_tfrecords0.record


# Train set

Define directories

In [5]:
# Locations for the train split; all paths are relative to the current working directory.
# Folder handed to create_tf_example_train together with each CSV row.
images_dir = 'train_data/train_data/train_data'
# Despite its name this is the path of the CSV file itself (it is read with pd.read_csv).
csv_dir = 'train_data/train_data/train_gt.csv'
# Output file name template; '{}' is replaced by the file index through str.format.
output_path = 'train_tfrecords{}.record'

Run the following cell to create the TFRecord file

In [6]:
# Define the desired number of TFRecords files
number_of_tfrecords_files = 1
if number_of_tfrecords_files < 1:
    # Fail early with a clear message instead of a ZeroDivisionError (0)
    # or a loop that silently writes nothing (negative values).
    raise ValueError('number_of_tfrecords_files must be at least 1')

# Read csv file content; each row is later handed to create_tf_example_train
csv = pd.read_csv(csv_dir).values

# Sequentially read the dataset and write TFRecord(s).
# Every file receives floor(len(csv) / number_of_tfrecords_files) rows; the
# last file additionally takes the remainder so that no row is left out.
number_of_images_per_file = len(csv) // number_of_tfrecords_files
images_processed = 0

for i in range(number_of_tfrecords_files):
    # Slice bounds of the rows that go into file number i.
    images_index_start = i * number_of_images_per_file
    if i == number_of_tfrecords_files - 1:
        images_index_end = len(csv)
    else:
        images_index_end = images_index_start + number_of_images_per_file

    # The context manager closes the writer even when building or writing an
    # example raises, so the output file handle is never leaked.
    with tf.io.TFRecordWriter(output_path.format(i)) as writer:
        for row in csv[images_index_start:images_index_end]:
            tf_example = create_tf_example_train(row, images_dir)
            writer.write(tf_example.SerializeToString())
            images_processed += 1

    print('Successfully created the TFRecord file: {}'.format(output_path.format(i)))

Successfully created the TFRecord file: train_tfrecords0.record
