In [1]:
import os
import sys
import pickle
import numpy as np
from tqdm import tqdm
import tensorflow as tf
sys.path.append('../../../')

In [2]:
from modis_utils.misc import restore_data, cache_data

# Utility functions

In [3]:
def _float_feature(value):
    """Wrap a sequence of floats in a ``tf.train.Feature``.

    Args:
        value: Iterable of numbers; it is passed unchanged as the ``value``
            field of a ``tf.train.FloatList``.

    Returns:
        A ``tf.train.Feature`` whose ``float_list`` holds ``value``.
    """
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)

In [4]:
def convert_to_tfrecord(input_files, output_file, resize=None):
    """Write one ``tf.train.Example`` per input file into a TFRecord file.

    Every path in ``input_files`` is loaded with ``restore_data`` and must
    unpack into exactly four arrays: ``inputs``, ``labels``, ``inputs_pw``
    and ``labels_pw``. Each array is flattened and stored as a float-list
    feature under the key of the same name.

    Args:
        input_files: Iterable of paths accepted by ``restore_data``.
        output_file: Path of the TFRecord file to write.
        resize: When truthy, index 0 along axes 1 and 2 is dropped from each
            of the four arrays before flattening. Despite the name this is
            a crop, not a resize; the arrays therefore need at least three
            axes when it is enabled.
    """
    print('Generating %s' % output_file)
    feature_names = ('inputs', 'labels', 'inputs_pw', 'labels_pw')
    with tf.python_io.TFRecordWriter(output_file) as record_writer:
        for input_file in tqdm(input_files):
            inputs, labels, inputs_pw, labels_pw = restore_data(input_file)
            arrays = (inputs, labels, inputs_pw, labels_pw)
            if resize:
                # Crop all four arrays before any of them is serialized.
                arrays = tuple(array[:, 1:, 1:] for array in arrays)
            feature = {
                name: _float_feature(array.flatten().tolist())
                for name, array in zip(feature_names, arrays)
            }
            example = tf.train.Example(
                features=tf.train.Features(feature=feature))
            record_writer.write(example.SerializeToString())

In [5]:
def convert_to_tfrecord_patch(input_files, output_file):
    """Write one ``tf.train.Example`` per patch entry into a TFRecord file.

    Every path in ``input_files`` is loaded with ``restore_data`` and must
    unpack into exactly four arrays: ``inputs``, ``labels``, ``inputs_pw``
    and ``labels_pw``. For each index ``i`` in ``range(len(inputs))`` the
    ``i``-th entry of every array is flattened and stored as a float-list
    feature under the key of the same name, so each input file contributes
    ``len(inputs)`` records.

    Args:
        input_files: Iterable of paths accepted by ``restore_data``.
        output_file: Path of the TFRecord file to write.
    """
    print('Generating %s' % output_file)
    with tf.python_io.TFRecordWriter(output_file) as record_writer:
        for input_file in tqdm(input_files):
            inputs, labels, inputs_pw, labels_pw = restore_data(input_file)
            named_arrays = (
                ('inputs', inputs),
                ('labels', labels),
                ('inputs_pw', inputs_pw),
                ('labels_pw', labels_pw),
            )
            # The entry count is taken from `inputs`; the other three arrays
            # are indexed with the same positions.
            for i in range(len(inputs)):
                feature = {
                    name: _float_feature(array[i].flatten().tolist())
                    for name, array in named_arrays
                }
                example = tf.train.Example(
                    features=tf.train.Features(feature=feature))
                record_writer.write(example.SerializeToString())

In [6]:
def create_dataset(data_dir, output_dir, f):
    """Build one ``<subset>.tfrecords`` file for each of val, test and train.

    For every subset, the entries of ``data_dir/<subset>`` are listed in
    sorted (lexicographic) order and handed to ``f`` together with the
    destination path ``output_dir/<subset>.tfrecords``.

    Args:
        data_dir: Directory containing the ``val``, ``test`` and ``train``
            sub-directories.
        output_dir: Directory that receives the ``.tfrecords`` files. It is
            created if it does not exist yet.
        f: Callable invoked as ``f(input_files, output_file)`` that performs
            the actual conversion.

    Raises:
        OSError: If a subset directory cannot be listed or ``output_dir``
            cannot be created.
    """
    # Callers used to have to create the directory themselves; doing it here
    # makes the function usable on its own and is a no-op when it exists.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    for subset in ('val', 'test', 'train'):
        input_dir = os.path.join(data_dir, subset)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # record order of the output reproducible across machines and runs.
        input_files = [os.path.join(input_dir, filename)
                       for filename in sorted(os.listdir(input_dir))]
        output_file = os.path.join(output_dir, subset + '.tfrecords')
        try:
            os.remove(output_file)
        except OSError:
            # Best effort: normally this just means no previous output
            # existed. Any real problem surfaces when `f` writes the file.
            pass
        # Convert to tf.train.Example and write them to TFRecords.
        f(input_files, output_file)
        print('Done {}!'.format(subset))

# One output

## Boundary patch

In [8]:
# Serialize the single-output boundary-patch sequences, one TFRecord per subset.
data_dir = 'one_output/sequence_patch_data'
output_dir = 'one_output/data_patch'
# Make sure the destination directory exists before any file is written.
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
# The patch converter already takes (input_files, output_file), so it is
# handed to create_dataset without a wrapper.
f = convert_to_tfrecord_patch
create_dataset(data_dir, output_dir, f)

  0%|          | 0/46 [00:00<?, ?it/s]

Generating one_output/data_patch/val.tfrecords


100%|██████████| 46/46 [00:09<00:00,  4.87it/s]
  0%|          | 0/92 [00:00<?, ?it/s]

Done val!
Generating one_output/data_patch/test.tfrecords


100%|██████████| 92/92 [00:20<00:00,  4.91it/s]
  0%|          | 0/529 [00:00<?, ?it/s]

Done test!
Generating one_output/data_patch/train.tfrecords


100%|██████████| 529/529 [02:01<00:00,  4.96it/s]


Done train!


## Whole image

In [7]:
# Serialize the single-output whole-image sequences, one TFRecord per subset.
data_dir = 'one_output/sequence_data'
output_dir = 'one_output/data'
# Make sure the destination directory exists before any file is written.
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
# resize=True makes the converter drop index 0 along axes 1 and 2 of each array.
f = lambda files, out: convert_to_tfrecord(files, out, resize=True)
create_dataset(data_dir, output_dir, f)

  0%|          | 0/46 [00:00<?, ?it/s]

Generating one_output/data/val.tfrecords


100%|██████████| 46/46 [01:33<00:00,  2.11s/it]
  0%|          | 0/92 [00:00<?, ?it/s]

Done val!
Generating one_output/data/test.tfrecords


100%|██████████| 92/92 [02:51<00:00,  1.73s/it]
  0%|          | 0/529 [00:00<?, ?it/s]

Done test!
Generating one_output/data/train.tfrecords


100%|██████████| 529/529 [16:08<00:00,  1.85s/it]


Done train!


# Multiple output: 12

## Boundary patch

In [None]:
# Serialize the 12-output boundary-patch sequences, one TFRecord per subset.
data_dir = 'multiple_output/12/sequence_patch_data'
output_dir = 'multiple_output/12/data_patch'
# Make sure the destination directory exists before any file is written.
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
# The patch converter already takes (input_files, output_file), so it is
# handed to create_dataset without a wrapper.
f = convert_to_tfrecord_patch
create_dataset(data_dir, output_dir, f)

## Whole image

In [None]:
# Serialize the 12-output whole-image sequences, one TFRecord per subset.
data_dir = 'multiple_output/12/sequence_data'
output_dir = 'multiple_output/12/data'
# Make sure the destination directory exists before any file is written.
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
# resize=True makes the converter drop index 0 along axes 1 and 2 of each array.
f = lambda files, out: convert_to_tfrecord(files, out, resize=True)
create_dataset(data_dir, output_dir, f)

  0%|          | 0/46 [00:00<?, ?it/s]

Generating multiple_output/12/data/val.tfrecords


100%|██████████| 46/46 [02:31<00:00,  3.27s/it]
  0%|          | 0/81 [00:00<?, ?it/s]

Done val!
Generating multiple_output/12/data/test.tfrecords


100%|██████████| 81/81 [04:18<00:00,  3.43s/it]
  0%|          | 0/529 [00:00<?, ?it/s]

Done test!
Generating multiple_output/12/data/train.tfrecords


 86%|████████▌ | 455/529 [25:21<04:18,  3.50s/it]

In [None]:
# Count the patch entries per subset for the 12-output data: the length of
# the first array in each restored file, summed over the subset's files.
data_dir = 'multiple_output/12/sequence_patch_data'
subsets = ('train', 'val', 'test')
n_examples = dict.fromkeys(subsets, 0)
for subset in subsets:
    subset_data_dir = os.path.join(data_dir, subset)
    n_examples[subset] = sum(
        len(restore_data(os.path.join(subset_data_dir, filename))[0])
        for filename in os.listdir(subset_data_dir)
    )
print(n_examples)

In [None]:
# Count the patch entries per subset for the single-output data: the length
# of the first array in each restored file, summed over the subset's files.
data_dir = 'one_output/sequence_patch_data'
subsets = ('train', 'val', 'test')
n_examples = dict.fromkeys(subsets, 0)
for subset in subsets:
    subset_data_dir = os.path.join(data_dir, subset)
    n_examples[subset] = sum(
        len(restore_data(os.path.join(subset_data_dir, filename))[0])
        for filename in os.listdir(subset_data_dir)
    )
print(n_examples)