In [2]:
import tensorflow as tf
import os
from random import shuffle
import glob
import numpy as np
import sys
import cv2

In [3]:
# When True, image paths and labels are shuffled together before the split.
shuffle_data = True  # shuffle the addresses before saving
# Glob pattern selecting the PNG images inside the LFPICS directory
# (relative to the notebook's working directory).
LastFramePicture_path = 'LFPICS/*.png'

# Collect the paths of every image matching the pattern above.
# NOTE(review): glob.glob() does not promise any particular ordering, while
# the labels are later paired with these paths purely by position -- confirm
# that phaseSep.npy was written in this same order.
addrs = glob.glob(LastFramePicture_path)

In [4]:
# Load the label array from disk. A later cell zips it with `addrs`, so the
# two are matched by position only.
# NOTE(review): assumes one label per image, stored in the same order as
# `addrs` -- TODO confirm.
phaseSep = np.load('phaseSep.npy')
labels = phaseSep # 0 = Not Phase Separated, 1 = Phase Separated

In [5]:
# Paths and labels are matched purely by position, so both sequences must be
# non-empty and of equal length. Without these checks zip() would silently
# truncate to the shorter one and an empty glob result would fail later with
# an opaque unpacking error.
if len(addrs) == 0:
    raise ValueError('No images matched {}'.format(LastFramePicture_path))
if len(addrs) != len(labels):
    raise ValueError('Found {} images but {} labels'.format(len(addrs),
                                                            len(labels)))

if shuffle_data:
    # Shuffle (path, label) pairs together so every path keeps its own label.
    # NOTE(review): no random seed is set in the visible cells, so the split
    # differs from run to run.
    c = list(zip(addrs, labels))
    shuffle(c)
    addrs, labels = zip(*c)

# Divide the data into 60% train, 20% validation, and 20% test.
# The boundaries are computed once so paths and labels are always sliced at
# exactly the same indices.
n_total = len(addrs)
train_end = int(0.6 * n_total)
val_end = int(0.8 * n_total)

train_addrs = addrs[:train_end]
train_labels = labels[:train_end]

val_addrs = addrs[train_end:val_end]
val_labels = labels[train_end:val_end]

test_addrs = addrs[val_end:]
test_labels = labels[val_end:]

In [6]:
def load_image(addr):
    """Read an image file and return it as a 224x224 RGB float32 array.

    Args:
        addr: path of the image file to read.

    Returns:
        numpy.ndarray of dtype float32 holding the image resized to
        224x224 with channels in RGB order. Pixel values are only cast to
        float32; they are not rescaled.

    Raises:
        IOError: if OpenCV could not read the file.
    """
    img = cv2.imread(addr)
    # cv2.imread signals a missing or unreadable file by returning None
    # instead of raising; fail here with the offending path rather than
    # with an obscure error inside cv2.resize.
    if img is None:
        raise IOError('Could not read image: {}'.format(addr))
    img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
    # cv2 loads images as BGR; convert to RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32)
    return img

def _int64_feature(value):
    """Wrap one integer in a tf.train.Feature backed by an Int64List."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
def _bytes_feature(value):
    """Wrap one byte string in a tf.train.Feature backed by a BytesList."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

In [7]:
train_filename = 'train.tfrecords'  # address to save the TFRecords file
# Open the TFRecords file. The try/finally guarantees the writer is closed
# even if an image fails to load or serialize part-way through.
writer = tf.python_io.TFRecordWriter(train_filename)
try:
    for i, addr in enumerate(train_addrs):
        # Report progress every 100 images
        if not i % 100:
            print('Train data: {}/{}'.format(i, len(train_addrs)))
            sys.stdout.flush()
        # Load the image
        img = load_image(addr)
        label = train_labels[i]
        # Create a feature. The image is stored as the raw bytes of the
        # array returned by load_image; tobytes() replaces the deprecated
        # tostring() alias and yields the same bytes.
        feature = {'train/label': _int64_feature(label),
                   'train/image': _bytes_feature(tf.compat.as_bytes(img.tobytes()))}
        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))

        # Serialize to string and write on the file
        writer.write(example.SerializeToString())
finally:
    writer.close()
sys.stdout.flush()

Train data: 0/648
Train data: 100/648
Train data: 200/648
Train data: 300/648
Train data: 400/648
Train data: 500/648
Train data: 600/648


In [8]:
def _write_tfrecords(filename, split, image_addrs, image_labels):
    """Write one data split to a TFRecords file.

    Each image is loaded with load_image() and stored as one tf.train.Example
    with two features: '<split>/label' (int64) and '<split>/image' (the raw
    bytes of the loaded array).

    Args:
        filename: path of the TFRecords file to create.
        split: split name, e.g. 'val' or 'test'. It prefixes the feature
            keys and, capitalized, the progress messages.
        image_addrs: sequence of image file paths.
        image_labels: sequence of integer labels, matched to image_addrs
            by position.
    """
    total = len(image_addrs)
    writer = tf.python_io.TFRecordWriter(filename)
    # try/finally guarantees the writer is closed even if an image fails to
    # load or serialize part-way through.
    try:
        for i in range(total):
            # Report progress every 100 images
            if not i % 100:
                print('{} data: {}/{}'.format(split.capitalize(), i, total))
                sys.stdout.flush()

            # Load the image
            img = load_image(image_addrs[i])
            label = image_labels[i]

            # Create a feature; tobytes() replaces the deprecated tostring()
            # alias and yields the same bytes.
            feature = {
                '{}/label'.format(split): _int64_feature(label),
                '{}/image'.format(split): _bytes_feature(tf.compat.as_bytes(img.tobytes())),
            }

            # Create an example protocol buffer
            example = tf.train.Example(features=tf.train.Features(feature=feature))

            # Serialize to string and write on the file
            writer.write(example.SerializeToString())
    finally:
        writer.close()
    sys.stdout.flush()


val_filename = 'val.tfrecords'  # address to save the TFRecords file
_write_tfrecords(val_filename, 'val', val_addrs, val_labels)

test_filename = 'test.tfrecords'  # address to save the TFRecords file
_write_tfrecords(test_filename, 'test', test_addrs, test_labels)

Val data: 0/216
Val data: 100/216
Val data: 200/216
Test data: 0/216
Test data: 100/216
Test data: 200/216


In [12]:
#35, 25, 30
# Spot-check entries 1..9 of the label array and of the image paths.
# NOTE(review): `phaseSep` is still in its on-disk order, whereas `addrs` was
# re-bound to the shuffled paths in an earlier cell (when shuffle_data is
# set), so the two printed sequences do not line up element by element.
# Compare `labels` with `addrs` to inspect matching pairs.
print(phaseSep[1:10])
print(addrs[1:10])

[1 1 1 1 0 0 0 0 0]
('LFPICS/final_tstep_pa80_pb120_xa30.png', 'LFPICS/final_tstep_pa20_pb130_xa90.png', 'LFPICS/final_tstep_pa30_pb140_xa10.png', 'LFPICS/final_tstep_pa30_pb70_xa50.png', 'LFPICS/final_tstep_pa90_pb150_xa20.png', 'LFPICS/final_tstep_pa40_pb120_xa70.png', 'LFPICS/final_tstep_pa30_pb60_xa10.png', 'LFPICS/final_tstep_pa50_pb120_xa20.png', 'LFPICS/final_tstep_pa10_pb80_xa30.png')
