In [1]:
# NOTE: this is a custom cell that contains the common imports I personally
# use; these may or may not be necessary for the following examples.

# DL framework
import tensorflow as tf

from datetime import datetime

# common packages
import numpy as np
import os # handling file i/o
import sys
import math
import time # timing epochs

# for ordered dict when building layer components
import collections

# plotting pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import pyplot
from matplotlib import colors # making colors consistent
from mpl_toolkits.axes_grid1 import make_axes_locatable # colorbar helper

# read image
from imageio import imread
# + data augmentation
from scipy import ndimage
from scipy import misc

# used for manually saving best params
import pickle

# for shuffling data batches
from sklearn.utils import shuffle

# Notebook-wide random seed; used as the default argument of reset_graph().
SEED = 42

# Helper to make the output consistent
def reset_graph(seed=SEED):
    """Start from a fresh default graph and re-seed TensorFlow and NumPy.

    Args:
        seed: value handed to both ``tf.set_random_seed`` and
            ``np.random.seed``; defaults to the notebook-wide ``SEED``.
    """
    # NumPy's global RNG does not depend on the TF graph, so seed it first.
    np.random.seed(seed)

    # Reset before seeding so the seed is applied to the new default graph.
    tf.reset_default_graph()
    tf.set_random_seed(seed)

# helper to create dirs if they don't already exist
def maybe_create_dir(dir_path):
    """Create ``dir_path`` (including missing parents) unless it already exists.

    Prints one line saying whether the path was created or was already there.

    Args:
        dir_path: path of the directory to ensure.
    """
    # Guard clause: anything already present at this path is left untouched.
    if os.path.exists(dir_path):
        print("{} already exists".format(dir_path))
        return

    os.makedirs(dir_path)
    print("{} created".format(dir_path))
    
# Set the TF C++ log level to suppress messages, unless an error.
# NOTE(review): this runs after `import tensorflow` at the top of this cell;
# the usual guidance is to set the variable before the import -- confirm that
# it still takes effect here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Important version information
print("Python: {}".format(sys.version_info[:]))
print('TensorFlow: {}'.format(tf.__version__))

# Check if using GPU. Query the device name once and reuse the answer for
# both the test and the message instead of asking TensorFlow twice.
gpu_name = tf.test.gpu_device_name()
if not gpu_name:
    print('No GPU')
else:
    print('Default GPU Device: {}'.format(gpu_name))

# Begin the notebook from a clean, seeded default graph.
reset_graph()

Python: (3, 5, 4, 'final', 0)
TensorFlow: 1.6.0-dev20180105
No GPU


In [2]:
# `record_holder/` (relative to the working directory) is where
# numpy_to_tfrecords() writes its .tfrecords files; create it up front.
FINAL_DIR = "record_holder"
maybe_create_dir(FINAL_DIR)

record_holder already exists


In [3]:
ROOT_DIR = "../../improving_classification_with_GAN/numpy/sigmoid/pet/299"

# List the files that sit directly in ROOT_DIR (no recursion), so `files`
# always refers to ROOT_DIR itself and a missing directory fails loudly here.
files = sorted(
    name for name in os.listdir(ROOT_DIR)
    if os.path.isfile(os.path.join(ROOT_DIR, name))
)
for filename in files:
    print(filename)

# Load every split by its explicit file name rather than by its position in
# the sorted listing: a stray extra file in ROOT_DIR would otherwise shift the
# indices and silently load the wrong array into the wrong variable.
X_test = np.load(os.path.join(ROOT_DIR, "X_test.npy"))
X_train = np.load(os.path.join(ROOT_DIR, "X_train.npy"))
X_val = np.load(os.path.join(ROOT_DIR, "X_val.npy"))
y_test = np.load(os.path.join(ROOT_DIR, "y_test.npy"))
y_train = np.load(os.path.join(ROOT_DIR, "y_train.npy"))
y_val = np.load(os.path.join(ROOT_DIR, "y_val.npy"))

# Every image needs exactly one label; catch a mismatch before writing records.
assert len(X_test) == len(y_test), "test: image/label count mismatch"
assert len(X_train) == len(y_train), "train: image/label count mismatch"
assert len(X_val) == len(y_val), "val: image/label count mismatch"

print("done")

X_test.npy
X_train.npy
X_val.npy
y_test.npy
y_train.npy
y_val.npy
done


In [4]:
def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` backed by an ``Int64List``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` backed by a ``BytesList``."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

In [5]:
def numpy_to_tfrecords(features, lables, setType):
    """Serialize paired images and labels into ``FINAL_DIR/<setType>.tfrecords``.

    Every example stores two features: ``"<setType>/label"`` (int64) and
    ``"<setType>/image"`` (the raw bytes of the image array). Neither the
    dtype nor the shape of the image is stored, so whoever reads the file has
    to know both.

    Args:
        features: sequence of numpy arrays, one per example.
        lables: sequence of integer labels aligned with ``features``. (The
            misspelled name is kept so existing keyword callers still work.)
        setType: split name such as "train"; used for the output file name
            and as the prefix of the feature keys.

    Raises:
        ValueError: if ``features`` and ``lables`` differ in length.
    """
    # Fail before touching the disk rather than half-way through the file
    # (labels too short) or by silently dropping labels (labels too long).
    if len(features) != len(lables):
        raise ValueError(
            "features and lables must have the same length, got {} and {}".format(
                len(features), len(lables)))

    tfrecords_file_name = str(setType) + '.tfrecords'
    tfrecords_path = os.path.join(FINAL_DIR, tfrecords_file_name)

    labelName = str(setType) + '/label'
    featureName = str(setType) + '/image'

    # The context manager closes the writer even if serialization raises.
    with tf.python_io.TFRecordWriter(tfrecords_path) as writer:
        for i, (img, label) in enumerate(zip(features, lables)):
            # create features; tobytes() is the non-deprecated spelling of
            # ndarray.tostring() and already returns `bytes`
            feature = {labelName: _int64_feature(label),
                       featureName: _bytes_feature(img.tobytes())}

            # create example protocol buffer
            example = tf.train.Example(features=tf.train.Features(feature=feature))

            writer.write(example.SerializeToString())

            # lightweight progress report
            if i % 25 == 0:
                print("{} {} written".format(i, setType))

    sys.stdout.flush()
    print("done")

In [6]:
# Write the test split to FINAL_DIR/test.tfrecords.
numpy_to_tfrecords(X_test, y_test, "test")

0 test written
25 test written
50 test written
75 test written
100 test written
125 test written
150 test written
175 test written
200 test written
225 test written
250 test written
275 test written
done


In [7]:
# Write the validation split to FINAL_DIR/val.tfrecords.
numpy_to_tfrecords(X_val, y_val, "val")

0 val written
25 val written
50 val written
75 val written
100 val written
125 val written
150 val written
175 val written
200 val written
225 val written
250 val written
done


In [8]:
# Write the training split to FINAL_DIR/train.tfrecords.
numpy_to_tfrecords(X_train, y_train, "train")

0 train written
25 train written
50 train written
75 train written
100 train written
125 train written
150 train written
175 train written
200 train written
225 train written
250 train written
275 train written
300 train written
325 train written
350 train written
375 train written
400 train written
425 train written
450 train written
475 train written
500 train written
525 train written
550 train written
575 train written
600 train written
625 train written
650 train written
675 train written
700 train written
725 train written
750 train written
775 train written
800 train written
825 train written
850 train written
875 train written
900 train written
925 train written
950 train written
975 train written
1000 train written
1025 train written
1050 train written
1075 train written
1100 train written
1125 train written
1150 train written
1175 train written
1200 train written
1225 train written
1250 train written
1275 train written
1300 train written
1325 train written
1350 train written


## Reading the files

In [9]:
def _test_parse_function(example_proto, setType='test', image_shape=(299, 299, 3)):
    """Parse one serialized ``tf.train.Example`` into an ``(image, label)`` pair.

    The defaults reproduce the original behaviour (test split, 299x299x3
    images), so ``dataset.map(_test_parse_function)`` keeps working. Other
    splits can be read by binding the extra arguments, for example
    ``dataset.map(lambda proto: _test_parse_function(proto, 'train'))``.

    Args:
        example_proto: scalar string tensor holding one serialized Example.
        setType: split name used as the prefix of the feature keys
            (``"<setType>/image"`` and ``"<setType>/label"``).
        image_shape: shape the decoded image is reshaped to.

    Returns:
        Tuple ``(image, label)``: a float32 tensor of shape ``image_shape``
        and an int64 label tensor.
    """
    labelName = str(setType) + '/label'
    featureName = str(setType) + '/image'
    feature = {featureName: tf.FixedLenFeature([], tf.string),
               labelName: tf.FixedLenFeature([], tf.int64)}

    # decode
    parsed_features = tf.parse_single_example(example_proto, features=feature)

    # convert image data from string to number; the raw bytes are interpreted
    # as float32, so the arrays must have been written with that dtype
    image = tf.decode_raw(parsed_features[featureName], tf.float32)
    image = tf.reshape(image, list(image_shape))
    label = tf.cast(parsed_features[labelName], tf.int64)

    # [do any preprocessing here]

    return image, label

In [10]:
# Placeholder for the list of TFRecord file paths; it is fed when the
# iterator's initializer is run.
filenames_ph = tf.placeholder(tf.string, shape=[None])

dataset = tf.data.TFRecordDataset(filenames_ph)
dataset = dataset.map(_test_parse_function)  # Parse the record into tensors.
dataset = dataset.repeat(5)  # Repeat the input 5 times (not indefinitely).
dataset = dataset.batch(16)  # Batches of 16; applied after repeat().

# Initializable iterator: its initializer must be run (with filenames_ph fed)
# before any element can be fetched.
iterator = dataset.make_initializable_iterator()

In [13]:
# Read the test records back and print the labels of every batch as a check.
setType = 'test'
tfrecords_file_name = str(setType) + '.tfrecords'
tfrecord_file_path = os.path.join(FINAL_DIR, tfrecords_file_name)

# Build the fetch op once, outside the loop, so the graph does not grow per batch.
next_test_element = iterator.get_next()

with tf.Session() as sess:
    # Point the dataset at the test file and (re)start the iterator.
    sess.run(iterator.initializer, feed_dict={filenames_ph: [tfrecord_file_path]})
    i = 0
    while True:
        try:
            batch_labels = sess.run(next_test_element)[1]
        except tf.errors.OutOfRangeError:
            # Raised once the dataset is exhausted.
            break
        # Count only batches that were actually fetched, so `i` ends up equal
        # to the number of batches read instead of one more.
        i += 1
        print("i: {} => {}".format(i, batch_labels))

i: 1 => [1 1 1 0 1 1 0 1 1 1 0 1 1 0 1 0]
i: 2 => [1 1 0 0 0 0 0 0 0 0 1 0 1 0 1 1]
i: 3 => [1 1 1 0 1 0 0 0 0 0 0 0 0 1 0 0]
i: 4 => [0 0 0 0 0 0 1 0 0 0 0 1 1 0 0 1]
i: 5 => [0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0]
i: 6 => [0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0]
i: 7 => [1 0 1 0 0 1 0 0 1 0 1 0 1 0 1 1]
i: 8 => [0 0 0 1 0 0 1 1 1 1 1 1 0 0 0 0]
i: 9 => [1 0 1 0 1 0 1 1 1 1 0 0 1 1 0 0]
i: 10 => [1 1 1 1 0 1 0 1 0 0 1 1 1 0 1 1]
i: 11 => [0 0 0 0 1 0 0 1 0 1 0 1 0 0 1 1]
i: 12 => [1 1 1 1 0 1 1 0 1 0 1 0 0 0 0 0]
i: 13 => [1 0 1 0 1 1 1 1 0 1 1 1 0 0 0 0]
i: 14 => [1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0]
i: 15 => [1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1]
i: 16 => [0 0 0 0 1 0 0 0 1 0 1 1 1 0 1 1]
i: 17 => [0 0 1 0 1 0 1 1 1 1 0 1 0 1 1 0]
i: 18 => [1 1 0 0 1 1 0 0 1 0 1 0 0 1 0 1]
i: 19 => [0 1 1 0 0 0 0 0 0 1 0 0 1 1 1 0]
i: 20 => [1 1 0 1 1 1 0 1 1 0 1 0 1 1 0 0]
i: 21 => [0 0 0 0 0 0 1 0 1 0 1 1 1 1 1 0]
i: 22 => [1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
i: 23 => [0 0 1 0 0 0 0 1 1 0 0 1 0 0 0 1]
i: 24 => [1 1 0 0 0 