In [81]:
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import tensorflow as tf
from tensorflow.python.platform import tf_logging as logging
from tensorflow.models import slim
from tensorflow.models.slim.preprocessing import inception_preprocessing

from tensorflow.contrib.slim.python.slim.data import tfexample_decoder, dataset, dataset_data_provider, data_provider
import os, time

"""
Note : Before running the code below ensure you have all the data in TFRecord and appertaining format.
        Else : Follow the code inside TensorFlowIO module to create such files. THis module assumes that 
        the data are in their proper directories
"""

# The below code is copied from the code source mentioned below. 
# Code Source : https://kwotsin.github.io/tech/2017/02/11/transfer-learning.html

'\nNote : Before running the code below ensure you have all the data in TFRecord and appertaining format.\n        Else : Follow the code inside TensorFlowIO module to create such files. THis module assumes that \n        the data are in their proper directories\n'

In [82]:
# Directory Listings

# Log file creation
logDir = './log'

# State the image size we are resizing
imageSize = 299 # the default size for inception

# State the path where the label file is stored
labelFile = "/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/TensorFlowOPS/flowers/labels.txt"
labels = open(labelFile, 'r')

# Dictionary to refer each label to its string name
labelsToNameDict = {}
for record in labels:
    label, stringName = record.split(':')
    stringName = stringName[:-1]
    labelsToNameDict[int(label)] = stringName
    
# Record the file pattern for the TFRecord files
filePattern = "flowers_%s_*.tfRecord"

# A dictionary describing the dataset, to be required by tensorflow dataset Class
itemToDescription = {
    'image': "A 3-channel RGB coloured five different types of flowers, tulips, sunflower, roses, dandelion, daisy",
    'label': "A label value defined to each flower 1: dandelion, 2:roses, 3:sunflower, 4:tulips"
    
}

## Function details:

(1) **getSplits**: It allows us to obtain a specific split form the disk. We store two different types of files in the disk "trai file and validation file". These files were split into two and get_split helps us to ge the required split.

In the process we also collect some information such as number_of samples in the split, and etc.

(2) **loadBatch**  The most important function in this is the DatasetDataProvider

**DatasetDataProvider:** Our Dataset is provided in tfrecord format, the motive we have here is that we need tensors from our dataset to feed it as batches to the algorithm. DatasetDataProvider helps us doing that. It has two things 1) ParallelReader and 2) Decoder. The ParallelReader reads the input tfRecords and the decoder converts the input tfRecords into Tensor.

* **Parallel Reader**: As the name suggests it will the input file with multiple reader and queue these records into tf.RandomShuffleQueue(**The RandomShuffleQueue already shuffles your dataset, so you dont have to bother doing it later again**). Then these records are dequeued and passed on to the decoder. 

* **Decoder**: The decoder does most of the heavy lifting. IT takes in the two dictionary that we described above
  * keysToFeature: This dictionary provides the ItemHandler object information about the data such as what format is the picture image, the label type (Fixed lenght or variable lenght)
  * itemsToHandler: It specifies the name of the file to which the tensor are converted. For example by default it looks for keys such as image/encoded, image/format.
  
  

In [83]:
def getSplits(splitName, datasetDir, filePattern):
    if splitName not in ['train','validation']:
        raise ValueError('The spllit name %s is not recognized, Make sure they are either train, validation or test'%splitName)

    # create a path for data IO: The pattern should be the split name
    filePatternPath = os.path.join(datasetDir, filePattern%(splitName))
    print (filePatternPath)

    # Count the total number of training samples:
    filePattern = filePattern%(splitName)
    print (filePattern)
    tfRecordsFilesPath = [os.path.join(datasetDir, file) 
                          for file in os.listdir(datasetDir) 
                          if file.startswith('flowers_' + split_name)]
    print ('tfRecordsFile : ',tfRecordsFilesPath)

    # We now count all the records inside the tfRecord files that are separated into multiple shards
    numSamples = 0
    for tfRecord in tfRecordsFilesPath:
        for record in tf.python_io.tf_record_iterator(tfRecord):
            numSamples += 1

    print ('Total records to to be trained are: ', numSamples)

    # Define what type of reader
    reader = tf.TFRecordReader
    
    # Create a keys to feature dictionary
    keysToFeature = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'),
        'image/class/label': tf.FixedLenFeature(
        [], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
    }
    print (keysToFeature)

    # Create the dictionary for the decoder
    itemsToHandlers = {
        'image': tfexample_decoder.Image(),   # takes by default : (image_key="image/encoded", format_key="image_format")  
        'label': tfexample_decoder.Tensor('image/class/label')
    }
    # tfexample_decoder is the module and Image in the class so basically the key imaage int the dictionary contains the 
    # object of class image as the value.
    # Similarly the key Label also holds the obect of class Tensor in the value

    decoder = tfexample_decoder.TFExampleDecoder(keysToFeature, itemsToHandlers)
    print (decoder)

    labelsTONameDict = labelsToNameDict

    dataetObj = dataset.Dataset(
            data_sources = filePatternPath,
            decoder = decoder,
            reader = reader,
            num_readers = 4,                  # Specify how many reader you would want to read and queue the tfRecords
            num_samples = numSamples,
            num_classes = 5,
            labels_to_name = labelsTONameDict,
            items_to_descriptions = itemToDescription)
    
    return dataetObj

datasetDir = "/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/TensorFlowOPS/flowers"
split_name = "train"
filePattern = "flowers_%s_*.tfrecord"

datasetOBJ = getSplits(splitName=split_name, datasetDir=datasetDir, filePattern=filePattern)
# 'image': slim.tfexample_decoder.Image(),
#     'label': slim.tfexample_decocer.tensor('image/class/label')

/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/TensorFlowOPS/flowers/flowers_train_*.tfrecord
flowers_train_*.tfrecord
tfRecordsFile :  ['/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/TensorFlowOPS/flowers/flowers_train_00000-of-00002.tfrecord', '/Users/sam/All-Program/App-DataSet/Deep-Neural-Nets/TensorFlowOPS/flowers/flowers_train_00001-of-00002.tfrecord']
Total records to to be trained are:  2569
{'image/encoded': FixedLenFeature(shape=(), dtype=tf.string, default_value=''), 'image/format': FixedLenFeature(shape=(), dtype=tf.string, default_value='jpg'), 'image/class/label': FixedLenFeature(shape=[], dtype=tf.int64, default_value=<tf.Tensor 'zeros_8:0' shape=() dtype=int64>)}
<tensorflow.contrib.slim.python.slim.data.tfexample_decoder.TFExampleDecoder object at 0x11ed77940>


In [None]:
def loadBatch(datasetOBJ, batchSize, height, width, isTraining=True):
    """
    dataset: the return value from getSplit Function
    batchSize: How many records in a batch.
    height: the height of the image
    width: The weigth of the image
    isTraining: to determine weather to perform training of validation
    
    Output:
    1: Tensor of shape [batchSize, height, width, numChannels]
    2. Labels of shape [batchSize,]
    """
    
    # First create a dataprovider object, so that we can easily extract the image and its label from the object
    dataProviderOBJ = dataset_data_provider.DatasetDataProvider(
        dataset = datasetOBJ, 
        common_queue_capacity = 24+3*batchSize,   # The normal queue capacity
        common_queue_min = 24)                    # atleast how many records to queue
    
    # The below operation gets the image in Tensor format, and we can preprocess them using Inception Preprocessing
    rawImage, label = dataProviderOBJ.get(['image', 'label'])
    
    
    
    

In [28]:
# As discussed in the above description let us create a DatasetProvider
batchSize = 64
dataProvider = dataset_data_provider.DatasetDataProvider(
    dataset = datasetOBJ,
    common_queue_capacity = 24 + 3 * batchSize,
    common_queue_min = 24)

raw_image, label = dataProvider.get(('image', 'label'))

print ('The shape of raw image is : ', raw_image.get_shape().as_list())
print ('The shape of the label data is : ', label.get_shape().as_list())

The shape of raw image is :  [None, None, 3]
The shape of the label data is :  []


## Implementing Inception:

In [29]:
# Preprocesing :
height = 
image = inception_preprocessing.preprocess_image(raw_image, height, width, is_training)

NameError: name 'height' is not defined