In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv
from random import shuffle
import glob
import sys
import codecs
import utils

# Write Images and Text to TFRecord

In [None]:
def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature``.

    Args:
        value: Value stored as the only element of the feature's
            ``Int64List``.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` holds ``[value]``.
    """
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

In [None]:
def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature``.

    Args:
        value: Value stored as the only element of the feature's
            ``BytesList``.

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` holds ``[value]``.
    """
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

In [None]:
def convert_to_TFrecord(load_dir,save_dir):
    '''Convert images and their corresponding text files to TFRecords.

    Every ``*.jpg`` in ``load_dir`` is paired positionally with a ``*.txt``
    from the same directory. The pairs are shuffled together and split
    60% / 20% / 20% into train, validation and test sets, which are handed
    to ``train_TFrecord``, ``val_TFrecord`` and ``test_TFrecord``.

    Args:
        load_dir: Directory containing the ``.jpg`` images and ``.txt`` files.
        save_dir: Directory passed on to the three writer functions.

    Raises:
        ValueError: If no images are found, or if the number of images and
            text files differs (positional pairing would be wrong).
    '''
    def _without_ext(path):
        # Sort key: the path with its final extension removed.
        return path.rsplit('.', 1)[0]

    # glob returns paths in arbitrary, platform-dependent order, so both lists
    # are sorted by extension-less path to make the i-th image line up with
    # the i-th text file deterministically.
    # NOTE(review): assumes an image and its text file share the same file
    # stem (e.g. 12.jpg / 12.txt) - confirm against the dataset layout.
    img_addrs = sorted(glob.glob(load_dir + '/*.jpg'), key=_without_ext)
    text_addrs = sorted(glob.glob(load_dir + '/*.txt'), key=_without_ext)

    if not img_addrs:
        raise ValueError('No .jpg images found in {}'.format(load_dir))
    if len(img_addrs) != len(text_addrs):
        # zip() would silently drop the surplus and shift every later pair.
        raise ValueError(
            'Found {} images but {} text files in {}'.format(
                len(img_addrs), len(text_addrs), load_dir))

    # Shuffle images and texts together so each pair stays intact
    pairs = list(zip(img_addrs, text_addrs))
    shuffle(pairs)
    img_addrs, text_addrs = zip(*pairs)

    # Divide the data into 60% train, 20% validation, and 20% test
    total = len(img_addrs)
    train_end = int(0.6 * total)
    val_end = int(0.8 * total)

    # Convert images and text into TFRecords for train, val and test separately
    train_TFrecord(img_addrs[:train_end], text_addrs[:train_end], save_dir)
    val_TFrecord(img_addrs[train_end:val_end], text_addrs[train_end:val_end], save_dir)
    test_TFrecord(img_addrs[val_end:], text_addrs[val_end:], save_dir)

In [None]:
def train_TFrecord(train_imgs, train_text, save_dir):
    '''Write the training image/text pairs to ``<save_dir>/train.tfrecords``.

    Each pair is loaded through ``utils.load_image`` / ``utils.load_text``,
    cast to ``uint8`` and stored as one ``tf.train.Example`` with the raw
    bytes under the keys ``'image'`` and ``'label'``.

    Args:
        train_imgs: Sequence of image file paths.
        train_text: Sequence of text file paths; ``train_text[i]`` belongs
            to ``train_imgs[i]``.
        save_dir: Directory in which ``train.tfrecords`` is created.
    '''
    # address to save the TFRecords file
    train_filename = save_dir + '/train.tfrecords'
    total = len(train_imgs)

    # open the TFRecords file
    with tf.python_io.TFRecordWriter(train_filename) as writer:
        for i, img_path in enumerate(train_imgs):

            # Print converted status every 100 samples
            if i % 100 == 0:
                print('converted {}/{}'.format(i, total))

            # Load the image and its label
            # NOTE(review): presumably both helpers return NumPy arrays
            # (they must support .astype) - confirm in utils.
            img = utils.load_image(img_path).astype(np.uint8)
            label = utils.load_text(train_text[i]).astype(np.uint8)

            # tobytes() replaces the deprecated ndarray.tostring(); it already
            # returns bytes, so no tf.compat.as_bytes conversion is needed.
            feature = {
                'label': _bytes_feature(label.tobytes()),
                'image': _bytes_feature(img.tobytes())
            }
            # Create an example protocol buffer
            example = tf.train.Example(features=tf.train.Features(feature=feature))

            # Serialize to string and write on the file
            writer.write(example.SerializeToString())

    # Report success only after the writer has been closed.
    print('Train TFrecord Created Successfully')

In [None]:
def val_TFrecord(val_imgs, val_text, save_dir):
    '''Write the validation image/text pairs to ``<save_dir>/val.tfrecords``.

    Each pair is loaded through ``utils.load_image`` / ``utils.load_text``,
    cast to ``uint8`` and stored as one ``tf.train.Example`` with the raw
    bytes under the keys ``'image'`` and ``'label'``.

    Args:
        val_imgs: Sequence of image file paths.
        val_text: Sequence of text file paths; ``val_text[i]`` belongs to
            ``val_imgs[i]``.
        save_dir: Directory in which ``val.tfrecords`` is created.
    '''
    # address to save the TFRecords file
    val_filename = save_dir + '/val.tfrecords'
    total = len(val_imgs)

    # open the TFRecords file
    with tf.python_io.TFRecordWriter(val_filename) as writer:
        for i, img_path in enumerate(val_imgs):

            # Print converted status every 100 samples
            if i % 100 == 0:
                print('converted {}/{}'.format(i, total))

            # Cast to uint8 exactly as train_TFrecord does, so all three
            # record files share one byte layout.
            # NOTE(review): presumably utils.parse decodes uint8 bytes -
            # confirm in utils.
            img = utils.load_image(img_path).astype(np.uint8)
            label = utils.load_text(val_text[i]).astype(np.uint8)

            # tobytes() replaces the deprecated ndarray.tostring(); it already
            # returns bytes, so no tf.compat.as_bytes conversion is needed.
            feature = {
                'label': _bytes_feature(label.tobytes()),
                'image': _bytes_feature(img.tobytes())
            }
            # Create an example protocol buffer
            example = tf.train.Example(features=tf.train.Features(feature=feature))

            # Serialize to string and write on the file
            writer.write(example.SerializeToString())

    # Report success only after the writer has been closed.
    print('Validation TFrecord Created Successfully')

In [None]:
def test_TFrecord(test_imgs, test_text, save_dir):
    '''Write the test image/text pairs to ``<save_dir>/test.tfrecords``.

    Each pair is loaded through ``utils.load_image`` / ``utils.load_text``,
    cast to ``uint8`` and stored as one ``tf.train.Example`` with the raw
    bytes under the keys ``'image'`` and ``'label'``.

    Args:
        test_imgs: Sequence of image file paths.
        test_text: Sequence of text file paths; ``test_text[i]`` belongs to
            ``test_imgs[i]``.
        save_dir: Directory in which ``test.tfrecords`` is created.
    '''
    # address to save the TFRecords file
    test_filename = save_dir + '/test.tfrecords'
    total = len(test_imgs)

    # open the TFRecords file
    with tf.python_io.TFRecordWriter(test_filename) as writer:
        for i, img_path in enumerate(test_imgs):

            # Print converted status every 100 samples
            if i % 100 == 0:
                print('converted {}/{}'.format(i, total))

            # Cast to uint8 exactly as train_TFrecord does, so all three
            # record files share one byte layout.
            # NOTE(review): presumably utils.parse decodes uint8 bytes -
            # confirm in utils.
            img = utils.load_image(img_path).astype(np.uint8)
            label = utils.load_text(test_text[i]).astype(np.uint8)

            # tobytes() replaces the deprecated ndarray.tostring(); it already
            # returns bytes, so no tf.compat.as_bytes conversion is needed.
            feature = {
                'label': _bytes_feature(label.tobytes()),
                'image': _bytes_feature(img.tobytes())
            }
            # Create an example protocol buffer
            example = tf.train.Example(features=tf.train.Features(feature=feature))

            # Serialize to string and write on the file
            writer.write(example.SerializeToString())

    # Report success only after the writer has been closed.
    print('Test TFrecord Created Successfully')

In [None]:
# Run the conversion: load_dir is searched for *.jpg / *.txt files and
# save_dir receives train.tfrecords, val.tfrecords and test.tfrecords.
# NOTE(review): both paths are hard-coded absolute Windows paths; adjust them
# to the local dataset location before running this cell.
load_dir='E:/Urdu Ocr/segmentation/Augmented/1'
save_dir='E:/Urdu Ocr/segmentation/tfrecords'
convert_to_TFrecord(load_dir,save_dir)

# Load TFRecord File

In [None]:
# Build a tf.data pipeline over the training records: every serialized
# example is passed through utils.parse, and a one-shot iterator exposes
# the parsed elements one at a time via get_next.
train_filename = './tfrecords/train.tfrecords'
dataset = tf.data.TFRecordDataset(filenames=train_filename).map(utils.parse)
iterator = dataset.make_one_shot_iterator()
get_next = iterator.get_next()

In [None]:
# Sanity check: pull one parsed element from the iterator, show the image
# and print its label.
with tf.Session() as sess:
    # Each run of get_next yields one element, unpacked here as (img, txt).
    img, txt = sess.run(get_next)
    plt.imshow(img)
    plt.show()
    # presumably dense_to_text maps the stored label array back to readable
    # text - confirm in utils.
    print(utils.dense_to_text(txt))