In [None]:
# Mount Google Drive at /content/drive (requires the Colab-only `google.colab` module).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Project root on the mounted Google Drive. The previous value was a local
# Windows path ('D:/...') that does not exist in the Colab runtime this notebook
# mounts Drive for. The path is relative -- assumes the kernel's working
# directory is /content, where Drive is mounted; confirm if run elsewhere.
files_path = 'drive/MyDrive/Colab Notebooks/sheethal sheethal/final_deliverable'
files_path

'drive/MyDrive/Colab Notebooks/sheethal sheethal/final_deliverable'

# Convert the images into TFRecords so that TPUs can be utilized

In [None]:
import cv2
import pandas as pd
import tensorflow as tf
from os.path import join

## Set the data path

In [None]:
# Folder under the project root that holds the label CSVs and the image folders.
data_path = join(files_path, 'data')
data_path

'drive/MyDrive/Colab Notebooks/sheethal sheethal/final_deliverable/data'

## Code to generate TF Records

In [None]:
def _bytes_feature(value):
  """Wraps one string / byte value in a tf.train.Feature backed by a BytesList."""
  eager_tensor_type = type(tf.constant(0))
  if isinstance(value, eager_tensor_type):
    # BytesList won't unpack a string from an EagerTensor, so pull out the raw value.
    value = value.numpy()
  bytes_list = tf.train.BytesList(value=[value])
  return tf.train.Feature(bytes_list=bytes_list)

def _float_feature(value):
  """Wraps one float / double in a tf.train.Feature backed by a FloatList."""
  float_list = tf.train.FloatList(value=[value])
  return tf.train.Feature(float_list=float_list)

def _int64_feature(value):
  """Wraps one bool / enum / int / uint in a tf.train.Feature backed by an Int64List."""
  int64_list = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=int64_list)

In [None]:
def serialize_example(img, img_nm, hour, minute, error_type):
  """
  Builds a tf.train.Example for one labelled image and returns it serialized.

  `img`, `img_nm` and `error_type` are stored as bytes features; `hour` and
  `minute` are stored as int64 features.
  """
  # Feature name -> tf.train.Feature holding the matching value.
  feature_map = dict(
      image=_bytes_feature(img),
      image_name=_bytes_feature(img_nm),
      hour=_int64_feature(hour),
      minute=_int64_feature(minute),
      error_type=_bytes_feature(error_type),
  )

  # Wrap the features in an Example message and serialize it for the writer.
  features = tf.train.Features(feature=feature_map)
  return tf.train.Example(features=features).SerializeToString()

In [None]:
def save_tf_records(data_path, _datatype, tf_rec_size):
    """
    Packs the images of one data split into TFRecord shard files.

    Reads `<data_path>/<_datatype>_label.csv` (columns used: `imgid`, `hour`,
    `minute`, `error_type`), loads each listed image from
    `<data_path>/<_datatype>/`, re-encodes it as a quality-100 JPEG and writes
    it together with its labels to
    `<data_path>/<_datatype>_tfrecs/<_datatype>NN-COUNT.tfrec`, where NN is the
    zero-padded shard index and COUNT the number of examples in that shard.

    Args:
      data_path: directory holding the label CSV and the image folder.
      _datatype: name of the split, e.g. 'train', 'test' or 'val'.
      tf_rec_size: maximum number of images per shard; must be positive.

    Raises:
      ValueError: if `tf_rec_size` is not positive, or an image cannot be
        JPEG-encoded.
      FileNotFoundError: if an image listed in the CSV cannot be read.
    """
    # Fail fast: zero would divide by zero below, a negative size would
    # silently write nothing.
    if tf_rec_size <= 0:
        raise ValueError(f'tf_rec_size must be a positive integer, got {tf_rec_size!r}')

    print('\n\nRunning for {} data'.format(_datatype))

    # load labels file
    labels = pd.read_csv(join(data_path, f'{_datatype}_label.csv'))

    # calculate number of images and tf-records to create
    NUM_IMAGES = labels.shape[0]
    print("\nTotal number of images: ", NUM_IMAGES)

    # ceil(NUM_IMAGES / tf_rec_size): the last shard may be partially filled
    NUM_TFRECS = NUM_IMAGES // tf_rec_size + (1 if NUM_IMAGES % tf_rec_size else 0)
    print("\nTotal number of Tf records: ", NUM_TFRECS)

    # Make sure the output folder exists before opening a writer inside it.
    tf.io.gfile.makedirs(join(data_path, f'{_datatype}_tfrecs'))

    for j in range(NUM_TFRECS):
        print(f'\nWriting TF record {j} of {NUM_TFRECS}')

        # every shard is full except possibly the last one
        NUM_RECS = min(tf_rec_size, NUM_IMAGES - j*tf_rec_size)

        tf_rec_name = f'{_datatype}_tfrecs/{_datatype}%.2i-%i.tfrec'%(j, NUM_RECS)
        print(tf_rec_name, end=': ')

        tf_rec_path = join(data_path, tf_rec_name)

        with tf.io.TFRecordWriter(tf_rec_path) as writer:

            for k in range(NUM_RECS):

                # row of the labels table that belongs to the k-th image of shard j
                row = tf_rec_size*j+k

                img_name = labels.loc[row, 'imgid']
                hour = labels.loc[row, 'hour']
                minute = labels.loc[row, 'minute']
                error_type = labels.loc[row, 'error_type']

                img_path = join(data_path, _datatype, img_name)
                img = cv2.imread(img_path)
                if img is None:
                    # cv2.imread reports a missing/unreadable file by returning
                    # None rather than raising, so surface it with the path.
                    raise FileNotFoundError(f'Could not read image: {img_path}')

                encoded_ok, encoded = cv2.imencode('.jpg', img, (cv2.IMWRITE_JPEG_QUALITY, 100))
                if not encoded_ok:
                    raise ValueError(f'Could not JPEG-encode image: {img_path}')

                # tobytes() replaces ndarray.tostring(), which is deprecated
                # and no longer available in NumPy 2.0.
                img = encoded.tobytes()

                data = serialize_example(img, img_name.encode(), hour, minute, error_type.encode())

                writer.write(data)

                # lightweight progress indicator
                if k%100==0: print(k, end=', ')
        print()

## Set Parameters

In [None]:
# Maximum number of images written into a single TFRecord file.
SIZE_PER_TFREC = 1000

### Generate for Training data

In [None]:
# Shard the 'train' split into TFRecord files of at most SIZE_PER_TFREC images each.
save_tf_records(data_path, 'train', SIZE_PER_TFREC)



Running for train data

Total number of images:  25000

Total number of Tf records:  25

Writing TF record 0 of 25
train_tfrecs/train00-1000.tfrec: 



0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 1 of 25
train_tfrecs/train01-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 2 of 25
train_tfrecs/train02-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 3 of 25
train_tfrecs/train03-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 4 of 25
train_tfrecs/train04-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 5 of 25
train_tfrecs/train05-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 6 of 25
train_tfrecs/train06-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 7 of 25
train_tfrecs/train07-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 8 of 25
train_tfrecs/train08-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 9 of 25
train_tfrecs/train09-1000.tfrec: 0, 100, 200, 300, 4

### Generate for Testing data

In [None]:
# Shard the 'test' split into TFRecord files of at most SIZE_PER_TFREC images each.
save_tf_records(data_path, 'test', SIZE_PER_TFREC)



Running for test data

Total number of images:  5000

Total number of Tf records:  5

Writing TF record 0 of 5
test_tfrecs/test00-1000.tfrec: 



0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 1 of 5
test_tfrecs/test01-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 2 of 5
test_tfrecs/test02-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 3 of 5
test_tfrecs/test03-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 4 of 5
test_tfrecs/test04-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 


### Generate for Validation data

In [None]:
# Shard the 'val' split into TFRecord files of at most SIZE_PER_TFREC images each.
save_tf_records(data_path, 'val', SIZE_PER_TFREC)



Running for val data

Total number of images:  5000

Total number of Tf records:  5

Writing TF record 0 of 5
val_tfrecs/val00-1000.tfrec: 



0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 1 of 5
val_tfrecs/val01-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 2 of 5
val_tfrecs/val02-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 3 of 5
val_tfrecs/val03-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 

Writing TF record 4 of 5
val_tfrecs/val04-1000.tfrec: 0, 100, 200, 300, 400, 500, 600, 700, 800, 900, 
