In [1]:
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
from matplotlib import pyplot as plt

In [2]:
def load_jpeg(filename):
    """Read the file at ``filename`` and decode its contents as a 3-channel JPEG tensor."""
    return tf.image.decode_jpeg(tf.io.read_file(filename), channels=3)

#if smaller than the target size, only pad
#if bigger than the target size, resize (keeping the aspect ratio) and then pad
def resize_img_tensor(img,target_size = (224,224)):
    """Fit an image tensor into ``target_size``, padding it and downscaling first if needed.

    Args:
        img: rank-3 tensor ``(height, width, 3)`` or rank-4 tensor
            ``(batch, height, width, 3)``. Height and width are compared as plain
            Python numbers, so they must be known (not ``None``).
        target_size: ``(target_height, target_width)`` of the returned image.

    Returns:
        A tuple ``(ret, scale)``. ``ret`` is the float32 result whose spatial size is
        ``target_size``. ``scale`` is the factor by which the original image was
        shrunk (``1.0`` when it was only padded), so lengths measured in original
        pixels can be converted with ``length / scale``.

    Raises:
        ValueError: if ``img`` is not rank 3 or rank 4 with 3 channels last.
    """
    rank = len(img.shape)
    if rank == 3 and img.shape[2] == 3:
        h, w = img.shape[0], img.shape[1]
    elif rank == 4 and img.shape[3] == 3:
        h, w = img.shape[1], img.shape[2]
    else:
        raise ValueError("img should be a 3D Tensor with shape=(None,None,3) or a 4D Tensor with shape=(None,None,None,3)")

    target_h, target_w = target_size[0], target_size[1]

    # Pad only when the image fits in BOTH dimensions; comparing just the largest
    # sides would yield negative padding offsets for non-square targets.
    if h <= target_h and w <= target_w:
        scale = 1.0
        offset_h = (target_h-h)//2
        offset_w = (target_w-w)//2
        ret = tf.image.pad_to_bounding_box(img, offset_h, offset_w, target_h, target_w)
    else:
        # resize_with_pad keeps the aspect ratio, so the shrink factor is set by the
        # dimension that overshoots its target the most. For square targets this
        # equals max(h,w)/target_size[0].
        scale = max(h/target_h, w/target_w)
        ret = tf.image.resize_with_pad(img,target_h,target_w,'bicubic')
    ret = tf.cast(ret,tf.float32)
    return ret,scale


In [3]:
def generate_TFRecord(df,target_size,filename):
    """Write every image/label row of ``df`` to a ZLIB-compressed TFRecord file.

    For each row, the image named in ``df['filename']`` (path relative to the
    module-level ``INPUT_ROOT``) is loaded with ``load_jpeg``, fitted to
    ``target_size`` with ``resize_img_tensor`` and rescaled with ``img/127.5 - 1``.
    The matching ``df['distance']`` value is divided by the resize scale. Each record
    stores the serialized image tensor under ``'img'`` and the label under
    ``'label'``. The number of records written is printed at the end.

    Args:
        df: table providing the ``'filename'`` and ``'distance'`` columns.
        target_size: ``(height, width)`` forwarded to ``resize_img_tensor``.
        filename: path of the TFRecord file to create.
    """
    writer_options = tf.io.TFRecordOptions(compression_type='ZLIB',compression_level=1)
    n_written = 0
    with tf.io.TFRecordWriter(filename,writer_options) as record_writer:
        for rel_path,distance in zip(df['filename'],df['distance']):
            resized,scale = resize_img_tensor(load_jpeg(INPUT_ROOT+rel_path),target_size)
            # Rescale pixel values towards [-1,1] (assumes 0..255 inputs) before serializing.
            img_bytes = tf.io.serialize_tensor(resized/127.5 - 1).numpy()
            # The image may have been shrunk, so the label is shrunk by the same factor.
            scaled_label = tf.convert_to_tensor(distance/scale,tf.float32)
            features = tf.train.Features(feature={
                'img':tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_bytes])),
                'label':tf.train.Feature(float_list=tf.train.FloatList(value=[scaled_label])),
            })
            record_writer.write(tf.train.Example(features=features).SerializeToString())
            n_written += 1
    print(n_written,'\n=========================================')

In [4]:
# Configuration: dataset location, output image size and validation split size.
INPUT_ROOT = '/kaggle/input/tsp-cv/'
TARGET_SIZE = (448,448)
VAL_SIZE = 658  # trailing rows of train.csv held out for validation (must be > 0)
AUTOTUNE = tf.data.AUTOTUNE

train_csv_path = INPUT_ROOT+'train.csv'
test_csv_path = INPUT_ROOT+'test.csv'
train_df = pd.read_csv(train_csv_path)
test_df = pd.read_csv(test_csv_path)

# Positional split: all rows except the last VAL_SIZE go to the training record file,
# the last VAL_SIZE rows go to the validation record file.
generate_TFRecord(train_df.iloc[:-VAL_SIZE],TARGET_SIZE,'tsp_train.tfrecord')
generate_TFRecord(train_df.iloc[-VAL_SIZE:],TARGET_SIZE,'tsp_val.tfrecord')
print(TARGET_SIZE)

2022-04-20 01:52:41.260691: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


15360 
658 
(448, 448)
