### Import libraries

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "3"

To prevent elements such as Tensorflow import logs, perform these tasks.

In [2]:
import glob
import numpy as np
import tensorflow as tf
import IPython.display as display

### Convert raw files to TFRecord

In [3]:
def _bytes_feature(value: [str, bytes]) -> tf.train.Feature:
    """string / byte를 byte_list로 반환합니다."""
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy() # BytesList는 EagerTensor에서 문자열을 풀지 않습니다.
    
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

In [4]:
def _float_feature(value: float) -> tf.train.Feature:
    """float / double를 float_list로 반환합니다."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

In [5]:
def _int64_feature(value: [bool, int]) -> tf.train.Feature:
    """bool / enum / int / uint를 int64_list로 반환합니다."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

In [6]:
def _image_to_byte(value: str) -> bytes:
    """image를 bytes로 반환합니다."""
    raw_image = open(value, "rb").read()
    return raw_image

In [7]:
def serialize_example(raw_image: bytes, label_int: int, for_test: bool) -> tf.train.Example.SerializeToString:
    """
    파일을 만들기 위해서 tf.train.Example 메시지를 만듭니다.
    """
    feature = {
        "raw_image": _bytes_feature(raw_image),
        "label": _int64_feature(label_int),
        "for_test": _int64_feature(for_test),
    }
    
    example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
    return example_proto.SerializeToString

In [8]:
def get_dataset_information(path: str) -> np.array:
    """
    데이터의 정보(이미지 경로, 라벨, 테스트용 유무)의 정보를 리스트로 정리하여 np.array로 반환합니다.
    """
    raw_image, label, for_test = [], [], []
    label_form = {"NonDemented": 0, "VeryMildDemented": 1, "MildDemented": 2, "ModerateDemented": 3}
    
    image_paths = glob.glob(path + "/*/*/*.jpg")
    
    for image_path in image_paths:
        image_information = image_path.split("\\")
        data_type, data_label = image_information[1:3]
        
        raw_image.append(_image_to_byte(image_path))
        label.append(label_form[data_label])
        for_test.append(True if data_type=="test" else False)
        
    return np.array(raw_image), np.array(label, dtype=np.int64), np.array(for_test)

In [9]:
raw_image, label, for_test = get_dataset_information("./dataset")

features_dataset = tf.data.Dataset.from_tensor_slices((raw_image, label, for_test))

In [10]:
# for f0, f1, f2, f3 in features_dataset.take(1):
#     print(f0)
#     print(f1)
#     print(f2)
#     print(f3)

In [11]:
def tf_serialize_example(raw_image, label_int, for_test):
    tf_string = tf.py_function(serialize_example,
                              (raw_image, label, for_test),
                               tf.string)
    return tf.reshape(tf_string, ())

In [12]:
serialized_features_dataset = features_dataset.map(tf_serialize_example)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


In [13]:
def generator():
    for features in features_dataset:
        yield serialize_example(*features)

In [14]:
serialized_features_dataset = tf.data.Dataset.from_generator(
    generator, output_types=tf.string, output_shapes=())

In [15]:
filename = "./tfrecord/MRI_DATASET.tfrecord"
writer = tf.data.experimental.TFRecordWriter(filename)
writer.write(serialized_features_dataset)

InvalidArgumentError: TypeError: <tf.Tensor: shape=(), dtype=int64, numpy=2> has type <class 'tensorflow.python.framework.ops.EagerTensor'>, but expected one of: (<class 'int'>,)
Traceback (most recent call last):

  File "c:\users\user\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\script_ops.py", line 244, in __call__
    ret = func(*args)

  File "c:\users\user\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 302, in wrapper
    return func(*args, **kwargs)

  File "c:\users\user\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 827, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))

  File "<ipython-input-13-cf3ec1a35ee4>", line 3, in generator
    yield serialize_example(*features)

  File "<ipython-input-7-7ba49aebad3d>", line 7, in serialize_example
    "label": _int64_feature(label_int),

  File "<ipython-input-5-6aff76875256>", line 3, in _int64_feature
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

  File "c:\users\user\anaconda3\envs\tensorflow\lib\site-packages\google\protobuf\internal\python_message.py", line 542, in init
    copy.extend(field_value)

  File "c:\users\user\anaconda3\envs\tensorflow\lib\site-packages\google\protobuf\internal\containers.py", line 282, in extend
    new_values = [self._type_checker.CheckValue(elem) for elem in elem_seq_iter]

  File "c:\users\user\anaconda3\envs\tensorflow\lib\site-packages\google\protobuf\internal\containers.py", line 282, in <listcomp>
    new_values = [self._type_checker.CheckValue(elem) for elem in elem_seq_iter]

  File "c:\users\user\anaconda3\envs\tensorflow\lib\site-packages\google\protobuf\internal\type_checkers.py", line 171, in CheckValue
    raise TypeError(message)

TypeError: <tf.Tensor: shape=(), dtype=int64, numpy=2> has type <class 'tensorflow.python.framework.ops.EagerTensor'>, but expected one of: (<class 'int'>,)


	 [[{{node PyFunc}}]] [Op:DatasetToTFRecord]