### Import libraries

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "3"

To prevent elements such as Tensorflow import logs, perform these tasks.

In [2]:
import glob
import numpy as np
import tensorflow as tf
import IPython.display as display

### Create TFRecord functions

In [36]:
def _bytes_feature(value: [str, bytes]) -> tf.train.Feature:
    """string / byte를 byte_list로 반환합니다."""
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy() # BytesList는 EagerTensor에서 문자열을 풀지 않습니다.
    
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

In [13]:
def _float_feature(value: float) -> tf.train.Feature:
    """float / double를 float_list로 반환합니다."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

In [37]:
def _int64_feature(value: [bool, int]) -> tf.train.Feature:
    """bool / enum / int / uint를 int64_list로 반환합니다."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

In [15]:
def _image_to_byte(value: str) -> bytes:
    """image를 bytes로 반환합니다."""
    raw_image = open(value, "rb")
    image_bytes = raw_image.read()
    return image_bytes

In [38]:
def serialize_example(raw_image: str, label_str: str, label_int: int, for_test: bool) -> tf.train.Example.SerializeToString:
    """
    파일을 만들기 위해서 tf.train.Example 메시지를 만듭니다.
    """
    feature = {
        "raw_image": _bytes_feature(_image_to_byte(raw_image)),
        "label_str": _bytes_feature(bytes(label_str, encoding="utf-8")),
        "label_int": _int64_feature(label_int),
        "for_test": _int64_feature(for_test),
    }
    
    example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
    return example_proto.SerializeToString

In [None]:
def get_dataset(path: str):
    feature1, feature2, feature3, feature4 = np.array(), np.array(), np.array(), np.array()
    
    image_paths = glob.glob(path + "/*/*/*.jpg")
    
    for image_path in image_paths:
        image_information = image_path.split("\\")
        data_type, label = map(str, image_information[1:3])

In [None]:
features_dataset = tf.data.Dataset.from_tensor_slices

In [24]:
# Raw이미지 데이터셋 위치를 입력 받아, TFRecord로 변환하여 반환
def convert_to_tfrecord(input_file, output_file, label_form):
    image_paths = glob.glob(input_file + "/*/*/*.jpg")
    
    for image_path in image_paths:
        image_bytes = _read_image_bytes(image_path)
        
        # 입력 받은 경로에서 데이터의 타입 및 라벨, 파일명들을 추출
        path_information = image_path.split("\\")
        data_type, label, file_name = map(str, path_information[1:4])
        
        # tensorflow example 타입으로 변경
        tf_example = tf.train.Example(features=tf.train.Features(feature={
            "image": _bytes_feature(image_bytes),
            "type": _bytes_feature(bytes(data_type, encoding="utf8")),
            "label_string": _bytes_feature(bytes(label, encoding="utf8")),
            "label_int": _int64_feature(label_form[label])
        }))
        
        # train/test 데이터인지에 따라, 디렉토리를 지정 후
        # 각 디렉토리 속 파일 갯수에 따라, 파일명을 인덱싱
        try:
            type_directory = output_file + "/{0}".format(data_type)
            file_cnt = len(os.listdir(type_directory))
            
            file_name = type_directory + "/{0}_{1}_{2}.tfrecord".format(data_type, label, file_cnt)
            
            writer = tf.io.TFRecordWriter(file_name)
        except:
            type_directory = output_file + "/{0}".format(data_type)
            os.makedirs(os.path.join(type_directory))
            file_cnt = len(os.listdir(type_directory))
            
            file_name = type_directory + "/{0}_{1}_{2}.tfrecord".format(data_type, label, file_cnt)
            
            writer = tf.io.TFRecordWriter(file_name)
        
        try:
            print("Start Generating %s" % file_name)
            writer.write(tf_example.SerializeToString())
            break
        except:
            print("Failed generating %s" % file_name)
            pass

In [25]:
label_form = {"NonDemented": 0, "VeryMildDemented": 1, "MildDemented": 2, "ModerateDemented": 3}

convert_to_tfrecord("./dataset/", "./test/", label_form)

NameError: name '_read_image_bytes' is not defined

In [None]:
filename = "./test/test/test_MildDemented_0.tfrecord"
raw_dataset = tf.data.TFRecordDataset(filename)

features = {
    "image": tf.io.FixedLenFeature([], tf.string),
    "type": tf.io.FixedLenFeature([], tf.string),
    "label_string": tf.io.FixedLenFeature([], tf.string),
    "label_int": tf.io.FixedLenFeature([], tf.int64)
}

def _parse_image_function(example):
    return tf.io.parse_single_example(example, features)

parsed_dataset = raw_dataset.map(_parse_image_function)
# raw_example = next(iter(parsed_dataset))
parsed = tf.train.Example.FromString(bytes(parsed_dataset))

parsed.features.features["type"]

# test_file = "./test/test/test_MildDemented_0.tfrecord"
# raw_image_dataset = tf.data.TFRecordDataset(test_file)
# parsed_image_dataset = raw_image_dataset.map(_parse_image_function)
# print(parsed_image_dataset)