In [1]:
import tensorflow as tf
import os
from PIL import Image
import matplotlib.pyplot as plt
TRAIN_PATH = 'data/'
train_ids = next(os.walk(TRAIN_PATH))[2]

In [2]:
print(train_ids[:5])

['k.png', 'j.png', 'h.png', 'i 복사본.png', 'i.png']


In [3]:
train_imgs = []
for id_ in train_ids:
    img_str = open(TRAIN_PATH+id_,'rb').read()
    train_imgs.append(img_str)

In [4]:
# 이미지의 라벨을 설정 여기선, 모두 1로 설정
# tf.session(안씀) > tf.function
# train.Feature의 int/float/bytes 형식을 
# tf.train.example이 받아서 TFRecord를 생성해준다.

def _bytes_feature(value):
  """string / byte 타입을 받아서 byte list를 리턴합니다."""
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
  """float / double 타입을 받아서 float list를 리턴합니다."""
  return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
  """bool / enum / int / uint 타입을 받아서 int64 list를 리턴합니다."""
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def image_example(img_str, label=1):
    img_shape = tf.io.decode_png(img_str).shape
#     print(img_shape)
    feature = {
      'height': _int64_feature(img_shape[0]),
      'width': _int64_feature(img_shape[1]),
      'depth': _int64_feature(img_shape[2]),
      'label': _int64_feature(label),
      'image_raw': _bytes_feature(img_str),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))

# 예제 이미지들을 images.tfrecords 파일에 저장합니다.
with tf.io.TFRecordWriter('images.tfrecords') as writer:
    for imgstr in train_imgs:
        tf_example = image_example(imgstr)
        writer.write(tf_example.SerializeToString())

In [11]:
dataset = tf.data.TFRecordDataset('images.tfrecords')
print(dataset)

def read_tfrecord(example):
    features = {
        'height': tf.io.FixedLenFeature([], tf.int64),
        'width': tf.io.FixedLenFeature([], tf.int64),
        'depth': tf.io.FixedLenFeature([], tf.int64),
        'label': tf.io.FixedLenFeature([], tf.int64),
        'image_raw': tf.io.FixedLenFeature([], tf.string),
    }
    example = tf.io.parse_single_example(example, features)
    
    image = tf.image.decode_png(example['image_raw'], channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    label = example['label']
    
    return image, label

def get_batched_dataset(filenames):
    option_no_order = tf.data.Options()
    option_no_order.experimental_deterministic = False

    dataset = tf.data.Dataset.list_files(filenames)
    dataset = dataset.with_options(option_no_order)
    # https://www.tensorflow.org/guide/data_performance?hl=ko
    dataset = dataset.interleave(tf.data.TFRecordDataset, cycle_length=16, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.map(read_tfrecord, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    dataset = dataset.cache() # This dataset fits in RAM
    dataset = dataset.repeat()
#     dataset = dataset.shuffle(2048)
    BATCH_SIZE = 5
    dataset = dataset.batch(BATCH_SIZE, drop_remainder=True) 
    # 데이터를 미리 가져온다..?
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE) 

    return dataset

def get_training_dataset():
    return get_batched_dataset('images.tfrecords')
# parsed_image_dataset = dataset.map(_parse_image_function)
# parsed_image_dataset = parsed_image_dataset.batch(2)


from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense

inputs = Input((512, 680, 3))
x = Conv2D(16,(3,3),activation='relu')(inputs)
x = Flatten()(x)
outputs = Dense(1)(x)

model = Model(inputs,outputs)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['mse'])
model.summary()

<TFRecordDatasetV2 shapes: (), types: tf.string>
Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 512, 680, 3)]     0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 510, 678, 16)      448       
_________________________________________________________________
flatten_3 (Flatten)          (None, 5532480)           0         
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 5532481   
Total params: 5,532,929
Trainable params: 5,532,929
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Dataset에서 `fit` 메서드를 호출할 때 `steps_per_epoch` 설정을 잊지 마세요.
# https://www.tensorflow.org/guide/keras/overview?hl=ko
model.fit(get_training_dataset(), epochs=10, steps_per_epoch=30)

Train for 30 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x13d232780>

In [6]:
raw_example = next(iter(dataset))
parsed = tf.train.Example.FromString(raw_example.numpy())
parsed.features.feature['image/text']

<TFRecordDatasetV2 shapes: (), types: tf.string>


In [None]:
# https://hwiyong.tistory.com/280?category=840057
# 공부 ㅎㅎ;

In [None]:
# 이미지 모델을 위한 파이프라인은 분산 파일 시스템에서 데이터를 통합
# 이미지 파이프라인에서 요소는 이미지와 레이블을 나타내는 텐서의 요소 쌍인 
# 단일 학습 예시를 나타낼 수 있습니다.

In [30]:
dataset = tf.data.Dataset.from_tensor_slices([8, 3, 0, 8, 2, 1])
dataset

<TensorSliceDataset shapes: (), types: tf.int32>

In [31]:
for elem in dataset:
  print(elem.numpy())

8
3
0
8
2
1


In [32]:
it = iter(dataset)
print(next(it).numpy())
print(next(it).numpy())
print(next(it).numpy())

8
3
0


In [33]:
# 데이터의 모든합
print(dataset.reduce(0, lambda state,value: state+value).numpy())

22


In [None]:
# Tensor, SparseTensor, RaggedTensor, TensorArray, Dataset