In [20]:
import tensorflow as tf
import glob
import numpy
import os

## tf.data pix2pix 버전(from tensor slice 버전)

https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/generative/pix2pix.ipynb#scrollTo=tyaP4hLJ8b4W

## tf.data classification 버전(list_files 버전)

In [1]:
!unzip /content/dataset.zip

Archive:  /content/dataset.zip
   creating: dataset/virabhadrasana ii/
  inflating: dataset/virabhadrasana ii/42-0.png  
  inflating: dataset/virabhadrasana ii/43-0.png  
  inflating: dataset/virabhadrasana ii/43-1.png  
  inflating: dataset/virabhadrasana ii/45-0.png  
  inflating: dataset/virabhadrasana ii/46-0.png  
  inflating: dataset/virabhadrasana ii/47-0.png  
  inflating: dataset/virabhadrasana ii/48-0.png  
  inflating: dataset/virabhadrasana ii/50-0.png  
  inflating: dataset/virabhadrasana ii/51-0.png  
  inflating: dataset/virabhadrasana ii/52-0.png  
  inflating: dataset/virabhadrasana ii/53-0.png  
  inflating: dataset/virabhadrasana ii/54-0.png  
  inflating: dataset/virabhadrasana ii/55-0.png  
  inflating: dataset/virabhadrasana ii/File36.gif  
   creating: dataset/virasana/
  inflating: dataset/virasana/54-0.png  
  inflating: dataset/virasana/57-0.png  
  inflating: dataset/virasana/61-0.png  
  inflating: dataset/virasana/70-0.png  
  inflating: dataset/virasana/73

In [34]:
data_dir = "/content/dataset/"
image_count = len(list(glob.glob(data_dir+'*/*.png')))
img_height = 28
img_width = 28
batch_size = 4

In [15]:
image_count

43

In [16]:
# 전체 ds 불러오기
list_ds = tf.data.Dataset.list_files(data_dir+'*/*', shuffle=False)
list_ds = list_ds.shuffle(image_count, reshuffle_each_iteration=False)

In [18]:
for f in list_ds.take(5):
  print(f.numpy())

b'/content/dataset/virasana/61-0.png'
b'/content/dataset/virabhadrasana ii/55-0.png'
b'/content/dataset/yoganidrasana/97-0.png'
b'/content/dataset/virasana/90-0.png'
b'/content/dataset/virasana/93-0.png'


In [22]:
# 전체 ds의 클래스 가져오기
class_names = np.array(sorted(os.listdir(data_dir)))
print(class_names)

['virabhadrasana ii' 'virasana' 'yoganidrasana']


In [23]:
# 전체 ds를 split하기
val_size = int(image_count * 0.2)
train_ds = list_ds.skip(val_size)
val_ds = list_ds.take(val_size)

In [24]:
print(tf.data.experimental.cardinality(train_ds).numpy())
print(tf.data.experimental.cardinality(val_ds).numpy())

36
8


In [25]:
# 이미지 경로를 받아서 클래스 반환
def get_label(file_path):
  # Convert the path to a list of path components
  parts = tf.strings.split(file_path, os.path.sep)
  # The second to last is the class-directory
  one_hot = parts[-2] == class_names
  # Integer encode the label
  return tf.argmax(one_hot)

In [26]:
# 이미지 resize 하는 util 함수
def decode_img(img):
  # Convert the compressed string to a 3D uint8 tensor
  img = tf.io.decode_jpeg(img, channels=3)
  # Resize the image to the desired size
  return tf.image.resize(img, [img_height, img_width])

In [27]:
# 이미지 경로를 받아서 이미지와 라벨 반환
def process_path(file_path):
  label = get_label(file_path)
  # Load the raw data from the file as a string
  img = tf.io.read_file(file_path)
  img = decode_img(img)
  return img, label

In [30]:
# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
train_ds = train_ds.map(process_path, num_parallel_calls=tf.data.experimental.AUTOTUNE)
val_ds = val_ds.map(process_path, num_parallel_calls=tf.data.experimental.AUTOTUNE)

In [32]:
for image, label in train_ds.take(5):
  print("Image shape: ", image.numpy().shape)
  print("Label: ", label.numpy())

Image shape:  (28, 28, 3)
Label:  0
Image shape:  (28, 28, 3)
Label:  0
Image shape:  (28, 28, 3)
Label:  2
Image shape:  (28, 28, 3)
Label:  2
Image shape:  (28, 28, 3)
Label:  0


In [35]:
# 성능을 위해 잘 섞고 배치 처리
def configure_for_performance(ds):
  ds = ds.cache()
  ds = ds.shuffle(buffer_size=1000)
  ds = ds.batch(batch_size)
  ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
  return ds

train_ds = configure_for_performance(train_ds)
val_ds = configure_for_performance(val_ds)

In [37]:
num_classes = 3

model = tf.keras.Sequential([
  tf.keras.layers.Rescaling(1./255),
  tf.keras.layers.Conv2D(32, 3, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(32, 3, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(32, 3, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(num_classes)
])

In [39]:
model.compile(
  optimizer='adam',
  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
  metrics=['accuracy'])

In [40]:
# 학습
model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=3
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f932e90afa0>