In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf

from tensorflow.keras.preprocessing import image_dataset_from_directory

In [None]:
!pip install -q -U tf-hub-nightly
import tensorflow_hub as hub

from tensorflow.keras import layers

# 미세조정 Augmentation(이미지)

In [None]:
import tensorflow_datasets as tfds
import pathlib

## 경로

경로를 문자열(str)로 저장한 뒤 `pathlib.Path` 객체로 변환합니다.


In [None]:
# Root folder of the training images (one sub-directory per class).
data_root = '/content/drive/MyDrive/Vision/Classification/programmers/train/train/'
data_dir = pathlib.Path(data_root)
# Count the JPEG files that sit exactly one directory below the root.
image_count = sum(1 for _ in data_dir.glob('*/*.jpg'))

### 디렉터리의 이미지 파일 경로를 `tf.data.Dataset`으로 불러오기

In [None]:
# Enumerate only the JPEG files, using the same pattern that `image_count` was
# computed from, so the shuffle buffer and the later train/val split sizes
# match the dataset (and decode_jpeg never sees a non-JPEG file).
list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*.jpg'), shuffle=False)
# One full-buffer shuffle whose order stays fixed across iterations, so the
# take/skip split further down never moves files between train and validation.
list_ds = list_ds.shuffle(image_count, reshuffle_each_iteration=False)

In [None]:
# print(list_ds)

데이터 확인하기

In [None]:
# Peek at the first five file paths (printed as raw byte strings).
for path_bytes in list_ds.take(5).as_numpy_iterator():
  print(path_bytes)

b'/content/drive/MyDrive/Vision/Classification/programmers/train/train/dog/pic_297.jpg'
b'/content/drive/MyDrive/Vision/Classification/programmers/train/train/house/pic_013.jpg'
b'/content/drive/MyDrive/Vision/Classification/programmers/train/train/horse/pic_106.jpg'
b'/content/drive/MyDrive/Vision/Classification/programmers/train/train/dog/pic_329.jpg'
b'/content/drive/MyDrive/Vision/Classification/programmers/train/train/house/pic_247.jpg'


### 폴더이름을 라벨로

In [None]:
# Class labels are the names of the sub-directories under the data root.
# Filtering on is_dir() keeps stray files (licenses, OS metadata, ...) from
# being mistaken for classes, instead of special-casing a single file name.
class_names = np.array(sorted(entry.name for entry in data_dir.glob('*') if entry.is_dir()))
print(class_names)

['dog' 'elephant' 'giraffe' 'guitar' 'horse' 'house' 'person']


In [None]:
# Shape of the label array; its single dimension is the number of classes.
class_names.shape

(7,)

### train,val 로 나누기

In [None]:
# Hold out 20% of the files for validation; everything else is training data.
val_size = int(image_count * 0.2)
train_size = image_count - val_size

# The first `val_size` shuffled paths form the validation split and the
# remainder forms the training split.
val_ds = list_ds.take(val_size)
train_ds = list_ds.skip(val_size)

shape확인

In [None]:
# Inspect the training split; at this point each element is still a file path string.
train_ds

<SkipDataset shapes: (), types: tf.string>

In [None]:
# Report how many elements ended up in the training and validation splits.
for split_ds in (train_ds, val_ds):
  print(tf.data.experimental.cardinality(split_ds).numpy())

1359
339


이미지 사이즈 변수

In [None]:
# Target edge length (pixels) for every image fed to the model.
IMG_SIZE = 600
# Height and width are kept as separate names for the decoding helpers.
img_height = img_width = IMG_SIZE

## 이미지 데이터셋 라벨붙이기

In [None]:
def get_label(file_path):
  """Return a boolean one-hot vector marking the class of `file_path`.

  The class is the name of the file's parent directory, compared element-wise
  against the module-level `class_names` array, so the result has one boolean
  entry per known class.
  """
  # Break the path into its components using the OS path separator.
  path_parts = tf.strings.split(file_path, os.path.sep)
  # The second-to-last component is the class directory.
  class_dir = path_parts[-2]
  # Broadcasting the comparison against all class names gives the one-hot mask.
  return class_dir == class_names


In [None]:
def decode_img(img):
  """Decode JPEG bytes into a 3-channel image resized to (img_height, img_width)."""
  # Turn the compressed byte string into a 3-D uint8 tensor (RGB).
  decoded = tf.image.decode_jpeg(img, channels=3)
  # Bring every image to the common target size.
  target_size = [img_height, img_width]
  return tf.image.resize(decoded, target_size)

In [None]:
def process_path(file_path):
  """Load one example from disk.

  Args:
    file_path: scalar string tensor holding the path of an image file.

  Returns:
    An `(img, label)` pair: the decoded, resized image from `decode_img` and
    the boolean one-hot label from `get_label`.
  """
  label = get_label(file_path)
  # Read the raw file contents as a byte string, then decode them.
  # (The former debug `print(label)` was dropped: inside `Dataset.map` it only
  # runs while the function is traced and prints a symbolic Tensor, not values.)
  img = tf.io.read_file(file_path)
  img = decode_img(img)
  return img, label

In [None]:
# Let tf.data choose how many images to load/decode in parallel.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Turn each split from file paths into (image, label) pairs.
train_ds, val_ds = (
    split_ds.map(process_path, num_parallel_calls=AUTOTUNE)
    for split_ds in (train_ds, val_ds)
)

Tensor("Equal:0", shape=(7,), dtype=bool)
Tensor("Equal:0", shape=(7,), dtype=bool)


In [None]:
# Inspect the element shapes and dtypes after mapping process_path over the split.
train_ds

<ParallelMapDataset shapes: ((600, 600, 3), (7,)), types: (tf.float32, tf.bool)>

In [None]:
# Same inspection through print(), which shows the plain-text repr.
print(train_ds)

<ParallelMapDataset shapes: ((600, 600, 3), (7,)), types: (tf.float32, tf.bool)>


## Augmentation

In [None]:
# Same constant as assigned earlier in the notebook; passed to map()/prefetch()
# below so tf.data tunes those values itself.
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [None]:
def resize_and_rescale(image, label):
  """Resize `image` to IMG_SIZE x IMG_SIZE and divide pixel values by 255.

  The label is passed through unchanged so the function can be used directly
  with `Dataset.map` on (image, label) pairs.
  """
  as_float = tf.cast(image, tf.float32)
  resized = tf.image.resize(as_float, [IMG_SIZE, IMG_SIZE])
  return resized / 255.0, label

In [None]:
def augment(image, label):
  """Apply random-crop jitter and random brightness to one training example.

  The image is first resized/rescaled via `resize_and_rescale`; the label is
  returned as produced by that call.
  """
  image, label = resize_and_rescale(image, label)
  # Enlarge the canvas by 6 pixels in each dimension ...
  padded_size = IMG_SIZE + 6
  padded = tf.image.resize_with_crop_or_pad(image, padded_size, padded_size)
  # ... then cut a random IMG_SIZE x IMG_SIZE window back out of it.
  cropped = tf.image.random_crop(padded, size=[IMG_SIZE, IMG_SIZE, 3])
  # Random brightness shift, clipped afterwards so values stay within [0, 1].
  brightened = tf.image.random_brightness(cropped, max_delta=0.5)
  return tf.clip_by_value(brightened, 0, 1), label

In [None]:
# Check the dataset's element spec before the augmentation pipeline is attached.
print(train_ds)

<ParallelMapDataset shapes: ((600, 600, 3), (7,)), types: (tf.float32, tf.bool)>


In [None]:
# Training input pipeline: shuffle -> augment -> batch -> prefetch.
# NOTE(review): the shuffle buffer here holds already-decoded 600x600x3 float
# images, which looks memory-heavy; consider shuffling file paths before
# decoding instead -- confirm against available RAM.
train_ds = train_ds.shuffle(2000)
train_ds = train_ds.map(augment, num_parallel_calls=AUTOTUNE)
train_ds = train_ds.batch(32).prefetch(AUTOTUNE)

In [None]:
# Validation input pipeline: no augmentation, only resize/rescale -> batch -> prefetch.
val_ds = val_ds.map(resize_and_rescale, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.batch(8).prefetch(AUTOTUNE)

In [None]:
# Build a one-batch view of the training data; the repr shows the batched element spec.
train_ds.take(1)



<TakeDataset shapes: ((None, 600, 600, 3), (None, 7)), types: (tf.float32, tf.bool)>