# v1-1 Pretraining
Reference: TensorFlow image tutorial — https://www.tensorflow.org/tutorials/images/transfer_learning?hl=ko

Changes in this version:

1. Code, image 정리 (tidied up the code and images)
2. Epochs 추가 (increased the number of training epochs)

In [1]:
from util import *
%load_ext autoreload
%autoreload 2
%matplotlib inline

# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
# os.environ["CUDA_VISIBLE_DEVICES"]="0"
# Turn on memory growth for every GPU TensorFlow reports.
# A RuntimeError raised while doing so is printed instead of aborting the cell.
try:
    gpu_devices = tf.config.experimental.list_physical_devices('GPU')
    for device in gpu_devices:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    print(err)

In [2]:
# Settings shared by the data-loading, model and inference cells below.
batch_size = 32                    # images per batch
img_height, img_width = 180, 180   # every image is resized to this size on load
RS = 42                            # seed passed to the train/validation split

In [3]:
data_dir = join(PATH.INPUT, 'train')

# Both subsets must be created with the same split fraction, seed and image
# settings; keeping the arguments in one place makes that explicit.
split_options = dict(
    validation_split=0.2,
    shuffle=True,
    seed=RS,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)
load_from_directory = tf.keras.preprocessing.image_dataset_from_directory

train_ds = load_from_directory(data_dir, subset='training', **split_options)
val_ds   = load_from_directory(data_dir, subset='validation', **split_options)

# Read the class names now: they are taken from the dataset object returned by
# the loader, before it is wrapped by cache()/shuffle()/prefetch() below.
class_names = train_ds.class_names
num_classes = len(class_names)
print(class_names)

# Peek at a single batch to show the image and label tensor shapes.
for image_batch, labels_batch in train_ds.take(1):
    print(image_batch.shape)
    print(labels_batch.shape)

AUTOTUNE = tf.data.experimental.AUTOTUNE

# Cache both pipelines and prefetch upcoming batches; only the training
# pipeline is shuffled (buffer of 1000 elements).
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds   = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

Found 13861 files belonging to 10 classes.
Using 11089 files for training.
Found 13861 files belonging to 10 classes.
Using 2772 files for validation.
['Tomato_D01', 'Tomato_D04', 'Tomato_D05', 'Tomato_D07', 'Tomato_D08', 'Tomato_D09', 'Tomato_H', 'Tomato_P03', 'Tomato_P05', 'Tomato_R01']
(32, 180, 180, 3)
(32,)


In [4]:
from tensorflow.keras import layers

preprocessing = layers.experimental.preprocessing
input_shape   = (img_height, img_width, 3)

# Pixel rescaling lives inside the model, so raw images can be passed to it
# directly both for training and for prediction.
normalization_layer = preprocessing.Rescaling(1./255)

# Random flip / rotation / zoom applied to the input images.
data_augmentation = Sequential([
    preprocessing.RandomFlip('horizontal', input_shape=input_shape),
    preprocessing.RandomRotation(0.1),
    preprocessing.RandomZoom(0.1),
])

# Three Conv2D + MaxPooling2D stages with 16, 32 and 64 filters.
conv_stages = []
for n_filters in (16, 32, 64):
    conv_stages.append(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
    conv_stages.append(layers.MaxPooling2D())

model = Sequential([
    data_augmentation,
    normalization_layer,
    *conv_stages,
    layers.Dropout(0.2),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes),  # one raw score (logit) per class, no softmax
])

# The final layer has no activation, hence from_logits=True in the loss.
model.compile(
    optimizer='adam',
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential (Sequential)      (None, 180, 180, 3)       0         
_________________________________________________________________
rescaling (Rescaling)        (None, 180, 180, 3)       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 180, 180, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 90, 90, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 90, 90, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 45, 45, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 45, 45, 64)       

In [None]:
# Train for a fixed number of epochs, evaluating on val_ds after each one.
# The returned History object is used by the plotting cell.
epochs  = 15
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15

In [None]:
# Plot training vs. validation accuracy and loss, one panel per metric.
hist = history.history

# Take the x-axis from the number of epochs actually recorded in the history
# rather than from the `epochs` variable set in the training cell. The two
# differ if `epochs` was edited after training or training stopped early, and
# plotting would then fail with a length mismatch.
epochs_range = range(len(hist['loss']))

fig, axes = plt.subplots(1, 2, figsize=(16, 8))
for ax, metric in zip(axes, ['accuracy', 'loss']):
    train_metric = hist[metric]
    val_metric   = hist[f'val_{metric}']

    ax.plot(epochs_range, train_metric, label=f'training {metric}')
    ax.plot(epochs_range, val_metric, label=f'validation {metric}')
    ax.set_xlabel('epoch')
    ax.set_ylabel(metric)
    ax.legend(fontsize='x-large')
    ax.grid()
    ax.set_title(f'Training and Validation {metric}')

plt.show()

In [None]:
data_dir = join(PATH.INPUT, 'test')

# list_files is consumed as (path, name) pairs. Order them by the integer
# formed from all digit characters in the file name.
test_entries = sorted(
    list_files(data_dir),
    key=lambda entry: int(''.join(ch for ch in entry[1] if ch.isdigit())),
)

# Load every test image at the training resolution. Pixels are left unscaled
# because the model contains its own Rescaling layer.
names      = [name for _, name in test_entries]
img_arrays = np.array([
    keras.preprocessing.image.img_to_array(
        keras.preprocessing.image.load_img(path, target_size=(img_height, img_width))
    )
    for path, _ in test_entries
])

# Index of the highest-scoring class for each image.
predictions = model.predict(img_arrays).argmax(axis=1)

# Write the submission file without the DataFrame index column.
sub = pd.DataFrame({'file_name': names, 'answer': predictions})
output_path = join(PATH.OUTPUT, 'v1-1.csv')
sub.to_csv(output_path, index=False)