<a href="https://colab.research.google.com/github/Lucas-Kuo/VR_DL_HW1/blob/main/VR_DL_HW1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Clone the project repository and make it the working directory; the cells
# below refer to scripts and data files by paths relative to it.
# NOTE(review): re-running this cell from inside VR_DL_HW1 clones a nested copy.
!git clone https://github.com/Lucas-Kuo/VR_DL_HW1.git
%cd VR_DL_HW1

In [None]:
!pip install gdown
!pip install imutils

In [None]:
import gdown

# Files hosted on the author's Google Drive, each paired with the local path it
# is saved to: the dataset archive first, then the checkpoint data shard.
_downloads = [
    ("https://drive.google.com/u/0/uc?id=1dYt4iLy0euxVXordHq4RRHgWojgUjgIf&export=download",
     "2021VRDL_HW1_datasets.zip"),
    ("https://drive.google.com/u/0/uc?id=1yV3Bz5hpsJgwpplQARsCxVSnYxsRQALE&export=download",
     "checkpoints/checkpoints.data-00000-of-00001"),
]

for url, output in _downloads:
    gdown.download(url, output, quiet=False)

In [None]:
!unzip 2021VRDL_HW1_datasets.zip
!mkdir training_images
!mkdir dataset
!mkdir dataset/evaluation
!unzip training_images.zip -d training_images
!unzip testing_images.zip -d dataset/evaluation

In [None]:
# Run the repository's configuration and dataset-building scripts.
# NOTE(review): presumably build_dataset2.py creates the dataset/training and
# dataset/validation folders that later cells read — confirm against the script.
!python config.py
!python build_dataset2.py

In [None]:
# Run the repository's training script as a subprocess (its behaviour is
# defined in train_model.py, not in this notebook).
!python train_model.py

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf

from tensorflow.keras.preprocessing import image_dataset_from_directory

In [None]:
# Collect the evaluation image names: the first whitespace-separated token of
# every line of the sample answer file, kept in file order.
path = os.path.join("self_utils", "sample_answer.txt")
with open(path, "r") as f:
  evaluation_filenames = [row.split()[0] for row in f]

# Directory holding the evaluation (test) images.
evaluation_base_dir = os.path.join("dataset", "evaluation")

In [None]:
# initialize the list of class label names (one label per line of the file)
CLASS_NAMES_FILE = "classes.txt"
with open(CLASS_NAMES_FILE, "r") as f:
  # Strip only the line terminator. Slicing off the last character blindly
  # would truncate the final label when the file has no trailing newline.
  CLASSES = [line.rstrip("\n") for line in f]

In [None]:
# Locations of the training / validation image folders.
PATH = "dataset"
train_dir = os.path.join(PATH, "training")
validation_dir = os.path.join(PATH, 'validation')

BATCH_SIZE = 32
IMG_SIZE = (600, 600)

# Both loaders share the same options: shuffled batches, one-hot labels whose
# column order follows CLASSES, and images resized to IMG_SIZE.
_loader_options = dict(
  shuffle=True,
  class_names=CLASSES,
  label_mode="categorical",
  batch_size=BATCH_SIZE,
  image_size=IMG_SIZE,
)

train_dataset = image_dataset_from_directory(train_dir, **_loader_options)

validation_dataset = image_dataset_from_directory(validation_dir, **_loader_options)

In [None]:
# Let tf.data choose the prefetch buffer size for both input pipelines.
AUTOTUNE = tf.data.AUTOTUNE

train_dataset, validation_dataset = (
  dataset.prefetch(buffer_size=AUTOTUNE)
  for dataset in (train_dataset, validation_dataset)
)

In [None]:
# Augmentation applied to the model input: horizontal flips, random rotations
# and random contrast changes.
# All three layers are taken from tf.keras.layers; the cell previously mixed
# that namespace with tf.keras.layers.experimental.preprocessing.
data_augmentation = tf.keras.Sequential([
  tf.keras.layers.RandomFlip('horizontal'),
  tf.keras.layers.RandomRotation(0.2),
  tf.keras.layers.RandomContrast(0.5, seed=None)
])

In [None]:
# Input preprocessing function of the EfficientNet family, applied to the
# augmented images when the model is assembled below.
# Alternative kept for reference (ResNetV2 backbone):
# preprocess_input = tf.keras.applications.resnet_v2.preprocess_input
preprocess_input = tf.keras.applications.efficientnet.preprocess_input

In [None]:
# Create the base model from the ImageNet-pre-trained EfficientNetB7, without
# its classification top (include_top=False).
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.efficientnet.EfficientNetB7(
  include_top=False,
  weights='imagenet',
  input_shape=IMG_SHAPE,
)
# Alternative backbones left disabled by the author:
# base_model = tf.keras.applications.ResNet152V2(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')
# base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE, include_top=False, weights='imagenet')

In [None]:
# Mark the whole backbone as non-trainable, then print its layer summary.
base_model.trainable = False
base_model.summary()

In [None]:
# Classification head stacked on the backbone:
# average pooling -> flatten -> Dense(256, relu) -> Dropout(0.5) -> softmax.
# global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
average_layer = tf.keras.layers.AveragePooling2D(pool_size=(19, 19))
prediction_layer = tf.keras.layers.Dense(
  len(CLASSES),
  activation="softmax",
  activity_regularizer=tf.keras.regularizers.L2(0.1),
)

inputs = tf.keras.Input(shape=(600, 600, 3))
augmented = data_augmentation(inputs)
preprocessed = preprocess_input(augmented)
# The backbone is always called with training=False.
feature_maps = base_model(preprocessed, training=False)
pooled = average_layer(feature_maps)
flat = tf.keras.layers.Flatten()(pooled)
hidden = tf.keras.layers.Dense(256, activation="relu")(flat)
regularized = tf.keras.layers.Dropout(0.5)(hidden)
outputs = prediction_layer(regularized)
model = tf.keras.Model(inputs, outputs)

In [None]:
# Compile for one-hot (categorical) labels with Adam at the base learning rate.
base_learning_rate = 0.001
adam = tf.keras.optimizers.Adam(learning_rate=base_learning_rate)
model.compile(
  optimizer=adam,
  loss=tf.keras.losses.CategoricalCrossentropy(),
  metrics=['accuracy'],
)
model.summary()

In [None]:
# Restore the model weights from the checkpoint prefix ./checkpoints/checkpoints
# (the download cell above stores this checkpoint's data shard in checkpoints/).
model.load_weights('./checkpoints/checkpoints')
# Optional sanity check of the restored weights on the validation set:
# loss, acc = model.evaluate(validation_dataset)
# print('Restored model, accuracy: {:5.2f}%'.format(100 * acc))

In [None]:
# Unlabelled evaluation images. shuffle=False keeps the batches in the same
# order as testDS.file_paths, which later cells rely on to name predictions.
BATCH_SIZE = 32
testDS = image_dataset_from_directory(
  "dataset/evaluation",
  labels=None,
  label_mode=None,
  shuffle=False,
  batch_size=BATCH_SIZE,
  image_size=IMG_SIZE,
)

In [None]:
# Score every evaluation image, then keep the index of the highest-scoring
# class for each one.
predictions = model.predict(testDS)
best_class_indices = np.argmax(predictions, axis=1)
output = list(best_class_indices)

In [None]:
# Map every evaluation image's file name to its predicted class label.
result = {}
for file_path, class_index in zip(testDS.file_paths, output):
  # Use the real base name of the path; a fixed-width slice ([-8:]) only works
  # when every file name is exactly eight characters long.
  result[os.path.basename(file_path)] = CLASSES[class_index]

# Write "<file name> <label>" lines in the order of the sample answer file.
with open("answer.txt", "w") as f:
  for filename in evaluation_filenames:
    f.write(filename + ' ' + result[filename] + '\n')

In [None]:
# Loss and accuracy of the restored model on the validation set.
loss, acc = model.evaluate(validation_dataset)
accuracy_percent = 100 * acc
print('Restored model, accuracy: {:5.2f}%'.format(accuracy_percent))

In [None]:
# Debug cell: print the model's score for class index 106 on each of the first
# ten evaluation images.
# It deliberately does not open answer.txt. Opening it in "w" mode without
# writing anything would truncate the submission file.
for filename in evaluation_filenames[:10]:
  imagePath = os.path.sep.join([evaluation_base_dir, filename])
  image = tf.io.read_file(imagePath)
  image = tf.image.decode_png(image, channels=3)
  # Cast without rescaling: image_dataset_from_directory, which feeds this model
  # elsewhere in the notebook, yields float pixels in [0, 255], whereas
  # tf.image.convert_image_dtype would squash them into [0, 1].
  image = tf.cast(image, tf.float32)
  image = tf.image.resize(image, IMG_SIZE)

  # Batch of one image.
  evaluation_list = np.array([image])

  prediction = model.predict(evaluation_list)
  print(prediction[0][106])

In [None]:
# List the path of every evaluation image, one per line.
for filename in evaluation_filenames:
  imagePath = f"{evaluation_base_dir}{os.path.sep}{filename}"
  print(imagePath)

In [None]:
# Class label of the first row of the most recent `prediction` array
# (whatever an earlier cell last assigned to `prediction`).
CLASSES[np.argmax(prediction, axis = 1)[0]]

In [None]:
# Sanity check: run the model on one known validation image and print its
# softmax output.
p = "dataset/validation/107.Common_Raven/1305.jpg"
# Read the file named by `p`; the cell used to read `imagePath`, a leftover
# variable from an earlier cell, so a different image was being scored.
image = tf.io.read_file(p)
image = tf.image.decode_jpeg(image, channels=3)
# Cast without rescaling: image_dataset_from_directory, which feeds this model
# elsewhere in the notebook, yields float pixels in [0, 255], whereas
# tf.image.convert_image_dtype would squash them into [0, 1].
image = tf.cast(image, tf.float32)
image = tf.image.resize(image, IMG_SIZE)

# Batch of one image.
evaluation_list = np.array([image])

prediction = model.predict(evaluation_list)
print(prediction)
# print(np.argmax(prediction, axis = 1))

In [None]:
# Load every evaluation image into one in-memory array, in the order of
# evaluation_filenames.
evaluation_list = []
for filename in evaluation_filenames:
  imagePath = os.path.sep.join([evaluation_base_dir, filename])
  image = tf.io.read_file(imagePath)
  image = tf.image.decode_png(image, channels=3)
  # Cast without rescaling: image_dataset_from_directory, which feeds this model
  # elsewhere in the notebook, yields float pixels in [0, 255], whereas
  # tf.image.convert_image_dtype would squash them into [0, 1].
  image = tf.cast(image, tf.float32)
  image = tf.image.resize(image, IMG_SIZE)
  evaluation_list.append(image)

evaluation_list = np.array(evaluation_list)

In [None]:
# Predicted class index for every image in evaluation_list; the cell displays
# the shape of the resulting index array.
class_scores = model.predict(evaluation_list)
predictions = np.argmax(class_scores, axis=1)
predictions.shape

In [None]:
# Number of layers in the backbone, for choosing the fine-tuning cut-off below.
len(base_model.layers)

In [None]:
# Unfreeze the backbone, then re-freeze every layer before index
# `fine_tune_at` so only the layers from that index onwards are trainable.
base_model.trainable = True

fine_tune_at = 700

for frozen_layer in base_model.layers[:fine_tune_at]:
  frozen_layer.trainable = False

In [None]:
# Recompile so the changed `trainable` flags take effect, using RMSprop at a
# tenth of the base learning rate for fine-tuning.
# `learning_rate` is the supported keyword (the `lr` alias is deprecated) and
# matches the Adam call used when the model was first compiled.
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer=tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate/10),
              metrics=['accuracy'])
model.summary()
# Displayed as the cell output: number of trainable variables.
len(model.trainable_variables)

In [None]:
# Fine-tuning run; the returned History object feeds the plots below.
fine_tune_epochs = 30
finetune_history = model.fit(
  train_dataset,
  validation_data=validation_dataset,
  epochs=fine_tune_epochs,
)

In [None]:
# Save the current model weights under the checkpoint prefix ./checkpoints2.
model.save_weights('./checkpoints2')

In [None]:
# Per-epoch curves recorded during fine-tuning.
acc = finetune_history.history['accuracy']
val_acc = finetune_history.history['val_accuracy']

loss = finetune_history.history['loss']
val_loss = finetune_history.history['val_loss']

# Two stacked panels: accuracy on top, loss underneath.
fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_ylabel('Accuracy')
# Keep the automatic lower bound but pin the top of the axis at 1.
acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
acc_ax.set_title('Training and Validation Accuracy(fine tuned)')

loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_ylim([0, 6.0])
loss_ax.set_title('Training and Validation Loss(fine tuned)')
loss_ax.set_xlabel('epoch')
plt.show()

In [None]:
# Initial training run with the model as currently compiled.
initial_epochs = 30

# Disabled per-epoch checkpointing:
# checkpoint_path = "training_1/cp.ckpt"
# cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,save_weights_only=True,verbose=1)

history = model.fit(
  train_dataset,
  validation_data=validation_dataset,
  epochs=initial_epochs,
)
# Disabled manual checkpoint save:
# os.makedirs('./checkpoints')
# model.save_weights('./checkpoints/first_checkpoint')

In [None]:
# Per-epoch curves recorded by the training run stored in `history`.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Two stacked panels: accuracy on top, loss underneath.
fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_ylabel('Accuracy')
# Keep the automatic lower bound but pin the top of the axis at 1.
acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_ylim([0, 6.0])
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('epoch')
plt.show()

In [None]:

# Save the current model weights under the checkpoint prefix ./checkpoints.
# NOTE(review): the restore cell earlier in the notebook reads the prefix
# './checkpoints/checkpoints', which is a different location — confirm which
# path is intended.
model.save_weights('./checkpoints')

# From here on, EfficientNetB4 is used

In [None]:
# Rebuild the input pipelines at 380x380 for the EfficientNetB4 experiments.
PATH = "dataset"
train_dir = os.path.join(PATH, "training")
validation_dir = os.path.join(PATH, 'validation')

BATCH_SIZE = 32
IMG_SIZE = (380, 380)

# Both loaders share the same options: shuffled batches, one-hot labels whose
# column order follows CLASSES, and images resized to IMG_SIZE.
_loader_options = dict(
  shuffle=True,
  class_names=CLASSES,
  label_mode="categorical",
  batch_size=BATCH_SIZE,
  image_size=IMG_SIZE,
)

train_dataset = image_dataset_from_directory(train_dir, **_loader_options)

validation_dataset = image_dataset_from_directory(validation_dir, **_loader_options)

# Let tf.data choose the prefetch buffer size.
AUTOTUNE = tf.data.AUTOTUNE

train_dataset, validation_dataset = (
  dataset.prefetch(buffer_size=AUTOTUNE)
  for dataset in (train_dataset, validation_dataset)
)

In [None]:
# Augmentation applied to the model input: horizontal flips, random rotations
# and random contrast changes.
# All three layers are taken from tf.keras.layers; the cell previously mixed
# that namespace with tf.keras.layers.experimental.preprocessing.
data_augmentation = tf.keras.Sequential([
  tf.keras.layers.RandomFlip('horizontal'),
  tf.keras.layers.RandomRotation(0.2),
  tf.keras.layers.RandomContrast(0.5, seed=None)
])

# Input preprocessing function of the EfficientNet family.
preprocess_input = tf.keras.applications.efficientnet.preprocess_input

In [None]:
# ImageNet-pre-trained EfficientNetB4 without its classification top, marked
# non-trainable for the initial training phase.
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.efficientnet.EfficientNetB4(
  include_top=False,
  weights='imagenet',
  input_shape=IMG_SHAPE,
)
base_model.trainable = False
base_model.summary()

In [None]:
# Classification head stacked on the B4 backbone:
# average pooling -> flatten -> Dense(256, relu) -> Dropout(0.5) -> softmax.
average_layer = tf.keras.layers.AveragePooling2D(pool_size=(12, 12))
prediction_layer = tf.keras.layers.Dense(
  len(CLASSES),
  activation="softmax",
  activity_regularizer=tf.keras.regularizers.L2(0.1),
)

inputs = tf.keras.Input(shape=(380, 380, 3))
augmented = data_augmentation(inputs)
preprocessed = preprocess_input(augmented)
# The backbone is always called with training=False.
feature_maps = base_model(preprocessed, training=False)
pooled = average_layer(feature_maps)
flat = tf.keras.layers.Flatten()(pooled)
hidden = tf.keras.layers.Dense(256, activation="relu")(flat)
regularized = tf.keras.layers.Dropout(0.5)(hidden)
outputs = prediction_layer(regularized)
model = tf.keras.Model(inputs, outputs)

In [None]:
# Compile for one-hot (categorical) labels with Adam at the base learning rate.
base_learning_rate = 0.001
adam = tf.keras.optimizers.Adam(learning_rate=base_learning_rate)
model.compile(
  optimizer=adam,
  loss=tf.keras.losses.CategoricalCrossentropy(),
  metrics=['accuracy'],
)
model.summary()

In [None]:
# Train the B4-based model; the History object feeds the plots below.
initial_epochs = 50
history = model.fit(
  train_dataset,
  validation_data=validation_dataset,
  epochs=initial_epochs,
)

In [None]:
# Per-epoch curves recorded by the training run stored in `history`.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Two stacked panels: accuracy on top, loss underneath.
fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_ylabel('Accuracy')
# Keep the automatic lower bound but pin the top of the axis at 1.
acc_ax.set_ylim([min(acc_ax.get_ylim()), 1])
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
loss_ax.set_ylim([0, 5.0])
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('epoch')
plt.show()

In [None]:
# Save the B4-based model's weights under ./Enet_B4_checkpoint/B4_checkpoints.
model.save_weights('./Enet_B4_checkpoint/B4_checkpoints')

In [None]:
# Loss and accuracy of the current model on the validation set.
loss, acc = model.evaluate(validation_dataset)
accuracy_percent = acc * 100
print("Acc of validation: {:5.2f}%".format(accuracy_percent))

In [None]:
# Load and resize one training image, then print the predicted class index for
# every sample of the validation dataset.
p = "dataset/training/087.Mallard/0712.jpg"
image = tf.io.read_file(p)
image = tf.image.decode_jpeg(image, channels=3)
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
image = tf.image.resize(image, IMG_SIZE)

# Batch of one image.
evaluation_list = np.array([image])

# NOTE(review): `evaluation_list` (the image loaded from `p`) is never passed
# to the model; predict() runs on validation_dataset instead — confirm intent.
prediction = model.predict(validation_dataset)
# print(prediction)
print(np.argmax(prediction, axis = 1))

In [None]:
# Score of class index 66 for the first row of `prediction`, followed by the
# predicted class index of every row.
print(prediction[0][66])
print(np.argmax(prediction, axis = 1))


In [None]:
def load_images(imagePath):
  """Load one image file as a float32 tensor resized to IMG_SIZE.

  Args:
    imagePath: path of the image file to read.

  Returns:
    A tuple ``(image, label)``. ``image`` is the decoded 3-channel image as
    float32, resized to the notebook-level ``IMG_SIZE``. ``label`` is always
    ``None``, because no label is parsed from the path.
  """
  # read the image from disk and decode it to 3 channels
  image = tf.io.read_file(imagePath)
  image = tf.image.decode_png(image, channels=3)
  # Convert to floating point WITHOUT rescaling: image_dataset_from_directory,
  # which feeds the model elsewhere in this notebook, yields float pixels in
  # [0, 255], whereas tf.image.convert_image_dtype would squash them to [0, 1].
  image = tf.cast(image, tf.float32)
  image = tf.image.resize(image, IMG_SIZE)

  # no label is available for these images
  label = None

  # return the image and the label
  return (image, label)

In [None]:
from imutils import paths

In [None]:
# Paths of all image files found under dataset/evaluation (via imutils).
testPaths = list(paths.list_images("dataset/evaluation"))

In [None]:
# build the testing dataset and data input pipeline
testDS = tf.data.Dataset.from_tensor_slices(testPaths)
testDS = (testDS
	.map(load_images, num_parallel_calls=AUTOTUNE)
	.cache()
	.batch(32)
	.prefetch(AUTOTUNE)
)

In [None]:
# Unlabelled evaluation images. shuffle=False keeps the batches in the same
# order as testDS.file_paths, which later cells rely on to name predictions.
BATCH_SIZE = 32
testDS = image_dataset_from_directory(
  "dataset/evaluation",
  labels=None,
  label_mode=None,
  shuffle=False,
  batch_size=BATCH_SIZE,
  image_size=IMG_SIZE,
)

In [None]:
# Model outputs for every image in the evaluation dataset.
predictions = model.predict(testDS)

In [None]:
# Predicted class index (column with the highest score) for each image.
print(np.argmax(predictions, axis = 1))

In [None]:
# Predicted class indices as a list, in the same order as the rows of `predictions`.
output = list(np.argmax(predictions, axis = 1))

In [None]:
# Show the first file path recorded by the dataset to check the path format.
print(testDS.file_paths[0])

In [None]:
# Map every evaluation image's file name to its predicted class label.
result = {}
for file_path, class_index in zip(testDS.file_paths, output):
  # Use the real base name of the path; a fixed-width slice ([-8:]) only works
  # when every file name is exactly eight characters long.
  result[os.path.basename(file_path)] = CLASSES[class_index]

In [None]:
# Spot-check the predicted label stored for one evaluation image.
result["3306.jpg"]

In [None]:
# Write one "<file name> <label>" line per evaluation image, in the order of
# the sample answer file.
with open("answer.txt", "w") as f:
  f.writelines(
    filename + ' ' + result[filename] + '\n'
    for filename in evaluation_filenames
  )