In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the autoreload extension and use mode 2, which reloads imported modules
# before each cell runs, so edits to the local `utils` package are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import math
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
from pathlib import Path
from tqdm.notebook import tqdm
import cv2
import shutil
import glob
from utils.rsc_optimizer import RSCModelWrapper 
import json
from tensorflow.keras.applications.resnet import preprocess_input
from utils.data_augmentation_pacs import random_crop, grayscale, horizontal_flip, standardize
from utils.visualize import plot_misclassified_images, plotImages

In [None]:
 # set some paths
# Paths used by the notebook.
model_dir = Path('bin')
DATA_DIR = Path('../Homework3-PACS/')

# One folder per PACS domain, all derived from DATA_DIR so that relocating the
# dataset only requires changing a single path.
PACS_DIR = DATA_DIR / 'PACS'
DATA_DIR_art_painting = PACS_DIR / 'art_painting'
DATA_DIR_cartoon = PACS_DIR / 'cartoon'
DATA_DIR_photo = PACS_DIR / 'photo'
DATA_DIR_sketch = PACS_DIR / 'sketch'

# Experiment settings. The context manager closes the file handle right away
# instead of leaving it open until garbage collection.
with open('config.json', 'r') as config_file:
    config = json.load(config_file)

# Class-folder name -> integer label.
label_names = {'dog': 0, 'elephant': 1, 'giraffe': 2, 'guitar': 3, 'horse': 4, 'house': 5, 'person': 6}

# Show the loaded configuration as the cell output.
config

In [None]:
# Pin TensorFlow to a single GPU and let it allocate GPU memory on demand.
# GPU_INDEX = 1 keeps the original choice (the second device). On machines with
# fewer GPUs the last available one is used, and with no GPU at all the cell is
# a no-op (CPU execution) instead of raising IndexError.
GPU_INDEX = 1
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    selected_gpu = gpus[min(GPU_INDEX, len(gpus) - 1)]
    tf.config.experimental.set_visible_devices(selected_gpu, 'GPU')
    tf.config.experimental.set_memory_growth(selected_gpu, True)

# Import the Dataset

In [None]:
def create_dataset(img_folder, data_augmentation=True):
    """Load every image below ``img_folder`` into memory with its class label.

    ``img_folder`` is expected to contain one sub-directory per class, each
    named after a key of the module-level ``label_names`` mapping. Every image
    is read with OpenCV, flipped from BGR to RGB channel order, resized to the
    height/width stored in ``config['input_shape']`` and cast to float32.

    Args:
        img_folder: Directory holding one sub-directory per class.
        data_augmentation: Unused; kept so existing callers keep working.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays: the stacked float32 images
        and the integer label of each image, in matching order.

    Raises:
        KeyError: If a class sub-directory name is not a key of ``label_names``.
        ValueError: If a file cannot be decoded as an image.
    """
    # cv2.resize takes the target size as (width, height).
    target_size = (config['input_shape'][1], config['input_shape'][0])
    img_data_array = []
    class_name = []

    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order reproducible across runs and machines.
    for dir_name in sorted(os.listdir(img_folder)):
        class_dir = os.path.join(img_folder, dir_name)
        if not os.path.isdir(class_dir):
            # Stray files next to the class folders (e.g. .DS_Store) are not classes.
            continue
        for file in sorted(os.listdir(class_dir)):
            image_path = os.path.join(class_dir, file)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread reports failure by returning None instead of raising.
                raise ValueError(f'Could not read image: {image_path}')
            image = image[:, :, ::-1]  # BGR -> RGB
            image = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)
            img_data_array.append(np.array(image, dtype=np.float32))
            class_name.append(label_names[dir_name])
    return np.array(img_data_array), np.array(class_name)

In [None]:
# Download the PACS dataset on first run: clone the repository into DATA_DIR
# unless that directory already exists.
if not os.path.isdir(DATA_DIR):
    !git clone https://github.com/MachineLearning2020/Homework3-PACS.git $DATA_DIR

# Load each of the four PACS domains into memory as an (images, labels) pair
# of arrays via create_dataset.
X_art, y_art = create_dataset(DATA_DIR_art_painting)
X_photo, y_photo = create_dataset(DATA_DIR_photo)
X_cartoon, y_cartoon = create_dataset(DATA_DIR_cartoon)
X_sketch, y_sketch = create_dataset(DATA_DIR_sketch)

In [None]:
# Sanity check: print the image-array and label-array shapes of each domain,
# one line per domain in the order art, photo, cartoon, sketch.
for domain_images, domain_labels in (
    (X_art, y_art),
    (X_photo, y_photo),
    (X_cartoon, y_cartoon),
    (X_sketch, y_sketch),
):
    print(domain_images.shape, domain_labels.shape)

# Visualize the Dataset

In [None]:
# Preview a window of 20 consecutive photo-domain samples. Pixels are cast
# back to uint8 for display; the class names are the keys of label_names.
start_index = 500
end_index = 520
photo_window = slice(start_index, end_index)
plotImages(
    (X_photo[photo_window].astype('uint8'), y_photo[photo_window]),
    end_index - start_index,
    list(label_names),
)

In [None]:
# Preview a window of 20 consecutive art-painting samples. Pixels are cast
# back to uint8 for display; the class names are the keys of label_names.
start_index = 1000
end_index = 1020
art_window = slice(start_index, end_index)
plotImages(
    (X_art[art_window].astype('uint8'), y_art[art_window]),
    end_index - start_index,
    list(label_names),
)

# Pre-process the Dataset

## Split train and test

In [None]:
def train_test_split(train_x, test_x, train_y, test_y):
    """Merge per-domain arrays into one training set and one test set.

    Each argument is a sequence of arrays; the arrays of a sequence are
    concatenated along their first axis.

    Args:
        train_x: Image arrays of the training domains.
        test_x: Image arrays of the test domains.
        train_y: Label arrays of the training domains.
        test_y: Label arrays of the test domains.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` of concatenated arrays.
    """
    merged_train_x, merged_train_y, merged_test_x, merged_test_y = map(
        np.concatenate, (train_x, train_y, test_x, test_y)
    )
    return merged_train_x, merged_test_x, merged_train_y, merged_test_y


In [None]:
# Leave-one-domain-out split: train on cartoon, photo and art painting,
# hold out sketch as the test domain.
X_train, X_test, y_train, y_test = train_test_split(
    train_x=[X_cartoon, X_photo, X_art],
    test_x=[X_sketch],
    train_y=[y_cartoon, y_photo, y_art],
    test_y=[y_sketch],
)

## Create a tf.data pipeline

In [None]:
def tf_data_preprocess(X, y, batch_size, buffer_size):
    """Build the training input pipeline from in-memory arrays.

    Each (image, label) sample goes through random crop, horizontal flip,
    grayscale and standardization, in that order; the samples are then
    shuffled, batched and prefetched.

    Args:
        X: Images; sliced along the first axis into individual samples.
        y: Labels aligned with ``X``.
        batch_size: Number of samples per batch.
        buffer_size: Size of the shuffle buffer.

    Returns:
        A ``tf.data.Dataset`` yielding shuffled, preprocessed batches.
    """
    ds = tf.data.Dataset.from_tensor_slices((X, y))
    # The final stage is `standardize`, the helper this notebook imports from
    # utils.data_augmentation_pacs; no `normalize` exists in scope, so mapping
    # that name raises NameError.
    # NOTE(review): assumes standardize takes and returns an (image, label)
    # pair like the other transforms (it is called that way on arrays in the
    # evaluation section) — confirm it also works on tensors inside map().
    for transform in (random_crop, horizontal_flip, grayscale, standardize):
        ds = ds.map(transform, num_parallel_calls=tf.data.AUTOTUNE)
    return ds.shuffle(buffer_size).batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Build the Model

In [None]:
# ResNet50 with ImageNet weights and without its classification top,
# built for the input shape given in config.json.
backbone = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=config['input_shape'],
)

In [None]:
# Classification head: a 2048-unit dense layer followed by a 7-unit output
# layer (one unit per entry of label_names). No softmax is applied here.
# NOTE(review): neither Dense layer sets an activation, so the two layers
# compose into a single linear map — confirm this is intended.
head_layers = [
    tf.keras.layers.Dense(512 * 4),
    tf.keras.layers.Dense(7),
]
class_head = tf.keras.models.Sequential(head_layers)

In [None]:
# Combine backbone and classification head in the RSC wrapper. The
# percentile and batch_percentage settings are read from config.json.
# trainable_backbone=True presumably leaves the backbone weights trainable —
# confirm in RSCModelWrapper.
model = RSCModelWrapper(
    backbone,
    class_head,
    trainable_backbone=True,
    percentile=config["percentile"],
    batch_percentage=config["batch_percentage"],
)

In [None]:
# Show the summary of the assembled model.
model.summary()

# Train Network with RSC

In [None]:
# Exponentially decaying learning rate: starts at 0.004 and is scaled by 0.1
# over every `decay_steps` steps (continuous decay, since staircase=False).
# decay_steps is 24 times the number of full batches in the training set.
# NOTE(review): lr_schedule is not passed to the optimizer created in this
# notebook, which uses config['lr'] — confirm whether it should be.
steps_per_epoch = X_train.shape[0] // config['batch_size']
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.004,
    decay_steps=steps_per_epoch * 24,
    decay_rate=0.1,
    staircase=False,
)

In [None]:
# Plain SGD with the learning rate taken from config.json.
optimizer = tf.keras.optimizers.SGD(learning_rate=config['lr'])
# NOTE(review): from_logits=False makes the loss expect probabilities, while
# the classification head is built without a softmax — confirm that
# RSCModelWrapper applies one before computing the loss.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False) # paper was not from logits
# Accuracy against integer (sparse) class labels.
metric = tf.keras.metrics.SparseCategoricalAccuracy()

In [None]:
 # Configure the wrapper for training with the config dict, loss, metric and
 # optimizer defined in this notebook. tf_data_preprocess is passed as
 # pre_process_fc, presumably to build the tf.data input pipeline — confirm
 # in RSCModelWrapper.
 model.compile(config, loss=loss, metric=metric, optimizer=optimizer, 
               do_not_restore=True, pre_process_fc=tf_data_preprocess)

In [None]:
# Train on the three source domains; batch size, epoch count and shuffle
# buffer size come from config.json.
# NOTE(review): the held-out sketch domain (X_test, y_test) is passed as
# validation data — confirm it is used for monitoring only, not for model
# selection.
model.fit(
    X_train,
    y_train,
    batch_size=config['batch_size'],
    epochs=config["epochs"],
    buffer_size=config["buffer_size"],
    validation_data=(X_test, y_test),
)

# Evaluate the Network

## Standardize

In [None]:
# Standardize the raw train and test arrays for evaluation.
# NOTE(review): the test arrays are copied before standardizing but the
# training arrays are not — if standardize modifies its input in place,
# X_train is altered and re-running this cell would standardize it twice;
# confirm.
X_train_norm, y_train_norm = standardize(X_train, y_train)
X_test_norm, y_test_norm = standardize(X_test.copy(), y_test.copy())

In [None]:
# Evaluate on the standardized training domains (cartoon, photo, art painting).
model.evaluate(X_train_norm, y_train_norm)

In [None]:
# Evaluate on the standardized held-out sketch domain.
model.evaluate(X_test_norm, y_test_norm)