# Preparation



## Only for Colab

### Unzip the dataset

In [None]:
%%script false
# unzip the zip dataset
import zipfile
!unzip /content/sample_data/a.zip

In [None]:
%%script false
# Install TensorFlow v2 only in Colab
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

## Importing libraries

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
import os

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import tensorflow as tf
import numpy as np

#test if the GPU is working
'''from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
tf.test.is_gpu_available(
    cuda_only=False,
    min_cuda_compute_capability=None
)
'''

SEED = 1234
tf.random.set_seed(SEED)  
cwd = os.getcwd()

# Upload and preprocessing



The training/validation split is done automatically by the generators (through their `validation_split` argument).

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Switch for random augmentation of the training images.
apply_data_augmentation = True

# Both variants rescale pixel values to [0, 1] and reserve 20% of the
# training directory for validation; only the first one adds random
# rotations, shifts, zooms and flips (empty areas are filled with 0).
train_data_gen = (
    ImageDataGenerator(
        rescale=1./255,
        validation_split=0.2,
        rotation_range=10,
        width_shift_range=10,
        height_shift_range=10,
        zoom_range=0.3,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='constant',
        cval=0,
    )
    if apply_data_augmentation
    else ImageDataGenerator(rescale=1./255, validation_split=0.2)
)

# Test images are only rescaled.
test_data_gen = ImageDataGenerator(rescale=1./255)


### Create generators to read images from dataset directory

**.flow_from_directory:**
returns a DirectoryIterator yielding tuples of (x, y) where x is a numpy array containing a batch of images 
with shape (batch_size, *target_size, channels) and y is a numpy array of corresponding labels.

In [None]:
dataset_dir = os.path.join(cwd, 'Classification_Dataset')
print(dataset_dir)

# Batch size
bs = 8

# Image shape fed to the network; every generator below resizes to it.
img_h = 256
img_w = 256

# The position of a name in this list is its class index.
class_list = ['owl', 'galaxy','lightning', 'wine-bottle', 't-shirt', 'waterfall', 'sword', 'school-bus', 'calculator', 'sheet-music', 
              'airplanes', 'lightbulb', 'skyscraper', 'mountain-bike', 'fireworks', 'computer-monitor', 'bear', 'grand-piano', 'kangaroo', 'laptop']
# Derived from the list so the two can never disagree (20 classes).
num_classes = len(class_list)

# Training subset: shuffled, with whatever augmentation train_data_gen applies.
training_dir = os.path.join(dataset_dir, 'training')
train_gen = train_data_gen.flow_from_directory(training_dir,
                                               target_size=(img_h, img_w),
                                               classes=class_list,
                                               batch_size=bs,
                                               class_mode='categorical',
                                               shuffle=True,
                                               seed=SEED,
                                               subset='training')

# Test set: unlabeled (class_mode=None), one image per batch, in directory
# order so that predictions line up with test_gen.filenames.
test_dir = os.path.join(dataset_dir,'test')
test_gen = test_data_gen.flow_from_directory(
        test_dir,
        target_size=(img_h, img_w),
        class_mode=None,
        shuffle=False,
        batch_size=1)

# Validation subset: read through a rescale-only generator so validation
# images are never randomly augmented. ImageDataGenerator is the class
# imported in the preprocessing cell. validation_split must stay equal to the
# value given to train_data_gen so both generators partition the files the
# same way.
valid_data_gen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
valid_gen = valid_data_gen.flow_from_directory(training_dir,
                                               target_size=(img_h, img_w),
                                               batch_size=bs,
                                               classes=class_list,
                                               class_mode='categorical',
                                               shuffle=False,
                                               seed=SEED,
                                               subset='validation')

## Create Dataset objects

**tf.data.Dataset.from_generator:**
creates a Dataset whose elements are generated by generator.
The generator argument must be a callable object that returns an object that supports the iter() protocol (e.g. a generator function). The elements generated by generator must be compatible with the given output_types and (optional) output_shapes arguments.

In [None]:
# Element signature shared by both datasets: a float32 image batch of shape
# (batch, img_h, img_w, 3) and a float32 label batch of shape (batch, num_classes).
batch_types = (tf.float32, tf.float32)
batch_shapes = ([None, img_h, img_w, 3], [None, num_classes])

# Wrap the Keras iterators as tf.data datasets and repeat them indefinitely.
train_dataset = tf.data.Dataset.from_generator(
    lambda: train_gen,
    output_types=batch_types,
    output_shapes=batch_shapes,
).repeat()

valid_dataset = tf.data.Dataset.from_generator(
    lambda: valid_gen,
    output_types=batch_types,
    output_shapes=batch_shapes,
).repeat()


**Data augmentation test**

In [None]:
%%script false

import time
import matplotlib.pyplot as plt

%matplotlib notebook

fig = plt.figure()
ax = fig.gca()
fig.show()

iterator = iter(train_dataset)

for _ in range(1000):
    augmented_img, target = next(iterator)
    augmented_img = augmented_img[0] 
    augmented_img = augmented_img * 255  
    
    plt.imshow(np.uint8(augmented_img))
    fig.canvas.draw()
    time.sleep(1)

**Generators checks**

In [None]:
%%script false

iterator = iter(train_dataset)
sample, target = next(iterator)

sample = sample[18, ...]  
sample = sample * 255  

from PIL import Image
img = Image.fromarray(np.uint8(sample))
img = img.resize([128, 128])
img

# target[0] 

# Model creation and processing 

### Load model (xception)

In [None]:
# Xception backbone with ImageNet weights and no classification top
# (include_top=False), built for img_h x img_w RGB inputs.
xception = tf.keras.applications.Xception(
    include_top=False,
    weights='imagenet',
    input_shape=(img_h, img_w, 3),
)

In [None]:
# xception.summary()
# xception.layers

### Create Model

In [None]:
# True: fine-tune the backbone except its first layers.
# False: freeze the whole backbone and train only the new head.
finetuning = True

if not finetuning:
    xception.trainable = False
else:
    freeze_until = 15  # layers before this index stay frozen
    for frozen_layer in xception.layers[:freeze_until]:
        frozen_layer.trainable = False

# Backbone -> flatten -> 512-unit ReLU layer -> softmax over the classes.
model = tf.keras.Sequential([
    xception,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=512, activation='relu'),
    tf.keras.layers.Dense(units=num_classes, activation='softmax'),
])

### Visualize created model and weights

In [None]:
# Visualize created model as a table
model.summary()

# Visualize initialized weights
# model.weights[0]

### Optimization params

In [None]:
# Loss
loss = tf.keras.losses.CategoricalCrossentropy()

# learning rate
lr = 1e-4
optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

# Validation metrics
# ------------------
metrics = ['accuracy']

# Compile Model
model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

## Training 

### Without callbacks

In [None]:
%%script false

callbacks = []

early_stop = True
if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    callbacks.append(es_callback)

model.fit(x=train_dataset,
          epochs=100,  #### set repeat in training dataset
          steps_per_epoch=len(train_gen),
          validation_data=valid_dataset,
          validation_steps=len(valid_gen))

### with callbacks

In [None]:
import os
from datetime import datetime

cwd = os.getcwd()

# Root folder holding one sub-folder per training run.
exps_dir = os.path.join(cwd, 'transfer_learning_challenge_1')
os.makedirs(exps_dir, exist_ok=True)

# Timestamp that makes each run's folder name unique.
now = datetime.now().strftime('%b%d_%H-%M-%S')

model_name = 'CNN_challenge1_TL'

exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))
os.makedirs(exp_dir, exist_ok=True)

callbacks = []

# Model checkpoint: weights only, one file per epoch (cp_01.ckpt, cp_02.ckpt, ...)
ckpt_dir = os.path.join(exp_dir, 'ckpts')
os.makedirs(ckpt_dir, exist_ok=True)

ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp_{epoch:02d}.ckpt'), 
                                                   save_weights_only=True)
# The callback was created but never registered, so no checkpoint was saved.
callbacks.append(ckpt_callback)

# Visualize Learning on Tensorboard
tb_dir = os.path.join(exp_dir, 'tb_logs')
os.makedirs(tb_dir, exist_ok=True)

tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                             profile_batch=0,
                                             histogram_freq=1)
callbacks.append(tb_callback)

# Early Stopping: stop once val_loss has not improved for 10 epochs.
early_stop = True
if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    callbacks.append(es_callback)

# Both datasets repeat forever, so the step counts define how many batches
# make up one epoch / one validation pass.
model.fit(x=train_dataset,
          epochs=100, 
          steps_per_epoch=len(train_gen),
          validation_data=valid_dataset,
          validation_steps=len(valid_gen), 
          callbacks=callbacks)


## Predicting

In [None]:
# Rewind the iterator so that row i of `prediction` corresponds to
# test_gen.filenames[i] even when this cell is run more than once.
test_gen.reset()

# Model.predict accepts the Keras iterator directly; predict_generator is
# deprecated. One full pass over the test set (batch_size is 1).
prediction = model.predict(test_gen,
                           steps=len(test_gen),
                           verbose=1)

Check if predictions are ok

In [None]:
%%script false
prediction.shape
print(prediction[1])

**Check and write predictions**

In [None]:
from PIL import Image

# relations:        image file name -> predicted class index
# image_names:      file names in prediction order
# images_class_num: predicted class indices in the same order
relations = {} 
image_names = []
images_class_num = []

# test_gen was created with shuffle=False, so test_gen.filenames and the
# rows of `prediction` are in the same order.
for file_path, img_pred in zip(test_gen.filenames, prediction):
    # Index of the highest softmax output; cast to a plain Python int.
    class_num = int(img_pred.argmax())
    # Keep only the file name. The old `.replace('sub\\', '')` stripped the
    # sub-folder prefix on Windows only; normalising the separator first
    # makes this work with both '/' and '\\' paths.
    image_name = os.path.basename(file_path.replace('\\', '/'))
    relations[image_name] = class_num
    image_names.append(image_name)
    images_class_num.append(class_num)



**Write the results**

In [None]:
import csv

# Write one "Id,Category" row per test image.
# - `with` closes (and therefore flushes) the file; the handle was
#   previously never closed.
# - newline="" is what the csv module expects and avoids blank lines between
#   rows on Windows.
# - The rows are passed straight from the dict, so the builtin `list` is no
#   longer shadowed by a variable of the same name.
with open("challenge1TL_output.csv", "w", newline="") as csv_file:
    w = csv.writer(csv_file)
    w.writerow(["Id", "Category"])
    w.writerows(relations.items())