# Object Localization Exercise

@Class: Advanced Computer Vision

@Organization: VietAI

@Description: In this exercise you will localize cat faces in the given images.

Student Information
- Name : [Fill in your fullname here]
- Email  : [Fill in your email]
- Phone : [Fill in your phone]

## Mounting to Google Drive

In [None]:
# Mount Google Drive at /content/drive so files written there persist.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Relative path: it resolves against the current working directory, so this
# cell only succeeds when run from the directory that contains `drive/`.
%cd 'drive/My Drive/'

In [None]:
import os

In [None]:
if not os.path.exists("cat_face_exercise"):
    os.makedirs("cat_face_exercise")
%cd 'cat_face_exercise'

## Downloading dataset

The original dataset can be found here. In the scope of this exercise, we use its cleaned version. The dataset contains:
* train.csv : .csv file of $8996$ rows, containing image names and coordinates of cat faces in format (x0, y0, width, height)
* test.csv : .csv file of $1000$ rows, containing image names
* images/ : a folder containing $9996$ images, each of size $256 \times 256$

In [None]:
# Download and unzip the dataset only when the "images" folder is not present,
# then remove data.zip from the current directory.
# NOTE(review): dest_path is relative ('../cat_face_exercise/data.zip'); it
# matches the `rm` target only when the working directory is cat_face_exercise.
if not os.path.exists("images"):
    # Imported lazily: the package is only needed when a download happens.
    from google_drive_downloader import GoogleDriveDownloader as gdd
    gdd.download_file_from_google_drive(file_id='1WYD6F0utLMaRtdfBqE6i8cpGruMpWnXz', dest_path='../cat_face_exercise/data.zip', unzip=True)
    !rm -rf data.zip

In [None]:
import os
import random

import cv2

import numpy as np
import pandas as pd
import tensorflow as tf

from matplotlib import pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from imgaug import augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage

In [None]:
# Dataset locations, relative to the current working directory.
# CSV read into `df` (image names plus box coordinates).
TRAIN_FILE = "train.csv"
# CSV read into `test_df` (image names).
TEST_FILE = "test.csv"
# Folder that image names are joined onto when loading pictures.
IMAGE_DIR = "images"

In [None]:
# Load the labelled table and preview its first rows.
df = pd.read_csv(TRAIN_FILE)
df.head()

In [None]:
# Load the test table and preview its first rows.
test_df = pd.read_csv(TEST_FILE)
test_df.head()

In [None]:
# Hold out a validation set using train_test_split's default split size.
# A fixed random_state keeps the split identical across notebook restarts, so a
# checkpoint saved in an earlier session is never validated on images it was
# trained on. 2020 mirrors Config.seed, which is only defined in a later cell.
train_df, val_df = train_test_split(df, random_state=2020)

In [None]:
# Row counts, in order: full labelled set, training split, validation split,
# test set.
print(len(df))
print(len(train_df))
print(len(val_df))
print(len(test_df))

In [None]:
class Config:
    """Hyper-parameters and file names shared by the notebook cells."""

    # Seed handed to the per-pass shuffle of the training rows.
    seed = 2020

    # Image width and height in pixels.
    img_width = img_height = 256

    # Presumably the number of regression targets per image
    # (y0, x0, height, width); not referenced elsewhere in this notebook.
    num_classes = 4

    # Rows per batch produced by the data generator.
    batch_size = 32
    # Number of training epochs.
    epochs = 2
    # Learning rate given to the optimizer.
    lr = 1e-5

    # Verbosity level passed to training and prediction.
    verbose = 1

    # Checkpoint for the lowest validation loss / for every epoch.
    best_checkpoint_path = 'best.h5'
    latest_checkpoint_path = 'latest.h5'


config = Config()

## TODO 1: Data Loader
 You need to do the augmentation when loading data

In [None]:
class ImageGenerator:
    """Factory for endless batch generators over a table of image names."""

    @staticmethod
    def create(image_df, augument=True, is_train=True):
        """Yield batches of images (and box labels) forever.

        Declared as a static method because it takes no instance; it is called
        as ``ImageGenerator.create(...)``.

        Args:
            image_df: DataFrame with an 'ImageId' column. When ``is_train`` is
                true it must also provide 'x0', 'y0', 'width' and 'height'.
            augument: Whether the augmentation step (still a TODO) should run.
                The spelling is kept because callers pass it by keyword.
            is_train: When true, rows are reshuffled at the start of every
                pass and labels are yielded with the images.

        Yields:
            ``(images, labels)`` when ``is_train`` is true, otherwise only
            ``images``. ``images`` is a float32 array of pixel values divided
            by 255. ``labels`` holds one ``(y0, x0, height, width)`` row per
            image, each value divided by 256.

        Raises:
            ValueError: If ``image_df`` has no rows (the loop would otherwise
                spin forever without yielding).
            FileNotFoundError: If an image cannot be read.
        """
        if len(image_df) == 0:
            raise ValueError("image_df is empty")

        while True:
            if is_train:
                image_df = shuffle(image_df, random_state=config.seed)

            for start in range(0, len(image_df), config.batch_size):
                # iloc slicing stops at the last row, so the final batch may
                # be shorter than batch_size.
                batch_df = image_df.iloc[start:start + config.batch_size]
                batch_images = []

                if is_train:
                    batch_labels = np.zeros((len(batch_df), 4))

                for i, (_, row) in enumerate(batch_df.iterrows()):
                    # load image
                    image_path = os.path.join(IMAGE_DIR, row['ImageId'])
                    image = cv2.imread(image_path)
                    if image is None:
                        # cv2.imread returns None instead of raising when the
                        # file is missing or unreadable.
                        raise FileNotFoundError(
                            "Cannot read image: {}".format(image_path)
                        )

                    if is_train:
                        x0 = row['x0']
                        y0 = row['y0']
                        w = row['width']
                        h = row['height']

                        # augment data
                        if augument:
                            # TODO 1: augmentation
                            pass

                        # Label order is (y0, x0, height, width), scaled by the
                        # 256-pixel image side.
                        batch_labels[i] = (y0 / 256, x0 / 256, h / 256, w / 256)

                    batch_images.append(image / 255)

                if is_train:
                    yield np.array(batch_images, np.float32), batch_labels
                else:
                    yield np.array(batch_images, np.float32)

In [None]:
# Training batches: augument=True requests the augmentation step of
# ImageGenerator.create.
train_generator = ImageGenerator.create(
    train_df,
    augument=True
)

# Validation batches: no augmentation. is_train keeps its default (True), so
# labels are yielded and the rows are reshuffled on every pass.
val_generator = ImageGenerator.create(
    val_df,
    augument=False
)

## TODO 2: Build your own model

In [None]:
# Placeholder network: the layer list stays empty until TODO 2 is completed.
# NOTE(review): the generator yields 4 label values per image, so the last
# layer presumably needs 4 outputs — confirm when filling this in.
model = tf.keras.Sequential([
    # TODO 2: build your own model
])

# Print out model summary
model.summary()

In [None]:
# Train with mean squared error and Adam.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
model.compile(
    loss='mean_squared_error',
    optimizer=tf.keras.optimizers.Adam(learning_rate=config.lr)
)

In [None]:
# Full-model checkpoint, written only when val_loss reaches a new minimum.
best_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    config.best_checkpoint_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Full-model checkpoint, written every time (save_best_only is off).
latest_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    config.latest_checkpoint_path,
    monitor='val_loss',
    mode='min',
    save_best_only=False,
    save_weights_only=False,
    verbose=1,
)

# Stop training after val_loss fails to improve for 10 epochs.
early = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=10,
)

# Scale the learning rate by 0.2 (never below 1e-9) once the *training* loss
# has plateaued for 2 epochs.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=2,
    min_lr=1e-9,
    verbose=1,
)

callbacks_list = [best_checkpoint, latest_checkpoint, reduce_lr, early]

## Training

In [None]:
# Model.fit consumes Python generators directly; fit_generator is deprecated
# and has been removed from recent TensorFlow releases.
# The generators never terminate, so both step counts are given explicitly,
# cast to int because np.ceil returns a float.
model.fit(
    train_generator,
    steps_per_epoch=int(np.ceil(len(train_df) / config.batch_size)),
    validation_data=val_generator,
    validation_steps=int(np.ceil(len(val_df) / config.batch_size)),
    epochs=config.epochs,
    callbacks=callbacks_list,
    verbose=config.verbose
)

## Prediction

In [None]:
# Test batches: is_train=False means no shuffling and no labels, so the
# generator yields images only, in test_df row order.
test_generator = ImageGenerator.create(
    test_df,
    augument=False,
    is_train=False
)

In [None]:
# List the working directory (e.g. to check that the checkpoints were written).
ls

In [None]:
# Reload the model from the best-validation-loss checkpoint path.
pred_model = tf.keras.models.load_model(config.best_checkpoint_path)

In [None]:
# The generator is endless, so `steps` bounds the run to one pass over
# test_df. It is cast to int because np.ceil returns a float.
predict = pred_model.predict(test_generator,
                             steps=int(np.ceil(len(test_df) / config.batch_size)),
                             verbose=config.verbose)

In [None]:
# Display the raw model outputs.
predict

In [None]:
# Undo the /256 label scaling, then truncate to integers.
predict = (predict * 256).astype(int)

## Create submission file

In [None]:
def clip(val, minval, maxval):
    """Return ``val`` limited to the closed range [minval, maxval].

    The lower bound is checked first; a value inside the range is returned
    unchanged.
    """
    if val < minval:
        return minval
    return maxval if val > maxval else val

In [None]:
# clip coordinates to be inside image
for box in predict:
    y0, x0, height, width = box
    
    y1 = y0 + height
    x1 = x0 + width
    
    x0 = clip(x0, 0, 255)
    x1 = clip(x1, 0, 255)
    y0 = clip(y0, 0, 255)
    y1 = clip(y1, 0, 255)
    
    if y0 > y1:
        y0, y1 = y1, y0
    if x0 > x1:
        x0, x1 = x1, x0

    height = y1 - y0
    width = x1 - x0
    
    box[0] = y0
    box[1] = x0
    box[2] = height
    box[3] = width

In [None]:
# Display the boxes after clipping.
predict

In [None]:
# Build the submission table. The hand-in must use the train.csv format, which
# is described as (x0, y0, width, height), so columns are written in that
# order. `predict` stores each box as (y0, x0, height, width), hence the
# index mapping below.
submissions = pd.DataFrame()
submissions["ImageId"] = test_df["ImageId"]
submissions["x0"] = predict[:, 1]
submissions["y0"] = predict[:, 0]
submissions["width"] = predict[:, 3]
submissions["height"] = predict[:, 2]
submissions.head()

In [None]:
# Write the table to submission.csv without the DataFrame index column.
submissions.to_csv("submission.csv", index=False)

## TODO 3: Visualize prediction

You need to visualize predicted bounding box on given test images

In [None]:
# Number of test rows; used as the upper bound when sampling images to show.
N = len(test_df)
N

In [None]:
def draw_box(model, image_path):
    """Draw the predicted bounding box on one test image (TODO 3 stub).

    Currently does nothing and returns None.

    Args:
        model: model used for the prediction; the notebook passes the model
            loaded from the best checkpoint.
        image_path: path of the image file to visualize.
    """
    # TODO 3: draw predicted bounding box on given test images
    pass
    

In [None]:
num_sample = 10

# Show distinct test images: random.sample draws without replacement, so the
# same picture is never shown twice, and the loop variable is no longer
# overwritten inside the loop. min() keeps the call valid when the test set
# has fewer than num_sample rows.
for idx in random.sample(range(N), min(num_sample, N)):
    image_name = test_df["ImageId"].iloc[idx]
    image_path = os.path.join(IMAGE_DIR, image_name)

    draw_box(pred_model, image_path)

## TODO 4: Customize your model

You need to build a model that first checks whether a cat face exists in a given image and, if it does, predicts the cat face coordinates.

In [None]:
# TODO 4:

# What to submit?

Please send a .zip file to my email: thanglecao0412@gmail.com with the subject [VietAI Detection Exercise - Your name]. 
The .zip file includes
- This notebook when you finish
- Your submission file (.csv) on test dataset. Note that the submission file has the same format as **train.csv**
- Your best model checkpoint

# Deadline: 23:55, 23-09-2020